diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/netronome/nfp | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/netronome/nfp')
95 files changed, 53126 insertions, 0 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile new file mode 100644 index 000000000..9c0861d03 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_NFP) += nfp.o + +nfp-objs := \ + nfpcore/nfp6000_pcie.o \ + nfpcore/nfp_cppcore.o \ + nfpcore/nfp_cpplib.o \ + nfpcore/nfp_dev.o \ + nfpcore/nfp_hwinfo.o \ + nfpcore/nfp_mip.o \ + nfpcore/nfp_mutex.o \ + nfpcore/nfp_nffw.o \ + nfpcore/nfp_nsp.o \ + nfpcore/nfp_nsp_cmds.o \ + nfpcore/nfp_nsp_eth.o \ + nfpcore/nfp_resource.o \ + nfpcore/nfp_rtsym.o \ + nfpcore/nfp_target.o \ + ccm.o \ + ccm_mbox.o \ + devlink_param.o \ + nfp_asm.o \ + nfd3/dp.o \ + nfd3/rings.o \ + nfd3/xsk.o \ + nfdk/dp.o \ + nfdk/rings.o \ + nfp_app.o \ + nfp_app_nic.o \ + nfp_devlink.o \ + nfp_hwmon.o \ + nfp_main.o \ + nfp_net_common.o \ + nfp_net_dp.o \ + nfp_net_ctrl.o \ + nfp_net_debugdump.o \ + nfp_net_ethtool.o \ + nfp_net_main.o \ + nfp_net_repr.o \ + nfp_net_sriov.o \ + nfp_net_xsk.o \ + nfp_netvf_main.o \ + nfp_port.o \ + nfp_shared_buf.o \ + nic/main.o + +ifeq ($(CONFIG_TLS_DEVICE),y) +nfp-objs += \ + crypto/tls.o +endif + +ifeq ($(CONFIG_NFP_APP_FLOWER),y) +nfp-objs += \ + flower/action.o \ + flower/cmsg.o \ + flower/lag_conf.o \ + flower/main.o \ + flower/match.o \ + flower/metadata.o \ + flower/offload.o \ + flower/tunnel_conf.o \ + flower/qos_conf.o \ + flower/conntrack.o +endif + +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp-objs += \ + bpf/cmsg.o \ + bpf/main.o \ + bpf/offload.o \ + bpf/verifier.o \ + bpf/jit.o +endif + +ifeq ($(CONFIG_NFP_APP_ABM_NIC),y) +nfp-objs += \ + abm/cls.o \ + abm/ctrl.o \ + abm/qdisc.o \ + abm/main.o +endif + +nfp-$(CONFIG_NFP_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c new file mode 100644 index 000000000..23ebddfb9 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <net/pkt_cls.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfp_app.h" +#include "../nfp_net_repr.h" +#include "main.h" + +struct nfp_abm_u32_match { + u32 handle; + u32 band; + u8 mask; + u8 val; + struct list_head list; +}; + +static bool +nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode, + __be16 proto, struct netlink_ext_ack *extack) +{ + struct tc_u32_key *k; + unsigned int tos_off; + + if (knode->exts && tcf_exts_has_actions(knode->exts)) { + NL_SET_ERR_MSG_MOD(extack, "action offload not supported"); + return false; + } + if (knode->link_handle) { + NL_SET_ERR_MSG_MOD(extack, "linking not supported"); + return false; + } + if (knode->sel->flags != TC_U32_TERMINAL) { + NL_SET_ERR_MSG_MOD(extack, + "flags must be equal to TC_U32_TERMINAL"); + return false; + } + if (knode->sel->off || knode->sel->offshift || knode->sel->offmask || + knode->sel->offoff || knode->fshift) { + NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported"); + return false; + } + if (knode->sel->hoff || knode->sel->hmask) { + NL_SET_ERR_MSG_MOD(extack, "hashing not supported"); + return false; + } + if (knode->val || knode->mask) { + NL_SET_ERR_MSG_MOD(extack, "matching on mark not supported"); + return false; + } + if (knode->res && knode->res->class) { + NL_SET_ERR_MSG_MOD(extack, "setting non-0 class not supported"); + return false; + } + if (knode->res && knode->res->classid >= abm->num_bands) { + NL_SET_ERR_MSG_MOD(extack, + "classid higher than number of bands"); + return false; + } + if (knode->sel->nkeys != 1) { + NL_SET_ERR_MSG_MOD(extack, "exactly one key required"); + return false; + } + + switch (proto) { + case htons(ETH_P_IP): + tos_off = 16; + break; + case htons(ETH_P_IPV6): + tos_off = 20; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "only IP and IPv6 supported as filter protocol"); + return false; + } + + k = &knode->sel->keys[0]; + if (k->offmask) { + NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported"); + return false; + } + if (k->off) { + NL_SET_ERR_MSG_MOD(extack, "only DSCP fields can be matched"); + return false; + } + if (k->val & ~k->mask) { + NL_SET_ERR_MSG_MOD(extack, "mask does not cover the key"); + return false; + } + if (be32_to_cpu(k->mask) >> tos_off & ~abm->dscp_mask) { + NL_SET_ERR_MSG_MOD(extack, "only high DSCP class selector bits can be used"); + nfp_err(abm->app->cpp, + "u32 offload: requested mask %x FW can support only %x\n", + be32_to_cpu(k->mask) >> tos_off, abm->dscp_mask); + return false; + } + + return true; +} + +/* This filter list -> map conversion is O(n * m), we expect single digit or + * low double digit number of prios and likewise for the filters. Also u32 + * doesn't report stats, so it's really only setup time cost. + */ +static unsigned int +nfp_abm_find_band_for_prio(struct nfp_abm_link *alink, unsigned int prio) +{ + struct nfp_abm_u32_match *iter; + + list_for_each_entry(iter, &alink->dscp_map, list) + if ((prio & iter->mask) == iter->val) + return iter->band; + + return alink->def_band; +} + +static int nfp_abm_update_band_map(struct nfp_abm_link *alink) +{ + unsigned int i, bits_per_prio, prios_per_word, base_shift; + struct nfp_abm *abm = alink->abm; + u32 field_mask; + + alink->has_prio = !list_empty(&alink->dscp_map); + + bits_per_prio = roundup_pow_of_two(order_base_2(abm->num_bands)); + field_mask = (1 << bits_per_prio) - 1; + prios_per_word = sizeof(u32) * BITS_PER_BYTE / bits_per_prio; + + /* FW mask applies from top bits */ + base_shift = 8 - order_base_2(abm->num_prios); + + for (i = 0; i < abm->num_prios; i++) { + unsigned int offset; + u32 *word; + u8 band; + + word = &alink->prio_map[i / prios_per_word]; + offset = (i % prios_per_word) * bits_per_prio; + + band = nfp_abm_find_band_for_prio(alink, i << base_shift); + + *word &= ~(field_mask << offset); + *word |= band << offset; + } + + /* Qdisc offload status may change if has_prio changed */ + nfp_abm_qdisc_offload_update(alink); + + return nfp_abm_ctrl_prio_map_update(alink, alink->prio_map); +} + +static void +nfp_abm_u32_knode_delete(struct nfp_abm_link *alink, + struct tc_cls_u32_knode *knode) +{ + struct nfp_abm_u32_match *iter; + + list_for_each_entry(iter, &alink->dscp_map, list) + if (iter->handle == knode->handle) { + list_del(&iter->list); + kfree(iter); + nfp_abm_update_band_map(alink); + return; + } +} + +static int +nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, + struct tc_cls_u32_knode *knode, + __be16 proto, struct netlink_ext_ack *extack) +{ + struct nfp_abm_u32_match *match = NULL, *iter; + unsigned int tos_off; + u8 mask, val; + int err; + + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) + goto err_delete; + + tos_off = proto == htons(ETH_P_IP) ? 16 : 20; + + /* Extract the DSCP Class Selector bits */ + val = be32_to_cpu(knode->sel->keys[0].val) >> tos_off & 0xff; + mask = be32_to_cpu(knode->sel->keys[0].mask) >> tos_off & 0xff; + + /* Check if there is no conflicting mapping and find match by handle */ + list_for_each_entry(iter, &alink->dscp_map, list) { + u32 cmask; + + if (iter->handle == knode->handle) { + match = iter; + continue; + } + + cmask = iter->mask & mask; + if ((iter->val & cmask) == (val & cmask) && + iter->band != knode->res->classid) { + NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); + goto err_delete; + } + } + + if (!match) { + match = kzalloc(sizeof(*match), GFP_KERNEL); + if (!match) + return -ENOMEM; + list_add(&match->list, &alink->dscp_map); + } + match->handle = knode->handle; + match->band = knode->res->classid; + match->mask = mask; + match->val = val; + + err = nfp_abm_update_band_map(alink); + if (err) + goto err_delete; + + return 0; + +err_delete: + nfp_abm_u32_knode_delete(alink, knode); + return -EOPNOTSUPP; +} + +static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct tc_cls_u32_offload *cls_u32 = type_data; + struct nfp_repr *repr = cb_priv; + struct nfp_abm_link *alink; + + alink = repr->app_priv; + + if (type != TC_SETUP_CLSU32) { + NL_SET_ERR_MSG_MOD(cls_u32->common.extack, + "only offload of u32 classifier supported"); + return -EOPNOTSUPP; + } + if (!tc_cls_can_offload_and_chain0(repr->netdev, &cls_u32->common)) + return -EOPNOTSUPP; + + if (cls_u32->common.protocol != htons(ETH_P_IP) && + cls_u32->common.protocol != htons(ETH_P_IPV6)) { + NL_SET_ERR_MSG_MOD(cls_u32->common.extack, + "only IP and IPv6 supported as filter protocol"); + return -EOPNOTSUPP; + } + + switch (cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return nfp_abm_u32_knode_replace(alink, &cls_u32->knode, + cls_u32->common.protocol, + cls_u32->common.extack); + case TC_CLSU32_DELETE_KNODE: + nfp_abm_u32_knode_delete(alink, &cls_u32->knode); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(nfp_abm_block_cb_list); + +int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, + struct flow_block_offload *f) +{ + return flow_block_cb_setup_simple(f, &nfp_abm_block_cb_list, + nfp_abm_setup_tc_block_cb, + repr, repr, true); +} diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c new file mode 100644 index 000000000..69e84ff7f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/log2.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" +#include "../nfp_app.h" +#include "../nfp_abi.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "main.h" + +#define NFP_NUM_PRIOS_SYM_NAME "_abi_pci_dscp_num_prio_%u" +#define NFP_NUM_BANDS_SYM_NAME "_abi_pci_dscp_num_band_%u" +#define NFP_ACT_MASK_SYM_NAME "_abi_nfd_out_q_actions_%u" + +#define NFP_RED_SUPPORT_SYM_NAME "_abi_nfd_out_red_offload_%u" + +#define NFP_QLVL_SYM_NAME "_abi_nfd_out_q_lvls_%u%s" +#define NFP_QLVL_STRIDE 16 +#define NFP_QLVL_BLOG_BYTES 0 +#define NFP_QLVL_BLOG_PKTS 4 +#define NFP_QLVL_THRS 8 +#define NFP_QLVL_ACT 12 + +#define NFP_QMSTAT_SYM_NAME "_abi_nfdqm%u_stats%s" +#define NFP_QMSTAT_STRIDE 32 +#define NFP_QMSTAT_NON_STO 0 +#define NFP_QMSTAT_STO 8 +#define NFP_QMSTAT_DROP 16 +#define NFP_QMSTAT_ECN 24 + +#define NFP_Q_STAT_SYM_NAME "_abi_nfd_rxq_stats%u%s" +#define NFP_Q_STAT_STRIDE 16 +#define NFP_Q_STAT_PKTS 0 +#define NFP_Q_STAT_BYTES 8 + +#define NFP_NET_ABM_MBOX_CMD NFP_NET_CFG_MBOX_SIMPLE_CMD +#define NFP_NET_ABM_MBOX_RET NFP_NET_CFG_MBOX_SIMPLE_RET +#define NFP_NET_ABM_MBOX_DATALEN NFP_NET_CFG_MBOX_SIMPLE_VAL +#define NFP_NET_ABM_MBOX_RESERVED (NFP_NET_CFG_MBOX_SIMPLE_VAL + 4) +#define NFP_NET_ABM_MBOX_DATA (NFP_NET_CFG_MBOX_SIMPLE_VAL + 8) + +static int +nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym, + unsigned int stride, unsigned int offset, unsigned int band, + unsigned int queue, bool is_u64, u64 *res) +{ + struct nfp_cpp *cpp = alink->abm->app->cpp; + u64 val, sym_offset; + unsigned int qid; + u32 val32; + int err; + + qid = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue; + + sym_offset = qid * stride + offset; + if (is_u64) + err = __nfp_rtsym_readq(cpp, sym, 3, 0, sym_offset, &val); + else + err = __nfp_rtsym_readl(cpp, sym, 3, 0, sym_offset, &val32); + if (err) { + nfp_err(cpp, "RED offload reading stat failed on vNIC %d band %d queue %d (+ %d)\n", + alink->id, band, queue, alink->queue_base); + return err; + } + + *res = is_u64 ? val : val32; + return 0; +} + +int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val) +{ + struct nfp_cpp *cpp = abm->app->cpp; + u64 sym_offset; + int err; + + __clear_bit(id, abm->threshold_undef); + if (abm->thresholds[id] == val) + return 0; + + sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_THRS; + err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, val); + if (err) { + nfp_err(cpp, + "RED offload setting level failed on subqueue %d\n", + id); + return err; + } + + abm->thresholds[id] = val; + return 0; +} + +int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, u32 val) +{ + unsigned int threshold; + + threshold = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue; + + return __nfp_abm_ctrl_set_q_lvl(alink->abm, threshold, val); +} + +int __nfp_abm_ctrl_set_q_act(struct nfp_abm *abm, unsigned int id, + enum nfp_abm_q_action act) +{ + struct nfp_cpp *cpp = abm->app->cpp; + u64 sym_offset; + int err; + + if (abm->actions[id] == act) + return 0; + + sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_ACT; + err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, act); + if (err) { + nfp_err(cpp, + "RED offload setting action failed on subqueue %d\n", + id); + return err; + } + + abm->actions[id] = act; + return 0; +} + +int nfp_abm_ctrl_set_q_act(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, enum nfp_abm_q_action act) +{ + unsigned int qid; + + qid = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue; + + return __nfp_abm_ctrl_set_q_act(alink->abm, qid, act); +} + +u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int queue) +{ + unsigned int band; + u64 val, sum = 0; + + for (band = 0; band < alink->abm->num_bands; band++) { + if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_NON_STO, + band, queue, true, &val)) + return 0; + sum += val; + } + + return sum; +} + +u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int queue) +{ + unsigned int band; + u64 val, sum = 0; + + for (band = 0; band < alink->abm->num_bands; band++) { + if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_STO, + band, queue, true, &val)) + return 0; + sum += val; + } + + return sum; +} + +static int +nfp_abm_ctrl_stat_basic(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, unsigned int off, u64 *val) +{ + if (!nfp_abm_has_prio(alink->abm)) { + if (!band) { + unsigned int id = alink->queue_base + queue; + + *val = nn_readq(alink->vnic, + NFP_NET_CFG_RXR_STATS(id) + off); + } else { + *val = 0; + } + + return 0; + } else { + return nfp_abm_ctrl_stat(alink, alink->abm->q_stats, + NFP_Q_STAT_STRIDE, off, band, queue, + true, val); + } +} + +int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, struct nfp_alink_stats *stats) +{ + int err; + + err = nfp_abm_ctrl_stat_basic(alink, band, queue, NFP_Q_STAT_PKTS, + &stats->tx_pkts); + if (err) + return err; + + err = nfp_abm_ctrl_stat_basic(alink, band, queue, NFP_Q_STAT_BYTES, + &stats->tx_bytes); + if (err) + return err; + + err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls, NFP_QLVL_STRIDE, + NFP_QLVL_BLOG_BYTES, band, queue, false, + &stats->backlog_bytes); + if (err) + return err; + + err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls, + NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS, + band, queue, false, &stats->backlog_pkts); + if (err) + return err; + + err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP, + band, queue, true, &stats->drops); + if (err) + return err; + + return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN, + band, queue, true, &stats->overlimits); +} + +int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, + unsigned int band, unsigned int queue, + struct nfp_alink_xstats *xstats) +{ + int err; + + err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP, + band, queue, true, &xstats->pdrop); + if (err) + return err; + + return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, + NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN, + band, queue, true, &xstats->ecn_marked); +} + +int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm) +{ + return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE, + NULL, 0, NULL, 0); +} + +int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm) +{ + return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_DISABLE, + NULL, 0, NULL, 0); +} + +int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed) +{ + const u32 cmd = NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET; + struct nfp_net *nn = alink->vnic; + unsigned int i; + int err; + + err = nfp_net_mbox_lock(nn, alink->abm->prio_map_len); + if (err) + return err; + + /* Write data_len and wipe reserved */ + nn_writeq(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATALEN, + alink->abm->prio_map_len); + + for (i = 0; i < alink->abm->prio_map_len; i += sizeof(u32)) + nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATA + i, + packed[i / sizeof(u32)]); + + err = nfp_net_mbox_reconfig_and_unlock(nn, cmd); + if (err) + nfp_err(alink->abm->app->cpp, + "setting DSCP -> VQ map failed with error %d\n", err); + return err; +} + +static int nfp_abm_ctrl_prio_check_params(struct nfp_abm_link *alink) +{ + struct nfp_abm *abm = alink->abm; + struct nfp_net *nn = alink->vnic; + unsigned int min_mbox_sz; + + if (!nfp_abm_has_prio(alink->abm)) + return 0; + + min_mbox_sz = NFP_NET_ABM_MBOX_DATA + alink->abm->prio_map_len; + if (nn->tlv_caps.mbox_len < min_mbox_sz) { + nfp_err(abm->app->pf->cpp, "vNIC mailbox too small for prio offload: %u, need: %u\n", + nn->tlv_caps.mbox_len, min_mbox_sz); + return -EINVAL; + } + + return 0; +} + +int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink) +{ + alink->queue_base = nn_readl(alink->vnic, NFP_NET_CFG_START_RXQ); + alink->queue_base /= alink->vnic->stride_rx; + + return nfp_abm_ctrl_prio_check_params(alink); +} + +static unsigned int nfp_abm_ctrl_prio_map_size(struct nfp_abm *abm) +{ + unsigned int size; + + size = roundup_pow_of_two(order_base_2(abm->num_bands)); + size = DIV_ROUND_UP(size * abm->num_prios, BITS_PER_BYTE); + size = round_up(size, sizeof(u32)); + + return size; +} + +static const struct nfp_rtsym * +nfp_abm_ctrl_find_rtsym(struct nfp_pf *pf, const char *name, unsigned int size) +{ + const struct nfp_rtsym *sym; + + sym = nfp_rtsym_lookup(pf->rtbl, name); + if (!sym) { + nfp_err(pf->cpp, "Symbol '%s' not found\n", name); + return ERR_PTR(-ENOENT); + } + if (nfp_rtsym_size(sym) != size) { + nfp_err(pf->cpp, + "Symbol '%s' wrong size: expected %u got %llu\n", + name, size, nfp_rtsym_size(sym)); + return ERR_PTR(-EINVAL); + } + + return sym; +} + +static const struct nfp_rtsym * +nfp_abm_ctrl_find_q_rtsym(struct nfp_abm *abm, const char *name_fmt, + size_t size) +{ + char pf_symbol[64]; + + size = array3_size(size, abm->num_bands, NFP_NET_MAX_RX_RINGS); + snprintf(pf_symbol, sizeof(pf_symbol), name_fmt, + abm->pf_id, nfp_abm_has_prio(abm) ? "_per_band" : ""); + + return nfp_abm_ctrl_find_rtsym(abm->app->pf, pf_symbol, size); +} + +int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm) +{ + struct nfp_pf *pf = abm->app->pf; + const struct nfp_rtsym *sym; + int res; + + abm->pf_id = nfp_cppcore_pcie_unit(pf->cpp); + + /* Check if Qdisc offloads are supported */ + res = nfp_pf_rtsym_read_optional(pf, NFP_RED_SUPPORT_SYM_NAME, 1); + if (res < 0) + return res; + abm->red_support = res; + + /* Read count of prios and prio bands */ + res = nfp_pf_rtsym_read_optional(pf, NFP_NUM_BANDS_SYM_NAME, 1); + if (res < 0) + return res; + abm->num_bands = res; + + res = nfp_pf_rtsym_read_optional(pf, NFP_NUM_PRIOS_SYM_NAME, 1); + if (res < 0) + return res; + abm->num_prios = res; + + /* Read available actions */ + res = nfp_pf_rtsym_read_optional(pf, NFP_ACT_MASK_SYM_NAME, + BIT(NFP_ABM_ACT_MARK_DROP)); + if (res < 0) + return res; + abm->action_mask = res; + + abm->prio_map_len = nfp_abm_ctrl_prio_map_size(abm); + abm->dscp_mask = GENMASK(7, 8 - order_base_2(abm->num_prios)); + + /* Check values are sane, U16_MAX is arbitrarily chosen as max */ + if (!is_power_of_2(abm->num_bands) || !is_power_of_2(abm->num_prios) || + abm->num_bands > U16_MAX || abm->num_prios > U16_MAX || + (abm->num_bands == 1) != (abm->num_prios == 1)) { + nfp_err(pf->cpp, + "invalid priomap description num bands: %u and num prios: %u\n", + abm->num_bands, abm->num_prios); + return -EINVAL; + } + + /* Find level and stat symbols */ + if (!abm->red_support) + return 0; + + sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_QLVL_SYM_NAME, + NFP_QLVL_STRIDE); + if (IS_ERR(sym)) + return PTR_ERR(sym); + abm->q_lvls = sym; + + sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_QMSTAT_SYM_NAME, + NFP_QMSTAT_STRIDE); + if (IS_ERR(sym)) + return PTR_ERR(sym); + abm->qm_stats = sym; + + if (nfp_abm_has_prio(abm)) { + sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_Q_STAT_SYM_NAME, + NFP_Q_STAT_STRIDE); + if (IS_ERR(sym)) + return PTR_ERR(sym); + abm->q_stats = sym; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c new file mode 100644 index 000000000..5d3df28c6 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/etherdevice.h> +#include <linux/lockdep.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "../nfpcore/nfp.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "../nfp_port.h" +#include "main.h" + +static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id) +{ + return FIELD_PREP(NFP_ABM_PORTID_TYPE, rtype) | + FIELD_PREP(NFP_ABM_PORTID_ID, id); +} + +static int +nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port || port->type != NFP_PORT_PF_PORT) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ROOT_QDISC: + return nfp_abm_setup_root(netdev, repr->app_priv, type_data); + case TC_SETUP_QDISC_MQ: + return nfp_abm_setup_tc_mq(netdev, repr->app_priv, type_data); + case TC_SETUP_QDISC_RED: + return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data); + case TC_SETUP_QDISC_GRED: + return nfp_abm_setup_tc_gred(netdev, repr->app_priv, type_data); + case TC_SETUP_BLOCK: + return nfp_abm_setup_cls_block(netdev, repr, type_data); + default: + return -EOPNOTSUPP; + } +} + +static struct net_device * +nfp_abm_repr_get(struct nfp_app *app, u32 port_id, bool *redir_egress) +{ + enum nfp_repr_type rtype; + struct nfp_reprs *reprs; + u8 port; + + rtype = FIELD_GET(NFP_ABM_PORTID_TYPE, port_id); + port = FIELD_GET(NFP_ABM_PORTID_ID, port_id); + + reprs = rcu_dereference(app->reprs[rtype]); + if (!reprs) + return NULL; + + if (port >= reprs->num_reprs) + return NULL; + + return rcu_dereference(reprs->reprs[port]); +} + +static int +nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink, + enum nfp_port_type ptype) +{ + struct net_device *netdev; + enum nfp_repr_type rtype; + struct nfp_reprs *reprs; + struct nfp_repr *repr; + struct nfp_port *port; + unsigned int txqs; + int err; + + if (ptype == NFP_PORT_PHYS_PORT) { + rtype = NFP_REPR_TYPE_PHYS_PORT; + txqs = 1; + } else { + rtype = NFP_REPR_TYPE_PF; + txqs = alink->vnic->max_rx_rings; + } + + netdev = nfp_repr_alloc_mqs(app, txqs, 1); + if (!netdev) + return -ENOMEM; + repr = netdev_priv(netdev); + repr->app_priv = alink; + + port = nfp_port_alloc(app, ptype, netdev); + if (IS_ERR(port)) { + err = PTR_ERR(port); + goto err_free_repr; + } + + if (ptype == NFP_PORT_PHYS_PORT) { + port->eth_forced = true; + err = nfp_port_init_phy_port(app->pf, app, port, alink->id); + if (err) + goto err_free_port; + } else { + port->pf_id = alink->abm->pf_id; + port->pf_split = app->pf->max_data_vnics > 1; + port->pf_split_id = alink->id; + port->vnic = alink->vnic->dp.ctrl_bar; + } + + SET_NETDEV_DEV(netdev, &alink->vnic->pdev->dev); + eth_hw_addr_random(netdev); + + err = nfp_repr_init(app, netdev, nfp_abm_portid(rtype, alink->id), + port, alink->vnic->dp.netdev); + if (err) + goto err_free_port; + + reprs = nfp_reprs_get_locked(app, rtype); + WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr"); + rtnl_lock(); + rcu_assign_pointer(reprs->reprs[alink->id], netdev); + rtnl_unlock(); + + nfp_info(app->cpp, "%s Port %d Representor(%s) created\n", + ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys", + alink->id, netdev->name); + + return 0; + +err_free_port: + nfp_port_free(port); +err_free_repr: + nfp_repr_free(netdev); + return err; +} + +static void +nfp_abm_kill_repr(struct nfp_app *app, struct nfp_abm_link *alink, + enum nfp_repr_type rtype) +{ + struct net_device *netdev; + struct nfp_reprs *reprs; + + reprs = nfp_reprs_get_locked(app, rtype); + netdev = nfp_repr_get_locked(app, reprs, alink->id); + if (!netdev) + return; + rtnl_lock(); + rcu_assign_pointer(reprs->reprs[alink->id], NULL); + rtnl_unlock(); + synchronize_rcu(); + /* Cast to make sure nfp_repr_clean_and_free() takes a nfp_repr */ + nfp_repr_clean_and_free((struct nfp_repr *)netdev_priv(netdev)); +} + +static void +nfp_abm_kill_reprs(struct nfp_abm *abm, struct nfp_abm_link *alink) +{ + nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PF); + nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PHYS_PORT); +} + +static void nfp_abm_kill_reprs_all(struct nfp_abm *abm) +{ + struct nfp_pf *pf = abm->app->pf; + struct nfp_net *nn; + + list_for_each_entry(nn, &pf->vnics, vnic_list) + nfp_abm_kill_reprs(abm, (struct nfp_abm_link *)nn->app_priv); +} + +static enum devlink_eswitch_mode nfp_abm_eswitch_mode_get(struct nfp_app *app) +{ + struct nfp_abm *abm = app->priv; + + return abm->eswitch_mode; +} + +static int nfp_abm_eswitch_set_legacy(struct nfp_abm *abm) +{ + nfp_abm_kill_reprs_all(abm); + nfp_abm_ctrl_qm_disable(abm); + + abm->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; +} + +static void nfp_abm_eswitch_clean_up(struct nfp_abm *abm) +{ + if (abm->eswitch_mode != DEVLINK_ESWITCH_MODE_LEGACY) + WARN_ON(nfp_abm_eswitch_set_legacy(abm)); +} + +static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm) +{ + struct nfp_app *app = abm->app; + struct nfp_pf *pf = app->pf; + struct nfp_net *nn; + int err; + + if (!abm->red_support) + return -EOPNOTSUPP; + + err = nfp_abm_ctrl_qm_enable(abm); + if (err) + return err; + + list_for_each_entry(nn, &pf->vnics, vnic_list) { + struct nfp_abm_link *alink = nn->app_priv; + + err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PHYS_PORT); + if (err) + goto err_kill_all_reprs; + + err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PF_PORT); + if (err) + goto err_kill_all_reprs; + } + + abm->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + +err_kill_all_reprs: + nfp_abm_kill_reprs_all(abm); + nfp_abm_ctrl_qm_disable(abm); + return err; +} + +static int nfp_abm_eswitch_mode_set(struct nfp_app *app, u16 mode) +{ + struct nfp_abm *abm = app->priv; + + if (abm->eswitch_mode == mode) + return 0; + + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + return nfp_abm_eswitch_set_legacy(abm); + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + return nfp_abm_eswitch_set_switchdev(abm); + default: + return -EINVAL; + } +} + +static void +nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn, + unsigned int id) +{ + struct nfp_eth_table_port *eth_port = &pf->eth_tbl->ports[id]; + u8 mac_addr[ETH_ALEN]; + struct nfp_nsp *nsp; + char hwinfo[32]; + int err; + + if (id > pf->eth_tbl->count) { + nfp_warn(pf->cpp, "No entry for persistent MAC address\n"); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + snprintf(hwinfo, sizeof(hwinfo), "eth%u.mac.pf%u", + eth_port->eth_index, abm->pf_id); + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + nfp_warn(pf->cpp, "Failed to access the NSP for persistent MAC address: %ld\n", + PTR_ERR(nsp)); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + if (!nfp_nsp_has_hwinfo_lookup(nsp)) { + nfp_warn(pf->cpp, "NSP doesn't support PF MAC generation\n"); + eth_hw_addr_random(nn->dp.netdev); + nfp_nsp_close(nsp); + return; + } + + err = nfp_nsp_hwinfo_lookup(nsp, hwinfo, sizeof(hwinfo)); + nfp_nsp_close(nsp); + if (err) { + nfp_warn(pf->cpp, "Reading persistent MAC address failed: %d\n", + err); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + if (sscanf(hwinfo, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) { + nfp_warn(pf->cpp, "Can't parse persistent MAC address (%s)\n", + hwinfo); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + eth_hw_addr_set(nn->dp.netdev, mac_addr); + ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr); +} + +static int +nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + struct nfp_eth_table_port *eth_port = &app->pf->eth_tbl->ports[id]; + struct nfp_abm *abm = app->priv; + struct nfp_abm_link *alink; + int err; + + alink = kzalloc(sizeof(*alink), GFP_KERNEL); + if (!alink) + return -ENOMEM; + nn->app_priv = alink; + alink->abm = abm; + alink->vnic = nn; + alink->id = id; + alink->total_queues = alink->vnic->max_rx_rings; + + INIT_LIST_HEAD(&alink->dscp_map); + + err = nfp_abm_ctrl_read_params(alink); + if (err) + goto err_free_alink; + + alink->prio_map = kzalloc(abm->prio_map_len, GFP_KERNEL); + if (!alink->prio_map) { + err = -ENOMEM; + goto err_free_alink; + } + + /* This is a multi-host app, make sure MAC/PHY is up, but don't + * make the MAC/PHY state follow the state of any of the ports. + */ + err = nfp_eth_set_configured(app->cpp, eth_port->index, true); + if (err < 0) + goto err_free_priomap; + + netif_keep_dst(nn->dp.netdev); + + nfp_abm_vnic_set_mac(app->pf, abm, nn, id); + INIT_RADIX_TREE(&alink->qdiscs, GFP_KERNEL); + + return 0; + +err_free_priomap: + kfree(alink->prio_map); +err_free_alink: + kfree(alink); + return err; +} + +static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_abm_link *alink = nn->app_priv; + + nfp_abm_kill_reprs(alink->abm, alink); + WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n"); + kfree(alink->prio_map); + kfree(alink); +} + +static int nfp_abm_vnic_init(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_abm_link *alink = nn->app_priv; + + if (nfp_abm_has_prio(alink->abm)) + return nfp_abm_ctrl_prio_map_update(alink, alink->prio_map); + return 0; +} + +static u64 * +nfp_abm_port_get_stats(struct nfp_app *app, struct nfp_port *port, u64 *data) +{ + struct nfp_repr *repr = netdev_priv(port->netdev); + struct nfp_abm_link *alink; + unsigned int i; + + if (port->type != NFP_PORT_PF_PORT) + return data; + alink = repr->app_priv; + for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) { + *data++ = nfp_abm_ctrl_stat_non_sto(alink, i); + *data++ = nfp_abm_ctrl_stat_sto(alink, i); + } + return data; +} + +static int +nfp_abm_port_get_stats_count(struct nfp_app *app, struct nfp_port *port) +{ + struct nfp_repr *repr = netdev_priv(port->netdev); + struct nfp_abm_link *alink; + + if (port->type != NFP_PORT_PF_PORT) + return 0; + alink = repr->app_priv; + return alink->vnic->dp.num_r_vecs * 2; +} + +static u8 * +nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port, + u8 *data) +{ + struct nfp_repr *repr = netdev_priv(port->netdev); + struct nfp_abm_link *alink; + unsigned int i; + + if (port->type != NFP_PORT_PF_PORT) + return data; + alink = repr->app_priv; + for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) { + ethtool_sprintf(&data, "q%u_no_wait", i); + ethtool_sprintf(&data, "q%u_delayed", i); + } + return data; +} + +static int nfp_abm_fw_init_reset(struct nfp_abm *abm) +{ + unsigned int i; + + if (!abm->red_support) + return 0; + + for (i = 0; i < abm->num_bands * NFP_NET_MAX_RX_RINGS; i++) { + __nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY); + __nfp_abm_ctrl_set_q_act(abm, i, NFP_ABM_ACT_DROP); + } + + return nfp_abm_ctrl_qm_disable(abm); +} + +static int nfp_abm_init(struct nfp_app *app) +{ + struct nfp_pf *pf = app->pf; + struct nfp_reprs *reprs; + struct nfp_abm *abm; + int err; + + if (!pf->eth_tbl) { + nfp_err(pf->cpp, "ABM NIC requires ETH table\n"); + return -EINVAL; + } + if (pf->max_data_vnics != pf->eth_tbl->count) { + nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n", + pf->max_data_vnics, pf->eth_tbl->count); + return -EINVAL; + } + if (!pf->mac_stats_bar) { + nfp_warn(app->cpp, "ABM NIC requires mac_stats symbol\n"); + return -EINVAL; + } + + abm = kzalloc(sizeof(*abm), GFP_KERNEL); + if (!abm) + return -ENOMEM; + app->priv = abm; + abm->app = app; + + err = nfp_abm_ctrl_find_addrs(abm); + if (err) + goto err_free_abm; + + err = -ENOMEM; + abm->num_thresholds = array_size(abm->num_bands, NFP_NET_MAX_RX_RINGS); + abm->threshold_undef = bitmap_zalloc(abm->num_thresholds, GFP_KERNEL); + if (!abm->threshold_undef) + goto err_free_abm; + + abm->thresholds = kvcalloc(abm->num_thresholds, + sizeof(*abm->thresholds), GFP_KERNEL); + if (!abm->thresholds) + goto err_free_thresh_umap; + + abm->actions = kvcalloc(abm->num_thresholds, sizeof(*abm->actions), + GFP_KERNEL); + if (!abm->actions) + goto err_free_thresh; + + /* We start in legacy mode, make sure advanced queuing is disabled */ + err = nfp_abm_fw_init_reset(abm); + if (err) + goto err_free_act; + + err = -ENOMEM; + reprs = nfp_reprs_alloc(pf->max_data_vnics); + if (!reprs) + goto err_free_act; + RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs); + + reprs = nfp_reprs_alloc(pf->max_data_vnics); + if (!reprs) + goto err_free_phys; + RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PF], reprs); + + return 0; + +err_free_phys: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); +err_free_act: + kvfree(abm->actions); +err_free_thresh: + kvfree(abm->thresholds); +err_free_thresh_umap: + bitmap_free(abm->threshold_undef); +err_free_abm: + kfree(abm); + app->priv = NULL; + return err; +} + +static void nfp_abm_clean(struct nfp_app *app) +{ + struct nfp_abm *abm = app->priv; + + nfp_abm_eswitch_clean_up(abm); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); + bitmap_free(abm->threshold_undef); + kvfree(abm->actions); + kvfree(abm->thresholds); + kfree(abm); + app->priv = NULL; +} + +const struct nfp_app_type app_abm = { + .id = NFP_APP_ACTIVE_BUFFER_MGMT_NIC, + .name = "abm", + + .init = nfp_abm_init, + .clean = nfp_abm_clean, + + .vnic_alloc = nfp_abm_vnic_alloc, + .vnic_free = nfp_abm_vnic_free, + .vnic_init = nfp_abm_vnic_init, + + .port_get_stats = nfp_abm_port_get_stats, + .port_get_stats_count = nfp_abm_port_get_stats_count, + .port_get_stats_strings = nfp_abm_port_get_stats_strings, + + .setup_tc = nfp_abm_setup_tc, + + .eswitch_mode_get = nfp_abm_eswitch_mode_get, + .eswitch_mode_set = nfp_abm_eswitch_mode_set, + + .dev_get = nfp_abm_repr_get, +}; diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h new file mode 100644 index 000000000..48746c9c6 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#ifndef __NFP_ABM_H__ +#define __NFP_ABM_H__ 1 + +#include <linux/bits.h> +#include <linux/list.h> +#include <linux/radix-tree.h> +#include <net/devlink.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> + +/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz; + * 2.5ms / 400Hz seems more than sufficient for stats resolution. + */ +#define NFP_ABM_STATS_REFRESH_IVAL (2500 * 1000) /* ns */ + +#define NFP_ABM_LVL_INFINITY S32_MAX + +struct nfp_app; +struct nfp_net; + +#define NFP_ABM_PORTID_TYPE GENMASK(23, 16) +#define NFP_ABM_PORTID_ID GENMASK(7, 0) + +/* The possible actions if thresholds are exceeded */ +enum nfp_abm_q_action { + /* mark if ECN capable, otherwise drop */ + NFP_ABM_ACT_MARK_DROP = 0, + /* mark if ECN capable, otherwise goto QM */ + NFP_ABM_ACT_MARK_QUEUE = 1, + NFP_ABM_ACT_DROP = 2, + NFP_ABM_ACT_QUEUE = 3, + NFP_ABM_ACT_NOQUEUE = 4, +}; + +/** + * struct nfp_abm - ABM NIC app structure + * @app: back pointer to nfp_app + * @pf_id: ID of our PF link + * + * @red_support: is RED offload supported + * @num_prios: number of supported DSCP priorities + * @num_bands: number of supported DSCP priority bands + * @action_mask: bitmask of supported actions + * + * @thresholds: current threshold configuration + * @threshold_undef: bitmap of thresholds which have not been set + * @actions: current FW action configuration + * @num_thresholds: number of @thresholds and bits in @threshold_undef + * + * @prio_map_len: computed length of FW priority map (in bytes) + * @dscp_mask: mask FW will apply on DSCP field + * + * @eswitch_mode: devlink eswitch mode, advanced functions only visible + * in switchdev mode + * + * @q_lvls: queue level control area + * @qm_stats: queue statistics symbol + * @q_stats: basic queue statistics (only in per-band case) + */ +struct nfp_abm { + struct nfp_app *app; + unsigned int pf_id; + + unsigned int red_support; + unsigned int num_prios; + unsigned int num_bands; + unsigned int action_mask; + + u32 *thresholds; + unsigned long *threshold_undef; + u8 *actions; + size_t num_thresholds; + + unsigned int prio_map_len; + u8 dscp_mask; + + enum devlink_eswitch_mode eswitch_mode; + + const struct nfp_rtsym *q_lvls; + const struct nfp_rtsym *qm_stats; + const struct nfp_rtsym *q_stats; +}; + +/** + * struct nfp_alink_stats - ABM NIC statistics + * @tx_pkts: number of TXed packets + * @tx_bytes: number of TXed bytes + * @backlog_pkts: momentary backlog length (packets) + * @backlog_bytes: momentary backlog length (bytes) + * @overlimits: number of ECN marked TXed packets (accumulative) + * @drops: number of tail-dropped packets (accumulative) + */ +struct nfp_alink_stats { + u64 tx_pkts; + u64 tx_bytes; + u64 backlog_pkts; + u64 backlog_bytes; + u64 overlimits; + u64 drops; +}; + +/** + * struct nfp_alink_xstats - extended ABM NIC statistics + * @ecn_marked: number of ECN marked TXed packets + * @pdrop: number of hard drops due to queue limit + */ +struct nfp_alink_xstats { + u64 ecn_marked; + u64 pdrop; +}; + +enum nfp_qdisc_type { + NFP_QDISC_NONE = 0, + NFP_QDISC_MQ, + NFP_QDISC_RED, + NFP_QDISC_GRED, +}; + +#define NFP_QDISC_UNTRACKED ((struct nfp_qdisc *)1UL) + +/** + * struct nfp_qdisc - tracked TC Qdisc + * @netdev: netdev on which Qdisc was created + * @type: Qdisc type + * @handle: handle of this Qdisc + * @parent_handle: handle of the parent (unreliable if Qdisc was grafted) + * @use_cnt: number of attachment points in the hierarchy + * @num_children: current size of the @children array + * @children: pointers to children + * + * @params_ok: parameters of this Qdisc are OK for offload + * @offload_mark: offload refresh state - selected for offload + * @offloaded: Qdisc is currently offloaded to the HW + * + * @mq: MQ Qdisc specific parameters and state + * @mq.stats: current stats of the MQ Qdisc + * @mq.prev_stats: previously reported @mq.stats + * + * @red: RED Qdisc specific parameters and state + * @red.num_bands: Number of valid entries in the @red.band table + * @red.band: Per-band array of RED instances + * @red.band.ecn: ECN marking is enabled (rather than drop) + * @red.band.threshold: ECN marking threshold + * @red.band.stats: current stats of the RED Qdisc + * @red.band.prev_stats: previously reported @red.stats + * @red.band.xstats: extended stats for RED - current + * @red.band.prev_xstats: extended stats for RED - previously reported + */ +struct nfp_qdisc { + struct net_device *netdev; + enum nfp_qdisc_type type; + u32 handle; + u32 parent_handle; + unsigned int use_cnt; + unsigned int num_children; + struct nfp_qdisc **children; + + bool params_ok; + bool offload_mark; + bool offloaded; + + union { + /* NFP_QDISC_MQ */ + struct { + struct nfp_alink_stats stats; + struct nfp_alink_stats prev_stats; + } mq; + /* TC_SETUP_QDISC_RED, TC_SETUP_QDISC_GRED */ + struct { + unsigned int num_bands; + + struct { + bool ecn; + u32 threshold; + struct nfp_alink_stats stats; + struct nfp_alink_stats prev_stats; + struct nfp_alink_xstats xstats; + struct nfp_alink_xstats prev_xstats; + } band[MAX_DPs]; + } red; + }; +}; + +/** + * struct nfp_abm_link - port tuple of a ABM NIC + * @abm: back pointer to nfp_abm + * @vnic: data vNIC + * @id: id of the data vNIC + * @queue_base: id of base to host queue within PCIe (not QC idx) + * @total_queues: number of PF queues + * + * @last_stats_update: ktime of last stats update + * + * @prio_map: current map of priorities + * @has_prio: @prio_map is valid + * + * @def_band: default band to use + * @dscp_map: list of DSCP to band mappings + * + * @root_qdisc: pointer to the current root of the Qdisc hierarchy + * @qdiscs: all qdiscs recorded by major part of the handle + */ +struct nfp_abm_link { + struct nfp_abm *abm; + struct nfp_net *vnic; + unsigned int id; + unsigned int queue_base; + unsigned int total_queues; + + u64 last_stats_update; + + u32 *prio_map; + bool has_prio; + + u8 def_band; + struct list_head dscp_map; + + struct nfp_qdisc *root_qdisc; + struct radix_tree_root qdiscs; +}; + +static inline bool nfp_abm_has_prio(struct nfp_abm *abm) +{ + return abm->num_bands > 1; +} + +static inline bool nfp_abm_has_drop(struct nfp_abm *abm) +{ + return abm->action_mask & BIT(NFP_ABM_ACT_DROP); +} + +static inline bool nfp_abm_has_mark(struct nfp_abm *abm) +{ + return abm->action_mask & BIT(NFP_ABM_ACT_MARK_DROP); +} + +void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink); +int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_root_qopt_offload *opt); +int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt); +int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_mq_qopt_offload *opt); +int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_gred_qopt_offload *opt); +int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, + struct flow_block_offload *opt); + +int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink); +int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm); +int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val); +int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, u32 val); +int __nfp_abm_ctrl_set_q_act(struct nfp_abm *abm, unsigned int id, + enum nfp_abm_q_action act); +int nfp_abm_ctrl_set_q_act(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, enum nfp_abm_q_action act); +int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, + unsigned int band, unsigned int queue, + struct nfp_alink_stats *stats); +int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, + unsigned int band, unsigned int queue, + struct nfp_alink_xstats *xstats); +u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i); +u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i); +int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm); +int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm); +void nfp_abm_prio_map_update(struct nfp_abm *abm); +int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c new file mode 100644 index 000000000..2a5cc6422 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/rtnetlink.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> +#include <net/red.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" +#include "main.h" + +static bool nfp_abm_qdisc_is_red(struct nfp_qdisc *qdisc) +{ + return qdisc->type == NFP_QDISC_RED || qdisc->type == NFP_QDISC_GRED; +} + +static bool nfp_abm_qdisc_child_valid(struct nfp_qdisc *qdisc, unsigned int id) +{ + return qdisc->children[id] && + qdisc->children[id] != NFP_QDISC_UNTRACKED; +} + +static void *nfp_abm_qdisc_tree_deref_slot(void __rcu **slot) +{ + return rtnl_dereference(*slot); +} + +static void +nfp_abm_stats_propagate(struct nfp_alink_stats *parent, + struct nfp_alink_stats *child) +{ + parent->tx_pkts += child->tx_pkts; + parent->tx_bytes += child->tx_bytes; + parent->backlog_pkts += child->backlog_pkts; + parent->backlog_bytes += child->backlog_bytes; + parent->overlimits += child->overlimits; + parent->drops += child->drops; +} + +static void +nfp_abm_stats_update_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc, + unsigned int queue) +{ + struct nfp_cpp *cpp = alink->abm->app->cpp; + unsigned int i; + int err; + + if (!qdisc->offloaded) + return; + + for (i = 0; i < qdisc->red.num_bands; i++) { + err = nfp_abm_ctrl_read_q_stats(alink, i, queue, + &qdisc->red.band[i].stats); + if (err) + nfp_err(cpp, "RED stats (%d, %d) read failed with error %d\n", + i, queue, err); + + err = nfp_abm_ctrl_read_q_xstats(alink, i, queue, + &qdisc->red.band[i].xstats); + if (err) + nfp_err(cpp, "RED xstats (%d, %d) read failed with error %d\n", + i, queue, err); + } +} + +static void +nfp_abm_stats_update_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc) +{ + unsigned int i; + + if (qdisc->type != NFP_QDISC_MQ) + return; + + for (i = 0; i < alink->total_queues; i++) + if (nfp_abm_qdisc_child_valid(qdisc, i)) + nfp_abm_stats_update_red(alink, qdisc->children[i], i); +} + +static void __nfp_abm_stats_update(struct nfp_abm_link *alink, u64 time_now) +{ + alink->last_stats_update = time_now; + if (alink->root_qdisc) + nfp_abm_stats_update_mq(alink, alink->root_qdisc); +} + +static void nfp_abm_stats_update(struct nfp_abm_link *alink) +{ + u64 now; + + /* Limit the frequency of updates - stats of non-leaf qdiscs are a sum + * of all their leafs, so we would read the same stat multiple times + * for every dump. + */ + now = ktime_get(); + if (now - alink->last_stats_update < NFP_ABM_STATS_REFRESH_IVAL) + return; + + __nfp_abm_stats_update(alink, now); +} + +static void +nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc, + unsigned int start, unsigned int end) +{ + unsigned int i; + + for (i = start; i < end; i++) + if (nfp_abm_qdisc_child_valid(qdisc, i)) { + qdisc->children[i]->use_cnt--; + qdisc->children[i] = NULL; + } +} + +static void +nfp_abm_qdisc_offload_stop(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc) +{ + unsigned int i; + + /* Don't complain when qdisc is getting unlinked */ + if (qdisc->use_cnt) + nfp_warn(alink->abm->app->cpp, "Offload of '%08x' stopped\n", + qdisc->handle); + + if (!nfp_abm_qdisc_is_red(qdisc)) + return; + + for (i = 0; i < qdisc->red.num_bands; i++) { + qdisc->red.band[i].stats.backlog_pkts = 0; + qdisc->red.band[i].stats.backlog_bytes = 0; + } +} + +static int +__nfp_abm_stats_init(struct nfp_abm_link *alink, unsigned int band, + unsigned int queue, struct nfp_alink_stats *prev_stats, + struct nfp_alink_xstats *prev_xstats) +{ + u64 backlog_pkts, backlog_bytes; + int err; + + /* Don't touch the backlog, backlog can only be reset after it has + * been reported back to the tc qdisc stats. + */ + backlog_pkts = prev_stats->backlog_pkts; + backlog_bytes = prev_stats->backlog_bytes; + + err = nfp_abm_ctrl_read_q_stats(alink, band, queue, prev_stats); + if (err) { + nfp_err(alink->abm->app->cpp, + "RED stats init (%d, %d) failed with error %d\n", + band, queue, err); + return err; + } + + err = nfp_abm_ctrl_read_q_xstats(alink, band, queue, prev_xstats); + if (err) { + nfp_err(alink->abm->app->cpp, + "RED xstats init (%d, %d) failed with error %d\n", + band, queue, err); + return err; + } + + prev_stats->backlog_pkts = backlog_pkts; + prev_stats->backlog_bytes = backlog_bytes; + return 0; +} + +static int +nfp_abm_stats_init(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc, + unsigned int queue) +{ + unsigned int i; + int err; + + for (i = 0; i < qdisc->red.num_bands; i++) { + err = __nfp_abm_stats_init(alink, i, queue, + &qdisc->red.band[i].prev_stats, + &qdisc->red.band[i].prev_xstats); + if (err) + return err; + } + + return 0; +} + +static void +nfp_abm_offload_compile_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc, + unsigned int queue) +{ + bool good_red, good_gred; + unsigned int i; + + good_red = qdisc->type == NFP_QDISC_RED && + qdisc->params_ok && + qdisc->use_cnt == 1 && + !alink->has_prio && + !qdisc->children[0]; + good_gred = qdisc->type == NFP_QDISC_GRED && + qdisc->params_ok && + qdisc->use_cnt == 1; + qdisc->offload_mark = good_red || good_gred; + + /* If we are starting offload init prev_stats */ + if (qdisc->offload_mark && !qdisc->offloaded) + if (nfp_abm_stats_init(alink, qdisc, queue)) + qdisc->offload_mark = false; + + if (!qdisc->offload_mark) + return; + + for (i = 0; i < alink->abm->num_bands; i++) { + enum nfp_abm_q_action act; + + nfp_abm_ctrl_set_q_lvl(alink, i, queue, + qdisc->red.band[i].threshold); + act = qdisc->red.band[i].ecn ? + NFP_ABM_ACT_MARK_DROP : NFP_ABM_ACT_DROP; + nfp_abm_ctrl_set_q_act(alink, i, queue, act); + } +} + +static void +nfp_abm_offload_compile_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc) +{ + unsigned int i; + + qdisc->offload_mark = qdisc->type == NFP_QDISC_MQ; + if (!qdisc->offload_mark) + return; + + for (i = 0; i < alink->total_queues; i++) { + struct nfp_qdisc *child = qdisc->children[i]; + + if (!nfp_abm_qdisc_child_valid(qdisc, i)) + continue; + + nfp_abm_offload_compile_red(alink, child, i); + } +} + +void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink) +{ + struct nfp_abm *abm = alink->abm; + struct radix_tree_iter iter; + struct nfp_qdisc *qdisc; + void __rcu **slot; + size_t i; + + /* Mark all thresholds as unconfigured */ + for (i = 0; i < abm->num_bands; i++) + __bitmap_set(abm->threshold_undef, + i * NFP_NET_MAX_RX_RINGS + alink->queue_base, + alink->total_queues); + + /* Clear offload marks */ + radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) { + qdisc = nfp_abm_qdisc_tree_deref_slot(slot); + qdisc->offload_mark = false; + } + + if (alink->root_qdisc) + nfp_abm_offload_compile_mq(alink, alink->root_qdisc); + + /* Refresh offload status */ + radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) { + qdisc = nfp_abm_qdisc_tree_deref_slot(slot); + if (!qdisc->offload_mark && qdisc->offloaded) + nfp_abm_qdisc_offload_stop(alink, qdisc); + qdisc->offloaded = qdisc->offload_mark; + } + + /* Reset the unconfigured thresholds */ + for (i = 0; i < abm->num_thresholds; i++) + if (test_bit(i, abm->threshold_undef)) + __nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY); + + __nfp_abm_stats_update(alink, ktime_get()); +} + +static void +nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink, + struct nfp_qdisc *qdisc) +{ + struct radix_tree_iter iter; + unsigned int mq_refs = 0; + void __rcu **slot; + + if (!qdisc->use_cnt) + return; + /* MQ doesn't notify well on destruction, we need special handling of + * MQ's children. + */ + if (qdisc->type == NFP_QDISC_MQ && + qdisc == alink->root_qdisc && + netdev->reg_state == NETREG_UNREGISTERING) + return; + + /* Count refs held by MQ instances and clear pointers */ + radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) { + struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot); + unsigned int i; + + if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev) + continue; + for (i = 0; i < mq->num_children; i++) + if (mq->children[i] == qdisc) { + mq->children[i] = NULL; + mq_refs++; + } + } + + WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n", + qdisc->use_cnt, mq_refs); +} + +static void +nfp_abm_qdisc_free(struct net_device *netdev, struct nfp_abm_link *alink, + struct nfp_qdisc *qdisc) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + + if (!qdisc) + return; + nfp_abm_qdisc_clear_mq(netdev, alink, qdisc); + WARN_ON(radix_tree_delete(&alink->qdiscs, + TC_H_MAJ(qdisc->handle)) != qdisc); + + kfree(qdisc->children); + kfree(qdisc); + + port->tc_offload_cnt--; +} + +static struct nfp_qdisc * +nfp_abm_qdisc_alloc(struct net_device *netdev, struct nfp_abm_link *alink, + enum nfp_qdisc_type type, u32 parent_handle, u32 handle, + unsigned int children) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + struct nfp_qdisc *qdisc; + int err; + + qdisc = kzalloc(sizeof(*qdisc), GFP_KERNEL); + if (!qdisc) + return NULL; + + if (children) { + qdisc->children = kcalloc(children, sizeof(void *), GFP_KERNEL); + if (!qdisc->children) + goto err_free_qdisc; + } + + qdisc->netdev = netdev; + qdisc->type = type; + qdisc->parent_handle = parent_handle; + qdisc->handle = handle; + qdisc->num_children = children; + + err = radix_tree_insert(&alink->qdiscs, TC_H_MAJ(qdisc->handle), qdisc); + if (err) { + nfp_err(alink->abm->app->cpp, + "Qdisc insertion into radix tree failed: %d\n", err); + goto err_free_child_tbl; + } + + port->tc_offload_cnt++; + return qdisc; + +err_free_child_tbl: + kfree(qdisc->children); +err_free_qdisc: + kfree(qdisc); + return NULL; +} + +static struct nfp_qdisc * +nfp_abm_qdisc_find(struct nfp_abm_link *alink, u32 handle) +{ + return radix_tree_lookup(&alink->qdiscs, TC_H_MAJ(handle)); +} + +static int +nfp_abm_qdisc_replace(struct net_device *netdev, struct nfp_abm_link *alink, + enum nfp_qdisc_type type, u32 parent_handle, u32 handle, + unsigned int children, struct nfp_qdisc **qdisc) +{ + *qdisc = nfp_abm_qdisc_find(alink, handle); + if (*qdisc) { + if (WARN_ON((*qdisc)->type != type)) + return -EINVAL; + return 1; + } + + *qdisc = nfp_abm_qdisc_alloc(netdev, alink, type, parent_handle, handle, + children); + return *qdisc ? 0 : -ENOMEM; +} + +static void +nfp_abm_qdisc_destroy(struct net_device *netdev, struct nfp_abm_link *alink, + u32 handle) +{ + struct nfp_qdisc *qdisc; + + qdisc = nfp_abm_qdisc_find(alink, handle); + if (!qdisc) + return; + + /* We don't get TC_SETUP_ROOT_QDISC w/ MQ when netdev is unregistered */ + if (alink->root_qdisc == qdisc) + qdisc->use_cnt--; + + nfp_abm_qdisc_unlink_children(qdisc, 0, qdisc->num_children); + nfp_abm_qdisc_free(netdev, alink, qdisc); + + if (alink->root_qdisc == qdisc) { + alink->root_qdisc = NULL; + /* Only root change matters, other changes are acted upon on + * the graft notification. + */ + nfp_abm_qdisc_offload_update(alink); + } +} + +static int +nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle, + unsigned int id) +{ + struct nfp_qdisc *parent, *child; + + parent = nfp_abm_qdisc_find(alink, handle); + if (!parent) + return 0; + + if (WARN(id >= parent->num_children, + "graft child out of bound %d >= %d\n", + id, parent->num_children)) + return -EINVAL; + + nfp_abm_qdisc_unlink_children(parent, id, id + 1); + + child = nfp_abm_qdisc_find(alink, child_handle); + if (child) + child->use_cnt++; + else + child = NFP_QDISC_UNTRACKED; + parent->children[id] = child; + + nfp_abm_qdisc_offload_update(alink); + + return 0; +} + +static void +nfp_abm_stats_calculate(struct nfp_alink_stats *new, + struct nfp_alink_stats *old, + struct gnet_stats_basic_sync *bstats, + struct gnet_stats_queue *qstats) +{ + _bstats_update(bstats, new->tx_bytes - old->tx_bytes, + new->tx_pkts - old->tx_pkts); + qstats->qlen += new->backlog_pkts - old->backlog_pkts; + qstats->backlog += new->backlog_bytes - old->backlog_bytes; + qstats->overlimits += new->overlimits - old->overlimits; + qstats->drops += new->drops - old->drops; +} + +static void +nfp_abm_stats_red_calculate(struct nfp_alink_xstats *new, + struct nfp_alink_xstats *old, + struct red_stats *stats) +{ + stats->forced_mark += new->ecn_marked - old->ecn_marked; + stats->pdrop += new->pdrop - old->pdrop; +} + +static int +nfp_abm_gred_stats(struct nfp_abm_link *alink, u32 handle, + struct tc_gred_qopt_offload_stats *stats) +{ + struct nfp_qdisc *qdisc; + unsigned int i; + + nfp_abm_stats_update(alink); + + qdisc = nfp_abm_qdisc_find(alink, handle); + if (!qdisc) + return -EOPNOTSUPP; + /* If the qdisc offload has stopped we may need to adjust the backlog + * counters back so carry on even if qdisc is not currently offloaded. + */ + + for (i = 0; i < qdisc->red.num_bands; i++) { + if (!stats->xstats[i]) + continue; + + nfp_abm_stats_calculate(&qdisc->red.band[i].stats, + &qdisc->red.band[i].prev_stats, + &stats->bstats[i], &stats->qstats[i]); + qdisc->red.band[i].prev_stats = qdisc->red.band[i].stats; + + nfp_abm_stats_red_calculate(&qdisc->red.band[i].xstats, + &qdisc->red.band[i].prev_xstats, + stats->xstats[i]); + qdisc->red.band[i].prev_xstats = qdisc->red.band[i].xstats; + } + + return qdisc->offloaded ? 0 : -EOPNOTSUPP; +} + +static bool +nfp_abm_gred_check_params(struct nfp_abm_link *alink, + struct tc_gred_qopt_offload *opt) +{ + struct nfp_cpp *cpp = alink->abm->app->cpp; + struct nfp_abm *abm = alink->abm; + unsigned int i; + + if (opt->set.grio_on || opt->set.wred_on) { + nfp_warn(cpp, "GRED offload failed - GRIO and WRED not supported (p:%08x h:%08x)\n", + opt->parent, opt->handle); + return false; + } + if (opt->set.dp_def != alink->def_band) { + nfp_warn(cpp, "GRED offload failed - default band must be %d (p:%08x h:%08x)\n", + alink->def_band, opt->parent, opt->handle); + return false; + } + if (opt->set.dp_cnt != abm->num_bands) { + nfp_warn(cpp, "GRED offload failed - band count must be %d (p:%08x h:%08x)\n", + abm->num_bands, opt->parent, opt->handle); + return false; + } + + for (i = 0; i < abm->num_bands; i++) { + struct tc_gred_vq_qopt_offload_params *band = &opt->set.tab[i]; + + if (!band->present) + return false; + if (!band->is_ecn && !nfp_abm_has_drop(abm)) { + nfp_warn(cpp, "GRED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x vq:%d)\n", + opt->parent, opt->handle, i); + return false; + } + if (band->is_ecn && !nfp_abm_has_mark(abm)) { + nfp_warn(cpp, "GRED offload failed - ECN marking not supported (p:%08x h:%08x vq:%d)\n", + opt->parent, opt->handle, i); + return false; + } + if (band->is_harddrop) { + nfp_warn(cpp, "GRED offload failed - harddrop is not supported (p:%08x h:%08x vq:%d)\n", + opt->parent, opt->handle, i); + return false; + } + if (band->min != band->max) { + nfp_warn(cpp, "GRED offload failed - threshold mismatch (p:%08x h:%08x vq:%d)\n", + opt->parent, opt->handle, i); + return false; + } + if (band->min > S32_MAX) { + nfp_warn(cpp, "GRED offload failed - threshold too large %d > %d (p:%08x h:%08x vq:%d)\n", + band->min, S32_MAX, opt->parent, opt->handle, + i); + return false; + } + } + + return true; +} + +static int +nfp_abm_gred_replace(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_gred_qopt_offload *opt) +{ + struct nfp_qdisc *qdisc; + unsigned int i; + int ret; + + ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_GRED, opt->parent, + opt->handle, 0, &qdisc); + if (ret < 0) + return ret; + + qdisc->params_ok = nfp_abm_gred_check_params(alink, opt); + if (qdisc->params_ok) { + qdisc->red.num_bands = opt->set.dp_cnt; + for (i = 0; i < qdisc->red.num_bands; i++) { + qdisc->red.band[i].ecn = opt->set.tab[i].is_ecn; + qdisc->red.band[i].threshold = opt->set.tab[i].min; + } + } + + if (qdisc->use_cnt) + nfp_abm_qdisc_offload_update(alink); + + return 0; +} + +int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_gred_qopt_offload *opt) +{ + switch (opt->command) { + case TC_GRED_REPLACE: + return nfp_abm_gred_replace(netdev, alink, opt); + case TC_GRED_DESTROY: + nfp_abm_qdisc_destroy(netdev, alink, opt->handle); + return 0; + case TC_GRED_STATS: + return nfp_abm_gred_stats(alink, opt->handle, &opt->stats); + default: + return -EOPNOTSUPP; + } +} + +static int +nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt) +{ + struct nfp_qdisc *qdisc; + + nfp_abm_stats_update(alink); + + qdisc = nfp_abm_qdisc_find(alink, opt->handle); + if (!qdisc || !qdisc->offloaded) + return -EOPNOTSUPP; + + nfp_abm_stats_red_calculate(&qdisc->red.band[0].xstats, + &qdisc->red.band[0].prev_xstats, + opt->xstats); + qdisc->red.band[0].prev_xstats = qdisc->red.band[0].xstats; + return 0; +} + +static int +nfp_abm_red_stats(struct nfp_abm_link *alink, u32 handle, + struct tc_qopt_offload_stats *stats) +{ + struct nfp_qdisc *qdisc; + + nfp_abm_stats_update(alink); + + qdisc = nfp_abm_qdisc_find(alink, handle); + if (!qdisc) + return -EOPNOTSUPP; + /* If the qdisc offload has stopped we may need to adjust the backlog + * counters back so carry on even if qdisc is not currently offloaded. + */ + + nfp_abm_stats_calculate(&qdisc->red.band[0].stats, + &qdisc->red.band[0].prev_stats, + stats->bstats, stats->qstats); + qdisc->red.band[0].prev_stats = qdisc->red.band[0].stats; + + return qdisc->offloaded ? 0 : -EOPNOTSUPP; +} + +static bool +nfp_abm_red_check_params(struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt) +{ + struct nfp_cpp *cpp = alink->abm->app->cpp; + struct nfp_abm *abm = alink->abm; + + if (!opt->set.is_ecn && !nfp_abm_has_drop(abm)) { + nfp_warn(cpp, "RED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x)\n", + opt->parent, opt->handle); + return false; + } + if (opt->set.is_ecn && !nfp_abm_has_mark(abm)) { + nfp_warn(cpp, "RED offload failed - ECN marking not supported (p:%08x h:%08x)\n", + opt->parent, opt->handle); + return false; + } + if (opt->set.is_harddrop) { + nfp_warn(cpp, "RED offload failed - harddrop is not supported (p:%08x h:%08x)\n", + opt->parent, opt->handle); + return false; + } + if (opt->set.min != opt->set.max) { + nfp_warn(cpp, "RED offload failed - unsupported min/max parameters (p:%08x h:%08x)\n", + opt->parent, opt->handle); + return false; + } + if (opt->set.min > NFP_ABM_LVL_INFINITY) { + nfp_warn(cpp, "RED offload failed - threshold too large %d > %d (p:%08x h:%08x)\n", + opt->set.min, NFP_ABM_LVL_INFINITY, opt->parent, + opt->handle); + return false; + } + + return true; +} + +static int +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt) +{ + struct nfp_qdisc *qdisc; + int ret; + + ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, opt->parent, + opt->handle, 1, &qdisc); + if (ret < 0) + return ret; + + /* If limit != 0 child gets reset */ + if (opt->set.limit) { + if (nfp_abm_qdisc_child_valid(qdisc, 0)) + qdisc->children[0]->use_cnt--; + qdisc->children[0] = NULL; + } else { + /* Qdisc was just allocated without a limit will use noop_qdisc, + * i.e. a block hole. + */ + if (!ret) + qdisc->children[0] = NFP_QDISC_UNTRACKED; + } + + qdisc->params_ok = nfp_abm_red_check_params(alink, opt); + if (qdisc->params_ok) { + qdisc->red.num_bands = 1; + qdisc->red.band[0].ecn = opt->set.is_ecn; + qdisc->red.band[0].threshold = opt->set.min; + } + + if (qdisc->use_cnt == 1) + nfp_abm_qdisc_offload_update(alink); + + return 0; +} + +int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt) +{ + switch (opt->command) { + case TC_RED_REPLACE: + return nfp_abm_red_replace(netdev, alink, opt); + case TC_RED_DESTROY: + nfp_abm_qdisc_destroy(netdev, alink, opt->handle); + return 0; + case TC_RED_STATS: + return nfp_abm_red_stats(alink, opt->handle, &opt->stats); + case TC_RED_XSTATS: + return nfp_abm_red_xstats(alink, opt); + case TC_RED_GRAFT: + return nfp_abm_qdisc_graft(alink, opt->handle, + opt->child_handle, 0); + default: + return -EOPNOTSUPP; + } +} + +static int +nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_mq_qopt_offload *opt) +{ + struct nfp_qdisc *qdisc; + int ret; + + ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_MQ, + TC_H_ROOT, opt->handle, alink->total_queues, + &qdisc); + if (ret < 0) + return ret; + + qdisc->params_ok = true; + qdisc->offloaded = true; + nfp_abm_qdisc_offload_update(alink); + return 0; +} + +static int +nfp_abm_mq_stats(struct nfp_abm_link *alink, u32 handle, + struct tc_qopt_offload_stats *stats) +{ + struct nfp_qdisc *qdisc, *red; + unsigned int i, j; + + qdisc = nfp_abm_qdisc_find(alink, handle); + if (!qdisc) + return -EOPNOTSUPP; + + nfp_abm_stats_update(alink); + + /* MQ stats are summed over the children in the core, so we need + * to add up the unreported child values. + */ + memset(&qdisc->mq.stats, 0, sizeof(qdisc->mq.stats)); + memset(&qdisc->mq.prev_stats, 0, sizeof(qdisc->mq.prev_stats)); + + for (i = 0; i < qdisc->num_children; i++) { + if (!nfp_abm_qdisc_child_valid(qdisc, i)) + continue; + + if (!nfp_abm_qdisc_is_red(qdisc->children[i])) + continue; + red = qdisc->children[i]; + + for (j = 0; j < red->red.num_bands; j++) { + nfp_abm_stats_propagate(&qdisc->mq.stats, + &red->red.band[j].stats); + nfp_abm_stats_propagate(&qdisc->mq.prev_stats, + &red->red.band[j].prev_stats); + } + } + + nfp_abm_stats_calculate(&qdisc->mq.stats, &qdisc->mq.prev_stats, + stats->bstats, stats->qstats); + + return qdisc->offloaded ? 0 : -EOPNOTSUPP; +} + +int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_mq_qopt_offload *opt) +{ + switch (opt->command) { + case TC_MQ_CREATE: + return nfp_abm_mq_create(netdev, alink, opt); + case TC_MQ_DESTROY: + nfp_abm_qdisc_destroy(netdev, alink, opt->handle); + return 0; + case TC_MQ_STATS: + return nfp_abm_mq_stats(alink, opt->handle, &opt->stats); + case TC_MQ_GRAFT: + return nfp_abm_qdisc_graft(alink, opt->handle, + opt->graft_params.child_handle, + opt->graft_params.queue); + default: + return -EOPNOTSUPP; + } +} + +int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_root_qopt_offload *opt) +{ + if (opt->ingress) + return -EOPNOTSUPP; + if (alink->root_qdisc) + alink->root_qdisc->use_cnt--; + alink->root_qdisc = nfp_abm_qdisc_find(alink, opt->handle); + if (alink->root_qdisc) + alink->root_qdisc->use_cnt++; + + nfp_abm_qdisc_offload_update(alink); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c new file mode 100644 index 000000000..2ec62c8d8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bpf.h> +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/jiffies.h> +#include <linux/skbuff.h> +#include <linux/timekeeping.h> + +#include "../ccm.h" +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "fw.h" +#include "main.h" + +static struct sk_buff * +nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size) +{ + struct sk_buff *skb; + + skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL); + skb_put(skb, size); + + return skb; +} + +static unsigned int +nfp_bpf_cmsg_map_req_size(struct nfp_app_bpf *bpf, unsigned int n) +{ + unsigned int size; + + size = sizeof(struct cmsg_req_map_op); + size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n; + + return size; +} + +static struct sk_buff * +nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n) +{ + return nfp_bpf_cmsg_alloc(bpf, nfp_bpf_cmsg_map_req_size(bpf, n)); +} + +static unsigned int +nfp_bpf_cmsg_map_reply_size(struct nfp_app_bpf *bpf, unsigned int n) +{ + unsigned int size; + + size = sizeof(struct cmsg_reply_map_op); + size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n; + + return size; +} + +static int +nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf, + struct cmsg_reply_map_simple *reply) +{ + static const int res_table[] = { + [CMSG_RC_SUCCESS] = 0, + [CMSG_RC_ERR_MAP_FD] = -EBADFD, + [CMSG_RC_ERR_MAP_NOENT] = -ENOENT, + [CMSG_RC_ERR_MAP_ERR] = -EINVAL, + [CMSG_RC_ERR_MAP_PARSE] = -EIO, + [CMSG_RC_ERR_MAP_EXIST] = -EEXIST, + [CMSG_RC_ERR_MAP_NOMEM] = -ENOMEM, + [CMSG_RC_ERR_MAP_E2BIG] = -E2BIG, + }; + u32 rc; + + rc = be32_to_cpu(reply->rc); + if (rc >= ARRAY_SIZE(res_table)) { + cmsg_warn(bpf, "FW responded with invalid status: %u\n", rc); + return -EIO; + } + + return res_table[rc]; +} + +long long int +nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map) +{ + struct cmsg_reply_map_alloc_tbl *reply; + struct cmsg_req_map_alloc_tbl *req; + struct sk_buff *skb; + u32 tid; + int err; + + skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req)); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->key_size = cpu_to_be32(map->key_size); + req->value_size = cpu_to_be32(map->value_size); + req->max_entries = cpu_to_be32(map->max_entries); + req->map_type = cpu_to_be32(map->map_type); + req->map_flags = 0; + + skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_ALLOC, + sizeof(*reply)); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + reply = (void *)skb->data; + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + if (err) + goto err_free; + + tid = be32_to_cpu(reply->tid); + dev_consume_skb_any(skb); + + return tid; +err_free: + dev_kfree_skb_any(skb); + return err; +} + +void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map) +{ + struct cmsg_reply_map_free_tbl *reply; + struct cmsg_req_map_free_tbl *req; + struct sk_buff *skb; + int err; + + skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req)); + if (!skb) { + cmsg_warn(bpf, "leaking map - failed to allocate msg\n"); + return; + } + + req = (void *)skb->data; + req->tid = cpu_to_be32(nfp_map->tid); + + skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_FREE, + sizeof(*reply)); + if (IS_ERR(skb)) { + cmsg_warn(bpf, "leaking map - I/O error\n"); + return; + } + + reply = (void *)skb->data; + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + if (err) + cmsg_warn(bpf, "leaking map - FW responded with: %d\n", err); + + dev_consume_skb_any(skb); +} + +static void * +nfp_bpf_ctrl_req_key(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req, + unsigned int n) +{ + return &req->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n]; +} + +static void * +nfp_bpf_ctrl_req_val(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req, + unsigned int n) +{ + return &req->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n]; +} + +static void * +nfp_bpf_ctrl_reply_key(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply, + unsigned int n) +{ + return &reply->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n]; +} + +static void * +nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply, + unsigned int n) +{ + return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n]; +} + +static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_UPDATE || + op == NFP_CCM_TYPE_BPF_MAP_DELETE; +} + +static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP || + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT; +} + +static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST || + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT; +} + +static unsigned int +nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op, + const u8 *key, u8 *out_key, u8 *out_value, + u32 *cache_gen) +{ + struct bpf_map *map = &nfp_map->offmap->map; + struct nfp_app_bpf *bpf = nfp_map->bpf; + unsigned int i, count, n_entries; + struct cmsg_reply_map_op *reply; + + n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1; + + spin_lock(&nfp_map->cache_lock); + *cache_gen = nfp_map->cache_gen; + if (nfp_map->cache_blockers) + n_entries = 1; + + if (nfp_bpf_ctrl_op_cache_invalidate(op)) + goto exit_block; + if (!nfp_bpf_ctrl_op_cache_capable(op)) + goto exit_unlock; + + if (!nfp_map->cache) + goto exit_unlock; + if (nfp_map->cache_to < ktime_get_ns()) + goto exit_invalidate; + + reply = (void *)nfp_map->cache->data; + count = be32_to_cpu(reply->count); + + for (i = 0; i < count; i++) { + void *cached_key; + + cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i); + if (memcmp(cached_key, key, map->key_size)) + continue; + + if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP) + memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i), + map->value_size); + if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) { + if (i + 1 == count) + break; + + memcpy(out_key, + nfp_bpf_ctrl_reply_key(bpf, reply, i + 1), + map->key_size); + } + + n_entries = 0; + goto exit_unlock; + } + goto exit_unlock; + +exit_block: + nfp_map->cache_blockers++; +exit_invalidate: + dev_consume_skb_any(nfp_map->cache); + nfp_map->cache = NULL; +exit_unlock: + spin_unlock(&nfp_map->cache_lock); + return n_entries; +} + +static void +nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op, + struct sk_buff *skb, u32 cache_gen) +{ + bool blocker, filler; + + blocker = nfp_bpf_ctrl_op_cache_invalidate(op); + filler = nfp_bpf_ctrl_op_cache_fill(op); + if (blocker || filler) { + u64 to = 0; + + if (filler) + to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS; + + spin_lock(&nfp_map->cache_lock); + if (blocker) { + nfp_map->cache_blockers--; + nfp_map->cache_gen++; + } + if (filler && !nfp_map->cache_blockers && + nfp_map->cache_gen == cache_gen) { + nfp_map->cache_to = to; + swap(nfp_map->cache, skb); + } + spin_unlock(&nfp_map->cache_lock); + } + + dev_consume_skb_any(skb); +} + +static int +nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op, + u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value) +{ + struct nfp_bpf_map *nfp_map = offmap->dev_priv; + unsigned int n_entries, reply_entries, count; + struct nfp_app_bpf *bpf = nfp_map->bpf; + struct bpf_map *map = &offmap->map; + struct cmsg_reply_map_op *reply; + struct cmsg_req_map_op *req; + struct sk_buff *skb; + u32 cache_gen; + int err; + + /* FW messages have no space for more than 32 bits of flags */ + if (flags >> 32) + return -EOPNOTSUPP; + + /* Handle op cache */ + n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key, + out_value, &cache_gen); + if (!n_entries) + return 0; + + skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1); + if (!skb) { + err = -ENOMEM; + goto err_cache_put; + } + + req = (void *)skb->data; + req->tid = cpu_to_be32(nfp_map->tid); + req->count = cpu_to_be32(n_entries); + req->flags = cpu_to_be32(flags); + + /* Copy inputs */ + if (key) + memcpy(nfp_bpf_ctrl_req_key(bpf, req, 0), key, map->key_size); + if (value) + memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value, + map->value_size); + + skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto err_cache_put; + } + + if (skb->len < sizeof(*reply)) { + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n", + op, skb->len); + err = -EIO; + goto err_free; + } + + reply = (void *)skb->data; + count = be32_to_cpu(reply->count); + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + /* FW responds with message sized to hold the good entries, + * plus one extra entry if there was an error. + */ + reply_entries = count + !!err; + if (n_entries > 1 && count) + err = 0; + if (err) + goto err_free; + + if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) { + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n", + op, skb->len, reply_entries); + err = -EIO; + goto err_free; + } + + /* Copy outputs */ + if (out_key) + memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0), + map->key_size); + if (out_value) + memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0), + map->value_size); + + nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen); + + return 0; +err_free: + dev_kfree_skb_any(skb); +err_cache_put: + nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen); + return err; +} + +int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_UPDATE, + key, value, flags, NULL, NULL); +} + +int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key) +{ + return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_DELETE, + key, NULL, 0, NULL, NULL); +} + +int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value) +{ + return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_LOOKUP, + key, NULL, 0, NULL, value); +} + +int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap, + void *next_key) +{ + return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETFIRST, + NULL, NULL, 0, next_key, NULL); +} + +int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, + void *key, void *next_key) +{ + return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETNEXT, + key, NULL, 0, next_key, NULL); +} + +unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf) +{ + return max(nfp_bpf_cmsg_map_req_size(bpf, 1), + nfp_bpf_cmsg_map_reply_size(bpf, 1)); +} + +unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf) +{ + return max3(NFP_NET_DEFAULT_MTU, + nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT), + nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT)); +} + +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf) +{ + unsigned int mtu, req_max, reply_max, entry_sz; + + mtu = bpf->app->ctrl->dp.mtu; + entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz; + req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz; + reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz; + + return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT); +} + +void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_app_bpf *bpf = app->priv; + + if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len); + dev_kfree_skb_any(skb); + return; + } + + if (nfp_ccm_get_type(skb) == NFP_CCM_TYPE_BPF_BPF_EVENT) { + if (!nfp_bpf_event_output(bpf, skb->data, skb->len)) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); + return; + } + + nfp_ccm_rx(&bpf->ccm, skb); +} + +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + const struct nfp_ccm_hdr *hdr = data; + struct nfp_app_bpf *bpf = app->priv; + + if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", len); + return; + } + + if (hdr->type == NFP_CCM_TYPE_BPF_BPF_EVENT) + nfp_bpf_event_output(bpf, data, len); + else + cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n", + hdr->type); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h new file mode 100644 index 000000000..4268a7e0f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef NFP_BPF_FW_H +#define NFP_BPF_FW_H 1 + +#include <linux/bitops.h> +#include <linux/types.h> +#include "../ccm.h" + +/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking + * our FW ABI. In that case we will do translation in the driver. + */ +#define NFP_BPF_SCALAR_VALUE 1 +#define NFP_BPF_MAP_VALUE 4 +#define NFP_BPF_STACK 6 +#define NFP_BPF_PACKET_DATA 8 + +enum bpf_cap_tlv_type { + NFP_BPF_CAP_TYPE_FUNC = 1, + NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, + NFP_BPF_CAP_TYPE_MAPS = 3, + NFP_BPF_CAP_TYPE_RANDOM = 4, + NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, + NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, + NFP_BPF_CAP_TYPE_ABI_VERSION = 7, + NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8, +}; + +struct nfp_bpf_cap_tlv_func { + __le32 func_id; + __le32 func_addr; +}; + +struct nfp_bpf_cap_tlv_adjust_head { + __le32 flags; + __le32 off_min; + __le32 off_max; + __le32 guaranteed_sub; + __le32 guaranteed_add; +}; + +#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0) + +struct nfp_bpf_cap_tlv_maps { + __le32 types; + __le32 max_maps; + __le32 max_elems; + __le32 max_key_sz; + __le32 max_val_sz; + __le32 max_elem_sz; +}; + +/* + * Types defined for map related control messages + */ + +/* BPF ABIv2 fixed-length control message fields */ +#define CMSG_MAP_KEY_LW 16 +#define CMSG_MAP_VALUE_LW 16 + +enum nfp_bpf_cmsg_status { + CMSG_RC_SUCCESS = 0, + CMSG_RC_ERR_MAP_FD = 1, + CMSG_RC_ERR_MAP_NOENT = 2, + CMSG_RC_ERR_MAP_ERR = 3, + CMSG_RC_ERR_MAP_PARSE = 4, + CMSG_RC_ERR_MAP_EXIST = 5, + CMSG_RC_ERR_MAP_NOMEM = 6, + CMSG_RC_ERR_MAP_E2BIG = 7, +}; + +struct cmsg_reply_map_simple { + struct nfp_ccm_hdr hdr; + __be32 rc; +}; + +struct cmsg_req_map_alloc_tbl { + struct nfp_ccm_hdr hdr; + __be32 key_size; /* in bytes */ + __be32 value_size; /* in bytes */ + __be32 max_entries; + __be32 map_type; + __be32 map_flags; /* reserved */ +}; + +struct cmsg_reply_map_alloc_tbl { + struct cmsg_reply_map_simple reply_hdr; + __be32 tid; +}; + +struct cmsg_req_map_free_tbl { + struct nfp_ccm_hdr hdr; + __be32 tid; +}; + +struct cmsg_reply_map_free_tbl { + struct cmsg_reply_map_simple reply_hdr; + __be32 count; +}; + +struct cmsg_req_map_op { + struct nfp_ccm_hdr hdr; + __be32 tid; + __be32 count; + __be32 flags; + u8 data[]; +}; + +struct cmsg_reply_map_op { + struct cmsg_reply_map_simple reply_hdr; + __be32 count; + __be32 resv; + u8 data[]; +}; + +struct cmsg_bpf_event { + struct nfp_ccm_hdr hdr; + __be32 cpu_id; + __be64 map_ptr; + __be32 data_size; + __be32 pkt_size; + u8 data[]; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c new file mode 100644 index 000000000..df2ab5cbd --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -0,0 +1,4627 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bug.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/kernel.h> +#include <linux/pkt_cls.h> +#include <linux/reciprocal_div.h> +#include <linux/unistd.h> + +#include "main.h" +#include "../nfp_asm.h" +#include "../nfp_net_ctrl.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next<n> pointers. + * It's safe to modify the next pointers (but not pos). + */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) { + pr_warn("instruction limit reached (%u NFP instructions)\n", + nfp_prog->prog_len); + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->prog_len; +} + +static bool +nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) +{ + /* If there is a recorded error we may have dropped instructions; + * that doesn't have to be due to translator bug, and the translation + * will fail anyway, so just return OK. + */ + if (nfp_prog->error) + return true; + return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); +} + +/* --- Emitters --- */ +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx, + bool indir) +{ + u64 insn; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_INDIR, indir) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + if (reg.dst_lmextn || reg.src_lmextn) { + pr_err("cmd can't use LMextn\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx, + indir); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false); +} + +static void +emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, + enum nfp_relo_type relo) +{ + if (mask == BR_UNC && defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); +} + +static void +__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer, + bool set, bool src_lmextn) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BIT_BASE | + FIELD_PREP(OP_BR_BIT_A_SRC, areg) | + FIELD_PREP(OP_BR_BIT_B_SRC, breg) | + FIELD_PREP(OP_BR_BIT_BV, set) | + FIELD_PREP(OP_BR_BIT_DEFBR, defer) | + FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) | + FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, + u8 defer, bool set, enum nfp_relo_type relo) +{ + struct nfp_insn_re_regs reg; + int err; + + /* NOTE: The bit to test is specified as an rotation amount, such that + * the bit to test will be placed on the MSB of the result when + * doing a rotate right. For bit X, we need right rotate X + 1. + */ + bit += 1; + + err = swreg_to_restricted(reg_none(), src, reg_imm(bit), ®, false); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set, + reg.src_lmextn); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +static void +emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) +{ + emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL); +} + +static void +__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + u8 defer, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_BR_ALU_BASE | + FIELD_PREP(OP_BR_ALU_A_SRC, areg) | + FIELD_PREP(OP_BR_ALU_B_SRC, breg) | + FIELD_PREP(OP_BR_ALU_DEFBR, defer) | + FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) | + FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn, + reg.src_lmextn); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both) | + FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (swreg_type(dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + /* Use reg.dst when destination is No-Dest. */ + __emit_immed(nfp_prog, + swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg, + reg.breg, imm >> 8, width, invert, shift, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + /* NFP shift instruction has something special. If shift direction is + * left then shift amount of 1 to 31 is specified as 32 minus the amount + * to shift. + * + * But no need to do this for indirect shift which has shift amount be + * 0. Even after we do this subtraction, shift amount 0 will be turned + * into 32 which will eventually be encoded the same as 0 because only + * low 5 bits are encoded, but shift amount be 32 will fail the + * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of + * mask range. + */ + if (sc == SHF_SC_L_SHF && shift) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both) | + FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc) +{ + if (sc == SHF_SC_R_ROT) { + pr_err("indirect shift is not allowed on rotation\n"); + nfp_prog->error = -EFAULT; + return; + } + + emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both) | + FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum alu_op op, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, + enum mul_type type, enum mul_step step, u16 breg, bool swap, + bool wr_both, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_MUL_BASE | + FIELD_PREP(OP_MUL_A_SRC, areg) | + FIELD_PREP(OP_MUL_B_SRC, breg) | + FIELD_PREP(OP_MUL_STEP, step) | + FIELD_PREP(OP_MUL_DST_AB, dst_ab) | + FIELD_PREP(OP_MUL_SW, swap) | + FIELD_PREP(OP_MUL_TYPE, type) | + FIELD_PREP(OP_MUL_WR_AB, wr_both) | + FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, + enum mul_step step, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + u16 areg; + int err; + + if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { + nfp_prog->error = -EINVAL; + return; + } + + if (step == MUL_LAST || step == MUL_LAST_2) { + /* When type is step and step Number is LAST or LAST2, left + * source is used as destination. + */ + err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); + areg = reg.dst; + } else { + err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); + areg = reg.areg; + } + + if (err) { + nfp_prog->error = err; + return; + } + + __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both) | + FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + /* Note: ld_field is special as it uses one of the src regs as dst */ + err = swreg_to_restricted(dst, dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); +} + +static void +__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LCSR_BASE | + FIELD_PREP(OP_LCSR_A_SRC, areg) | + FIELD_PREP(OP_LCSR_B_SRC, breg) | + FIELD_PREP(OP_LCSR_WRITE, wr) | + FIELD_PREP(OP_LCSR_ADDR, addr / 4) | + FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) +{ + struct nfp_insn_ur_regs reg; + int err; + + /* This instruction takes immeds instead of reg_none() for the ignored + * operand, but we can't encode 2 immeds in one instr with our normal + * swreg infra so if param is an immed, we encode as reg_none() and + * copy the immed to both operands. + */ + if (swreg_type(src) == NN_REG_IMM) { + err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); + reg.breg = reg.areg; + } else { + err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); + } + if (err) { + nfp_prog->error = err; + return; + } + + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr, + false, reg.src_lmextn); +} + +/* CSR value is read in following immed[gpr, 0] */ +static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr) +{ + __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false); +} + +static void emit_nop(struct nfp_prog *nfp_prog) +{ + __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +static void +wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst) +{ + if (meta->flags & FLAG_INSN_DO_ZEXT) + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static void +wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, + enum nfp_relo_type relo) +{ + if (imm > 0xffff) { + pr_err("relocation of a large immediate!\n"); + nfp_prog->error = -EFAULT; + return; + } + emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) +{ + while (count--) + emit_nop(nfp_prog); +} + +static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) +{ + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); +} + +/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the + * result to @dst from low end. + */ +static void +wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, + u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; + u8 mask = (1 << field_len) - 1; + + emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); +} + +/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the + * result to @dst from offset, there is no change on the other bits of @dst. + */ +static void +wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, + u8 field_len, u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE; + u8 mask = ((1 << field_len) - 1) << offset; + + emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8); +} + +static void +addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + swreg *rega, swreg *regb) +{ + if (offset == reg_imm(0)) { + *rega = reg_a(src_gpr); + *regb = reg_b(src_gpr + 1); + return; + } + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C, + reg_imm(0)); + *rega = imm_a(nfp_prog); + *regb = imm_b(nfp_prog); +} + +/* NFP has Command Push Pull bus which supports bluk memory operations. */ +static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + bool descending_seq = meta->ldst_gather_len < 0; + s16 len = abs(meta->ldst_gather_len); + swreg src_base, off; + bool src_40bit_addr; + unsigned int i; + u8 xfer_num; + + off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE; + src_base = reg_a(meta->insn.src_reg * 2); + xfer_num = round_up(len, 4) / 4; + + if (src_40bit_addr) + addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base, + &off); + + /* Setup PREV_ALU fields to override memory read length. */ + if (len > 32) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + + /* Memory read from source addr into transfer-in registers. */ + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, + src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0, + src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32); + + /* Move from transfer-in to transfer-out. */ + for (i = 0; i < xfer_num; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); + + if (len <= 8) { + /* Use single direct_ref write8. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, len - 1, + CMD_CTX_SWAP); + } else if (len <= 32 && IS_ALIGNED(len, 4)) { + /* Use single direct_ref write32. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, + CMD_CTX_SWAP); + } else if (len <= 32) { + /* Use single indirect_ref write8. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + len - 1, CMD_CTX_SWAP); + } else if (IS_ALIGNED(len, 4)) { + /* Use single indirect_ref write32. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 1, CMD_CTX_SWAP); + } else if (len <= 40) { + /* Use one direct_ref write32 to write the first 32-bytes, then + * another direct_ref write8 to write the remaining bytes. + */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, 7, + CMD_CTX_SWAP); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, + imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, + reg_a(meta->paired_st->dst_reg * 2), off, len - 33, + CMD_CTX_SWAP); + } else { + /* Use one indirect_ref write32 to write 4-bytes aligned length, + * then another direct_ref write8 to write the remaining bytes. + */ + u8 new_off; + + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 2, CMD_CTX_SWAP); + new_off = meta->paired_st->off + (xfer_num - 1) * 4; + off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, + xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, + (len & 0x3) - 1, CMD_CTX_SWAP); + } + + /* TODO: The following extra load is to make sure data flow be identical + * before and after we do memory copy optimization. + * + * The load destination register is not guaranteed to be dead, so we + * need to make sure it is loaded with the value the same as before + * this transformation. + * + * These extra loads could be removed once we have accurate register + * usage information. + */ + if (descending_seq) + xfer_num = 0; + else if (BPF_SIZE(meta->insn.code) != BPF_DW) + xfer_num = xfer_num - 1; + else + xfer_num = xfer_num - 2; + + switch (BPF_SIZE(meta->insn.code)) { + case BPF_B: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 1, + IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1); + break; + case BPF_H: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 2, (len & 3) ^ 2); + break; + case BPF_W: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(0)); + break; + case BPF_DW: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num)); + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), + reg_xfer(xfer_num + 1)); + break; + } + + if (BPF_SIZE(meta->insn.code) != BPF_DW) + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int +data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset, + u8 dst_gpr, int size) +{ + unsigned int i; + u16 shift, sz; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = max(size, 4); + shift = size < 4 ? 4 - size : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP); + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_zext(nfp_prog, meta, dst_gpr); + + return 0; +} + +static int +data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 dst_gpr, swreg lreg, swreg rreg, int size, + enum cmd_mode mode) +{ + unsigned int i; + u8 mask, sz; + + /* We load the value from the address indicated in rreg + lreg and then + * mask out the data we don't need. Note: this is little endian! + */ + sz = max(size, 4); + mask = size < 4 ? GENMASK(size - 1, 0) : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0, + lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP); + + i = 0; + if (mask) + emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, + reg_xfer(0), SHF_SC_NONE, 0, true); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_zext(nfp_prog, meta, dst_gpr); + + return 0; +} + +static int +data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) +{ + return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr), + offset, size, CMD_MODE_32b); +} + +static int +data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) +{ + swreg rega, regb; + + addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); + + return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb, + size, CMD_MODE_40b_BA); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u16 src, u8 size) +{ + swreg tmp_reg; + + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); + + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); + + /* Load data */ + return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size); +} + +static int +construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u8 size) +{ + swreg tmp_reg; + + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); + + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + return data_ld(nfp_prog, meta, tmp_reg, 0, size); +} + +static int +data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u8 src_gpr, u8 size) +{ + unsigned int i; + + for (i = 0; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); + + return 0; +} + +static int +data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u64 imm, u8 size) +{ + wrp_immed(nfp_prog, reg_xfer(0), imm); + if (size == 8) + wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); + + return 0; +} + +typedef int +(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc); + +static int +wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, src_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_both(dst), + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + src_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* If it's not the first part of the load and we start a new GPR + * that means we are loading a second part of the LMEM word into + * a new GPR. IOW we've already looked that LMEM word and + * therefore it has been loaded into imm_a(). + */ + if (first || !new_gpr) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + + return 0; +} + +static int +wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, dst_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), + reg_b(src)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + dst_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* Only first and last LMEM locations are going to need RMW, + * the middle location will be overwritten fully. + */ + if (first || last) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); + + if (new_gpr || last) { + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + } + + return 0; +} + +static int +mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, + bool clr_gpr, lmem_step step) +{ + s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off; + bool first = true, narrow_ld, last; + bool needs_inc = false; + swreg stack_off_reg; + u8 prev_gpr = 255; + u32 gpr_byte = 0; + bool lm3 = true; + int ret; + + if (meta->ptr_not_const || + meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) { + /* Use of the last encountered ptr_off is OK, they all have + * the same alignment. Depend on low bits of value being + * discarded when written to LMaddr register. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } else if (off + size <= 64) { + /* We can reach bottom 64B with LMaddr0 */ + lm3 = false; + } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { + /* We have to set up a new pointer. If we know the offset + * and the entire access falls into a single 32 byte aligned + * window we won't have to increment the LM pointer. + * The 32 byte alignment is imporant because offset is ORed in + * not added when doing *l$indexN[off]. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), + stack_imm(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + off %= 32; + } else { + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } + + narrow_ld = clr_gpr && size < 8; + + if (lm3) { + unsigned int nop_cnt; + + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); + /* For size < 4 one slot will be filled by zeroing of upper, + * but be careful, that zeroing could be eliminated by zext + * optimization. + */ + nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3; + wrp_nops(nfp_prog, nop_cnt); + } + + if (narrow_ld) + wrp_zext(nfp_prog, meta, gpr); + + while (size) { + u32 slice_end; + u8 slice_size; + + slice_size = min(size, 4 - gpr_byte); + slice_end = min(off + slice_size, round_up(off + 1, 4)); + slice_size = slice_end - off; + + last = slice_size == size; + + if (needs_inc) + off %= 4; + + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, + first, gpr != prev_gpr, last, lm3, needs_inc); + if (ret) + return ret; + + prev_gpr = gpr; + first = false; + + gpr_byte += slice_size; + if (gpr_byte >= 4) { + gpr_byte -= 4; + gpr++; + } + + size -= slice_size; + off += slice_size; + } + + return 0; +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + swreg tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NOT, reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->flags |= FLAG_INSN_SKIP_NOOP; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm); + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + if (is_mbpf_jmp64(meta)) + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static const struct jmp_code_map { + enum br_mask br_mask; + bool swap; +} jmp_code_map[] = { + [BPF_JGT >> 4] = { BR_BLO, true }, + [BPF_JGE >> 4] = { BR_BHS, false }, + [BPF_JLT >> 4] = { BR_BLO, false }, + [BPF_JLE >> 4] = { BR_BHS, true }, + [BPF_JSGT >> 4] = { BR_BLT, true }, + [BPF_JSGE >> 4] = { BR_BGE, false }, + [BPF_JSLT >> 4] = { BR_BLT, false }, + [BPF_JSLE >> 4] = { BR_BGE, true }, +}; + +static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta) +{ + unsigned int op; + + op = BPF_OP(meta->insn.code) >> 4; + /* br_mask of 0 is BR_BEQ which we don't use in jump code table */ + if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) || + !jmp_code_map[op].br_mask, + "no code found for jump instruction")) + return NULL; + + return &jmp_code_map[op]; +} + +static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + const struct jmp_code_map *code; + enum alu_op alu_op, carry_op; + u8 reg = insn->dst_reg * 2; + swreg tmp_reg; + + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + + alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB; + carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); + + if (is_mbpf_jmp64(meta)) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), carry_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, carry_op, reg_a(reg + 1)); + } + + emit_br(nfp_prog, code->br_mask, insn->off, 0); + + return 0; +} + +static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + const struct jmp_code_map *code; + u8 areg, breg; + + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + + areg = insn->dst_reg * 2; + breg = insn->src_reg * 2; + + if (code->swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + if (is_mbpf_jmp64(meta)) + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, code->br_mask, insn->off, 0); + + return 0; +} + +static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) +{ + emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), + SHF_SC_R_ROT, 16); +} + +static void +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg, bool gen_high_half) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); + if (gen_high_half) + emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, + reg_none()); + else + wrp_immed(nfp_prog, dst_hi, 0); +} + +static void +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); +} + +static int +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + bool gen_high_half, bool ropnd_from_reg) +{ + swreg multiplier, multiplicand, dst_hi, dst_lo; + const struct bpf_insn *insn = &meta->insn; + u32 lopnd_max, ropnd_max; + u8 dst_reg; + + dst_reg = insn->dst_reg; + multiplicand = reg_a(dst_reg * 2); + dst_hi = reg_both(dst_reg * 2 + 1); + dst_lo = reg_both(dst_reg * 2); + lopnd_max = meta->umax_dst; + if (ropnd_from_reg) { + multiplier = reg_b(insn->src_reg * 2); + ropnd_max = meta->umax_src; + } else { + u32 imm = insn->imm; + + multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + ropnd_max = imm; + } + if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) + wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier, + gen_high_half); + else + wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier); + + return 0; +} + +static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) +{ + swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); + struct reciprocal_value_adv rvalue; + u8 pre_shift, exp; + swreg magic; + + if (imm > U32_MAX) { + wrp_immed(nfp_prog, dst_both, 0); + return 0; + } + + /* NOTE: because we are using "reciprocal_value_adv" which doesn't + * support "divisor > (1u << 31)", we need to JIT separate NFP sequence + * to handle such case which actually equals to the result of unsigned + * comparison "dst >= imm" which could be calculated using the following + * NFP sequence: + * + * alu[--, dst, -, imm] + * immed[imm, 0] + * alu[dst, imm, +carry, 0] + * + */ + if (imm > 1U << 31) { + swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + + emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b); + wrp_immed(nfp_prog, imm_a(nfp_prog), 0); + emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C, + reg_imm(0)); + return 0; + } + + rvalue = reciprocal_value_adv(imm, 32); + exp = rvalue.exp; + if (rvalue.is_wide_m && !(imm & 1)) { + pre_shift = fls(imm & -imm) - 1; + rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift); + } else { + pre_shift = 0; + } + magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); + if (imm == 1U << exp) { + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, exp); + } else if (rvalue.is_wide_m) { + wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, + magic, true); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, 1); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh - 1); + } else { + if (pre_shift) + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, pre_shift); + wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, rvalue.sh); + } + + return 0; +} + +static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); + struct nfp_bpf_cap_adjust_head *adjust_head; + u32 ret_einval, end; + + adjust_head = &nfp_prog->bpf->adjust_head; + + /* Optimized version - 5 vs 14 cycles */ + if (nfp_prog->adjust_head_location != UINT_MAX) { + if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n)) + return -EINVAL; + + emit_alu(nfp_prog, pptr_reg(nfp_prog), + reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + /* TODO: when adjust head is guaranteed to succeed we can + * also eliminate the following if (r0 == 0) branch. + */ + + return 0; + } + + ret_einval = nfp_prog_current_offset(nfp_prog) + 14; + end = ret_einval + 2; + + /* We need to use a temp because offset is just a part of the pkt ptr */ + emit_alu(nfp_prog, tmp, + reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog)); + + /* Validate result will fit within FW datapath constraints */ + emit_alu(nfp_prog, reg_none(), + tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min)); + emit_br(nfp_prog, BR_BLO, ret_einval, 0); + emit_alu(nfp_prog, reg_none(), + reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp); + emit_br(nfp_prog, BR_BLO, ret_einval, 0); + + /* Validate the length is at least ETH_HLEN */ + emit_alu(nfp_prog, tmp_len, + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, reg_none(), + tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + /* Load the ret code */ + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + /* Modify the packet metadata */ + emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); + + /* Skip over the -EINVAL ret code (defer 2) */ + emit_br(nfp_prog, BR_UNC, end, 2); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + + /* return -EINVAL target */ + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + +static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_einval, end; + swreg plen, delta; + + BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); + + plen = imm_a(nfp_prog); + delta = reg_a(2 * 2); + + ret_einval = nfp_prog_current_offset(nfp_prog) + 9; + end = nfp_prog_current_offset(nfp_prog) + 11; + + /* Calculate resulting length */ + emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); + /* delta == 0 is not allowed by the kernel, add must overflow to make + * length smaller. + */ + emit_br(nfp_prog, BR_BCC, ret_einval, 0); + + /* if (new_len < 14) then -EINVAL */ + emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_ADD, delta); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_ADD, delta); + + emit_br(nfp_prog, BR_UNC, end, 2); + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + +static int +map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + bool load_lm_ptr; + u32 ret_tgt; + s64 lm_off; + + /* We only have to reload LM0 if the key is not at start of stack */ + lm_off = nfp_prog->stack_frame_depth; + lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; + load_lm_ptr = meta->arg2.var_off || lm_off; + + /* Set LM0 to start of key */ + if (load_lm_ptr) + emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); + if (meta->func_id == BPF_FUNC_map_update_elem) + emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, + 2, RELO_BR_HELPER); + ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; + + /* Load map ID into A0 */ + wrp_mov(nfp_prog, reg_a(0), reg_a(2)); + + /* Load the return address into B0 */ + wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + /* Reset the LM0 pointer */ + if (!load_lm_ptr) + return 0; + + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); + wrp_nops(nfp_prog, 3); + + return 0; +} + +static int +nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM); + /* CSR value is read in following immed[gpr, 0] */ + emit_immed(nfp_prog, reg_both(0), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, reg_both(1), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + return 0; +} + +static int +nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + swreg ptr_type; + u32 ret_tgt; + + ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog)); + + ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, + 2, RELO_BR_HELPER); + + /* Load ptr type into A1 */ + wrp_mov(nfp_prog, reg_a(1), ptr_type); + + /* Load the return address into B0 */ + wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + return 0; +} + +static int +nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 jmp_tgt; + + jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5; + + /* Make sure the queue id fits into FW field */ + emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2), + ALU_OP_AND_NOT_B, reg_imm(0xff)); + emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2); + + /* Set the 'queue selected' bit and the queue value */ + emit_shf(nfp_prog, pv_qsel_set(nfp_prog), + pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1), + SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT); + emit_ld_field(nfp_prog, + pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2), + SHF_SC_NONE, 0); + /* Delay slots end here, we will jump over next instruction if queue + * value fits into the field. + */ + emit_ld_field(nfp_prog, + pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX), + SHF_SC_NONE, 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt)) + return -EINVAL; + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + u8 src = insn->src_reg * 2; + + if (insn->src_reg == BPF_REG_10) { + swreg stack_depth_reg; + + stack_depth_reg = ur_load_imm_any(nfp_prog, + nfp_prog->stack_frame_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog), + ALU_OP_ADD, stack_depth_reg); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else { + wrp_reg_mov(nfp_prog, dst, src); + wrp_reg_mov(nfp_prog, dst + 1, src + 1); + } + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, true); +} + +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, false); +} + +static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm); +} + +static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* NOTE: verifier hook has rejected cases for which verifier doesn't + * know whether the source operand is constant or not. + */ + return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src); +} + +static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), + ALU_OP_SUB, reg_b(insn->dst_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), + ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); + + return 0; +} + +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = dst_low << shift_amt[4:0] + * dst_low = 0; + * else + * dst_high = (dst_high, dst_low) >> (32 - shift_amt) + * dst_low = dst_low << shift_amt + * + * The indirect shift will use the same logic at runtime. + */ +static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (!shift_amt) + return 0; + + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), + SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, + 32 - shift_amt); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst + 1, dst); + wrp_immed(nfp_prog, reg_both(dst), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst), 0); + } + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shl_imm64(nfp_prog, dst, insn->imm); +} + +static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB, + reg_b(src)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +/* NOTE: for indirect left shift, HIGH part should be calculated first. */ +static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); +} + +static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shl_reg64_lt32_high(nfp_prog, dst, src); + shl_reg64_lt32_low(nfp_prog, dst, src); +} + +static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); + wrp_immed(nfp_prog, reg_both(dst), 0); +} + +static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shl_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shl_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 7; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + + shl_reg64_lt32_high(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shl_reg64_lt32_low packed in delay slot. */ + shl_reg64_lt32_low(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shl_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = 0; + * dst_low = dst_high >> shift_amt[4:0] + * else + * dst_high = dst_high >> shift_amt + * dst_low = (dst_high, dst_low) >> shift_amt + * + * The indirect shift will use the same logic at runtime. + */ +static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (!shift_amt) + return 0; + + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst, dst + 1); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm64(nfp_prog, dst, insn->imm); +} + +/* NOTE: for indirect right shift, LOW part should be calculated first. */ +static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shr_reg64_lt32_low(nfp_prog, dst, src); + shr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shr_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + shr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shr_reg64_lt32_high packed in delay slot. */ + shr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Code logic is the same as __shr_imm64 except ashr requires signedness bit + * told through PREV_ALU result. + */ +static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (!shift_amt) + return 0; + + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + /* Set signedness bit. */ + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + /* NOTE: this also helps setting signedness bit. */ + wrp_reg_mov(nfp_prog, dst, dst + 1); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } else if (shift_amt > 32) { + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } + + return 0; +} + +static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __ashr_imm64(nfp_prog, dst, insn->imm); +} + +static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: the first insn will set both indirect shift amount (source A) + * and signedness bit (MSB of result). + */ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: it is the same as logic shift because we don't need to shift in + * signedness bit when the shift amount is less than 32. + */ + return shr_reg64_lt32_low(nfp_prog, dst, src); +} + +static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + ashr_reg64_lt32_low(nfp_prog, dst, src); + ashr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); +} + +/* Like ashr_imm64, but need to use indirect shift. */ +static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __ashr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + ashr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + ashr_reg64_ge32(nfp_prog, dst, src); + } else { + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + ashr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* ashr_reg64_lt32_high packed in delay slot. */ + ashr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + ashr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB); +} + +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, true); +} + +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, false); +} + +static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_reg64(nfp_prog, meta); +} + +static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_imm64(nfp_prog, meta); +} + +static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u8 dst = meta->insn.dst_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int +__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) { + /* Set signedness bit (MSB of result). */ + emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + } + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __ashr_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + /* NOTE: the first insn will set both indirect shift amount (source A) + * and signedness bit (MSB of result). + */ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF); + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __ashr_imm(nfp_prog, meta, dst, insn->imm); +} + +static int +__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm(nfp_prog, meta, dst, insn->imm); +} + +static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int +__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shl_imm(nfp_prog, meta, dst, insn->imm); +} + +static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + shl_reg64_lt32_low(nfp_prog, dst, src); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 gpr = insn->dst_reg * 2; + + switch (insn->imm) { + case 16: + emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), + SHF_SC_R_SHF, 16); + + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 32: + wrp_end32(nfp_prog, reg_a(gpr), gpr); + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 64: + wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); + + wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); + wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); + break; + } + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + struct nfp_insn_meta *prev = nfp_meta_prev(meta); + u32 imm_lo, imm_hi; + u8 dst; + + dst = prev->insn.dst_reg * 2; + imm_lo = prev->insn.imm; + imm_hi = meta->insn.imm; + + wrp_immed(nfp_prog, reg_both(dst), imm_lo); + + /* mov is always 1 insn, load imm may be two, so try to use mov */ + if (imm_hi == imm_lo) + wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); + else + wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + meta->double_cb = imm_ld8_part2; + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, + meta->insn.src_reg * 2, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, + meta->insn.src_reg * 2, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, + meta->insn.src_reg * 2, 4); +} + +static int +mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) +{ + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.dst_reg * 2, meta->insn.src_reg * 2, + true, wrp_lmem_load); +} + +static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) +{ + swreg dst = reg_both(meta->insn.dst_reg * 2); + + switch (meta->insn.off) { + case offsetof(struct __sk_buff, len): + if (size != sizeof_field(struct __sk_buff, len)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data): + if (size != sizeof_field(struct __sk_buff, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data_end): + if (size != sizeof_field(struct __sk_buff, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: + return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) +{ + swreg dst = reg_both(meta->insn.dst_reg * 2); + + switch (meta->insn.off) { + case offsetof(struct xdp_md, data): + if (size != sizeof_field(struct xdp_md, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct xdp_md, data_end): + if (size != sizeof_field(struct xdp_md, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: + return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int +mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; + + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2, + tmp_reg, meta->insn.dst_reg * 2, size); +} + +static int +mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; + + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2, + tmp_reg, meta->insn.dst_reg * 2, size); +} + +static void +mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 range_end = meta->pkt_cache.range_end; + swreg src_base, off; + u8 xfer_num, len; + bool indir; + + off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog)); + src_base = reg_a(meta->insn.src_reg * 2); + len = range_end - range_start; + xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH; + + indir = len > 8 * REG_WIDTH; + /* Setup PREV_ALU for indirect mode. */ + if (indir) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + + /* Cache memory into transfer-in registers. */ + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, + off, xfer_num - 1, CMD_CTX_SWAP, indir); +} + +static int +mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 insn_off = meta->insn.off - range_start; + swreg dst_lo, dst_hi, src_lo, src_mid; + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 len_lo = size, len_mid = 0; + u8 idx = insn_off / REG_WIDTH; + u8 off = insn_off % REG_WIDTH; + + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + /* The read length could involve as many as three registers. */ + if (size > REG_WIDTH - off) { + /* Calculate the part in the second register. */ + len_lo = REG_WIDTH - off; + len_mid = size - len_lo; + + /* Calculate the part in the third register. */ + if (size > 2 * REG_WIDTH - off) + len_mid = REG_WIDTH; + } + + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); + + if (!len_mid) { + wrp_zext(nfp_prog, meta, dst_gpr); + return 0; + } + + src_mid = reg_xfer(idx + 1); + + if (size <= REG_WIDTH) { + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); + wrp_zext(nfp_prog, meta, dst_gpr); + } else { + swreg src_hi = reg_xfer(idx + 2); + + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, + REG_WIDTH - len_lo, len_lo); + wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo, + REG_WIDTH - len_lo); + wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo, + len_lo); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg dst_lo, dst_hi, src_lo; + u8 dst_gpr, idx; + + idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; + dst_gpr = meta->insn.dst_reg * 2; + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + if (size < REG_WIDTH) { + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); + wrp_zext(nfp_prog, meta, dst_gpr); + } else if (size == REG_WIDTH) { + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_zext(nfp_prog, meta, dst_gpr); + } else { + swreg src_hi = reg_xfer(idx + 1); + + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_mov(nfp_prog, dst_hi, src_hi); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, unsigned int size) +{ + u8 off = meta->insn.off - meta->pkt_cache.range_start; + + if (IS_ALIGNED(off, REG_WIDTH)) + return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); + + return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); +} + +static int +mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ldst_gather_len) + return nfp_cpp_memcpy(nfp_prog, meta); + + if (meta->ptr.type == PTR_TO_CTX) { + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return mem_ldx_xdp(nfp_prog, meta, size); + else + return mem_ldx_skb(nfp_prog, meta, size); + } + + if (meta->ptr.type == PTR_TO_PACKET) { + if (meta->pkt_cache.range_end) { + if (meta->pkt_cache.do_init) + mem_ldx_data_init_pktcache(nfp_prog, meta); + + return mem_ldx_data_from_pktcache(nfp_prog, meta, size); + } else { + return mem_ldx_data(nfp_prog, meta, size); + } + } + + if (meta->ptr.type == PTR_TO_STACK) + return mem_ldx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + + if (meta->ptr.type == PTR_TO_MAP_VALUE) + return mem_ldx_emem(nfp_prog, meta, size); + + return -EOPNOTSUPP; +} + +static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 1); +} + +static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 2); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 4); +} + +static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 8); +} + +static int +mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + u64 imm = meta->insn.imm; /* sign extend */ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + imm, size); +} + +static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_st_data(nfp_prog, meta, size); + + return -EOPNOTSUPP; +} + +static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 1); +} + +static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 2); +} + +static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 4); +} + +static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 8); +} + +static int +mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + meta->insn.src_reg * 2, size); +} + +static int +mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) +{ + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.src_reg * 2, meta->insn.dst_reg * 2, + false, wrp_lmem_store); +} + +static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + switch (meta->insn.off) { + case offsetof(struct xdp_md, rx_queue_index): + return nfp_queue_select(nfp_prog, meta); + } + + WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */ + return -EOPNOTSUPP; +} + +static int +mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_stx_data(nfp_prog, meta, size); + + if (meta->ptr.type == PTR_TO_STACK) + return mem_stx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + + return -EOPNOTSUPP; +} + +static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 1); +} + +static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 2); +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->ptr.type == PTR_TO_CTX) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return mem_stx_xdp(nfp_prog, meta); + return mem_stx(nfp_prog, meta, 4); +} + +static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 8); +} + +static int +mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) +{ + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 src_gpr = meta->insn.src_reg * 2; + unsigned int full_add, out; + swreg addra, addrb, off; + + off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + /* We can fit 16 bits into command immediate, if we know the immediate + * is guaranteed to either always or never fit into 16 bit we only + * generate code to handle that particular case, otherwise generate + * code for both. + */ + out = nfp_prog_current_offset(nfp_prog); + full_add = nfp_prog_current_offset(nfp_prog); + + if (meta->insn.off) { + out += 2; + full_add += 2; + } + if (meta->xadd_maybe_16bit) { + out += 3; + full_add += 3; + } + if (meta->xadd_over_16bit) + out += 2 + is64; + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + out += 5; + full_add += 5; + } + + /* Generate the branch for choosing add_imm vs add */ + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + swreg max_imm = imm_a(nfp_prog); + + wrp_immed(nfp_prog, max_imm, 0xffff); + emit_alu(nfp_prog, reg_none(), + max_imm, ALU_OP_SUB, reg_b(src_gpr)); + emit_alu(nfp_prog, reg_none(), + reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1)); + emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0); + /* defer for add */ + } + + /* If insn has an offset add to the address */ + if (!meta->insn.off) { + addra = reg_a(dst_gpr); + addrb = reg_b(dst_gpr + 1); + } else { + emit_alu(nfp_prog, imma_a(nfp_prog), + reg_a(dst_gpr), ALU_OP_ADD, off); + emit_alu(nfp_prog, imma_b(nfp_prog), + reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0)); + addra = imma_a(nfp_prog); + addrb = imma_b(nfp_prog); + } + + /* Generate the add_imm if 16 bits are possible */ + if (meta->xadd_maybe_16bit) { + swreg prev_alu = imm_a(nfp_prog); + + wrp_immed(nfp_prog, prev_alu, + FIELD_PREP(CMD_OVE_DATA, 2) | + CMD_OVE_LEN | + FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); + wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); + emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, + addra, addrb, 0, CMD_CTX_NO_SWAP); + + if (meta->xadd_over_16bit) + emit_br(nfp_prog, BR_UNC, out, 0); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, full_add)) + return -EINVAL; + + /* Generate the add if 16 bits are not guaranteed */ + if (meta->xadd_over_16bit) { + emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0, + addra, addrb, is64 << 2, + is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1); + + wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); + if (is64) + wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, out)) + return -EINVAL; + + return 0; +} + +static int mem_atomic4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.imm != BPF_ADD) + return -EOPNOTSUPP; + + return mem_xadd(nfp_prog, meta, false); +} + +static int mem_atomic8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.imm != BPF_ADD) + return -EOPNOTSUPP; + + return mem_xadd(nfp_prog, meta, true); +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + swreg or1, or2, tmp_reg; + + or1 = reg_a(insn->dst_reg * 2); + or2 = reg_b(insn->dst_reg * 2 + 1); + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + swreg tmp_reg; + + tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 dst_gpr = insn->dst_reg * 2; + swreg tmp_reg; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(dst_gpr), ALU_OP_AND, tmp_reg); + /* Upper word of the mask can only be 0 or ~0 from sign extension, + * so either ignore it or OR the whole thing in. + */ + if (is_mbpf_jmp64(meta) && imm >> 32) { + emit_alu(nfp_prog, reg_none(), + reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog)); + } + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + bool is_jmp32 = is_mbpf_jmp32(meta); + swreg tmp_reg; + + if (!imm) { + if (is_jmp32) + emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE, + reg_b(insn->dst_reg * 2)); + else + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + return 0; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + if (is_jmp32) + return 0; + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + if (is_mbpf_jmp64(meta)) { + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, + reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, + imm_b(nfp_prog)); + } + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int +bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_tgt, stack_depth, offset_br; + swreg tmp_reg; + + stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN); + /* Space for saving the return address is accounted for by the callee, + * so stack_depth can be zero for the main function. + */ + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + } + + /* Two cases for jumping to the callee: + * + * - If callee uses and needs to save R6~R9 then: + * 1. Put the start offset of the callee into imm_b(). This will + * require a fixup step, as we do not necessarily know this + * address yet. + * 2. Put the return address from the callee to the caller into + * register ret_reg(). + * 3. (After defer slots are consumed) Jump to the subroutine that + * pushes the registers to the stack. + * The subroutine acts as a trampoline, and returns to the address in + * imm_b(), i.e. jumps to the callee. + * + * - If callee does not need to save R6~R9 then just load return + * address to the caller in ret_reg(), and jump to the callee + * directly. + * + * Using ret_reg() to pass the return address to the callee is set here + * as a convention. The callee can then push this address onto its + * stack frame in its prologue. The advantages of passing the return + * address through ret_reg(), instead of pushing it to the stack right + * here, are the following: + * - It looks cleaner. + * - If the called function is called multiple time, we get a lower + * program size. + * - We save two no-op instructions that should be added just before + * the emit_br() when stack depth is not null otherwise. + * - If we ever find a register to hold the return address during whole + * execution of the callee, we will not have to push the return + * address to the stack for leaf functions. + */ + if (!meta->jmp_dst) { + pr_err("BUG: BPF-to-BPF call has no destination recorded\n"); + return -ELOOP; + } + if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, + RELO_BR_GO_CALL_PUSH_REGS); + offset_br = nfp_prog_current_offset(nfp_prog); + wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); + } else { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; + emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1); + offset_br = nfp_prog_current_offset(nfp_prog); + } + wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + wrp_nops(nfp_prog, 3); + } + + meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog); + meta->num_insns_after_br -= offset_br; + + return 0; +} + +static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + switch (meta->insn.imm) { + case BPF_FUNC_xdp_adjust_head: + return adjust_head(nfp_prog, meta); + case BPF_FUNC_xdp_adjust_tail: + return adjust_tail(nfp_prog, meta); + case BPF_FUNC_map_lookup_elem: + case BPF_FUNC_map_update_elem: + case BPF_FUNC_map_delete_elem: + return map_call_stack_common(nfp_prog, meta); + case BPF_FUNC_get_prandom_u32: + return nfp_get_prandom_u32(nfp_prog, meta); + case BPF_FUNC_perf_event_output: + return nfp_perf_event_output(nfp_prog, meta); + default: + WARN_ONCE(1, "verifier allowed unsupported function\n"); + return -EOPNOTSUPP; + } +} + +static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (is_mbpf_pseudo_call(meta)) + return bpf_to_bpf_call(nfp_prog, meta); + else + return helper_call(nfp_prog, meta); +} + +static bool nfp_is_main_function(struct nfp_insn_meta *meta) +{ + return meta->subprog_idx == 0; +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); + + return 0; +} + +static int +nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) { + /* Pop R6~R9 to the stack via related subroutine. + * We loaded the return address to the caller into ret_reg(). + * This means that the subroutine does not come back here, we + * make it jump back to the subprogram caller directly! + */ + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, + RELO_BR_GO_CALL_POP_REGS); + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + } else { + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + /* Jump back to caller if no callee-saved registers were used + * by the subprogram. + */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 0); + } + + return 0; +} + +static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_is_main_function(meta)) + return goto_out(nfp_prog, meta); + else + return nfp_subprog_epilogue(nfp_prog, meta); +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, + [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, + [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, + [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, + [BPF_ALU64 | BPF_NEG] = neg_reg64, + [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64, + [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, + [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, + [BPF_ALU | BPF_DIV | BPF_X] = div_reg, + [BPF_ALU | BPF_DIV | BPF_K] = div_imm, + [BPF_ALU | BPF_NEG] = neg_reg, + [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, + [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, + [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, + [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, + [BPF_ALU | BPF_END | BPF_X] = end_reg32, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, + [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, + [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, + [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, + [BPF_STX | BPF_ATOMIC | BPF_W] = mem_atomic4, + [BPF_STX | BPF_ATOMIC | BPF_DW] = mem_atomic8, + [BPF_ST | BPF_MEM | BPF_B] = mem_st1, + [BPF_ST | BPF_MEM | BPF_H] = mem_st2, + [BPF_ST | BPF_MEM | BPF_W] = mem_st4, + [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm, + [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm, + [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg, + [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_CALL] = call, + [BPF_JMP | BPF_EXIT] = jmp_exit, +}; + +/* --- Assembler logic --- */ +static int +nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct nfp_insn_meta *jmp_dst, u32 br_idx) +{ + if (immed_get_value(nfp_prog->prog[br_idx + 1])) { + pr_err("BUG: failed to fix up callee register saving\n"); + return -EINVAL; + } + + immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off); + + return 0; +} + +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *jmp_dst; + u32 idx, br_idx; + int err; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->flags & FLAG_INSN_SKIP_MASK) + continue; + if (!is_mbpf_jmp(meta)) + continue; + if (meta->insn.code == (BPF_JMP | BPF_EXIT) && + !nfp_is_main_function(meta)) + continue; + if (is_mbpf_helper_call(meta)) + continue; + + if (list_is_last(&meta->l, &nfp_prog->insns)) + br_idx = nfp_prog->last_bpf_off; + else + br_idx = list_next_entry(meta, l)->off - 1; + + /* For BPF-to-BPF function call, a stack adjustment sequence is + * generated after the return instruction. Therefore, we must + * withdraw the length of this sequence to have br_idx pointing + * to where the "branch" NFP instruction is expected to be. + */ + if (is_mbpf_pseudo_call(meta)) + br_idx -= meta->num_insns_after_br; + + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + continue; + + /* Leave special branches for later */ + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL && !is_mbpf_pseudo_call(meta)) + continue; + + if (!meta->jmp_dst) { + pr_err("Non-exit jump doesn't have destination info recorded!!\n"); + return -ELOOP; + } + + jmp_dst = meta->jmp_dst; + + if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + if (is_mbpf_pseudo_call(meta) && + nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) { + err = nfp_fixup_immed_relo(nfp_prog, meta, + jmp_dst, br_idx); + if (err) + return err; + } + + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) + continue; + + for (idx = meta->off; idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); + } + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); +} + +static void +nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* Save return address into the stack. */ + wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog)); +} + +static void +nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth; + + nfp_prog->stack_frame_depth = round_up(depth, 4); + nfp_subprog_prologue(nfp_prog, meta); +} + +bool nfp_is_subprog_start(struct nfp_insn_meta *meta) +{ + return meta->flags & FLAG_INSN_IS_SUBPROG_START; +} + +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static void nfp_outro_xdp(struct nfp_prog *nfp_prog) +{ + /* XDP return codes: + * 0 aborted 0x82 -> drop, count as stat3 + * 1 drop 0x22 -> drop, count as stat1 + * 2 pass 0x11 -> pass, count as stat0 + * 3 tx 0x44 -> redir, count as stat2 + * * unknown 0x82 -> drop, count as stat3 + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 3 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + + wrp_immed(nfp_prog, reg_b(2), 0x44112282); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) +{ + unsigned int idx; + + for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) + if (nfp_prog->subprog[idx].needs_reg_push) + return true; + + return false; +} + +static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Save all callee saved registers (R6 ~ R9). + * imm_b() holds the return address. + */ + nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame is used to push the return + * address in bpf_to_bpf_call(), start just after. + */ + wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, imm_b(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1)); + } +} + +static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Restore all callee saved registers (R6 ~ R9). + * ret_reg() holds the return address. + */ + nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame holds the return address, + * start popping just after that. + */ + wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1)); + } +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + nfp_outro_tc_da(nfp_prog); + break; + case BPF_PROG_TYPE_XDP: + nfp_outro_xdp(nfp_prog); + break; + default: + WARN_ON(1); + } + + if (!nfp_prog_needs_callee_reg_save(nfp_prog)) + return; + + nfp_push_callee_registers(nfp_prog); + nfp_pop_callee_registers(nfp_prog); +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + unsigned int depth; + int err; + + depth = nfp_prog->subprog[0].stack_depth; + nfp_prog->stack_frame_depth = round_up(depth, 4); + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (nfp_is_subprog_start(meta)) { + nfp_start_subprog(nfp_prog, meta); + if (nfp_prog->error) + return nfp_prog->error; + } + + if (meta->flags & FLAG_INSN_SKIP_MASK) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + if (nfp_prog->error) + return nfp_prog->error; + + nfp_prog->n_translated++; + } + + nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + + /* Return as soon as something doesn't match */ + if (!(meta->flags & FLAG_INSN_SKIP_MASK)) + return; + } +} + +/* abs(insn.imm) will fit better into unrestricted reg immediate - + * convert add/sub of a negative number into a sub/add of a positive one. + */ +static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + if (meta->flags & FLAG_INSN_SKIP_MASK) + continue; + + if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta)) + continue; + if (BPF_SRC(insn.code) != BPF_K) + continue; + if (insn.imm >= 0) + continue; + + if (is_mbpf_jmp(meta)) { + switch (BPF_OP(insn.code)) { + case BPF_JGE: + case BPF_JSGE: + case BPF_JLT: + case BPF_JSLT: + meta->jump_neg_op = true; + break; + default: + continue; + } + } else { + if (BPF_OP(insn.code) == BPF_ADD) + insn.code = BPF_CLASS(insn.code) | BPF_SUB; + else if (BPF_OP(insn.code) == BPF_SUB) + insn.code = BPF_CLASS(insn.code) | BPF_ADD; + else + continue; + + meta->insn.code = insn.code | BPF_K; + } + + meta->insn.imm = -insn.imm; + } +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + static const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + if (meta2->flags & FLAG_INSN_IS_JUMP_DST) + continue; + + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + if (meta2->flags & FLAG_INSN_IS_JUMP_DST || + meta3->flags & FLAG_INSN_IS_JUMP_DST) + continue; + + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + } +} + +/* load/store pair that forms memory copy sould look like the following: + * + * ld_width R, [addr_src + offset_src] + * st_width [addr_dest + offset_dest], R + * + * The destination register of load and source register of store should + * be the same, load and store should also perform at the same width. + * If either of addr_src or addr_dest is stack pointer, we don't do the + * CPP optimization as stack is modelled by registers on NFP. + */ +static bool +curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta) +{ + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + + if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) + return false; + + if (ld_meta->ptr.type != PTR_TO_PACKET && + ld_meta->ptr.type != PTR_TO_MAP_VALUE) + return false; + + if (st_meta->ptr.type != PTR_TO_PACKET) + return false; + + if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) + return false; + + if (ld->dst_reg != st->src_reg) + return false; + + /* There is jump to the store insn in this pair. */ + if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + return true; +} + +/* Currently, we only support chaining load/store pairs if: + * + * - Their address base registers are the same. + * - Their address offsets are in the same order. + * - They operate at the same memory width. + * - There is no jump into the middle of them. + */ +static bool +curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta, + struct bpf_insn *prev_ld, + struct bpf_insn *prev_st) +{ + u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + s16 prev_ld_off, prev_st_off; + + /* This pair is the start pair. */ + if (!prev_ld) + return true; + + prev_size = BPF_LDST_BYTES(prev_ld); + curr_size = BPF_LDST_BYTES(ld); + prev_ld_base = prev_ld->src_reg; + prev_st_base = prev_st->dst_reg; + prev_ld_dst = prev_ld->dst_reg; + prev_ld_off = prev_ld->off; + prev_st_off = prev_st->off; + + if (ld->dst_reg != prev_ld_dst) + return false; + + if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) + return false; + + if (curr_size != prev_size) + return false; + + /* There is jump to the head of this pair. */ + if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + /* Both in ascending order. */ + if (prev_ld_off + prev_size == ld->off && + prev_st_off + prev_size == st->off) + return true; + + /* Both in descending order. */ + if (ld->off + curr_size == prev_ld_off && + st->off + curr_size == prev_st_off) + return true; + + return false; +} + +/* Return TRUE if cross memory access happens. Cross memory access means + * store area is overlapping with load area that a later load might load + * the value from previous store, for this case we can't treat the sequence + * as an memory copy. + */ +static bool +cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, + struct nfp_insn_meta *head_st_meta) +{ + s16 head_ld_off, head_st_off, ld_off; + + /* Different pointer types does not overlap. */ + if (head_ld_meta->ptr.type != head_st_meta->ptr.type) + return false; + + /* load and store are both PTR_TO_PACKET, check ID info. */ + if (head_ld_meta->ptr.id != head_st_meta->ptr.id) + return true; + + /* Canonicalize the offsets. Turn all of them against the original + * base register. + */ + head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; + head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; + ld_off = ld->off + head_ld_meta->ptr.off; + + /* Ascending order cross. */ + if (ld_off > head_ld_off && + head_ld_off < head_st_off && ld_off >= head_st_off) + return true; + + /* Descending order cross. */ + if (ld_off < head_ld_off && + head_ld_off > head_st_off && ld_off <= head_st_off) + return true; + + return false; +} + +/* This pass try to identify the following instructoin sequences. + * + * load R, [regA + offA] + * store [regB + offB], R + * load R, [regA + offA + const_imm_A] + * store [regB + offB + const_imm_A], R + * load R, [regA + offA + 2 * const_imm_A] + * store [regB + offB + 2 * const_imm_A], R + * ... + * + * Above sequence is typically generated by compiler when lowering + * memcpy. NFP prefer using CPP instructions to accelerate it. + */ +static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *head_ld_meta = NULL; + struct nfp_insn_meta *head_st_meta = NULL; + struct nfp_insn_meta *meta1, *meta2; + struct bpf_insn *prev_ld = NULL; + struct bpf_insn *prev_st = NULL; + u8 count = 0; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn *ld = &meta1->insn; + struct bpf_insn *st = &meta2->insn; + + /* Reset record status if any of the following if true: + * - The current insn pair is not load/store. + * - The load/store pair doesn't chain with previous one. + * - The chained load/store pair crossed with previous pair. + * - The chained load/store pair has a total size of memory + * copy beyond 128 bytes which is the maximum length a + * single NFP CPP command can transfer. + */ + if (!curr_pair_is_memcpy(meta1, meta2) || + !curr_pair_chain_with_previous(meta1, meta2, prev_ld, + prev_st) || + (head_ld_meta && (cross_mem_access(ld, head_ld_meta, + head_st_meta) || + head_ld_meta->ldst_gather_len >= 128))) { + if (!count) + continue; + + if (count > 1) { + s16 prev_ld_off = prev_ld->off; + s16 prev_st_off = prev_st->off; + s16 head_ld_off = head_ld_meta->insn.off; + + if (prev_ld_off < head_ld_off) { + head_ld_meta->insn.off = prev_ld_off; + head_st_meta->insn.off = prev_st_off; + head_ld_meta->ldst_gather_len = + -head_ld_meta->ldst_gather_len; + } + + head_ld_meta->paired_st = &head_st_meta->insn; + head_st_meta->flags |= + FLAG_INSN_SKIP_PREC_DEPENDENT; + } else { + head_ld_meta->ldst_gather_len = 0; + } + + /* If the chain is ended by an load/store pair then this + * could serve as the new head of the next chain. + */ + if (curr_pair_is_memcpy(meta1, meta2)) { + head_ld_meta = meta1; + head_st_meta = meta2; + head_ld_meta->ldst_gather_len = + BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count = 1; + } else { + head_ld_meta = NULL; + head_st_meta = NULL; + prev_ld = NULL; + prev_st = NULL; + count = 0; + } + + continue; + } + + if (!head_ld_meta) { + head_ld_meta = meta1; + head_st_meta = meta2; + } else { + meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + } + + head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count++; + } +} + +static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *range_node = NULL; + s16 range_start = 0, range_end = 0; + bool cache_avail = false; + struct bpf_insn *insn; + s32 range_ptr_off = 0; + u32 range_ptr_id = 0; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->flags & FLAG_INSN_IS_JUMP_DST) + cache_avail = false; + + if (meta->flags & FLAG_INSN_SKIP_MASK) + continue; + + insn = &meta->insn; + + if (is_mbpf_store_pkt(meta) || + insn->code == (BPF_JMP | BPF_CALL) || + is_mbpf_classic_store_pkt(meta) || + is_mbpf_classic_load(meta)) { + cache_avail = false; + continue; + } + + if (!is_mbpf_load(meta)) + continue; + + if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) { + cache_avail = false; + continue; + } + + if (!cache_avail) { + cache_avail = true; + if (range_node) + goto end_current_then_start_new; + goto start_new; + } + + /* Check ID to make sure two reads share the same + * variable offset against PTR_TO_PACKET, and check OFF + * to make sure they also share the same constant + * offset. + * + * OFFs don't really need to be the same, because they + * are the constant offsets against PTR_TO_PACKET, so + * for different OFFs, we could canonicalize them to + * offsets against original packet pointer. We don't + * support this. + */ + if (meta->ptr.id == range_ptr_id && + meta->ptr.off == range_ptr_off) { + s16 new_start = range_start; + s16 end, off = insn->off; + s16 new_end = range_end; + bool changed = false; + + if (off < range_start) { + new_start = off; + changed = true; + } + + end = off + BPF_LDST_BYTES(insn); + if (end > range_end) { + new_end = end; + changed = true; + } + + if (!changed) + continue; + + if (new_end - new_start <= 64) { + /* Install new range. */ + range_start = new_start; + range_end = new_end; + continue; + } + } + +end_current_then_start_new: + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; +start_new: + range_node = meta; + range_node->pkt_cache.do_init = true; + range_ptr_id = range_node->ptr.id; + range_ptr_off = range_node->ptr.off; + range_start = insn->off; + range_end = insn->off + BPF_LDST_BYTES(insn); + } + + if (range_node) { + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; + } + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->flags & FLAG_INSN_SKIP_MASK) + continue; + + if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { + if (meta->pkt_cache.do_init) { + range_start = meta->pkt_cache.range_start; + range_end = meta->pkt_cache.range_end; + } else { + meta->pkt_cache.range_start = range_start; + meta->pkt_cache.range_end = range_end; + } + } + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + nfp_bpf_opt_reg_init(nfp_prog); + + nfp_bpf_opt_neg_add_sub(nfp_prog); + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + nfp_bpf_opt_ldst_gather(nfp_prog); + nfp_bpf_opt_pkt_cache(nfp_prog); + + return 0; +} + +static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + struct nfp_bpf_map *nfp_map; + struct bpf_map *map; + u32 id; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + if (meta1->flags & FLAG_INSN_SKIP_MASK || + meta2->flags & FLAG_INSN_SKIP_MASK) + continue; + + if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || + meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) + continue; + + map = (void *)(unsigned long)((u32)meta1->insn.imm | + (u64)meta2->insn.imm << 32); + if (bpf_map_offload_neutral(map)) { + id = map->id; + } else { + nfp_map = map_to_offmap(map)->dev_priv; + id = nfp_map->tid; + } + + meta1->insn.imm = id; + meta2->insn.imm = 0; + } + + return 0; +} + +static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) +{ + __le64 *ustore = (__force __le64 *)prog; + int i; + + for (i = 0; i < len; i++) { + int err; + + err = nfp_ustore_check_valid_no_ecc(prog[i]); + if (err) + return err; + + ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); + } + + return 0; +} + +static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) +{ + void *prog; + + prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); + if (!prog) + return; + + nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); + memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); + kvfree(nfp_prog->prog); + nfp_prog->prog = prog; +} + +int nfp_bpf_jit(struct nfp_prog *nfp_prog) +{ + int ret; + + ret = nfp_bpf_replace_map_ptrs(nfp_prog); + if (ret) + return ret; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + return ret; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + return -EINVAL; + } + + nfp_bpf_prog_trim(nfp_prog); + + return ret; +} + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + /* Another pass to record jump information. */ + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct nfp_insn_meta *dst_meta; + u64 code = meta->insn.code; + unsigned int dst_idx; + bool pseudo_call; + + if (!is_mbpf_jmp(meta)) + continue; + if (BPF_OP(code) == BPF_EXIT) + continue; + if (is_mbpf_helper_call(meta)) + continue; + + /* If opcode is BPF_CALL at this point, this can only be a + * BPF-to-BPF call (a.k.a pseudo call). + */ + pseudo_call = BPF_OP(code) == BPF_CALL; + + if (pseudo_call) + dst_idx = meta->n + 1 + meta->insn.imm; + else + dst_idx = meta->n + 1 + meta->insn.off; + + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx); + + if (pseudo_call) + dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; + + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + meta->jmp_dst = dst_meta; + } +} + +bool nfp_bpf_supported_opcode(u8 code) +{ + return !!instr_cb[code]; +} + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) +{ + unsigned int i; + u64 *prog; + int err; + + prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), + GFP_KERNEL); + if (!prog) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nfp_prog->prog_len; i++) { + enum nfp_relo_type special; + u32 val; + u16 off; + + special = FIELD_GET(OP_RELO_TYPE, prog[i]); + switch (special) { + case RELO_NONE: + continue; + case RELO_BR_REL: + br_add_offset(&prog[i], bv->start_off); + break; + case RELO_BR_GO_OUT: + br_set_offset(&prog[i], + nfp_prog->tgt_out + bv->start_off); + break; + case RELO_BR_GO_ABORT: + br_set_offset(&prog[i], + nfp_prog->tgt_abort + bv->start_off); + break; + case RELO_BR_GO_CALL_PUSH_REGS: + if (!nfp_prog->tgt_call_push_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = nfp_prog->tgt_call_push_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; + case RELO_BR_GO_CALL_POP_REGS: + if (!nfp_prog->tgt_call_pop_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = nfp_prog->tgt_call_pop_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; + case RELO_BR_NEXT_PKT: + br_set_offset(&prog[i], bv->tgt_done); + break; + case RELO_BR_HELPER: + val = br_get_offset(prog[i]); + val -= BR_OFF_RELO; + switch (val) { + case BPF_FUNC_map_lookup_elem: + val = nfp_prog->bpf->helpers.map_lookup; + break; + case BPF_FUNC_map_update_elem: + val = nfp_prog->bpf->helpers.map_update; + break; + case BPF_FUNC_map_delete_elem: + val = nfp_prog->bpf->helpers.map_delete; + break; + case BPF_FUNC_perf_event_output: + val = nfp_prog->bpf->helpers.perf_event_output; + break; + default: + pr_err("relocation of unknown helper %d\n", + val); + err = -EINVAL; + goto err_free_prog; + } + br_set_offset(&prog[i], val); + break; + case RELO_IMMED_REL: + immed_add_value(&prog[i], bv->start_off); + break; + } + + prog[i] &= ~OP_RELO_TYPE; + } + + err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); + if (err) + goto err_free_prog; + + return prog; + +err_free_prog: + kfree(prog); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c new file mode 100644 index 000000000..f469950c7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -0,0 +1,547 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <net/pkt_cls.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" +#include "fw.h" +#include "main.h" + +const struct rhashtable_params nfp_bpf_maps_neutral_params = { + .nelem_hint = 4, + .key_len = sizeof_field(struct bpf_map, id), + .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id), + .head_offset = offsetof(struct nfp_bpf_neutral_map, l), + .automatic_shrinking = true, +}; + +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ +#ifdef __LITTLE_ENDIAN + struct nfp_app_bpf *bpf = nn->app->priv; + + return nn->cap & NFP_NET_CFG_CTRL_BPF && + bpf->abi_version && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == bpf->abi_version; +#else + return false; +#endif +} + +static int +nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog, struct netlink_ext_ack *extack) +{ + bool running, xdp_running; + + if (!nfp_net_ebpf_capable(nn)) + return -EINVAL; + + running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + xdp_running = running && nn->xdp_hw.prog; + + if (!prog && !xdp_running) + return 0; + if (prog && running && !xdp_running) + return -EBUSY; + + return nfp_net_bpf_offload(nn, prog, running, extack); +} + +static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) +{ + return nfp_net_ebpf_capable(nn) ? "BPF" : ""; +} + +static int +nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + struct nfp_pf *pf = app->pf; + struct nfp_bpf_vnic *bv; + int err; + + if (!pf->eth_tbl) { + nfp_err(pf->cpp, "No ETH table\n"); + return -EINVAL; + } + if (pf->max_data_vnics != pf->eth_tbl->count) { + nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n", + pf->max_data_vnics, pf->eth_tbl->count); + return -EINVAL; + } + + bv = kzalloc(sizeof(*bv), GFP_KERNEL); + if (!bv) + return -ENOMEM; + nn->app_priv = bv; + + err = nfp_app_nic_vnic_alloc(app, nn, id); + if (err) + goto err_free_priv; + + bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + return 0; +err_free_priv: + kfree(nn->app_priv); + return err; +} + +static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_bpf_vnic *bv = nn->app_priv; + + WARN_ON(bv->tc_prog); + kfree(bv); +} + +static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct tc_cls_bpf_offload *cls_bpf = type_data; + struct nfp_net *nn = cb_priv; + struct bpf_prog *oldprog; + struct nfp_bpf_vnic *bv; + int err; + + if (type != TC_SETUP_CLSBPF) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only offload of BPF classifiers supported"); + return -EOPNOTSUPP; + } + if (!tc_cls_can_offload_and_chain0(nn->dp.netdev, &cls_bpf->common)) + return -EOPNOTSUPP; + if (!nfp_net_ebpf_capable(nn)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "NFP firmware does not support eBPF offload"); + return -EOPNOTSUPP; + } + if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only ETH_P_ALL supported as filter protocol"); + return -EOPNOTSUPP; + } + + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only direct action with no legacy actions supported"); + return -EOPNOTSUPP; + } + + if (cls_bpf->command != TC_CLSBPF_OFFLOAD) + return -EOPNOTSUPP; + + bv = nn->app_priv; + oldprog = cls_bpf->oldprog; + + /* Don't remove if oldprog doesn't match driver's state */ + if (bv->tc_prog != oldprog) { + oldprog = NULL; + if (!cls_bpf->prog) + return 0; + } + + err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog, + cls_bpf->common.extack); + if (err) + return err; + + bv->tc_prog = cls_bpf->prog; + nn->port->tc_offload_cnt = !!bv->tc_prog; + return 0; +} + +static LIST_HEAD(nfp_bpf_block_cb_list); + +static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &nfp_bpf_block_cb_list, + nfp_bpf_setup_tc_block_cb, + nn, nn, true); + default: + return -EOPNOTSUPP; + } +} + +static int +nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; + + if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return 0; + + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); + return -EBUSY; + } + return 0; +} + +static int +nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + struct nfp_bpf_cap_tlv_adjust_head __iomem *cap = value; + struct nfp_cpp *cpp = bpf->app->pf->cpp; + + if (length < sizeof(*cap)) { + nfp_err(cpp, "truncated adjust_head TLV: %d\n", length); + return -EINVAL; + } + + bpf->adjust_head.flags = readl(&cap->flags); + bpf->adjust_head.off_min = readl(&cap->off_min); + bpf->adjust_head.off_max = readl(&cap->off_max); + bpf->adjust_head.guaranteed_sub = readl(&cap->guaranteed_sub); + bpf->adjust_head.guaranteed_add = readl(&cap->guaranteed_add); + + if (bpf->adjust_head.off_min > bpf->adjust_head.off_max) { + nfp_err(cpp, "invalid adjust_head TLV: min > max\n"); + return -EINVAL; + } + if (!FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_min) || + !FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_max)) { + nfp_warn(cpp, "disabling adjust_head - driver expects min/max to fit in as immediates\n"); + memset(&bpf->adjust_head, 0, sizeof(bpf->adjust_head)); + return 0; + } + + return 0; +} + +static int +nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + struct nfp_bpf_cap_tlv_func __iomem *cap = value; + + if (length < sizeof(*cap)) { + nfp_err(bpf->app->cpp, "truncated function TLV: %d\n", length); + return -EINVAL; + } + + switch (readl(&cap->func_id)) { + case BPF_FUNC_map_lookup_elem: + bpf->helpers.map_lookup = readl(&cap->func_addr); + break; + case BPF_FUNC_map_update_elem: + bpf->helpers.map_update = readl(&cap->func_addr); + break; + case BPF_FUNC_map_delete_elem: + bpf->helpers.map_delete = readl(&cap->func_addr); + break; + case BPF_FUNC_perf_event_output: + bpf->helpers.perf_event_output = readl(&cap->func_addr); + break; + } + + return 0; +} + +static int +nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + struct nfp_bpf_cap_tlv_maps __iomem *cap = value; + + if (length < sizeof(*cap)) { + nfp_err(bpf->app->cpp, "truncated maps TLV: %d\n", length); + return -EINVAL; + } + + bpf->maps.types = readl(&cap->types); + bpf->maps.max_maps = readl(&cap->max_maps); + bpf->maps.max_elems = readl(&cap->max_elems); + bpf->maps.max_key_sz = readl(&cap->max_key_sz); + bpf->maps.max_val_sz = readl(&cap->max_val_sz); + bpf->maps.max_elem_sz = readl(&cap->max_elem_sz); + + return 0; +} + +static int +nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->pseudo_random = true; + return 0; +} + +static int +nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + bpf->queue_select = true; + return 0; +} + +static int +nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->adjust_tail = true; + return 0; +} + +static int +nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->cmsg_multi_ent = true; + return 0; +} + +static int +nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + if (length < 4) { + nfp_err(bpf->app->cpp, "truncated ABI version TLV: %d\n", + length); + return -EINVAL; + } + + bpf->abi_version = readl(value); + if (bpf->abi_version < 2 || bpf->abi_version > 3) { + nfp_warn(bpf->app->cpp, "unsupported BPF ABI version: %d\n", + bpf->abi_version); + bpf->abi_version = 0; + } + + return 0; +} + +static int nfp_bpf_parse_capabilities(struct nfp_app *app) +{ + struct nfp_cpp *cpp = app->pf->cpp; + struct nfp_cpp_area *area; + u8 __iomem *mem, *start; + + mem = nfp_rtsym_map(app->pf->rtbl, "_abi_bpf_capabilities", "bpf.cap", + 8, &area); + if (IS_ERR(mem)) + return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem); + + start = mem; + while (mem - start + 8 <= nfp_cpp_area_size(area)) { + u8 __iomem *value; + u32 type, length; + + type = readl(mem); + length = readl(mem + 4); + value = mem + 8; + + mem += 8 + length; + if (mem - start > nfp_cpp_area_size(area)) + goto err_release_free; + + switch (type) { + case NFP_BPF_CAP_TYPE_FUNC: + if (nfp_bpf_parse_cap_func(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_ADJUST_HEAD: + if (nfp_bpf_parse_cap_adjust_head(app->priv, value, + length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_MAPS: + if (nfp_bpf_parse_cap_maps(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_RANDOM: + if (nfp_bpf_parse_cap_random(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_QUEUE_SELECT: + if (nfp_bpf_parse_cap_qsel(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_ADJUST_TAIL: + if (nfp_bpf_parse_cap_adjust_tail(app->priv, value, + length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_ABI_VERSION: + if (nfp_bpf_parse_cap_abi_version(app->priv, value, + length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT: + if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value, + length)) + goto err_release_free; + break; + default: + nfp_dbg(cpp, "unknown BPF capability: %d\n", type); + break; + } + } + if (mem - start != nfp_cpp_area_size(area)) { + nfp_err(cpp, "BPF capabilities left after parsing, parsed:%zd total length:%zu\n", + mem - start, nfp_cpp_area_size(area)); + goto err_release_free; + } + + nfp_cpp_area_release_free(area); + + return 0; + +err_release_free: + nfp_err(cpp, "invalid BPF capabilities at offset:%zd\n", mem - start); + nfp_cpp_area_release_free(area); + return -EINVAL; +} + +static void nfp_bpf_init_capabilities(struct nfp_app_bpf *bpf) +{ + bpf->abi_version = 2; /* Original BPF ABI version */ +} + +static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev); +} + +static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev); +} + +static int nfp_bpf_start(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf = app->priv; + + if (app->ctrl->dp.mtu < nfp_bpf_ctrl_cmsg_min_mtu(bpf)) { + nfp_err(bpf->app->cpp, + "ctrl channel MTU below min required %u < %u\n", + app->ctrl->dp.mtu, nfp_bpf_ctrl_cmsg_min_mtu(bpf)); + return -EINVAL; + } + + if (bpf->cmsg_multi_ent) + bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf); + else + bpf->cmsg_cache_cnt = 1; + + return 0; +} + +static int nfp_bpf_init(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf; + int err; + + bpf = kzalloc(sizeof(*bpf), GFP_KERNEL); + if (!bpf) + return -ENOMEM; + bpf->app = app; + app->priv = bpf; + + INIT_LIST_HEAD(&bpf->map_list); + + err = nfp_ccm_init(&bpf->ccm, app); + if (err) + goto err_free_bpf; + + err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params); + if (err) + goto err_clean_ccm; + + nfp_bpf_init_capabilities(bpf); + + err = nfp_bpf_parse_capabilities(app); + if (err) + goto err_free_neutral_maps; + + if (bpf->abi_version < 3) { + bpf->cmsg_key_sz = CMSG_MAP_KEY_LW * 4; + bpf->cmsg_val_sz = CMSG_MAP_VALUE_LW * 4; + } else { + bpf->cmsg_key_sz = bpf->maps.max_key_sz; + bpf->cmsg_val_sz = bpf->maps.max_val_sz; + app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf); + } + + bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf); + err = PTR_ERR_OR_ZERO(bpf->bpf_dev); + if (err) + goto err_free_neutral_maps; + + return 0; + +err_free_neutral_maps: + rhashtable_destroy(&bpf->maps_neutral); +err_clean_ccm: + nfp_ccm_clean(&bpf->ccm); +err_free_bpf: + kfree(bpf); + return err; +} + +static void nfp_bpf_clean(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf = app->priv; + + bpf_offload_dev_destroy(bpf->bpf_dev); + nfp_ccm_clean(&bpf->ccm); + WARN_ON(!list_empty(&bpf->map_list)); + WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); + rhashtable_free_and_destroy(&bpf->maps_neutral, + nfp_check_rhashtable_empty, NULL); + kfree(bpf); +} + +const struct nfp_app_type app_bpf = { + .id = NFP_APP_BPF_NIC, + .name = "ebpf", + + .ctrl_cap_mask = 0, + + .init = nfp_bpf_init, + .clean = nfp_bpf_clean, + .start = nfp_bpf_start, + + .check_mtu = nfp_bpf_check_mtu, + + .extra_cap = nfp_bpf_extra_cap, + + .ndo_init = nfp_bpf_ndo_init, + .ndo_uninit = nfp_bpf_ndo_uninit, + + .vnic_alloc = nfp_bpf_vnic_alloc, + .vnic_free = nfp_bpf_vnic_free, + + .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx, + .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw, + + .setup_tc = nfp_bpf_setup_tc, + .bpf = nfp_ndo_bpf, + .xdp_offload = nfp_bpf_xdp_offload, +}; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h new file mode 100644 index 000000000..16841bb75 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -0,0 +1,615 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/rhashtable.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/wait.h> + +#include "../ccm.h" +#include "../nfp_asm.h" +#include "fw.h" + +#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) + +/* For relocation logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_RELO_TYPE 0xff00000000000000ULL + +enum nfp_relo_type { + RELO_NONE = 0, + /* standard internal jumps */ + RELO_BR_REL, + /* internal jumps to parts of the outro */ + RELO_BR_GO_OUT, + RELO_BR_GO_ABORT, + RELO_BR_GO_CALL_PUSH_REGS, + RELO_BR_GO_CALL_POP_REGS, + /* external jumps to fixed addresses */ + RELO_BR_NEXT_PKT, + RELO_BR_HELPER, + /* immediate relocation against load address */ + RELO_IMMED_REL, +}; + +/* To make absolute relocated branches (branches other than RELO_BR_REL) + * distinguishable in user space dumps from normal jumps, add a large offset + * to them. + */ +#define BR_OFF_RELO 15000 + +enum static_regs { + STATIC_REG_IMMA = 20, /* Bank AB */ + STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_STACK = 22, /* Bank A */ + STATIC_REG_PKT_LEN = 22, /* Bank B */ +}; + +enum pkt_vec { + PKT_VEC_PKT_LEN = 0, + PKT_VEC_PKT_PTR = 2, + PKT_VEC_QSEL_SET = 4, + PKT_VEC_QSEL_VAL = 6, +}; + +#define PKT_VEL_QSEL_SET_BIT 4 + +#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) +#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) +#define pv_qsel_set(np) reg_lm(1, PKT_VEC_QSEL_SET) +#define pv_qsel_val(np) reg_lm(1, PKT_VEC_QSEL_VAL) + +#define stack_reg(np) reg_a(STATIC_REG_STACK) +#define stack_imm(np) imm_b(np) +#define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) +#define pptr_reg(np) pv_ctm_ptr(np) +#define imm_a(np) reg_a(STATIC_REG_IMM) +#define imm_b(np) reg_b(STATIC_REG_IMM) +#define imma_a(np) reg_a(STATIC_REG_IMMA) +#define imma_b(np) reg_b(STATIC_REG_IMMA) +#define imm_both(np) reg_both(STATIC_REG_IMM) +#define ret_reg(np) imm_a(np) + +#define NFP_BPF_ABI_FLAGS reg_imm(0) +#define NFP_BPF_ABI_FLAG_MARK 1 + +/** + * struct nfp_app_bpf - bpf app priv structure + * @app: backpointer to the app + * @ccm: common control message handler data + * + * @bpf_dev: BPF offload device handle + * + * @cmsg_key_sz: size of key in cmsg element array + * @cmsg_val_sz: size of value in cmsg element array + * + * @map_list: list of offloaded maps + * @maps_in_use: number of currently offloaded maps + * @map_elems_in_use: number of elements allocated to offloaded maps + * + * @maps_neutral: hash table of offload-neutral maps (on pointer) + * + * @abi_version: global BPF ABI version + * @cmsg_cache_cnt: number of entries to read for caching + * + * @adjust_head: adjust head capability + * @adjust_head.flags: extra flags for adjust head + * @adjust_head.off_min: minimal packet offset within buffer required + * @adjust_head.off_max: maximum packet offset within buffer required + * @adjust_head.guaranteed_sub: negative adjustment guaranteed possible + * @adjust_head.guaranteed_add: positive adjustment guaranteed possible + * + * @maps: map capability + * @maps.types: supported map types + * @maps.max_maps: max number of maps supported + * @maps.max_elems: max number of entries in each map + * @maps.max_key_sz: max size of map key + * @maps.max_val_sz: max size of map value + * @maps.max_elem_sz: max size of map entry (key + value) + * + * @helpers: helper addressess for various calls + * @helpers.map_lookup: map lookup helper address + * @helpers.map_update: map update helper address + * @helpers.map_delete: map delete helper address + * @helpers.perf_event_output: output perf event to a ring buffer + * + * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) + * @queue_select: BPF can set the RX queue ID in packet vector + * @adjust_tail: BPF can simply trunc packet size for adjust tail + * @cmsg_multi_ent: FW can pack multiple map entries in a single cmsg + */ +struct nfp_app_bpf { + struct nfp_app *app; + struct nfp_ccm ccm; + + struct bpf_offload_dev *bpf_dev; + + unsigned int cmsg_key_sz; + unsigned int cmsg_val_sz; + + unsigned int cmsg_cache_cnt; + + struct list_head map_list; + unsigned int maps_in_use; + unsigned int map_elems_in_use; + + struct rhashtable maps_neutral; + + u32 abi_version; + + struct nfp_bpf_cap_adjust_head { + u32 flags; + int off_min; + int off_max; + int guaranteed_sub; + int guaranteed_add; + } adjust_head; + + struct { + u32 types; + u32 max_maps; + u32 max_elems; + u32 max_key_sz; + u32 max_val_sz; + u32 max_elem_sz; + } maps; + + struct { + u32 map_lookup; + u32 map_update; + u32 map_delete; + u32 perf_event_output; + } helpers; + + bool pseudo_random; + bool queue_select; + bool adjust_tail; + bool cmsg_multi_ent; +}; + +enum nfp_bpf_map_use { + NFP_MAP_UNUSED = 0, + NFP_MAP_USE_READ, + NFP_MAP_USE_WRITE, + NFP_MAP_USE_ATOMIC_CNT, +}; + +struct nfp_bpf_map_word { + unsigned char type :4; + unsigned char non_zero_update :1; +}; + +#define NFP_BPF_MAP_CACHE_CNT 4U +#define NFP_BPF_MAP_CACHE_TIME_NS (250 * 1000) + +/** + * struct nfp_bpf_map - private per-map data attached to BPF maps for offload + * @offmap: pointer to the offloaded BPF map + * @bpf: back pointer to bpf app private structure + * @tid: table id identifying map on datapath + * + * @cache_lock: protects @cache_blockers, @cache_to, @cache + * @cache_blockers: number of ops in flight which block caching + * @cache_gen: counter incremented by every blocker on exit + * @cache_to: time when cache will no longer be valid (ns) + * @cache: skb with cached response + * + * @l: link on the nfp_app_bpf->map_list list + * @use_map: map of how the value is used (in 4B chunks) + */ +struct nfp_bpf_map { + struct bpf_offloaded_map *offmap; + struct nfp_app_bpf *bpf; + u32 tid; + + spinlock_t cache_lock; + u32 cache_blockers; + u32 cache_gen; + u64 cache_to; + struct sk_buff *cache; + + struct list_head l; + struct nfp_bpf_map_word use_map[]; +}; + +struct nfp_bpf_neutral_map { + struct rhash_head l; + struct bpf_map *ptr; + u32 map_id; + u32 count; +}; + +extern const struct rhashtable_params nfp_bpf_maps_neutral_params; + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_bpf_reg_state - register state for calls + * @reg: BPF register state from latest path + * @var_off: for stack arg - changes stack offset on different paths + */ +struct nfp_bpf_reg_state { + struct bpf_reg_state reg; + bool var_off; +}; + +#define FLAG_INSN_IS_JUMP_DST BIT(0) +#define FLAG_INSN_IS_SUBPROG_START BIT(1) +#define FLAG_INSN_PTR_CALLER_STACK_FRAME BIT(2) +/* Instruction is pointless, noop even on its own */ +#define FLAG_INSN_SKIP_NOOP BIT(3) +/* Instruction is optimized out based on preceding instructions */ +#define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4) +/* Instruction is optimized by the verifier */ +#define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5) +/* Instruction needs to zero extend to high 32-bit */ +#define FLAG_INSN_DO_ZEXT BIT(6) + +#define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \ + FLAG_INSN_SKIP_PREC_DEPENDENT | \ + FLAG_INSN_SKIP_VERIFIER_OPT) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @ptr: pointer type for memory operations + * @ldst_gather_len: memcpy length gathered from load/store sequence + * @paired_st: the paired store insn at the head of the sequence + * @ptr_not_const: pointer is not always constant + * @pkt_cache: packet data cache information + * @pkt_cache.range_start: start offset for associated packet data cache + * @pkt_cache.range_end: end offset for associated packet data cache + * @pkt_cache.do_init: this read needs to initialize packet data cache + * @xadd_over_16bit: 16bit immediate is not guaranteed + * @xadd_maybe_16bit: 16bit immediate is possible + * @jmp_dst: destination info for jump instructions + * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB + * @num_insns_after_br: number of insns following a branch jump, used for fixup + * @func_id: function id for call instructions + * @arg1: arg1 for call instructions + * @arg2: arg2 for call instructions + * @umin_src: copy of core verifier umin_value for src opearnd. + * @umax_src: copy of core verifier umax_value for src operand. + * @umin_dst: copy of core verifier umin_value for dst opearnd. + * @umax_dst: copy of core verifier umax_value for dst operand. + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @flags: eBPF instruction extra optimization flags + * @subprog_idx: index of subprogram to which the instruction belongs + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + union { + /* pointer ops (ld/st/xadd) */ + struct { + struct bpf_reg_state ptr; + struct bpf_insn *paired_st; + s16 ldst_gather_len; + bool ptr_not_const; + struct { + s16 range_start; + s16 range_end; + bool do_init; + } pkt_cache; + bool xadd_over_16bit; + bool xadd_maybe_16bit; + }; + /* jump */ + struct { + struct nfp_insn_meta *jmp_dst; + bool jump_neg_op; + u32 num_insns_after_br; /* only for BPF-to-BPF calls */ + }; + /* function calls */ + struct { + u32 func_id; + struct bpf_reg_state arg1; + struct nfp_bpf_reg_state arg2; + }; + /* We are interested in range info for operands of ALU + * operations. For example, shift amount, multiplicand and + * multiplier etc. + */ + struct { + u64 umin_src; + u64 umax_src; + u64 umin_dst; + u64 umax_dst; + }; + }; + unsigned int off; + unsigned short n; + unsigned short flags; + unsigned short subprog_idx; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU; +} + +static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); +} + +static inline bool is_mbpf_jmp32(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_JMP32; +} + +static inline bool is_mbpf_jmp64(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_JMP; +} + +static inline bool is_mbpf_jmp(const struct nfp_insn_meta *meta) +{ + return is_mbpf_jmp32(meta) || is_mbpf_jmp64(meta); +} + +static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM); +} + +static inline bool is_mbpf_load_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_load(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_store_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_store(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_classic_load(const struct nfp_insn_meta *meta) +{ + u8 code = meta->insn.code; + + return BPF_CLASS(code) == BPF_LD && + (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND); +} + +static inline bool is_mbpf_classic_store(const struct nfp_insn_meta *meta) +{ + u8 code = meta->insn.code; + + return BPF_CLASS(code) == BPF_ST && BPF_MODE(code) == BPF_MEM; +} + +static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_atomic(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_ATOMIC); +} + +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; +} + +static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; +} + +static inline bool is_mbpf_cond_jump(const struct nfp_insn_meta *meta) +{ + u8 op; + + if (is_mbpf_jmp32(meta)) + return true; + + if (!is_mbpf_jmp64(meta)) + return false; + + op = mbpf_op(meta); + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +} + +static inline bool is_mbpf_helper_call(const struct nfp_insn_meta *meta) +{ + struct bpf_insn insn = meta->insn; + + return insn.code == (BPF_JMP | BPF_CALL) && + insn.src_reg != BPF_PSEUDO_CALL; +} + +static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta) +{ + struct bpf_insn insn = meta->insn; + + return insn.code == (BPF_JMP | BPF_CALL) && + insn.src_reg == BPF_PSEUDO_CALL; +} + +#define STACK_FRAME_ALIGN 64 + +/** + * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info + * @stack_depth: maximum stack depth used by this sub-program + * @needs_reg_push: whether sub-program uses callee-saved registers + */ +struct nfp_bpf_subprog_info { + u16 stack_depth; + u8 needs_reg_push : 1; +}; + +/** + * struct nfp_prog - nfp BPF program + * @bpf: backpointer to the bpf app priv structure + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @stack_size: total amount of stack used + * @verifier_meta: temporary storage for verifier's insn meta + * @type: BPF program type + * @last_bpf_off: address of the last instruction translated from BPF + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @tgt_call_push_regs: jump target for subroutine for saving R6~R9 to stack + * @tgt_call_pop_regs: jump target for subroutine used for restoring R6~R9 + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @stack_frame_depth: max stack depth for current frame + * @adjust_head_location: if program has single adjust head call - the insn no. + * @map_records_cnt: the number of map pointers recorded for this prog + * @subprog_cnt: number of sub-programs, including main function + * @map_records: the map record pointers from bpf->maps_neutral + * @subprog: pointer to an array of objects holding info about sub-programs + * @n_insns: number of instructions on @insns list + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + struct nfp_app_bpf *bpf; + + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + unsigned int stack_size; + + struct nfp_insn_meta *verifier_meta; + + enum bpf_prog_type type; + + unsigned int last_bpf_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_call_push_regs; + unsigned int tgt_call_pop_regs; + + unsigned int n_translated; + int error; + + unsigned int stack_frame_depth; + unsigned int adjust_head_location; + + unsigned int map_records_cnt; + unsigned int subprog_cnt; + struct nfp_bpf_neutral_map **map_records; + struct nfp_bpf_subprog_info *subprog; + + unsigned int n_insns; + struct list_head insns; +}; + +/** + * struct nfp_bpf_vnic - per-vNIC BPF priv structure + * @tc_prog: currently loaded cls_bpf program + * @start_off: address of the first instruction in the memory + * @tgt_done: jump target to get the next packet + */ +struct nfp_bpf_vnic { + struct bpf_prog *tc_prog; + unsigned int start_off; + unsigned int tgt_done; +}; + +bool nfp_is_subprog_start(struct nfp_insn_meta *meta); +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); +int nfp_bpf_jit(struct nfp_prog *prog); +bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); + +int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, + int prev_insn_idx); +int nfp_bpf_finalize(struct bpf_verifier_env *env); + +int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); + +extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops; + +struct netdev_bpf; +struct nfp_app; +struct nfp_net; + +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, struct netlink_ext_ack *extack); + +struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx); + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); + +unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf); +unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf); +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf); +long long int +nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map); +void +nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map); +int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap, + void *next_key); +int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags); +int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key); +int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value); +int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, + void *key, void *next_key); + +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len); + +void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, + unsigned int len); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c new file mode 100644 index 000000000..9d97cd281 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bpf.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/mm.h> + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "main.h" +#include "../ccm.h" +#include "../nfp_app.h" +#include "../nfp_net_ctrl.h" +#include "../nfp_net.h" + +static int +nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_map *map) +{ + struct nfp_bpf_neutral_map *record; + int err; + + /* Reuse path - other offloaded program is already tracking this map. */ + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, + nfp_bpf_maps_neutral_params); + if (record) { + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + record->count++; + return 0; + } + + /* Grab a single ref to the map for our record. The prog destroy ndo + * happens after free_used_maps(). + */ + bpf_map_inc(map); + + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + err = -ENOMEM; + goto err_map_put; + } + + record->ptr = map; + record->map_id = map->id; + record->count = 1; + + err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, + nfp_bpf_maps_neutral_params); + if (err) + goto err_free_rec; + + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + + return 0; + +err_free_rec: + kfree(record); +err_map_put: + bpf_map_put(map); + return err; +} + +static void +nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) +{ + bool freed = false; + int i; + + for (i = 0; i < nfp_prog->map_records_cnt; i++) { + if (--nfp_prog->map_records[i]->count) { + nfp_prog->map_records[i] = NULL; + continue; + } + + WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral, + &nfp_prog->map_records[i]->l, + nfp_bpf_maps_neutral_params)); + freed = true; + } + + if (freed) { + synchronize_rcu(); + + for (i = 0; i < nfp_prog->map_records_cnt; i++) + if (nfp_prog->map_records[i]) { + bpf_map_put(nfp_prog->map_records[i]->ptr); + kfree(nfp_prog->map_records[i]); + } + } + + kfree(nfp_prog->map_records); + nfp_prog->map_records = NULL; + nfp_prog->map_records_cnt = 0; +} + +static int +nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_prog *prog) +{ + int i, cnt, err = 0; + + mutex_lock(&prog->aux->used_maps_mutex); + + /* Quickly count the maps we will have to remember */ + cnt = 0; + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) + cnt++; + if (!cnt) + goto out; + + nfp_prog->map_records = kmalloc_array(cnt, + sizeof(nfp_prog->map_records[0]), + GFP_KERNEL); + if (!nfp_prog->map_records) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { + err = nfp_map_ptr_record(bpf, nfp_prog, + prog->aux->used_maps[i]); + if (err) { + nfp_map_ptrs_forget(bpf, nfp_prog); + goto out; + } + } + WARN_ON(cnt != nfp_prog->map_records_cnt); + +out: + mutex_unlock(&prog->aux->used_maps_mutex); + return err; +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + struct nfp_insn_meta *meta; + unsigned int i; + + for (i = 0; i < cnt; i++) { + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + if (is_mbpf_alu(meta)) { + meta->umin_src = U64_MAX; + meta->umin_dst = U64_MAX; + } + + list_add_tail(&meta->l, &nfp_prog->insns); + } + nfp_prog->n_insns = cnt; + + nfp_bpf_jit_prepare(nfp_prog); + + return 0; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + kfree(nfp_prog->subprog); + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static int nfp_bpf_verifier_prep(struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + prog->aux->offload->dev_priv = nfp_prog; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->type = prog->type; + nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev); + + ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); + if (ret) + goto err_free; + + nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); + + return 0; + +err_free: + nfp_prog_free(nfp_prog); + + return ret; +} + +static int nfp_bpf_translate(struct bpf_prog *prog) +{ + struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int max_instr; + int err; + + /* We depend on dead code elimination succeeding */ + if (prog->aux->offload->opt_failed) + return -EINVAL; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); + + nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + if (!nfp_prog->prog) + return -ENOMEM; + + err = nfp_bpf_jit(nfp_prog); + if (err) + return err; + + prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); + prog->aux->offload->jited_image = nfp_prog->prog; + + return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); +} + +static void nfp_bpf_destroy(struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + + kvfree(nfp_prog->prog); + nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); + nfp_prog_free(nfp_prog); +} + +/* Atomic engine requires values to be in big endian, we need to byte swap + * the value words used with xadd. + */ +static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value) +{ + u32 *word = value; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) + if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT) + word[i] = (__force u32)cpu_to_be32(word[i]); +} + +/* Mark value as unsafely initialized in case it becomes atomic later + * and we didn't byte swap something non-byte swap neutral. + */ +static void +nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value) +{ + u32 *word = value; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) + if (nfp_map->use_map[i].type == NFP_MAP_UNUSED && + word[i] != (__force u32)cpu_to_be32(word[i])) + nfp_map->use_map[i].non_zero_update = 1; +} + +static int +nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value) +{ + int err; + + err = nfp_bpf_ctrl_lookup_entry(offmap, key, value); + if (err) + return err; + + nfp_map_bpf_byte_swap(offmap->dev_priv, value); + return 0; +} + +static int +nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + nfp_map_bpf_byte_swap(offmap->dev_priv, value); + nfp_map_bpf_byte_swap_record(offmap->dev_priv, value); + return nfp_bpf_ctrl_update_entry(offmap, key, value, flags); +} + +static int +nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap, + void *key, void *next_key) +{ + if (!key) + return nfp_bpf_ctrl_getfirst_entry(offmap, next_key); + return nfp_bpf_ctrl_getnext_entry(offmap, key, next_key); +} + +static int +nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) +{ + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; + return nfp_bpf_ctrl_del_entry(offmap, key); +} + +static const struct bpf_map_dev_ops nfp_bpf_map_ops = { + .map_get_next_key = nfp_bpf_map_get_next_key, + .map_lookup_elem = nfp_bpf_map_lookup_entry, + .map_update_elem = nfp_bpf_map_update_entry, + .map_delete_elem = nfp_bpf_map_delete_elem, +}; + +static int +nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) +{ + struct nfp_bpf_map *nfp_map; + unsigned int use_map_size; + long long int res; + + if (!bpf->maps.types) + return -EOPNOTSUPP; + + if (offmap->map.map_flags || + offmap->map.numa_node != NUMA_NO_NODE) { + pr_info("map flags are not supported\n"); + return -EINVAL; + } + + if (!(bpf->maps.types & 1 << offmap->map.map_type)) { + pr_info("map type not supported\n"); + return -EOPNOTSUPP; + } + if (bpf->maps.max_maps == bpf->maps_in_use) { + pr_info("too many maps for a device\n"); + return -ENOMEM; + } + if (bpf->maps.max_elems - bpf->map_elems_in_use < + offmap->map.max_entries) { + pr_info("map with too many elements: %u, left: %u\n", + offmap->map.max_entries, + bpf->maps.max_elems - bpf->map_elems_in_use); + return -ENOMEM; + } + + if (round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) { + pr_info("map elements too large: %u, FW max element size (key+value): %u\n", + round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8), + bpf->maps.max_elem_sz); + return -ENOMEM; + } + if (offmap->map.key_size > bpf->maps.max_key_sz) { + pr_info("map key size %u, FW max is %u\n", + offmap->map.key_size, bpf->maps.max_key_sz); + return -ENOMEM; + } + if (offmap->map.value_size > bpf->maps.max_val_sz) { + pr_info("map value size %u, FW max is %u\n", + offmap->map.value_size, bpf->maps.max_val_sz); + return -ENOMEM; + } + + use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) * + sizeof_field(struct nfp_bpf_map, use_map[0]); + + nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER); + if (!nfp_map) + return -ENOMEM; + + offmap->dev_priv = nfp_map; + nfp_map->offmap = offmap; + nfp_map->bpf = bpf; + spin_lock_init(&nfp_map->cache_lock); + + res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map); + if (res < 0) { + kfree(nfp_map); + return res; + } + + nfp_map->tid = res; + offmap->dev_ops = &nfp_bpf_map_ops; + bpf->maps_in_use++; + bpf->map_elems_in_use += offmap->map.max_entries; + list_add_tail(&nfp_map->l, &bpf->map_list); + + return 0; +} + +static int +nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) +{ + struct nfp_bpf_map *nfp_map = offmap->dev_priv; + + nfp_bpf_ctrl_free_map(bpf, nfp_map); + dev_consume_skb_any(nfp_map->cache); + WARN_ON_ONCE(nfp_map->cache_blockers); + list_del_init(&nfp_map->l); + bpf->map_elems_in_use -= offmap->map.max_entries; + bpf->maps_in_use--; + kfree(nfp_map); + + return 0; +} + +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case BPF_OFFLOAD_MAP_ALLOC: + return nfp_bpf_map_alloc(app->priv, bpf->offmap); + case BPF_OFFLOAD_MAP_FREE: + return nfp_bpf_map_free(app->priv, bpf->offmap); + default: + return -EINVAL; + } +} + +static unsigned long +nfp_bpf_perf_event_copy(void *dst, const void *src, + unsigned long off, unsigned long len) +{ + memcpy(dst, src + off, len); + return 0; +} + +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len) +{ + struct cmsg_bpf_event *cbe = (void *)data; + struct nfp_bpf_neutral_map *record; + u32 pkt_size, data_size, map_id; + u64 map_id_full; + + if (len < sizeof(struct cmsg_bpf_event)) + return -EINVAL; + + pkt_size = be32_to_cpu(cbe->pkt_size); + data_size = be32_to_cpu(cbe->data_size); + map_id_full = be64_to_cpu(cbe->map_ptr); + map_id = map_id_full; + + if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + return -EINVAL; + if (cbe->hdr.ver != NFP_CCM_ABI_VERSION) + return -EINVAL; + + rcu_read_lock(); + record = rhashtable_lookup(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); + if (!record || map_id_full > U32_MAX) { + rcu_read_unlock(); + cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", + map_id_full, map_id_full); + return -EINVAL; + } + + bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id), + &cbe->data[round_up(pkt_size, 4)], data_size, + cbe->data, pkt_size, nfp_bpf_perf_event_copy); + rcu_read_unlock(); + + return 0; +} + +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + +static int +nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int max_stack, max_prog_len; + dma_addr_t dma_addr; + void *img; + int err; + + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { + NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); + return -EOPNOTSUPP; + } + + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (nfp_prog->stack_size > max_stack) { + NL_SET_ERR_MSG_MOD(extack, "stack too large"); + return -EOPNOTSUPP; + } + + max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + if (nfp_prog->prog_len > max_prog_len) { + NL_SET_ERR_MSG_MOD(extack, "program too long"); + return -EOPNOTSUPP; + } + + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); + if (IS_ERR(img)) + return PTR_ERR(img); + + dma_addr = dma_map_single(nn->dp.dev, img, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, dma_addr)) { + kfree(img); + return -ENOMEM; + } + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "FW command error while loading BPF"); + + dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + kfree(img); + + return err; +} + +static void +nfp_net_bpf_start(struct nfp_net *nn, struct netlink_ext_ack *extack) +{ + int err; + + /* Enable passing packets through BPF function */ + nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "FW command error while enabling BPF"); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, struct netlink_ext_ack *extack) +{ + int err; + + if (prog && !bpf_offload_dev_match(prog, nn->dp.netdev)) + return -EINVAL; + + if (prog && old_prog) { + u8 cap; + + cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); + if (!(cap & NFP_NET_BPF_CAP_RELO)) { + NL_SET_ERR_MSG_MOD(extack, + "FW does not support live reload"); + return -EBUSY; + } + } + + /* Something else is loaded, different program type? */ + if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + if (old_prog && !prog) + return nfp_net_bpf_stop(nn); + + err = nfp_net_bpf_load(nn, prog, extack); + if (err) + return err; + + if (!old_prog) + nfp_net_bpf_start(nn, extack); + + return 0; +} + +const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { + .insn_hook = nfp_verify_insn, + .finalize = nfp_bpf_finalize, + .replace_insn = nfp_bpf_opt_replace_insn, + .remove_insns = nfp_bpf_opt_remove_insns, + .prepare = nfp_bpf_verifier_prep, + .translate = nfp_bpf_translate, + .destroy = nfp_bpf_destroy, +}; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c new file mode 100644 index 000000000..9d235c0ce --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -0,0 +1,863 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/pkt_cls.h> + +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "fw.h" +#include "main.h" + +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__) + +struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > nfp_prog->n_insns - insn_idx - 1) { + backward = nfp_prog->n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static void +nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg2) +{ + unsigned int location = UINT_MAX; + int imm; + + /* Datapath usually can give us guarantees on how much adjust head + * can be done without the need for any checks. Optimize the simple + * case where there is only one adjust head by a constant. + */ + if (reg2->type != SCALAR_VALUE || !tnum_is_const(reg2->var_off)) + goto exit_set_location; + imm = reg2->var_off.value; + /* Translator will skip all checks, we need to guarantee min pkt len */ + if (imm > ETH_ZLEN - ETH_HLEN) + goto exit_set_location; + if (imm > (int)bpf->adjust_head.guaranteed_add || + imm < -bpf->adjust_head.guaranteed_sub) + goto exit_set_location; + + if (nfp_prog->adjust_head_location) { + /* Only one call per program allowed */ + if (nfp_prog->adjust_head_location != meta->n) + goto exit_set_location; + + if (meta->arg2.reg.var_off.value != imm) + goto exit_set_location; + } + + location = meta->n; +exit_set_location: + nfp_prog->adjust_head_location = location; +} + +static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; + struct bpf_offloaded_map *offmap; + struct bpf_func_state *state; + struct nfp_bpf_map *nfp_map; + int off, i; + + state = env->cur_state->frame[reg3->frameno]; + + /* We need to record each time update happens with non-zero words, + * in case such word is used in atomic operations. + * Implicitly depend on nfp_bpf_stack_arg_ok(reg3) being run before. + */ + + offmap = map_to_offmap(reg1->map_ptr); + nfp_map = offmap->dev_priv; + off = reg3->off + reg3->var_off.value; + + for (i = 0; i < offmap->map.value_size; i++) { + struct bpf_stack_state *stack_entry; + unsigned int soff; + + soff = -(off + i) - 1; + stack_entry = &state->stack[soff / BPF_REG_SIZE]; + if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO) + continue; + + if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n", + i, soff); + return false; + } + nfp_map->use_map[i / 4].non_zero_update = 1; + } + + return true; +} + +static int +nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + struct nfp_bpf_reg_state *old_arg) +{ + s64 off, old_off; + + if (reg->type != PTR_TO_STACK) { + pr_vlog(env, "%s: unsupported ptr type %d\n", + fname, reg->type); + return false; + } + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "%s: variable pointer\n", fname); + return false; + } + + off = reg->var_off.value + reg->off; + if (-off % 4) { + pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off); + return false; + } + + /* Rest of the checks is only if we re-parse the same insn */ + if (!old_arg) + return true; + + old_off = old_arg->reg.var_off.value + old_arg->reg.off; + old_arg->var_off |= off != old_off; + + return true; +} + +static bool +nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env, + struct nfp_insn_meta *meta, + u32 helper_tgt, const struct bpf_reg_state *reg1) +{ + if (!helper_tgt) { + pr_vlog(env, "%s: not supported by FW\n", fname); + return false; + } + + return true; +} + +static int +nfp_bpf_check_helper_call(struct nfp_prog *nfp_prog, + struct bpf_verifier_env *env, + struct nfp_insn_meta *meta) +{ + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; + const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2; + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; + struct nfp_app_bpf *bpf = nfp_prog->bpf; + u32 func_id = meta->insn.imm; + + switch (func_id) { + case BPF_FUNC_xdp_adjust_head: + if (!bpf->adjust_head.off_max) { + pr_vlog(env, "adjust_head not supported by FW\n"); + return -EOPNOTSUPP; + } + if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) { + pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n"); + return -EOPNOTSUPP; + } + + nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); + break; + + case BPF_FUNC_xdp_adjust_tail: + if (!bpf->adjust_tail) { + pr_vlog(env, "adjust_tail not supported by FW\n"); + return -EOPNOTSUPP; + } + break; + + case BPF_FUNC_map_lookup_elem: + if (!nfp_bpf_map_call_ok("map_lookup", env, meta, + bpf->helpers.map_lookup, reg1) || + !nfp_bpf_stack_arg_ok("map_lookup", env, reg2, + meta->func_id ? &meta->arg2 : NULL)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_map_update_elem: + if (!nfp_bpf_map_call_ok("map_update", env, meta, + bpf->helpers.map_update, reg1) || + !nfp_bpf_stack_arg_ok("map_update", env, reg2, + meta->func_id ? &meta->arg2 : NULL) || + !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) || + !nfp_bpf_map_update_value_ok(env)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_map_delete_elem: + if (!nfp_bpf_map_call_ok("map_delete", env, meta, + bpf->helpers.map_delete, reg1) || + !nfp_bpf_stack_arg_ok("map_delete", env, reg2, + meta->func_id ? &meta->arg2 : NULL)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_get_prandom_u32: + if (bpf->pseudo_random) + break; + pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); + return -EOPNOTSUPP; + + case BPF_FUNC_perf_event_output: + BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE || + NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE || + NFP_BPF_STACK != PTR_TO_STACK || + NFP_BPF_PACKET_DATA != PTR_TO_PACKET); + + if (!bpf->helpers.perf_event_output) { + pr_vlog(env, "event_output: not supported by FW\n"); + return -EOPNOTSUPP; + } + + /* Force current CPU to make sure we can report the event + * wherever we get the control message from FW. + */ + if (reg3->var_off.mask & BPF_F_INDEX_MASK || + (reg3->var_off.value & BPF_F_INDEX_MASK) != + BPF_F_CURRENT_CPU) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off); + pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n", + tn_buf); + return -EOPNOTSUPP; + } + + /* Save space in meta, we don't care about arguments other + * than 4th meta, shove it into arg1. + */ + reg1 = cur_regs(env) + BPF_REG_4; + + if (reg1->type != SCALAR_VALUE /* NULL ptr */ && + reg1->type != PTR_TO_STACK && + reg1->type != PTR_TO_MAP_VALUE && + reg1->type != PTR_TO_PACKET) { + pr_vlog(env, "event_output: unsupported ptr type: %d\n", + reg1->type); + return -EOPNOTSUPP; + } + + if (reg1->type == PTR_TO_STACK && + !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL)) + return -EOPNOTSUPP; + + /* Warn user that on offload NFP may return success even if map + * is not going to accept the event, since the event output is + * fully async and device won't know the state of the map. + * There is also FW limitation on the event length. + * + * Lost events will not show up on the perf ring, driver + * won't see them at all. Events may also get reordered. + */ + dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev, + "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n"); + pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n"); + + if (!meta->func_id) + break; + + if (reg1->type != meta->arg1.type) { + pr_vlog(env, "event_output: ptr type changed: %d %d\n", + meta->arg1.type, reg1->type); + return -EINVAL; + } + break; + + default: + pr_vlog(env, "unsupported function id: %d\n", func_id); + return -EOPNOTSUPP; + } + + meta->func_id = func_id; + meta->arg1 = *reg1; + meta->arg2.reg = *reg2; + + return 0; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; + u64 imm; + + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return 0; + + if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); + pr_vlog(env, "unsupported exit state: %d, var_off: %s\n", + reg0->type, tn_buf); + return -EINVAL; + } + + imm = reg0->var_off.value; + if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS && + imm <= TC_ACT_REDIRECT && + imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && + imm != TC_ACT_QUEUED) { + pr_vlog(env, "unsupported exit state: %d, imm: %llx\n", + reg0->type, imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg, + struct bpf_verifier_env *env) +{ + s32 old_off, new_off; + + if (reg->frameno != env->cur_state->curframe) + meta->flags |= FLAG_INSN_PTR_CALLER_STACK_FRAME; + + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "variable ptr stack access\n"); + return -EINVAL; + } + + if (meta->ptr.type == NOT_INIT) + return 0; + + old_off = meta->ptr.off + meta->ptr.var_off.value; + new_off = reg->off + reg->var_off.value; + + meta->ptr_not_const |= old_off != new_off; + + if (!meta->ptr_not_const) + return 0; + + if (old_off % 4 == new_off % 4) + return 0; + + pr_vlog(env, "stack access changed location was:%d is:%d\n", + old_off, new_off); + return -EINVAL; +} + +static const char *nfp_bpf_map_use_name(enum nfp_bpf_map_use use) +{ + static const char * const names[] = { + [NFP_MAP_UNUSED] = "unused", + [NFP_MAP_USE_READ] = "read", + [NFP_MAP_USE_WRITE] = "write", + [NFP_MAP_USE_ATOMIC_CNT] = "atomic", + }; + + if (use >= ARRAY_SIZE(names) || !names[use]) + return "unknown"; + return names[use]; +} + +static int +nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env, + struct nfp_bpf_map *nfp_map, + unsigned int off, enum nfp_bpf_map_use use) +{ + if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED && + nfp_map->use_map[off / 4].type != use) { + pr_vlog(env, "map value use type conflict %s vs %s off: %u\n", + nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type), + nfp_bpf_map_use_name(use), off); + return -EOPNOTSUPP; + } + + if (nfp_map->use_map[off / 4].non_zero_update && + use == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n", + off); + return -EOPNOTSUPP; + } + + nfp_map->use_map[off / 4].type = use; + + return 0; +} + +static int +nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg, + enum nfp_bpf_map_use use) +{ + struct bpf_offloaded_map *offmap; + struct nfp_bpf_map *nfp_map; + unsigned int size, off; + int i, err; + + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "map value offset is variable\n"); + return -EOPNOTSUPP; + } + + off = reg->var_off.value + meta->insn.off + reg->off; + size = BPF_LDST_BYTES(&meta->insn); + offmap = map_to_offmap(reg->map_ptr); + nfp_map = offmap->dev_priv; + + if (off + size > offmap->map.value_size) { + pr_vlog(env, "map value access out-of-bounds\n"); + return -EINVAL; + } + + for (i = 0; i < size; i += 4 - (off + i) % 4) { + err = nfp_bpf_map_mark_used_one(env, nfp_map, off + i, use); + if (err) + return err; + } + + return 0; +} + +static int +nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env, u8 reg_no) +{ + const struct bpf_reg_state *reg = cur_regs(env) + reg_no; + int err; + + if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_STACK && + reg->type != PTR_TO_MAP_VALUE && + reg->type != PTR_TO_PACKET) { + pr_vlog(env, "unsupported ptr type: %d\n", reg->type); + return -EINVAL; + } + + if (reg->type == PTR_TO_STACK) { + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env); + if (err) + return err; + } + + if (reg->type == PTR_TO_MAP_VALUE) { + if (is_mbpf_load(meta)) { + err = nfp_bpf_map_mark_used(env, meta, reg, + NFP_MAP_USE_READ); + if (err) + return err; + } + if (is_mbpf_store(meta)) { + pr_vlog(env, "map writes not supported\n"); + return -EOPNOTSUPP; + } + if (is_mbpf_atomic(meta)) { + err = nfp_bpf_map_mark_used(env, meta, reg, + NFP_MAP_USE_ATOMIC_CNT); + if (err) + return err; + } + } + + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { + pr_vlog(env, "ptr type changed for instruction %d -> %d\n", + meta->ptr.type, reg->type); + return -EINVAL; + } + + meta->ptr = *reg; + + return 0; +} + +static int +nfp_bpf_check_store(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg = cur_regs(env) + meta->insn.dst_reg; + + if (reg->type == PTR_TO_CTX) { + if (nfp_prog->type == BPF_PROG_TYPE_XDP) { + /* XDP ctx accesses must be 4B in size */ + switch (meta->insn.off) { + case offsetof(struct xdp_md, rx_queue_index): + if (nfp_prog->bpf->queue_select) + goto exit_check_ptr; + pr_vlog(env, "queue selection not supported by FW\n"); + return -EOPNOTSUPP; + } + } + pr_vlog(env, "unsupported store to context field\n"); + return -EOPNOTSUPP; + } +exit_check_ptr: + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); +} + +static int +nfp_bpf_check_atomic(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg; + + if (meta->insn.imm != BPF_ADD) { + pr_vlog(env, "atomic op not implemented: %d\n", meta->insn.imm); + return -EOPNOTSUPP; + } + + if (dreg->type != PTR_TO_MAP_VALUE) { + pr_vlog(env, "atomic add not to a map value pointer: %d\n", + dreg->type); + return -EOPNOTSUPP; + } + if (sreg->type != SCALAR_VALUE) { + pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type); + return -EOPNOTSUPP; + } + + meta->xadd_over_16bit |= + sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff; + meta->xadd_maybe_16bit |= + (sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff; + + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); +} + +static int +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = + cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = + cur_regs(env) + meta->insn.dst_reg; + + meta->umin_src = min(meta->umin_src, sreg->umin_value); + meta->umax_src = max(meta->umax_src, sreg->umax_value); + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); + + /* NFP supports u16 and u32 multiplication. + * + * For ALU64, if either operand is beyond u32's value range, we reject + * it. One thing to note, if the source operand is BPF_K, then we need + * to check "imm" field directly, and we'd reject it if it is negative. + * Because for ALU64, "imm" (with s32 type) is expected to be sign + * extended to s64 which NFP mul doesn't support. + * + * For ALU32, it is fine for "imm" be negative though, because the + * result is 32-bits and there is no difference on the low halve of + * the result for signed/unsigned mul, so we will get correct result. + */ + if (is_mbpf_mul(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "multiplier is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) { + pr_vlog(env, "multiplicand is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_class(meta) == BPF_ALU64 && + mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n"); + return -EINVAL; + } + } + + /* NFP doesn't have divide instructions, we support divide by constant + * through reciprocal multiplication. Given NFP support multiplication + * no bigger than u32, we'd require divisor and dividend no bigger than + * that as well. + * + * Also eBPF doesn't support signed divide and has enforced this on C + * language level by failing compilation. However LLVM assembler hasn't + * enforced this, so it is possible for negative constant to leak in as + * a BPF_K operand through assembly code, we reject such cases as well. + */ + if (is_mbpf_div(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "dividend is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X) { + if (meta->umin_src != meta->umax_src) { + pr_vlog(env, "divisor is not constant\n"); + return -EINVAL; + } + if (meta->umax_src > U32_MAX) { + pr_vlog(env, "divisor is not within u32 value range\n"); + return -EINVAL; + } + } + if (mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "divide by negative constant is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, + int prev_insn_idx) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx); + nfp_prog->verifier_meta = meta; + + if (!nfp_bpf_supported_opcode(meta->insn.code)) { + pr_vlog(env, "instruction %#02x not supported\n", + meta->insn.code); + return -EINVAL; + } + + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_vlog(env, "program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (is_mbpf_helper_call(meta)) + return nfp_bpf_check_helper_call(nfp_prog, env, meta); + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(nfp_prog, env); + + if (is_mbpf_load(meta)) + return nfp_bpf_check_ptr(nfp_prog, meta, env, + meta->insn.src_reg); + if (is_mbpf_store(meta)) + return nfp_bpf_check_store(nfp_prog, meta, env); + + if (is_mbpf_atomic(meta)) + return nfp_bpf_check_atomic(nfp_prog, meta, env); + + if (is_mbpf_alu(meta)) + return nfp_bpf_check_alu(nfp_prog, meta, env); + + return 0; +} + +static int +nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env, + struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int index = 0; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (nfp_is_subprog_start(meta)) + index++; + meta->subprog_idx = index; + + if (meta->insn.dst_reg >= BPF_REG_6 && + meta->insn.dst_reg <= BPF_REG_9) + nfp_prog->subprog[index].needs_reg_push = 1; + } + + if (index + 1 != nfp_prog->subprog_cnt) { + pr_vlog(env, "BUG: number of processed BPF functions is not consistent (processed %d, expected %d)\n", + index + 1, nfp_prog->subprog_cnt); + return -EFAULT; + } + + return 0; +} + +static unsigned int nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta = nfp_prog_first_meta(nfp_prog); + unsigned int max_depth = 0, depth = 0, frame = 0; + struct nfp_insn_meta *ret_insn[MAX_CALL_FRAMES]; + unsigned short frame_depths[MAX_CALL_FRAMES]; + unsigned short ret_prog[MAX_CALL_FRAMES]; + unsigned short idx = meta->subprog_idx; + + /* Inspired from check_max_stack_depth() from kernel verifier. + * Starting from main subprogram, walk all instructions and recursively + * walk all callees that given subprogram can call. Since recursion is + * prevented by the kernel verifier, this algorithm only needs a local + * stack of MAX_CALL_FRAMES to remember callsites. + */ +process_subprog: + frame_depths[frame] = nfp_prog->subprog[idx].stack_depth; + frame_depths[frame] = round_up(frame_depths[frame], STACK_FRAME_ALIGN); + depth += frame_depths[frame]; + max_depth = max(max_depth, depth); + +continue_subprog: + for (; meta != nfp_prog_last_meta(nfp_prog) && meta->subprog_idx == idx; + meta = nfp_meta_next(meta)) { + if (!is_mbpf_pseudo_call(meta)) + continue; + + /* We found a call to a subprogram. Remember instruction to + * return to and subprog id. + */ + ret_insn[frame] = nfp_meta_next(meta); + ret_prog[frame] = idx; + + /* Find the callee and start processing it. */ + meta = nfp_bpf_goto_meta(nfp_prog, meta, + meta->n + 1 + meta->insn.imm); + idx = meta->subprog_idx; + frame++; + goto process_subprog; + } + /* End of for() loop means the last instruction of the subprog was + * reached. If we popped all stack frames, return; otherwise, go on + * processing remaining instructions from the caller. + */ + if (frame == 0) + return max_depth; + + depth -= frame_depths[frame]; + frame--; + meta = ret_insn[frame]; + idx = ret_prog[frame]; + goto continue_subprog; +} + +static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog, + struct bpf_insn_aux_data *aux) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (aux[meta->n].zext_dst) + meta->flags |= FLAG_INSN_DO_ZEXT; + } +} + +int nfp_bpf_finalize(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *info; + struct nfp_prog *nfp_prog; + unsigned int max_stack; + struct nfp_net *nn; + int i; + + nfp_prog = env->prog->aux->offload->dev_priv; + nfp_prog->subprog_cnt = env->subprog_cnt; + nfp_prog->subprog = kcalloc(nfp_prog->subprog_cnt, + sizeof(nfp_prog->subprog[0]), GFP_KERNEL); + if (!nfp_prog->subprog) + return -ENOMEM; + + nfp_assign_subprog_idx_and_regs(env, nfp_prog); + + info = env->subprog_info; + for (i = 0; i < nfp_prog->subprog_cnt; i++) { + nfp_prog->subprog[i].stack_depth = info[i].stack_depth; + + if (i == 0) + continue; + + /* Account for size of return address. */ + nfp_prog->subprog[i].stack_depth += REG_WIDTH; + /* Account for size of saved registers, if necessary. */ + if (nfp_prog->subprog[i].needs_reg_push) + nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; + } + + nn = netdev_priv(env->prog->aux->offload->netdev); + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog); + if (nfp_prog->stack_size > max_stack) { + pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", + nfp_prog->stack_size, max_stack); + return -EOPNOTSUPP; + } + + nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data); + return 0; +} + +int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx); + nfp_prog->verifier_meta = meta; + + /* conditional jump to jump conversion */ + if (is_mbpf_cond_jump(meta) && + insn->code == (BPF_JMP | BPF_JA | BPF_K)) { + unsigned int tgt_off; + + tgt_off = off + insn->off + 1; + + if (!insn->off) { + meta->jmp_dst = list_next_entry(meta, l); + meta->jump_neg_op = false; + } else if (meta->jmp_dst->n != aux_data[tgt_off].orig_idx) { + pr_vlog(env, "branch hard wire at %d changes target %d -> %d\n", + off, meta->jmp_dst->n, + aux_data[tgt_off].orig_idx); + return -EINVAL; + } + return 0; + } + + pr_vlog(env, "unsupported instruction replacement %hhx -> %hhx\n", + meta->insn.code, insn->code); + return -EINVAL; +} + +int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + unsigned int i; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx); + + for (i = 0; i < cnt; i++) { + if (WARN_ON_ONCE(&meta->l == &nfp_prog->insns)) + return -EINVAL; + + /* doesn't count if it already has the flag */ + if (meta->flags & FLAG_INSN_SKIP_VERIFIER_OPT) + i--; + + meta->flags |= FLAG_INSN_SKIP_VERIFIER_OPT; + meta = list_next_entry(meta, l); + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c new file mode 100644 index 000000000..71afd111b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2019 Netronome Systems, Inc. */ + +#include <linux/bitops.h> + +#include "ccm.h" +#include "nfp_app.h" +#include "nfp_net.h" + +#define ccm_warn(app, msg...) nn_dp_warn(&(app)->ctrl->dp, msg) + +#define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4) + +static bool nfp_ccm_all_tags_busy(struct nfp_ccm *ccm) +{ + u16 used_tags; + + used_tags = ccm->tag_alloc_next - ccm->tag_alloc_last; + + return used_tags > NFP_CCM_TAG_ALLOC_SPAN; +} + +static int nfp_ccm_alloc_tag(struct nfp_ccm *ccm) +{ + /* CCM is for FW communication which is request-reply. To make sure + * we don't reuse the message ID too early after timeout - limit the + * number of requests in flight. + */ + if (unlikely(nfp_ccm_all_tags_busy(ccm))) { + ccm_warn(ccm->app, "all FW request contexts busy!\n"); + return -EAGAIN; + } + + WARN_ON(__test_and_set_bit(ccm->tag_alloc_next, ccm->tag_allocator)); + return ccm->tag_alloc_next++; +} + +static void nfp_ccm_free_tag(struct nfp_ccm *ccm, u16 tag) +{ + WARN_ON(!__test_and_clear_bit(tag, ccm->tag_allocator)); + + while (!test_bit(ccm->tag_alloc_last, ccm->tag_allocator) && + ccm->tag_alloc_last != ccm->tag_alloc_next) + ccm->tag_alloc_last++; +} + +static struct sk_buff *__nfp_ccm_reply(struct nfp_ccm *ccm, u16 tag) +{ + unsigned int msg_tag; + struct sk_buff *skb; + + skb_queue_walk(&ccm->replies, skb) { + msg_tag = nfp_ccm_get_tag(skb); + if (msg_tag == tag) { + nfp_ccm_free_tag(ccm, tag); + __skb_unlink(skb, &ccm->replies); + return skb; + } + } + + return NULL; +} + +static struct sk_buff * +nfp_ccm_reply(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag) +{ + struct sk_buff *skb; + + nfp_ctrl_lock(app->ctrl); + skb = __nfp_ccm_reply(ccm, tag); + nfp_ctrl_unlock(app->ctrl); + + return skb; +} + +static struct sk_buff * +nfp_ccm_reply_drop_tag(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag) +{ + struct sk_buff *skb; + + nfp_ctrl_lock(app->ctrl); + skb = __nfp_ccm_reply(ccm, tag); + if (!skb) + nfp_ccm_free_tag(ccm, tag); + nfp_ctrl_unlock(app->ctrl); + + return skb; +} + +static struct sk_buff * +nfp_ccm_wait_reply(struct nfp_ccm *ccm, struct nfp_app *app, + enum nfp_ccm_type type, int tag) +{ + struct sk_buff *skb; + int i, err; + + for (i = 0; i < 50; i++) { + udelay(4); + skb = nfp_ccm_reply(ccm, app, tag); + if (skb) + return skb; + } + + err = wait_event_interruptible_timeout(ccm->wq, + skb = nfp_ccm_reply(ccm, app, + tag), + msecs_to_jiffies(5000)); + /* We didn't get a response - try last time and atomically drop + * the tag even if no response is matched. + */ + if (!skb) + skb = nfp_ccm_reply_drop_tag(ccm, app, tag); + if (err < 0) { + ccm_warn(app, "%s waiting for response to 0x%02x: %d\n", + err == ERESTARTSYS ? "interrupted" : "error", + type, err); + return ERR_PTR(err); + } + if (!skb) { + ccm_warn(app, "timeout waiting for response to 0x%02x\n", type); + return ERR_PTR(-ETIMEDOUT); + } + + return skb; +} + +struct sk_buff * +nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int reply_size) +{ + struct nfp_app *app = ccm->app; + struct nfp_ccm_hdr *hdr; + int reply_type, tag; + + nfp_ctrl_lock(app->ctrl); + tag = nfp_ccm_alloc_tag(ccm); + if (tag < 0) { + nfp_ctrl_unlock(app->ctrl); + dev_kfree_skb_any(skb); + return ERR_PTR(tag); + } + + hdr = (void *)skb->data; + hdr->ver = NFP_CCM_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(tag); + + __nfp_app_ctrl_tx(app, skb); + + nfp_ctrl_unlock(app->ctrl); + + skb = nfp_ccm_wait_reply(ccm, app, type, tag); + if (IS_ERR(skb)) + return skb; + + reply_type = nfp_ccm_get_type(skb); + if (reply_type != __NFP_CCM_REPLY(type)) { + ccm_warn(app, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n", + reply_type, __NFP_CCM_REPLY(type)); + goto err_free; + } + /* 0 reply_size means caller will do the validation */ + if (reply_size && skb->len != reply_size) { + ccm_warn(app, "cmsg drop - type 0x%02x wrong size %d != %d!\n", + type, skb->len, reply_size); + goto err_free; + } + + return skb; +err_free: + dev_kfree_skb_any(skb); + return ERR_PTR(-EIO); +} + +void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb) +{ + struct nfp_app *app = ccm->app; + unsigned int tag; + + if (unlikely(skb->len < sizeof(struct nfp_ccm_hdr))) { + ccm_warn(app, "cmsg drop - too short %d!\n", skb->len); + goto err_free; + } + + nfp_ctrl_lock(app->ctrl); + + tag = nfp_ccm_get_tag(skb); + if (unlikely(!test_bit(tag, ccm->tag_allocator))) { + ccm_warn(app, "cmsg drop - no one is waiting for tag %u!\n", + tag); + goto err_unlock; + } + + __skb_queue_tail(&ccm->replies, skb); + wake_up_interruptible_all(&ccm->wq); + + nfp_ctrl_unlock(app->ctrl); + return; + +err_unlock: + nfp_ctrl_unlock(app->ctrl); +err_free: + dev_kfree_skb_any(skb); +} + +int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app) +{ + ccm->app = app; + skb_queue_head_init(&ccm->replies); + init_waitqueue_head(&ccm->wq); + return 0; +} + +void nfp_ccm_clean(struct nfp_ccm *ccm) +{ + WARN_ON(!skb_queue_empty(&ccm->replies)); +} diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h new file mode 100644 index 000000000..d81d450be --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2016-2019 Netronome Systems, Inc. */ + +#ifndef NFP_CCM_H +#define NFP_CCM_H 1 + +#include <linux/bitmap.h> +#include <linux/skbuff.h> +#include <linux/wait.h> + +struct nfp_app; +struct nfp_net; + +/* Firmware ABI */ + +enum nfp_ccm_type { + NFP_CCM_TYPE_BPF_MAP_ALLOC = 1, + NFP_CCM_TYPE_BPF_MAP_FREE = 2, + NFP_CCM_TYPE_BPF_MAP_LOOKUP = 3, + NFP_CCM_TYPE_BPF_MAP_UPDATE = 4, + NFP_CCM_TYPE_BPF_MAP_DELETE = 5, + NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6, + NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7, + NFP_CCM_TYPE_BPF_BPF_EVENT = 8, + NFP_CCM_TYPE_CRYPTO_RESET = 9, + NFP_CCM_TYPE_CRYPTO_ADD = 10, + NFP_CCM_TYPE_CRYPTO_DEL = 11, + NFP_CCM_TYPE_CRYPTO_UPDATE = 12, + NFP_CCM_TYPE_CRYPTO_RESYNC = 13, + __NFP_CCM_TYPE_MAX, +}; + +#define NFP_CCM_ABI_VERSION 1 + +#define NFP_CCM_TYPE_REPLY_BIT 7 +#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) + +struct nfp_ccm_hdr { + union { + struct { + u8 type; + u8 ver; + __be16 tag; + }; + __be32 raw; + }; +}; + +static inline u8 nfp_ccm_get_type(struct sk_buff *skb) +{ + struct nfp_ccm_hdr *hdr; + + hdr = (struct nfp_ccm_hdr *)skb->data; + + return hdr->type; +} + +static inline __be16 __nfp_ccm_get_tag(struct sk_buff *skb) +{ + struct nfp_ccm_hdr *hdr; + + hdr = (struct nfp_ccm_hdr *)skb->data; + + return hdr->tag; +} + +static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +{ + return be16_to_cpu(__nfp_ccm_get_tag(skb)); +} + +#define NFP_NET_MBOX_TLV_TYPE GENMASK(31, 16) +#define NFP_NET_MBOX_TLV_LEN GENMASK(15, 0) + +enum nfp_ccm_mbox_tlv_type { + NFP_NET_MBOX_TLV_TYPE_UNKNOWN = 0, + NFP_NET_MBOX_TLV_TYPE_END = 1, + NFP_NET_MBOX_TLV_TYPE_MSG = 2, + NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP = 3, + NFP_NET_MBOX_TLV_TYPE_RESV = 4, +}; + +/* Implementation */ + +/** + * struct nfp_ccm - common control message handling + * @app: APP handle + * + * @tag_allocator: bitmap of control message tags in use + * @tag_alloc_next: next tag bit to allocate + * @tag_alloc_last: next tag bit to be freed + * + * @replies: received cmsg replies waiting to be consumed + * @wq: work queue for waiting for cmsg replies + */ +struct nfp_ccm { + struct nfp_app *app; + + DECLARE_BITMAP(tag_allocator, U16_MAX + 1); + u16 tag_alloc_next; + u16 tag_alloc_last; + + struct sk_buff_head replies; + wait_queue_head_t wq; +}; + +int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app); +void nfp_ccm_clean(struct nfp_ccm *ccm); +void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb); +struct sk_buff * +nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int reply_size); + +int nfp_ccm_mbox_alloc(struct nfp_net *nn); +void nfp_ccm_mbox_free(struct nfp_net *nn); +int nfp_ccm_mbox_init(struct nfp_net *nn); +void nfp_ccm_mbox_clean(struct nfp_net *nn); +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size); +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags); +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical); +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size); +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c new file mode 100644 index 000000000..4247bca09 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/io.h> +#include <linux/skbuff.h> + +#include "ccm.h" +#include "nfp_net.h" + +/* CCM messages via the mailbox. CMSGs get wrapped into simple TLVs + * and copied into the mailbox. Multiple messages can be copied to + * form a batch. Threads come in with CMSG formed in an skb, then + * enqueue that skb onto the request queue. If threads skb is first + * in queue this thread will handle the mailbox operation. It copies + * up to 64 messages into the mailbox (making sure that both requests + * and replies will fit. After FW is done processing the batch it + * copies the data out and wakes waiting threads. + * If a thread is waiting it either gets its the message completed + * (response is copied into the same skb as the request, overwriting + * it), or becomes the first in queue. + * Completions and next-to-run are signaled via the control buffer + * to limit potential cache line bounces. + */ + +#define NFP_CCM_MBOX_BATCH_LIMIT 64 +#define NFP_CCM_TIMEOUT (NFP_NET_POLL_TIMEOUT * 1000) +#define NFP_CCM_MAX_QLEN 1024 + +enum nfp_net_mbox_cmsg_state { + NFP_NET_MBOX_CMSG_STATE_QUEUED, + NFP_NET_MBOX_CMSG_STATE_NEXT, + NFP_NET_MBOX_CMSG_STATE_BUSY, + NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND, + NFP_NET_MBOX_CMSG_STATE_DONE, +}; + +/** + * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info + * @state: processing state (/stage) of the message + * @err: error encountered during processing if any + * @max_len: max(request_len, reply_len) + * @exp_reply: expected reply length (0 means don't validate) + * @posted: the message was posted and nobody waits for the reply + */ +struct nfp_ccm_mbox_cmsg_cb { + enum nfp_net_mbox_cmsg_state state; + int err; + unsigned int max_len; + unsigned int exp_reply; + bool posted; +}; + +static u32 nfp_ccm_mbox_max_msg(struct nfp_net *nn) +{ + return round_down(nn->tlv_caps.mbox_len, 4) - + NFP_NET_CFG_MBOX_SIMPLE_VAL - /* common mbox command header */ + 4 * 2; /* Msg TLV plus End TLV headers */ +} + +static void +nfp_ccm_mbox_msg_init(struct sk_buff *skb, unsigned int exp_reply, int max_len) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_QUEUED; + cb->err = 0; + cb->max_len = max_len; + cb->exp_reply = exp_reply; + cb->posted = false; +} + +static int nfp_ccm_mbox_maxlen(const struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->max_len; +} + +static bool nfp_ccm_mbox_done(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_DONE; +} + +static bool nfp_ccm_mbox_in_progress(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state != NFP_NET_MBOX_CMSG_STATE_QUEUED && + cb->state != NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_set_busy(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_BUSY; +} + +static bool nfp_ccm_mbox_is_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->posted; +} + +static void nfp_ccm_mbox_mark_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->posted = true; +} + +static bool nfp_ccm_mbox_is_first(struct nfp_net *nn, struct sk_buff *skb) +{ + return skb_queue_is_first(&nn->mbox_cmsg.queue, skb); +} + +static bool nfp_ccm_mbox_should_run(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_mark_next_runner(struct nfp_net *nn) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (!skb) + return; + + cb = (void *)skb->cb; + cb->state = NFP_NET_MBOX_CMSG_STATE_NEXT; + if (cb->posted) + queue_work(nn->mbox_cmsg.workq, &nn->mbox_cmsg.runq_work); +} + +static void +nfp_ccm_mbox_write_tlv(struct nfp_net *nn, u32 off, u32 type, u32 len) +{ + nn_writel(nn, off, + FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, type) | + FIELD_PREP(NFP_NET_MBOX_TLV_LEN, len)); +} + +static void nfp_ccm_mbox_copy_in(struct nfp_net *nn, struct sk_buff *last) +{ + struct sk_buff *skb; + int reserve, i, cnt; + __be32 *data; + u32 off, len; + + off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_MSG, + skb->len); + off += 4; + + /* Write data word by word, skb->data should be aligned */ + data = (__be32 *)skb->data; + cnt = skb->len / 4; + for (i = 0 ; i < cnt; i++) { + nn_writel(nn, off, be32_to_cpu(data[i])); + off += 4; + } + if (skb->len & 3) { + __be32 tmp = 0; + + memcpy(&tmp, &data[i], skb->len & 3); + nn_writel(nn, off, be32_to_cpu(tmp)); + off += 4; + } + + /* Reserve space if reply is bigger */ + len = round_up(skb->len, 4); + reserve = nfp_ccm_mbox_maxlen(skb) - len; + if (reserve > 0) { + nfp_ccm_mbox_write_tlv(nn, off, + NFP_NET_MBOX_TLV_TYPE_RESV, + reserve); + off += 4 + reserve; + } + + if (skb == last) + break; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } + + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_END, 0); +} + +static struct sk_buff * +nfp_ccm_mbox_find_req(struct nfp_net *nn, __be16 tag, struct sk_buff *last) +{ + struct sk_buff *skb; + + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + if (__nfp_ccm_get_tag(skb) == tag) + return skb; + + if (skb == last) + return NULL; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } +} + +static void nfp_ccm_mbox_copy_out(struct nfp_net *nn, struct sk_buff *last) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + u8 __iomem *data, *end; + struct sk_buff *skb; + + data = nn->dp.ctrl_bar + nn->tlv_caps.mbox_off + + NFP_NET_CFG_MBOX_SIMPLE_VAL; + end = data + nn->tlv_caps.mbox_len; + + while (true) { + unsigned int length, offset, type; + struct nfp_ccm_hdr hdr; + u32 tlv_hdr; + + tlv_hdr = readl(data); + type = FIELD_GET(NFP_NET_MBOX_TLV_TYPE, tlv_hdr); + length = FIELD_GET(NFP_NET_MBOX_TLV_LEN, tlv_hdr); + offset = data - nn->dp.ctrl_bar; + + /* Advance past the header */ + data += 4; + + if (data + length > end) { + nn_dp_warn(&nn->dp, "mailbox oversized TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (type == NFP_NET_MBOX_TLV_TYPE_END) + break; + if (type == NFP_NET_MBOX_TLV_TYPE_RESV) + goto next_tlv; + if (type != NFP_NET_MBOX_TLV_TYPE_MSG && + type != NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, "mailbox unknown TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (length < 4) { + nn_dp_warn(&nn->dp, "mailbox msg too short to contain header TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + hdr.raw = cpu_to_be32(readl(data)); + + skb = nfp_ccm_mbox_find_req(nn, hdr.tag, last); + if (!skb) { + nn_dp_warn(&nn->dp, "mailbox request not found:%u\n", + be16_to_cpu(hdr.tag)); + break; + } + cb = (void *)skb->cb; + + if (type == NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, + "mailbox msg not supported type:%d\n", + nfp_ccm_get_type(skb)); + cb->err = -EIO; + goto next_tlv; + } + + if (hdr.type != __NFP_CCM_REPLY(nfp_ccm_get_type(skb))) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong type:%u expected:%lu\n", + hdr.type, + __NFP_CCM_REPLY(nfp_ccm_get_type(skb))); + cb->err = -EIO; + goto next_tlv; + } + if (cb->exp_reply && length != cb->exp_reply) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong size type:%u expected:%u have:%u\n", + hdr.type, length, cb->exp_reply); + cb->err = -EIO; + goto next_tlv; + } + if (length > cb->max_len) { + nn_dp_warn(&nn->dp, "mailbox msg oversized reply type:%u max:%u have:%u\n", + hdr.type, cb->max_len, length); + cb->err = -EIO; + goto next_tlv; + } + + if (!cb->posted) { + __be32 *skb_data; + int i, cnt; + + if (length <= skb->len) + __skb_trim(skb, length); + else + skb_put(skb, length - skb->len); + + /* We overcopy here slightly, but that's okay, + * the skb is large enough, and the garbage will + * be ignored (beyond skb->len). + */ + skb_data = (__be32 *)skb->data; + memcpy(skb_data, &hdr, 4); + + cnt = DIV_ROUND_UP(length, 4); + for (i = 1 ; i < cnt; i++) + skb_data[i] = cpu_to_be32(readl(data + i * 4)); + } + + cb->state = NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND; +next_tlv: + data += round_up(length, 4); + if (data + 4 > end) { + nn_dp_warn(&nn->dp, + "reached end of MBOX without END TLV\n"); + break; + } + } + + smp_wmb(); /* order the skb->data vs. cb->state */ + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + if (cb->state != NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND) { + cb->err = -ENOENT; + smp_wmb(); /* order the cb->err vs. cb->state */ + } + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + + if (cb->posted) { + if (cb->err) + nn_dp_warn(&nn->dp, + "mailbox posted msg failed type:%u err:%d\n", + nfp_ccm_get_type(skb), cb->err); + dev_consume_skb_any(skb); + } + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void +nfp_ccm_mbox_mark_all_err(struct nfp_net *nn, struct sk_buff *last, int err) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + cb->err = err; + smp_wmb(); /* order the cb->err vs. cb->state */ + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void nfp_ccm_mbox_run_queue_unlock(struct nfp_net *nn) + __releases(&nn->mbox_cmsg.queue.lock) +{ + int space = nn->tlv_caps.mbox_len - NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct sk_buff *skb, *last; + int cnt, err; + + space -= 4; /* for End TLV */ + + /* First skb must fit, because it's ours and we checked it fits */ + cnt = 1; + last = skb = __skb_peek(&nn->mbox_cmsg.queue); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + + while (!skb_queue_is_last(&nn->mbox_cmsg.queue, last)) { + skb = skb_queue_next(&nn->mbox_cmsg.queue, last); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + if (space < 0) + break; + last = skb; + nfp_ccm_mbox_set_busy(skb); + cnt++; + if (cnt == NFP_CCM_MBOX_BATCH_LIMIT) + break; + } + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + /* Now we own all skb's marked in progress, new requests may arrive + * at the end of the queue. + */ + + nn_ctrl_bar_lock(nn); + + nfp_ccm_mbox_copy_in(nn, last); + + err = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + if (!err) + nfp_ccm_mbox_copy_out(nn, last); + else + nfp_ccm_mbox_mark_all_err(nn, last, -EIO); + + nn_ctrl_bar_unlock(nn); + + wake_up_all(&nn->mbox_cmsg.wq); +} + +static int nfp_ccm_mbox_skb_return(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + if (cb->err) + dev_kfree_skb_any(skb); + return cb->err; +} + +/* If wait timed out but the command is already in progress we have + * to wait until it finishes. Runners has ownership of the skbs marked + * as busy. + */ +static int +nfp_ccm_mbox_unlink_unlock(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type) + __releases(&nn->mbox_cmsg.queue.lock) +{ + bool was_first; + + if (nfp_ccm_mbox_in_progress(skb)) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + wait_event(nn->mbox_cmsg.wq, nfp_ccm_mbox_done(skb)); + smp_rmb(); /* pairs with smp_wmb() after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + was_first = nfp_ccm_mbox_should_run(nn, skb); + __skb_unlink(skb, &nn->mbox_cmsg.queue); + if (was_first) + nfp_ccm_mbox_mark_next_runner(nn); + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + if (was_first) + wake_up_all(&nn->mbox_cmsg.wq); + + nn_dp_warn(&nn->dp, "time out waiting for mbox response to 0x%02x\n", + type); + return -ETIMEDOUT; +} + +static int +nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, unsigned int max_reply_size, + gfp_t flags) +{ + const unsigned int mbox_max = nfp_ccm_mbox_max_msg(nn); + unsigned int max_len; + ssize_t undersize; + int err; + + if (unlikely(!(nn->tlv_caps.mbox_cmsg_types & BIT(type)))) { + nn_dp_warn(&nn->dp, + "message type %d not supported by mailbox\n", type); + return -EINVAL; + } + + /* If the reply size is unknown assume it will take the entire + * mailbox, the callers should do their best for this to never + * happen. + */ + if (!max_reply_size) + max_reply_size = mbox_max; + max_reply_size = round_up(max_reply_size, 4); + + /* Make sure we can fit the entire reply into the skb, + * and that we don't have to slow down the mbox handler + * with allocations. + */ + undersize = max_reply_size - (skb_end_pointer(skb) - skb->data); + if (undersize > 0) { + err = pskb_expand_head(skb, 0, undersize, flags); + if (err) { + nn_dp_warn(&nn->dp, + "can't allocate reply buffer for mailbox\n"); + return err; + } + } + + /* Make sure that request and response both fit into the mailbox */ + max_len = max(max_reply_size, round_up(skb->len, 4)); + if (max_len > mbox_max) { + nn_dp_warn(&nn->dp, + "message too big for tha mailbox: %u/%u vs %u\n", + skb->len, max_reply_size, mbox_max); + return -EMSGSIZE; + } + + nfp_ccm_mbox_msg_init(skb, reply_size, max_len); + + return 0; +} + +static int +nfp_ccm_mbox_msg_enqueue(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, bool critical) +{ + struct nfp_ccm_hdr *hdr; + + assert_spin_locked(&nn->mbox_cmsg.queue.lock); + + if (!critical && nn->mbox_cmsg.queue.qlen >= NFP_CCM_MAX_QLEN) { + nn_dp_warn(&nn->dp, "mailbox request queue too long\n"); + return -EBUSY; + } + + hdr = (void *)skb->data; + hdr->ver = NFP_CCM_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(nn->mbox_cmsg.tag++); + + __skb_queue_tail(&nn->mbox_cmsg.queue, skb); + + return 0; +} + +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, reply_size, + max_reply_size, GFP_KERNEL); + if (err) + goto err_free_skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, critical); + if (err) + goto err_unlock; + + /* First in queue takes the mailbox lock and processes the batch */ + if (!nfp_ccm_mbox_is_first(nn, skb)) { + bool to; + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + to = !wait_event_timeout(nn->mbox_cmsg.wq, + nfp_ccm_mbox_done(skb) || + nfp_ccm_mbox_should_run(nn, skb), + msecs_to_jiffies(NFP_CCM_TIMEOUT)); + + /* fast path for those completed by another thread */ + if (nfp_ccm_mbox_done(skb)) { + smp_rmb(); /* pairs with wmb after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + if (!nfp_ccm_mbox_is_first(nn, skb)) { + WARN_ON(!to); + + err = nfp_ccm_mbox_unlink_unlock(nn, skb, type); + if (err) + goto err_free_skb; + return 0; + } + } + + /* run queue expects the lock held */ + nfp_ccm_mbox_run_queue_unlock(nn); + return nfp_ccm_mbox_skb_return(skb); + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size) +{ + return __nfp_ccm_mbox_communicate(nn, skb, type, reply_size, + max_reply_size, false); +} + +static void nfp_ccm_mbox_post_runq_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + + nn = container_of(work, struct nfp_net, mbox_cmsg.runq_work); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + skb = __skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb) || + !nfp_ccm_mbox_should_run(nn, skb))) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + return; + } + + nfp_ccm_mbox_run_queue_unlock(nn); +} + +static void nfp_ccm_mbox_post_wait_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + int err; + + nn = container_of(work, struct nfp_net, mbox_cmsg.wait_work); + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb))) + /* Should never happen so it's unclear what to do here.. */ + goto exit_unlock_wake; + + err = nfp_net_mbox_reconfig_wait_posted(nn); + if (!err) + nfp_ccm_mbox_copy_out(nn, skb); + else + nfp_ccm_mbox_mark_all_err(nn, skb, -EIO); +exit_unlock_wake: + nn_ctrl_bar_unlock(nn); + wake_up_all(&nn->mbox_cmsg.wq); +} + +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, 0, max_reply_size, + GFP_ATOMIC); + if (err) + goto err_free_skb; + + nfp_ccm_mbox_mark_posted(skb); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, false); + if (err) + goto err_unlock; + + if (nfp_ccm_mbox_is_first(nn, skb)) { + if (nn_ctrl_bar_trylock(nn)) { + nfp_ccm_mbox_copy_in(nn, skb); + nfp_net_mbox_reconfig_post(nn, + NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + queue_work(nn->mbox_cmsg.workq, + &nn->mbox_cmsg.wait_work); + } else { + nfp_ccm_mbox_mark_next_runner(nn); + } + } + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + return 0; + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags) +{ + unsigned int max_size; + struct sk_buff *skb; + + if (!reply_size) + max_size = nfp_ccm_mbox_max_msg(nn); + else + max_size = max(req_size, reply_size); + max_size = round_up(max_size, 4); + + skb = alloc_skb(max_size, flags); + if (!skb) + return NULL; + + skb_put(skb, req_size); + + return skb; +} + +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size) +{ + return nfp_ccm_mbox_max_msg(nn) >= size; +} + +int nfp_ccm_mbox_init(struct nfp_net *nn) +{ + return 0; +} + +void nfp_ccm_mbox_clean(struct nfp_net *nn) +{ + drain_workqueue(nn->mbox_cmsg.workq); +} + +int nfp_ccm_mbox_alloc(struct nfp_net *nn) +{ + skb_queue_head_init(&nn->mbox_cmsg.queue); + init_waitqueue_head(&nn->mbox_cmsg.wq); + INIT_WORK(&nn->mbox_cmsg.wait_work, nfp_ccm_mbox_post_wait_work); + INIT_WORK(&nn->mbox_cmsg.runq_work, nfp_ccm_mbox_post_runq_work); + + nn->mbox_cmsg.workq = alloc_workqueue("nfp-ccm-mbox", WQ_UNBOUND, 0); + if (!nn->mbox_cmsg.workq) + return -ENOMEM; + return 0; +} + +void nfp_ccm_mbox_free(struct nfp_net *nn) +{ + destroy_workqueue(nn->mbox_cmsg.workq); + WARN_ON(!skb_queue_empty(&nn->mbox_cmsg.queue)); +} diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h new file mode 100644 index 000000000..bffe58bb2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef NFP_CRYPTO_H +#define NFP_CRYPTO_H 1 + +struct net_device; +struct nfp_net; +struct nfp_net_tls_resync_req; + +struct nfp_net_tls_offload_ctx { + __be32 fw_handle[2]; + + u8 rx_end[0]; + /* Tx only fields follow - Rx side does not have enough driver state + * to fit these + */ + + u32 next_seq; +}; + +#ifdef CONFIG_TLS_DEVICE +int nfp_net_tls_init(struct nfp_net *nn); +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len); +#else +static inline int nfp_net_tls_init(struct nfp_net *nn) +{ + return 0; +} + +static inline int +nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h new file mode 100644 index 000000000..dcb67c2b5 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef NFP_CRYPTO_FW_H +#define NFP_CRYPTO_FW_H 1 + +#include "../ccm.h" + +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 + +struct nfp_net_tls_resync_req { + __be32 fw_handle[2]; + __be32 tcp_seq; + u8 l3_offset; + u8 l4_offset; + u8 resv[2]; +}; + +struct nfp_crypto_reply_simple { + struct nfp_ccm_hdr hdr; + __be32 error; +}; + +struct nfp_crypto_req_reset { + struct nfp_ccm_hdr hdr; + __be32 ep_id; +}; + +#define NFP_NET_TLS_IPVER GENMASK(15, 12) +#define NFP_NET_TLS_VLAN GENMASK(11, 0) +#define NFP_NET_TLS_VLAN_UNUSED 4095 + +struct nfp_crypto_req_add_front { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + u8 key_len; + __be16 ipver_vlan __packed; + u8 l4_proto; +#define NFP_NET_TLS_NON_ADDR_KEY_LEN 8 + u8 l3_addrs[]; +}; + +struct nfp_crypto_req_add_back { + __be16 src_port; + __be16 dst_port; + __be32 key[8]; + __be32 salt; + __be32 iv[2]; + __be32 counter; + __be32 rec_no[2]; + __be32 tcp_seq; +}; + +struct nfp_crypto_req_add_v4 { + struct nfp_crypto_req_add_front front; + __be32 src_ip; + __be32 dst_ip; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_req_add_v6 { + struct nfp_crypto_req_add_front front; + __be32 src_ip[4]; + __be32 dst_ip[4]; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_reply_add { + struct nfp_ccm_hdr hdr; + __be32 error; + __be32 handle[2]; +}; + +struct nfp_crypto_req_del { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + __be32 handle[2]; +}; + +struct nfp_crypto_req_update { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + __be32 handle[2]; + __be32 rec_no[2]; + __be32 tcp_seq; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c new file mode 100644 index 000000000..f80f1a695 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> +#include <linux/string.h> +#include <net/inet6_hashtables.h> +#include <net/tls.h> + +#include "../ccm.h" +#include "../nfp_net.h" +#include "crypto.h" +#include "fw.h" + +#define NFP_NET_TLS_CCM_MBOX_OPS_MASK \ + (BIT(NFP_CCM_TYPE_CRYPTO_RESET) | \ + BIT(NFP_CCM_TYPE_CRYPTO_ADD) | \ + BIT(NFP_CCM_TYPE_CRYPTO_DEL) | \ + BIT(NFP_CCM_TYPE_CRYPTO_UPDATE)) + +#define NFP_NET_TLS_OPCODE_MASK_RX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC) + +#define NFP_NET_TLS_OPCODE_MASK_TX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC) + +#define NFP_NET_TLS_OPCODE_MASK \ + (NFP_NET_TLS_OPCODE_MASK_RX | NFP_NET_TLS_OPCODE_MASK_TX) + +static void nfp_net_crypto_set_op(struct nfp_net *nn, u8 opcode, bool on) +{ + u32 off, val; + + off = nn->tlv_caps.crypto_enable_off + round_down(opcode / 8, 4); + + val = nn_readl(nn, off); + if (on) + val |= BIT(opcode & 31); + else + val &= ~BIT(opcode & 31); + nn_writel(nn, off, val); +} + +static bool +__nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + u8 opcode; + int cnt; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + nn->ktls_tx_conn_cnt += add; + cnt = nn->ktls_tx_conn_cnt; + nn->dp.ktls_tx = !!nn->ktls_tx_conn_cnt; + } else { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + nn->ktls_rx_conn_cnt += add; + cnt = nn->ktls_rx_conn_cnt; + } + + /* Care only about 0 -> 1 and 1 -> 0 transitions */ + if (cnt > 1) + return false; + + nfp_net_crypto_set_op(nn, opcode, cnt); + return true; +} + +static int +nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + int ret = 0; + + /* Use the BAR lock to protect the connection counts */ + nn_ctrl_bar_lock(nn); + if (__nfp_net_tls_conn_cnt_changed(nn, add, direction)) { + ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + /* Undo the cnt adjustment if failed */ + if (ret) + __nfp_net_tls_conn_cnt_changed(nn, -add, direction); + } + nn_ctrl_bar_unlock(nn); + + return ret; +} + +static int +nfp_net_tls_conn_add(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, 1, direction); +} + +static int +nfp_net_tls_conn_remove(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, -1, direction); +} + +static struct sk_buff * +nfp_net_tls_alloc_simple(struct nfp_net *nn, size_t req_sz, gfp_t flags) +{ + return nfp_ccm_mbox_msg_alloc(nn, req_sz, + sizeof(struct nfp_crypto_reply_simple), + flags); +} + +static int +nfp_net_tls_communicate_simple(struct nfp_net *nn, struct sk_buff *skb, + const char *name, enum nfp_ccm_type type) +{ + struct nfp_crypto_reply_simple *reply; + int err; + + err = __nfp_ccm_mbox_communicate(nn, skb, type, + sizeof(*reply), sizeof(*reply), + type == NFP_CCM_TYPE_CRYPTO_DEL); + if (err) { + nn_dp_warn(&nn->dp, "failed to %s TLS: %d\n", name, err); + return err; + } + + reply = (void *)skb->data; + err = -be32_to_cpu(reply->error); + if (err) + nn_dp_warn(&nn->dp, "failed to %s TLS, fw replied: %d\n", + name, err); + dev_consume_skb_any(skb); + + return err; +} + +static void nfp_net_tls_del_fw(struct nfp_net *nn, __be32 *fw_handle) +{ + struct nfp_crypto_req_del *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return; + + req = (void *)skb->data; + req->ep_id = 0; + memcpy(req->handle, fw_handle, sizeof(req->handle)); + + nfp_net_tls_communicate_simple(nn, skb, "delete", + NFP_CCM_TYPE_CRYPTO_DEL); +} + +static void +nfp_net_tls_set_ipver_vlan(struct nfp_crypto_req_add_front *front, u8 ipver) +{ + front->ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, ipver) | + FIELD_PREP(NFP_NET_TLS_VLAN, + NFP_NET_TLS_VLAN_UNUSED)); +} + +static void +nfp_net_tls_assign_conn_id(struct nfp_net *nn, + struct nfp_crypto_req_add_front *front) +{ + u32 len; + u64 id; + + id = atomic64_inc_return(&nn->ktls_conn_id_gen); + len = front->key_len - NFP_NET_TLS_NON_ADDR_KEY_LEN; + + memcpy(front->l3_addrs, &id, sizeof(id)); + memset(front->l3_addrs + sizeof(id), 0, len - sizeof(id)); +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv4(struct nfp_net *nn, struct nfp_crypto_req_add_v4 *req, + struct sock *sk, int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + req->front.key_len += sizeof(__be32) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + req->src_ip = inet->inet_daddr; + req->dst_ip = inet->inet_saddr; + } + + return &req->back; +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv6(struct nfp_net *nn, struct nfp_crypto_req_add_v6 *req, + struct sock *sk, int direction) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np = inet6_sk(sk); + + req->front.key_len += sizeof(struct in6_addr) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + memcpy(req->src_ip, &sk->sk_v6_daddr, sizeof(req->src_ip)); + memcpy(req->dst_ip, &np->saddr, sizeof(req->dst_ip)); + } + +#endif + return &req->back; +} + +static void +nfp_net_tls_set_l4(struct nfp_crypto_req_add_front *front, + struct nfp_crypto_req_add_back *back, struct sock *sk, + int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + front->l4_proto = IPPROTO_TCP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + back->src_port = 0; + back->dst_port = 0; + } else { + back->src_port = inet->inet_dport; + back->dst_port = inet->inet_sport; + } +} + +static u8 nfp_tls_1_2_dir_to_opcode(enum tls_offload_ctx_dir direction) +{ + switch (direction) { + case TLS_OFFLOAD_CTX_DIR_TX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + case TLS_OFFLOAD_CTX_DIR_RX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static bool +nfp_net_cipher_supported(struct nfp_net *nn, u16 cipher_type, + enum tls_offload_ctx_dir direction) +{ + u8 bit; + + switch (cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + else + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + break; + default: + return false; + } + + return nn->tlv_caps.crypto_ops & BIT(bit); +} + +static int +nfp_net_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *tls_ci; + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_crypto_req_add_front *front; + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_add_back *back; + struct nfp_crypto_reply_add *reply; + struct sk_buff *skb; + size_t req_sz; + void *req; + bool ipv6; + int err; + + BUILD_BUG_ON(sizeof(struct nfp_net_tls_offload_ctx) > + TLS_DRIVER_STATE_SIZE_TX); + BUILD_BUG_ON(offsetof(struct nfp_net_tls_offload_ctx, rx_end) > + TLS_DRIVER_STATE_SIZE_RX); + + if (!nfp_net_cipher_supported(nn, crypto_info->cipher_type, direction)) + return -EOPNOTSUPP; + + switch (sk->sk_family) { +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (ipv6_only_sock(sk) || + ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { + req_sz = sizeof(struct nfp_crypto_req_add_v6); + ipv6 = true; + break; + } + fallthrough; +#endif + case AF_INET: + req_sz = sizeof(struct nfp_crypto_req_add_v4); + ipv6 = false; + break; + default: + return -EOPNOTSUPP; + } + + err = nfp_net_tls_conn_add(nn, direction); + if (err) + return err; + + skb = nfp_ccm_mbox_msg_alloc(nn, req_sz, sizeof(*reply), GFP_KERNEL); + if (!skb) { + err = -ENOMEM; + goto err_conn_remove; + } + + front = (void *)skb->data; + front->ep_id = 0; + front->key_len = NFP_NET_TLS_NON_ADDR_KEY_LEN; + front->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(front->resv, 0, sizeof(front->resv)); + + nfp_net_tls_set_ipver_vlan(front, ipv6 ? 6 : 4); + + req = (void *)skb->data; + if (ipv6) + back = nfp_net_tls_set_ipv6(nn, req, sk, direction); + else + back = nfp_net_tls_set_ipv4(nn, req, sk, direction); + + nfp_net_tls_set_l4(front, back, sk, direction); + + back->counter = 0; + back->tcp_seq = cpu_to_be32(start_offload_tcp_sn); + + tls_ci = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + memcpy(back->key, tls_ci->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memset(&back->key[TLS_CIPHER_AES_GCM_128_KEY_SIZE / 4], 0, + sizeof(back->key) - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(back->iv, tls_ci->iv, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(&back->salt, tls_ci->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(back->rec_no, tls_ci->rec_seq, sizeof(tls_ci->rec_seq)); + + /* Get an extra ref on the skb so we can wipe the key after */ + skb_get(skb); + + err = nfp_ccm_mbox_communicate(nn, skb, NFP_CCM_TYPE_CRYPTO_ADD, + sizeof(*reply), sizeof(*reply)); + reply = (void *)skb->data; + + /* We depend on CCM MBOX code not reallocating skb we sent + * so we can clear the key material out of the memory. + */ + if (!WARN_ON_ONCE((u8 *)back < skb->head || + (u8 *)back > skb_end_pointer(skb)) && + !WARN_ON_ONCE((u8 *)&reply[1] > (u8 *)back)) + memzero_explicit(back, sizeof(*back)); + dev_consume_skb_any(skb); /* the extra ref from skb_get() above */ + + if (err) { + nn_dp_warn(&nn->dp, "failed to add TLS: %d (%d)\n", + err, direction == TLS_OFFLOAD_CTX_DIR_TX); + /* communicate frees skb on error */ + goto err_conn_remove; + } + + err = -be32_to_cpu(reply->error); + if (err) { + if (err == -ENOSPC) { + if (!atomic_fetch_inc(&nn->ktls_no_space)) + nn_info(nn, "HW TLS table full\n"); + } else { + nn_dp_warn(&nn->dp, + "failed to add TLS, FW replied: %d\n", err); + } + goto err_free_skb; + } + + if (!reply->handle[0] && !reply->handle[1]) { + nn_dp_warn(&nn->dp, "FW returned NULL handle\n"); + err = -EINVAL; + goto err_fw_remove; + } + + ntls = tls_driver_ctx(sk, direction); + memcpy(ntls->fw_handle, reply->handle, sizeof(ntls->fw_handle)); + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + ntls->next_seq = start_offload_tcp_sn; + dev_consume_skb_any(skb); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return 0; + + if (!nn->tlv_caps.tls_resync_ss) + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + + return 0; + +err_fw_remove: + nfp_net_tls_del_fw(nn, reply->handle); +err_free_skb: + dev_consume_skb_any(skb); +err_conn_remove: + nfp_net_tls_conn_remove(nn, direction); + return err; +} + +static void +nfp_net_tls_del(struct net_device *netdev, struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + + nfp_net_tls_conn_remove(nn, direction); + + ntls = __tls_driver_ctx(tls_ctx, direction); + nfp_net_tls_del_fw(nn, ntls->fw_handle); +} + +static int +nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_update *req; + enum nfp_ccm_type type; + struct sk_buff *skb; + gfp_t flags; + int err; + + flags = direction == TLS_OFFLOAD_CTX_DIR_TX ? GFP_KERNEL : GFP_ATOMIC; + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), flags); + if (!skb) + return -ENOMEM; + + ntls = tls_driver_ctx(sk, direction); + req = (void *)skb->data; + req->ep_id = 0; + req->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(req->resv, 0, sizeof(req->resv)); + memcpy(req->handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + req->tcp_seq = cpu_to_be32(seq); + memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + + type = NFP_CCM_TYPE_CRYPTO_UPDATE; + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + err = nfp_net_tls_communicate_simple(nn, skb, "sync", type); + if (err) + return err; + ntls->next_seq = seq; + } else { + if (nn->tlv_caps.tls_resync_ss) + type = NFP_CCM_TYPE_CRYPTO_RESYNC; + nfp_ccm_mbox_post(nn, skb, type, + sizeof(struct nfp_crypto_reply_simple)); + atomic_inc(&nn->ktls_rx_resync_sent); + } + + return 0; +} + +static const struct tlsdev_ops nfp_net_tls_ops = { + .tls_dev_add = nfp_net_tls_add, + .tls_dev_del = nfp_net_tls_del, + .tls_dev_resync = nfp_net_tls_resync, +}; + +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct net *net = dev_net(netdev); + struct ipv6hdr *ipv6h; + struct tcphdr *th; + struct iphdr *iph; + struct sock *sk; + __be32 tcp_seq; + int err; + + iph = pkt + req->l3_offset; + ipv6h = pkt + req->l3_offset; + th = pkt + req->l4_offset; + + if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) { + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n", + req->l3_offset, req->l4_offset, pkt_len); + err = -EINVAL; + goto err_cnt_ign; + } + + switch (ipv6h->version) { + case 4: + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); + break; +#if IS_ENABLED(CONFIG_IPV6) + case 6: + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); + break; +#endif + default: + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n", + req->l3_offset, req->l4_offset, iph->version); + err = -EINVAL; + goto err_cnt_ign; + } + + err = 0; + if (!sk) + goto err_cnt_ign; + if (!tls_is_sk_rx_device_offloaded(sk) || + sk->sk_shutdown & RCV_SHUTDOWN) + goto err_put_sock; + + ntls = tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX); + /* some FW versions can't report the handle and report 0s */ + if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) && + memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle))) + goto err_put_sock; + + /* copy to ensure alignment */ + memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq)); + tls_offload_rx_resync_request(sk, tcp_seq); + atomic_inc(&nn->ktls_rx_resync_req); + + sock_gen_put(sk); + return 0; + +err_put_sock: + sock_gen_put(sk); +err_cnt_ign: + atomic_inc(&nn->ktls_rx_resync_ign); + return err; +} + +static int nfp_net_tls_reset(struct nfp_net *nn) +{ + struct nfp_crypto_req_reset *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->ep_id = 0; + + return nfp_net_tls_communicate_simple(nn, skb, "reset", + NFP_CCM_TYPE_CRYPTO_RESET); +} + +int nfp_net_tls_init(struct nfp_net *nn) +{ + struct net_device *netdev = nn->dp.netdev; + int err; + + if (!(nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK)) + return 0; + + if ((nn->tlv_caps.mbox_cmsg_types & NFP_NET_TLS_CCM_MBOX_OPS_MASK) != + NFP_NET_TLS_CCM_MBOX_OPS_MASK) + return 0; + + if (!nfp_ccm_mbox_fits(nn, sizeof(struct nfp_crypto_req_add_v6))) { + nn_warn(nn, "disabling TLS offload - mbox too small: %d\n", + nn->tlv_caps.mbox_len); + return 0; + } + + err = nfp_net_tls_reset(nn); + if (err) + return err; + + nn_ctrl_bar_lock(nn); + nn_writel(nn, nn->tlv_caps.crypto_enable_off, 0); + err = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + nn_ctrl_bar_unlock(nn); + if (err) + return err; + + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_RX) { + netdev->hw_features |= NETIF_F_HW_TLS_RX; + netdev->features |= NETIF_F_HW_TLS_RX; + } + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_TX) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + } + + netdev->tlsdev_ops = &nfp_net_tls_ops; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/devlink_param.c b/drivers/net/ethernet/netronome/nfp/devlink_param.c new file mode 100644 index 000000000..db297ee4d --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/devlink_param.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <net/devlink.h> + +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_main.h" + +/** + * struct nfp_devlink_param_u8_arg - Devlink u8 parameter get/set arguments + * @hwinfo_name: HWinfo key name + * @default_hi_val: Default HWinfo value if HWinfo doesn't exist + * @invalid_dl_val: Devlink value to use if HWinfo is unknown/invalid. + * -errno if there is no unknown/invalid value available + * @hi_to_dl: HWinfo to devlink value mapping + * @dl_to_hi: Devlink to hwinfo value mapping + * @max_dl_val: Maximum devlink value supported, for validation only + * @max_hi_val: Maximum HWinfo value supported, for validation only + */ +struct nfp_devlink_param_u8_arg { + const char *hwinfo_name; + const char *default_hi_val; + int invalid_dl_val; + u8 hi_to_dl[4]; + u8 dl_to_hi[4]; + u8 max_dl_val; + u8 max_hi_val; +}; + +static const struct nfp_devlink_param_u8_arg nfp_devlink_u8_args[] = { + [DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY] = { + .hwinfo_name = "app_fw_from_flash", + .default_hi_val = NFP_NSP_APP_FW_LOAD_DEFAULT, + .invalid_dl_val = + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN, + .hi_to_dl = { + [NFP_NSP_APP_FW_LOAD_DISK] = + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK, + [NFP_NSP_APP_FW_LOAD_FLASH] = + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, + [NFP_NSP_APP_FW_LOAD_PREF] = + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER, + }, + .dl_to_hi = { + [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER] = + NFP_NSP_APP_FW_LOAD_PREF, + [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH] = + NFP_NSP_APP_FW_LOAD_FLASH, + [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK] = + NFP_NSP_APP_FW_LOAD_DISK, + }, + .max_dl_val = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK, + .max_hi_val = NFP_NSP_APP_FW_LOAD_PREF, + }, + [DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE] = { + .hwinfo_name = "abi_drv_reset", + .default_hi_val = NFP_NSP_DRV_RESET_DEFAULT, + .invalid_dl_val = + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN, + .hi_to_dl = { + [NFP_NSP_DRV_RESET_ALWAYS] = + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS, + [NFP_NSP_DRV_RESET_NEVER] = + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER, + [NFP_NSP_DRV_RESET_DISK] = + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK, + }, + .dl_to_hi = { + [DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS] = + NFP_NSP_DRV_RESET_ALWAYS, + [DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER] = + NFP_NSP_DRV_RESET_NEVER, + [DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK] = + NFP_NSP_DRV_RESET_DISK, + }, + .max_dl_val = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK, + .max_hi_val = NFP_NSP_DRV_RESET_NEVER, + } +}; + +static int +nfp_devlink_param_u8_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + const struct nfp_devlink_param_u8_arg *arg; + struct nfp_pf *pf = devlink_priv(devlink); + struct nfp_nsp *nsp; + char hwinfo[32]; + long value; + int err; + + if (id >= ARRAY_SIZE(nfp_devlink_u8_args)) + return -EOPNOTSUPP; + + arg = &nfp_devlink_u8_args[id]; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + nfp_warn(pf->cpp, "can't access NSP: %d\n", err); + return err; + } + + snprintf(hwinfo, sizeof(hwinfo), arg->hwinfo_name); + err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo), + arg->default_hi_val); + if (err) { + nfp_warn(pf->cpp, "HWinfo lookup failed: %d\n", err); + goto exit_close_nsp; + } + + err = kstrtol(hwinfo, 0, &value); + if (err || value < 0 || value > arg->max_hi_val) { + nfp_warn(pf->cpp, "HWinfo '%s' value %li invalid\n", + arg->hwinfo_name, value); + + if (arg->invalid_dl_val >= 0) + ctx->val.vu8 = arg->invalid_dl_val; + else + err = arg->invalid_dl_val; + + goto exit_close_nsp; + } + + ctx->val.vu8 = arg->hi_to_dl[value]; + +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static int +nfp_devlink_param_u8_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + const struct nfp_devlink_param_u8_arg *arg; + struct nfp_pf *pf = devlink_priv(devlink); + struct nfp_nsp *nsp; + char hwinfo[32]; + int err; + + if (id >= ARRAY_SIZE(nfp_devlink_u8_args)) + return -EOPNOTSUPP; + + arg = &nfp_devlink_u8_args[id]; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + nfp_warn(pf->cpp, "can't access NSP: %d\n", err); + return err; + } + + /* Note the value has already been validated. */ + snprintf(hwinfo, sizeof(hwinfo), "%s=%u", + arg->hwinfo_name, arg->dl_to_hi[ctx->val.vu8]); + err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo)); + if (err) { + nfp_warn(pf->cpp, "HWinfo set failed: %d\n", err); + goto exit_close_nsp; + } + +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static int +nfp_devlink_param_u8_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + const struct nfp_devlink_param_u8_arg *arg; + + if (id >= ARRAY_SIZE(nfp_devlink_u8_args)) + return -EOPNOTSUPP; + + arg = &nfp_devlink_u8_args[id]; + + if (val.vu8 > arg->max_dl_val) { + NL_SET_ERR_MSG_MOD(extack, "parameter out of range"); + return -EINVAL; + } + + if (val.vu8 == arg->invalid_dl_val) { + NL_SET_ERR_MSG_MOD(extack, "unknown/invalid value specified"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param nfp_devlink_params[] = { + DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + nfp_devlink_param_u8_get, + nfp_devlink_param_u8_set, + nfp_devlink_param_u8_validate), + DEVLINK_PARAM_GENERIC(RESET_DEV_ON_DRV_PROBE, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + nfp_devlink_param_u8_get, + nfp_devlink_param_u8_set, + nfp_devlink_param_u8_validate), +}; + +static int nfp_devlink_supports_params(struct nfp_pf *pf) +{ + struct nfp_nsp *nsp; + bool supported; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + dev_err(&pf->pdev->dev, "Failed to access the NSP: %d\n", err); + return err; + } + + supported = nfp_nsp_has_hwinfo_lookup(nsp) && + nfp_nsp_has_hwinfo_set(nsp); + + nfp_nsp_close(nsp); + return supported; +} + +int nfp_devlink_params_register(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + int err; + + err = nfp_devlink_supports_params(pf); + if (err <= 0) + return err; + + return devlink_params_register(devlink, nfp_devlink_params, + ARRAY_SIZE(nfp_devlink_params)); +} + +void nfp_devlink_params_unregister(struct nfp_pf *pf) +{ + int err; + + err = nfp_devlink_supports_params(pf); + if (err <= 0) + return; + + devlink_params_unregister(priv_to_devlink(pf), nfp_devlink_params, + ARRAY_SIZE(nfp_devlink_params)); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c new file mode 100644 index 000000000..2b383d92d --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -0,0 +1,1307 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/mpls.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_mpls.h> +#include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" + +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) + +static int +nfp_fl_push_mpls(struct nfp_fl_push_mpls *push_mpls, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_push_mpls); + u32 mpls_lse = 0; + + push_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_MPLS; + push_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + /* BOS is optional in the TC action but required for offload. */ + if (act->mpls_push.bos != ACT_MPLS_BOS_NOT_SET) { + mpls_lse |= act->mpls_push.bos << MPLS_LS_S_SHIFT; + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: BOS field must explicitly be set for MPLS push"); + return -EOPNOTSUPP; + } + + /* Leave MPLS TC as a default value of 0 if not explicitly set. */ + if (act->mpls_push.tc != ACT_MPLS_TC_NOT_SET) + mpls_lse |= act->mpls_push.tc << MPLS_LS_TC_SHIFT; + + /* Proto, label and TTL are enforced and verified for MPLS push. */ + mpls_lse |= act->mpls_push.label << MPLS_LS_LABEL_SHIFT; + mpls_lse |= act->mpls_push.ttl << MPLS_LS_TTL_SHIFT; + push_mpls->ethtype = act->mpls_push.proto; + push_mpls->lse = cpu_to_be32(mpls_lse); + + return 0; +} + +static void +nfp_fl_pop_mpls(struct nfp_fl_pop_mpls *pop_mpls, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_pop_mpls); + + pop_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_POP_MPLS; + pop_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + pop_mpls->ethtype = act->mpls_pop.proto; +} + +static void +nfp_fl_set_mpls(struct nfp_fl_set_mpls *set_mpls, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_set_mpls); + u32 mpls_lse = 0, mpls_mask = 0; + + set_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_SET_MPLS; + set_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + if (act->mpls_mangle.label != ACT_MPLS_LABEL_NOT_SET) { + mpls_lse |= act->mpls_mangle.label << MPLS_LS_LABEL_SHIFT; + mpls_mask |= MPLS_LS_LABEL_MASK; + } + if (act->mpls_mangle.tc != ACT_MPLS_TC_NOT_SET) { + mpls_lse |= act->mpls_mangle.tc << MPLS_LS_TC_SHIFT; + mpls_mask |= MPLS_LS_TC_MASK; + } + if (act->mpls_mangle.bos != ACT_MPLS_BOS_NOT_SET) { + mpls_lse |= act->mpls_mangle.bos << MPLS_LS_S_SHIFT; + mpls_mask |= MPLS_LS_S_MASK; + } + if (act->mpls_mangle.ttl) { + mpls_lse |= act->mpls_mangle.ttl << MPLS_LS_TTL_SHIFT; + mpls_mask |= MPLS_LS_TTL_MASK; + } + + set_mpls->lse = cpu_to_be32(mpls_lse); + set_mpls->lse_mask = cpu_to_be32(mpls_mask); +} + +static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) +{ + size_t act_size = sizeof(struct nfp_fl_pop_vlan); + + pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN; + pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; + pop_vlan->reserved = 0; +} + +static void +nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_push_vlan); + u16 tmp_push_vlan_tci; + + push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; + push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push_vlan->reserved = 0; + push_vlan->vlan_tpid = act->vlan.proto; + + tmp_push_vlan_tci = + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid); + push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); +} + +static int +nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_flow, int act_len, + struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_pre_lag); + struct nfp_fl_pre_lag *pre_lag; + struct net_device *out_dev; + int err; + + out_dev = act->dev; + if (!out_dev || !netif_is_lag_master(out_dev)) + return 0; + + if (act_len + act_size > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at LAG action"); + return -EOPNOTSUPP; + } + + /* Pre_lag action must be first on action list. + * If other actions already exist they need to be pushed forward. + */ + if (act_len) + memmove(nfp_flow->action_data + act_size, + nfp_flow->action_data, act_len); + + pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data; + err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag, extack); + if (err) + return err; + + pre_lag->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_LAG; + pre_lag->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + return act_size; +} + +static int +nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, + const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_flow, + bool last, struct net_device *in_dev, + enum nfp_flower_tun_type tun_type, int *tun_out_cnt, + bool pkt_host, struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_output); + struct nfp_flower_priv *priv = app->priv; + struct net_device *out_dev; + u16 tmp_flags; + + output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT; + output->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + out_dev = act->dev; + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid egress interface for mirred action"); + return -EOPNOTSUPP; + } + + tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; + + if (tun_type) { + /* Verify the egress netdev matches the tunnel type. */ + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface does not match the required tunnel type"); + return -EOPNOTSUPP; + } + + if (*tun_out_cnt) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot offload more than one tunnel mirred output per filter"); + return -EOPNOTSUPP; + } + (*tun_out_cnt)++; + + output->flags = cpu_to_be16(tmp_flags | + NFP_FL_OUT_FLAGS_USE_TUN); + output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else if (netif_is_lag_master(out_dev) && + priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + int gid; + + output->flags = cpu_to_be16(tmp_flags); + gid = nfp_flower_lag_get_output_id(app, out_dev); + if (gid < 0) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot find group id for LAG action"); + return gid; + } + output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); + } else if (nfp_flower_internal_port_can_offload(app, out_dev)) { + if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES) && + !(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules not supported in loaded firmware"); + return -EOPNOTSUPP; + } + + if (nfp_flow->pre_tun_rule.dev || !pkt_host) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules require single egress dev and ptype HOST action"); + return -EOPNOTSUPP; + } + + nfp_flow->pre_tun_rule.dev = out_dev; + + return 0; + } else { + /* Set action output parameters. */ + output->flags = cpu_to_be16(tmp_flags); + + if (nfp_netdev_is_nfp_repr(in_dev)) { + /* Confirm ingress and egress are on same device. */ + if (!netdev_port_same_parent_id(in_dev, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress and egress interfaces are on different devices"); + return -EOPNOTSUPP; + } + } + + if (!nfp_netdev_is_nfp_repr(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface is not an nfp port"); + return -EOPNOTSUPP; + } + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid port id for egress interface"); + return -EOPNOTSUPP; + } + } + nfp_flow->meta.shortcut = output->port; + + return 0; +} + +static bool +nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx) +{ + struct flow_action_entry *act = rule->action.entries; + int num_act = rule->action.num_entries; + int act_idx; + + /* Preparse action list for next mirred or redirect action */ + for (act_idx = start_idx + 1; act_idx < num_act; act_idx++) + if (act[act_idx].id == FLOW_ACTION_REDIRECT || + act[act_idx].id == FLOW_ACTION_MIRRED) + return netif_is_gretap(act[act_idx].dev) || + netif_is_ip6gretap(act[act_idx].dev); + + return false; +} + +static enum nfp_flower_tun_type +nfp_fl_get_tun_from_act(struct nfp_app *app, + struct flow_rule *rule, + const struct flow_action_entry *act, int act_idx) +{ + const struct ip_tunnel_info *tun = act->tunnel; + struct nfp_flower_priv *priv = app->priv; + + /* Determine the tunnel type based on the egress netdev + * in the mirred action for tunnels without l4. + */ + if (nfp_flower_tun_is_gre(rule, act_idx)) + return NFP_FL_TUNNEL_GRE; + + switch (tun->key.tp_dst) { + case htons(IANA_VXLAN_UDP_PORT): + return NFP_FL_TUNNEL_VXLAN; + case htons(GENEVE_UDP_PORT): + if (priv->flower_ext_feats & NFP_FL_FEATS_GENEVE) + return NFP_FL_TUNNEL_GENEVE; + fallthrough; + default: + return NFP_FL_TUNNEL_NONE; + } +} + +static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_tunnel); + struct nfp_fl_pre_tunnel *pre_tun_act; + + /* Pre_tunnel action must be first on action list. + * If other actions already exist they need to be pushed forward. + */ + if (act_len) + memmove(act_data + act_size, act_data, act_len); + + pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data; + + memset(pre_tun_act, 0, act_size); + + pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL; + pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return pre_tun_act; +} + +static int +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel; + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. + */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed number of geneve options exceeded"); + return -EOPNOTSUPP; + } + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); + return -EOPNOTSUPP; + } + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); + return -EOPNOTSUPP; + } + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) +{ + const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); + struct nfp_flower_priv *priv = app->priv; + u32 tmp_set_ip_tun_type_index = 0; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; + + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve options offload"); + return -EOPNOTSUPP; + } + + if (ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: loaded firmware does not support tunnel flag offload"); + return -EOPNOTSUPP; + } + + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; + set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + /* Set tunnel type and pre-tunnel index. */ + tmp_set_ip_tun_type_index |= + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); + + set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); + if (ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) + set_tun->tun_id = ip_tun->key.tun_id; + + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif + } else { + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. + */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + } + } + + set_tun->tos = ip_tun->key.tos; + set_tun->tun_flags = ip_tun->key.tun_flags; + + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } + + /* Complete pre_tunnel action. */ + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } + + return 0; +} + +static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) +{ + u32 oldvalue = get_unaligned((u32 *)p_exact); + u32 oldmask = get_unaligned((u32 *)p_mask); + + value &= mask; + value |= oldvalue & ~mask; + + put_unaligned(oldmask | mask, (u32 *)p_mask); + put_unaligned(value, (u32 *)p_exact); +} + +static int +nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_eth *set_eth, struct netlink_ext_ack *extack) +{ + u32 exact, mask; + + if (off + 4 > ETH_ALEN * 2) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + exact = act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); + return -EOPNOTSUPP; + } + + nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], + &set_eth->eth_addr_mask[off]); + + set_eth->reserved = cpu_to_be16(0); + set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET; + set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ; + + return 0; +} + +struct ipv4_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +static int +nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_ip4_addrs *set_ip_addr, + struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos, + struct netlink_ext_ack *extack) +{ + struct ipv4_ttl_word *ttl_word_mask; + struct ipv4_ttl_word *ttl_word; + struct iphdr *tos_word_mask; + struct iphdr *tos_word; + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 action"); + return -EOPNOTSUPP; + } + + switch (off) { + case offsetof(struct iphdr, daddr): + set_ip_addr->ipv4_dst_mask |= mask; + set_ip_addr->ipv4_dst &= ~mask; + set_ip_addr->ipv4_dst |= exact & mask; + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> + NFP_FL_LW_SIZ; + break; + case offsetof(struct iphdr, saddr): + set_ip_addr->ipv4_src_mask |= mask; + set_ip_addr->ipv4_src &= ~mask; + set_ip_addr->ipv4_src |= exact & mask; + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> + NFP_FL_LW_SIZ; + break; + case offsetof(struct iphdr, ttl): + ttl_word_mask = (struct ipv4_ttl_word *)&mask; + ttl_word = (struct ipv4_ttl_word *)&exact; + + if (ttl_word_mask->protocol || ttl_word_mask->check) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 ttl action"); + return -EOPNOTSUPP; + } + + set_ip_ttl_tos->ipv4_ttl_mask |= ttl_word_mask->ttl; + set_ip_ttl_tos->ipv4_ttl &= ~ttl_word_mask->ttl; + set_ip_ttl_tos->ipv4_ttl |= ttl_word->ttl & ttl_word_mask->ttl; + set_ip_ttl_tos->head.jump_id = + NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS; + set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >> + NFP_FL_LW_SIZ; + break; + case round_down(offsetof(struct iphdr, tos), 4): + tos_word_mask = (struct iphdr *)&mask; + tos_word = (struct iphdr *)&exact; + + if (tos_word_mask->version || tos_word_mask->ihl || + tos_word_mask->tot_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 tos action"); + return -EOPNOTSUPP; + } + + set_ip_ttl_tos->ipv4_tos_mask |= tos_word_mask->tos; + set_ip_ttl_tos->ipv4_tos &= ~tos_word_mask->tos; + set_ip_ttl_tos->ipv4_tos |= tos_word->tos & tos_word_mask->tos; + set_ip_ttl_tos->head.jump_id = + NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS; + set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >> + NFP_FL_LW_SIZ; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv4 header"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void +nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_addr *ip6) +{ + ip6->ipv6[word].mask |= mask; + ip6->ipv6[word].exact &= ~mask; + ip6->ipv6[word].exact |= exact & mask; + + ip6->reserved = cpu_to_be16(0); + ip6->head.jump_id = opcode_tag; + ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ; +} + +struct ipv6_hop_limit_word { + __be16 payload_len; + u8 nexthdr; + u8 hop_limit; +}; + +static int +nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) +{ + struct ipv6_hop_limit_word *fl_hl_mask; + struct ipv6_hop_limit_word *fl_hl; + + switch (off) { + case offsetof(struct ipv6hdr, payload_len): + fl_hl_mask = (struct ipv6_hop_limit_word *)&mask; + fl_hl = (struct ipv6_hop_limit_word *)&exact; + + if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 hop limit action"); + return -EOPNOTSUPP; + } + + ip_hl_fl->ipv6_hop_limit_mask |= fl_hl_mask->hop_limit; + ip_hl_fl->ipv6_hop_limit &= ~fl_hl_mask->hop_limit; + ip_hl_fl->ipv6_hop_limit |= fl_hl->hop_limit & + fl_hl_mask->hop_limit; + break; + case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): + if (mask & ~IPV6_FLOWINFO_MASK || + exact & ~IPV6_FLOWINFO_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow info action"); + return -EOPNOTSUPP; + } + + ip_hl_fl->ipv6_label_mask |= mask; + ip_hl_fl->ipv6_label &= ~mask; + ip_hl_fl->ipv6_label |= exact & mask; + break; + } + + ip_hl_fl->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL; + ip_hl_fl->head.len_lw = sizeof(*ip_hl_fl) >> NFP_FL_LW_SIZ; + + return 0; +} + +static int +nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_ipv6_addr *ip_dst, + struct nfp_fl_set_ipv6_addr *ip_src, + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) +{ + __be32 exact, mask; + int err = 0; + u8 word; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 action"); + return -EOPNOTSUPP; + } + + if (off < offsetof(struct ipv6hdr, saddr)) { + err = nfp_fl_set_ip6_hop_limit_flow_label(off, exact, mask, + ip_hl_fl, extack); + } else if (off < offsetof(struct ipv6hdr, daddr)) { + word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, + exact, mask, ip_src); + } else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) { + word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, + exact, mask, ip_dst); + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv6 header"); + return -EOPNOTSUPP; + } + + return err; +} + +static int +nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_tport *set_tport, int opcode, + struct netlink_ext_ack *extack) +{ + u32 exact, mask; + + if (off) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of L4 header"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + exact = act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit L4 action"); + return -EOPNOTSUPP; + } + + nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, + set_tport->tp_port_mask); + + set_tport->reserved = cpu_to_be16(0); + set_tport->head.jump_id = opcode; + set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ; + + return 0; +} + +static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto) +{ + switch (ip_proto) { + case 0: + /* Filter doesn't force proto match, + * both TCP and UDP will be updated if encountered + */ + return TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP; + case IPPROTO_TCP: + return TCA_CSUM_UPDATE_FLAG_TCP; + case IPPROTO_UDP: + return TCA_CSUM_UPDATE_FLAG_UDP; + default: + /* All other protocols will be ignored by FW */ + return 0; + } +} + +struct nfp_flower_pedit_acts { + struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; + struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl; + struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos; + struct nfp_fl_set_ip4_addrs set_ip_addr; + struct nfp_fl_set_tport set_tport; + struct nfp_fl_set_eth set_eth; +}; + +static int +nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action, + int *a_len, struct nfp_flower_pedit_acts *set_act, + u32 *csum_updated) +{ + size_t act_size = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; + } + + if (set_act->set_eth.head.len_lw) { + act_size = sizeof(set_act->set_eth); + memcpy(nfp_action, &set_act->set_eth, act_size); + *a_len += act_size; + } + + if (set_act->set_ip_ttl_tos.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip_ttl_tos); + memcpy(nfp_action, &set_act->set_ip_ttl_tos, act_size); + *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip_addr.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip_addr); + memcpy(nfp_action, &set_act->set_ip_addr, act_size); + *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip6_tc_hl_fl.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_tc_hl_fl); + memcpy(nfp_action, &set_act->set_ip6_tc_hl_fl, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip6_dst.head.len_lw && + set_act->set_ip6_src.head.len_lw) { + /* TC compiles set src and dst IPv6 address as a single action, + * the hardware requires this to be 2 separate actions. + */ + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_src); + memcpy(nfp_action, &set_act->set_ip6_src, act_size); + *a_len += act_size; + + act_size = sizeof(set_act->set_ip6_dst); + memcpy(&nfp_action[sizeof(set_act->set_ip6_src)], + &set_act->set_ip6_dst, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } else if (set_act->set_ip6_dst.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_dst); + memcpy(nfp_action, &set_act->set_ip6_dst, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } else if (set_act->set_ip6_src.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_src); + memcpy(nfp_action, &set_act->set_ip6_src, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + if (set_act->set_tport.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_tport); + memcpy(nfp_action, &set_act->set_tport, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + + return 0; +} + +static int +nfp_fl_pedit(const struct flow_action_entry *act, + char *nfp_action, int *a_len, + u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack) +{ + enum flow_action_mangle_base htype; + u32 offset; + + htype = act->mangle.htype; + offset = act->mangle.offset; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + return nfp_fl_set_eth(act, offset, &set_act->set_eth, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + return nfp_fl_set_ip4(act, offset, &set_act->set_ip_addr, + &set_act->set_ip_ttl_tos, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + return nfp_fl_set_ip6(act, offset, &set_act->set_ip6_dst, + &set_act->set_ip6_src, + &set_act->set_ip6_tc_hl_fl, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + return nfp_fl_set_tport(act, offset, &set_act->set_tport, + NFP_FL_ACTION_OPCODE_SET_TCP, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + return nfp_fl_set_tport(act, offset, &set_act->set_tport, + NFP_FL_ACTION_OPCODE_SET_UDP, extack); + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported header"); + return -EOPNOTSUPP; + } +} + +static struct nfp_fl_meter *nfp_fl_meter(char *act_data) +{ + size_t act_size = sizeof(struct nfp_fl_meter); + struct nfp_fl_meter *meter_act; + + meter_act = (struct nfp_fl_meter *)act_data; + + memset(meter_act, 0, act_size); + + meter_act->head.jump_id = NFP_FL_ACTION_OPCODE_METER; + meter_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return meter_act; +} + +static int +nfp_flower_meter_action(struct nfp_app *app, + const struct flow_action_entry *action, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_meter *fl_meter; + u32 meter_id; + + if (*a_len + sizeof(struct nfp_fl_meter) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload:meter action size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + meter_id = action->hw_index; + if (!nfp_flower_search_meter_entry(app, meter_id)) { + NL_SET_ERR_MSG_MOD(extack, + "can not offload flow table with unsupported police action."); + return -EOPNOTSUPP; + } + + fl_meter = nfp_fl_meter(&nfp_fl->action_data[*a_len]); + *a_len += sizeof(struct nfp_fl_meter); + fl_meter->meter_id = cpu_to_be32(meter_id); + + return 0; +} + +static int +nfp_flower_output_action(struct nfp_app *app, + const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, bool last, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt, u32 *csum_updated, bool pkt_host, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_output *output; + int err, prelag_size; + + /* If csum_updated has not been reset by now, it means HW will + * incorrectly update csums when they are not requested. + */ + if (*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: set actions without updating checksums are not supported"); + return -EOPNOTSUPP; + } + + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: mirred output increases action list size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type, + tun_out_cnt, pkt_host, extack); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_output); + + if (priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + /* nfp_fl_pre_lag returns -err or size of prelag action added. + * This will be 0 if it is not egressing to a lag dev. + */ + prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len, extack); + if (prelag_size < 0) { + return prelag_size; + } else if (prelag_size > 0 && (!last || *out_cnt)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: LAG action has to be last action in action list"); + return -EOPNOTSUPP; + } + + *a_len += prelag_size; + } + (*out_cnt)++; + + return 0; +} + +static int +nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, + struct flow_rule *rule, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt, u32 *csum_updated, + struct nfp_flower_pedit_acts *set_act, bool *pkt_host, + struct netlink_ext_ack *extack, int act_idx) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; + struct nfp_fl_push_vlan *psh_v; + struct nfp_fl_push_mpls *psh_m; + struct nfp_fl_pop_vlan *pop_v; + struct nfp_fl_pop_mpls *pop_m; + struct nfp_fl_set_mpls *set_m; + int err; + + switch (act->id) { + case FLOW_ACTION_DROP: + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP); + break; + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_REDIRECT: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, + true, tun_type, tun_out_cnt, + out_cnt, csum_updated, *pkt_host, + extack); + if (err) + return err; + break; + case FLOW_ACTION_MIRRED_INGRESS: + case FLOW_ACTION_MIRRED: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, + false, tun_type, tun_out_cnt, + out_cnt, csum_updated, *pkt_host, + extack); + if (err) + return err; + break; + case FLOW_ACTION_VLAN_POP: + if (*a_len + + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop vlan"); + return -EOPNOTSUPP; + } + + pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV); + + nfp_fl_pop_vlan(pop_v); + *a_len += sizeof(struct nfp_fl_pop_vlan); + break; + case FLOW_ACTION_VLAN_PUSH: + if (*a_len + + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push vlan"); + return -EOPNOTSUPP; + } + + psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_push_vlan(psh_v, act); + *a_len += sizeof(struct nfp_fl_push_vlan); + break; + case FLOW_ACTION_TUNNEL_ENCAP: { + const struct ip_tunnel_info *ip_tun = act->tunnel; + + *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx); + if (*tun_type == NFP_FL_TUNNEL_NONE) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); + return -EOPNOTSUPP; + } + + if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel flags in action list"); + return -EOPNOTSUPP; + } + + /* Pre-tunnel action is required for tunnel encap. + * This checks for next hop entries on NFP. + * If none, the packet falls back before applying other actions. + */ + if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); + return -EOPNOTSUPP; + } + + pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + err = nfp_fl_push_geneve_options(nfp_fl, a_len, act, extack); + if (err) + return err; + + set_tun = (void *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); + if (err) + return err; + *a_len += sizeof(struct nfp_fl_set_tun); + } + break; + case FLOW_ACTION_TUNNEL_DECAP: + /* Tunnel decap is handled by default so accept action. */ + return 0; + case FLOW_ACTION_MANGLE: + if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len], + a_len, csum_updated, set_act, extack)) + return -EOPNOTSUPP; + break; + case FLOW_ACTION_CSUM: + /* csum action requests recalc of something we have not fixed */ + if (act->csum_flags & ~*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported csum update action in action list"); + return -EOPNOTSUPP; + } + /* If we will correctly fix the csum we can remove it from the + * csum update list. Which will later be used to check support. + */ + *csum_updated &= ~act->csum_flags; + break; + case FLOW_ACTION_MPLS_PUSH: + if (*a_len + + sizeof(struct nfp_fl_push_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push MPLS"); + return -EOPNOTSUPP; + } + + psh_m = (struct nfp_fl_push_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + err = nfp_fl_push_mpls(psh_m, act, extack); + if (err) + return err; + *a_len += sizeof(struct nfp_fl_push_mpls); + break; + case FLOW_ACTION_MPLS_POP: + if (*a_len + + sizeof(struct nfp_fl_pop_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop MPLS"); + return -EOPNOTSUPP; + } + + pop_m = (struct nfp_fl_pop_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_pop_mpls(pop_m, act); + *a_len += sizeof(struct nfp_fl_pop_mpls); + break; + case FLOW_ACTION_MPLS_MANGLE: + if (*a_len + + sizeof(struct nfp_fl_set_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at set MPLS"); + return -EOPNOTSUPP; + } + + set_m = (struct nfp_fl_set_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_set_mpls(set_m, act); + *a_len += sizeof(struct nfp_fl_set_mpls); + break; + case FLOW_ACTION_PTYPE: + /* TC ptype skbedit sets PACKET_HOST for ingress redirect. */ + if (act->ptype != PACKET_HOST) + return -EOPNOTSUPP; + + *pkt_host = true; + break; + case FLOW_ACTION_POLICE: + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported police action in action list"); + return -EOPNOTSUPP; + } + + err = nfp_flower_meter_action(app, act, nfp_fl, a_len, netdev, + extack); + if (err) + return err; + break; + default: + /* Currently we do not handle any other actions. */ + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool nfp_fl_check_mangle_start(struct flow_action *flow_act, + int current_act_idx) +{ + struct flow_action_entry current_act; + struct flow_action_entry prev_act; + + current_act = flow_act->entries[current_act_idx]; + if (current_act.id != FLOW_ACTION_MANGLE) + return false; + + if (current_act_idx == 0) + return true; + + prev_act = flow_act->entries[current_act_idx - 1]; + + return prev_act.id != FLOW_ACTION_MANGLE; +} + +static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, + int current_act_idx) +{ + struct flow_action_entry current_act; + struct flow_action_entry next_act; + + current_act = flow_act->entries[current_act_idx]; + if (current_act.id != FLOW_ACTION_MANGLE) + return false; + + if (current_act_idx == flow_act->num_entries) + return true; + + next_act = flow_act->entries[current_act_idx + 1]; + + return next_act.id != FLOW_ACTION_MANGLE; +} + +int nfp_flower_compile_action(struct nfp_app *app, + struct flow_rule *rule, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack) +{ + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; + struct nfp_flower_pedit_acts set_act; + enum nfp_flower_tun_type tun_type; + struct flow_action_entry *act; + bool pkt_host = false; + u32 csum_updated = 0; + + if (!flow_action_hw_stats_check(&rule->action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + + memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); + nfp_flow->meta.act_len = 0; + tun_type = NFP_FL_TUNNEL_NONE; + act_len = 0; + act_cnt = 0; + tun_out_cnt = 0; + out_cnt = 0; + + flow_action_for_each(i, act, &rule->action) { + if (nfp_fl_check_mangle_start(&rule->action, i)) + memset(&set_act, 0, sizeof(set_act)); + err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len, + netdev, &tun_type, &tun_out_cnt, + &out_cnt, &csum_updated, + &set_act, &pkt_host, extack, i); + if (err) + return err; + act_cnt++; + if (nfp_fl_check_mangle_end(&rule->action, i)) + nfp_fl_commit_mangle(rule, + &nfp_flow->action_data[act_len], + &act_len, &set_act, &csum_updated); + } + + /* We optimise when the action list is small, this can unfortunately + * not happen once we have more than one action in the action list. + */ + if (act_cnt > 1) + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_flow->meta.act_len = act_len; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c new file mode 100644 index 000000000..153533cd8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/dst_metadata.h> + +#include "main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "./cmsg.h" + +static struct nfp_flower_cmsg_hdr * +nfp_flower_cmsg_get_hdr(struct sk_buff *skb) +{ + return (struct nfp_flower_cmsg_hdr *)skb->data; +} + +struct sk_buff * +nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, + enum nfp_flower_cmsg_type_port type, gfp_t flag) +{ + struct nfp_flower_cmsg_hdr *ch; + struct sk_buff *skb; + + size += NFP_FLOWER_CMSG_HLEN; + + skb = nfp_app_ctrl_msg_alloc(app, size, flag); + if (!skb) + return NULL; + + ch = nfp_flower_cmsg_get_hdr(skb); + ch->pad = 0; + ch->version = NFP_FLOWER_CMSG_VER1; + ch->type = type; + skb_put(skb, size); + + return skb; +} + +struct sk_buff * +nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) +{ + struct nfp_flower_cmsg_mac_repr *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports), + NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL); + if (!skb) + return NULL; + + msg = nfp_flower_cmsg_get_data(skb); + memset(msg->reserved, 0, sizeof(msg->reserved)); + msg->num_ports = num_ports; + + return skb; +} + +void +nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx, + unsigned int nbi, unsigned int nbi_port, + unsigned int phys_port) +{ + struct nfp_flower_cmsg_mac_repr *msg; + + msg = nfp_flower_cmsg_get_data(skb); + msg->ports[idx].idx = idx; + msg->ports[idx].info = nbi & NFP_FLOWER_CMSG_MAC_REPR_NBI; + msg->ports[idx].nbi_port = nbi_port; + msg->ports[idx].phys_port = phys_port; +} + +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok, + unsigned int mtu, bool mtu_only) +{ + struct nfp_flower_cmsg_portmod *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), + NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id); + msg->reserved = 0; + msg->info = carrier_ok; + + if (mtu_only) + msg->info |= NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY; + + msg->mtu = cpu_to_be16(mtu); + + nfp_ctrl_tx(repr->app->ctrl, skb); + + return 0; +} + +int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists) +{ + struct nfp_flower_cmsg_portreify *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), + NFP_FLOWER_CMSG_TYPE_PORT_REIFY, + GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id); + msg->reserved = 0; + msg->info = cpu_to_be16(exists); + + nfp_ctrl_tx(repr->app->ctrl, skb); + + return 0; +} + +static bool +nfp_flower_process_mtu_ack(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_flower_cmsg_portmod *msg; + + msg = nfp_flower_cmsg_get_data(skb); + + if (!(msg->info & NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY)) + return false; + + spin_lock_bh(&app_priv->mtu_conf.lock); + if (!app_priv->mtu_conf.requested_val || + app_priv->mtu_conf.portnum != be32_to_cpu(msg->portnum) || + be16_to_cpu(msg->mtu) != app_priv->mtu_conf.requested_val) { + /* Not an ack for requested MTU change. */ + spin_unlock_bh(&app_priv->mtu_conf.lock); + return false; + } + + app_priv->mtu_conf.ack = true; + app_priv->mtu_conf.requested_val = 0; + wake_up(&app_priv->mtu_conf.wait_q); + spin_unlock_bh(&app_priv->mtu_conf.lock); + + return true; +} + +static void +nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_portmod *msg; + struct net_device *netdev; + bool link; + + msg = nfp_flower_cmsg_get_data(skb); + link = msg->info & NFP_FLOWER_CMSG_PORTMOD_INFO_LINK; + + rtnl_lock(); + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL); + rcu_read_unlock(); + if (!netdev) { + nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n", + be32_to_cpu(msg->portnum)); + rtnl_unlock(); + return; + } + + if (link) { + u16 mtu = be16_to_cpu(msg->mtu); + + netif_carrier_on(netdev); + + /* An MTU of 0 from the firmware should be ignored */ + if (mtu) + dev_set_mtu(netdev, mtu); + } else { + netif_carrier_off(netdev); + } + rtnl_unlock(); +} + +static void +nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_cmsg_portreify *msg; + bool exists; + + msg = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + exists = !!nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL); + rcu_read_unlock(); + if (!exists) { + nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n", + be32_to_cpu(msg->portnum)); + return; + } + + atomic_inc(&priv->reify_replies); + wake_up(&priv->reify_wait_queue); +} + +static void +nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) +{ + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); + struct nfp_flower_cmsg_merge_hint *msg; + struct nfp_fl_payload *sub_flows[2]; + struct nfp_flower_priv *priv; + int err, i, flow_cnt; + + msg = nfp_flower_cmsg_get_data(skb); + /* msg->count starts at 0 and always assumes at least 1 entry. */ + flow_cnt = msg->count + 1; + + if (msg_len < struct_size(msg, flow, flow_cnt)) { + nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %d bytes but expect %zd\n", + msg_len, struct_size(msg, flow, flow_cnt)); + return; + } + + if (flow_cnt != 2) { + nfp_flower_cmsg_warn(app, "Merge hint contains %d flows - two are expected\n", + flow_cnt); + return; + } + + priv = app->priv; + mutex_lock(&priv->nfp_fl_lock); + for (i = 0; i < flow_cnt; i++) { + u32 ctx = be32_to_cpu(msg->flow[i].host_ctx); + + sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx); + if (!sub_flows[i]) { + nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n"); + goto err_mutex_unlock; + } + } + + err = nfp_flower_merge_offloaded_flows(app, sub_flows[0], sub_flows[1]); + /* Only warn on memory fail. Hint veto will not break functionality. */ + if (err == -ENOMEM) + nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n"); + +err_mutex_unlock: + mutex_unlock(&priv->nfp_fl_lock); +} + +static void +nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_flower_cmsg_hdr *cmsg_hdr; + enum nfp_flower_cmsg_type_port type; + bool skb_stored = false; + + cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); + + type = cmsg_hdr->type; + switch (type) { + case NFP_FLOWER_CMSG_TYPE_PORT_MOD: + nfp_flower_cmsg_portmod_rx(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_MERGE_HINT: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) { + nfp_flower_cmsg_merge_hint_rx(app, skb); + break; + } + goto err_default; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: + nfp_tunnel_keep_alive(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_QOS_STATS: + nfp_flower_stats_rlim_reply(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + skb_stored = nfp_flower_lag_unprocessed_msg(app, skb); + break; + } + fallthrough; + default: +err_default: + nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", + type); + goto out; + } + + if (!skb_stored) + dev_consume_skb_any(skb); + return; +out: + dev_kfree_skb_any(skb); +} + +void nfp_flower_cmsg_process_rx(struct work_struct *work) +{ + struct sk_buff_head cmsg_joined; + struct nfp_flower_priv *priv; + struct sk_buff *skb; + + priv = container_of(work, struct nfp_flower_priv, cmsg_work); + skb_queue_head_init(&cmsg_joined); + + spin_lock_bh(&priv->cmsg_skbs_high.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_high, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_high.lock); + + spin_lock_bh(&priv->cmsg_skbs_low.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_low, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_low.lock); + + while ((skb = __skb_dequeue(&cmsg_joined))) + nfp_flower_cmsg_process_one_rx(priv->app, skb); +} + +static void +nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type) +{ + struct nfp_flower_priv *priv = app->priv; + struct sk_buff_head *skb_head; + + if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) + skb_head = &priv->cmsg_skbs_high; + else + skb_head = &priv->cmsg_skbs_low; + + if (skb_queue_len(skb_head) >= NFP_FLOWER_WORKQ_MAX_SKBS) { + nfp_flower_cmsg_warn(app, "Dropping queued control messages\n"); + dev_kfree_skb_any(skb); + return; + } + + skb_queue_tail(skb_head, skb); + schedule_work(&priv->cmsg_work); +} + +void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_hdr *cmsg_hdr; + + cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); + + if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) { + nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n", + cmsg_hdr->version); + dev_kfree_skb_any(skb); + return; + } + + if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_FLOW_STATS) { + /* We need to deal with stats updates from HW asap */ + nfp_flower_rx_flow_stats(app, skb); + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_MOD && + nfp_flower_process_mtu_ack(app, skb)) { + /* Handle MTU acks outside wq to prevent RTNL conflict. */ + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { + /* Acks from the NFP that the route is added - ignore. */ + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { + /* Handle REIFY acks outside wq to prevent RTNL conflict. */ + nfp_flower_cmsg_portreify_rx(app, skb); + dev_consume_skb_any(skb); + } else { + nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h new file mode 100644 index 000000000..2df2af1da --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef NFP_FLOWER_CMSG_H +#define NFP_FLOWER_CMSG_H + +#include <linux/bitfield.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/geneve.h> +#include <net/gre.h> +#include <net/vxlan.h> + +#include "../nfp_app.h" +#include "../nfpcore/nfp_cpp.h" + +#define NFP_FLOWER_LAYER_EXT_META BIT(0) +#define NFP_FLOWER_LAYER_PORT BIT(1) +#define NFP_FLOWER_LAYER_MAC BIT(2) +#define NFP_FLOWER_LAYER_TP BIT(3) +#define NFP_FLOWER_LAYER_IPV4 BIT(4) +#define NFP_FLOWER_LAYER_IPV6 BIT(5) +#define NFP_FLOWER_LAYER_CT BIT(6) +#define NFP_FLOWER_LAYER_VXLAN BIT(7) + +#define NFP_FLOWER_LAYER2_GRE BIT(0) +#define NFP_FLOWER_LAYER2_QINQ BIT(4) +#define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) + +#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) +#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) + +#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) +#define NFP_FLOWER_MASK_MPLS_TC GENMASK(11, 9) +#define NFP_FLOWER_MASK_MPLS_BOS BIT(8) +#define NFP_FLOWER_MASK_MPLS_Q BIT(0) + +#define NFP_FL_IP_FRAG_FIRST BIT(7) +#define NFP_FL_IP_FRAGMENTED BIT(6) + +/* GRE Tunnel flags */ +#define NFP_FL_GRE_FLAG_KEY BIT(2) + +/* Compressed HW representation of TCP Flags */ +#define NFP_FL_TCP_FLAG_URG BIT(4) +#define NFP_FL_TCP_FLAG_PSH BIT(3) +#define NFP_FL_TCP_FLAG_RST BIT(2) +#define NFP_FL_TCP_FLAG_SYN BIT(1) +#define NFP_FL_TCP_FLAG_FIN BIT(0) + +#define NFP_FL_SC_ACT_DROP 0x80000000 +#define NFP_FL_SC_ACT_USER 0x7D000000 +#define NFP_FL_SC_ACT_POPV 0x6A000000 +#define NFP_FL_SC_ACT_NULL 0x00000000 + +/* The maximum action list size (in bytes) supported by the NFP. + */ +#define NFP_FL_MAX_A_SIZ 1216 +#define NFP_FL_LW_SIZ 2 + +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 + +/* Action opcodes */ +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 +#define NFP_FL_ACTION_OPCODE_POP_MPLS 4 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 +#define NFP_FL_ACTION_OPCODE_SET_MPLS 8 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS 10 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL 13 +#define NFP_FL_ACTION_OPCODE_SET_UDP 14 +#define NFP_FL_ACTION_OPCODE_SET_TCP 15 +#define NFP_FL_ACTION_OPCODE_PRE_LAG 16 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_METER 24 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 +#define NFP_FL_ACTION_OPCODE_NUM 32 + +#define NFP_FL_OUT_FLAGS_LAST BIT(15) +#define NFP_FL_OUT_FLAGS_USE_TUN BIT(4) +#define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) + +#define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) +#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) + +/* LAG ports */ +#define NFP_FL_LAG_OUT 0xC0DE0000 + +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN 0x50000000 +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) + +#define NFP_FLOWER_WORKQ_MAX_SKBS 30000 + +/* Cmesg reply (empirical) timeout*/ +#define NFP_FL_REPLY_TIMEOUT msecs_to_jiffies(40) + +#define nfp_flower_cmsg_warn(app, fmt, args...) \ + do { \ + if (net_ratelimit()) \ + nfp_warn((app)->cpp, fmt, ## args); \ + } while (0) + +enum nfp_flower_tun_type { + NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_GRE = 1, + NFP_FL_TUNNEL_VXLAN = 2, + NFP_FL_TUNNEL_GENEVE = 4, +}; + +struct nfp_fl_act_head { + u8 jump_id; + u8 len_lw; +}; + +struct nfp_fl_set_eth { + struct nfp_fl_act_head head; + __be16 reserved; + u8 eth_addr_mask[ETH_ALEN * 2]; + u8 eth_addr_val[ETH_ALEN * 2]; +}; + +struct nfp_fl_set_ip4_addrs { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 ipv4_src_mask; + __be32 ipv4_src; + __be32 ipv4_dst_mask; + __be32 ipv4_dst; +}; + +struct nfp_fl_set_ip4_ttl_tos { + struct nfp_fl_act_head head; + u8 ipv4_ttl_mask; + u8 ipv4_tos_mask; + u8 ipv4_ttl; + u8 ipv4_tos; + __be16 reserved; +}; + +struct nfp_fl_set_ipv6_tc_hl_fl { + struct nfp_fl_act_head head; + u8 ipv6_tc_mask; + u8 ipv6_hop_limit_mask; + __be16 reserved; + u8 ipv6_tc; + u8 ipv6_hop_limit; + __be32 ipv6_label_mask; + __be32 ipv6_label; +}; + +struct nfp_fl_set_ipv6_addr { + struct nfp_fl_act_head head; + __be16 reserved; + struct { + __be32 mask; + __be32 exact; + } ipv6[4]; +}; + +struct nfp_fl_set_tport { + struct nfp_fl_act_head head; + __be16 reserved; + u8 tp_port_mask[4]; + u8 tp_port_val[4]; +}; + +struct nfp_fl_output { + struct nfp_fl_act_head head; + __be16 flags; + __be32 port; +}; + +struct nfp_fl_push_vlan { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 vlan_tpid; + __be16 vlan_tci; +}; + +struct nfp_fl_pop_vlan { + struct nfp_fl_act_head head; + __be16 reserved; +}; + +struct nfp_fl_pre_lag { + struct nfp_fl_act_head head; + __be16 group_id; + u8 lag_version[3]; + u8 instance; +}; + +#define NFP_FL_PRE_LAG_VER_OFF 8 + +struct nfp_fl_pre_tunnel { + struct nfp_fl_act_head head; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; +}; + +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { + struct nfp_fl_act_head head; + __be16 reserved; + __be64 tun_id __packed; + __be32 tun_type_index; + __be16 tun_flags; + u8 ttl; + u8 tos; + __be16 outer_vlan_tpid; + __be16 outer_vlan_tci; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; +}; + +struct nfp_fl_push_mpls { + struct nfp_fl_act_head head; + __be16 ethtype; + __be32 lse; +}; + +struct nfp_fl_pop_mpls { + struct nfp_fl_act_head head; + __be16 ethtype; +}; + +struct nfp_fl_set_mpls { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 lse_mask; + __be32 lse; +}; + +struct nfp_fl_meter { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 meter_id; +}; + +/* Metadata with L2 (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | key_type | mask_id | PCP |p| vlan outermost VID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * ^ ^ + * NOTE: | TCI | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_meta_tci { + u8 nfp_flow_key_layer; + u8 mask_id; + __be16 tci; +}; + +/* Extended metadata for additional key_layers (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | nfp_flow_key_layer2 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ext_meta { + __be32 nfp_flow_key_layer2; +}; + +/* Port details (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_ingress | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_in_port { + __be32 in_port; +}; + +/* L2 details (4W/16B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 47 - 32 | mac_addr_src, 15 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_src, 47 - 16 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mpls outermost label | TC |B| reserved |q| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_mac_mpls { + u8 mac_dst[6]; + u8 mac_src[6]; + __be32 mpls_lse; +}; + +/* VLAN details (2W/8B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | outer_tpid | outer_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | inner_tpid | inner_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vlan { + __be16 outer_tpid; + __be16 outer_tci; + __be16 inner_tpid; + __be16 inner_tci; +}; + +/* L4 ports (for UDP, TCP, SCTP) (1W/4B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_src | port_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_tp_ports { + __be16 port_src; + __be16 port_dst; +}; + +struct nfp_flower_ip_ext { + u8 tos; + u8 proto; + u8 ttl; + u8 flags; +}; + +/* L3 IPv4 details (3W/12B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | ttl | flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv4 { + struct nfp_flower_ip_ext ip_ext; + __be32 ipv4_src; + __be32 ipv4_dst; +}; + +/* L3 IPv6 details (10W/40B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | ttl | flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_exthdr | res | ipv6_flow_label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6 { + struct nfp_flower_ip_ext ip_ext; + __be32 ipv6_flow_label_exthdr; + struct in6_addr ipv6_src; + struct in6_addr ipv6_dst; +}; + +struct nfp_flower_tun_ipv4 { + __be32 src; + __be32 dst; +}; + +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + +struct nfp_flower_tun_ip_ext { + u8 tos; + u8 ttl; +}; + +/* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv4_udp_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + +/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +struct nfp_flower_ipv4_gre_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + +struct nfp_flower_geneve_options { + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + +#define NFP_FL_TUN_VNI_OFFSET 8 + +/* The base header for a control message packet. + * Defines an 8-bit version, and an 8-bit type, padded + * to a 32-bit word. Rest of the packet is type-specific. + */ +struct nfp_flower_cmsg_hdr { + __be16 pad; + u8 type; + u8 version; +}; + +#define NFP_FLOWER_CMSG_HLEN sizeof(struct nfp_flower_cmsg_hdr) +#define NFP_FLOWER_CMSG_VER1 1 + +/* Types defined for port related control messages */ +enum nfp_flower_cmsg_type_port { + NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD = 1, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4, + NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6, + NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, + NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_MERGE_HINT = 9, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, + NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, + NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, + NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, + NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, + NFP_FLOWER_CMSG_TYPE_QOS_MOD = 18, + NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, + NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, + NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, + NFP_FLOWER_CMSG_TYPE_MAX = 32, +}; + +/* NFP_FLOWER_CMSG_TYPE_MAC_REPR */ +struct nfp_flower_cmsg_mac_repr { + u8 reserved[3]; + u8 num_ports; + struct { + u8 idx; + u8 info; + u8 nbi_port; + u8 phys_port; + } ports[]; +}; + +#define NFP_FLOWER_CMSG_MAC_REPR_NBI GENMASK(1, 0) + +/* NFP_FLOWER_CMSG_TYPE_PORT_MOD */ +struct nfp_flower_cmsg_portmod { + __be32 portnum; + u8 reserved; + u8 info; + __be16 mtu; +}; + +#define NFP_FLOWER_CMSG_PORTMOD_INFO_LINK BIT(0) +#define NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY BIT(1) + +/* NFP_FLOWER_CMSG_TYPE_PORT_REIFY */ +struct nfp_flower_cmsg_portreify { + __be32 portnum; + u16 reserved; + __be16 info; +}; + +#define NFP_FLOWER_CMSG_PORTREIFY_INFO_EXIST BIT(0) + +/* NFP_FLOWER_CMSG_TYPE_FLOW_MERGE_HINT */ +struct nfp_flower_cmsg_merge_hint { + u8 reserved[3]; + u8 count; + struct { + __be32 host_ctx; + __be64 host_cookie; + } __packed flow[]; +}; + +enum nfp_flower_cmsg_port_type { + NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0, + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1, + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3, +}; + +enum nfp_flower_cmsg_port_vnic_type { + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF = 0x0, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF = 0x1, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_CTRL = 0x2, +}; + +#define NFP_FLOWER_CMSG_PORT_TYPE GENMASK(31, 28) +#define NFP_FLOWER_CMSG_PORT_SYS_ID GENMASK(27, 24) +#define NFP_FLOWER_CMSG_PORT_NFP_ID GENMASK(23, 22) +#define NFP_FLOWER_CMSG_PORT_PCI GENMASK(15, 14) +#define NFP_FLOWER_CMSG_PORT_VNIC_TYPE GENMASK(13, 12) +#define NFP_FLOWER_CMSG_PORT_VNIC GENMASK(11, 6) +#define NFP_FLOWER_CMSG_PORT_PCIE_Q GENMASK(5, 0) +#define NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM GENMASK(7, 0) + +static inline u32 nfp_flower_internal_port_get_port_id(u8 internal_port) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, internal_port) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT); +} + +static inline u32 nfp_flower_cmsg_phys_port(u8 phys_port) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, phys_port) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT); +} + +static inline u32 +nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type, + u8 vnic, u8 q) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PCI, nfp_pcie) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_VNIC_TYPE, type) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_VNIC, vnic) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_PCIE_Q, q) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT); +} + +static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) +{ + return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; +} + +static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb) +{ + return skb->len - NFP_FLOWER_CMSG_HLEN; +} + +static inline bool +nfp_fl_netdev_is_tunnel_type(struct net_device *netdev, + enum nfp_flower_tun_type tun_type) +{ + if (netif_is_vxlan(netdev)) + return tun_type == NFP_FL_TUNNEL_VXLAN; + if (netif_is_gretap(netdev) || netif_is_ip6gretap(netdev)) + return tun_type == NFP_FL_TUNNEL_GRE; + if (netif_is_geneve(netdev)) + return tun_type == NFP_FL_TUNNEL_GENEVE; + + return false; +} + +static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev) +{ + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + if (netif_is_vxlan(netdev)) + return true; + if (netif_is_geneve(netdev)) + return true; + if (netif_is_gretap(netdev)) + return true; + if (netif_is_ip6gretap(netdev)) + return true; + + return false; +} + +struct sk_buff * +nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports); +void +nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx, + unsigned int nbi, unsigned int nbi_port, + unsigned int phys_port); +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok, + unsigned int mtu, bool mtu_only); +int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists); +void nfp_flower_cmsg_process_rx(struct work_struct *work); +void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb); +struct sk_buff * +nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, + enum nfp_flower_cmsg_type_port type, gfp_t flag); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c new file mode 100644 index 000000000..f7492be45 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -0,0 +1,2095 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2021 Corigine, Inc. */ + +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_ct.h> + +#include "conntrack.h" +#include "../nfp_port.h" + +const struct rhashtable_params nfp_tc_ct_merge_params = { + .head_offset = offsetof(struct nfp_fl_ct_tc_merge, + hash_node), + .key_len = sizeof(unsigned long) * 2, + .key_offset = offsetof(struct nfp_fl_ct_tc_merge, cookie), + .automatic_shrinking = true, +}; + +const struct rhashtable_params nfp_nft_ct_merge_params = { + .head_offset = offsetof(struct nfp_fl_nft_tc_merge, + hash_node), + .key_len = sizeof(unsigned long) * 3, + .key_offset = offsetof(struct nfp_fl_nft_tc_merge, cookie), + .automatic_shrinking = true, +}; + +static struct flow_action_entry *get_flow_act(struct flow_rule *rule, + enum flow_action_id act_id); + +/** + * get_hashentry() - Wrapper around hashtable lookup. + * @ht: hashtable where entry could be found + * @key: key to lookup + * @params: hashtable params + * @size: size of entry to allocate if not in table + * + * Returns an entry from a hashtable. If entry does not exist + * yet allocate the memory for it and return the new entry. + */ +static void *get_hashentry(struct rhashtable *ht, void *key, + const struct rhashtable_params params, size_t size) +{ + void *result; + + result = rhashtable_lookup_fast(ht, key, params); + + if (result) + return result; + + result = kzalloc(size, GFP_KERNEL); + if (!result) + return ERR_PTR(-ENOMEM); + + return result; +} + +bool is_pre_ct_flow(struct flow_cls_offload *flow) +{ + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* The pre_ct rule only have the ct or ct nat action, cannot + * contains other ct action e.g ct commit and so on. + */ + if ((!act->ct.action || act->ct.action == TCA_CT_ACT_NAT)) + return true; + else + return false; + } + } + + return false; +} + +bool is_post_ct_flow(struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_action_entry *act; + bool exist_ct_clear = false; + struct flow_match_ct ct; + int i; + + /* post ct entry cannot contains any ct action except ct_clear. */ + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* ignore ct clear action. */ + if (act->ct.action == TCA_CT_ACT_CLEAR) { + exist_ct_clear = true; + continue; + } + + return false; + } + } + + if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) + return true; + } else { + /* when do nat with ct, the post ct entry ignore the ct status, + * will match the nat field(sip/dip) instead. In this situation, + * the flow chain index is not zero and contains ct clear action. + */ + if (flow->common.chain_index && exist_ct_clear) + return true; + } + + return false; +} + +/** + * get_mangled_key() - Mangle the key if mangle act exists + * @rule: rule that carries the actions + * @buf: pointer to key to be mangled + * @offset: used to adjust mangled offset in L2/L3/L4 header + * @key_sz: key size + * @htype: mangling type + * + * Returns buf where the mangled key stores. + */ +static void *get_mangled_key(struct flow_rule *rule, void *buf, + u32 offset, size_t key_sz, + enum flow_action_mangle_base htype) +{ + struct flow_action_entry *act; + u32 *val = (u32 *)buf; + u32 off, msk, key; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_MANGLE && + act->mangle.htype == htype) { + off = act->mangle.offset - offset; + msk = act->mangle.mask; + key = act->mangle.val; + + /* Mangling is supposed to be u32 aligned */ + if (off % 4 || off >= key_sz) + continue; + + val[off >> 2] &= msk; + val[off >> 2] |= key; + } + } + + return buf; +} + +/* Only tos and ttl are involved in flow_match_ip structure, which + * doesn't conform to the layout of ip/ipv6 header definition. So + * they need particular process here: fill them into the ip/ipv6 + * header, so that mangling actions can work directly. + */ +#define NFP_IPV4_TOS_MASK GENMASK(23, 16) +#define NFP_IPV4_TTL_MASK GENMASK(31, 24) +#define NFP_IPV6_TCLASS_MASK GENMASK(27, 20) +#define NFP_IPV6_HLIMIT_MASK GENMASK(7, 0) +static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf, + bool is_v6) +{ + struct flow_match_ip match; + /* IPv4's ttl field is in third dword. */ + __be32 ip_hdr[3]; + u32 tmp, hdr_len; + + flow_rule_match_ip(rule, &match); + + if (is_v6) { + tmp = FIELD_PREP(NFP_IPV6_TCLASS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV6_HLIMIT_MASK, match.key->ttl); + ip_hdr[1] = cpu_to_be32(tmp); + hdr_len = 2 * sizeof(__be32); + } else { + tmp = FIELD_PREP(NFP_IPV4_TOS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV4_TTL_MASK, match.key->ttl); + ip_hdr[2] = cpu_to_be32(tmp); + hdr_len = 3 * sizeof(__be32); + } + + get_mangled_key(rule, ip_hdr, 0, hdr_len, + is_v6 ? FLOW_ACT_MANGLE_HDR_TYPE_IP6 : + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + + match.key = buf; + + if (is_v6) { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV6_TCLASS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[1]); + match.key->ttl = FIELD_GET(NFP_IPV6_HLIMIT_MASK, tmp); + } else { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV4_TOS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[2]); + match.key->ttl = FIELD_GET(NFP_IPV4_TTL_MASK, tmp); + } + + return buf; +} + +/* Note entry1 and entry2 are not swappable. only skip ip and + * tport merge check for pre_ct and post_ct when pre_ct do nat. + */ +static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1, + struct nfp_fl_ct_flow_entry *entry2) +{ + /* only pre_ct have NFP_FL_ACTION_DO_NAT flag. */ + if ((entry1->flags & NFP_FL_ACTION_DO_NAT) && + entry2->type == CT_TYPE_POST_CT) + return false; + + return true; +} + +/* Note entry1 and entry2 are not swappable, entry1 should be + * the former flow whose mangle action need be taken into account + * if existed, and entry2 should be the latter flow whose action + * we don't care. + */ +static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, + struct nfp_fl_ct_flow_entry *entry2) +{ + unsigned int ovlp_keys = entry1->rule->match.dissector->used_keys & + entry2->rule->match.dissector->used_keys; + bool out, is_v6 = false; + u8 ip_proto = 0; + /* Temporary buffer for mangling keys, 64 is enough to cover max + * struct size of key in various fields that may be mangled. + * Supported fields to mangle: + * mac_src/mac_dst(struct flow_match_eth_addrs, 12B) + * nw_tos/nw_ttl(struct flow_match_ip, 2B) + * nw_src/nw_dst(struct flow_match_ipv4/6_addrs, 32B) + * tp_src/tp_dst(struct flow_match_ports, 4B) + */ + char buf[64]; + + if (entry1->netdev && entry2->netdev && + entry1->netdev != entry2->netdev) + return -EINVAL; + + /* Check the overlapped fields one by one, the unmasked part + * should not conflict with each other. + */ + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match1, match2; + + flow_rule_match_control(entry1->rule, &match1); + flow_rule_match_control(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match1, match2; + + flow_rule_match_basic(entry1->rule, &match1); + flow_rule_match_basic(entry2->rule, &match2); + + /* n_proto field is a must in ct-related flows, + * it should be either ipv4 or ipv6. + */ + is_v6 = match1.key->n_proto == htons(ETH_P_IPV6); + /* ip_proto field is a must when port field is cared */ + ip_proto = match1.key->ip_proto; + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + struct flow_match_ipv4_addrs match1, match2; + + flow_rule_match_ipv4_addrs(entry1->rule, &match1); + flow_rule_match_ipv4_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct iphdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + struct flow_match_ipv6_addrs match1, match2; + + flow_rule_match_ipv6_addrs(entry1->rule, &match1); + flow_rule_match_ipv6_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct ipv6hdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP6); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat tport exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this tport merge check here. + */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC; + struct flow_match_ports match1, match2; + + flow_rule_match_ports(entry1->rule, &match1); + flow_rule_match_ports(entry2->rule, &match2); + + if (ip_proto == IPPROTO_UDP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_UDP; + else if (ip_proto == IPPROTO_TCP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_TCP; + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), htype); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match1, match2; + + flow_rule_match_eth_addrs(entry1->rule, &match1); + flow_rule_match_eth_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_ETH); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match1, match2; + + flow_rule_match_vlan(entry1->rule, &match1); + flow_rule_match_vlan(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_match_mpls match1, match2; + + flow_rule_match_mpls(entry1->rule, &match1); + flow_rule_match_mpls(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match1, match2; + + flow_rule_match_tcp(entry1->rule, &match1); + flow_rule_match_tcp(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match1, match2; + + flow_rule_match_ip(entry1->rule, &match1); + flow_rule_match_ip(entry2->rule, &match2); + + match1.key = get_mangled_tos_ttl(entry1->rule, buf, is_v6); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match1, match2; + + flow_rule_match_enc_keyid(entry1->rule, &match1); + flow_rule_match_enc_keyid(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match1, match2; + + flow_rule_match_enc_ipv4_addrs(entry1->rule, &match1); + flow_rule_match_enc_ipv4_addrs(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match1, match2; + + flow_rule_match_enc_ipv6_addrs(entry1->rule, &match1); + flow_rule_match_enc_ipv6_addrs(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match1, match2; + + flow_rule_match_enc_control(entry1->rule, &match1); + flow_rule_match_enc_control(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match1, match2; + + flow_rule_match_enc_ip(entry1->rule, &match1); + flow_rule_match_enc_ip(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_match_enc_opts match1, match2; + + flow_rule_match_enc_opts(entry1->rule, &match1); + flow_rule_match_enc_opts(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + return 0; + +check_failed: + return -EINVAL; +} + +static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in, + struct flow_rule *rule) +{ + struct flow_match_vlan match; + + if (unlikely(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN))) + return -EOPNOTSUPP; + + /* post_ct does not match VLAN KEY, can be merged. */ + if (likely(!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN))) + return 0; + + switch (a_in->id) { + /* pre_ct has pop vlan, post_ct cannot match VLAN KEY, cannot be merged. */ + case FLOW_ACTION_VLAN_POP: + return -EOPNOTSUPP; + + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: + flow_rule_match_vlan(rule, &match); + /* different vlan id, cannot be merged. */ + if ((match.key->vlan_id & match.mask->vlan_id) ^ + (a_in->vlan.vid & match.mask->vlan_id)) + return -EOPNOTSUPP; + + /* different tpid, cannot be merged. */ + if ((match.key->vlan_tpid & match.mask->vlan_tpid) ^ + (a_in->vlan.proto & match.mask->vlan_tpid)) + return -EOPNOTSUPP; + + /* different priority, cannot be merged. */ + if ((match.key->vlan_priority & match.mask->vlan_priority) ^ + (a_in->vlan.prio & match.mask->vlan_priority)) + return -EOPNOTSUPP; + + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry, + struct nfp_fl_ct_flow_entry *post_ct_entry, + struct nfp_fl_ct_flow_entry *nft_entry) +{ + struct flow_action_entry *act; + int i, err; + + /* Check for pre_ct->action conflicts */ + flow_action_for_each(i, act, &pre_ct_entry->rule->action) { + switch (act->id) { + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_MANGLE: + err = nfp_ct_check_vlan_merge(act, post_ct_entry->rule); + if (err) + return err; + break; + case FLOW_ACTION_MPLS_PUSH: + case FLOW_ACTION_MPLS_POP: + case FLOW_ACTION_MPLS_MANGLE: + return -EOPNOTSUPP; + default: + break; + } + } + + /* Check for nft->action conflicts */ + flow_action_for_each(i, act, &nft_entry->rule->action) { + switch (act->id) { + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_MANGLE: + case FLOW_ACTION_MPLS_PUSH: + case FLOW_ACTION_MPLS_POP: + case FLOW_ACTION_MPLS_MANGLE: + return -EOPNOTSUPP; + default: + break; + } + } + return 0; +} + +static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry, + struct nfp_fl_ct_flow_entry *nft_entry) +{ + struct flow_dissector *dissector = post_ct_entry->rule->match.dissector; + struct flow_action_entry *ct_met; + struct flow_match_ct ct; + int i; + + ct_met = get_flow_act(nft_entry->rule, FLOW_ACTION_CT_METADATA); + if (ct_met && (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT))) { + u32 *act_lbl; + + act_lbl = ct_met->ct_metadata.labels; + flow_rule_match_ct(post_ct_entry->rule, &ct); + for (i = 0; i < 4; i++) { + if ((ct.key->ct_labels[i] & ct.mask->ct_labels[i]) ^ + (act_lbl[i] & ct.mask->ct_labels[i])) + return -EINVAL; + } + + if ((ct.key->ct_mark & ct.mask->ct_mark) ^ + (ct_met->ct_metadata.mark & ct.mask->ct_mark)) + return -EINVAL; + + return 0; + } else { + /* post_ct with ct clear action will not match the + * ct status when nft is nat entry. + */ + if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE) + return 0; + } + + return -EINVAL; +} + +static int +nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) +{ + int key_size; + + /* This field must always be present */ + key_size = sizeof(struct nfp_flower_meta_tci); + map[FLOW_PAY_META_TCI] = 0; + + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) { + map[FLOW_PAY_EXT_META] = key_size; + key_size += sizeof(struct nfp_flower_ext_meta); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) { + map[FLOW_PAY_INPORT] = key_size; + key_size += sizeof(struct nfp_flower_in_port); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) { + map[FLOW_PAY_MAC_MPLS] = key_size; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) { + map[FLOW_PAY_L4] = key_size; + key_size += sizeof(struct nfp_flower_tp_ports); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) { + map[FLOW_PAY_IPV4] = key_size; + key_size += sizeof(struct nfp_flower_ipv4); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) { + map[FLOW_PAY_IPV6] = key_size; + key_size += sizeof(struct nfp_flower_ipv6); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + map[FLOW_PAY_QINQ] = key_size; + key_size += sizeof(struct nfp_flower_vlan); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + map[FLOW_PAY_GRE] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_gre_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) || + (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) { + map[FLOW_PAY_UDP_TUN] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + map[FLOW_PAY_GENEVE_OPT] = key_size; + key_size += sizeof(struct nfp_flower_geneve_options); + } + + return key_size; +} + +/* get the csum flag according the ip proto and mangle action. */ +static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum) +{ + if (a_in->id != FLOW_ACTION_MANGLE) + return; + + switch (a_in->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + *csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR; + if (ip_proto == IPPROTO_TCP) + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + else if (ip_proto == IPPROTO_UDP) + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + default: + break; + } +} + +static int nfp_fl_merge_actions_offload(struct flow_rule **rules, + struct nfp_flower_priv *priv, + struct net_device *netdev, + struct nfp_fl_payload *flow_pay) +{ + enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE; + struct flow_action_entry *a_in; + int i, j, num_actions, id; + struct flow_rule *a_rule; + int err = 0, offset = 0; + + num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries + + rules[CT_TYPE_NFT]->action.num_entries + + rules[CT_TYPE_POST_CT]->action.num_entries; + + /* Add one action to make sure there is enough room to add an checksum action + * when do nat. + */ + a_rule = flow_rule_alloc(num_actions + 1); + if (!a_rule) + return -ENOMEM; + + /* Actions need a BASIC dissector. */ + a_rule->match = rules[CT_TYPE_PRE_CT]->match; + /* post_ct entry have one action at least. */ + if (rules[CT_TYPE_POST_CT]->action.num_entries != 0) { + tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats; + } + + /* Copy actions */ + for (j = 0; j < _CT_TYPE_MAX; j++) { + u32 csum_updated = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + /* ip_proto is the only field that is needed in later compile_action, + * needed to set the correct checksum flags. It doesn't really matter + * which input rule's ip_proto field we take as the earlier merge checks + * would have made sure that they don't conflict. We do not know which + * of the subflows would have the ip_proto filled in, so we need to iterate + * through the subflows and assign the proper subflow to a_rule + */ + flow_rule_match_basic(rules[j], &match); + if (match.mask->ip_proto) { + a_rule->match = rules[j]->match; + ip_proto = match.key->ip_proto; + } + } + + for (i = 0; i < rules[j]->action.num_entries; i++) { + a_in = &rules[j]->action.entries[i]; + id = a_in->id; + + /* Ignore CT related actions as these would already have + * been taken care of by previous checks, and we do not send + * any CT actions to the firmware. + */ + switch (id) { + case FLOW_ACTION_CT: + case FLOW_ACTION_GOTO: + case FLOW_ACTION_CT_METADATA: + continue; + default: + /* nft entry is generated by tc ct, which mangle action do not care + * the stats, inherit the post entry stats to meet the + * flow_action_hw_stats_check. + */ + if (j == CT_TYPE_NFT) { + if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) + a_in->hw_stats = tmp_stats; + nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated); + } + memcpy(&a_rule->action.entries[offset++], + a_in, sizeof(struct flow_action_entry)); + break; + } + } + /* nft entry have mangle action, but do not have checksum action when do NAT, + * hardware will automatically fix IPv4 and TCP/UDP checksum. so add an csum action + * to meet csum action check. + */ + if (csum_updated) { + struct flow_action_entry *csum_action; + + csum_action = &a_rule->action.entries[offset++]; + csum_action->id = FLOW_ACTION_CSUM; + csum_action->csum_flags = csum_updated; + csum_action->hw_stats = tmp_stats; + } + } + + /* Some actions would have been ignored, so update the num_entries field */ + a_rule->action.num_entries = offset; + err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL); + kfree(a_rule); + + return err; +} + +static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) +{ + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_fl_ct_zone_entry *zt = m_entry->zt; + struct nfp_fl_key_ls key_layer, tmp_layer; + struct nfp_flower_priv *priv = zt->priv; + u16 key_map[_FLOW_PAY_LAYERS_MAX]; + struct nfp_fl_payload *flow_pay; + + struct flow_rule *rules[_CT_TYPE_MAX]; + u8 *key, *msk, *kdata, *mdata; + struct nfp_port *port = NULL; + struct net_device *netdev; + bool qinq_sup; + u32 port_id; + u16 offset; + int i, err; + + netdev = m_entry->netdev; + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule; + rules[CT_TYPE_NFT] = m_entry->nft_parent->rule; + rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule; + + memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls)); + memset(&key_map, 0, sizeof(key_map)); + + /* Calculate the resultant key layer and size for offload */ + for (i = 0; i < _CT_TYPE_MAX; i++) { + err = nfp_flower_calculate_key_layers(priv->app, + m_entry->netdev, + &tmp_layer, rules[i], + &tun_type, NULL); + if (err) + return err; + + key_layer.key_layer |= tmp_layer.key_layer; + key_layer.key_layer_two |= tmp_layer.key_layer_two; + } + key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map); + + flow_pay = nfp_flower_allocate_new(&key_layer); + if (!flow_pay) + return -ENOMEM; + + memset(flow_pay->unmasked_data, 0, key_layer.key_size); + memset(flow_pay->mask_data, 0, key_layer.key_size); + + kdata = flow_pay->unmasked_data; + mdata = flow_pay->mask_data; + + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + key_layer.key_layer); + + if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) { + offset = key_map[FLOW_PAY_EXT_META]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key, + key_layer.key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_layer.key_layer_two); + } + + /* Using in_port from the -trk rule. The tc merge checks should already + * be checking that the ingress netdevs are the same + */ + port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev); + offset = key_map[FLOW_PAY_INPORT]; + key = kdata + offset; + msk = mdata + offset; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)key, + port_id, false, tun_type, NULL); + if (err) + goto ct_offload_err; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, NULL); + if (err) + goto ct_offload_err; + + /* This following part works on the assumption that previous checks has + * already filtered out flows that has different values for the different + * layers. Here we iterate through all three rules and merge their respective + * masked value(cared bits), basic method is: + * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask) + * final_mask = r1_mask | r2_mask | r3_mask + * If none of the rules contains a match that is also fine, that simply means + * that the layer is not present. + */ + if (!qinq_sup) { + for (i = 0; i < _CT_TYPE_MAX; i++) { + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) { + offset = key_map[FLOW_PAY_MAC_MPLS]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i]); + err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i], NULL); + if (err) + goto ct_offload_err; + } + } + + if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key, + (struct nfp_flower_ipv4 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV6]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key, + (struct nfp_flower_ipv6 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) { + offset = key_map[FLOW_PAY_L4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key, + (struct nfp_flower_tp_ports *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { + offset = key_map[FLOW_PAY_QINQ]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, + (struct nfp_flower_vlan *)msk, + rules[i]); + } + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + offset = key_map[FLOW_PAY_GRE]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_gre_tun((void *)key, + (void *)msk, rules[i]); + } + gre_match = (struct nfp_flower_ipv6_gre_tun *)key; + dst = &gre_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_gre_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + } + + if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN || + key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + offset = key_map[FLOW_PAY_UDP_TUN]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_udp_tun((void *)key, + (void *)msk, rules[i]); + } + udp_match = (struct nfp_flower_ipv6_udp_tun *)key; + dst = &udp_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_udp_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + offset = key_map[FLOW_PAY_GENEVE_OPT]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) + nfp_flower_compile_geneve_opt(key, msk, rules[i]); + } + } + + /* Merge actions into flow_pay */ + err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay); + if (err) + goto ct_offload_err; + + /* Use the pointer address as the cookie, but set the last bit to 1. + * This is to avoid the 'is_merge_flow' check from detecting this as + * an already merged flow. This works since address alignment means + * that the last bit for pointer addresses will be 0. + */ + flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1; + err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie, + flow_pay, netdev, NULL); + if (err) + goto ct_offload_err; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) + goto ct_release_offload_meta_err; + + err = nfp_flower_xmit_flow(priv->app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto ct_remove_rhash_err; + + m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie; + m_entry->flow_pay = flow_pay; + + if (port) + port->tc_offload_cnt++; + + return err; + +ct_remove_rhash_err: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +ct_release_offload_meta_err: + nfp_modify_flow_metadata(priv->app, flow_pay); +ct_offload_err: + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr); + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); + return err; +} + +static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie, + struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flow_pay; + struct nfp_port *port = NULL; + int err = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + flow_pay = nfp_flower_search_fl_table(app, cookie, netdev); + if (!flow_pay) + return -ENOENT; + + err = nfp_modify_flow_metadata(app, flow_pay); + if (err) + goto err_free_merge_flow; + + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr); + + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + + if (!flow_pay->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, flow_pay); + if (port) + port->tc_offload_cnt--; + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); + kfree_rcu(flow_pay, rcu); + return err; +} + +static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, + struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_tc_merge *tc_m_entry) +{ + struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry; + struct nfp_fl_nft_tc_merge *nft_m_entry; + unsigned long new_cookie[3]; + int err; + + pre_ct_entry = tc_m_entry->pre_ct_parent; + post_ct_entry = tc_m_entry->post_ct_parent; + + err = nfp_ct_merge_act_check(pre_ct_entry, post_ct_entry, nft_entry); + if (err) + return err; + + /* Check that the two tc flows are also compatible with + * the nft entry. No need to check the pre_ct and post_ct + * entries as that was already done during pre_merge. + * The nft entry does not have a chain populated, so + * skip this check. + */ + err = nfp_ct_merge_check(pre_ct_entry, nft_entry); + if (err) + return err; + err = nfp_ct_merge_check(nft_entry, post_ct_entry); + if (err) + return err; + err = nfp_ct_check_meta(post_ct_entry, nft_entry); + if (err) + return err; + + /* Combine tc_merge and nft cookies for this cookie. */ + new_cookie[0] = tc_m_entry->cookie[0]; + new_cookie[1] = tc_m_entry->cookie[1]; + new_cookie[2] = nft_entry->cookie; + nft_m_entry = get_hashentry(&zt->nft_merge_tb, + &new_cookie, + nfp_nft_ct_merge_params, + sizeof(*nft_m_entry)); + + if (IS_ERR(nft_m_entry)) + return PTR_ERR(nft_m_entry); + + /* nft_m_entry already present, not merging again */ + if (!memcmp(&new_cookie, nft_m_entry->cookie, sizeof(new_cookie))) + return 0; + + memcpy(&nft_m_entry->cookie, &new_cookie, sizeof(new_cookie)); + nft_m_entry->zt = zt; + nft_m_entry->tc_m_parent = tc_m_entry; + nft_m_entry->nft_parent = nft_entry; + nft_m_entry->tc_flower_cookie = 0; + /* Copy the netdev from the pre_ct entry. When the tc_m_entry was created + * it only combined them if the netdevs were the same, so can use any of them. + */ + nft_m_entry->netdev = pre_ct_entry->netdev; + + /* Add this entry to the tc_m_list and nft_flow lists */ + list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children); + list_add(&nft_m_entry->nft_flow_list, &nft_entry->children); + + /* Generate offload structure and send to nfp */ + err = nfp_fl_ct_add_offload(nft_m_entry); + if (err) + goto err_nft_ct_offload; + + err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node, + nfp_nft_ct_merge_params); + if (err) + goto err_nft_ct_merge_insert; + + zt->nft_merge_count++; + + return err; + +err_nft_ct_merge_insert: + nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie, + nft_m_entry->netdev); +err_nft_ct_offload: + list_del(&nft_m_entry->tc_merge_list); + list_del(&nft_m_entry->nft_flow_list); + kfree(nft_m_entry); + return err; +} + +static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, + struct nfp_fl_ct_flow_entry *ct_entry1, + struct nfp_fl_ct_flow_entry *ct_entry2) +{ + struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry; + struct nfp_fl_ct_flow_entry *nft_entry, *nft_tmp; + struct nfp_fl_ct_tc_merge *m_entry; + unsigned long new_cookie[2]; + int err; + + if (ct_entry1->type == CT_TYPE_PRE_CT) { + pre_ct_entry = ct_entry1; + post_ct_entry = ct_entry2; + } else { + post_ct_entry = ct_entry1; + pre_ct_entry = ct_entry2; + } + + /* Checks that the chain_index of the filter matches the + * chain_index of the GOTO action. + */ + if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + return -EINVAL; + + err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); + if (err) + return err; + + new_cookie[0] = pre_ct_entry->cookie; + new_cookie[1] = post_ct_entry->cookie; + m_entry = get_hashentry(&zt->tc_merge_tb, &new_cookie, + nfp_tc_ct_merge_params, sizeof(*m_entry)); + if (IS_ERR(m_entry)) + return PTR_ERR(m_entry); + + /* m_entry already present, not merging again */ + if (!memcmp(&new_cookie, m_entry->cookie, sizeof(new_cookie))) + return 0; + + memcpy(&m_entry->cookie, &new_cookie, sizeof(new_cookie)); + m_entry->zt = zt; + m_entry->post_ct_parent = post_ct_entry; + m_entry->pre_ct_parent = pre_ct_entry; + + /* Add this entry to the pre_ct and post_ct lists */ + list_add(&m_entry->post_ct_list, &post_ct_entry->children); + list_add(&m_entry->pre_ct_list, &pre_ct_entry->children); + INIT_LIST_HEAD(&m_entry->children); + + err = rhashtable_insert_fast(&zt->tc_merge_tb, &m_entry->hash_node, + nfp_tc_ct_merge_params); + if (err) + goto err_ct_tc_merge_insert; + zt->tc_merge_count++; + + /* Merge with existing nft flows */ + list_for_each_entry_safe(nft_entry, nft_tmp, &zt->nft_flows_list, + list_node) { + nfp_ct_do_nft_merge(zt, nft_entry, m_entry); + } + + return 0; + +err_ct_tc_merge_insert: + list_del(&m_entry->post_ct_list); + list_del(&m_entry->pre_ct_list); + kfree(m_entry); + return err; +} + +static struct +nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv, + u16 zone, bool wildcarded) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + if (wildcarded && priv->ct_zone_wc) + return priv->ct_zone_wc; + + if (!wildcarded) { + zt = get_hashentry(&priv->ct_zone_table, &zone, + nfp_zone_table_params, sizeof(*zt)); + + /* If priv is set this is an existing entry, just return it */ + if (IS_ERR(zt) || zt->priv) + return zt; + } else { + zt = kzalloc(sizeof(*zt), GFP_KERNEL); + if (!zt) + return ERR_PTR(-ENOMEM); + } + + zt->zone = zone; + zt->priv = priv; + zt->nft = NULL; + + /* init the various hash tables and lists */ + INIT_LIST_HEAD(&zt->pre_ct_list); + INIT_LIST_HEAD(&zt->post_ct_list); + INIT_LIST_HEAD(&zt->nft_flows_list); + + err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params); + if (err) + goto err_tc_merge_tb_init; + + err = rhashtable_init(&zt->nft_merge_tb, &nfp_nft_ct_merge_params); + if (err) + goto err_nft_merge_tb_init; + + if (wildcarded) { + priv->ct_zone_wc = zt; + } else { + err = rhashtable_insert_fast(&priv->ct_zone_table, + &zt->hash_node, + nfp_zone_table_params); + if (err) + goto err_zone_insert; + } + + return zt; + +err_zone_insert: + rhashtable_destroy(&zt->nft_merge_tb); +err_nft_merge_tb_init: + rhashtable_destroy(&zt->tc_merge_tb); +err_tc_merge_tb_init: + kfree(zt); + return ERR_PTR(err); +} + +static struct net_device *get_netdev_from_rule(struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) + return __dev_get_by_index(&init_net, + match.key->ingress_ifindex); + } + + return NULL; +} + +static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action) +{ + if (mangle_action->id != FLOW_ACTION_MANGLE) + return; + + switch (mangle_action->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); + return; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + return; + + default: + return; + } +} + +static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act, + struct nfp_fl_ct_flow_entry *entry) +{ + switch (act->id) { + case FLOW_ACTION_CT: + if (act->ct.action == TCA_CT_ACT_NAT) + entry->flags |= NFP_FL_ACTION_DO_NAT; + break; + + case FLOW_ACTION_MANGLE: + entry->flags |= NFP_FL_ACTION_DO_MANGLE; + break; + + default: + break; + } + + return 0; +} + +static struct +nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, + struct net_device *netdev, + struct flow_cls_offload *flow, + bool is_nft, struct netlink_ext_ack *extack) +{ + struct nf_flow_match *nft_match = NULL; + struct nfp_fl_ct_flow_entry *entry; + struct nfp_fl_ct_map_entry *map; + struct flow_action_entry *act; + int err, i; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + entry->rule = flow_rule_alloc(flow->rule->action.num_entries); + if (!entry->rule) { + err = -ENOMEM; + goto err_pre_ct_rule; + } + + /* nft flows gets destroyed after callback return, so need + * to do a full copy instead of just a reference. + */ + if (is_nft) { + nft_match = kzalloc(sizeof(*nft_match), GFP_KERNEL); + if (!nft_match) { + err = -ENOMEM; + goto err_pre_ct_act; + } + memcpy(&nft_match->dissector, flow->rule->match.dissector, + sizeof(nft_match->dissector)); + memcpy(&nft_match->mask, flow->rule->match.mask, + sizeof(nft_match->mask)); + memcpy(&nft_match->key, flow->rule->match.key, + sizeof(nft_match->key)); + entry->rule->match.dissector = &nft_match->dissector; + entry->rule->match.mask = &nft_match->mask; + entry->rule->match.key = &nft_match->key; + + if (!netdev) + netdev = get_netdev_from_rule(entry->rule); + } else { + entry->rule->match.dissector = flow->rule->match.dissector; + entry->rule->match.mask = flow->rule->match.mask; + entry->rule->match.key = flow->rule->match.key; + } + + entry->zt = zt; + entry->netdev = netdev; + entry->cookie = flow->cookie; + entry->chain_index = flow->common.chain_index; + entry->tun_offset = NFP_FL_CT_NO_TUN; + + /* Copy over action data. Unfortunately we do not get a handle to the + * original tcf_action data, and the flow objects gets destroyed, so we + * cannot just save a pointer to this either, so need to copy over the + * data unfortunately. + */ + entry->rule->action.num_entries = flow->rule->action.num_entries; + flow_action_for_each(i, act, &flow->rule->action) { + struct flow_action_entry *new_act; + + new_act = &entry->rule->action.entries[i]; + memcpy(new_act, act, sizeof(struct flow_action_entry)); + /* nft entry mangle field is host byte order, need translate to + * network byte order. + */ + if (is_nft) + nfp_nft_ct_translate_mangle_action(new_act); + + nfp_nft_ct_set_flow_flag(new_act, entry); + /* Entunnel is a special case, need to allocate and copy + * tunnel info. + */ + if (act->id == FLOW_ACTION_TUNNEL_ENCAP) { + struct ip_tunnel_info *tun = act->tunnel; + size_t tun_size = sizeof(*tun) + tun->options_len; + + new_act->tunnel = kmemdup(tun, tun_size, GFP_ATOMIC); + if (!new_act->tunnel) { + err = -ENOMEM; + goto err_pre_ct_tun_cp; + } + entry->tun_offset = i; + } + } + + INIT_LIST_HEAD(&entry->children); + + /* Now add a ct map entry to flower-priv */ + map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params, sizeof(*map)); + if (IS_ERR(map)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: ct map entry creation failed"); + err = -ENOMEM; + goto err_ct_flow_insert; + } + map->cookie = flow->cookie; + map->ct_entry = entry; + err = rhashtable_insert_fast(&zt->priv->ct_map_table, + &map->hash_node, + nfp_ct_map_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: ct map entry table add failed"); + goto err_map_insert; + } + + return entry; + +err_map_insert: + kfree(map); +err_ct_flow_insert: + if (entry->tun_offset != NFP_FL_CT_NO_TUN) + kfree(entry->rule->action.entries[entry->tun_offset].tunnel); +err_pre_ct_tun_cp: + kfree(nft_match); +err_pre_ct_act: + kfree(entry->rule); +err_pre_ct_rule: + kfree(entry); + return ERR_PTR(err); +} + +static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + zt = m_entry->zt; + + /* Flow is in HW, need to delete */ + if (m_entry->tc_flower_cookie) { + err = nfp_fl_ct_del_offload(zt->priv->app, m_entry->tc_flower_cookie, + m_entry->netdev); + if (err) + return; + } + + WARN_ON_ONCE(rhashtable_remove_fast(&zt->nft_merge_tb, + &m_entry->hash_node, + nfp_nft_ct_merge_params)); + zt->nft_merge_count--; + list_del(&m_entry->tc_merge_list); + list_del(&m_entry->nft_flow_list); + + kfree(m_entry); +} + +static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow) +{ + struct nfp_fl_nft_tc_merge *m_entry, *tmp; + + /* These post entries are parts of two lists, one is a list of nft_entries + * and the other is of from a list of tc_merge structures. Iterate + * through the relevant list and cleanup the entries. + */ + + if (is_nft_flow) { + /* Need to iterate through list of nft_flow entries */ + struct nfp_fl_ct_flow_entry *ct_entry = entry; + + list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, + nft_flow_list) { + cleanup_nft_merge_entry(m_entry); + } + } else { + /* Need to iterate through list of tc_merged_flow entries */ + struct nfp_fl_ct_tc_merge *ct_entry = entry; + + list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, + tc_merge_list) { + cleanup_nft_merge_entry(m_entry); + } + } +} + +static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + zt = m_ent->zt; + err = rhashtable_remove_fast(&zt->tc_merge_tb, + &m_ent->hash_node, + nfp_tc_ct_merge_params); + if (err) + pr_warn("WARNING: could not remove merge_entry from hashtable\n"); + zt->tc_merge_count--; + list_del(&m_ent->post_ct_list); + list_del(&m_ent->pre_ct_list); + + if (!list_empty(&m_ent->children)) + nfp_free_nft_merge_children(m_ent, false); + kfree(m_ent); +} + +static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry) +{ + struct nfp_fl_ct_tc_merge *m_ent, *tmp; + + switch (entry->type) { + case CT_TYPE_PRE_CT: + list_for_each_entry_safe(m_ent, tmp, &entry->children, pre_ct_list) { + nfp_del_tc_merge_entry(m_ent); + } + break; + case CT_TYPE_POST_CT: + list_for_each_entry_safe(m_ent, tmp, &entry->children, post_ct_list) { + nfp_del_tc_merge_entry(m_ent); + } + break; + default: + break; + } +} + +void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry) +{ + list_del(&entry->list_node); + + if (!list_empty(&entry->children)) { + if (entry->type == CT_TYPE_NFT) + nfp_free_nft_merge_children(entry, true); + else + nfp_free_tc_merge_children(entry); + } + + if (entry->tun_offset != NFP_FL_CT_NO_TUN) + kfree(entry->rule->action.entries[entry->tun_offset].tunnel); + + if (entry->type == CT_TYPE_NFT) { + struct nf_flow_match *nft_match; + + nft_match = container_of(entry->rule->match.dissector, + struct nf_flow_match, dissector); + kfree(nft_match); + } + + kfree(entry->rule); + kfree(entry); +} + +static struct flow_action_entry *get_flow_act(struct flow_rule *rule, + enum flow_action_id act_id) +{ + struct flow_action_entry *act = NULL; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == act_id) + return act; + } + return NULL; +} + +static void +nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1, + struct nfp_fl_ct_zone_entry *zt_src, + struct nfp_fl_ct_zone_entry *zt_dst) +{ + struct nfp_fl_ct_flow_entry *ct_entry2, *ct_tmp; + struct list_head *ct_list; + + if (ct_entry1->type == CT_TYPE_PRE_CT) + ct_list = &zt_src->post_ct_list; + else if (ct_entry1->type == CT_TYPE_POST_CT) + ct_list = &zt_src->pre_ct_list; + else + return; + + list_for_each_entry_safe(ct_entry2, ct_tmp, ct_list, + list_node) { + nfp_ct_do_tc_merge(zt_dst, ct_entry2, ct_entry1); + } +} + +static void +nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_zone_entry *zt) +{ + struct nfp_fl_ct_tc_merge *tc_merge_entry; + struct rhashtable_iter iter; + + rhashtable_walk_enter(&zt->tc_merge_tb, &iter); + rhashtable_walk_start(&iter); + while ((tc_merge_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(tc_merge_entry)) + continue; + rhashtable_walk_stop(&iter); + nfp_ct_do_nft_merge(zt, nft_entry, tc_merge_entry); + rhashtable_walk_start(&iter); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *ct_act, *ct_goto; + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + int err; + + ct_act = get_flow_act(flow->rule, FLOW_ACTION_CT); + if (!ct_act) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: Conntrack action empty in conntrack offload"); + return -EOPNOTSUPP; + } + + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + if (!ct_goto) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: Conntrack requires ACTION_GOTO"); + return -EOPNOTSUPP; + } + + zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false); + if (IS_ERR(zt)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not create zone table entry"); + return PTR_ERR(zt); + } + + if (!zt->nft) { + zt->nft = ct_act->ct.flow_table; + err = nf_flow_table_offload_add_cb(zt->nft, nfp_fl_ct_handle_nft_flow, zt); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not register nft_callback"); + return err; + } + } + + /* Add entry to pre_ct_list */ + ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + ct_entry->type = CT_TYPE_PRE_CT; + ct_entry->chain_index = ct_goto->chain_index; + list_add(&ct_entry->list_node, &zt->pre_ct_list); + zt->pre_ct_count++; + + nfp_ct_merge_tc_entries(ct_entry, zt, zt); + + /* Need to check and merge with tables in the wc_zone as well */ + if (priv->ct_zone_wc) + nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt); + + return 0; +} + +int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + bool wildcarded = false; + struct flow_match_ct ct; + + flow_rule_match_ct(rule, &ct); + if (!ct.mask->ct_zone) { + wildcarded = true; + } else if (ct.mask->ct_zone != U16_MAX) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: partially wildcarded ct_zone is not supported"); + return -EOPNOTSUPP; + } + + zt = get_nfp_zone_entry(priv, ct.key->ct_zone, wildcarded); + if (IS_ERR(zt)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not create zone table entry"); + return PTR_ERR(zt); + } + + /* Add entry to post_ct_list */ + ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + + ct_entry->type = CT_TYPE_POST_CT; + ct_entry->chain_index = flow->common.chain_index; + list_add(&ct_entry->list_node, &zt->post_ct_list); + zt->post_ct_count++; + + if (wildcarded) { + /* Iterate through all zone tables if not empty, look for merges with + * pre_ct entries and merge them. + */ + struct rhashtable_iter iter; + struct nfp_fl_ct_zone_entry *zone_table; + + rhashtable_walk_enter(&priv->ct_zone_table, &iter); + rhashtable_walk_start(&iter); + while ((zone_table = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(zone_table)) + continue; + rhashtable_walk_stop(&iter); + nfp_ct_merge_tc_entries(ct_entry, zone_table, zone_table); + rhashtable_walk_start(&iter); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + } else { + nfp_ct_merge_tc_entries(ct_entry, zt, zt); + } + + return 0; +} + +static void +nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, + enum ct_entry_type type, u64 *m_pkts, + u64 *m_bytes, u64 *m_used) +{ + struct nfp_flower_priv *priv = nft_merge->zt->priv; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + nfp_flow = nft_merge->flow_pay; + if (!nfp_flow) + return; + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + *m_pkts += priv->stats[ctx_id].pkts; + *m_bytes += priv->stats[ctx_id].bytes; + *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used); + + /* If request is for a sub_flow which is part of a tunnel merged + * flow then update stats from tunnel merged flows first. + */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(priv->app, nfp_flow); + + if (type != CT_TYPE_NFT) { + /* Update nft cached stats */ + flow_stats_update(&nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } else { + /* Update pre_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Update post_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + /* Reset stats from the nfp */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; +} + +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry; + struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp; + struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp; + + u64 pkts = 0, bytes = 0, used = 0; + u64 m_pkts, m_bytes, m_used; + + spin_lock_bh(&ct_entry->zt->priv->stats_lock); + + if (ct_entry->type == CT_TYPE_PRE_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + pre_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update post_ct partner */ + flow_stats_update(&tc_merge->post_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else if (ct_entry->type == CT_TYPE_POST_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + post_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update pre_ct partner */ + flow_stats_update(&tc_merge->pre_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else { + /* Iterate nft_merge entries associated with this nft flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children, + nft_flow_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT, + &pkts, &bytes, &used); + } + } + + /* Add stats from this request to stats potentially cached by + * previous requests. + */ + flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Finally update the flow stats from the original stats request */ + flow_stats_update(&flow->stats, ct_entry->stats.bytes, + ct_entry->stats.pkts, 0, + ct_entry->stats.lastused, + FLOW_ACTION_HW_STATS_DELAYED); + /* Stats has been synced to original flow, can now clear + * the cache. + */ + ct_entry->stats.pkts = 0; + ct_entry->stats.bytes = 0; + spin_unlock_bh(&ct_entry->zt->priv->stats_lock); + + return 0; +} + +static int +nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow) +{ + struct nfp_fl_ct_map_entry *ct_map_ent; + struct nfp_fl_ct_flow_entry *ct_entry; + struct netlink_ext_ack *extack = NULL; + + extack = flow->common.extack; + switch (flow->command) { + case FLOW_CLS_REPLACE: + /* Netfilter can request offload multiple times for the same + * flow - protect against adding duplicates. + */ + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (!ct_map_ent) { + ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + ct_entry->type = CT_TYPE_NFT; + list_add(&ct_entry->list_node, &zt->nft_flows_list); + zt->nft_flows_count++; + nfp_ct_merge_nft_with_tc(ct_entry, zt); + } + return 0; + case FLOW_CLS_DESTROY: + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + return nfp_fl_ct_del_flow(ct_map_ent); + case FLOW_CLS_STATS: + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + break; + default: + break; + } + return -EINVAL; +} + +int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct flow_cls_offload *flow = type_data; + struct nfp_fl_ct_zone_entry *zt = cb_priv; + int err = -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + while (!mutex_trylock(&zt->priv->nfp_fl_lock)) { + if (!zt->nft) /* avoid deadlock */ + return err; + msleep(20); + } + err = nfp_fl_ct_offload_nft_flow(zt, flow); + mutex_unlock(&zt->priv->nfp_fl_lock); + break; + default: + return -EOPNOTSUPP; + } + return err; +} + +static void +nfp_fl_ct_clean_nft_entries(struct nfp_fl_ct_zone_entry *zt) +{ + struct nfp_fl_ct_flow_entry *nft_entry, *ct_tmp; + struct nfp_fl_ct_map_entry *ct_map_ent; + + list_for_each_entry_safe(nft_entry, ct_tmp, &zt->nft_flows_list, + list_node) { + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, + &nft_entry->cookie, + nfp_ct_map_params); + nfp_fl_ct_del_flow(ct_map_ent); + } +} + +int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + struct rhashtable *m_table; + struct nf_flowtable *nft; + + if (!ct_map_ent) + return -ENOENT; + + zt = ct_map_ent->ct_entry->zt; + ct_entry = ct_map_ent->ct_entry; + m_table = &zt->priv->ct_map_table; + + switch (ct_entry->type) { + case CT_TYPE_PRE_CT: + zt->pre_ct_count--; + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_entry); + kfree(ct_map_ent); + + if (!zt->pre_ct_count && zt->nft) { + nft = zt->nft; + zt->nft = NULL; /* avoid deadlock */ + nf_flow_table_offload_del_cb(nft, + nfp_fl_ct_handle_nft_flow, + zt); + nfp_fl_ct_clean_nft_entries(zt); + } + break; + case CT_TYPE_POST_CT: + zt->post_ct_count--; + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_entry); + kfree(ct_map_ent); + break; + case CT_TYPE_NFT: + zt->nft_flows_count--; + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_map_ent->ct_entry); + kfree(ct_map_ent); + break; + default: + break; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h new file mode 100644 index 000000000..762c0b36e --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2021 Corigine, Inc. */ + +#ifndef __NFP_FLOWER_CONNTRACK_H__ +#define __NFP_FLOWER_CONNTRACK_H__ 1 + +#include <net/netfilter/nf_flow_table.h> +#include "main.h" + +#define NFP_FL_CT_NO_TUN 0xff + +#define COMPARE_UNMASKED_FIELDS(__match1, __match2, __out) \ + do { \ + typeof(__match1) _match1 = (__match1); \ + typeof(__match2) _match2 = (__match2); \ + bool *_out = (__out); \ + int i, size = sizeof(*(_match1).key); \ + char *k1, *m1, *k2, *m2; \ + *_out = false; \ + k1 = (char *)_match1.key; \ + m1 = (char *)_match1.mask; \ + k2 = (char *)_match2.key; \ + m2 = (char *)_match2.mask; \ + for (i = 0; i < size; i++) \ + if ((k1[i] & m1[i] & m2[i]) ^ \ + (k2[i] & m1[i] & m2[i])) { \ + *_out = true; \ + break; \ + } \ + } while (0) \ + +extern const struct rhashtable_params nfp_zone_table_params; +extern const struct rhashtable_params nfp_ct_map_params; +extern const struct rhashtable_params nfp_tc_ct_merge_params; +extern const struct rhashtable_params nfp_nft_ct_merge_params; + +/** + * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information + * @zone: The zone number, used as lookup key in hashtable + * @hash_node: Used by the hashtable + * @priv: Pointer to nfp_flower_priv data + * @nft: Pointer to nf_flowtable for this zone + * + * @pre_ct_list: The pre_ct_list of nfp_fl_ct_flow_entry entries + * @pre_ct_count: Keep count of the number of pre_ct entries + * + * @post_ct_list: The post_ct_list of nfp_fl_ct_flow_entry entries + * @post_ct_count: Keep count of the number of post_ct entries + * + * @tc_merge_tb: The table of merged tc flows + * @tc_merge_count: Keep count of the number of merged tc entries + * + * @nft_flows_list: The list of nft relatednfp_fl_ct_flow_entry entries + * @nft_flows_count: Keep count of the number of nft_flow entries + * + * @nft_merge_tb: The table of merged tc+nft flows + * @nft_merge_count: Keep count of the number of merged tc+nft entries + */ +struct nfp_fl_ct_zone_entry { + u16 zone; + struct rhash_head hash_node; + + struct nfp_flower_priv *priv; + struct nf_flowtable *nft; + + struct list_head pre_ct_list; + unsigned int pre_ct_count; + + struct list_head post_ct_list; + unsigned int post_ct_count; + + struct rhashtable tc_merge_tb; + unsigned int tc_merge_count; + + struct list_head nft_flows_list; + unsigned int nft_flows_count; + + struct rhashtable nft_merge_tb; + unsigned int nft_merge_count; +}; + +enum ct_entry_type { + CT_TYPE_PRE_CT, + CT_TYPE_NFT, + CT_TYPE_POST_CT, + _CT_TYPE_MAX, +}; + +enum nfp_nfp_layer_name { + FLOW_PAY_META_TCI = 0, + FLOW_PAY_INPORT, + FLOW_PAY_EXT_META, + FLOW_PAY_MAC_MPLS, + FLOW_PAY_L4, + FLOW_PAY_IPV4, + FLOW_PAY_IPV6, + FLOW_PAY_CT, + FLOW_PAY_GRE, + FLOW_PAY_QINQ, + FLOW_PAY_UDP_TUN, + FLOW_PAY_GENEVE_OPT, + + _FLOW_PAY_LAYERS_MAX +}; + +/* NFP flow entry flags. */ +#define NFP_FL_ACTION_DO_NAT BIT(0) +#define NFP_FL_ACTION_DO_MANGLE BIT(1) + +/** + * struct nfp_fl_ct_flow_entry - Flow entry containing conntrack flow information + * @cookie: Flow cookie, same as original TC flow, used as key + * @list_node: Used by the list + * @chain_index: Chain index of the original flow + * @netdev: netdev structure. + * @type: Type of pre-entry from enum ct_entry_type + * @zt: Reference to the zone table this belongs to + * @children: List of tc_merge flows this flow forms part of + * @rule: Reference to the original TC flow rule + * @stats: Used to cache stats for updating + * @tun_offset: Used to indicate tunnel action offset in action list + * @flags: Used to indicate flow flag like NAT which used by merge. + */ +struct nfp_fl_ct_flow_entry { + unsigned long cookie; + struct list_head list_node; + u32 chain_index; + enum ct_entry_type type; + struct net_device *netdev; + struct nfp_fl_ct_zone_entry *zt; + struct list_head children; + struct flow_rule *rule; + struct flow_stats stats; + u8 tun_offset; // Set to NFP_FL_CT_NO_TUN if no tun + u8 flags; +}; + +/** + * struct nfp_fl_ct_tc_merge - Merge of two flows from tc + * @cookie: Flow cookie, combination of pre and post ct cookies + * @hash_node: Used by the hashtable + * @pre_ct_list: This entry is part of a pre_ct_list + * @post_ct_list: This entry is part of a post_ct_list + * @zt: Reference to the zone table this belongs to + * @pre_ct_parent: The pre_ct_parent + * @post_ct_parent: The post_ct_parent + * @children: List of nft merged entries + */ +struct nfp_fl_ct_tc_merge { + unsigned long cookie[2]; + struct rhash_head hash_node; + struct list_head pre_ct_list; + struct list_head post_ct_list; + struct nfp_fl_ct_zone_entry *zt; + struct nfp_fl_ct_flow_entry *pre_ct_parent; + struct nfp_fl_ct_flow_entry *post_ct_parent; + struct list_head children; +}; + +/** + * struct nfp_fl_nft_tc_merge - Merge of tc_merge flows with nft flow + * @netdev: Ingress netdev name + * @cookie: Flow cookie, combination of tc_merge and nft cookies + * @hash_node: Used by the hashtable + * @zt: Reference to the zone table this belongs to + * @nft_flow_list: This entry is part of a nft_flows_list + * @tc_merge_list: This entry is part of a ct_merge_list + * @tc_m_parent: The tc_merge parent + * @nft_parent: The nft_entry parent + * @tc_flower_cookie: The cookie of the flow offloaded to the nfp + * @flow_pay: Reference to the offloaded flow struct + */ +struct nfp_fl_nft_tc_merge { + struct net_device *netdev; + unsigned long cookie[3]; + struct rhash_head hash_node; + struct nfp_fl_ct_zone_entry *zt; + struct list_head nft_flow_list; + struct list_head tc_merge_list; + struct nfp_fl_ct_tc_merge *tc_m_parent; + struct nfp_fl_ct_flow_entry *nft_parent; + unsigned long tc_flower_cookie; + struct nfp_fl_payload *flow_pay; +}; + +/** + * struct nfp_fl_ct_map_entry - Map between flow cookie and specific ct_flow + * @cookie: Flow cookie, same as original TC flow, used as key + * @hash_node: Used by the hashtable + * @ct_entry: Pointer to corresponding ct_entry + */ +struct nfp_fl_ct_map_entry { + unsigned long cookie; + struct rhash_head hash_node; + struct nfp_fl_ct_flow_entry *ct_entry; +}; + +bool is_pre_ct_flow(struct flow_cls_offload *flow); +bool is_post_ct_flow(struct flow_cls_offload *flow); + +/** + * nfp_fl_ct_handle_pre_ct() - Handles -trk conntrack rules + * @priv: Pointer to app priv + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * @extack: Extack pointer for errors + * + * Adds a new entry to the relevant zone table and tries to + * merge with other +trk+est entries and offload if possible. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack); +/** + * nfp_fl_ct_handle_post_ct() - Handles +trk+est conntrack rules + * @priv: Pointer to app priv + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * @extack: Extack pointer for errors + * + * Adds a new entry to the relevant zone table and tries to + * merge with other -trk entries and offload if possible. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack); + +/** + * nfp_fl_ct_clean_flow_entry() - Free a nfp_fl_ct_flow_entry + * @entry: Flow entry to cleanup + */ +void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry); + +/** + * nfp_fl_ct_del_flow() - Handle flow_del callbacks for conntrack + * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent); + +/** + * nfp_fl_ct_handle_nft_flow() - Handle flower flow callbacks for nft table + * @type: Type provided by callback + * @type_data: Callback data + * @cb_priv: Pointer to data provided when registering the callback, in this + * case it's the zone table. + */ +int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, + void *cb_priv); + +/** + * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows + * @flow: TC flower classifier offload structure. + * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c new file mode 100644 index 000000000..e92860e20 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include "main.h" + +/* LAG group config flags. */ +#define NFP_FL_LAG_LAST BIT(1) +#define NFP_FL_LAG_FIRST BIT(2) +#define NFP_FL_LAG_DATA BIT(3) +#define NFP_FL_LAG_XON BIT(4) +#define NFP_FL_LAG_SYNC BIT(5) +#define NFP_FL_LAG_SWITCH BIT(6) +#define NFP_FL_LAG_RESET BIT(7) + +/* LAG port state flags. */ +#define NFP_PORT_LAG_LINK_UP BIT(0) +#define NFP_PORT_LAG_TX_ENABLED BIT(1) +#define NFP_PORT_LAG_CHANGED BIT(2) + +enum nfp_fl_lag_batch { + NFP_FL_LAG_BATCH_FIRST, + NFP_FL_LAG_BATCH_MEMBER, + NFP_FL_LAG_BATCH_FINISHED +}; + +/** + * struct nfp_flower_cmsg_lag_config - control message payload for LAG config + * @ctrl_flags: Configuration flags + * @reserved: Reserved for future use + * @ttl: Time to live of packet - host always sets to 0xff + * @pkt_number: Config message packet number - increment for each message + * @batch_ver: Batch version of messages - increment for each batch of messages + * @group_id: Group ID applicable + * @group_inst: Group instance number - increment when group is reused + * @members: Array of 32-bit words listing all active group members + */ +struct nfp_flower_cmsg_lag_config { + u8 ctrl_flags; + u8 reserved[2]; + u8 ttl; + __be32 pkt_number; + __be32 batch_ver; + __be32 group_id; + __be32 group_inst; + __be32 members[]; +}; + +/** + * struct nfp_fl_lag_group - list entry for each LAG group + * @group_id: Assigned group ID for host/kernel sync + * @group_inst: Group instance in case of ID reuse + * @list: List entry + * @master_ndev: Group master Netdev + * @dirty: Marked if the group needs synced to HW + * @offloaded: Marked if the group is currently offloaded to NIC + * @to_remove: Marked if the group should be removed from NIC + * @to_destroy: Marked if the group should be removed from driver + * @slave_cnt: Number of slaves in group + */ +struct nfp_fl_lag_group { + unsigned int group_id; + u8 group_inst; + struct list_head list; + struct net_device *master_ndev; + bool dirty; + bool offloaded; + bool to_remove; + bool to_destroy; + unsigned int slave_cnt; +}; + +#define NFP_FL_LAG_PKT_NUMBER_MASK GENMASK(30, 0) +#define NFP_FL_LAG_VERSION_MASK GENMASK(22, 0) +#define NFP_FL_LAG_HOST_TTL 0xff + +/* Use this ID with zero members to ack a batch config */ +#define NFP_FL_LAG_SYNC_ID 0 +#define NFP_FL_LAG_GROUP_MIN 1 /* ID 0 reserved */ +#define NFP_FL_LAG_GROUP_MAX 32 /* IDs 1 to 31 are valid */ + +/* wait for more config */ +#define NFP_FL_LAG_DELAY (msecs_to_jiffies(2)) + +#define NFP_FL_LAG_RETRANS_LIMIT 100 /* max retrans cmsgs to store */ + +static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag) +{ + lag->pkt_num++; + lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK; + + return lag->pkt_num; +} + +static void nfp_fl_increment_version(struct nfp_fl_lag *lag) +{ + /* LSB is not considered by firmware so add 2 for each increment. */ + lag->batch_ver += 2; + lag->batch_ver &= NFP_FL_LAG_VERSION_MASK; + + /* Zero is reserved by firmware. */ + if (!lag->batch_ver) + lag->batch_ver += 2; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master) +{ + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + int id; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN, + NFP_FL_LAG_GROUP_MAX, GFP_KERNEL); + if (id < 0) { + nfp_flower_cmsg_warn(priv->app, + "No more bonding groups available\n"); + return ERR_PTR(id); + } + + group = kmalloc(sizeof(*group), GFP_KERNEL); + if (!group) { + ida_simple_remove(&lag->ida_handle, id); + return ERR_PTR(-ENOMEM); + } + + group->group_id = id; + group->master_ndev = master; + group->dirty = true; + group->offloaded = false; + group->to_remove = false; + group->to_destroy = false; + group->slave_cnt = 0; + group->group_inst = ++lag->global_inst; + list_add_tail(&group->list, &lag->group_list); + + return group; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *entry; + + if (!master) + return NULL; + + list_for_each_entry(entry, &lag->group_list, list) + if (entry->master_ndev == master) + return entry; + + return NULL; +} + +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct net_device *master, + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + __be32 temp_vers; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + master); + if (!group) { + mutex_unlock(&priv->nfp_lag.lock); + NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action"); + return -ENOENT; + } + + pre_act->group_id = cpu_to_be16(group->group_id); + temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver << + NFP_FL_PRE_LAG_VER_OFF); + memcpy(pre_act->lag_version, &temp_vers, 3); + pre_act->instance = group->group_inst; + mutex_unlock(&priv->nfp_lag.lock); + + return 0; +} + +int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + int group_id = -ENOENT; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + master); + if (group) + group_id = group->group_id; + mutex_unlock(&priv->nfp_lag.lock); + + return group_id; +} + +static int +nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group, + struct net_device **active_members, + unsigned int member_cnt, enum nfp_fl_lag_batch *batch) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv; + unsigned long int flags; + unsigned int size, i; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt; + skb = nfp_flower_cmsg_alloc(priv->app, size, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG, + GFP_KERNEL); + if (!skb) + return -ENOMEM; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = 0; + + /* Increment batch version for each new batch of config messages. */ + if (*batch == NFP_FL_LAG_BATCH_FIRST) { + flags |= NFP_FL_LAG_FIRST; + nfp_fl_increment_version(lag); + *batch = NFP_FL_LAG_BATCH_MEMBER; + } + + /* If it is a reset msg then it is also the end of the batch. */ + if (lag->rst_cfg) { + flags |= NFP_FL_LAG_RESET; + *batch = NFP_FL_LAG_BATCH_FINISHED; + } + + /* To signal the end of a batch, both the switch and last flags are set + * and the reserved SYNC group ID is used. + */ + if (*batch == NFP_FL_LAG_BATCH_FINISHED) { + flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST; + lag->rst_cfg = false; + cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID); + cmsg_payload->group_inst = 0; + } else { + cmsg_payload->group_id = cpu_to_be32(group->group_id); + cmsg_payload->group_inst = cpu_to_be32(group->group_inst); + } + + cmsg_payload->reserved[0] = 0; + cmsg_payload->reserved[1] = 0; + cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL; + cmsg_payload->ctrl_flags = flags; + cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver); + cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag)); + + for (i = 0; i < member_cnt; i++) + cmsg_payload->members[i] = + cpu_to_be32(nfp_repr_get_port_id(active_members[i])); + + nfp_ctrl_tx(priv->app->ctrl, skb); + return 0; +} + +static void nfp_fl_lag_do_work(struct work_struct *work) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + struct nfp_fl_lag_group *entry, *storage; + struct delayed_work *delayed_work; + struct nfp_flower_priv *priv; + struct nfp_fl_lag *lag; + int err; + + delayed_work = to_delayed_work(work); + lag = container_of(delayed_work, struct nfp_fl_lag, work); + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + struct net_device *iter_netdev, **acti_netdevs; + struct nfp_flower_repr_priv *repr_priv; + int active_count = 0, slaves = 0; + struct nfp_repr *repr; + unsigned long *flags; + + if (entry->to_remove) { + /* Active count of 0 deletes group on hw. */ + err = nfp_fl_lag_config_group(lag, entry, NULL, 0, + &batch); + if (!err) { + entry->to_remove = false; + entry->offloaded = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group delete failed\n"); + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } + + if (entry->to_destroy) { + ida_simple_remove(&lag->ida_handle, + entry->group_id); + list_del(&entry->list); + kfree(entry); + } + continue; + } + + acti_netdevs = kmalloc_array(entry->slave_cnt, + sizeof(*acti_netdevs), GFP_KERNEL); + + /* Include sanity check in the loop. It may be that a bond has + * changed between processing the last notification and the + * work queue triggering. If the number of slaves has changed + * or it now contains netdevs that cannot be offloaded, ignore + * the group until pending notifications are processed. + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + slaves = 0; + break; + } + + repr = netdev_priv(iter_netdev); + + if (repr->app != priv->app) { + slaves = 0; + break; + } + + slaves++; + if (slaves > entry->slave_cnt) + break; + + /* Check the ports for state changes. */ + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + if (*flags & NFP_PORT_LAG_CHANGED) { + *flags &= ~NFP_PORT_LAG_CHANGED; + entry->dirty = true; + } + + if ((*flags & NFP_PORT_LAG_TX_ENABLED) && + (*flags & NFP_PORT_LAG_LINK_UP)) + acti_netdevs[active_count++] = iter_netdev; + } + rcu_read_unlock(); + + if (slaves != entry->slave_cnt || !entry->dirty) { + kfree(acti_netdevs); + continue; + } + + err = nfp_fl_lag_config_group(lag, entry, acti_netdevs, + active_count, &batch); + if (!err) { + entry->offloaded = true; + entry->dirty = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group offload failed\n"); + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + } + + kfree(acti_netdevs); + } + + /* End the config batch if at least one packet has been batched. */ + if (batch == NFP_FL_LAG_BATCH_MEMBER) { + batch = NFP_FL_LAG_BATCH_FINISHED; + err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); + if (err) + nfp_flower_cmsg_warn(priv->app, + "group batch end cmsg failed\n"); + } + + mutex_unlock(&lag->lock); +} + +static int +nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX) + return -EINVAL; + + /* Drop cmsg retrans if storage limit is exceeded to prevent + * overloading. If the fw notices that expected messages have not been + * received in a given time block, it will request a full resync. + */ + if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT) + return -ENOSPC; + + __skb_queue_tail(&lag->retrans_skbs, skb); + + return 0; +} + +static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag) +{ + struct nfp_flower_priv *priv; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + while ((skb = __skb_dequeue(&lag->retrans_skbs))) + nfp_ctrl_tx(priv->app->ctrl, skb); +} + +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group_entry; + unsigned long int flags; + bool store_skb = false; + int err; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = cmsg_payload->ctrl_flags; + + /* Note the intentional fall through below. If DATA and XON are both + * set, the message will stored and sent again with the rest of the + * unprocessed messages list. + */ + + /* Store */ + if (flags & NFP_FL_LAG_DATA) + if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb)) + store_skb = true; + + /* Send stored */ + if (flags & NFP_FL_LAG_XON) + nfp_fl_send_unprocessed(&priv->nfp_lag); + + /* Resend all */ + if (flags & NFP_FL_LAG_SYNC) { + /* To resend all config: + * 1) Clear all unprocessed messages + * 2) Mark all groups dirty + * 3) Reset NFP group config + * 4) Schedule a LAG config update + */ + + __skb_queue_purge(&priv->nfp_lag.retrans_skbs); + + mutex_lock(&priv->nfp_lag.lock); + list_for_each_entry(group_entry, &priv->nfp_lag.group_list, + list) + group_entry->dirty = true; + + err = nfp_flower_lag_reset(&priv->nfp_lag); + if (err) + nfp_flower_cmsg_warn(priv->app, + "mem err in group reset msg\n"); + mutex_unlock(&priv->nfp_lag.lock); + + schedule_delayed_work(&priv->nfp_lag.work, 0); + } + + return store_skb; +} + +static void +nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag, + struct nfp_fl_lag_group *group) +{ + group->to_remove = true; + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +static void +nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + if (!netif_is_bond_master(master)) + return; + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, master); + if (!group) { + mutex_unlock(&lag->lock); + nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n", + netdev_name(master)); + return; + } + + group->to_remove = true; + group->to_destroy = true; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +static int +nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *iter_netdev; + struct netdev_lag_upper_info *lag_upper_info; + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + unsigned int slave_count = 0; + bool can_offload = true; + struct nfp_repr *repr; + + if (!netif_is_lag_master(upper)) + return 0; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + can_offload = false; + break; + } + repr = netdev_priv(iter_netdev); + + /* Ensure all ports are created by the same app/on same card. */ + if (repr->app != priv->app) { + can_offload = false; + break; + } + + slave_count++; + } + rcu_read_unlock(); + + lag_upper_info = info->upper_info; + + /* Firmware supports active/backup and L3/L4 hash bonds. */ + if (lag_upper_info && + lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH || + (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) { + can_offload = false; + nfp_flower_cmsg_warn(priv->app, + "Unable to offload tx_type %u hash %u\n", + lag_upper_info->tx_type, + lag_upper_info->hash_type); + } + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper); + + if (slave_count == 0 || !can_offload) { + /* Cannot offload the group - remove if previously offloaded. */ + if (group && group->offloaded) + nfp_fl_lag_schedule_group_remove(lag, group); + + mutex_unlock(&lag->lock); + return 0; + } + + if (!group) { + group = nfp_fl_lag_group_create(lag, upper); + if (IS_ERR(group)) { + mutex_unlock(&lag->lock); + return PTR_ERR(group); + } + } + + group->dirty = true; + group->slave_cnt = slave_count; + + /* Group may have been on queue for removal but is now offloadable. */ + group->to_remove = false; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + return 0; +} + +static void +nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_flower_priv *priv; + struct nfp_repr *repr; + unsigned long *flags; + + if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev)) + return; + + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + repr = netdev_priv(netdev); + + /* Verify that the repr is associated with this app. */ + if (repr->app != priv->app) + return; + + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + mutex_lock(&lag->lock); + if (lag_lower_info->link_up) + *flags |= NFP_PORT_LAG_LINK_UP; + else + *flags &= ~NFP_PORT_LAG_LINK_UP; + + if (lag_lower_info->tx_enabled) + *flags |= NFP_PORT_LAG_TX_ENABLED; + else + *flags &= ~NFP_PORT_LAG_TX_ENABLED; + + *flags |= NFP_PORT_LAG_CHANGED; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, + struct net_device *netdev, + unsigned long event, void *ptr) +{ + struct nfp_fl_lag *lag = &priv->nfp_lag; + int err; + + switch (event) { + case NETDEV_CHANGEUPPER: + err = nfp_fl_lag_changeupper_event(lag, ptr); + if (err) + return NOTIFY_BAD; + return NOTIFY_OK; + case NETDEV_CHANGELOWERSTATE: + nfp_fl_lag_changels_event(lag, netdev, ptr); + return NOTIFY_OK; + case NETDEV_UNREGISTER: + nfp_fl_lag_schedule_group_delete(lag, netdev); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +int nfp_flower_lag_reset(struct nfp_fl_lag *lag) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + + lag->rst_cfg = true; + return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); +} + +void nfp_flower_lag_init(struct nfp_fl_lag *lag) +{ + INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work); + INIT_LIST_HEAD(&lag->group_list); + mutex_init(&lag->lock); + ida_init(&lag->ida_handle); + + __skb_queue_head_init(&lag->retrans_skbs); + + /* 0 is a reserved batch version so increment to first valid value. */ + nfp_fl_increment_version(lag); +} + +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag) +{ + struct nfp_fl_lag_group *entry, *storage; + + cancel_delayed_work_sync(&lag->work); + + __skb_queue_purge(&lag->retrans_skbs); + + /* Remove all groups. */ + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&lag->lock); + mutex_destroy(&lag->lock); + ida_destroy(&lag->ida_handle); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c new file mode 100644 index 000000000..4d960a964 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -0,0 +1,1023 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/etherdevice.h> +#include <linux/lockdep.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <net/devlink.h> +#include <net/dst_metadata.h> + +#include "main.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "../nfp_port.h" +#include "./cmsg.h" + +#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL + +#define NFP_MIN_INT_PORT_ID 1 +#define NFP_MAX_INT_PORT_ID 256 + +static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn) +{ + return "FLOWER"; +} + +static enum devlink_eswitch_mode eswitch_mode_get(struct nfp_app *app) +{ + return DEVLINK_ESWITCH_MODE_SWITCHDEV; +} + +static int +nfp_flower_lookup_internal_port_id(struct nfp_flower_priv *priv, + struct net_device *netdev) +{ + struct net_device *entry; + int i, id = 0; + + rcu_read_lock(); + idr_for_each_entry(&priv->internal_ports.port_ids, entry, i) + if (entry == netdev) { + id = i; + break; + } + rcu_read_unlock(); + + return id; +} + +static int +nfp_flower_get_internal_port_id(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + int id; + + id = nfp_flower_lookup_internal_port_id(priv, netdev); + if (id > 0) + return id; + + idr_preload(GFP_ATOMIC); + spin_lock_bh(&priv->internal_ports.lock); + id = idr_alloc(&priv->internal_ports.port_ids, netdev, + NFP_MIN_INT_PORT_ID, NFP_MAX_INT_PORT_ID, GFP_ATOMIC); + spin_unlock_bh(&priv->internal_ports.lock); + idr_preload_end(); + + return id; +} + +u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app, + struct net_device *netdev) +{ + int ext_port; + + if (nfp_netdev_is_nfp_repr(netdev)) { + return nfp_repr_get_port_id(netdev); + } else if (nfp_flower_internal_port_can_offload(app, netdev)) { + ext_port = nfp_flower_get_internal_port_id(app, netdev); + if (ext_port < 0) + return 0; + + return nfp_flower_internal_port_get_port_id(ext_port); + } + + return 0; +} + +static struct net_device * +nfp_flower_get_netdev_from_internal_port_id(struct nfp_app *app, int port_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct net_device *netdev; + + rcu_read_lock(); + netdev = idr_find(&priv->internal_ports.port_ids, port_id); + rcu_read_unlock(); + + return netdev; +} + +static void +nfp_flower_free_internal_port_id(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + int id; + + id = nfp_flower_lookup_internal_port_id(priv, netdev); + if (!id) + return; + + spin_lock_bh(&priv->internal_ports.lock); + idr_remove(&priv->internal_ports.port_ids, id); + spin_unlock_bh(&priv->internal_ports.lock); +} + +static int +nfp_flower_internal_port_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event) +{ + if (event == NETDEV_UNREGISTER && + nfp_flower_internal_port_can_offload(app, netdev)) + nfp_flower_free_internal_port_id(app, netdev); + + return NOTIFY_OK; +} + +static void nfp_flower_internal_port_init(struct nfp_flower_priv *priv) +{ + spin_lock_init(&priv->internal_ports.lock); + idr_init(&priv->internal_ports.port_ids); +} + +static void nfp_flower_internal_port_cleanup(struct nfp_flower_priv *priv) +{ + idr_destroy(&priv->internal_ports.port_ids); +} + +static struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + ASSERT_RTNL(); + + list_for_each_entry(entry, &priv->non_repr_priv, list) + if (entry->netdev == netdev) + return entry; + + return NULL; +} + +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + non_repr_priv->ref_count++; +} + +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (entry) + goto inc_ref; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->netdev = netdev; + list_add(&entry->list, &priv->non_repr_priv); + +inc_ref: + __nfp_flower_non_repr_priv_get(entry); + return entry; +} + +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + if (--non_repr_priv->ref_count) + return; + + list_del(&non_repr_priv->list); + kfree(non_repr_priv); +} + +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (!entry) + return; + + __nfp_flower_non_repr_priv_put(entry); +} + +static enum nfp_repr_type +nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) +{ + switch (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id)) { + case NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT: + *port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, + port_id); + return NFP_REPR_TYPE_PHYS_PORT; + + case NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT: + *port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port_id); + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC_TYPE, port_id) == + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF) + return NFP_REPR_TYPE_PF; + else + return NFP_REPR_TYPE_VF; + } + + return __NFP_REPR_TYPE_MAX; +} + +static struct net_device * +nfp_flower_dev_get(struct nfp_app *app, u32 port_id, bool *redir_egress) +{ + enum nfp_repr_type repr_type; + struct nfp_reprs *reprs; + u8 port = 0; + + /* Check if the port is internal. */ + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id) == + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT) { + if (redir_egress) + *redir_egress = true; + port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, port_id); + return nfp_flower_get_netdev_from_internal_port_id(app, port); + } + + repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port); + if (repr_type > NFP_REPR_TYPE_MAX) + return NULL; + + reprs = rcu_dereference(app->reprs[repr_type]); + if (!reprs) + return NULL; + + if (port >= reprs->num_reprs) + return NULL; + + return rcu_dereference(reprs->reprs[port]); +} + +static int +nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type, + bool exists) +{ + struct nfp_reprs *reprs; + int i, err, count = 0; + + reprs = rcu_dereference_protected(app->reprs[type], + nfp_app_is_locked(app)); + if (!reprs) + return 0; + + for (i = 0; i < reprs->num_reprs; i++) { + struct net_device *netdev; + + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) { + struct nfp_repr *repr = netdev_priv(netdev); + + err = nfp_flower_cmsg_portreify(repr, exists); + if (err) + return err; + count++; + } + } + + return count; +} + +static int +nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!tot_repl) + return 0; + + assert_nfp_app_locked(app); + if (!wait_event_timeout(priv->reify_wait_queue, + atomic_read(replies) >= tot_repl, + NFP_FL_REPLY_TIMEOUT)) { + nfp_warn(app->cpp, "Not all reprs responded to reify\n"); + return -EIO; + } + + return 0; +} + +static int +nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr) +{ + int err; + + err = nfp_flower_cmsg_portmod(repr, true, repr->netdev->mtu, false); + if (err) + return err; + + netif_tx_wake_all_queues(repr->netdev); + + return 0; +} + +static int +nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr) +{ + netif_tx_disable(repr->netdev); + + return nfp_flower_cmsg_portmod(repr, false, repr->netdev->mtu, false); +} + +static void +nfp_flower_repr_netdev_clean(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + kfree(repr->app_priv); +} + +static void +nfp_flower_repr_netdev_preclean(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_flower_priv *priv = app->priv; + atomic_t *replies = &priv->reify_replies; + int err; + + atomic_set(replies, 0); + err = nfp_flower_cmsg_portreify(repr, false); + if (err) { + nfp_warn(app->cpp, "Failed to notify firmware about repr destruction\n"); + return; + } + + nfp_flower_wait_repr_reify(app, replies, 1); +} + +static void nfp_flower_sriov_disable(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv->nn) + return; + + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF); +} + +static int +nfp_flower_spawn_vnic_reprs(struct nfp_app *app, + enum nfp_flower_cmsg_port_vnic_type vnic_type, + enum nfp_repr_type repr_type, unsigned int cnt) +{ + u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); + struct nfp_flower_priv *priv = app->priv; + atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; + enum nfp_port_type port_type; + struct nfp_repr *nfp_repr; + struct nfp_reprs *reprs; + int i, err, reify_cnt; + const u8 queue = 0; + + port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT : + NFP_PORT_VF_PORT; + + reprs = nfp_reprs_alloc(cnt); + if (!reprs) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + struct net_device *repr; + struct nfp_port *port; + u32 port_id; + + repr = nfp_repr_alloc(app); + if (!repr) { + err = -ENOMEM; + goto err_reprs_clean; + } + + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; + + /* For now we only support 1 PF */ + WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); + + port = nfp_port_alloc(app, port_type, repr); + if (IS_ERR(port)) { + err = PTR_ERR(port); + kfree(repr_priv); + nfp_repr_free(repr); + goto err_reprs_clean; + } + if (repr_type == NFP_REPR_TYPE_PF) { + port->pf_id = i; + port->vnic = priv->nn->dp.ctrl_bar; + } else { + port->pf_id = 0; + port->vf_id = i; + port->vnic = + app->pf->vf_cfg_mem + i * NFP_NET_CFG_BAR_SZ; + } + + eth_hw_addr_random(repr); + + port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type, + i, queue); + err = nfp_repr_init(app, repr, + port_id, port, priv->nn->dp.netdev); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + RCU_INIT_POINTER(reprs->reprs[i], repr); + nfp_info(app->cpp, "%s%d Representor(%s) created\n", + repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i, + repr->name); + } + + nfp_app_reprs_set(app, repr_type, reprs); + + atomic_set(replies, 0); + reify_cnt = nfp_flower_reprs_reify(app, repr_type, true); + if (reify_cnt < 0) { + err = reify_cnt; + nfp_warn(app->cpp, "Failed to notify firmware about repr creation\n"); + goto err_reprs_remove; + } + + err = nfp_flower_wait_repr_reify(app, replies, reify_cnt); + if (err) + goto err_reprs_remove; + + return 0; +err_reprs_remove: + reprs = nfp_app_reprs_set(app, repr_type, NULL); +err_reprs_clean: + nfp_reprs_clean_and_free(app, reprs); + return err; +} + +static int nfp_flower_sriov_enable(struct nfp_app *app, int num_vfs) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv->nn) + return 0; + + return nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF, + NFP_REPR_TYPE_VF, num_vfs); +} + +static int +nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) +{ + struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; + atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *nfp_repr; + struct sk_buff *ctrl_skb; + struct nfp_reprs *reprs; + int err, reify_cnt; + unsigned int i; + + ctrl_skb = nfp_flower_cmsg_mac_repr_start(app, eth_tbl->count); + if (!ctrl_skb) + return -ENOMEM; + + reprs = nfp_reprs_alloc(eth_tbl->max_index + 1); + if (!reprs) { + err = -ENOMEM; + goto err_free_ctrl_skb; + } + + for (i = 0; i < eth_tbl->count; i++) { + unsigned int phys_port = eth_tbl->ports[i].index; + struct net_device *repr; + struct nfp_port *port; + u32 cmsg_port_id; + + repr = nfp_repr_alloc(app); + if (!repr) { + err = -ENOMEM; + goto err_reprs_clean; + } + + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; + + port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); + if (IS_ERR(port)) { + err = PTR_ERR(port); + kfree(repr_priv); + nfp_repr_free(repr); + goto err_reprs_clean; + } + err = nfp_port_init_phy_port(app->pf, app, port, i); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + SET_NETDEV_DEV(repr, &priv->nn->pdev->dev); + nfp_net_get_mac_addr(app->pf, repr, port); + + cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); + err = nfp_repr_init(app, repr, + cmsg_port_id, port, priv->nn->dp.netdev); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_flower_cmsg_mac_repr_add(ctrl_skb, i, + eth_tbl->ports[i].nbi, + eth_tbl->ports[i].base, + phys_port); + + RCU_INIT_POINTER(reprs->reprs[phys_port], repr); + nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n", + phys_port, repr->name); + } + + nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); + + /* The REIFY/MAC_REPR control messages should be sent after the MAC + * representors are registered using nfp_app_reprs_set(). This is + * because the firmware may respond with control messages for the + * MAC representors, f.e. to provide the driver with information + * about their state, and without registration the driver will drop + * any such messages. + */ + atomic_set(replies, 0); + reify_cnt = nfp_flower_reprs_reify(app, NFP_REPR_TYPE_PHYS_PORT, true); + if (reify_cnt < 0) { + err = reify_cnt; + nfp_warn(app->cpp, "Failed to notify firmware about repr creation\n"); + goto err_reprs_remove; + } + + err = nfp_flower_wait_repr_reify(app, replies, reify_cnt); + if (err) + goto err_reprs_remove; + + nfp_ctrl_tx(app->ctrl, ctrl_skb); + + return 0; +err_reprs_remove: + reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, NULL); +err_reprs_clean: + nfp_reprs_clean_and_free(app, reprs); +err_free_ctrl_skb: + kfree_skb(ctrl_skb); + return err; +} + +static int nfp_flower_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, + unsigned int id) +{ + if (id > 0) { + nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n"); + goto err_invalid_port; + } + + eth_hw_addr_random(nn->dp.netdev); + netif_keep_dst(nn->dp.netdev); + nn->vnic_no_name = true; + + return 0; + +err_invalid_port: + nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev); + return PTR_ERR_OR_ZERO(nn->port); +} + +static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_flower_priv *priv = app->priv; + + if (app->pf->num_vfs) + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); + + priv->nn = NULL; +} + +static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_flower_priv *priv = app->priv; + int err; + + priv->nn = nn; + + err = nfp_flower_spawn_phy_reprs(app, app->priv); + if (err) + goto err_clear_nn; + + err = nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF, + NFP_REPR_TYPE_PF, 1); + if (err) + goto err_destroy_reprs_phy; + + if (app->pf->num_vfs) { + err = nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF, + NFP_REPR_TYPE_VF, + app->pf->num_vfs); + if (err) + goto err_destroy_reprs_pf; + } + + return 0; + +err_destroy_reprs_pf: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); +err_destroy_reprs_phy: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); +err_clear_nn: + priv->nn = NULL; + return err; +} + +static void nfp_flower_wait_host_bit(struct nfp_app *app) +{ + unsigned long err_at; + u64 feat; + int err; + + /* Wait for HOST_ACK flag bit to propagate */ + err_at = jiffies + msecs_to_jiffies(100); + do { + feat = nfp_rtsym_read_le(app->pf->rtbl, + "_abi_flower_combined_features_global", + &err); + if (time_is_before_eq_jiffies(err_at)) { + nfp_warn(app->cpp, + "HOST_ACK bit not propagated in FW.\n"); + break; + } + usleep_range(1000, 2000); + } while (!err && !(feat & NFP_FL_FEATS_HOST_ACK)); + + if (err) + nfp_warn(app->cpp, + "Could not read global features entry from FW\n"); +} + +static int nfp_flower_sync_feature_bits(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + int err; + + /* Tell the firmware of the host supported features. */ + err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_host_mask", + app_priv->flower_ext_feats | + NFP_FL_FEATS_HOST_ACK); + if (!err) + nfp_flower_wait_host_bit(app); + else if (err != -ENOENT) + return err; + + /* Tell the firmware that the driver supports lag. */ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_balance_sync_enable", 1); + if (!err) { + app_priv->flower_en_feats |= NFP_FL_ENABLE_LAG; + nfp_flower_lag_init(&app_priv->nfp_lag); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, "LAG not supported by FW.\n"); + } else { + return err; + } + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) { + /* Tell the firmware that the driver supports flow merging. */ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_merge_hint_enable", 1); + if (!err) { + app_priv->flower_en_feats |= NFP_FL_ENABLE_FLOW_MERGE; + nfp_flower_internal_port_init(app_priv); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, + "Flow merge not supported by FW.\n"); + } else { + return err; + } + } else { + nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n"); + } + + return 0; +} + +static int nfp_flower_init(struct nfp_app *app) +{ + u64 version, features, ctx_count, num_mems; + const struct nfp_pf *pf = app->pf; + struct nfp_flower_priv *app_priv; + int err; + + if (!pf->eth_tbl) { + nfp_warn(app->cpp, "FlowerNIC requires eth table\n"); + return -EINVAL; + } + + if (!pf->mac_stats_bar) { + nfp_warn(app->cpp, "FlowerNIC requires mac_stats BAR\n"); + return -EINVAL; + } + + if (!pf->vf_cfg_bar) { + nfp_warn(app->cpp, "FlowerNIC requires vf_cfg BAR\n"); + return -EINVAL; + } + + version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err); + if (err) { + nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n"); + return err; + } + + num_mems = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_SPLIT", + &err); + if (err) { + nfp_warn(app->cpp, + "FlowerNIC: unsupported host context memory: %d\n", + err); + err = 0; + num_mems = 1; + } + + if (!FIELD_FIT(NFP_FL_STAT_ID_MU_NUM, num_mems) || !num_mems) { + nfp_warn(app->cpp, + "FlowerNIC: invalid host context memory: %llu\n", + num_mems); + return -EINVAL; + } + + ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT", + &err); + if (err) { + nfp_warn(app->cpp, + "FlowerNIC: unsupported host context count: %d\n", + err); + err = 0; + ctx_count = BIT(17); + } + + /* We need to ensure hardware has enough flower capabilities. */ + if (version != NFP_FLOWER_ALLOWED_VER) { + nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n"); + return -EINVAL; + } + + app_priv = vzalloc(sizeof(struct nfp_flower_priv)); + if (!app_priv) + return -ENOMEM; + + app_priv->total_mem_units = num_mems; + app_priv->active_mem_unit = 0; + app_priv->stats_ring_size = roundup_pow_of_two(ctx_count); + app->priv = app_priv; + app_priv->app = app; + skb_queue_head_init(&app_priv->cmsg_skbs_high); + skb_queue_head_init(&app_priv->cmsg_skbs_low); + INIT_WORK(&app_priv->cmsg_work, nfp_flower_cmsg_process_rx); + init_waitqueue_head(&app_priv->reify_wait_queue); + + init_waitqueue_head(&app_priv->mtu_conf.wait_q); + spin_lock_init(&app_priv->mtu_conf.lock); + + err = nfp_flower_metadata_init(app, ctx_count, num_mems); + if (err) + goto err_free_app_priv; + + /* Extract the extra features supported by the firmware. */ + features = nfp_rtsym_read_le(app->pf->rtbl, + "_abi_flower_extra_features", &err); + if (err) + app_priv->flower_ext_feats = 0; + else + app_priv->flower_ext_feats = features & NFP_FL_FEATS_HOST; + + err = nfp_flower_sync_feature_bits(app); + if (err) + goto err_cleanup; + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) + nfp_flower_qos_init(app); + + INIT_LIST_HEAD(&app_priv->indr_block_cb_priv); + INIT_LIST_HEAD(&app_priv->non_repr_priv); + app_priv->pre_tun_rule_cnt = 0; + + return 0; + +err_cleanup: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) + nfp_flower_lag_cleanup(&app_priv->nfp_lag); + nfp_flower_metadata_cleanup(app); +err_free_app_priv: + vfree(app->priv); + return err; +} + +static void nfp_flower_clean(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + + skb_queue_purge(&app_priv->cmsg_skbs_high); + skb_queue_purge(&app_priv->cmsg_skbs_low); + flush_work(&app_priv->cmsg_work); + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) + nfp_flower_qos_cleanup(app); + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) + nfp_flower_lag_cleanup(&app_priv->nfp_lag); + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) + nfp_flower_internal_port_cleanup(app_priv); + + nfp_flower_metadata_cleanup(app); + vfree(app->priv); + app->priv = NULL; +} + +static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv) +{ + bool ret; + + spin_lock_bh(&app_priv->mtu_conf.lock); + ret = app_priv->mtu_conf.ack; + spin_unlock_bh(&app_priv->mtu_conf.lock); + + return ret; +} + +static int +nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, + int new_mtu) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_repr *repr = netdev_priv(netdev); + int err; + + /* Only need to config FW for physical port MTU change. */ + if (repr->port->type != NFP_PORT_PHYS_PORT) + return 0; + + if (!(app_priv->flower_ext_feats & NFP_FL_NBI_MTU_SETTING)) { + nfp_err(app->cpp, "Physical port MTU setting not supported\n"); + return -EINVAL; + } + + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.ack = false; + app_priv->mtu_conf.requested_val = new_mtu; + app_priv->mtu_conf.portnum = repr->dst->u.port_info.port_id; + spin_unlock_bh(&app_priv->mtu_conf.lock); + + err = nfp_flower_cmsg_portmod(repr, netif_carrier_ok(netdev), new_mtu, + true); + if (err) { + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.requested_val = 0; + spin_unlock_bh(&app_priv->mtu_conf.lock); + return err; + } + + /* Wait for fw to ack the change. */ + if (!wait_event_timeout(app_priv->mtu_conf.wait_q, + nfp_flower_check_ack(app_priv), + NFP_FL_REPLY_TIMEOUT)) { + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.requested_val = 0; + spin_unlock_bh(&app_priv->mtu_conf.lock); + nfp_warn(app->cpp, "MTU change not verified with fw\n"); + return -EIO; + } + + return 0; +} + +static int nfp_flower_start(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + int err; + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + err = nfp_flower_lag_reset(&app_priv->nfp_lag); + if (err) + return err; + } + + err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); + if (err) + return err; + + err = nfp_tunnel_config_start(app); + if (err) + goto err_tunnel_config; + + return 0; + +err_tunnel_config: + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); + return err; +} + +static void nfp_flower_stop(struct nfp_app *app) +{ + nfp_tunnel_config_stop(app); + + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); +} + +static int +nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev, + unsigned long event, void *ptr) +{ + struct nfp_flower_priv *app_priv = app->priv; + int ret; + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + ret = nfp_flower_lag_netdev_event(app_priv, netdev, event, ptr); + if (ret & NOTIFY_STOP_MASK) + return ret; + } + + ret = nfp_flower_internal_port_event_handler(app, netdev, event); + if (ret & NOTIFY_STOP_MASK) + return ret; + + return nfp_tunnel_mac_event_handler(app, netdev, event, ptr); +} + +const struct nfp_app_type app_flower = { + .id = NFP_APP_FLOWER_NIC, + .name = "flower", + + .ctrl_cap_mask = ~0U, + .ctrl_has_meta = true, + + .extra_cap = nfp_flower_extra_cap, + + .init = nfp_flower_init, + .clean = nfp_flower_clean, + + .repr_change_mtu = nfp_flower_repr_change_mtu, + + .vnic_alloc = nfp_flower_vnic_alloc, + .vnic_init = nfp_flower_vnic_init, + .vnic_clean = nfp_flower_vnic_clean, + + .repr_preclean = nfp_flower_repr_netdev_preclean, + .repr_clean = nfp_flower_repr_netdev_clean, + + .repr_open = nfp_flower_repr_netdev_open, + .repr_stop = nfp_flower_repr_netdev_stop, + + .start = nfp_flower_start, + .stop = nfp_flower_stop, + + .netdev_event = nfp_flower_netdev_event, + + .ctrl_msg_rx = nfp_flower_cmsg_rx, + + .sriov_enable = nfp_flower_sriov_enable, + .sriov_disable = nfp_flower_sriov_disable, + + .eswitch_mode_get = eswitch_mode_get, + .dev_get = nfp_flower_dev_get, + + .setup_tc = nfp_flower_setup_tc, +}; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h new file mode 100644 index 000000000..d0ab71ce3 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -0,0 +1,713 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef __NFP_FLOWER_H__ +#define __NFP_FLOWER_H__ 1 + +#include "cmsg.h" +#include "../nfp_net.h" + +#include <linux/circ_buf.h> +#include <linux/hashtable.h> +#include <linux/rhashtable.h> +#include <linux/time64.h> +#include <linux/types.h> +#include <net/flow_offload.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> +#include <net/tcp.h> +#include <linux/workqueue.h> +#include <linux/idr.h> + +struct nfp_fl_pre_lag; +struct net_device; +struct nfp_app; + +#define NFP_FL_STAT_ID_MU_NUM GENMASK(31, 22) +#define NFP_FL_STAT_ID_STAT GENMASK(21, 0) + +#define NFP_FL_STATS_ELEM_RS sizeof_field(struct nfp_fl_stats_id, \ + init_unalloc) +#define NFP_FLOWER_MASK_ENTRY_RS 256 +#define NFP_FLOWER_MASK_ELEMENT_RS 1 +#define NFP_FLOWER_MASK_HASH_BITS 10 + +#define NFP_FLOWER_KEY_MAX_LW 32 + +#define NFP_FL_META_FLAG_MANAGE_MASK BIT(7) + +#define NFP_FL_MASK_REUSE_TIME_NS 40000 +#define NFP_FL_MASK_ID_LOCATION 1 + +/* Extra features bitmap. */ +#define NFP_FL_FEATS_GENEVE BIT(0) +#define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) +#define NFP_FL_FEATS_VLAN_PCP BIT(3) +#define NFP_FL_FEATS_VF_RLIM BIT(4) +#define NFP_FL_FEATS_FLOW_MOD BIT(5) +#define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) +#define NFP_FL_FEATS_VLAN_QINQ BIT(8) +#define NFP_FL_FEATS_QOS_PPS BIT(9) +#define NFP_FL_FEATS_QOS_METER BIT(10) +#define NFP_FL_FEATS_DECAP_V2 BIT(11) +#define NFP_FL_FEATS_HOST_ACK BIT(31) + +#define NFP_FL_ENABLE_FLOW_MERGE BIT(0) +#define NFP_FL_ENABLE_LAG BIT(1) + +#define NFP_FL_FEATS_HOST \ + (NFP_FL_FEATS_GENEVE | \ + NFP_FL_NBI_MTU_SETTING | \ + NFP_FL_FEATS_GENEVE_OPT | \ + NFP_FL_FEATS_VLAN_PCP | \ + NFP_FL_FEATS_VF_RLIM | \ + NFP_FL_FEATS_FLOW_MOD | \ + NFP_FL_FEATS_PRE_TUN_RULES | \ + NFP_FL_FEATS_IPV6_TUN | \ + NFP_FL_FEATS_VLAN_QINQ | \ + NFP_FL_FEATS_QOS_PPS | \ + NFP_FL_FEATS_QOS_METER | \ + NFP_FL_FEATS_DECAP_V2) + +struct nfp_fl_mask_id { + struct circ_buf mask_id_free_list; + ktime_t *last_used; + u8 init_unallocated; +}; + +struct nfp_fl_stats_id { + struct circ_buf free_list; + u32 init_unalloc; + u8 repeated_em_count; +}; + +/** + * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads + * @offloaded_macs: Hashtable of the offloaded MAC addresses + * @ipv4_off_list: List of IPv4 addresses to offload + * @ipv6_off_list: List of IPv6 addresses to offload + * @ipv4_off_lock: Lock for the IPv4 address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @mac_off_ids: IDA to manage id assignment for offloaded MACs + * @neigh_nb: Notifier to monitor neighbour state + */ +struct nfp_fl_tunnel_offloads { + struct rhashtable offloaded_macs; + struct list_head ipv4_off_list; + struct list_head ipv6_off_list; + struct mutex ipv4_off_lock; + struct mutex ipv6_off_lock; + struct ida mac_off_ids; + struct notifier_block neigh_nb; +}; + +/** + * struct nfp_tun_neigh - basic neighbour data + * @dst_addr: Destination MAC address + * @src_addr: Source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_neigh_ext - extended neighbour data + * @vlan_tpid: VLAN_TPID match field + * @vlan_tci: VLAN_TCI match field + * @host_ctx: Host context ID to be saved here + */ +struct nfp_tun_neigh_ext { + __be16 vlan_tpid; + __be16 vlan_tci; + __be32 host_ctx; +}; + +/** + * struct nfp_tun_neigh_v4 - neighbour/route entry on the NFP for IPv4 + * @dst_ipv4: Destination IPv4 address + * @src_ipv4: Source IPv4 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v4 { + __be32 dst_ipv4; + __be32 src_ipv4; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP for IPv6 + * @dst_ipv6: Destination IPv6 address + * @src_ipv6: Source IPv6 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** + * struct nfp_neigh_entry + * @neigh_cookie: Cookie for hashtable lookup + * @ht_node: rhash_head entry for hashtable + * @list_head: Needed as member of linked_nn_entries list + * @payload: The neighbour info payload + * @flow: Linked flow rule + * @is_ipv6: Flag to indicate if payload is ipv6 or ipv4 + */ +struct nfp_neigh_entry { + unsigned long neigh_cookie; + struct rhash_head ht_node; + struct list_head list_head; + char *payload; + struct nfp_predt_entry *flow; + bool is_ipv6; +}; + +/** + * struct nfp_predt_entry + * @list_head: List head to attach to predt_list + * @flow_pay: Direct link to flow_payload + * @nn_list: List of linked nfp_neigh_entries + */ +struct nfp_predt_entry { + struct list_head list_head; + struct nfp_fl_payload *flow_pay; + struct list_head nn_list; +}; + +/** + * struct nfp_mtu_conf - manage MTU setting + * @portnum: NFP port number of repr with requested MTU change + * @requested_val: MTU value requested for repr + * @ack: Received ack that MTU has been correctly set + * @wait_q: Wait queue for MTU acknowledgements + * @lock: Lock for setting/reading MTU variables + */ +struct nfp_mtu_conf { + u32 portnum; + unsigned int requested_val; + bool ack; + wait_queue_head_t wait_q; + spinlock_t lock; +}; + +/** + * struct nfp_fl_lag - Flower APP priv data for link aggregation + * @work: Work queue for writing configs to the HW + * @lock: Lock to protect lag_group_list + * @group_list: List of all master/slave groups offloaded + * @ida_handle: IDA to handle group ids + * @pkt_num: Incremented for each config packet sent + * @batch_ver: Incremented for each batch of config packets + * @global_inst: Instance allocator for groups + * @rst_cfg: Marker to reset HW LAG config + * @retrans_skbs: Cmsgs that could not be processed by HW and require + * retransmission + */ +struct nfp_fl_lag { + struct delayed_work work; + struct mutex lock; + struct list_head group_list; + struct ida ida_handle; + unsigned int pkt_num; + unsigned int batch_ver; + u8 global_inst; + bool rst_cfg; + struct sk_buff_head retrans_skbs; +}; + +/** + * struct nfp_fl_internal_ports - Flower APP priv data for additional ports + * @port_ids: Assignment of ids to any additional ports + * @lock: Lock for extra ports list + */ +struct nfp_fl_internal_ports { + struct idr port_ids; + spinlock_t lock; +}; + +/** + * struct nfp_flower_priv - Flower APP per-vNIC priv data + * @app: Back pointer to app + * @nn: Pointer to vNIC + * @mask_id_seed: Seed used for mask hash table + * @flower_version: HW version of flower + * @flower_ext_feats: Bitmap of extra features the HW supports + * @flower_en_feats: Bitmap of features enabled by HW + * @stats_ids: List of free stats ids + * @mask_ids: List of free mask ids + * @mask_table: Hash table used to store masks + * @stats_ring_size: Maximum number of allowed stats ids + * @flow_table: Hash table used to store flower rules + * @stats: Stored stats updates for flower rules + * @stats_lock: Lock for flower rule stats updates + * @stats_ctx_table: Hash table to map stats contexts to its flow rule + * @cmsg_work: Workqueue for control messages processing + * @cmsg_skbs_high: List of higher priority skbs for control message + * processing + * @cmsg_skbs_low: List of lower priority skbs for control message + * processing + * @tun: Tunnel offload data + * @reify_replies: atomically stores the number of replies received + * from firmware for repr reify + * @reify_wait_queue: wait queue for repr reify response counting + * @mtu_conf: Configuration of repr MTU value + * @nfp_lag: Link aggregation data block + * @indr_block_cb_priv: List of priv data passed to indirect block cbs + * @non_repr_priv: List of offloaded non-repr ports and their priv data + * @active_mem_unit: Current active memory unit for flower rules + * @total_mem_units: Total number of available memory units for flower rules + * @internal_ports: Internal port ids used in offloaded rules + * @qos_stats_work: Workqueue for qos stats processing + * @qos_rate_limiters: Current active qos rate limiters + * @qos_stats_lock: Lock on qos stats updates + * @meter_stats_lock: Lock on meter stats updates + * @meter_table: Hash table used to store the meter table + * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows + * @ct_zone_table: Hash table used to store the different zones + * @ct_zone_wc: Special zone entry for wildcarded zone matches + * @ct_map_table: Hash table used to referennce ct flows + * @predt_list: List to keep track of decap pretun flows + * @neigh_table: Table to keep track of neighbor entries + * @predt_lock: Lock to serialise predt/neigh table updates + * @nfp_fl_lock: Lock to protect the flow offload operation + */ +struct nfp_flower_priv { + struct nfp_app *app; + struct nfp_net *nn; + u32 mask_id_seed; + u64 flower_version; + u64 flower_ext_feats; + u8 flower_en_feats; + struct nfp_fl_stats_id stats_ids; + struct nfp_fl_mask_id mask_ids; + DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); + u32 stats_ring_size; + struct rhashtable flow_table; + struct nfp_fl_stats *stats; + spinlock_t stats_lock; /* lock stats */ + struct rhashtable stats_ctx_table; + struct work_struct cmsg_work; + struct sk_buff_head cmsg_skbs_high; + struct sk_buff_head cmsg_skbs_low; + struct nfp_fl_tunnel_offloads tun; + atomic_t reify_replies; + wait_queue_head_t reify_wait_queue; + struct nfp_mtu_conf mtu_conf; + struct nfp_fl_lag nfp_lag; + struct list_head indr_block_cb_priv; + struct list_head non_repr_priv; + unsigned int active_mem_unit; + unsigned int total_mem_units; + struct nfp_fl_internal_ports internal_ports; + struct delayed_work qos_stats_work; + unsigned int qos_rate_limiters; + spinlock_t qos_stats_lock; /* Protect the qos stats */ + struct mutex meter_stats_lock; /* Protect the meter stats */ + struct rhashtable meter_table; + int pre_tun_rule_cnt; + struct rhashtable merge_table; + struct rhashtable ct_zone_table; + struct nfp_fl_ct_zone_entry *ct_zone_wc; + struct rhashtable ct_map_table; + struct list_head predt_list; + struct rhashtable neigh_table; + spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ + struct mutex nfp_fl_lock; /* Protect the flow operation */ +}; + +/** + * struct nfp_fl_qos - Flower APP priv data for quality of service + * @netdev_port_id: NFP port number of repr with qos info + * @curr_stats: Currently stored stats updates for qos info + * @prev_stats: Previously stored updates for qos info + * @last_update: Stored time when last stats were updated + */ +struct nfp_fl_qos { + u32 netdev_port_id; + struct nfp_stat_pair curr_stats; + struct nfp_stat_pair prev_stats; + u64 last_update; +}; + +/** + * struct nfp_flower_repr_priv - Flower APP per-repr priv data + * @nfp_repr: Back pointer to nfp_repr + * @lag_port_flags: Extended port flags to record lag state of repr + * @mac_offloaded: Flag indicating a MAC address is offloaded for repr + * @offloaded_mac_addr: MAC address that has been offloaded for repr + * @block_shared: Flag indicating if offload applies to shared blocks + * @mac_list: List entry of reprs that share the same offloaded MAC + * @qos_table: Stored info on filters implementing qos + * @on_bridge: Indicates if the repr is attached to a bridge + */ +struct nfp_flower_repr_priv { + struct nfp_repr *nfp_repr; + unsigned long lag_port_flags; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; + bool block_shared; + struct list_head mac_list; + struct nfp_fl_qos qos_table; + bool on_bridge; +}; + +/** + * struct nfp_flower_non_repr_priv - Priv data for non-repr offloaded ports + * @list: List entry of offloaded reprs + * @netdev: Pointer to non-repr net_device + * @ref_count: Number of references held for this priv data + * @mac_offloaded: Flag indicating a MAC address is offloaded for device + * @offloaded_mac_addr: MAC address that has been offloaded for dev + */ +struct nfp_flower_non_repr_priv { + struct list_head list; + struct net_device *netdev; + int ref_count; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; +}; + +struct nfp_fl_key_ls { + u32 key_layer_two; + u8 key_layer; + int key_size; +}; + +struct nfp_fl_rule_metadata { + u8 key_len; + u8 mask_len; + u8 act_len; + u8 flags; + __be32 host_ctx_id; + __be64 host_cookie __packed; + __be64 flow_version __packed; + __be32 shortcut; +}; + +struct nfp_fl_stats { + u64 pkts; + u64 bytes; + u64 used; +}; + +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + +struct nfp_fl_payload { + struct nfp_fl_rule_metadata meta; + unsigned long tc_flower_cookie; + struct rhash_head fl_node; + struct rcu_head rcu; + __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; + struct net_device *ingress_dev; + char *unmasked_data; + char *mask_data; + char *action_data; + struct list_head linked_flows; + bool in_hw; + struct { + struct nfp_predt_entry *predt; + struct net_device *dev; + __be16 vlan_tpid; + __be16 vlan_tci; + __be16 port_idx; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + bool is_ipv6; + } pre_tun_rule; +}; + +struct nfp_fl_payload_link { + /* A link contains a pointer to a merge flow and an associated sub_flow. + * Each merge flow will feature in 2 links to its underlying sub_flows. + * A sub_flow will have at least 1 link to a merge flow or more if it + * has been used to create multiple merge flows. + * + * For a merge flow, 'linked_flows' in its nfp_fl_payload struct lists + * all links to sub_flows (sub_flow.flow) via merge.list. + * For a sub_flow, 'linked_flows' gives all links to merge flows it has + * formed (merge_flow.flow) via sub_flow.list. + */ + struct { + struct list_head list; + struct nfp_fl_payload *flow; + } merge_flow, sub_flow; +}; + +extern const struct rhashtable_params nfp_flower_table_params; +extern const struct rhashtable_params merge_table_params; +extern const struct rhashtable_params neigh_table_params; + +struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; +}; + +struct nfp_fl_stats_frame { + __be32 stats_con_id; + __be32 pkt_count; + __be64 byte_count; + __be64 stats_cookie; +}; + +struct nfp_meter_stats_entry { + u64 pkts; + u64 bytes; + u64 drops; +}; + +struct nfp_meter_entry { + struct rhash_head ht_node; + u32 meter_id; + bool bps; + u32 rate; + u32 burst; + u64 used; + struct nfp_meter_stats { + u64 update; + struct nfp_meter_stats_entry curr; + struct nfp_meter_stats_entry prev; + } stats; +}; + +enum nfp_meter_op { + NFP_METER_ADD, + NFP_METER_DEL, +}; + +static inline bool +nfp_flower_internal_port_can_offload(struct nfp_app *app, + struct net_device *netdev) +{ + struct nfp_flower_priv *app_priv = app->priv; + + if (!(app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE)) + return false; + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + + return false; +} + +/* The address of the merged flow acts as its cookie. + * Cookies supplied to us by TC flower are also addresses to allocated + * memory and thus this scheme should not generate any collisions. + */ +static inline bool nfp_flower_is_merge_flow(struct nfp_fl_payload *flow_pay) +{ + return flow_pay->tc_flower_cookie == (unsigned long)flow_pay; +} + +static inline bool nfp_flower_is_supported_bridge(struct net_device *netdev) +{ + return netif_is_ovs_master(netdev); +} + +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_ctx_split); +void nfp_flower_metadata_cleanup(struct nfp_app *app); + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data); +int nfp_flower_merge_offloaded_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2); +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type); +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext); +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule); +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule); +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule); +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule); +int nfp_flower_compile_flow_match(struct nfp_app *app, + struct flow_rule *rule, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +int nfp_flower_compile_action(struct nfp_app *app, + struct flow_rule *rule, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack); +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev, + struct netlink_ext_ack *extack); +void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, + struct nfp_fl_payload *nfp_flow); +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow); + +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev); +struct nfp_fl_payload * +nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id); +struct nfp_fl_payload * +nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); + +int nfp_tunnel_config_start(struct nfp_app *app); +void nfp_tunnel_config_stop(struct nfp_app *app); +int nfp_tunnel_mac_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event, void *ptr); +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); +void nfp_flower_lag_init(struct nfp_fl_lag *lag); +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); +int nfp_flower_lag_reset(struct nfp_fl_lag *lag); +int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, + struct net_device *netdev, + unsigned long event, void *ptr); +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb); +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct net_device *master, + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack); +int nfp_flower_lag_get_output_id(struct nfp_app *app, + struct net_device *master); +void nfp_flower_qos_init(struct nfp_app *app); +void nfp_flower_qos_cleanup(struct nfp_app *app); +int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow); +void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb); +int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); +void nfp_flower_setup_indr_tc_release(void *cb_priv); + +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv); +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev); +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv); +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev); +u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app, + struct net_device *netdev); +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, + struct nfp_fl_payload *flow); +int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, + struct nfp_fl_payload *flow); + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer); +int nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype); +void +nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act); +int nfp_init_meter_table(struct nfp_app *app); +void nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv); +void nfp_act_stats_reply(struct nfp_app *app, void *pmsg); +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst); +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id); +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c new file mode 100644 index 000000000..e01430139 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" + +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type) +{ + /* Populate the metadata frame. */ + ext->nfp_flow_key_layer = key_type; + ext->mask_id = ~0; + + msk->nfp_flow_key_layer = key_type; + msk->mask_id = ~0; +} + +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule) +{ + u16 msk_tci, key_tci; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + /* Populate the tci field. */ + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.key->vlan_id); + + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id); + + ext->tci |= cpu_to_be16((key_tci & msk_tci)); + msk->tci |= cpu_to_be16(msk_tci); + } +} + +static void +nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule, u8 key_type, bool qinq_sup) +{ + memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); + memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); + + nfp_flower_compile_meta(ext, msk, key_type); + + if (!qinq_sup) + nfp_flower_compile_tci(ext, msk, rule); +} + +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) +{ + frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext); +} + +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) +{ + if (mask_version) { + frame->in_port = cpu_to_be32(~0); + return 0; + } + + if (tun_type) { + frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else { + if (!cmsg_port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid ingress interface for match offload"); + return -EOPNOTSUPP; + } + frame->in_port = cpu_to_be32(cmsg_port); + } + + return 0; +} + +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + u8 tmp; + int i; + + flow_rule_match_eth_addrs(rule, &match); + /* Populate mac frame. */ + for (i = 0; i < ETH_ALEN; i++) { + tmp = match.key->dst[i] & match.mask->dst[i]; + ext->mac_dst[i] |= tmp & (~msk->mac_dst[i]); + msk->mac_dst[i] |= match.mask->dst[i]; + + tmp = match.key->src[i] & match.mask->src[i]; + ext->mac_src[i] |= tmp & (~msk->mac_src[i]); + msk->mac_src[i] |= match.mask->src[i]; + } + } +} + +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_match_mpls match; + u32 key_mpls, msk_mpls; + + flow_rule_match_mpls(rule, &match); + + /* Only support matching the first LSE */ + if (match.mask->used_lses != 1) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: invalid LSE depth for MPLS match offload"); + return -EOPNOTSUPP; + } + + key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.key->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.key->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.key->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.mask->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.mask->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.mask->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls)); + msk->mpls_lse |= cpu_to_be32(msk_mpls); + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q + * bit, which indicates an mpls ether type but without any + * mpls fields. + */ + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || + match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) { + ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + } + } + + return 0; +} + +static int +nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); + + nfp_flower_compile_mac(ext, msk, rule); + + return nfp_flower_compile_mpls(ext, msk, rule, extack); +} + +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + __be16 tmp; + + flow_rule_match_ports(rule, &match); + + tmp = match.key->src & match.mask->src; + ext->port_src |= tmp & (~msk->port_src); + msk->port_src |= match.mask->src; + + tmp = match.key->dst & match.mask->dst; + ext->port_dst |= tmp & (~msk->port_dst); + msk->port_dst |= match.mask->dst; + } +} + +static void +nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ext->proto |= match.key->ip_proto & match.mask->ip_proto; + msk->proto |= match.mask->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + u8 tmp; + + flow_rule_match_ip(rule, &match); + + tmp = match.key->tos & match.mask->tos; + ext->tos |= tmp & (~msk->tos); + msk->tos |= match.mask->tos; + + tmp = match.key->ttl & match.mask->ttl; + ext->ttl |= tmp & (~msk->ttl); + msk->ttl |= match.mask->ttl; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + u16 tcp_flags, tcp_flags_mask; + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + tcp_flags = be16_to_cpu(match.key->flags); + tcp_flags_mask = be16_to_cpu(match.mask->flags); + + if (tcp_flags & TCPHDR_FIN) + ext->flags |= NFP_FL_TCP_FLAG_FIN; + if (tcp_flags_mask & TCPHDR_FIN) + msk->flags |= NFP_FL_TCP_FLAG_FIN; + + if (tcp_flags & TCPHDR_SYN) + ext->flags |= NFP_FL_TCP_FLAG_SYN; + if (tcp_flags_mask & TCPHDR_SYN) + msk->flags |= NFP_FL_TCP_FLAG_SYN; + + if (tcp_flags & TCPHDR_RST) + ext->flags |= NFP_FL_TCP_FLAG_RST; + if (tcp_flags_mask & TCPHDR_RST) + msk->flags |= NFP_FL_TCP_FLAG_RST; + + if (tcp_flags & TCPHDR_PSH) + ext->flags |= NFP_FL_TCP_FLAG_PSH; + if (tcp_flags_mask & TCPHDR_PSH) + msk->flags |= NFP_FL_TCP_FLAG_PSH; + + if (tcp_flags & TCPHDR_URG) + ext->flags |= NFP_FL_TCP_FLAG_URG; + if (tcp_flags_mask & TCPHDR_URG) + msk->flags |= NFP_FL_TCP_FLAG_URG; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + if (match.key->flags & FLOW_DIS_IS_FRAGMENT) + ext->flags |= NFP_FL_IP_FRAGMENTED; + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) + msk->flags |= NFP_FL_IP_FRAGMENTED; + if (match.key->flags & FLOW_DIS_FIRST_FRAG) + ext->flags |= NFP_FL_IP_FRAG_FIRST; + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) + msk->flags |= NFP_FL_IP_FRAG_FIRST; + } +} + +static void +nfp_flower_fill_vlan(struct flow_match_vlan *match, + struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, bool outer_vlan) +{ + struct flow_dissector_key_vlan *mask = match->mask; + struct flow_dissector_key_vlan *key = match->key; + u16 msk_tci, key_tci; + + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + key->vlan_id); + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + mask->vlan_id); + + if (outer_vlan) { + ext->outer_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->outer_tci |= cpu_to_be16(msk_tci); + msk->outer_tpid |= mask->vlan_tpid; + } else { + ext->inner_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->inner_tci |= cpu_to_be16(msk_tci); + msk->inner_tpid |= mask->vlan_tpid; + } +} + +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule) +{ + struct flow_match_vlan match; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + flow_rule_match_vlan(rule, &match); + nfp_flower_fill_vlan(&match, ext, msk, true); + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + flow_rule_match_cvlan(rule, &match); + nfp_flower_fill_vlan(&match, ext, msk, false); + } +} + +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + __be32 tmp; + + flow_rule_match_ipv4_addrs(rule, &match); + + tmp = match.key->src & match.mask->src; + ext->ipv4_src |= tmp & (~msk->ipv4_src); + msk->ipv4_src |= match.mask->src; + + tmp = match.key->dst & match.mask->dst; + ext->ipv4_dst |= tmp & (~msk->ipv4_dst); + msk->ipv4_dst |= match.mask->dst; + } + + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); +} + +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + u8 tmp; + int i; + + flow_rule_match_ipv6_addrs(rule, &match); + for (i = 0; i < sizeof(ext->ipv6_src); i++) { + tmp = match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->ipv6_src.s6_addr[i] |= tmp & + (~msk->ipv6_src.s6_addr[i]); + msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i]; + + tmp = match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + ext->ipv6_dst.s6_addr[i] |= tmp & + (~msk->ipv6_dst.s6_addr[i]); + msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } + } + + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); +} + +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule) +{ + struct flow_match_enc_opts match; + int i; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + flow_rule_match_enc_opts(rule, &match); + + for (i = 0; i < match.mask->len; i++) { + ext[i] |= match.key->data[i] & match.mask->data[i]; + msk[i] |= match.mask->data[i]; + } + } +} + +static void +nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, + struct nfp_flower_tun_ipv4 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + ext->src |= match.key->src & match.mask->src; + ext->dst |= match.key->dst & match.mask->dst; + msk->src |= match.mask->src; + msk->dst |= match.mask->dst; + } +} + +static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + int i; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + for (i = 0; i < sizeof(ext->src); i++) { + ext->src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } + } +} + +static void +nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, + struct nfp_flower_tun_ip_ext *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + ext->tos |= match.key->tos & match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; + } +} + +static void +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; + + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) << + NFP_FL_TUN_VNI_OFFSET; + *key |= cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk |= cpu_to_be32(vni); + } +} + +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + + flow_rule_match_enc_keyid(rule, &match); + *key |= match.key->keyid & match.mask->keyid; + *key_msk |= match.mask->keyid; + + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + } +} + +void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); +} + +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule) +{ + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule) +{ + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); +} + +int nfp_flower_compile_flow_match(struct nfp_app *app, + struct flow_rule *rule, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + bool qinq_sup; + u32 port_id; + int ext_len; + int err; + u8 *ext; + u8 *msk; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); + memset(nfp_flow->mask_data, 0, key_ls->key_size); + + ext = nfp_flow->unmasked_data; + msk = nfp_flow->mask_data; + + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, + (struct nfp_flower_meta_tci *)msk, + rule, key_ls->key_layer, qinq_sup); + ext += sizeof(struct nfp_flower_meta_tci); + msk += sizeof(struct nfp_flower_meta_tci); + + /* Populate Extended Metadata if Required. */ + if (NFP_FLOWER_LAYER_EXT_META & key_ls->key_layer) { + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)ext, + key_ls->key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_ls->key_layer_two); + ext += sizeof(struct nfp_flower_ext_meta); + msk += sizeof(struct nfp_flower_ext_meta); + } + + /* Populate Exact Port data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, + port_id, false, tun_type, extack); + if (err) + return err; + + /* Populate Mask Port Data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, extack); + if (err) + return err; + + ext += sizeof(struct nfp_flower_in_port); + msk += sizeof(struct nfp_flower_in_port); + + if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { + err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext, + (struct nfp_flower_mac_mpls *)msk, + rule, extack); + if (err) + return err; + + ext += sizeof(struct nfp_flower_mac_mpls); + msk += sizeof(struct nfp_flower_mac_mpls); + } + + if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, + (struct nfp_flower_tp_ports *)msk, + rule); + ext += sizeof(struct nfp_flower_tp_ports); + msk += sizeof(struct nfp_flower_tp_ports); + } + + if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, + (struct nfp_flower_ipv4 *)msk, + rule); + ext += sizeof(struct nfp_flower_ipv4); + msk += sizeof(struct nfp_flower_ipv4); + } + + if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, + (struct nfp_flower_ipv6 *)msk, + rule); + ext += sizeof(struct nfp_flower_ipv6); + msk += sizeof(struct nfp_flower_ipv6); + } + + if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext, + (struct nfp_flower_vlan *)msk, + rule); + ext += sizeof(struct nfp_flower_vlan); + msk += sizeof(struct nfp_flower_vlan); + } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } + } + + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || + key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + nfp_flower_compile_geneve_opt(ext, msk, rule); + } + } + + /* Check that the flow key does not exceed the maximum limit. + * All structures in the key is multiples of 4 bytes, so use u32. + */ + ext_len = (u32 *)ext - (u32 *)nfp_flow->unmasked_data; + if (ext_len > NFP_FLOWER_KEY_MAX_LW) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: flow key too long"); + return -EOPNOTSUPP; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c new file mode 100644 index 000000000..80e467558 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> +#include <linux/math64.h> +#include <linux/vmalloc.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "conntrack.h" +#include "main.h" +#include "../nfp_app.h" + +struct nfp_mask_id_table { + struct hlist_node link; + u32 hash_key; + u32 ref_cnt; + u8 mask_id; +}; + +struct nfp_fl_flow_table_cmp_arg { + struct net_device *netdev; + unsigned long cookie; +}; + +struct nfp_fl_stats_ctx_to_flow { + struct rhash_head ht_node; + u32 stats_cxt; + struct nfp_fl_payload *flow; +}; + +static const struct rhashtable_params stats_ctx_table_params = { + .key_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, stats_cxt), + .head_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, ht_node), + .key_len = sizeof(u32), +}; + +static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + /* Check if buffer is full, stats_ring_size must be power of 2 */ + if (!CIRC_SPACE(ring->head, ring->tail, priv->stats_ring_size)) + return -ENOBUFS; + + /* Each increment of head represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&ring->buf[ring->head * NFP_FL_STATS_ELEM_RS], + &stats_context_id, NFP_FL_STATS_ELEM_RS); + ring->head = (ring->head + 1) & (priv->stats_ring_size - 1); + + return 0; +} + +static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + u32 freed_stats_id, temp_stats_id; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + freed_stats_id = priv->stats_ring_size; + /* Check for unallocated entries first. */ + if (priv->stats_ids.init_unalloc > 0) { + *stats_context_id = + FIELD_PREP(NFP_FL_STAT_ID_STAT, + priv->stats_ids.init_unalloc - 1) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, + priv->active_mem_unit); + + if (++priv->active_mem_unit == priv->total_mem_units) { + priv->stats_ids.init_unalloc--; + priv->active_mem_unit = 0; + } + + return 0; + } + + /* Check if buffer is empty. */ + if (ring->head == ring->tail) { + *stats_context_id = freed_stats_id; + return -ENOENT; + } + + /* Each increment of tail represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&temp_stats_id, &ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], + NFP_FL_STATS_ELEM_RS); + *stats_context_id = temp_stats_id; + memcpy(&ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], &freed_stats_id, + NFP_FL_STATS_ELEM_RS); + /* stats_ring_size must be power of 2 */ + ring->tail = (ring->tail + 1) & (priv->stats_ring_size - 1); + + return 0; +} + +/* Must be called with either RTNL or rcu_read_lock */ +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev) +{ + struct nfp_fl_flow_table_cmp_arg flower_cmp_arg; + struct nfp_flower_priv *priv = app->priv; + + flower_cmp_arg.netdev = netdev; + flower_cmp_arg.cookie = tc_flower_cookie; + + return rhashtable_lookup_fast(&priv->flow_table, &flower_cmp_arg, + nfp_flower_table_params); +} + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) +{ + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_stats_frame *stats; + unsigned char *msg; + u32 ctx_id; + int i; + + msg = nfp_flower_cmsg_get_data(skb); + + spin_lock(&priv->stats_lock); + for (i = 0; i < msg_len / sizeof(*stats); i++) { + stats = (struct nfp_fl_stats_frame *)msg + i; + ctx_id = be32_to_cpu(stats->stats_con_id); + priv->stats[ctx_id].pkts += be32_to_cpu(stats->pkt_count); + priv->stats[ctx_id].bytes += be64_to_cpu(stats->byte_count); + priv->stats[ctx_id].used = jiffies; + } + spin_unlock(&priv->stats_lock); +} + +static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + + ring = &priv->mask_ids.mask_id_free_list; + /* Checking if buffer is full, + * NFP_FLOWER_MASK_ENTRY_RS must be power of 2 + */ + if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0) + return -ENOBUFS; + + /* Each increment of head represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&ring->buf[ring->head * NFP_FLOWER_MASK_ELEMENT_RS], &mask_id, + NFP_FLOWER_MASK_ELEMENT_RS); + ring->head = (ring->head + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); + + priv->mask_ids.last_used[mask_id] = ktime_get(); + + return 0; +} + +static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + ktime_t reuse_timeout; + struct circ_buf *ring; + u8 temp_id, freed_id; + + ring = &priv->mask_ids.mask_id_free_list; + freed_id = NFP_FLOWER_MASK_ENTRY_RS - 1; + /* Checking for unallocated entries first. */ + if (priv->mask_ids.init_unallocated > 0) { + *mask_id = priv->mask_ids.init_unallocated; + priv->mask_ids.init_unallocated--; + return 0; + } + + /* Checking if buffer is empty. */ + if (ring->head == ring->tail) + goto err_not_found; + + /* Each increment of tail represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&temp_id, &ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], + NFP_FLOWER_MASK_ELEMENT_RS); + *mask_id = temp_id; + + reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id], + NFP_FL_MASK_REUSE_TIME_NS); + + if (ktime_before(ktime_get(), reuse_timeout)) + goto err_not_found; + + memcpy(&ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], &freed_id, + NFP_FLOWER_MASK_ELEMENT_RS); + /* NFP_FLOWER_MASK_ENTRY_RS must be power of 2 */ + ring->tail = (ring->tail + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); + + return 0; + +err_not_found: + *mask_id = freed_id; + return -ENOENT; +} + +static int +nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + u8 mask_id; + + if (nfp_mask_alloc(app, &mask_id)) + return -ENOENT; + + mask_entry = kmalloc(sizeof(*mask_entry), GFP_KERNEL); + if (!mask_entry) { + nfp_release_mask_id(app, mask_id); + return -ENOMEM; + } + + INIT_HLIST_NODE(&mask_entry->link); + mask_entry->mask_id = mask_id; + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + mask_entry->hash_key = hash_key; + mask_entry->ref_cnt = 1; + hash_add(priv->mask_table, &mask_entry->link, hash_key); + + return mask_id; +} + +static struct nfp_mask_id_table * +nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + + hash_for_each_possible(priv->mask_table, mask_entry, link, hash_key) + if (mask_entry->hash_key == hash_key) + return mask_entry; + + return NULL; +} + +static int +nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return -ENOENT; + + mask_entry->ref_cnt++; + + /* Casting u8 to int for later use. */ + return mask_entry->mask_id; +} + +static bool +nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + int id; + + id = nfp_find_in_mask_table(app, mask_data, mask_len); + if (id < 0) { + id = nfp_add_mask_table(app, mask_data, mask_len); + if (id < 0) + return false; + *meta_flags |= NFP_FL_META_FLAG_MANAGE_MASK; + } + *mask_id = id; + + return true; +} + +static bool +nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return false; + + *mask_id = mask_entry->mask_id; + mask_entry->ref_cnt--; + if (!mask_entry->ref_cnt) { + hash_del(&mask_entry->link); + nfp_release_mask_id(app, *mask_id); + kfree(mask_entry); + if (meta_flags) + *meta_flags |= NFP_FL_META_FLAG_MANAGE_MASK; + } + + return true; +} + +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *check_entry; + u8 new_mask_id; + u32 stats_cxt; + int err; + + err = nfp_get_stats_entry(app, &stats_cxt); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate new stats context"); + return err; + } + + nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); + nfp_flow->meta.host_cookie = cpu_to_be64(cookie); + nfp_flow->ingress_dev = netdev; + + ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL); + if (!ctx_entry) { + err = -ENOMEM; + goto err_release_stats; + } + + ctx_entry->stats_cxt = stats_cxt; + ctx_entry->flow = nfp_flow; + + if (rhashtable_insert_fast(&priv->stats_ctx_table, &ctx_entry->ht_node, + stats_ctx_table_params)) { + err = -ENOMEM; + goto err_free_ctx_entry; + } + + /* Do not allocate a mask-id for pre_tun_rules. These flows are used to + * configure the pre_tun table and are never actually send to the + * firmware as an add-flow message. This causes the mask-id allocation + * on the firmware to get out of sync if allocated here. + */ + new_mask_id = 0; + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_add(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + &nfp_flow->meta.flags, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); + err = -ENOENT; + goto err_remove_rhash; + } + + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + priv->stats[stats_cxt].pkts = 0; + priv->stats[stats_cxt].bytes = 0; + priv->stats[stats_cxt].used = jiffies; + + check_entry = nfp_flower_search_fl_table(app, cookie, netdev); + if (check_entry) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); + err = -EEXIST; + goto err_remove_mask; + } + + return 0; + +err_remove_mask: + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id); +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, + &ctx_entry->ht_node, + stats_ctx_table_params)); +err_free_ctx_entry: + kfree(ctx_entry); +err_release_stats: + nfp_release_stats_entry(app, stats_cxt); + + return err; +} + +void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, + struct nfp_fl_payload *nfp_flow) +{ + nfp_flow->meta.flags &= ~NFP_FL_META_FLAG_MANAGE_MASK; + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; +} + +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + u8 new_mask_id = 0; + u32 temp_ctx_id; + + __nfp_modify_flow_metadata(priv, nfp_flow); + + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, &nfp_flow->meta.flags, + &new_mask_id); + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + + /* Release the stats ctx id and ctx to flow table entry. */ + temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + + ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &temp_ctx_id, + stats_ctx_table_params); + if (!ctx_entry) + return -ENOENT; + + WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, + &ctx_entry->ht_node, + stats_ctx_table_params)); + kfree(ctx_entry); + + return nfp_release_stats_entry(app, temp_ctx_id); +} + +struct nfp_fl_payload * +nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + + ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &ctx_id, + stats_ctx_table_params); + if (!ctx_entry) + return NULL; + + return ctx_entry->flow; +} + +static int nfp_fl_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct nfp_fl_flow_table_cmp_arg *cmp_arg = arg->key; + const struct nfp_fl_payload *flow_entry = obj; + + if (flow_entry->ingress_dev == cmp_arg->netdev) + return flow_entry->tc_flower_cookie != cmp_arg->cookie; + + return 1; +} + +static u32 nfp_fl_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfp_fl_payload *flower_entry = data; + + return jhash2((u32 *)&flower_entry->tc_flower_cookie, + sizeof(flower_entry->tc_flower_cookie) / sizeof(u32), + seed); +} + +static u32 nfp_fl_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfp_fl_flow_table_cmp_arg *cmp_arg = data; + + return jhash2((u32 *)&cmp_arg->cookie, + sizeof(cmp_arg->cookie) / sizeof(u32), seed); +} + +const struct rhashtable_params nfp_flower_table_params = { + .head_offset = offsetof(struct nfp_fl_payload, fl_node), + .hashfn = nfp_fl_key_hashfn, + .obj_cmpfn = nfp_fl_obj_cmpfn, + .obj_hashfn = nfp_fl_obj_hashfn, + .automatic_shrinking = true, +}; + +const struct rhashtable_params merge_table_params = { + .key_offset = offsetof(struct nfp_merge_info, parent_ctx), + .head_offset = offsetof(struct nfp_merge_info, ht_node), + .key_len = sizeof(u64), +}; + +const struct rhashtable_params nfp_zone_table_params = { + .head_offset = offsetof(struct nfp_fl_ct_zone_entry, hash_node), + .key_len = sizeof(u16), + .key_offset = offsetof(struct nfp_fl_ct_zone_entry, zone), + .automatic_shrinking = false, +}; + +const struct rhashtable_params nfp_ct_map_params = { + .head_offset = offsetof(struct nfp_fl_ct_map_entry, hash_node), + .key_len = sizeof(unsigned long), + .key_offset = offsetof(struct nfp_fl_ct_map_entry, cookie), + .automatic_shrinking = true, +}; + +const struct rhashtable_params neigh_table_params = { + .key_offset = offsetof(struct nfp_neigh_entry, neigh_cookie), + .head_offset = offsetof(struct nfp_neigh_entry, ht_node), + .key_len = sizeof(unsigned long), +}; + +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_num_mems) +{ + struct nfp_flower_priv *priv = app->priv; + int err, stats_size; + + hash_init(priv->mask_table); + + err = rhashtable_init(&priv->flow_table, &nfp_flower_table_params); + if (err) + return err; + + err = rhashtable_init(&priv->stats_ctx_table, &stats_ctx_table_params); + if (err) + goto err_free_flow_table; + + err = rhashtable_init(&priv->merge_table, &merge_table_params); + if (err) + goto err_free_stats_ctx_table; + + mutex_init(&priv->nfp_fl_lock); + + err = rhashtable_init(&priv->ct_zone_table, &nfp_zone_table_params); + if (err) + goto err_free_merge_table; + + err = rhashtable_init(&priv->ct_map_table, &nfp_ct_map_params); + if (err) + goto err_free_ct_zone_table; + + err = rhashtable_init(&priv->neigh_table, &neigh_table_params); + if (err) + goto err_free_ct_map_table; + + INIT_LIST_HEAD(&priv->predt_list); + + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); + + /* Init ring buffer and unallocated mask_ids. */ + priv->mask_ids.mask_id_free_list.buf = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); + if (!priv->mask_ids.mask_id_free_list.buf) + goto err_free_neigh_table; + + priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; + + /* Init timestamps for mask id*/ + priv->mask_ids.last_used = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + sizeof(*priv->mask_ids.last_used), GFP_KERNEL); + if (!priv->mask_ids.last_used) + goto err_free_mask_id; + + /* Init ring buffer and unallocated stats_ids. */ + priv->stats_ids.free_list.buf = + vmalloc(array_size(NFP_FL_STATS_ELEM_RS, + priv->stats_ring_size)); + if (!priv->stats_ids.free_list.buf) + goto err_free_last_used; + + priv->stats_ids.init_unalloc = div_u64(host_ctx_count, host_num_mems); + + stats_size = FIELD_PREP(NFP_FL_STAT_ID_STAT, host_ctx_count) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, host_num_mems - 1); + priv->stats = kvmalloc_array(stats_size, sizeof(struct nfp_fl_stats), + GFP_KERNEL); + if (!priv->stats) + goto err_free_ring_buf; + + spin_lock_init(&priv->stats_lock); + spin_lock_init(&priv->predt_lock); + + return 0; + +err_free_ring_buf: + vfree(priv->stats_ids.free_list.buf); +err_free_last_used: + kfree(priv->mask_ids.last_used); +err_free_mask_id: + kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_neigh_table: + rhashtable_destroy(&priv->neigh_table); +err_free_ct_map_table: + rhashtable_destroy(&priv->ct_map_table); +err_free_ct_zone_table: + rhashtable_destroy(&priv->ct_zone_table); +err_free_merge_table: + rhashtable_destroy(&priv->merge_table); +err_free_stats_ctx_table: + rhashtable_destroy(&priv->stats_ctx_table); +err_free_flow_table: + rhashtable_destroy(&priv->flow_table); + return -ENOMEM; +} + +static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt) +{ + if (!zt) + return; + + if (!list_empty(&zt->pre_ct_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "pre_ct_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->pre_ct_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + if (!list_empty(&zt->post_ct_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "post_ct_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->post_ct_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + if (zt->nft) { + nf_flow_table_offload_del_cb(zt->nft, + nfp_fl_ct_handle_nft_flow, + zt); + zt->nft = NULL; + } + + if (!list_empty(&zt->nft_flows_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "nft_flows_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->nft_flows_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + rhashtable_free_and_destroy(&zt->tc_merge_tb, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&zt->nft_merge_tb, + nfp_check_rhashtable_empty, NULL); + + kfree(zt); +} + +static void nfp_free_zone_table_entry(void *ptr, void *arg) +{ + struct nfp_fl_ct_zone_entry *zt = ptr; + + nfp_zone_table_entry_destroy(zt); +} + +static void nfp_free_map_table_entry(void *ptr, void *arg) +{ + struct nfp_fl_ct_map_entry *map = ptr; + + if (!map) + return; + + kfree(map); +} + +void nfp_flower_metadata_cleanup(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv) + return; + + rhashtable_free_and_destroy(&priv->flow_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->stats_ctx_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->merge_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->ct_zone_table, + nfp_free_zone_table_entry, NULL); + nfp_zone_table_entry_destroy(priv->ct_zone_wc); + + rhashtable_free_and_destroy(&priv->ct_map_table, + nfp_free_map_table_entry, NULL); + rhashtable_free_and_destroy(&priv->neigh_table, + nfp_check_rhashtable_empty, NULL); + kvfree(priv->stats); + kfree(priv->mask_ids.mask_id_free_list.buf); + kfree(priv->mask_ids.last_used); + vfree(priv->stats_ids.free_list.buf); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c new file mode 100644 index 000000000..99165694f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -0,0 +1,1974 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/skbuff.h> +#include <net/devlink.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" +#include "conntrack.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" + +#define NFP_FLOWER_SUPPORTED_TCPFLAGS \ + (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \ + TCPHDR_PSH | TCPHDR_URG) + +#define NFP_FLOWER_SUPPORTED_CTLFLAGS \ + (FLOW_DIS_IS_FRAGMENT | \ + FLOW_DIS_FIRST_FRAG) + +#define NFP_FLOWER_WHITELIST_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_BASIC) | \ + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_TCP) | \ + BIT(FLOW_DISSECTOR_KEY_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_CVLAN) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ + BIT(FLOW_DISSECTOR_KEY_MPLS) | \ + BIT(FLOW_DISSECTOR_KEY_CT) | \ + BIT(FLOW_DISSECTOR_KEY_META) | \ + BIT(FLOW_DISSECTOR_KEY_IP)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + +#define NFP_FLOWER_MERGE_FIELDS \ + (NFP_FLOWER_LAYER_PORT | \ + NFP_FLOWER_LAYER_MAC | \ + NFP_FLOWER_LAYER_TP | \ + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) + +#define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ + (NFP_FLOWER_LAYER_EXT_META | \ + NFP_FLOWER_LAYER_PORT | \ + NFP_FLOWER_LAYER_MAC | \ + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) + +struct nfp_flower_merge_check { + union { + struct { + __be16 tci; + struct nfp_flower_mac_mpls l2; + struct nfp_flower_tp_ports l4; + union { + struct nfp_flower_ipv4 ipv4; + struct nfp_flower_ipv6 ipv6; + }; + }; + unsigned long vals[8]; + }; +}; + +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype) +{ + u32 meta_len, key_len, mask_len, act_len, tot_len; + struct sk_buff *skb; + unsigned char *msg; + + meta_len = sizeof(struct nfp_fl_rule_metadata); + key_len = nfp_flow->meta.key_len; + mask_len = nfp_flow->meta.mask_len; + act_len = nfp_flow->meta.act_len; + + tot_len = meta_len + key_len + mask_len + act_len; + + /* Convert to long words as firmware expects + * lengths in units of NFP_FL_LW_SIZ. + */ + nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ; + + skb = nfp_flower_cmsg_alloc(app, tot_len, mtype, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, &nfp_flow->meta, meta_len); + memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len); + memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len); + memcpy(&msg[meta_len + key_len + mask_len], + nfp_flow->action_data, act_len); + + /* Convert back to bytes as software expects + * lengths in units of bytes. + */ + nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ; + + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule) +{ + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + +static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule) +{ + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + +static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size, bool ipv6, + struct netlink_ext_ack *extack) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); + return -EOPNOTSUPP; + } + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + +static int +nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, + struct flow_dissector_key_enc_opts *enc_op, + u32 *key_layer_two, u8 *key_layer, int *key_size, + struct nfp_flower_priv *priv, + enum nfp_flower_tun_type *tun_type, bool ipv6, + struct netlink_ext_ack *extack) +{ + int err; + + switch (enc_ports->dst) { + case htons(IANA_VXLAN_UDP_PORT): + *tun_type = NFP_FL_TUNNEL_VXLAN; + *key_layer |= NFP_FLOWER_LAYER_VXLAN; + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (enc_op) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); + return -EOPNOTSUPP; + } + break; + case htons(GENEVE_UDP_PORT): + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload"); + return -EOPNOTSUPP; + } + *tun_type = NFP_FL_TUNNEL_GENEVE; + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; + + if (ipv6) { + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); + return -EOPNOTSUPP; + } + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); + if (err) + return err; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown"); + return -EOPNOTSUPP; + } + + return 0; +} + +int +nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *rule, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) +{ + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_basic basic = { NULL, NULL}; + struct nfp_flower_priv *priv = app->priv; + u32 key_layer_two; + u8 key_layer; + int key_size; + int err; + + if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match not supported"); + return -EOPNOTSUPP; + } + + /* If any tun dissector is used then the required set must be used. */ + if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); + return -EOPNOTSUPP; + } + + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT; + key_size = sizeof(struct nfp_flower_meta_tci) + + sizeof(struct nfp_flower_in_port); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { + key_layer |= NFP_FLOWER_LAYER_MAC; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan vlan; + + flow_rule_match_vlan(rule, &vlan); + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && + vlan.key->vlan_priority) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload"); + return -EOPNOTSUPP; + } + if (priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ && + !(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan cvlan; + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN QinQ offload"); + return -EOPNOTSUPP; + } + + flow_rule_match_vlan(rule, &cvlan); + if (!(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_enc_opts enc_op = { NULL, NULL }; + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control enc_ctl; + struct flow_match_ports enc_ports; + bool ipv6_tun = false; + + flow_rule_match_enc_control(rule, &enc_ctl); + + if (enc_ctl.mask->addr_type != 0xffff) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); + return -EOPNOTSUPP; + } + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); + return -EOPNOTSUPP; + } + + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); + return -EOPNOTSUPP; + } + + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + flow_rule_match_enc_opts(rule, &enc_op); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + /* Check if GRE, which has no enc_ports */ + if (!netif_is_gretap(netdev) && !netif_is_ip6gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } + + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_gre_tun); + } else { + key_size += + sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ports(rule, &enc_ports); + if (enc_ports.mask->dst != cpu_to_be16(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported"); + return -EOPNOTSUPP; + } + + err = nfp_flower_calc_udp_tun_layer(enc_ports.key, + enc_op.key, + &key_layer_two, + &key_layer, + &key_size, priv, + tun_type, ipv6_tun, + extack); + if (err) + return err; + + /* Ensure the ingress netdev matches the expected + * tun type. + */ + if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type"); + return -EOPNOTSUPP; + } + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) + flow_rule_match_basic(rule, &basic); + + if (basic.mask && basic.mask->n_proto) { + /* Ethernet type is present in the key. */ + switch (basic.key->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + /* Currently we do not offload ARP + * because we rely on it to get to the host. + */ + case cpu_to_be16(ETH_P_ARP): + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ARP not supported"); + return -EOPNOTSUPP; + + case cpu_to_be16(ETH_P_MPLS_UC): + case cpu_to_be16(ETH_P_MPLS_MC): + if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { + key_layer |= NFP_FLOWER_LAYER_MAC; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + break; + + /* Will be included in layer 2. */ + case cpu_to_be16(ETH_P_8021Q): + break; + + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported"); + return -EOPNOTSUPP; + } + } else if (nfp_flower_check_higher_than_mac(rule)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType"); + return -EOPNOTSUPP; + } + + if (basic.mask && basic.mask->ip_proto) { + switch (basic.key->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + key_layer |= NFP_FLOWER_LAYER_TP; + key_size += sizeof(struct nfp_flower_tp_ports); + break; + } + } + + if (!(key_layer & NFP_FLOWER_LAYER_TP) && + nfp_flower_check_higher_than_l3(rule)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp tcp; + u32 tcp_flags; + + flow_rule_match_tcp(rule, &tcp); + tcp_flags = be16_to_cpu(tcp.key->flags); + + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: no match support for selected TCP flags"); + return -EOPNOTSUPP; + } + + /* We only support PSH and URG flags when either + * FIN, SYN or RST is present as well. + */ + if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: PSH and URG is only supported when used with FIN, SYN or RST"); + return -EOPNOTSUPP; + } + + /* We need to store TCP flags in the either the IPv4 or IPv6 key + * space, thus we need to ensure we include a IPv4/IPv6 key + * layer if we have not done so already. + */ + if (!basic.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on L3 protocol"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + switch (basic.key->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on IPv4/IPv6"); + return -EOPNOTSUPP; + } + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control ctl; + + flow_rule_match_control(rule, &ctl); + if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag"); + return -EOPNOTSUPP; + } + } + + ret_key_ls->key_layer = key_layer; + ret_key_ls->key_layer_two = key_layer_two; + ret_key_ls->key_size = key_size; + + return 0; +} + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) +{ + struct nfp_fl_payload *flow_pay; + + flow_pay = kmalloc(sizeof(*flow_pay), GFP_KERNEL); + if (!flow_pay) + return NULL; + + flow_pay->meta.key_len = key_layer->key_size; + flow_pay->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->unmasked_data) + goto err_free_flow; + + flow_pay->meta.mask_len = key_layer->key_size; + flow_pay->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->mask_data) + goto err_free_unmasked; + + flow_pay->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL); + if (!flow_pay->action_data) + goto err_free_mask; + + flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; + flow_pay->meta.flags = 0; + INIT_LIST_HEAD(&flow_pay->linked_flows); + flow_pay->in_hw = false; + flow_pay->pre_tun_rule.dev = NULL; + + return flow_pay; + +err_free_mask: + kfree(flow_pay->mask_data); +err_free_unmasked: + kfree(flow_pay->unmasked_data); +err_free_flow: + kfree(flow_pay); + return NULL; +} + +static int +nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, + struct nfp_flower_merge_check *merge, + u8 *last_act_id, int *act_out) +{ + struct nfp_fl_set_ipv6_tc_hl_fl *ipv6_tc_hl_fl; + struct nfp_fl_set_ip4_ttl_tos *ipv4_ttl_tos; + struct nfp_fl_set_ip4_addrs *ipv4_add; + struct nfp_fl_set_ipv6_addr *ipv6_add; + struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tport *tport; + struct nfp_fl_set_eth *eth; + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + bool ipv6_tun = false; + u8 act_id = 0; + u8 *ports; + int i; + + while (act_off < flow->meta.act_len) { + a = (struct nfp_fl_act_head *)&flow->action_data[act_off]; + act_id = a->jump_id; + + switch (act_id) { + case NFP_FL_ACTION_OPCODE_OUTPUT: + if (act_out) + (*act_out)++; + break; + case NFP_FL_ACTION_OPCODE_PUSH_VLAN: + push_vlan = (struct nfp_fl_push_vlan *)a; + if (push_vlan->vlan_tci) + merge->tci = cpu_to_be16(0xffff); + break; + case NFP_FL_ACTION_OPCODE_POP_VLAN: + merge->tci = cpu_to_be16(0); + break; + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: + /* New tunnel header means l2 to l4 can be matched. */ + eth_broadcast_addr(&merge->l2.mac_dst[0]); + eth_broadcast_addr(&merge->l2.mac_src[0]); + memset(&merge->l4, 0xff, + sizeof(struct nfp_flower_tp_ports)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); + break; + case NFP_FL_ACTION_OPCODE_SET_ETHERNET: + eth = (struct nfp_fl_set_eth *)a; + for (i = 0; i < ETH_ALEN; i++) + merge->l2.mac_dst[i] |= eth->eth_addr_mask[i]; + for (i = 0; i < ETH_ALEN; i++) + merge->l2.mac_src[i] |= + eth->eth_addr_mask[ETH_ALEN + i]; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS: + ipv4_add = (struct nfp_fl_set_ip4_addrs *)a; + merge->ipv4.ipv4_src |= ipv4_add->ipv4_src_mask; + merge->ipv4.ipv4_dst |= ipv4_add->ipv4_dst_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS: + ipv4_ttl_tos = (struct nfp_fl_set_ip4_ttl_tos *)a; + merge->ipv4.ip_ext.ttl |= ipv4_ttl_tos->ipv4_ttl_mask; + merge->ipv4.ip_ext.tos |= ipv4_ttl_tos->ipv4_tos_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_SRC: + ipv6_add = (struct nfp_fl_set_ipv6_addr *)a; + for (i = 0; i < 4; i++) + merge->ipv6.ipv6_src.in6_u.u6_addr32[i] |= + ipv6_add->ipv6[i].mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_DST: + ipv6_add = (struct nfp_fl_set_ipv6_addr *)a; + for (i = 0; i < 4; i++) + merge->ipv6.ipv6_dst.in6_u.u6_addr32[i] |= + ipv6_add->ipv6[i].mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL: + ipv6_tc_hl_fl = (struct nfp_fl_set_ipv6_tc_hl_fl *)a; + merge->ipv6.ip_ext.ttl |= + ipv6_tc_hl_fl->ipv6_hop_limit_mask; + merge->ipv6.ip_ext.tos |= ipv6_tc_hl_fl->ipv6_tc_mask; + merge->ipv6.ipv6_flow_label_exthdr |= + ipv6_tc_hl_fl->ipv6_label_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_UDP: + case NFP_FL_ACTION_OPCODE_SET_TCP: + tport = (struct nfp_fl_set_tport *)a; + ports = (u8 *)&merge->l4.port_src; + for (i = 0; i < 4; i++) + ports[i] |= tport->tp_port_mask[i]; + break; + case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; + case NFP_FL_ACTION_OPCODE_PRE_LAG: + case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: + break; + default: + return -EOPNOTSUPP; + } + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + if (last_act_id) + *last_act_id = act_id; + + return 0; +} + +static int +nfp_flower_populate_merge_match(struct nfp_fl_payload *flow, + struct nfp_flower_merge_check *merge, + bool extra_fields) +{ + struct nfp_flower_meta_tci *meta_tci; + u8 *mask = flow->mask_data; + u8 key_layer, match_size; + + memset(merge, 0, sizeof(struct nfp_flower_merge_check)); + + meta_tci = (struct nfp_flower_meta_tci *)mask; + key_layer = meta_tci->nfp_flow_key_layer; + + if (key_layer & ~NFP_FLOWER_MERGE_FIELDS && !extra_fields) + return -EOPNOTSUPP; + + merge->tci = meta_tci->tci; + mask += sizeof(struct nfp_flower_meta_tci); + + if (key_layer & NFP_FLOWER_LAYER_EXT_META) + mask += sizeof(struct nfp_flower_ext_meta); + + mask += sizeof(struct nfp_flower_in_port); + + if (key_layer & NFP_FLOWER_LAYER_MAC) { + match_size = sizeof(struct nfp_flower_mac_mpls); + memcpy(&merge->l2, mask, match_size); + mask += match_size; + } + + if (key_layer & NFP_FLOWER_LAYER_TP) { + match_size = sizeof(struct nfp_flower_tp_ports); + memcpy(&merge->l4, mask, match_size); + mask += match_size; + } + + if (key_layer & NFP_FLOWER_LAYER_IPV4) { + match_size = sizeof(struct nfp_flower_ipv4); + memcpy(&merge->ipv4, mask, match_size); + } + + if (key_layer & NFP_FLOWER_LAYER_IPV6) { + match_size = sizeof(struct nfp_flower_ipv6); + memcpy(&merge->ipv6, mask, match_size); + } + + return 0; +} + +static int +nfp_flower_can_merge(struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2) +{ + /* Two flows can be merged if sub_flow2 only matches on bits that are + * either matched by sub_flow1 or set by a sub_flow1 action. This + * ensures that every packet that hits sub_flow1 and recirculates is + * guaranteed to hit sub_flow2. + */ + struct nfp_flower_merge_check sub_flow1_merge, sub_flow2_merge; + int err, act_out = 0; + u8 last_act_id = 0; + + err = nfp_flower_populate_merge_match(sub_flow1, &sub_flow1_merge, + true); + if (err) + return err; + + err = nfp_flower_populate_merge_match(sub_flow2, &sub_flow2_merge, + false); + if (err) + return err; + + err = nfp_flower_update_merge_with_actions(sub_flow1, &sub_flow1_merge, + &last_act_id, &act_out); + if (err) + return err; + + /* Must only be 1 output action and it must be the last in sequence. */ + if (act_out != 1 || last_act_id != NFP_FL_ACTION_OPCODE_OUTPUT) + return -EOPNOTSUPP; + + /* Reject merge if sub_flow2 matches on something that is not matched + * on or set in an action by sub_flow1. + */ + err = bitmap_andnot(sub_flow2_merge.vals, sub_flow2_merge.vals, + sub_flow1_merge.vals, + sizeof(struct nfp_flower_merge_check) * 8); + if (err) + return -EINVAL; + + return 0; +} + +static unsigned int +nfp_flower_copy_pre_actions(char *act_dst, char *act_src, int len, + bool *tunnel_act) +{ + unsigned int act_off = 0, act_len; + struct nfp_fl_act_head *a; + u8 act_id = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&act_src[act_off]; + act_len = a->len_lw << NFP_FL_LW_SIZ; + act_id = a->jump_id; + + switch (act_id) { + case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + if (tunnel_act) + *tunnel_act = true; + fallthrough; + case NFP_FL_ACTION_OPCODE_PRE_LAG: + memcpy(act_dst + act_off, act_src + act_off, act_len); + break; + default: + return act_off; + } + + act_off += act_len; + } + + return act_off; +} + +static int +nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) +{ + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&acts[act_off]; + + if (a->jump_id == NFP_FL_ACTION_OPCODE_PUSH_VLAN && !act_off) + *vlan = (struct nfp_fl_push_vlan *)a; + else if (a->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT) + return -EOPNOTSUPP; + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + /* Ensure any VLAN push also has an egress action. */ + if (*vlan && act_off <= sizeof(struct nfp_fl_push_vlan)) + return -EOPNOTSUPP; + + return 0; +} + +static int +nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) +{ + struct nfp_fl_set_tun *tun; + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&acts[act_off]; + + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; + tun->outer_vlan_tpid = vlan->vlan_tpid; + tun->outer_vlan_tci = vlan->vlan_tci; + + return 0; + } + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + /* Return error if no tunnel action is found. */ + return -EOPNOTSUPP; +} + +static int +nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2, + struct nfp_fl_payload *merge_flow) +{ + unsigned int sub1_act_len, sub2_act_len, pre_off1, pre_off2; + struct nfp_fl_push_vlan *post_tun_push_vlan = NULL; + bool tunnel_act = false; + char *merge_act; + int err; + + /* The last action of sub_flow1 must be output - do not merge this. */ + sub1_act_len = sub_flow1->meta.act_len - sizeof(struct nfp_fl_output); + sub2_act_len = sub_flow2->meta.act_len; + + if (!sub2_act_len) + return -EINVAL; + + if (sub1_act_len + sub2_act_len > NFP_FL_MAX_A_SIZ) + return -EINVAL; + + /* A shortcut can only be applied if there is a single action. */ + if (sub1_act_len) + merge_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + else + merge_flow->meta.shortcut = sub_flow2->meta.shortcut; + + merge_flow->meta.act_len = sub1_act_len + sub2_act_len; + merge_act = merge_flow->action_data; + + /* Copy any pre-actions to the start of merge flow action list. */ + pre_off1 = nfp_flower_copy_pre_actions(merge_act, + sub_flow1->action_data, + sub1_act_len, &tunnel_act); + merge_act += pre_off1; + sub1_act_len -= pre_off1; + pre_off2 = nfp_flower_copy_pre_actions(merge_act, + sub_flow2->action_data, + sub2_act_len, NULL); + merge_act += pre_off2; + sub2_act_len -= pre_off2; + + /* FW does a tunnel push when egressing, therefore, if sub_flow 1 pushes + * a tunnel, there are restrictions on what sub_flow 2 actions lead to a + * valid merge. + */ + if (tunnel_act) { + char *post_tun_acts = &sub_flow2->action_data[pre_off2]; + + err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len, + &post_tun_push_vlan); + if (err) + return err; + + if (post_tun_push_vlan) { + pre_off2 += sizeof(*post_tun_push_vlan); + sub2_act_len -= sizeof(*post_tun_push_vlan); + } + } + + /* Copy remaining actions from sub_flows 1 and 2. */ + memcpy(merge_act, sub_flow1->action_data + pre_off1, sub1_act_len); + + if (post_tun_push_vlan) { + /* Update tunnel action in merge to include VLAN push. */ + err = nfp_fl_push_vlan_after_tun(merge_act, sub1_act_len, + post_tun_push_vlan); + if (err) + return err; + + merge_flow->meta.act_len -= sizeof(*post_tun_push_vlan); + } + + merge_act += sub1_act_len; + memcpy(merge_act, sub_flow2->action_data + pre_off2, sub2_act_len); + + return 0; +} + +/* Flow link code should only be accessed under RTNL. */ +static void nfp_flower_unlink_flow(struct nfp_fl_payload_link *link) +{ + list_del(&link->merge_flow.list); + list_del(&link->sub_flow.list); + kfree(link); +} + +static void nfp_flower_unlink_flows(struct nfp_fl_payload *merge_flow, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) + if (link->sub_flow.flow == sub_flow) { + nfp_flower_unlink_flow(link); + return; + } +} + +static int nfp_flower_link_flows(struct nfp_fl_payload *merge_flow, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + link->merge_flow.flow = merge_flow; + list_add_tail(&link->merge_flow.list, &merge_flow->linked_flows); + link->sub_flow.flow = sub_flow; + list_add_tail(&link->sub_flow.list, &sub_flow->linked_flows); + + return 0; +} + +/** + * nfp_flower_merge_offloaded_flows() - Merge 2 existing flows to single flow. + * @app: Pointer to the APP handle + * @sub_flow1: Initial flow matched to produce merge hint + * @sub_flow2: Post recirculation flow matched in merge hint + * + * Combines 2 flows (if valid) to a single flow, removing the initial from hw + * and offloading the new, merged flow. + * + * Return: negative value on error, 0 in success. + */ +int nfp_flower_merge_offloaded_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *merge_flow; + struct nfp_fl_key_ls merge_key_ls; + struct nfp_merge_info *merge_info; + u64 parent_ctx = 0; + int err; + + if (sub_flow1 == sub_flow2 || + nfp_flower_is_merge_flow(sub_flow1) || + nfp_flower_is_merge_flow(sub_flow2)) + return -EINVAL; + + /* Check if the two flows are already merged */ + parent_ctx = (u64)(be32_to_cpu(sub_flow1->meta.host_ctx_id)) << 32; + parent_ctx |= (u64)(be32_to_cpu(sub_flow2->meta.host_ctx_id)); + if (rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, merge_table_params)) { + nfp_flower_cmsg_warn(app, "The two flows are already merged.\n"); + return 0; + } + + err = nfp_flower_can_merge(sub_flow1, sub_flow2); + if (err) + return err; + + merge_key_ls.key_size = sub_flow1->meta.key_len; + + merge_flow = nfp_flower_allocate_new(&merge_key_ls); + if (!merge_flow) + return -ENOMEM; + + merge_flow->tc_flower_cookie = (unsigned long)merge_flow; + merge_flow->ingress_dev = sub_flow1->ingress_dev; + + memcpy(merge_flow->unmasked_data, sub_flow1->unmasked_data, + sub_flow1->meta.key_len); + memcpy(merge_flow->mask_data, sub_flow1->mask_data, + sub_flow1->meta.mask_len); + + err = nfp_flower_merge_action(sub_flow1, sub_flow2, merge_flow); + if (err) + goto err_destroy_merge_flow; + + err = nfp_flower_link_flows(merge_flow, sub_flow1); + if (err) + goto err_destroy_merge_flow; + + err = nfp_flower_link_flows(merge_flow, sub_flow2); + if (err) + goto err_unlink_sub_flow1; + + err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow, + merge_flow->ingress_dev, NULL); + if (err) + goto err_unlink_sub_flow2; + + err = rhashtable_insert_fast(&priv->flow_table, &merge_flow->fl_node, + nfp_flower_table_params); + if (err) + goto err_release_metadata; + + merge_info = kmalloc(sizeof(*merge_info), GFP_KERNEL); + if (!merge_info) { + err = -ENOMEM; + goto err_remove_rhash; + } + merge_info->parent_ctx = parent_ctx; + err = rhashtable_insert_fast(&priv->merge_table, &merge_info->ht_node, + merge_table_params); + if (err) + goto err_destroy_merge_info; + + err = nfp_flower_xmit_flow(app, merge_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD); + if (err) + goto err_remove_merge_info; + + merge_flow->in_hw = true; + sub_flow1->in_hw = false; + + return 0; + +err_remove_merge_info: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); +err_destroy_merge_info: + kfree(merge_info); +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &merge_flow->fl_node, + nfp_flower_table_params)); +err_release_metadata: + nfp_modify_flow_metadata(app, merge_flow); +err_unlink_sub_flow2: + nfp_flower_unlink_flows(merge_flow, sub_flow2); +err_unlink_sub_flow1: + nfp_flower_unlink_flows(merge_flow, sub_flow1); +err_destroy_merge_flow: + kfree(merge_flow->action_data); + kfree(merge_flow->mask_data); + kfree(merge_flow->unmasked_data); + kfree(merge_flow); + return err; +} + +/** + * nfp_flower_validate_pre_tun_rule() + * @app: Pointer to the APP handle + * @flow: Pointer to NFP flow representation of rule + * @key_ls: Pointer to NFP key layers structure + * @extack: Netlink extended ACK report + * + * Verifies the flow as a pre-tunnel rule. + * + * Return: negative value on error, 0 if verified. + */ +static int +nfp_flower_validate_pre_tun_rule(struct nfp_app *app, + struct nfp_fl_payload *flow, + struct nfp_fl_key_ls *key_ls, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_meta_tci *meta_tci; + struct nfp_flower_mac_mpls *mac; + u8 *ext = flow->unmasked_data; + struct nfp_fl_act_head *act; + u8 *mask = flow->mask_data; + bool vlan = false; + int act_offset; + u8 key_layer; + + meta_tci = (struct nfp_flower_meta_tci *)flow->unmasked_data; + key_layer = key_ls->key_layer; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) { + u16 vlan_tci = be16_to_cpu(meta_tci->tci); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + } + } + + if (key_layer & ~NFP_FLOWER_PRE_TUN_RULE_FIELDS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: too many match fields"); + return -EOPNOTSUPP; + } else if (key_ls->key_layer_two & ~NFP_FLOWER_LAYER2_QINQ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non-vlan in extended match fields"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MAC fields match required"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on ipv4/ipv6 eth_type must be present"); + return -EOPNOTSUPP; + } + + if (key_layer & NFP_FLOWER_LAYER_IPV6) + flow->pre_tun_rule.is_ipv6 = true; + else + flow->pre_tun_rule.is_ipv6 = false; + + /* Skip fields known to exist. */ + mask += sizeof(struct nfp_flower_meta_tci); + ext += sizeof(struct nfp_flower_meta_tci); + if (key_ls->key_layer_two) { + mask += sizeof(struct nfp_flower_ext_meta); + ext += sizeof(struct nfp_flower_ext_meta); + } + mask += sizeof(struct nfp_flower_in_port); + ext += sizeof(struct nfp_flower_in_port); + + /* Ensure destination MAC address is fully matched. */ + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC field must not be masked"); + return -EOPNOTSUPP; + } + + /* Ensure source MAC address is fully matched. This is only needed + * for firmware with the DECAP_V2 feature enabled. Don't do this + * for firmware without this feature to keep old behaviour. + */ + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_src[0])) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: source MAC field must not be masked"); + return -EOPNOTSUPP; + } + } + + if (mac->mpls_lse) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); + return -EOPNOTSUPP; + } + + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + + /* Save mac addresses in pre_tun_rule entry for later use */ + memcpy(&flow->pre_tun_rule.loc_mac, &mac->mac_dst[0], ETH_ALEN); + memcpy(&flow->pre_tun_rule.rem_mac, &mac->mac_src[0], ETH_ALEN); + + mask += sizeof(struct nfp_flower_mac_mpls); + ext += sizeof(struct nfp_flower_mac_mpls); + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ + int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); + int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; + int i; + + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? + sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + + + /* Ensure proto and flags are the only IP layer fields. */ + for (i = 0; i < size; i++) + if (mask[i] && i != ip_flags && i != ip_proto) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); + return -EOPNOTSUPP; + } + ext += size; + mask += size; + } + + if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + struct nfp_flower_vlan *vlan_tags; + u16 vlan_tpid; + u16 vlan_tci; + + vlan_tags = (struct nfp_flower_vlan *)ext; + + vlan_tci = be16_to_cpu(vlan_tags->outer_tci); + vlan_tpid = be16_to_cpu(vlan_tags->outer_tpid); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(vlan_tpid); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(0xffff); + } + } + + /* Action must be a single egress or pop_vlan and egress. */ + act_offset = 0; + act = (struct nfp_fl_act_head *)&flow->action_data[act_offset]; + if (vlan) { + if (act->jump_id != NFP_FL_ACTION_OPCODE_POP_VLAN) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on VLAN must have VLAN pop as first action"); + return -EOPNOTSUPP; + } + + act_offset += act->len_lw << NFP_FL_LW_SIZ; + act = (struct nfp_fl_act_head *)&flow->action_data[act_offset]; + } + + if (act->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non egress action detected where egress was expected"); + return -EOPNOTSUPP; + } + + act_offset += act->len_lw << NFP_FL_LW_SIZ; + + /* Ensure there are no more actions after egress. */ + if (act_offset != flow->meta.act_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: egress is not the last action"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool offload_pre_check(struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_ct ct; + + if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + /* Allow special case where CT match is all 0 */ + if (memchr_inv(ct.key, 0, sizeof(*ct.key))) + return false; + } + + if (flow->common.chain_index) + return false; + + return true; +} + +/** + * nfp_flower_add_offload() - Adds a new flow to hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * + * Adds a new flow to the repeated hash structure and action payload. + * + * Return: negative value on error, 0 if configured successfully. + */ +static int +nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *flow_pay; + struct nfp_fl_key_ls *key_layer; + struct nfp_port *port = NULL; + int err; + + extack = flow->common.extack; + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + if (is_pre_ct_flow(flow)) + return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack); + + if (is_post_ct_flow(flow)) + return nfp_fl_ct_handle_post_ct(priv, netdev, flow, extack); + + if (!offload_pre_check(flow)) + return -EOPNOTSUPP; + + key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL); + if (!key_layer) + return -ENOMEM; + + err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule, + &tun_type, extack); + if (err) + goto err_free_key_ls; + + flow_pay = nfp_flower_allocate_new(key_layer); + if (!flow_pay) { + err = -ENOMEM; + goto err_free_key_ls; + } + + err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev, + flow_pay, tun_type, extack); + if (err) + goto err_destroy_flow; + + err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack); + if (err) + goto err_destroy_flow; + + if (flow_pay->pre_tun_rule.dev) { + err = nfp_flower_validate_pre_tun_rule(app, flow_pay, key_layer, extack); + if (err) + goto err_destroy_flow; + } + + err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack); + if (err) + goto err_destroy_flow; + + flow_pay->tc_flower_cookie = flow->cookie; + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot insert flow into tables for offloads"); + goto err_release_metadata; + } + + if (flow_pay->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = kzalloc(sizeof(*predt), GFP_KERNEL); + if (!predt) { + err = -ENOMEM; + goto err_remove_rhash; + } + predt->flow_pay = flow_pay; + INIT_LIST_HEAD(&predt->nn_list); + spin_lock_bh(&priv->predt_lock); + list_add(&predt->list_head, &priv->predt_list); + flow_pay->pre_tun_rule.predt = predt; + nfp_tun_link_and_update_nn_entries(app, predt); + spin_unlock_bh(&priv->predt_lock); + } else { + err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); + } + } else { + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + } + + if (err) + goto err_remove_rhash; + + if (port) + port->tc_offload_cnt++; + + flow_pay->in_hw = true; + + /* Deallocate flow payload when flower rule has been destroyed. */ + kfree(key_layer); + + return 0; + +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +err_release_metadata: + nfp_modify_flow_metadata(app, flow_pay); +err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); +err_free_key_ls: + kfree(key_layer); + return err; +} + +static void +nfp_flower_remove_merge_flow(struct nfp_app *app, + struct nfp_fl_payload *del_sub_flow, + struct nfp_fl_payload *merge_flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload_link *link, *temp; + struct nfp_merge_info *merge_info; + struct nfp_fl_payload *origin; + u64 parent_ctx = 0; + bool mod = false; + int err; + + link = list_first_entry(&merge_flow->linked_flows, + struct nfp_fl_payload_link, merge_flow.list); + origin = link->sub_flow.flow; + + /* Re-add rule the merge had overwritten if it has not been deleted. */ + if (origin != del_sub_flow) + mod = true; + + err = nfp_modify_flow_metadata(app, merge_flow); + if (err) { + nfp_flower_cmsg_warn(app, "Metadata fail for merge flow delete.\n"); + goto err_free_links; + } + + if (!mod) { + err = nfp_flower_xmit_flow(app, merge_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + if (err) { + nfp_flower_cmsg_warn(app, "Failed to delete merged flow.\n"); + goto err_free_links; + } + } else { + __nfp_modify_flow_metadata(priv, origin); + err = nfp_flower_xmit_flow(app, origin, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to revert merge flow.\n"); + origin->in_hw = true; + } + +err_free_links: + /* Clean any links connected with the merged flow. */ + list_for_each_entry_safe(link, temp, &merge_flow->linked_flows, + merge_flow.list) { + u32 ctx_id = be32_to_cpu(link->sub_flow.flow->meta.host_ctx_id); + + parent_ctx = (parent_ctx << 32) | (u64)(ctx_id); + nfp_flower_unlink_flow(link); + } + + merge_info = rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, + merge_table_params); + if (merge_info) { + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); + kfree(merge_info); + } + + kfree(merge_flow->action_data); + kfree(merge_flow->mask_data); + kfree(merge_flow->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &merge_flow->fl_node, + nfp_flower_table_params)); + kfree_rcu(merge_flow, rcu); +} + +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link, *temp; + + /* Remove any merge flow formed from the deleted sub_flow. */ + list_for_each_entry_safe(link, temp, &sub_flow->linked_flows, + sub_flow.list) + nfp_flower_remove_merge_flow(app, sub_flow, + link->merge_flow.flow); +} + +/** + * nfp_flower_del_offload() - Removes a flow from hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure + * + * Removes a flow from the repeated hash structure and clears the + * action payload. Any flows merged from this are also deleted. + * + * Return: negative value on error, 0 if removed successfully. + */ +static int +nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *nfp_flow; + struct nfp_port *port = NULL; + int err; + + extack = flow->common.extack; + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + /* Check ct_map_table */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) { + err = nfp_fl_ct_del_flow(ct_map_ent); + return err; + } + + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist"); + return -ENOENT; + } + + err = nfp_modify_flow_metadata(app, nfp_flow); + if (err) + goto err_free_merge_flow; + + if (nfp_flow->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + + if (!nfp_flow->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + if (nfp_flow->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = nfp_flow->pre_tun_rule.predt; + if (predt) { + spin_lock_bh(&priv->predt_lock); + nfp_tun_unlink_and_update_nn_entries(app, predt); + list_del(&predt->list_head); + spin_unlock_bh(&priv->predt_lock); + kfree(predt); + } + } else { + err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); + } + } else { + err = nfp_flower_xmit_flow(app, nfp_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + } + /* Fall through on error. */ + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, nfp_flow); + if (port) + port->tc_offload_cnt--; + kfree(nfp_flow->action_data); + kfree(nfp_flow->mask_data); + kfree(nfp_flow->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &nfp_flow->fl_node, + nfp_flower_table_params)); + kfree_rcu(nfp_flow, rcu); + return err; +} + +static void +__nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *merge_flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload_link *link; + struct nfp_fl_payload *sub_flow; + u64 pkts, bytes, used; + u32 ctx_id; + + ctx_id = be32_to_cpu(merge_flow->meta.host_ctx_id); + pkts = priv->stats[ctx_id].pkts; + /* Do not cycle subflows if no stats to distribute. */ + if (!pkts) + return; + bytes = priv->stats[ctx_id].bytes; + used = priv->stats[ctx_id].used; + + /* Reset stats for the merge flow. */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; + + /* The merge flow has received stats updates from firmware. + * Distribute these stats to all subflows that form the merge. + * The stats will collected from TC via the subflows. + */ + list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) { + sub_flow = link->sub_flow.flow; + ctx_id = be32_to_cpu(sub_flow->meta.host_ctx_id); + priv->stats[ctx_id].pkts += pkts; + priv->stats[ctx_id].bytes += bytes; + priv->stats[ctx_id].used = max_t(u64, used, + priv->stats[ctx_id].used); + } +} + +void +nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + /* Get merge flows that the subflow forms to distribute their stats. */ + list_for_each_entry(link, &sub_flow->linked_flows, sub_flow.list) + __nfp_flower_update_merge_stats(app, link->merge_flow.flow); +} + +/** + * nfp_flower_get_stats() - Populates flow stats obtained from hardware. + * @app: Pointer to the APP handle + * @netdev: Netdev structure. + * @flow: TC flower classifier offload structure + * + * Populates a flow statistics structure which which corresponds to a + * specific flow. + * + * Return: negative value on error, 0 if stats populated successfully. + */ +static int +nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + /* Check ct_map table first */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + + extack = flow->common.extack; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot dump stats for flow that does not exist"); + return -EINVAL; + } + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + + spin_lock_bh(&priv->stats_lock); + /* If request is for a sub_flow, update stats from merged flows. */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(app, nfp_flow); + + flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; + spin_unlock_bh(&priv->stats_lock); + + return 0; +} + +static int +nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flower) +{ + struct nfp_flower_priv *priv = app->priv; + int ret; + + if (!eth_proto_is_802_3(flower->common.protocol)) + return -EOPNOTSUPP; + + mutex_lock(&priv->nfp_fl_lock); + switch (flower->command) { + case FLOW_CLS_REPLACE: + ret = nfp_flower_add_offload(app, netdev, flower); + break; + case FLOW_CLS_DESTROY: + ret = nfp_flower_del_offload(app, netdev, flower); + break; + case FLOW_CLS_STATS: + ret = nfp_flower_get_stats(app, netdev, flower); + break; + default: + ret = -EOPNOTSUPP; + break; + } + mutex_unlock(&priv->nfp_fl_lock); + + return ret; +} + +static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct flow_cls_common_offload *common = type_data; + struct nfp_repr *repr = cb_priv; + + if (!tc_can_offload_extack(repr->netdev, common->extack)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(repr->app, repr->netdev, + type_data); + case TC_SETUP_CLSMATCHALL: + return nfp_flower_setup_qos_offload(repr->app, repr->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(nfp_block_cb_list); + +static int nfp_flower_setup_tc_block(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_flower_repr_priv *repr_priv; + struct flow_block_cb *block_cb; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + repr_priv = repr->app_priv; + repr_priv->block_shared = f->block_shared; + f->driver_block_list = &nfp_block_cb_list; + f->unlocked_driver_cb = true; + + switch (f->command) { + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(nfp_flower_setup_tc_block_cb, repr, + &nfp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb, + repr, repr, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f->block, + nfp_flower_setup_tc_block_cb, + repr); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nfp_flower_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +struct nfp_flower_indr_block_cb_priv { + struct net_device *netdev; + struct nfp_app *app; + struct list_head list; +}; + +static struct nfp_flower_indr_block_cb_priv * +nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, + struct net_device *netdev) +{ + struct nfp_flower_indr_block_cb_priv *cb_priv; + struct nfp_flower_priv *priv = app->priv; + + list_for_each_entry(cb_priv, &priv->indr_block_cb_priv, list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(priv->app, priv->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +void nfp_flower_setup_indr_tc_release(void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + list_del(&priv->list); + kfree(priv); +} + +static int +nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, struct nfp_app *app, + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct nfp_flower_indr_block_cb_priv *cb_priv; + struct nfp_flower_priv *priv = app->priv; + struct flow_block_cb *block_cb; + + if ((f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + !nfp_flower_internal_port_can_offload(app, netdev)) || + (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + nfp_flower_internal_port_can_offload(app, netdev))) + return -EOPNOTSUPP; + + f->unlocked_driver_cb = true; + + switch (f->command) { + case FLOW_BLOCK_BIND: + cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); + if (cb_priv && + flow_block_cb_is_busy(nfp_flower_setup_indr_block_cb, + cb_priv, + &nfp_block_cb_list)) + return -EBUSY; + + cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); + if (!cb_priv) + return -ENOMEM; + + cb_priv->netdev = netdev; + cb_priv->app = app; + list_add(&cb_priv->list, &priv->indr_block_cb_priv); + + block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release, + f, netdev, sch, data, app, cleanup); + if (IS_ERR(block_cb)) { + list_del(&cb_priv->list); + kfree(cb_priv); + return PTR_ERR(block_cb); + } + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); + if (!cb_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, + nfp_flower_setup_indr_block_cb, + cb_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +nfp_setup_tc_no_dev(struct nfp_app *app, enum tc_setup_type type, void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return nfp_setup_tc_act_offload(app, data); + default: + return -EOPNOTSUPP; + } +} + +int +nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + if (!netdev) + return nfp_setup_tc_no_dev(cb_priv, type, data); + + if (!nfp_fl_is_netdev_to_offload(netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_BLOCK: + return nfp_flower_setup_indr_tc_block(netdev, sch, cb_priv, + type_data, data, cleanup); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c new file mode 100644 index 000000000..e7180b479 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -0,0 +1,887 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> +#include <linux/math64.h> +#include <linux/vmalloc.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_port.h" + +#define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) +#define NFP_FL_QOS_PPS BIT(15) +#define NFP_FL_QOS_METER BIT(10) + +struct nfp_police_cfg_head { + __be32 flags_opts; + union { + __be32 meter_id; + __be32 port; + }; +}; + +enum NFP_FL_QOS_TYPES { + NFP_FL_QOS_TYPE_BPS, + NFP_FL_QOS_TYPE_PPS, + NFP_FL_QOS_TYPE_MAX, +}; + +/* Police cmsg for configuring a trTCM traffic conditioner (8W/32B) + * See RFC 2698 for more details. + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved |p| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Port Ingress | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Token Bucket Peak | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Token Bucket Committed | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Committed Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Information Rate | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Committed Information Rate | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Word[0](FLag options): + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | Reserved |p| Y |TYPE |E|TSHFV |P| PC|R| + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | meter ID | + * +-------------------------------+-------------------------------+ + * + */ +struct nfp_police_config { + struct nfp_police_cfg_head head; + __be32 bkt_tkn_p; + __be32 bkt_tkn_c; + __be32 pbs; + __be32 cbs; + __be32 pir; + __be32 cir; +}; + +struct nfp_police_stats_reply { + struct nfp_police_cfg_head head; + __be64 pass_bytes; + __be64 pass_pkts; + __be64 drop_bytes; + __be64 drop_pkts; +}; + +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst) +{ + struct nfp_police_config *config; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + if (!ingress) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_METER); + + if (ingress) + config->head.port = cpu_to_be32(id); + else + config->head.meter_id = cpu_to_be32(id); + + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static int nfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack, + bool ingress) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (ingress) { + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue or ok"); + return -EOPNOTSUPP; + } + } else { + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &flow->rule->action.entries[0]; + u32 action_num = flow->rule->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + struct nfp_flower_repr_priv *repr_priv; + u32 netdev_port_id, i; + struct nfp_repr *repr; + bool pps_support; + u32 bps_num = 0; + u32 pps_num = 0; + u32 burst; + bool pps; + u64 rate; + int err; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + netdev_port_id = nfp_repr_get_port_id(netdev); + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + if (repr_priv->block_shared) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on shared blocks"); + return -EOPNOTSUPP; + } + + if (repr->port->type != NFP_PORT_VF_PORT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on non-VF ports"); + return -EOPNOTSUPP; + } + + if (pps_support) { + if (action_num > 2 || action_num == 0) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support action number 1 or 2"); + return -EOPNOTSUPP; + } + } else { + if (!flow_offload_has_one_action(&flow->rule->action)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires a single action"); + return -EOPNOTSUPP; + } + } + + if (flow->common.prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority"); + return -EOPNOTSUPP; + } + + for (i = 0 ; i < action_num; i++) { + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + err = nfp_policer_validate(&flow->rule->action, action, extack, true); + if (err) + return err; + + if (action->police.rate_bytes_ps > 0) { + if (bps_num++) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support one BPS action"); + return -EOPNOTSUPP; + } + } + if (action->police.rate_pkt_ps > 0) { + if (!pps_support) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: FW does not support PPS action"); + return -EOPNOTSUPP; + } + if (pps_num++) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support one PPS action"); + return -EOPNOTSUPP; + } + } + } + + for (i = 0 ; i < action_num; i++) { + /* Set QoS data for this interface */ + action = paction + i; + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit is not BPS or PPS"); + continue; + } + + if (rate != 0) { + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(repr->app, true, + pps, netdev_port_id, + rate, burst); + } + } + repr_priv->qos_table.netdev_port_id = netdev_port_id; + fl_priv->qos_rate_limiters++; + if (fl_priv->qos_rate_limiters == 1) + schedule_delayed_work(&fl_priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return 0; +} + +static int +nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_police_config *config; + u32 netdev_port_id, i; + struct nfp_repr *repr; + struct sk_buff *skb; + bool pps_support; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + + netdev_port_id = nfp_repr_get_port_id(netdev); + repr_priv = repr->app_priv; + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + if (!repr_priv->qos_table.netdev_port_id) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot remove qos entry that does not exist"); + return -EOPNOTSUPP; + } + + memset(&repr_priv->qos_table, 0, sizeof(struct nfp_fl_qos)); + fl_priv->qos_rate_limiters--; + if (!fl_priv->qos_rate_limiters) + cancel_delayed_work_sync(&fl_priv->qos_stats_work); + for (i = 0 ; i < NFP_FL_QOS_TYPE_MAX; i++) { + if (i == NFP_FL_QOS_TYPE_PPS && !pps_support) + break; + /* 0:bps 1:pps + * Clear QoS data for this interface. + * There is no need to check if a specific QOS_TYPE was + * configured as the firmware handles clearing a QoS entry + * safely, even if it wasn't explicitly added. + */ + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (i == NFP_FL_QOS_TYPE_PPS) + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); + config->head.port = cpu_to_be32(netdev_port_id); + nfp_ctrl_tx(repr->app->ctrl, skb); + } + + return 0; +} + +void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_police_stats_reply *msg; + struct nfp_stat_pair *curr_stats; + struct nfp_stat_pair *prev_stats; + struct net_device *netdev; + struct nfp_repr *repr; + u32 netdev_port_id; + + msg = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(msg->head.flags_opts) & NFP_FL_QOS_METER) + return nfp_act_stats_reply(app, msg); + + netdev_port_id = be32_to_cpu(msg->head.port); + rcu_read_lock(); + netdev = nfp_app_dev_get(app, netdev_port_id, NULL); + if (!netdev) + goto exit_unlock_rcu; + + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + curr_stats = &repr_priv->qos_table.curr_stats; + prev_stats = &repr_priv->qos_table.prev_stats; + + spin_lock_bh(&fl_priv->qos_stats_lock); + curr_stats->pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + curr_stats->bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + + if (!repr_priv->qos_table.last_update) { + prev_stats->pkts = curr_stats->pkts; + prev_stats->bytes = curr_stats->bytes; + } + + repr_priv->qos_table.last_update = jiffies; + spin_unlock_bh(&fl_priv->qos_stats_lock); + +exit_unlock_rcu: + rcu_read_unlock(); +} + +static void +nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, + u32 id, bool ingress) +{ + struct nfp_police_cfg_head *head; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(fl_priv->app, + sizeof(struct nfp_police_cfg_head), + NFP_FLOWER_CMSG_TYPE_QOS_STATS, + GFP_ATOMIC); + if (!skb) + return; + head = nfp_flower_cmsg_get_data(skb); + + memset(head, 0, sizeof(struct nfp_police_cfg_head)); + if (ingress) { + head->port = cpu_to_be32(id); + } else { + head->flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + head->meter_id = cpu_to_be32(id); + } + + nfp_ctrl_tx(fl_priv->app->ctrl, skb); +} + +static void +nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_reprs *repr_set; + int i; + + rcu_read_lock(); + repr_set = rcu_dereference(fl_priv->app->reprs[NFP_REPR_TYPE_VF]); + if (!repr_set) + goto exit_unlock_rcu; + + for (i = 0; i < repr_set->num_reprs; i++) { + struct net_device *netdev; + + netdev = rcu_dereference(repr_set->reprs[i]); + if (netdev) { + struct nfp_repr *priv = netdev_priv(netdev); + struct nfp_flower_repr_priv *repr_priv; + u32 netdev_port_id; + + repr_priv = priv->app_priv; + netdev_port_id = repr_priv->qos_table.netdev_port_id; + if (!netdev_port_id) + continue; + + nfp_flower_stats_rlim_request(fl_priv, + netdev_port_id, true); + } + } + +exit_unlock_rcu: + rcu_read_unlock(); +} + +static void update_stats_cache(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct nfp_flower_priv *fl_priv; + + delayed_work = to_delayed_work(work); + fl_priv = container_of(delayed_work, struct nfp_flower_priv, + qos_stats_work); + + nfp_flower_stats_rlim_request_all(fl_priv); + nfp_flower_stats_meter_request_all(fl_priv); + + schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE); +} + +static int +nfp_flower_stats_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_stat_pair *curr_stats; + struct nfp_stat_pair *prev_stats; + u64 diff_bytes, diff_pkts; + struct nfp_repr *repr; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + + repr_priv = repr->app_priv; + if (!repr_priv->qos_table.netdev_port_id) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot find qos entry for stats update"); + return -EOPNOTSUPP; + } + + spin_lock_bh(&fl_priv->qos_stats_lock); + curr_stats = &repr_priv->qos_table.curr_stats; + prev_stats = &repr_priv->qos_table.prev_stats; + diff_pkts = curr_stats->pkts - prev_stats->pkts; + diff_bytes = curr_stats->bytes - prev_stats->bytes; + prev_stats->pkts = curr_stats->pkts; + prev_stats->bytes = curr_stats->bytes; + spin_unlock_bh(&fl_priv->qos_stats_lock); + + flow_stats_update(&flow->stats, diff_bytes, diff_pkts, 0, + repr_priv->qos_table.last_update, + FLOW_ACTION_HW_STATS_DELAYED); + return 0; +} + +void nfp_flower_qos_init(struct nfp_app *app) +{ + struct nfp_flower_priv *fl_priv = app->priv; + + spin_lock_init(&fl_priv->qos_stats_lock); + mutex_init(&fl_priv->meter_stats_lock); + nfp_init_meter_table(app); + + INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache); +} + +void nfp_flower_qos_cleanup(struct nfp_app *app) +{ + struct nfp_flower_priv *fl_priv = app->priv; + + cancel_delayed_work_sync(&fl_priv->qos_stats_work); +} + +int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow) +{ + struct netlink_ext_ack *extack = flow->common.extack; + struct nfp_flower_priv *fl_priv = app->priv; + int ret; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support qos rate limit offload"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->nfp_fl_lock); + switch (flow->command) { + case TC_CLSMATCHALL_REPLACE: + ret = nfp_flower_install_rate_limiter(app, netdev, flow, extack); + break; + case TC_CLSMATCHALL_DESTROY: + ret = nfp_flower_remove_rate_limiter(app, netdev, flow, extack); + break; + case TC_CLSMATCHALL_STATS: + ret = nfp_flower_stats_rate_limiter(app, netdev, flow, extack); + break; + default: + ret = -EOPNOTSUPP; + break; + } + mutex_unlock(&fl_priv->nfp_fl_lock); + + return ret; +} + +/* Offload tc action, currently only for tc police */ + +static const struct rhashtable_params stats_meter_table_params = { + .key_offset = offsetof(struct nfp_meter_entry, meter_id), + .head_offset = offsetof(struct nfp_meter_entry, ht_node), + .key_len = sizeof(u32), +}; + +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); +} + +static struct nfp_meter_entry * +nfp_flower_add_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, + &meter_id, + stats_meter_table_params); + if (meter_entry) + return meter_entry; + + meter_entry = kzalloc(sizeof(*meter_entry), GFP_KERNEL); + if (!meter_entry) + return NULL; + + meter_entry->meter_id = meter_id; + meter_entry->used = jiffies; + if (rhashtable_insert_fast(&priv->meter_table, &meter_entry->ht_node, + stats_meter_table_params)) { + kfree(meter_entry); + return NULL; + } + + priv->qos_rate_limiters++; + if (priv->qos_rate_limiters == 1) + schedule_delayed_work(&priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return meter_entry; +} + +static void nfp_flower_del_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); + if (!meter_entry) + return; + + rhashtable_remove_fast(&priv->meter_table, + &meter_entry->ht_node, + stats_meter_table_params); + kfree(meter_entry); + priv->qos_rate_limiters--; + if (!priv->qos_rate_limiters) + cancel_delayed_work_sync(&priv->qos_stats_work); +} + +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + int err = 0; + + mutex_lock(&fl_priv->meter_stats_lock); + + switch (op) { + case NFP_METER_DEL: + nfp_flower_del_meter_entry(app, meter_id); + goto exit_unlock; + case NFP_METER_ADD: + meter_entry = nfp_flower_add_meter_entry(app, meter_id); + break; + default: + err = -EOPNOTSUPP; + goto exit_unlock; + } + + if (!meter_entry) { + err = -ENOMEM; + goto exit_unlock; + } + + if (action->police.rate_bytes_ps > 0) { + meter_entry->bps = true; + meter_entry->rate = action->police.rate_bytes_ps; + meter_entry->burst = action->police.burst; + } else { + meter_entry->bps = false; + meter_entry->rate = action->police.rate_pkt_ps; + meter_entry->burst = action->police.burst_pkt; + } + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_init_meter_table(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_init(&priv->meter_table, &stats_meter_table_params); +} + +void +nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct rhashtable_iter iter; + + mutex_lock(&fl_priv->meter_stats_lock); + rhashtable_walk_enter(&fl_priv->meter_table, &iter); + rhashtable_walk_start(&iter); + + while ((meter_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(meter_entry)) + continue; + nfp_flower_stats_rlim_request(fl_priv, + meter_entry->meter_id, false); + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_install_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &fl_act->action.entries[0]; + u32 action_num = fl_act->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + u32 burst, i, meter_id; + bool pps_support, pps; + bool add = false; + u64 rate; + int err; + + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + for (i = 0 ; i < action_num; i++) { + /* Set qos associate data for this interface */ + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + continue; + } + + err = nfp_policer_validate(&fl_act->action, action, extack, false); + if (err) + return err; + + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0 && pps_support) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported qos rate limit"); + continue; + } + + if (rate != 0) { + meter_id = action->hw_index; + if (nfp_flower_setup_meter_entry(app, action, NFP_METER_ADD, meter_id)) + continue; + + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(app, false, pps, meter_id, + rate, burst); + add = true; + } + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +nfp_act_remove_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_config *config; + struct sk_buff *skb; + u32 meter_id; + bool pps; + + /* Delete qos associate data for this interface */ + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + meter_id = fl_act->index; + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) { + NL_SET_ERR_MSG_MOD(extack, + "no meter entry when delete the action index."); + return -ENOENT; + } + pps = !meter_entry->bps; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + config->head.meter_id = cpu_to_be32(meter_id); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + + nfp_ctrl_tx(app->ctrl, skb); + nfp_flower_setup_meter_entry(app, NULL, NFP_METER_DEL, meter_id); + + return 0; +} + +void +nfp_act_stats_reply(struct nfp_app *app, void *pmsg) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_stats_reply *msg = pmsg; + u32 meter_id; + + meter_id = be32_to_cpu(msg->head.meter_id); + mutex_lock(&fl_priv->meter_stats_lock); + + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) + goto exit_unlock; + + meter_entry->stats.curr.pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + meter_entry->stats.curr.bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + meter_entry->stats.curr.drops = be64_to_cpu(msg->drop_pkts); + if (!meter_entry->stats.update) { + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + } + + meter_entry->stats.update = jiffies; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_stats_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + u64 diff_bytes, diff_pkts, diff_drops; + int err = 0; + + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->meter_stats_lock); + meter_entry = nfp_flower_search_meter_entry(app, fl_act->index); + if (!meter_entry) { + err = -ENOENT; + goto exit_unlock; + } + diff_pkts = meter_entry->stats.curr.pkts > meter_entry->stats.prev.pkts ? + meter_entry->stats.curr.pkts - meter_entry->stats.prev.pkts : 0; + diff_bytes = meter_entry->stats.curr.bytes > meter_entry->stats.prev.bytes ? + meter_entry->stats.curr.bytes - meter_entry->stats.prev.bytes : 0; + diff_drops = meter_entry->stats.curr.drops > meter_entry->stats.prev.drops ? + meter_entry->stats.curr.drops - meter_entry->stats.prev.drops : 0; + + flow_stats_update(&fl_act->stats, diff_bytes, diff_pkts, diff_drops, + meter_entry->stats.update, + FLOW_ACTION_HW_STATS_DELAYED); + + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act) +{ + struct netlink_ext_ack *extack = fl_act->extack; + struct nfp_flower_priv *fl_priv = app->priv; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) + return -EOPNOTSUPP; + + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return nfp_act_install_actions(app, fl_act, extack); + case FLOW_ACT_DESTROY: + return nfp_act_remove_actions(app, fl_act, extack); + case FLOW_ACT_STATS: + return nfp_act_stats_actions(app, fl_act, extack); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c new file mode 100644 index 000000000..52f67157b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -0,0 +1,1460 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <net/netevent.h> +#include <linux/idr.h> +#include <net/dst_metadata.h> +#include <net/arp.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" +#include "../nfp_net.h" + +#define NFP_FL_MAX_ROUTES 32 + +#define NFP_TUN_PRE_TUN_RULE_LIMIT 32 +#define NFP_TUN_PRE_TUN_RULE_DEL BIT(0) +#define NFP_TUN_PRE_TUN_IDX_BIT BIT(3) +#define NFP_TUN_PRE_TUN_IPV6_BIT BIT(7) + +/** + * struct nfp_tun_pre_tun_rule - rule matched before decap + * @flags: options for the rule offset + * @port_idx: index of destination MAC address for the rule + * @vlan_tci: VLAN info associated with MAC + * @host_ctx_id: stats context of rule to update + */ +struct nfp_tun_pre_tun_rule { + __be32 flags; + __be16 port_idx; + __be16 vlan_tci; + __be32 host_ctx_id; +}; + +/** + * struct nfp_tun_active_tuns - periodic message of active tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv4: dest IPv4 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info.extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info { + __be32 ipv4; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + +/** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info.extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + +/** + * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv4_addr: destination ipv4 address for route + * @reserved: reserved for future use + */ +struct nfp_tun_req_route_ipv4 { + __be32 ingress_port; + __be32 ipv4_addr; + __be32 reserved[2]; +}; + +/** + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP + * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 + */ +struct nfp_offloaded_route { + struct list_head list; + u8 ip_add[]; +}; + +#define NFP_FL_IPV4_ADDRS_MAX 32 + +/** + * struct nfp_tun_ipv4_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses + */ +struct nfp_tun_ipv4_addr { + __be32 count; + __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX]; +}; + +/** + * struct nfp_ipv4_addr_entry - cached IPv4 addresses + * @ipv4_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv4_addr_entry { + __be32 ipv4_addr; + int ref_count; + struct list_head list; +}; + +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + +#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 + +/** + * struct nfp_tun_mac_addr_offload - configure MAC address of tunnel EP on NFP + * @flags: MAC address offload options + * @count: number of MAC addresses in the message (should be 1) + * @index: index of MAC address in the lookup table + * @addr: interface MAC address + */ +struct nfp_tun_mac_addr_offload { + __be16 flags; + __be16 count; + __be16 index; + u8 addr[ETH_ALEN]; +}; + +enum nfp_flower_mac_offload_cmd { + NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, + NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, + NFP_TUNNEL_MAC_OFFLOAD_MOD = 2, +}; + +#define NFP_MAX_MAC_INDEX 0xff + +/** + * struct nfp_tun_offloaded_mac - hashtable entry for an offloaded MAC + * @ht_node: Hashtable entry + * @addr: Offloaded MAC address + * @index: Offloaded index for given MAC address + * @ref_count: Number of devs using this MAC address + * @repr_list: List of reprs sharing this MAC address + * @bridge_count: Number of bridge/internal devs with MAC + */ +struct nfp_tun_offloaded_mac { + struct rhash_head ht_node; + u8 addr[ETH_ALEN]; + u16 index; + int ref_count; + struct list_head repr_list; + int bridge_count; +}; + +static const struct rhashtable_params offloaded_macs_params = { + .key_offset = offsetof(struct nfp_tun_offloaded_mac, addr), + .head_offset = offsetof(struct nfp_tun_offloaded_mac, ht_node), + .key_len = ETH_ALEN, + .automatic_shrinking = true, +}; + +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_active_tuns *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + __be32 ipv4_addr; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_MAX_ROUTES) { + nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv4_addr = payload->tun_info[i].ipv4; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +} + +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + +static int +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, + gfp_t flag) +{ + struct nfp_flower_priv *priv = app->priv; + struct sk_buff *skb; + unsigned char *msg; + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) && + (mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6)) + plen -= sizeof(struct nfp_tun_neigh_ext); + + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb)); + + nfp_ctrl_tx(app->ctrl, skb); + return 0; +} + +static void +nfp_tun_mutual_link(struct nfp_predt_entry *predt, + struct nfp_neigh_entry *neigh) +{ + struct nfp_fl_payload *flow_pay = predt->flow_pay; + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; + + if (flow_pay->pre_tun_rule.is_ipv6 != neigh->is_ipv6) + return; + + /* In the case of bonding it is possible that there might already + * be a flow linked (as the MAC address gets shared). If a flow + * is already linked just return. + */ + if (neigh->flow) + return; + + common = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->common : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->common; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + + if (memcmp(flow_pay->pre_tun_rule.loc_mac, + common->src_addr, ETH_ALEN) || + memcmp(flow_pay->pre_tun_rule.rem_mac, + common->dst_addr, ETH_ALEN)) + return; + + list_add(&neigh->list_head, &predt->nn_list); + neigh->flow = predt; + ext->host_ctx = flow_pay->meta.host_ctx_id; + ext->vlan_tci = flow_pay->pre_tun_rule.vlan_tci; + ext->vlan_tpid = flow_pay->pre_tun_rule.vlan_tpid; +} + +static void +nfp_tun_link_predt_entries(struct nfp_app *app, + struct nfp_neigh_entry *nn_entry) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_predt_entry *predt, *tmp; + + list_for_each_entry_safe(predt, tmp, &priv->predt_list, list_head) { + nfp_tun_mutual_link(predt, nn_entry); + } +} + +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *nn_entry; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((nn_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(nn_entry)) + continue; + nfp_tun_mutual_link(predt, nn_entry); + neigh_size = nn_entry->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = nn_entry->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +static void nfp_tun_cleanup_nn_entries(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *neigh; + struct nfp_tun_neigh_ext *ext; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((neigh = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(neigh)) + continue; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + + rhashtable_remove_fast(&priv->neigh_table, &neigh->ht_node, + neigh_table_params); + if (neigh->flow) + list_del(&neigh->list_head); + kfree(neigh); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) +{ + struct nfp_neigh_entry *neigh, *tmp; + struct nfp_tun_neigh_ext *ext; + size_t neigh_size; + u8 type; + + list_for_each_entry_safe(neigh, tmp, &predt->nn_list, list_head) { + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + neigh->flow = NULL; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + list_del(&neigh->list_head); + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + } +} + +static void +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + void *flow, struct neighbour *neigh, bool is_ipv6, + bool override) +{ + bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; + size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + unsigned long cookie = (unsigned long)neigh; + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *nn_entry; + u32 port_id; + u8 mtype; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + spin_lock_bh(&priv->predt_lock); + nn_entry = rhashtable_lookup_fast(&priv->neigh_table, &cookie, + neigh_table_params); + if (!nn_entry && !neigh_invalid) { + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; + + nn_entry = kzalloc(sizeof(*nn_entry) + neigh_size, + GFP_ATOMIC); + if (!nn_entry) + goto err; + + nn_entry->payload = (char *)&nn_entry[1]; + nn_entry->neigh_cookie = cookie; + nn_entry->is_ipv6 = is_ipv6; + nn_entry->flow = NULL; + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + payload->src_ipv6 = flowi6->saddr; + payload->dst_ipv6 = flowi6->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + payload->src_ipv4 = flowi4->saddr; + payload->dst_ipv4 = flowi4->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + ether_addr_copy(common->src_addr, netdev->dev_addr); + neigh_ha_snapshot(common->dst_addr, neigh, netdev); + common->port_id = cpu_to_be32(port_id); + + if (rhashtable_insert_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params)) + goto err; + + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } else if (nn_entry && neigh_invalid) { + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload->dst_ipv6 = flowi6->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v4)); + payload->dst_ipv4 = flowi4->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + /* Trigger ARP to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + rhashtable_remove_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params); + + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + + if (nn_entry->flow) + list_del(&nn_entry->list_head); + kfree(nn_entry); + } else if (nn_entry && !neigh_invalid && override) { + mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + + spin_unlock_bh(&priv->predt_lock); + return; + +err: + kfree(nn_entry); + spin_unlock_bh(&priv->predt_lock); + nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct neighbour *n; + struct nfp_app *app; + bool neigh_invalid; + int err; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } + + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; + + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + + if (!nfp_netdev_is_nfp_repr(n->dev) && + !nfp_flower_internal_port_can_offload(app, n->dev)) + return NOTIFY_DONE; + +#if IS_ENABLED(CONFIG_INET) + if (n->tbl->family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 flow6 = {}; + + flow6.daddr = *(struct in6_addr *)n->primary_key; + if (!neigh_invalid) { + struct dst_entry *dst; + /* Use ipv6_dst_lookup_flow to populate flow6->saddr + * and other fields. This information is only needed + * for new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + } + nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); +#else + return NOTIFY_DONE; +#endif /* CONFIG_IPV6 */ + } else { + struct flowi4 flow4 = {}; + + flow4.daddr = *(__be32 *)n->primary_key; + if (!neigh_invalid) { + struct rtable *rt; + /* Use ip_route_output_key to populate flow4->saddr and + * other fields. This information is only needed for + * new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; + + ip_rt_put(rt); + } + nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); + } +#else + return NOTIFY_DONE; +#endif /* CONFIG_INET */ + + return NOTIFY_OK; +} + +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv4 *payload; + struct net_device *netdev; + struct flowi4 flow = {}; + struct neighbour *n; + struct rtable *rt; + int err; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = payload->ipv4_addr; + flow.flowi4_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup on same namespace as ingress port. */ + rt = ip_route_output_key(dev_net(netdev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + /* Get the neighbour entry for the lookup */ + n = dst_neigh_lookup(&rt->dst, &flow.daddr); + ip_rt_put(rt); + if (!n) + goto fail_rcu_unlock; + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested route not found.\n"); +} + +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + + nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + +static void nfp_tun_write_ipv4_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct nfp_tun_ipv4_addr payload; + struct list_head *ptr, *storage; + int count; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); + mutex_lock(&priv->tun.ipv4_off_lock); + count = 0; + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + if (count >= NFP_FL_IPV4_ADDRS_MAX) { + mutex_unlock(&priv->tun.ipv4_off_lock); + nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); + return; + } + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + payload.ipv4_addr[count++] = entry->ipv4_addr; + } + payload.count = cpu_to_be32(count); + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, + sizeof(struct nfp_tun_ipv4_addr), + &payload, GFP_KERNEL); +} + +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv4_off_lock); + return; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv4_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return; + } + entry->ipv4_addr = ipv4; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv4_off_list); + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count--; + if (!entry->ref_count) { + list_del(&entry->list); + kfree(entry); + } + break; + } + } + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + &payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); + return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + +static int +__nfp_tunnel_offload_mac(struct nfp_app *app, const u8 *mac, u16 idx, bool del) +{ + struct nfp_tun_mac_addr_offload payload; + + memset(&payload, 0, sizeof(payload)); + + if (del) + payload.flags = cpu_to_be16(NFP_TUN_MAC_OFFLOAD_DEL_FLAG); + + /* FW supports multiple MACs per cmsg but restrict to single. */ + payload.count = cpu_to_be16(1); + payload.index = cpu_to_be16(idx); + ether_addr_copy(payload.addr, mac); + + return nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + sizeof(struct nfp_tun_mac_addr_offload), + &payload, GFP_KERNEL); +} + +static bool nfp_tunnel_port_is_phy_repr(int port) +{ + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) + return true; + + return false; +} + +static u16 nfp_tunnel_get_mac_idx_from_phy_port_id(int port) +{ + return port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; +} + +static u16 nfp_tunnel_get_global_mac_idx_from_ida(int id) +{ + return id << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static int nfp_tunnel_get_ida_from_global_mac_idx(u16 nfp_mac_idx) +{ + return nfp_mac_idx >> 8; +} + +static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx) +{ + return (nfp_mac_idx & 0xff) == NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static struct nfp_tun_offloaded_mac * +nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, const u8 *mac) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->tun.offloaded_macs, mac, + offloaded_macs_params); +} + +static void +nfp_tunnel_offloaded_macs_inc_ref_and_link(struct nfp_tun_offloaded_mac *entry, + struct net_device *netdev, bool mod) +{ + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + + /* If modifing MAC, remove repr from old list first. */ + if (mod) + list_del(&repr_priv->mac_list); + + list_add_tail(&repr_priv->mac_list, &entry->repr_list); + } else if (nfp_flower_is_supported_bridge(netdev)) { + entry->bridge_count++; + } + + entry->ref_count++; +} + +static int +nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, + int port, bool mod) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; + u16 nfp_mac_idx = 0; + + entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); + if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry->bridge_count || + !nfp_flower_is_supported_bridge(netdev)) { + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, + netdev, mod); + return 0; + } + + /* MAC is global but matches need to go to pre_tun table. */ + nfp_mac_idx = entry->index | NFP_TUN_PRE_TUN_IDX_BIT; + } + + if (!nfp_mac_idx) { + /* Assign a global index if non-repr or MAC is now shared. */ + if (entry || !port) { + ida_idx = ida_alloc_max(&priv->tun.mac_off_ids, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (ida_idx < 0) + return ida_idx; + + nfp_mac_idx = + nfp_tunnel_get_global_mac_idx_from_ida(ida_idx); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx |= NFP_TUN_PRE_TUN_IDX_BIT; + + } else { + nfp_mac_idx = + nfp_tunnel_get_mac_idx_from_phy_port_id(port); + } + } + + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto err_free_ida; + } + + ether_addr_copy(entry->addr, netdev->dev_addr); + INIT_LIST_HEAD(&entry->repr_list); + + if (rhashtable_insert_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)) { + err = -ENOMEM; + goto err_free_entry; + } + } + + err = __nfp_tunnel_offload_mac(app, netdev->dev_addr, + nfp_mac_idx, false); + if (err) { + /* If not shared then free. */ + if (!entry->ref_count) + goto err_remove_hash; + goto err_free_ida; + } + + entry->index = nfp_mac_idx; + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod); + + return 0; + +err_remove_hash: + rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, + offloaded_macs_params); +err_free_entry: + kfree(entry); +err_free_ida: + if (ida_idx != -1) + ida_free(&priv->tun.mac_off_ids, ida_idx); + + return err; +} + +static int +nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, + const u8 *mac, bool mod) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_tun_offloaded_mac *entry; + struct nfp_repr *repr; + u16 nfp_mac_idx; + int ida_idx; + + entry = nfp_tunnel_lookup_offloaded_macs(app, mac); + if (!entry) + return 0; + + entry->ref_count--; + /* If del is part of a mod then mac_list is still in use elsewhere. */ + if (nfp_netdev_is_nfp_repr(netdev) && !mod) { + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + list_del(&repr_priv->mac_list); + } + + if (nfp_flower_is_supported_bridge(netdev)) { + entry->bridge_count--; + + if (!entry->bridge_count && entry->ref_count) { + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, + false)) { + nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n", + netdev_name(netdev)); + return 0; + } + + entry->index = nfp_mac_idx; + return 0; + } + } + + /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ + if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { + int port, err; + + repr_priv = list_first_entry(&entry->repr_list, + struct nfp_flower_repr_priv, + mac_list); + repr = repr_priv->nfp_repr; + port = nfp_repr_get_port_id(repr->netdev); + nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port); + err = __nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false); + if (err) { + nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n", + netdev_name(netdev)); + return 0; + } + + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_free(&priv->tun.mac_off_ids, ida_idx); + entry->index = nfp_mac_idx; + return 0; + } + + if (entry->ref_count) + return 0; + + WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + + /* If MAC has global ID then extract and free the ida entry. */ + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_free(&priv->tun.mac_off_ids, ida_idx); + } + + kfree(entry); + + return __nfp_tunnel_offload_mac(app, mac, 0, true); +} + +static int +nfp_tunnel_offload_mac(struct nfp_app *app, struct net_device *netdev, + enum nfp_flower_mac_offload_cmd cmd) +{ + struct nfp_flower_non_repr_priv *nr_priv = NULL; + bool non_repr = false, *mac_offloaded; + u8 *off_mac = NULL; + int err, port = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + repr = netdev_priv(netdev); + if (repr->app != app) + return 0; + + repr_priv = repr->app_priv; + if (repr_priv->on_bridge) + return 0; + + mac_offloaded = &repr_priv->mac_offloaded; + off_mac = &repr_priv->offloaded_mac_addr[0]; + port = nfp_repr_get_port_id(netdev); + if (!nfp_tunnel_port_is_phy_repr(port)) + return 0; + } else if (nfp_fl_is_netdev_to_offload(netdev)) { + nr_priv = nfp_flower_non_repr_priv_get(app, netdev); + if (!nr_priv) + return -ENOMEM; + + mac_offloaded = &nr_priv->mac_offloaded; + off_mac = &nr_priv->offloaded_mac_addr[0]; + non_repr = true; + } else { + return 0; + } + + if (!is_valid_ether_addr(netdev->dev_addr)) { + err = -EINVAL; + goto err_put_non_repr_priv; + } + + if (cmd == NFP_TUNNEL_MAC_OFFLOAD_MOD && !*mac_offloaded) + cmd = NFP_TUNNEL_MAC_OFFLOAD_ADD; + + switch (cmd) { + case NFP_TUNNEL_MAC_OFFLOAD_ADD: + err = nfp_tunnel_add_shared_mac(app, netdev, port, false); + if (err) + goto err_put_non_repr_priv; + + if (non_repr) + __nfp_flower_non_repr_priv_get(nr_priv); + + *mac_offloaded = true; + ether_addr_copy(off_mac, netdev->dev_addr); + break; + case NFP_TUNNEL_MAC_OFFLOAD_DEL: + /* Only attempt delete if add was successful. */ + if (!*mac_offloaded) + break; + + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + *mac_offloaded = false; + + err = nfp_tunnel_del_shared_mac(app, netdev, netdev->dev_addr, + false); + if (err) + goto err_put_non_repr_priv; + + break; + case NFP_TUNNEL_MAC_OFFLOAD_MOD: + /* Ignore if changing to the same address. */ + if (ether_addr_equal(netdev->dev_addr, off_mac)) + break; + + err = nfp_tunnel_add_shared_mac(app, netdev, port, true); + if (err) + goto err_put_non_repr_priv; + + /* Delete the previous MAC address. */ + err = nfp_tunnel_del_shared_mac(app, netdev, off_mac, true); + if (err) + nfp_flower_cmsg_warn(app, "Failed to remove offload of replaced MAC addr on %s.\n", + netdev_name(netdev)); + + ether_addr_copy(off_mac, netdev->dev_addr); + break; + default: + err = -EINVAL; + goto err_put_non_repr_priv; + } + + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + return 0; + +err_put_non_repr_priv: + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + return err; +} + +int nfp_tunnel_mac_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event, void *ptr) +{ + int err; + + if (event == NETDEV_DOWN) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_DEL); + if (err) + nfp_flower_cmsg_warn(app, "Failed to delete offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_UP) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_ADD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_CHANGEADDR) { + /* Only offload addr change if netdev is already up. */ + if (!(netdev->flags & IFF_UP)) + return NOTIFY_OK; + + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_MOD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC change on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_CHANGEUPPER) { + /* If a repr is attached to a bridge then tunnel packets + * entering the physical port are directed through the bridge + * datapath and cannot be directly detunneled. Therefore, + * associated offloaded MACs and indexes should not be used + * by fw for detunneling. + */ + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + if (!nfp_netdev_is_nfp_repr(netdev) || + !nfp_flower_is_supported_bridge(upper)) + return NOTIFY_OK; + + repr = netdev_priv(netdev); + if (repr->app != app) + return NOTIFY_OK; + + repr_priv = repr->app_priv; + + if (info->linking) { + if (nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_DEL)) + nfp_flower_cmsg_warn(app, "Failed to delete offloaded MAC on %s.\n", + netdev_name(netdev)); + repr_priv->on_bridge = true; + } else { + repr_priv->on_bridge = false; + + if (!(netdev->flags & IFF_UP)) + return NOTIFY_OK; + + if (nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_ADD)) + nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n", + netdev_name(netdev)); + } + } + return NOTIFY_OK; +} + +int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, + struct nfp_fl_payload *flow) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_tun_offloaded_mac *mac_entry; + struct nfp_flower_meta_tci *key_meta; + struct nfp_tun_pre_tun_rule payload; + struct net_device *internal_dev; + int err; + + if (app_priv->pre_tun_rule_cnt == NFP_TUN_PRE_TUN_RULE_LIMIT) + return -ENOSPC; + + memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule)); + + internal_dev = flow->pre_tun_rule.dev; + payload.vlan_tci = flow->pre_tun_rule.vlan_tci; + payload.host_ctx_id = flow->meta.host_ctx_id; + + /* Lookup MAC index for the pre-tunnel rule egress device. + * Note that because the device is always an internal port, it will + * have a constant global index so does not need to be tracked. + */ + mac_entry = nfp_tunnel_lookup_offloaded_macs(app, + internal_dev->dev_addr); + if (!mac_entry) + return -ENOENT; + + /* Set/clear IPV6 bit. cpu_to_be16() swap will lead to MSB being + * set/clear for port_idx. + */ + key_meta = (struct nfp_flower_meta_tci *)flow->unmasked_data; + if (key_meta->nfp_flow_key_layer & NFP_FLOWER_LAYER_IPV6) + mac_entry->index |= NFP_TUN_PRE_TUN_IPV6_BIT; + else + mac_entry->index &= ~NFP_TUN_PRE_TUN_IPV6_BIT; + + payload.port_idx = cpu_to_be16(mac_entry->index); + + /* Copy mac id and vlan to flow - dev may not exist at delete time. */ + flow->pre_tun_rule.vlan_tci = payload.vlan_tci; + flow->pre_tun_rule.port_idx = payload.port_idx; + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE, + sizeof(struct nfp_tun_pre_tun_rule), + (unsigned char *)&payload, GFP_KERNEL); + if (err) + return err; + + app_priv->pre_tun_rule_cnt++; + + return 0; +} + +int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, + struct nfp_fl_payload *flow) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_tun_pre_tun_rule payload; + u32 tmp_flags = 0; + int err; + + memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule)); + + tmp_flags |= NFP_TUN_PRE_TUN_RULE_DEL; + payload.flags = cpu_to_be32(tmp_flags); + payload.vlan_tci = flow->pre_tun_rule.vlan_tci; + payload.port_idx = flow->pre_tun_rule.port_idx; + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE, + sizeof(struct nfp_tun_pre_tun_rule), + (unsigned char *)&payload, GFP_KERNEL); + if (err) + return err; + + app_priv->pre_tun_rule_cnt--; + + return 0; +} + +int nfp_tunnel_config_start(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + int err; + + /* Initialise rhash for MAC offload tracking. */ + err = rhashtable_init(&priv->tun.offloaded_macs, + &offloaded_macs_params); + if (err) + return err; + + ida_init(&priv->tun.mac_off_ids); + + /* Initialise priv data for IPv4/v6 offloading. */ + mutex_init(&priv->tun.ipv4_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); + + /* Initialise priv data for neighbour offloading. */ + priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + + err = register_netevent_notifier(&priv->tun.neigh_nb); + if (err) { + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); + return err; + } + + return 0; +} + +void nfp_tunnel_config_stop(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *ip_entry; + struct list_head *ptr, *storage; + + unregister_netevent_notifier(&priv->tun.neigh_nb); + + ida_destroy(&priv->tun.mac_off_ids); + + /* Free any memory that may be occupied by ipv4 list. */ + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + list_del(&ip_entry->list); + kfree(ip_entry); + } + + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); + + nfp_tun_cleanup_nn_entries(app); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c new file mode 100644 index 000000000..448c1c1af --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -0,0 +1,1382 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> +#include <linux/bitfield.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "../crypto/crypto.h" +#include "../crypto/fw.h" +#include "nfd3.h" + +/* Transmit processing + * + * One queue controller peripheral queue is used for transmit. The + * driver en-queues packets for transmit by advancing the write + * pointer. The device indicates that packets have transmitted by + * advancing the read pointer. The driver maintains a local copy of + * the read and write pointer in @struct nfp_net_tx_ring. The driver + * keeps @wr_p in sync with the queue controller write pointer and can + * determine how many packets have been transmitted by comparing its + * copy of the read pointer @rd_p with the read pointer maintained by + * the queue controller peripheral. + */ + +/* Wrappers for deciding when to stop and restart TX queues */ +static int nfp_nfd3_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); +} + +static int nfp_nfd3_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); +} + +/** + * nfp_nfd3_tx_ring_stop() - stop tx ring + * @nd_q: netdev queue + * @tx_ring: driver tx queue structure + * + * Safely stop TX ring. Remember that while we are running .start_xmit() + * someone else may be cleaning the TX ring completions so we need to be + * extra careful here. + */ +static void +nfp_nfd3_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_nfd3_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +/** + * nfp_nfd3_tx_tso() - Set up Tx descriptor for LSO + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to HW TX descriptor + * @skb: Pointer to SKB + * @md_bytes: Prepend length + * + * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. + * Return error on packet header greater than maximum supported LSO header size. + */ +static void +nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf, + struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb, u32 md_bytes) +{ + u32 l3_offset, l4_offset, hdrlen; + u16 mss; + + if (!skb_is_gso(skb)) + return; + + if (!skb->encapsulation) { + l3_offset = skb_network_offset(skb); + l4_offset = skb_transport_offset(skb); + hdrlen = skb_tcp_all_headers(skb); + } else { + l3_offset = skb_inner_network_offset(skb); + l4_offset = skb_inner_transport_offset(skb); + hdrlen = skb_inner_tcp_all_headers(skb); + } + + txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; + txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); + + mss = skb_shinfo(skb)->gso_size & NFD3_DESC_TX_MSS_MASK; + txd->l3_offset = l3_offset - md_bytes; + txd->l4_offset = l4_offset - md_bytes; + txd->lso_hdrlen = hdrlen - md_bytes; + txd->mss = cpu_to_le16(mss); + txd->flags |= NFD3_DESC_TX_LSO; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_nfd3_tx_csum() - Set TX CSUM offload flags in TX descriptor + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to TX descriptor + * @skb: Pointer to SKB + * + * This function sets the TX checksum flags in the TX descriptor based + * on the configuration and the protocol of the packet to be transmitted. + */ +static void +nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_nfd3_tx_buf *txbuf, struct nfp_nfd3_tx_desc *txd, + struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 l4_hdr; + + if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return; + + txd->flags |= NFD3_DESC_TX_CSUM; + if (skb->encapsulation) + txd->flags |= NFD3_DESC_TX_ENCAP; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { + txd->flags |= NFD3_DESC_TX_IP4_CSUM; + l4_hdr = iph->protocol; + } else if (ipv6h->version == 6) { + l4_hdr = ipv6h->nexthdr; + } else { + nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); + return; + } + + switch (l4_hdr) { + case IPPROTO_TCP: + txd->flags |= NFD3_DESC_TX_TCP_CSUM; + break; + case IPPROTO_UDP: + txd->flags |= NFD3_DESC_TX_UDP_CSUM; + break; + default: + nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr); + return; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (skb->encapsulation) + r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; + else + r_vec->hw_csum_tx += txbuf->pkt_cnt; + u64_stats_update_end(&r_vec->tx_sync); +} + +static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, u64 tls_handle) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + unsigned char *data; + bool vlan_insert; + u32 meta_id = 0; + int md_bytes; + + if (unlikely(md_dst || tls_handle)) { + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) + md_dst = NULL; + } + + vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); + + if (!(md_dst || tls_handle || vlan_insert)) + return 0; + + md_bytes = sizeof(meta_id) + + !!md_dst * NFP_NET_META_PORTID_SIZE + + !!tls_handle * NFP_NET_META_CONN_HANDLE_SIZE + + vlan_insert * NFP_NET_META_VLAN_SIZE; + + if (unlikely(skb_cow_head(skb, md_bytes))) + return -ENOMEM; + + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= NFP_NET_META_PORTID_SIZE; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (tls_handle) { + /* conn handle is opaque, we just use u64 to be able to quickly + * compare it to zero + */ + data -= NFP_NET_META_CONN_HANDLE_SIZE; + memcpy(data, &tls_handle, sizeof(tls_handle)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_CONN_HANDLE; + } + if (vlan_insert) { + data -= NFP_NET_META_VLAN_SIZE; + /* data type of skb->vlan_proto is __be16 + * so it fills metadata without calling put_unaligned_be16 + */ + memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto)); + put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_VLAN; + } + + data -= sizeof(meta_id); + put_unaligned_be32(meta_id, data); + + return md_bytes; +} + +/** + * nfp_nfd3_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. + */ +netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int f, nr_frags, wr_idx, md_bytes; + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_r_vector *r_vec; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + struct netdev_queue *nd_q; + const skb_frag_t *frag; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + unsigned int fsize; + u64 tls_handle = 0; + u16 qidx; + + dp = &nn->dp; + qidx = skb_get_queue_mapping(skb); + tx_ring = &dp->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { + nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + nd_q = netdev_get_tx_queue(dp->netdev, qidx); + netif_tx_stop_queue(nd_q); + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); + if (unlikely(!skb)) { + nfp_net_tx_xmit_more_flush(tx_ring); + return NETDEV_TX_OK; + } + + md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle); + if (unlikely(md_bytes < 0)) + goto err_flush; + + /* Start with the head skbuf */ + dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_dma_err; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = skb->len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */ + nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes); + nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb); + if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { + txd->flags |= NFD3_DESC_TX_VLAN; + txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); + } + + /* Gather DMA */ + if (nr_frags > 0) { + __le64 second_half; + + /* all descs must match except for in addr, length and eop */ + second_half = txd->vals8[1]; + + for (f = 0; f < nr_frags; f++) { + frag = &skb_shinfo(skb)->frags[f]; + fsize = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(dp->dev, frag, 0, + fsize, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_unmap; + + wr_idx = D_IDX(tx_ring, wr_idx + 1); + tx_ring->txbufs[wr_idx].skb = skb; + tx_ring->txbufs[wr_idx].dma_addr = dma_addr; + tx_ring->txbufs[wr_idx].fidx = f; + + txd = &tx_ring->txds[wr_idx]; + txd->dma_len = cpu_to_le16(fsize); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->offset_eop = md_bytes | + ((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0); + txd->vals8[1] = second_half; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_gather++; + u64_stats_update_end(&r_vec->tx_sync); + } + + skb_tx_timestamp(skb); + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + + tx_ring->wr_p += nr_frags + 1; + if (nfp_nfd3_tx_ring_should_stop(tx_ring)) + nfp_nfd3_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += nr_frags + 1; + if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more())) + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_unmap: + while (--f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; + wr_idx = wr_idx - 1; + if (wr_idx < 0) + wr_idx += tx_ring->cnt; + } + dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; +err_dma_err: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_flush: + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + nfp_net_tls_tx_undo(skb, tls_handle); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_nfd3_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * @budget: NAPI budget (only used as bool to determine if in NAPI context) + */ +void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + struct netdev_queue *nd_q; + u32 qcp_rd_p; + int todo; + + if (tx_ring->wr_p == tx_ring->rd_p) + return; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + while (todo--) { + const skb_frag_t *frag; + struct nfp_nfd3_tx_buf *tx_buf; + struct sk_buff *skb; + int fidx, nr_frags; + int idx; + + idx = D_IDX(tx_ring, tx_ring->rd_p++); + tx_buf = &tx_ring->txbufs[idx]; + + skb = tx_buf->skb; + if (!skb) + continue; + + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_buf->fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(dp->dev, tx_buf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + done_pkts += tx_buf->pkt_cnt; + done_bytes += tx_buf->real_len; + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(dp->dev, tx_buf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + napi_consume_skb(skb, budget); + + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + if (!dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_nfd3_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + done_bytes += tx_ring->txbufs[idx].real_len; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +/* Receive processing + */ + +static void * +nfp_nfd3_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; + } else { + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return NULL; + frag = page_address(page); + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_nfd3_rx_give_one() - Put mapped skb on the software and hardware rings + * @dp: NFP Net data path struct + * @rx_ring: RX ring structure + * @frag: page fragment buffer + * @dma_addr: DMA address of skb mapping + */ +static void +nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring, + void *frag, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + nfp_net_dma_sync_dev_rx(dp, dma_addr); + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].frag = frag; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly, it's also OK to fill + * a 40-bit address since the top 8 bits are get set to 0. + */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); + + rx_ring->wr_p++; + if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); + } +} + +/** + * nfp_nfd3_rx_ring_fill_freelist() - Give buffers from the ring to FW + * @dp: NFP Net data path struct + * @rx_ring: RX ring to fill + */ +void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return nfp_net_xsk_rx_ring_fill_freelist(rx_ring); + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_nfd3_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, + rx_ring->rxbufs[i].dma_addr); +} + +/** + * nfp_nfd3_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_nfd3_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_nfd3_rx_csum() - set SKB checksum field based on RX descriptor flags + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @meta: Parsed metadata prepend + * @skb: Pointer to SKB + */ +void +nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(dp->netdev->features & NETIF_F_RXCSUM)) + return; + + if (meta->csum_type) { + skb->ip_summed = meta->csum_type; + skb->csum = meta->csum; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_complete++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + if (nfp_nfd3_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +static void +nfp_nfd3_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + meta->hash_type = PKT_HASH_TYPE_L3; + break; + default: + meta->hash_type = PKT_HASH_TYPE_L4; + break; + } + + meta->hash = get_unaligned_be32(hash); +} + +static void +nfp_nfd3_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash = data; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) + return; + + nfp_nfd3_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +bool +nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len) +{ + u32 meta_info, vlan_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_nfd3_set_hash(netdev, meta, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + meta->mark = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_VLAN: + vlan_info = get_unaligned_be32(data); + if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) { + meta->vlan.stripped = true; + meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK, + vlan_info); + meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK, + vlan_info); + } + data += 4; + break; + case NFP_NET_META_PORTID: + meta->portid = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_CSUM: + meta->csum_type = CHECKSUM_COMPLETE; + meta->csum = + (__force __wsum)__get_unaligned_cpu32(data); + data += 4; + break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return false; + data += sizeof(struct nfp_net_tls_resync_req); + break; + default: + return true; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data != pkt; +} + +static void +nfp_nfd3_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, + struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. + */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; + u64_stats_update_end(&r_vec->rx_sync); + + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. + */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); + if (rxbuf) + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + +static bool +nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, + unsigned int pkt_len, bool *completed) +{ + unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + int wr_idx; + + /* Reject if xdp_adjust_tail grow packet beyond DMA area */ + if (pkt_len + dma_off > dma_map_sz) + return false; + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + if (!*completed) { + nfp_nfd3_xdp_complete(tx_ring); + *completed = true; + } + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, + NULL); + return false; + } + } + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + + nfp_nfd3_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr); + + txbuf->frag = rxbuf->frag; + txbuf->dma_addr = rxbuf->dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = pkt_len; + + dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, + pkt_len, DMA_BIDIRECTIONAL); + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_p++; + tx_ring->wr_ptr_add++; + return true; +} + +/** + * nfp_nfd3_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * Return: Number of packets received. + */ +static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_tx_cmpl = false; + unsigned int true_bufsz; + struct sk_buff *skb; + int pkts_polled = 0; + struct xdp_buff xdp; + int idx; + + xdp_prog = READ_ONCE(dp->xdp_prog); + true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; + xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, + &rx_ring->xdp_rxq); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + bool redir_egress = false; + struct net_device *netdev; + dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; + void *new_frag; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + rx_ring->rd_p++; + pkts_polled++; + + rxbuf = &rx_ring->rxbufs[idx]; + /* < meta_len > + * <-- [rx_offset] --> + * --------------------------------------------------------- + * | [XX] | metadata | packet | XXXX | + * --------------------------------------------------------- + * <---------------- data_len ---------------> + * + * The rx_offset is fixed for all packets, the meta_len can vary + * on a packet by packet basis. If rx_offset is set to zero + * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the + * buffer and is immediately followed by the packet (no [XX]). + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND || + (dp->rx_offset && meta_len > dp->rx_offset))) { + nn_dp_warn(dp, "oversized RX packet metadata %u\n", + meta_len); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, + data_len); + + if (!dp->chained_metadata_format) { + nfp_nfd3_set_hash_desc(dp->netdev, &meta, + rxbuf->frag + meta_off, rxd); + } else if (meta_len) { + if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { + nn_dp_warn(dp, "invalid RX packet metadata\n"); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + } + + if (xdp_prog && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; + unsigned int dma_off; + int act; + + xdp_prepare_buff(&xdp, + rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, + pkt_off - NFP_NET_RX_BUF_HEADROOM, + pkt_len, true); + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len = xdp.data_end - xdp.data; + pkt_off += xdp.data - orig_data; + + switch (act) { + case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; + break; + case XDP_TX: + dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; + if (unlikely(!nfp_nfd3_tx_xdp_buf(dp, rx_ring, + tx_ring, + rxbuf, + dma_off, + pkt_len, + &xdp_tx_cmpl))) + trace_xdp_exception(dp->netdev, + xdp_prog, act); + continue; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_dev_get(nn->app, meta.portid, + &redir_egress); + if (unlikely(!netdev)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + + if (nfp_netdev_is_nfp_repr(netdev)) + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + skb->mark = meta.mark; + skb_set_hash(skb, meta.hash, meta.hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfd3_rx_csum(dp, r_vec, rxd, &meta, skb); + +#ifdef CONFIG_TLS_DEVICE + if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { + skb->decrypted = true; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_tls_rx++; + u64_stats_update_end(&r_vec->rx_sync); + } +#endif + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb); + continue; + } + + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); + + if (likely(!redir_egress)) { + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } else { + skb->dev = netdev; + skb_reset_network_header(skb); + __skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); + } + } + + if (xdp_prog) { + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && + !xdp_tx_cmpl) + if (!nfp_nfd3_xdp_complete(tx_ring)) + pkts_polled = budget; + } + + return pkts_polled; +} + +/** + * nfp_nfd3_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. + */ +int nfp_nfd3_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled = 0; + + if (r_vec->tx_ring) + nfp_nfd3_tx_complete(r_vec->tx_ring, budget); + if (r_vec->rx_ring) + pkts_polled = nfp_nfd3_rx(r_vec->rx_ring, budget); + + if (pkts_polled < budget) + if (napi_complete_done(napi, pkts_polled)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + + if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + + return pkts_polled; +} + +/* Control device data path + */ + +bool +nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old) +{ + unsigned int real_len = skb->len, meta_len = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + int wr_idx; + + dp = &r_vec->nfp_net->dp; + tx_ring = r_vec->tx_ring; + + if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { + nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); + goto err_free; + } + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + if (!old) + __skb_queue_tail(&r_vec->queue, skb); + else + __skb_queue_head(&r_vec->queue, skb); + return true; + } + + if (nfp_app_ctrl_has_meta(nn->app)) { + if (unlikely(skb_headroom(skb) < 8)) { + nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); + goto err_free; + } + meta_len = 8; + put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); + put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4)); + } + + /* Start with the head skbuf */ + dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_dma_warn; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = real_len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = meta_len | NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_p++; + tx_ring->wr_ptr_add++; + nfp_net_tx_xmit_more_flush(tx_ring); + + return false; + +err_dma_warn: + nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n"); +err_free: + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return false; +} + +static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&r_vec->queue))) + if (nfp_nfd3_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) + return; +} + +static bool +nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) +{ + u32 meta_type, meta_tag; + + if (!nfp_app_ctrl_has_meta(nn->app)) + return !meta_len; + + if (meta_len != 8) + return false; + + meta_type = get_unaligned_be32(data); + meta_tag = get_unaligned_be32(data + 4); + + return (meta_type == NFP_NET_META_PORTID && + meta_tag == NFP_META_PORT_ID_CTRL); +} + +static bool +nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) +{ + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + struct sk_buff *skb; + void *new_frag; + int idx; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + return false; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + + rxbuf = &rx_ring->rxbufs[idx]; + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); + + if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { + nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", + meta_len); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + + skb = build_skb(rxbuf->frag, dp->fl_bufsz); + if (unlikely(!skb)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + return true; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + nfp_app_ctrl_rx(nn->app, skb); + + return true; +} + +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +{ + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; + + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) + continue; + + return budget; +} + +void nfp_nfd3_ctrl_poll(struct tasklet_struct *t) +{ + struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); + + spin_lock(&r_vec->lock); + nfp_nfd3_tx_complete(r_vec->tx_ring, 0); + __nfp_ctrl_tx_queued(r_vec); + spin_unlock(&r_vec->lock); + + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h new file mode 100644 index 000000000..7a0df9e6c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#ifndef _NFP_DP_NFD3_H_ +#define _NFP_DP_NFD3_H_ + +struct sk_buff; +struct net_device; + +/* TX descriptor format */ + +#define NFD3_DESC_TX_EOP BIT(7) +#define NFD3_DESC_TX_OFFSET_MASK GENMASK(6, 0) +#define NFD3_DESC_TX_MSS_MASK GENMASK(13, 0) + +/* Flags in the host TX descriptor */ +#define NFD3_DESC_TX_CSUM BIT(7) +#define NFD3_DESC_TX_IP4_CSUM BIT(6) +#define NFD3_DESC_TX_TCP_CSUM BIT(5) +#define NFD3_DESC_TX_UDP_CSUM BIT(4) +#define NFD3_DESC_TX_VLAN BIT(3) +#define NFD3_DESC_TX_LSO BIT(2) +#define NFD3_DESC_TX_ENCAP BIT(1) +#define NFD3_DESC_TX_O_IP4_CSUM BIT(0) + +struct nfp_nfd3_tx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len; /* Length to DMA for this desc */ + u8 offset_eop; /* Offset in buf where pkt starts + + * highest bit is eop flag. + */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + + __le16 mss; /* MSS to be used for LSO */ + u8 lso_hdrlen; /* LSO, TCP payload offset */ + u8 flags; /* TX Flags, see @NFD3_DESC_TX_* */ + union { + struct { + u8 l3_offset; /* L3 header offset */ + u8 l4_offset; /* L4 header offset */ + }; + __le16 vlan; /* VLAN tag to add if indicated */ + }; + __le16 data_len; /* Length of frame + meta data */ + } __packed; + __le32 vals[4]; + __le64 vals8[2]; + }; +}; + +/** + * struct nfp_nfd3_tx_buf - software TX buffer descriptor + * @skb: normal ring, sk_buff associated with this buffer + * @frag: XDP ring, page frag associated with this buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + * @dma_addr: DMA mapping address of the buffer + * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) + * @pkt_cnt: Number of packets to be produced out of the skb associated + * with this buffer (valid only on the head's buffer). + * Will be 1 for all non-TSO packets. + * @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a + * buffer from the TX queue (for AF_XDP). + * @real_len: Number of bytes which to be produced out of the skb (valid only + * on the head's buffer). Equal to skb->len for non-TSO packets. + */ +struct nfp_nfd3_tx_buf { + union { + struct sk_buff *skb; + void *frag; + struct xdp_buff *xdp; + }; + dma_addr_t dma_addr; + union { + struct { + short int fidx; + u16 pkt_cnt; + }; + struct { + bool is_xsk_tx; + }; + }; + u32 real_len; +}; + +void +nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb); +bool +nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len); +void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget); +int nfp_nfd3_poll(struct napi_struct *napi, int budget); +netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev); +bool +nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); +void nfp_nfd3_ctrl_poll(struct tasklet_struct *t); +void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); +void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf); +int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c new file mode 100644 index 000000000..a03190c93 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#include <linux/seq_file.h> + +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "nfd3.h" + +static void nfp_nfd3_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_nfd3_tx_buf *txbuf; + unsigned int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->txbufs[idx]; + + txbuf->real_len = 0; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + + if (tx_ring->r_vec->xsk_pool) { + if (txbuf->is_xsk_tx) + nfp_nfd3_xsk_tx_free(txbuf); + + xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1); + } + } +} + +/** + * nfp_nfd3_tx_ring_reset() - Free any untransmitted buffers and reset pointers + * @dp: NFP Net data path struct + * @tx_ring: TX ring structure + * + * Assumes that the device is stopped, must be idempotent. + */ +static void +nfp_nfd3_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct netdev_queue *nd_q; + const skb_frag_t *frag; + + while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { + struct nfp_nfd3_tx_buf *tx_buf; + struct sk_buff *skb; + int idx, nr_frags; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_buf = &tx_ring->txbufs[idx]; + + skb = tx_ring->txbufs[idx].skb; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (tx_buf->fidx == -1) { + /* unmap head */ + dma_unmap_single(dp->dev, tx_buf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; + dma_unmap_page(dp->dev, tx_buf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (tx_buf->fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + } + + if (tx_ring->is_xdp) + nfp_nfd3_xsk_tx_bufs_free(tx_ring); + + memset(tx_ring->txds, 0, tx_ring->size); + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + tx_ring->wr_ptr_add = 0; + + if (tx_ring->is_xdp || !dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +/** + * nfp_nfd3_tx_ring_free() - Free resources allocated to a TX ring + * @tx_ring: TX ring to free + */ +static void nfp_nfd3_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + kvfree(tx_ring->txbufs); + + if (tx_ring->txds) + dma_free_coherent(dp->dev, tx_ring->size, + tx_ring->txds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +/** + * nfp_nfd3_tx_ring_alloc() - Allocate resource for a TX ring + * @dp: NFP Net data path struct + * @tx_ring: TX Ring structure to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int +nfp_nfd3_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + + tx_ring->cnt = dp->txd_cnt; + + tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds)); + tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size, + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->txds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); + goto err_alloc; + } + + tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs), + GFP_KERNEL); + if (!tx_ring->txbufs) + goto err_alloc; + + if (!tx_ring->is_xdp && dp->netdev) + netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, + tx_ring->idx); + + return 0; + +err_alloc: + nfp_nfd3_tx_ring_free(tx_ring); + return -ENOMEM; +} + +static void +nfp_nfd3_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + unsigned int i; + + if (!tx_ring->is_xdp) + return; + + for (i = 0; i < tx_ring->cnt; i++) { + if (!tx_ring->txbufs[i].frag) + return; + + nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr); + __free_page(virt_to_page(tx_ring->txbufs[i].frag)); + } +} + +static int +nfp_nfd3_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_nfd3_tx_buf *txbufs = tx_ring->txbufs; + unsigned int i; + + if (!tx_ring->is_xdp) + return 0; + + for (i = 0; i < tx_ring->cnt; i++) { + txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr); + if (!txbufs[i].frag) { + nfp_nfd3_tx_ring_bufs_free(dp, tx_ring); + return -ENOMEM; + } + } + + return 0; +} + +static void +nfp_nfd3_print_tx_descs(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + struct nfp_nfd3_tx_desc *txd; + u32 txd_cnt = tx_ring->cnt; + int i; + + for (i = 0; i < txd_cnt; i++) { + struct xdp_buff *xdp; + struct sk_buff *skb; + + txd = &tx_ring->txds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, + txd->vals[0], txd->vals[1], + txd->vals[2], txd->vals[3]); + + if (!tx_ring->is_xdp) { + skb = READ_ONCE(tx_ring->txbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + } else { + xdp = READ_ONCE(tx_ring->txbufs[i].xdp); + if (xdp) + seq_printf(file, " xdp->data=%p", xdp->data); + } + + if (tx_ring->txbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &tx_ring->txbufs[i].dma_addr); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +} + +#define NFP_NFD3_CFG_CTRL_SUPPORTED \ + (NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \ + NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \ + NFP_NET_CFG_CTRL_RXCSUM | NFP_NET_CFG_CTRL_TXCSUM | \ + NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_TXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2 | NFP_NET_CFG_CTRL_RXQINQ | \ + NFP_NET_CFG_CTRL_TXVLAN_V2 | \ + NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \ + NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \ + NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_RSS | \ + NFP_NET_CFG_CTRL_IRQMOD | NFP_NET_CFG_CTRL_TXRWB | \ + NFP_NET_CFG_CTRL_VEPA | \ + NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \ + NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \ + NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \ + NFP_NET_CFG_CTRL_LIVE_ADDR) + +const struct nfp_dp_ops nfp_nfd3_ops = { + .version = NFP_NFD_VER_NFD3, + .tx_min_desc_per_pkt = 1, + .cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(40), + .poll = nfp_nfd3_poll, + .xsk_poll = nfp_nfd3_xsk_poll, + .ctrl_poll = nfp_nfd3_ctrl_poll, + .xmit = nfp_nfd3_tx, + .ctrl_tx_one = nfp_nfd3_ctrl_tx_one, + .rx_ring_fill_freelist = nfp_nfd3_rx_ring_fill_freelist, + .tx_ring_alloc = nfp_nfd3_tx_ring_alloc, + .tx_ring_reset = nfp_nfd3_tx_ring_reset, + .tx_ring_free = nfp_nfd3_tx_ring_free, + .tx_ring_bufs_alloc = nfp_nfd3_tx_ring_bufs_alloc, + .tx_ring_bufs_free = nfp_nfd3_tx_ring_bufs_free, + .print_tx_descs = nfp_nfd3_print_tx_descs +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c new file mode 100644 index 000000000..5d9db8c2a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "nfd3.h" + +static bool +nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len, + int pkt_off) +{ + struct xsk_buff_pool *pool = r_vec->xsk_pool; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + unsigned int wr_idx; + + if (nfp_net_tx_space(tx_ring) < 1) + return false; + + xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off, + pkt_len); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->xdp = xrxbuf->xdp; + txbuf->real_len = pkt_len; + txbuf->is_xsk_tx = true; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_ptr_add++; + tx_ring->wr_p++; + + return true; +} + +static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring, + const struct nfp_net_rx_desc *rxd, + struct nfp_net_xsk_rx_buf *xrxbuf, + const struct nfp_meta_parsed *meta, + unsigned int pkt_len, + bool meta_xdp, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct net_device *netdev; + struct sk_buff *skb; + + if (likely(!meta->portid)) { + netdev = dp->netdev; + } else { + struct nfp_net *nn = netdev_priv(dp->netdev); + + netdev = nfp_app_dev_get(nn->app, meta->portid, NULL); + if (unlikely(!netdev)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = napi_alloc_skb(&r_vec->napi, pkt_len); + if (!skb) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + skb_put_data(skb, xrxbuf->xdp->data, pkt_len); + + skb->mark = meta->mark; + skb_set_hash(skb, meta->hash, meta->hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb); + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, meta))) { + dev_kfree_skb_any(skb); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + + if (meta_xdp) + skb_metadata_set(skb, + xrxbuf->xdp->data - xrxbuf->xdp->data_meta); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + + nfp_net_xsk_rx_free(xrxbuf); + + (*skbs_polled)++; +} + +static unsigned int +nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_redir = false; + int pkts_polled = 0; + + xdp_prog = READ_ONCE(dp->xdp_prog); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, pkt_len, pkt_off; + struct nfp_net_xsk_rx_buf *xrxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + int idx, act; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + rx_ring->rd_p++; + pkts_polled++; + + xrxbuf = &rx_ring->xsk_rxbufs[idx]; + + /* If starved of buffers "drop" it and scream. */ + if (rx_ring->rd_p >= rx_ring->wr_p) { + nn_dp_warn(dp, "Starved of RX buffers\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + /* Only supporting AF_XDP with dynamic metadata so buffer layout + * is always: + * + * --------------------------------------------------------- + * | off | metadata | packet | XXXX | + * --------------------------------------------------------- + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) { + nn_dp_warn(dp, "Oversized RX packet metadata %u\n", + meta_len); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + /* Stats update. */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + xrxbuf->xdp->data += meta_len; + xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len; + xdp_set_data_meta_invalid(xrxbuf->xdp); + xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool); + net_prefetch(xrxbuf->xdp->data); + + if (meta_len) { + if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, + xrxbuf->xdp->data - + meta_len, + xrxbuf->xdp->data, + pkt_len, meta_len))) { + nn_dp_warn(dp, "Invalid RX packet metadata\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + if (unlikely(meta.portid)) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + if (meta.portid != NFP_META_PORT_ID_CTRL) { + nfp_nfd3_xsk_rx_skb(rx_ring, rxd, + xrxbuf, &meta, + pkt_len, false, + skbs_polled); + continue; + } + + nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data, + pkt_len); + nfp_net_xsk_rx_free(xrxbuf); + continue; + } + } + + act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp); + + pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data; + pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start; + + switch (act) { + case XDP_PASS: + nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len, + true, skbs_polled); + break; + case XDP_TX: + if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring, + xrxbuf, pkt_len, pkt_off)) + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + else + nfp_net_xsk_rx_unstash(xrxbuf); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + } else { + nfp_net_xsk_rx_unstash(xrxbuf); + xdp_redir = true; + } + break; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + } + + nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring); + + if (xdp_redir) + xdp_do_flush_map(); + + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + + return pkts_polled; +} + +void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf) +{ + xsk_buff_free(txbuf->xdp); + + txbuf->dma_addr = 0; + txbuf->xdp = NULL; +} + +static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + u32 done_pkts = 0, done_bytes = 0, reused = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + if (tx_ring->wr_p == tx_ring->rd_p) + return true; + + /* Work out how many descriptors have been transmitted. */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + struct nfp_nfd3_tx_buf *txbuf; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + txbuf = &tx_ring->txbufs[idx]; + if (unlikely(!txbuf->real_len)) + continue; + + done_bytes += txbuf->real_len; + txbuf->real_len = 0; + + if (txbuf->is_xsk_tx) { + nfp_nfd3_xsk_tx_free(txbuf); + reused++; + } + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct xdp_desc desc[NFP_NET_XSK_TX_BATCH]; + struct xsk_buff_pool *xsk_pool; + struct nfp_nfd3_tx_desc *txd; + u32 pkts = 0, wr_idx; + u32 i, got; + + xsk_pool = r_vec->xsk_pool; + + while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) { + for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++) + if (!xsk_tx_peek_desc(xsk_pool, &desc[i])) + break; + got = i; + if (!got) + break; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + prefetchw(&tx_ring->txds[wr_idx]); + + for (i = 0; i < got; i++) + xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr, + desc[i].len); + + for (i = 0; i < got; i++) { + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + + tx_ring->txbufs[wr_idx].real_len = desc[i].len; + tx_ring->txbufs[wr_idx].is_xsk_tx = false; + + /* Build TX descriptor. */ + txd = &tx_ring->txds[wr_idx]; + nfp_desc_set_dma_addr_40b(txd, + xsk_buff_raw_get_dma(xsk_pool, desc[i].addr)); + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(desc[i].len); + txd->data_len = cpu_to_le16(desc[i].len); + } + + tx_ring->wr_p += got; + pkts += got; + } + + if (!pkts) + return; + + xsk_tx_release(xsk_pool); + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts); +} + +int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled, skbs = 0; + + pkts_polled = nfp_nfd3_xsk_rx(r_vec->rx_ring, budget, &skbs); + + if (pkts_polled < budget) { + if (r_vec->tx_ring) + nfp_nfd3_tx_complete(r_vec->tx_ring, budget); + + if (!nfp_nfd3_xsk_complete(r_vec->xdp_ring)) + pkts_polled = budget; + + nfp_nfd3_xsk_tx(r_vec->xdp_ring); + + if (pkts_polled < budget && napi_complete_done(napi, skbs)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } + + return pkts_polled; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c new file mode 100644 index 000000000..ccacb6ab6 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -0,0 +1,1539 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> +#include <linux/overflow.h> +#include <linux/sizes.h> +#include <linux/bitfield.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../crypto/crypto.h" +#include "../crypto/fw.h" +#include "nfdk.h" + +static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2); +} + +static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT); +} + +static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +static __le64 +nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf, + struct sk_buff *skb) +{ + u32 segs, hdrlen, l3_offset, l4_offset; + struct nfp_nfdk_tx_desc txd; + u16 mss; + + if (!skb->encapsulation) { + l3_offset = skb_network_offset(skb); + l4_offset = skb_transport_offset(skb); + hdrlen = skb_tcp_all_headers(skb); + } else { + l3_offset = skb_inner_network_offset(skb); + l4_offset = skb_inner_transport_offset(skb); + hdrlen = skb_inner_tcp_all_headers(skb); + } + + segs = skb_shinfo(skb)->gso_segs; + mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK; + + txd.l3_offset = l3_offset; + txd.l4_offset = l4_offset; + txd.lso_meta_res = 0; + txd.mss = cpu_to_le16(mss); + txd.lso_hdrlen = hdrlen; + txd.lso_totsegs = segs; + + txbuf->pkt_cnt = segs; + txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1); + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); + + return txd.raw; +} + +static u8 +nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + unsigned int pkt_cnt, struct sk_buff *skb, u64 flags) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + + if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return flags; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return flags; + + flags |= NFDK_DESC_TX_L4_CSUM; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + /* L3 checksum offloading flag is not required for ipv6 */ + if (iph->version == 4) { + flags |= NFDK_DESC_TX_L3_CSUM; + } else if (ipv6h->version != 6) { + nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); + return flags; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (!skb->encapsulation) { + r_vec->hw_csum_tx += pkt_cnt; + } else { + flags |= NFDK_DESC_TX_ENCAP; + r_vec->hw_csum_tx_inner += pkt_cnt; + } + u64_stats_update_end(&r_vec->tx_sync); + + return flags; +} + +static int +nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, + struct sk_buff *skb) +{ + unsigned int n_descs, wr_p, nop_slots; + const skb_frag_t *frag, *fend; + struct nfp_nfdk_tx_desc *txd; + unsigned int nr_frags; + unsigned int wr_idx; + int err; + +recount_descs: + n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); + nr_frags = skb_shinfo(skb)->nr_frags; + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + for (; frag < fend; frag++) + n_descs += DIV_ROUND_UP(skb_frag_size(frag), + NFDK_TX_MAX_DATA_PER_DESC); + + if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) { + if (skb_is_nonlinear(skb)) { + err = skb_linearize(skb); + if (err) + return err; + goto recount_descs; + } + return -EINVAL; + } + + /* Under count by 1 (don't count meta) for the round down to work out */ + n_descs += !!skb_is_gso(skb); + + if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != + round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) + goto close_block; + + if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK) + goto close_block; + + return 0; + +close_block: + wr_p = tx_ring->wr_p; + nop_slots = D_BLOCK_CPL(wr_p); + + wr_idx = D_IDX(tx_ring, wr_p); + tx_ring->ktxbufs[wr_idx].skb = NULL; + txd = &tx_ring->ktxds[wr_idx]; + + memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); + + tx_ring->data_pending = 0; + tx_ring->wr_p += nop_slots; + tx_ring->wr_ptr_add += nop_slots; + + return 0; +} + +static int +nfp_nfdk_prep_tx_meta(struct nfp_net_dp *dp, struct nfp_app *app, + struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + unsigned char *data; + bool vlan_insert; + u32 meta_id = 0; + int md_bytes; + + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) + md_dst = NULL; + + vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); + + if (!(md_dst || vlan_insert)) + return 0; + + md_bytes = sizeof(meta_id) + + !!md_dst * NFP_NET_META_PORTID_SIZE + + vlan_insert * NFP_NET_META_VLAN_SIZE; + + if (unlikely(skb_cow_head(skb, md_bytes))) + return -ENOMEM; + + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= NFP_NET_META_PORTID_SIZE; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (vlan_insert) { + data -= NFP_NET_META_VLAN_SIZE; + /* data type of skb->vlan_proto is __be16 + * so it fills metadata without calling put_unaligned_be16 + */ + memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto)); + put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_VLAN; + } + + meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) | + FIELD_PREP(NFDK_META_FIELDS, meta_id); + + data -= sizeof(meta_id); + put_unaligned_be32(meta_id, data); + + return NFDK_DESC_TX_CHAIN_META; +} + +/** + * nfp_nfdk_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. + */ +netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_nfdk_tx_buf *txbuf, *etxbuf; + u32 cnt, tmp_dlen, dlen_type = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_r_vector *r_vec; + const skb_frag_t *frag, *fend; + struct nfp_nfdk_tx_desc *txd; + unsigned int real_len, qidx; + unsigned int dma_len, type; + struct netdev_queue *nd_q; + struct nfp_net_dp *dp; + int nr_frags, wr_idx; + dma_addr_t dma_addr; + u64 metadata; + + dp = &nn->dp; + qidx = skb_get_queue_mapping(skb); + tx_ring = &dp->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + nd_q = netdev_get_tx_queue(dp->netdev, qidx); + + /* Don't bother counting frags, assume the worst */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + netif_tx_stop_queue(nd_q); + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + metadata = nfp_nfdk_prep_tx_meta(dp, nn->app, skb); + if (unlikely((int)metadata < 0)) + goto err_flush; + + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) + goto err_flush; + + /* nr_frags will change after skb_linearize so we get nr_frags after + * nfp_nfdk_tx_maybe_close_block function + */ + nr_frags = skb_shinfo(skb)->nr_frags; + /* DMA map all */ + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + txbuf = &tx_ring->ktxbufs[wr_idx]; + + dma_len = skb_headlen(skb); + if (skb_is_gso(skb)) + type = NFDK_DESC_TX_TYPE_TSO; + else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_warn_dma; + + txbuf->skb = skb; + txbuf++; + + txbuf->dma_addr = dma_addr; + txbuf++; + + /* FIELD_PREP() implicitly truncates to chunk */ + dma_len -= 1; + + /* We will do our best to pass as much data as we can in descriptor + * and we need to make sure the first descriptor includes whole head + * since there is limitation in firmware side. Sometimes the value of + * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than + * headlen. + */ + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + /* starts at bit 0 */ + BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1)); + + /* Preserve the original dlen_type, this way below the EOP logic + * can use dlen_type. + */ + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + /* The rest of the data (if any) will be in larger dma descritors + * and is handled with the fragment loop. + */ + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + + while (true) { + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + if (frag >= fend) + break; + + dma_len = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len, + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_unmap; + + txbuf->dma_addr = dma_addr; + txbuf++; + + frag++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + if (!skb_is_gso(skb)) { + real_len = skb->len; + /* Metadata desc */ + metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata); + txd->raw = cpu_to_le64(metadata); + txd++; + } else { + /* lso desc should be placed after metadata desc */ + (txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb); + real_len = txbuf->real_len; + /* Metadata desc */ + metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata); + txd->raw = cpu_to_le64(metadata); + txd += 2; + txbuf++; + } + + cnt = txd - tx_ring->ktxds - wr_idx; + if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != + round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) + goto err_warn_overflow; + + skb_tx_timestamp(skb); + + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += skb->len; + else + tx_ring->data_pending = 0; + + if (nfp_nfdk_tx_ring_should_stop(tx_ring)) + nfp_nfdk_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += cnt; + if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more())) + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_warn_overflow: + WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", + wr_idx, skb_headlen(skb), nr_frags, cnt); + if (skb_is_gso(skb)) + txbuf--; +err_unmap: + /* txbuf pointed to the next-to-use */ + etxbuf = txbuf; + /* first txbuf holds the skb */ + txbuf = &tx_ring->ktxbufs[wr_idx + 1]; + if (txbuf < etxbuf) { + dma_unmap_single(dp->dev, txbuf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + txbuf->raw = 0; + txbuf++; + } + frag = skb_shinfo(skb)->frags; + while (etxbuf < txbuf) { + dma_unmap_page(dp->dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + txbuf->raw = 0; + frag++; + txbuf++; + } +err_warn_dma: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_flush: + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_nfdk_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * @budget: NAPI budget (only used as bool to determine if in NAPI context) + */ +static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + struct nfp_nfdk_tx_buf *ktxbufs; + struct device *dev = dp->dev; + struct netdev_queue *nd_q; + u32 rd_p, qcp_rd_p; + int todo; + + rd_p = tx_ring->rd_p; + if (tx_ring->wr_p == rd_p) + return; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + ktxbufs = tx_ring->ktxbufs; + + while (todo > 0) { + const skb_frag_t *frag, *fend; + unsigned int size, n_descs = 1; + struct nfp_nfdk_tx_buf *txbuf; + struct sk_buff *skb; + + txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)]; + skb = txbuf->skb; + txbuf++; + + /* Closed block */ + if (!skb) { + n_descs = D_BLOCK_CPL(rd_p); + goto next; + } + + /* Unmap head */ + size = skb_headlen(skb); + n_descs += nfp_nfdk_headlen_to_segs(size); + dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE); + txbuf++; + + /* Unmap frags */ + frag = skb_shinfo(skb)->frags; + fend = frag + skb_shinfo(skb)->nr_frags; + for (; frag < fend; frag++) { + size = skb_frag_size(frag); + n_descs += DIV_ROUND_UP(size, + NFDK_TX_MAX_DATA_PER_DESC); + dma_unmap_page(dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + txbuf++; + } + + if (!skb_is_gso(skb)) { + done_bytes += skb->len; + done_pkts++; + } else { + done_bytes += txbuf->real_len; + done_pkts += txbuf->pkt_cnt; + n_descs++; + } + + napi_consume_skb(skb, budget); +next: + rd_p += n_descs; + todo -= n_descs; + } + + tx_ring->rd_p = rd_p; + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + if (!dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_nfdk_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +/* Receive processing */ +static void * +nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; + } else { + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return NULL; + frag = page_address(page); + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings + * @dp: NFP Net data path struct + * @rx_ring: RX ring structure + * @frag: page fragment buffer + * @dma_addr: DMA address of skb mapping + */ +static void +nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring, + void *frag, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + nfp_net_dma_sync_dev_rx(dp, dma_addr); + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].frag = frag; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); + + rx_ring->wr_p++; + if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); + } +} + +/** + * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW + * @dp: NFP Net data path struct + * @rx_ring: RX ring to fill + */ +void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, + rx_ring->rxbufs[i].dma_addr); +} + +/** + * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_nfdk_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @meta: Parsed metadata prepend + * @skb: Pointer to SKB + */ +static void +nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(dp->netdev->features & NETIF_F_RXCSUM)) + return; + + if (meta->csum_type) { + skb->ip_summed = meta->csum_type; + skb->csum = meta->csum; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_complete++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +static void +nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + meta->hash_type = PKT_HASH_TYPE_L3; + break; + default: + meta->hash_type = PKT_HASH_TYPE_L4; + break; + } + + meta->hash = get_unaligned_be32(hash); +} + +static bool +nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len) +{ + u32 meta_info, vlan_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_nfdk_set_hash(netdev, meta, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + meta->mark = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_VLAN: + vlan_info = get_unaligned_be32(data); + if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) { + meta->vlan.stripped = true; + meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK, + vlan_info); + meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK, + vlan_info); + } + data += 4; + break; + case NFP_NET_META_PORTID: + meta->portid = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_CSUM: + meta->csum_type = CHECKSUM_COMPLETE; + meta->csum = + (__force __wsum)__get_unaligned_cpu32(data); + data += 4; + break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return false; + data += sizeof(struct nfp_net_tls_resync_req); + break; + default: + return true; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data != pkt; +} + +static void +nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, + struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. + */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; + u64_stats_update_end(&r_vec->rx_sync); + + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. + */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); + if (rxbuf) + nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + +static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_rx_ring *rx_ring; + u32 qcp_rd_p, done = 0; + bool done_all; + int todo; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + rx_ring = r_vec->rx_ring; + while (todo > 0) { + int idx = D_IDX(tx_ring, tx_ring->rd_p + done); + struct nfp_nfdk_tx_buf *txbuf; + unsigned int step = 1; + + txbuf = &tx_ring->ktxbufs[idx]; + if (!txbuf->raw) + goto next; + + if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) { + WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n"); + goto next; + } + + /* Two successive txbufs are used to stash virtual and dma + * address respectively, recycle and clean them here. + */ + nfp_nfdk_rx_give_one(dp, rx_ring, + (void *)NFDK_TX_BUF_PTR(txbuf[0].val), + txbuf[1].dma_addr); + txbuf[0].raw = 0; + txbuf[1].raw = 0; + step = 2; + + u64_stats_update_begin(&r_vec->tx_sync); + /* Note: tx_bytes not accumulated. */ + r_vec->tx_pkts++; + u64_stats_update_end(&r_vec->tx_sync); +next: + todo -= step; + done += step; + } + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done); + tx_ring->rd_p += done; + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static bool +nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, + unsigned int pkt_len, bool *completed) +{ + unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; + unsigned int dma_len, type, cnt, dlen_type, tmp_dlen; + struct nfp_nfdk_tx_buf *txbuf; + struct nfp_nfdk_tx_desc *txd; + unsigned int n_descs; + dma_addr_t dma_addr; + int wr_idx; + + /* Reject if xdp_adjust_tail grow packet beyond DMA area */ + if (pkt_len + dma_off > dma_map_sz) + return false; + + /* Make sure there's still at least one block available after + * aligning to block boundary, so that the txds used below + * won't wrap around the tx_ring. + */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + if (!*completed) { + nfp_nfdk_xdp_complete(tx_ring); + *completed = true; + } + + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, + NULL); + return false; + } + } + + /* Check if cross block boundary */ + n_descs = nfp_nfdk_headlen_to_segs(pkt_len); + if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != + round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) || + ((u32)tx_ring->data_pending + pkt_len > + NFDK_TX_MAX_DATA_PER_BLOCK)) { + unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + memset(txd, 0, + array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); + + tx_ring->data_pending = 0; + tx_ring->wr_p += nop_slots; + tx_ring->wr_ptr_add += nop_slots; + } + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->ktxbufs[wr_idx]; + + txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP; + txbuf[1].dma_addr = rxbuf->dma_addr; + /* Note: pkt len not stored */ + + dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, + pkt_len, DMA_BIDIRECTIONAL); + + /* Build TX descriptor */ + txd = &tx_ring->ktxds[wr_idx]; + dma_len = pkt_len; + dma_addr = rxbuf->dma_addr + dma_off; + + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + /* FIELD_PREP() implicitly truncates to chunk */ + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dlen_type &= NFDK_DESC_TX_DMA_LEN; + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + /* Metadata desc */ + txd->raw = 0; + txd++; + + cnt = txd - tx_ring->ktxds - wr_idx; + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += pkt_len; + else + tx_ring->data_pending = 0; + + tx_ring->wr_ptr_add += cnt; + return true; +} + +/** + * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * Return: Number of packets received. + */ +static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_tx_cmpl = false; + unsigned int true_bufsz; + struct sk_buff *skb; + int pkts_polled = 0; + struct xdp_buff xdp; + int idx; + + xdp_prog = READ_ONCE(dp->xdp_prog); + true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; + xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, + &rx_ring->xdp_rxq); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + bool redir_egress = false; + struct net_device *netdev; + dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; + void *new_frag; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + rx_ring->rd_p++; + pkts_polled++; + + rxbuf = &rx_ring->rxbufs[idx]; + /* < meta_len > + * <-- [rx_offset] --> + * --------------------------------------------------------- + * | [XX] | metadata | packet | XXXX | + * --------------------------------------------------------- + * <---------------- data_len ---------------> + * + * The rx_offset is fixed for all packets, the meta_len can vary + * on a packet by packet basis. If rx_offset is set to zero + * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the + * buffer and is immediately followed by the packet (no [XX]). + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND || + (dp->rx_offset && meta_len > dp->rx_offset))) { + nn_dp_warn(dp, "oversized RX packet metadata %u\n", + meta_len); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, + data_len); + + if (meta_len) { + if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { + nn_dp_warn(dp, "invalid RX packet metadata\n"); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + } + + if (xdp_prog && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; + unsigned int dma_off; + int act; + + xdp_prepare_buff(&xdp, + rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, + pkt_off - NFP_NET_RX_BUF_HEADROOM, + pkt_len, true); + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len = xdp.data_end - xdp.data; + pkt_off += xdp.data - orig_data; + + switch (act) { + case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; + break; + case XDP_TX: + dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; + if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring, + tx_ring, + rxbuf, + dma_off, + pkt_len, + &xdp_tx_cmpl))) + trace_xdp_exception(dp->netdev, + xdp_prog, act); + continue; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_dev_get(nn->app, meta.portid, + &redir_egress); + if (unlikely(!netdev)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + + if (nfp_netdev_is_nfp_repr(netdev)) + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + skb->mark = meta.mark; + skb_set_hash(skb, meta.hash, meta.hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb); + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb); + continue; + } + + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); + + if (likely(!redir_egress)) { + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } else { + skb->dev = netdev; + skb_reset_network_header(skb); + __skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); + } + } + + if (xdp_prog) { + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && + !xdp_tx_cmpl) + if (!nfp_nfdk_xdp_complete(tx_ring)) + pkts_polled = budget; + } + + return pkts_polled; +} + +/** + * nfp_nfdk_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. + */ +int nfp_nfdk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled = 0; + + if (r_vec->tx_ring) + nfp_nfdk_tx_complete(r_vec->tx_ring, budget); + if (r_vec->rx_ring) + pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget); + + if (pkts_polled < budget) + if (napi_complete_done(napi, pkts_polled)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + + if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + + return pkts_polled; +} + +/* Control device data path + */ + +bool +nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old) +{ + u32 cnt, tmp_dlen, dlen_type = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_nfdk_tx_buf *txbuf; + struct nfp_nfdk_tx_desc *txd; + unsigned int dma_len, type; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + u64 metadata = 0; + int wr_idx; + + dp = &r_vec->nfp_net->dp; + tx_ring = r_vec->tx_ring; + + if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { + nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); + goto err_free; + } + + /* Don't bother counting frags, assume the worst */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + if (!old) + __skb_queue_tail(&r_vec->queue, skb); + else + __skb_queue_head(&r_vec->queue, skb); + return NETDEV_TX_BUSY; + } + + if (nfp_app_ctrl_has_meta(nn->app)) { + if (unlikely(skb_headroom(skb) < 8)) { + nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); + goto err_free; + } + metadata = NFDK_DESC_TX_CHAIN_META; + put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); + put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) | + FIELD_PREP(NFDK_META_FIELDS, + NFP_NET_META_PORTID), + skb_push(skb, 4)); + } + + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) + goto err_free; + + /* DMA map all */ + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + txbuf = &tx_ring->ktxbufs[wr_idx]; + + dma_len = skb_headlen(skb); + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_warn_dma; + + txbuf->skb = skb; + txbuf++; + + txbuf->dma_addr = dma_addr; + txbuf++; + + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dlen_type &= NFDK_DESC_TX_DMA_LEN; + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + /* Metadata desc */ + txd->raw = cpu_to_le64(metadata); + txd++; + + cnt = txd - tx_ring->ktxds - wr_idx; + if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != + round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) + goto err_warn_overflow; + + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += skb->len; + else + tx_ring->data_pending = 0; + + tx_ring->wr_ptr_add += cnt; + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_warn_overflow: + WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", + wr_idx, skb_headlen(skb), 0, cnt); + txbuf--; + dma_unmap_single(dp->dev, txbuf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + txbuf->raw = 0; +err_warn_dma: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_free: + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&r_vec->queue))) + if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) + return; +} + +static bool +nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) +{ + u32 meta_type, meta_tag; + + if (!nfp_app_ctrl_has_meta(nn->app)) + return !meta_len; + + if (meta_len != 8) + return false; + + meta_type = get_unaligned_be32(data); + meta_tag = get_unaligned_be32(data + 4); + + return (meta_type == NFP_NET_META_PORTID && + meta_tag == NFP_META_PORT_ID_CTRL); +} + +static bool +nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) +{ + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + struct sk_buff *skb; + void *new_frag; + int idx; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + return false; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + + rxbuf = &rx_ring->rxbufs[idx]; + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); + + if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { + nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", + meta_len); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + + skb = build_skb(rxbuf->frag, dp->fl_bufsz); + if (unlikely(!skb)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + return true; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + nfp_app_ctrl_rx(nn->app, skb); + + return true; +} + +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +{ + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; + + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) + continue; + + return budget; +} + +void nfp_nfdk_ctrl_poll(struct tasklet_struct *t) +{ + struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); + + spin_lock(&r_vec->lock); + nfp_nfdk_tx_complete(r_vec->tx_ring, 0); + __nfp_ctrl_tx_queued(r_vec); + spin_unlock(&r_vec->lock); + + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h new file mode 100644 index 000000000..0ea51d9f2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef _NFP_DP_NFDK_H_ +#define _NFP_DP_NFDK_H_ + +#include <linux/bitops.h> +#include <linux/types.h> + +#define NFDK_TX_DESC_PER_SIMPLE_PKT 2 + +#define NFDK_TX_MAX_DATA_PER_HEAD SZ_4K +#define NFDK_TX_MAX_DATA_PER_DESC SZ_16K +#define NFDK_TX_DESC_BLOCK_SZ 256 +#define NFDK_TX_DESC_BLOCK_CNT (NFDK_TX_DESC_BLOCK_SZ / \ + sizeof(struct nfp_nfdk_tx_desc)) +#define NFDK_TX_DESC_STOP_CNT (NFDK_TX_DESC_BLOCK_CNT * \ + NFDK_TX_DESC_PER_SIMPLE_PKT) +#define NFDK_TX_MAX_DATA_PER_BLOCK SZ_64K +#define NFDK_TX_DESC_GATHER_MAX 17 + +/* TX descriptor format */ + +#define NFDK_DESC_TX_MSS_MASK GENMASK(13, 0) + +#define NFDK_DESC_TX_CHAIN_META BIT(3) +#define NFDK_DESC_TX_ENCAP BIT(2) +#define NFDK_DESC_TX_L4_CSUM BIT(1) +#define NFDK_DESC_TX_L3_CSUM BIT(0) + +#define NFDK_DESC_TX_DMA_LEN_HEAD GENMASK(11, 0) +#define NFDK_DESC_TX_TYPE_HEAD GENMASK(15, 12) +#define NFDK_DESC_TX_DMA_LEN GENMASK(13, 0) +#define NFDK_DESC_TX_TYPE_NOP 0 +#define NFDK_DESC_TX_TYPE_GATHER 1 +#define NFDK_DESC_TX_TYPE_TSO 2 +#define NFDK_DESC_TX_TYPE_SIMPLE 8 +#define NFDK_DESC_TX_EOP BIT(14) + +#define NFDK_META_LEN GENMASK(7, 0) +#define NFDK_META_FIELDS GENMASK(31, 8) + +#define D_BLOCK_CPL(idx) (NFDK_TX_DESC_BLOCK_CNT - \ + (idx) % NFDK_TX_DESC_BLOCK_CNT) + +struct nfp_nfdk_tx_desc { + union { + struct { + __le16 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len_type; /* Length to DMA for this desc */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + }; + + struct { + __le16 mss; /* MSS to be used for LSO */ + u8 lso_hdrlen; /* LSO, TCP payload offset */ + u8 lso_totsegs; /* LSO, total segments */ + u8 l3_offset; /* L3 header offset */ + u8 l4_offset; /* L4 header offset */ + __le16 lso_meta_res; /* Rsvd bits in TSO metadata */ + }; + + struct { + u8 flags; /* TX Flags, see @NFDK_DESC_TX_* */ + u8 reserved[7]; /* meta byte placeholder */ + }; + + __le32 vals[2]; + __le64 raw; + }; +}; + +/* The device don't make use of the 2 or 3 least significant bits of the address + * due to alignment constraints. The driver can make use of those bits to carry + * information about the buffer before giving it to the device. + * + * NOTE: The driver must clear the lower bits before handing the buffer to the + * device. + * + * - NFDK_TX_BUF_INFO_SOP - Start of a packet + * Mark the buffer as a start of a packet. This is used in the XDP TX process + * to stash virtual and DMA address so that they can be recycled when the TX + * operation is completed. + */ +#define NFDK_TX_BUF_PTR(val) ((val) & ~(sizeof(void *) - 1)) +#define NFDK_TX_BUF_INFO(val) ((val) & (sizeof(void *) - 1)) +#define NFDK_TX_BUF_INFO_SOP BIT(0) + +struct nfp_nfdk_tx_buf { + union { + /* First slot */ + union { + struct sk_buff *skb; + void *frag; + unsigned long val; + }; + + /* 1 + nr_frags next slots */ + dma_addr_t dma_addr; + + /* TSO (optional) */ + struct { + u32 pkt_cnt; + u32 real_len; + }; + + u64 raw; + }; +}; + +static inline int nfp_nfdk_headlen_to_segs(unsigned int headlen) +{ + /* First descriptor fits less data, so adjust for that */ + return DIV_ROUND_UP(headlen + + NFDK_TX_MAX_DATA_PER_DESC - + NFDK_TX_MAX_DATA_PER_HEAD, + NFDK_TX_MAX_DATA_PER_DESC); +} + +int nfp_nfdk_poll(struct napi_struct *napi, int budget); +netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev); +bool +nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); +void nfp_nfdk_ctrl_poll(struct tasklet_struct *t); +void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c new file mode 100644 index 000000000..fdb8144a6 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/seq_file.h> + +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "nfdk.h" + +static void +nfp_nfdk_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct device *dev = dp->dev; + struct netdev_queue *nd_q; + + while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { + const skb_frag_t *frag, *fend; + unsigned int size, n_descs = 1; + struct nfp_nfdk_tx_buf *txbuf; + int nr_frags, rd_idx; + struct sk_buff *skb; + + rd_idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->ktxbufs[rd_idx]; + + skb = txbuf->skb; + if (!skb) { + n_descs = D_BLOCK_CPL(tx_ring->rd_p); + goto next; + } + + nr_frags = skb_shinfo(skb)->nr_frags; + txbuf++; + + /* Unmap head */ + size = skb_headlen(skb); + dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE); + n_descs += nfp_nfdk_headlen_to_segs(size); + txbuf++; + + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + for (; frag < fend; frag++) { + size = skb_frag_size(frag); + dma_unmap_page(dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + n_descs += DIV_ROUND_UP(size, + NFDK_TX_MAX_DATA_PER_DESC); + txbuf++; + } + + if (skb_is_gso(skb)) + n_descs++; + + dev_kfree_skb_any(skb); +next: + tx_ring->rd_p += n_descs; + } + + memset(tx_ring->txds, 0, tx_ring->size); + tx_ring->data_pending = 0; + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + tx_ring->wr_ptr_add = 0; + + if (tx_ring->is_xdp || !dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +static void nfp_nfdk_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + kvfree(tx_ring->ktxbufs); + + if (tx_ring->ktxds) + dma_free_coherent(dp->dev, tx_ring->size, + tx_ring->ktxds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +static int +nfp_nfdk_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + + tx_ring->cnt = dp->txd_cnt * NFDK_TX_DESC_PER_SIMPLE_PKT; + tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->ktxds)); + tx_ring->ktxds = dma_alloc_coherent(dp->dev, tx_ring->size, + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->ktxds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); + goto err_alloc; + } + + tx_ring->ktxbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->ktxbufs), + GFP_KERNEL); + if (!tx_ring->ktxbufs) + goto err_alloc; + + if (!tx_ring->is_xdp && dp->netdev) + netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, + tx_ring->idx); + + return 0; + +err_alloc: + nfp_nfdk_tx_ring_free(tx_ring); + return -ENOMEM; +} + +static void +nfp_nfdk_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ +} + +static int +nfp_nfdk_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + return 0; +} + +static void +nfp_nfdk_print_tx_descs(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + struct nfp_nfdk_tx_desc *txd; + u32 txd_cnt = tx_ring->cnt; + int i; + + for (i = 0; i < txd_cnt; i++) { + txd = &tx_ring->ktxds[i]; + + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%016llx", i, + txd->vals[0], txd->vals[1], tx_ring->ktxbufs[i].raw); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +} + +#define NFP_NFDK_CFG_CTRL_SUPPORTED \ + (NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \ + NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \ + NFP_NET_CFG_CTRL_RXCSUM | NFP_NET_CFG_CTRL_TXCSUM | \ + NFP_NET_CFG_CTRL_RXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2 | NFP_NET_CFG_CTRL_RXQINQ | \ + NFP_NET_CFG_CTRL_TXVLAN_V2 | \ + NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \ + NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \ + NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_IRQMOD | \ + NFP_NET_CFG_CTRL_TXRWB | NFP_NET_CFG_CTRL_VEPA | \ + NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \ + NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \ + NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \ + NFP_NET_CFG_CTRL_LIVE_ADDR) + +const struct nfp_dp_ops nfp_nfdk_ops = { + .version = NFP_NFD_VER_NFDK, + .tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT, + .cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(48), + .poll = nfp_nfdk_poll, + .ctrl_poll = nfp_nfdk_ctrl_poll, + .xmit = nfp_nfdk_tx, + .ctrl_tx_one = nfp_nfdk_ctrl_tx_one, + .rx_ring_fill_freelist = nfp_nfdk_rx_ring_fill_freelist, + .tx_ring_alloc = nfp_nfdk_tx_ring_alloc, + .tx_ring_reset = nfp_nfdk_tx_ring_reset, + .tx_ring_free = nfp_nfdk_tx_ring_free, + .tx_ring_bufs_alloc = nfp_nfdk_tx_ring_bufs_alloc, + .tx_ring_bufs_free = nfp_nfdk_tx_ring_bufs_free, + .print_tx_descs = nfp_nfdk_print_tx_descs +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_abi.h b/drivers/net/ethernet/netronome/nfp/nfp_abi.h new file mode 100644 index 000000000..dd359a44a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_abi.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#ifndef __NFP_ABI__ +#define __NFP_ABI__ 1 + +#include <linux/types.h> + +#define NFP_MBOX_SYM_NAME "_abi_nfd_pf%u_mbox" +#define NFP_MBOX_SYM_MIN_SIZE 16 /* When no data needed */ + +#define NFP_MBOX_CMD 0x00 +#define NFP_MBOX_RET 0x04 +#define NFP_MBOX_DATA_LEN 0x08 +#define NFP_MBOX_RESERVED 0x0c +#define NFP_MBOX_DATA 0x10 + +/** + * enum nfp_mbox_cmd - PF mailbox commands + * + * @NFP_MBOX_NO_CMD: null command + * Used to indicate previous command has finished. + * + * @NFP_MBOX_POOL_GET: get shared buffer pool info/config + * Input - struct nfp_shared_buf_pool_id + * Output - struct nfp_shared_buf_pool_info_get + * + * @NFP_MBOX_POOL_SET: set shared buffer pool info/config + * Input - struct nfp_shared_buf_pool_info_set + * Output - None + * + * @NFP_MBOX_PCIE_ABM_ENABLE: enable PCIe-side advanced buffer management + * Enable advanced buffer management of the PCIe block. If ABM is disabled + * PCIe block maintains a very short queue of buffers and does tail drop. + * ABM allows more advanced buffering and priority control. + * Input - None + * Output - None + * + * @NFP_MBOX_PCIE_ABM_DISABLE: disable PCIe-side advanced buffer management + * Input - None + * Output - None + */ +enum nfp_mbox_cmd { + NFP_MBOX_NO_CMD = 0x00, + + NFP_MBOX_POOL_GET = 0x01, + NFP_MBOX_POOL_SET = 0x02, + + NFP_MBOX_PCIE_ABM_ENABLE = 0x03, + NFP_MBOX_PCIE_ABM_DISABLE = 0x04, +}; + +#define NFP_SHARED_BUF_COUNT_SYM_NAME "_abi_nfd_pf%u_sb_cnt" +#define NFP_SHARED_BUF_TABLE_SYM_NAME "_abi_nfd_pf%u_sb_tbl" + +/** + * struct nfp_shared_buf - NFP shared buffer description + * @id: numerical user-visible id of the shared buffer + * @size: size in bytes of the buffer + * @ingress_pools_count: number of ingress pools + * @egress_pools_count: number of egress pools + * @ingress_tc_count: number of ingress trafic classes + * @egress_tc_count: number of egress trafic classes + * @pool_size_unit: pool size may be in credits, each credit is + * @pool_size_unit bytes + */ +struct nfp_shared_buf { + __le32 id; + __le32 size; + __le16 ingress_pools_count; + __le16 egress_pools_count; + __le16 ingress_tc_count; + __le16 egress_tc_count; + + __le32 pool_size_unit; +}; + +/** + * struct nfp_shared_buf_pool_id - shared buffer pool identification + * @shared_buf: shared buffer id + * @pool: pool index + */ +struct nfp_shared_buf_pool_id { + __le32 shared_buf; + __le32 pool; +}; + +/** + * struct nfp_shared_buf_pool_info_get - struct devlink_sb_pool_info mirror + * @pool_type: one of enum devlink_sb_pool_type + * @size: pool size in units of SB's @pool_size_unit + * @threshold_type: one of enum devlink_sb_threshold_type + */ +struct nfp_shared_buf_pool_info_get { + __le32 pool_type; + __le32 size; + __le32 threshold_type; +}; + +/** + * struct nfp_shared_buf_pool_info_set - packed args of sb_pool_set + * @id: pool identification info + * @size: pool size in units of SB's @pool_size_unit + * @threshold_type: one of enum devlink_sb_threshold_type + */ +struct nfp_shared_buf_pool_info_set { + struct nfp_shared_buf_pool_id id; + __le32 size; + __le32 threshold_type; +}; + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c new file mode 100644 index 000000000..bb3f46c74 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bug.h> +#include <linux/lockdep.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/slab.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nffw.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net.h" +#include "nfp_net_repr.h" +#include "nfp_port.h" + +static const struct nfp_app_type *apps[] = { + [NFP_APP_CORE_NIC] = &app_nic, +#ifdef CONFIG_BPF_SYSCALL + [NFP_APP_BPF_NIC] = &app_bpf, +#else + [NFP_APP_BPF_NIC] = &app_nic, +#endif +#ifdef CONFIG_NFP_APP_FLOWER + [NFP_APP_FLOWER_NIC] = &app_flower, +#endif +#ifdef CONFIG_NFP_APP_ABM_NIC + [NFP_APP_ACTIVE_BUFFER_MGMT_NIC] = &app_abm, +#endif +}; + +void nfp_check_rhashtable_empty(void *ptr, void *arg) +{ + WARN_ON_ONCE(1); +} + +struct nfp_app *nfp_app_from_netdev(struct net_device *netdev) +{ + if (nfp_netdev_is_nfp_net(netdev)) { + struct nfp_net *nn = netdev_priv(netdev); + + return nn->app; + } + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_repr *repr = netdev_priv(netdev); + + return repr->app; + } + + WARN(1, "Unknown netdev type for nfp_app\n"); + + return NULL; +} + +const char *nfp_app_mip_name(struct nfp_app *app) +{ + if (!app || !app->pf->mip) + return ""; + return nfp_mip_name(app->pf->mip); +} + +int nfp_app_ndo_init(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (!app || !app->type->ndo_init) + return 0; + return app->type->ndo_init(app, netdev); +} + +void nfp_app_ndo_uninit(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (app && app->type->ndo_uninit) + app->type->ndo_uninit(app, netdev); +} + +u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data) +{ + if (!port || !port->app || !port->app->type->port_get_stats) + return data; + return port->app->type->port_get_stats(port->app, port, data); +} + +int nfp_app_port_get_stats_count(struct nfp_port *port) +{ + if (!port || !port->app || !port->app->type->port_get_stats_count) + return 0; + return port->app->type->port_get_stats_count(port->app, port); +} + +u8 *nfp_app_port_get_stats_strings(struct nfp_port *port, u8 *data) +{ + if (!port || !port->app || !port->app->type->port_get_stats_strings) + return data; + return port->app->type->port_get_stats_strings(port->app, port, data); +} + +struct sk_buff * +nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size, gfp_t priority) +{ + struct sk_buff *skb; + + if (nfp_app_ctrl_has_meta(app)) + size += 8; + + skb = alloc_skb(size, priority); + if (!skb) + return NULL; + + if (nfp_app_ctrl_has_meta(app)) + skb_reserve(skb, 8); + + return skb; +} + +struct nfp_reprs * +nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type) +{ + return rcu_dereference_protected(app->reprs[type], + nfp_app_is_locked(app)); +} + +struct nfp_reprs * +nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, + struct nfp_reprs *reprs) +{ + struct nfp_reprs *old; + + old = nfp_reprs_get_locked(app, type); + rtnl_lock(); + rcu_assign_pointer(app->reprs[type], reprs); + rtnl_unlock(); + + return old; +} + +static void +nfp_app_netdev_feat_change(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_net *nn; + unsigned int type; + + if (!nfp_netdev_is_nfp_net(netdev)) + return; + nn = netdev_priv(netdev); + if (nn->app != app) + return; + + for (type = 0; type < __NFP_REPR_TYPE_MAX; type++) { + struct nfp_reprs *reprs; + unsigned int i; + + reprs = rtnl_dereference(app->reprs[type]); + if (!reprs) + continue; + + for (i = 0; i < reprs->num_reprs; i++) { + struct net_device *repr; + + repr = rtnl_dereference(reprs->reprs[i]); + if (!repr) + continue; + + nfp_repr_transfer_features(repr, netdev); + } + } +} + +static int +nfp_app_netdev_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct net_device *netdev; + struct nfp_app *app; + + netdev = netdev_notifier_info_to_dev(ptr); + app = container_of(nb, struct nfp_app, netdev_nb); + + /* Handle events common code is interested in */ + switch (event) { + case NETDEV_FEAT_CHANGE: + nfp_app_netdev_feat_change(app, netdev); + break; + } + + /* Call offload specific handlers */ + if (app->type->netdev_event) + return app->type->netdev_event(app, netdev, event, ptr); + return NOTIFY_DONE; +} + +int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl) +{ + int err; + + app->ctrl = ctrl; + + if (app->type->start) { + err = app->type->start(app); + if (err) + return err; + } + + app->netdev_nb.notifier_call = nfp_app_netdev_event; + err = register_netdevice_notifier(&app->netdev_nb); + if (err) + goto err_app_stop; + + return 0; + +err_app_stop: + if (app->type->stop) + app->type->stop(app); + return err; +} + +void nfp_app_stop(struct nfp_app *app) +{ + unregister_netdevice_notifier(&app->netdev_nb); + + if (app->type->stop) + app->type->stop(app); +} + +struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) +{ + struct nfp_app *app; + + if (id >= ARRAY_SIZE(apps) || !apps[id]) { + nfp_err(pf->cpp, "unknown FW app ID 0x%02x, driver too old or support for FW not built in\n", id); + return ERR_PTR(-EINVAL); + } + + if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) + return ERR_PTR(-EINVAL); + if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw)) + return ERR_PTR(-EINVAL); + + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + return ERR_PTR(-ENOMEM); + + app->pf = pf; + app->cpp = pf->cpp; + app->pdev = pf->pdev; + app->type = apps[id]; + + return app; +} + +void nfp_app_free(struct nfp_app *app) +{ + kfree(app); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h new file mode 100644 index 000000000..dd56207df --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -0,0 +1,450 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef _NFP_APP_H +#define _NFP_APP_H 1 + +#include <net/devlink.h> + +#include <trace/events/devlink.h> + +#include "nfp_net_repr.h" + +#define NFP_APP_CTRL_MTU_MAX U32_MAX + +struct bpf_prog; +struct net_device; +struct netdev_bpf; +struct netlink_ext_ack; +struct pci_dev; +struct sk_buff; +struct nfp_app; +struct nfp_cpp; +struct nfp_pf; +struct nfp_repr; +struct nfp_net; + +enum nfp_app_id { + NFP_APP_CORE_NIC = 0x1, + NFP_APP_BPF_NIC = 0x2, + NFP_APP_FLOWER_NIC = 0x3, + NFP_APP_ACTIVE_BUFFER_MGMT_NIC = 0x4, +}; + +extern const struct nfp_app_type app_nic; +extern const struct nfp_app_type app_bpf; +extern const struct nfp_app_type app_flower; +extern const struct nfp_app_type app_abm; + +/** + * struct nfp_app_type - application definition + * @id: application ID + * @name: application name + * @ctrl_cap_mask: ctrl vNIC capability mask, allows disabling features like + * IRQMOD which are on by default but counter-productive for + * control messages which are often latency-sensitive + * @ctrl_has_meta: control messages have prepend of type:5/port:CTRL + * + * Callbacks + * @init: perform basic app checks and init + * @clean: clean app state + * @extra_cap: extra capabilities string + * @ndo_init: vNIC and repr netdev .ndo_init + * @ndo_uninit: vNIC and repr netdev .ndo_unint + * @vnic_alloc: allocate vNICs (assign port types, etc.) + * @vnic_free: free up app's vNIC state + * @vnic_init: vNIC netdev was registered + * @vnic_clean: vNIC netdev about to be unregistered + * @repr_init: representor about to be registered + * @repr_preclean: representor about to unregistered, executed before app + * reference to the it is removed + * @repr_clean: representor about to be unregistered + * @repr_open: representor netdev open callback + * @repr_stop: representor netdev stop callback + * @check_mtu: MTU change request on a netdev (verify it is valid) + * @repr_change_mtu: MTU change request on repr (make and verify change) + * @port_get_stats: get extra ethtool statistics for a port + * @port_get_stats_count: get count of extra statistics for a port + * @port_get_stats_strings: get strings for extra statistics + * @start: start application logic + * @stop: stop application logic + * @netdev_event: Netdevice notifier event + * @ctrl_msg_rx: control message handler + * @ctrl_msg_rx_raw: handler for control messages from data queues + * @setup_tc: setup TC ndo + * @bpf: BPF ndo offload-related calls + * @xdp_offload: offload an XDP program + * @eswitch_mode_get: get SR-IOV eswitch mode + * @eswitch_mode_set: set SR-IOV eswitch mode + * @sriov_enable: app-specific sriov initialisation + * @sriov_disable: app-specific sriov clean-up + * @dev_get: get representor or internal port representing netdev + */ +struct nfp_app_type { + enum nfp_app_id id; + const char *name; + + u32 ctrl_cap_mask; + bool ctrl_has_meta; + + int (*init)(struct nfp_app *app); + void (*clean)(struct nfp_app *app); + + const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn); + + int (*ndo_init)(struct nfp_app *app, struct net_device *netdev); + void (*ndo_uninit)(struct nfp_app *app, struct net_device *netdev); + + int (*vnic_alloc)(struct nfp_app *app, struct nfp_net *nn, + unsigned int id); + void (*vnic_free)(struct nfp_app *app, struct nfp_net *nn); + int (*vnic_init)(struct nfp_app *app, struct nfp_net *nn); + void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn); + + int (*repr_init)(struct nfp_app *app, struct net_device *netdev); + void (*repr_preclean)(struct nfp_app *app, struct net_device *netdev); + void (*repr_clean)(struct nfp_app *app, struct net_device *netdev); + + int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr); + int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr); + + int (*check_mtu)(struct nfp_app *app, struct net_device *netdev, + int new_mtu); + int (*repr_change_mtu)(struct nfp_app *app, struct net_device *netdev, + int new_mtu); + + u64 *(*port_get_stats)(struct nfp_app *app, + struct nfp_port *port, u64 *data); + int (*port_get_stats_count)(struct nfp_app *app, struct nfp_port *port); + u8 *(*port_get_stats_strings)(struct nfp_app *app, + struct nfp_port *port, u8 *data); + + int (*start)(struct nfp_app *app); + void (*stop)(struct nfp_app *app); + + int (*netdev_event)(struct nfp_app *app, struct net_device *netdev, + unsigned long event, void *ptr); + + void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb); + void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data, + unsigned int len); + + int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data); + int (*bpf)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *xdp); + int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog, + struct netlink_ext_ack *extack); + + int (*sriov_enable)(struct nfp_app *app, int num_vfs); + void (*sriov_disable)(struct nfp_app *app); + + enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app); + int (*eswitch_mode_set)(struct nfp_app *app, u16 mode); + struct net_device *(*dev_get)(struct nfp_app *app, u32 id, + bool *redir_egress); +}; + +/** + * struct nfp_app - NFP application container + * @pdev: backpointer to PCI device + * @pf: backpointer to NFP PF structure + * @cpp: pointer to the CPP handle + * @ctrl: pointer to ctrl vNIC struct + * @reprs: array of pointers to representors + * @type: pointer to const application ops and info + * @ctrl_mtu: MTU to set on the control vNIC (set in .init()) + * @netdev_nb: Netdevice notifier block + * @priv: app-specific priv data + */ +struct nfp_app { + struct pci_dev *pdev; + struct nfp_pf *pf; + struct nfp_cpp *cpp; + + struct nfp_net *ctrl; + struct nfp_reprs __rcu *reprs[NFP_REPR_TYPE_MAX + 1]; + + const struct nfp_app_type *type; + unsigned int ctrl_mtu; + + struct notifier_block netdev_nb; + + void *priv; +}; + +static inline void assert_nfp_app_locked(struct nfp_app *app) +{ + devl_assert_locked(priv_to_devlink(app->pf)); +} + +static inline bool nfp_app_is_locked(struct nfp_app *app) +{ + return devl_lock_is_held(priv_to_devlink(app->pf)); +} + +void nfp_check_rhashtable_empty(void *ptr, void *arg); +bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); +bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); + +static inline int nfp_app_init(struct nfp_app *app) +{ + if (!app->type->init) + return 0; + return app->type->init(app); +} + +static inline void nfp_app_clean(struct nfp_app *app) +{ + if (app->type->clean) + app->type->clean(app); +} + +int nfp_app_ndo_init(struct net_device *netdev); +void nfp_app_ndo_uninit(struct net_device *netdev); + +static inline int nfp_app_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, + unsigned int id) +{ + return app->type->vnic_alloc(app, nn, id); +} + +static inline void nfp_app_vnic_free(struct nfp_app *app, struct nfp_net *nn) +{ + if (app->type->vnic_free) + app->type->vnic_free(app, nn); +} + +static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn) +{ + if (!app->type->vnic_init) + return 0; + return app->type->vnic_init(app, nn); +} + +static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn) +{ + if (app->type->vnic_clean) + app->type->vnic_clean(app, nn); +} + +static inline int nfp_app_repr_open(struct nfp_app *app, struct nfp_repr *repr) +{ + if (!app->type->repr_open) + return -EINVAL; + return app->type->repr_open(app, repr); +} + +static inline int nfp_app_repr_stop(struct nfp_app *app, struct nfp_repr *repr) +{ + if (!app->type->repr_stop) + return -EINVAL; + return app->type->repr_stop(app, repr); +} + +static inline int +nfp_app_repr_init(struct nfp_app *app, struct net_device *netdev) +{ + if (!app->type->repr_init) + return 0; + return app->type->repr_init(app, netdev); +} + +static inline void +nfp_app_repr_preclean(struct nfp_app *app, struct net_device *netdev) +{ + if (app->type->repr_preclean) + app->type->repr_preclean(app, netdev); +} + +static inline void +nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev) +{ + if (app->type->repr_clean) + app->type->repr_clean(app, netdev); +} + +static inline int +nfp_app_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + if (!app || !app->type->check_mtu) + return 0; + return app->type->check_mtu(app, netdev, new_mtu); +} + +static inline int +nfp_app_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, + int new_mtu) +{ + if (!app || !app->type->repr_change_mtu) + return 0; + return app->type->repr_change_mtu(app, netdev, new_mtu); +} + +static inline const char *nfp_app_name(struct nfp_app *app) +{ + if (!app) + return ""; + return app->type->name; +} + +static inline bool nfp_app_needs_ctrl_vnic(struct nfp_app *app) +{ + return app && app->type->ctrl_msg_rx; +} + +static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app) +{ + return app->type->ctrl_has_meta; +} + +static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app) +{ + return app && app->type->ctrl_msg_rx_raw; +} + +static inline const char *nfp_app_extra_cap(struct nfp_app *app, + struct nfp_net *nn) +{ + if (!app || !app->type->extra_cap) + return ""; + return app->type->extra_cap(app, nn); +} + +static inline bool nfp_app_has_tc(struct nfp_app *app) +{ + return app && app->type->setup_tc; +} + +static inline int nfp_app_setup_tc(struct nfp_app *app, + struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + if (!app || !app->type->setup_tc) + return -EOPNOTSUPP; + return app->type->setup_tc(app, netdev, type, type_data); +} + +static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf) + return -EINVAL; + return app->type->bpf(app, nn, bpf); +} + +static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + if (!app || !app->type->xdp_offload) + return -EOPNOTSUPP; + return app->type->xdp_offload(app, nn, prog, extack); +} + +static inline bool __nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) +{ + trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, + skb->data, skb->len); + + return __nfp_ctrl_tx(app->ctrl, skb); +} + +static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) +{ + trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, + skb->data, skb->len); + + return nfp_ctrl_tx(app->ctrl, skb); +} + +static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) +{ + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, + skb->data, skb->len); + + app->type->ctrl_msg_rx(app, skb); +} + +static inline void +nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + if (!app || !app->type->ctrl_msg_rx_raw) + return; + + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len); + app->type->ctrl_msg_rx_raw(app, data, len); +} + +static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode) +{ + if (!app->type->eswitch_mode_get) + return -EOPNOTSUPP; + + *mode = app->type->eswitch_mode_get(app); + + return 0; +} + +static inline int nfp_app_eswitch_mode_set(struct nfp_app *app, u16 mode) +{ + if (!app->type->eswitch_mode_set) + return -EOPNOTSUPP; + return app->type->eswitch_mode_set(app, mode); +} + +static inline int nfp_app_sriov_enable(struct nfp_app *app, int num_vfs) +{ + if (!app || !app->type->sriov_enable) + return -EOPNOTSUPP; + return app->type->sriov_enable(app, num_vfs); +} + +static inline void nfp_app_sriov_disable(struct nfp_app *app) +{ + if (app && app->type->sriov_disable) + app->type->sriov_disable(app); +} + +static inline +struct net_device *nfp_app_dev_get(struct nfp_app *app, u32 id, + bool *redir_egress) +{ + if (unlikely(!app || !app->type->dev_get)) + return NULL; + + return app->type->dev_get(app, id, redir_egress); +} + +struct nfp_app *nfp_app_from_netdev(struct net_device *netdev); + +u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data); +int nfp_app_port_get_stats_count(struct nfp_port *port); +u8 *nfp_app_port_get_stats_strings(struct nfp_port *port, u8 *data); + +struct nfp_reprs * +nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type); +struct nfp_reprs * +nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, + struct nfp_reprs *reprs); + +const char *nfp_app_mip_name(struct nfp_app *app); +struct sk_buff * +nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size, gfp_t priority); + +struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id); +void nfp_app_free(struct nfp_app *app); +int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl); +void nfp_app_stop(struct nfp_app *app); + +/* Callbacks shared between apps */ + +int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, + unsigned int id); +int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_net *nn, unsigned int id); + +struct devlink_port *nfp_devlink_get_devlink_port(struct net_device *netdev); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c new file mode 100644 index 000000000..f119277fd --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net.h" +#include "nfp_port.h" + +int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_net *nn, unsigned int id) +{ + int err; + + if (!pf->eth_tbl) + return 0; + + nn->port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, nn->dp.netdev); + if (IS_ERR(nn->port)) + return PTR_ERR(nn->port); + + err = nfp_port_init_phy_port(pf, app, nn->port, id); + if (err) { + nfp_port_free(nn->port); + return err; + } + + return nn->port->type == NFP_PORT_INVALID; +} + +int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, + unsigned int id) +{ + int err; + + err = nfp_app_nic_vnic_init_phy_port(app->pf, app, nn, id); + if (err) + return err < 0 ? err : 0; + + nfp_net_get_mac_addr(app->pf, nn->dp.netdev, nn->port); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c new file mode 100644 index 000000000..154399c54 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/types.h> + +#include "nfp_asm.h" + +const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 }, + [CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ32] = { 0x00, 0x5c }, + [CMD_TGT_READ32_LE] = { 0x01, 0x5c }, + [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, + [CMD_TGT_ADD] = { 0x00, 0x47 }, + [CMD_TGT_ADD_IMM] = { 0x02, 0x47 }, +}; + +static bool unreg_is_imm(u16 reg) +{ + return (reg & UR_REG_IMM) == UR_REG_IMM; +} + +u16 br_get_offset(u64 instr) +{ + u16 addr_lo, addr_hi; + + addr_lo = FIELD_GET(OP_BR_ADDR_LO, instr); + addr_hi = FIELD_GET(OP_BR_ADDR_HI, instr); + + return (addr_hi * ((OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)) + 1)) | + addr_lo; +} + +void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +void br_add_offset(u64 *instr, u16 offset) +{ + u16 addr; + + addr = br_get_offset(*instr); + br_set_offset(instr, addr + offset); +} + +static bool immed_can_modify(u64 instr) +{ + if (FIELD_GET(OP_IMMED_INV, instr) || + FIELD_GET(OP_IMMED_SHIFT, instr) || + FIELD_GET(OP_IMMED_WIDTH, instr) != IMMED_WIDTH_ALL) { + pr_err("Can't decode/encode immed!\n"); + return false; + } + return true; +} + +u16 immed_get_value(u64 instr) +{ + u16 reg; + + if (!immed_can_modify(instr)) + return 0; + + reg = FIELD_GET(OP_IMMED_A_SRC, instr); + if (!unreg_is_imm(reg)) + reg = FIELD_GET(OP_IMMED_B_SRC, instr); + + return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr) << 8; +} + +void immed_set_value(u64 *instr, u16 immed) +{ + if (!immed_can_modify(*instr)) + return; + + if (unreg_is_imm(FIELD_GET(OP_IMMED_A_SRC, *instr))) { + *instr &= ~FIELD_PREP(OP_IMMED_A_SRC, 0xff); + *instr |= FIELD_PREP(OP_IMMED_A_SRC, immed & 0xff); + } else { + *instr &= ~FIELD_PREP(OP_IMMED_B_SRC, 0xff); + *instr |= FIELD_PREP(OP_IMMED_B_SRC, immed & 0xff); + } + + *instr &= ~OP_IMMED_IMM; + *instr |= FIELD_PREP(OP_IMMED_IMM, immed >> 8); +} + +void immed_add_value(u64 *instr, u16 offset) +{ + u16 val; + + if (!immed_can_modify(*instr)) + return; + + val = immed_get_value(*instr); + immed_set_value(instr, val + offset); +} + +static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) +{ + bool lm_id, lm_dec = false; + u16 val = swreg_value(reg); + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + switch (swreg_lm_mode(reg)) { + case NN_LM_MOD_NONE: + if (val & ~UR_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + return UR_REG_LM | FIELD_PREP(UR_REG_LM_IDX, lm_id) | + val; + case NN_LM_MOD_DEC: + lm_dec = true; + fallthrough; + case NN_LM_MOD_INC: + if (val) { + pr_err("LM offset in inc/dev mode\n"); + return 0; + } + return UR_REG_LM | UR_REG_LM_POST_MOD | + FIELD_PREP(UR_REG_LM_IDX, lm_id) | + FIELD_PREP(UR_REG_LM_POST_MOD_DEC, lm_dec); + default: + pr_err("bad LM mode for unrestricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + } + + pr_err("unrecognized reg encoding %08x\n", reg); + return 0; +} + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg) && + swreg_type(lreg) != NN_REG_NONE) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); + + return 0; +} + +static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = swreg_value(reg); + bool lm_id; + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + if (swreg_lm_mode(reg) != NN_LM_MOD_NONE) { + pr_err("bad LM mode for restricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } + + if (val & ~RE_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + + return RE_REG_LM | FIELD_PREP(RE_REG_LM_IDX, lm_id) | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + case NN_REG_NNR: + pr_err("NNRs used with restricted encoding\n"); + return 0; + } + + pr_err("unrecognized reg encoding\n"); + return 0; +} + +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg) && + swreg_type(lreg) != NN_REG_NONE) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); + + return 0; +} + +#define NFP_USTORE_ECC_POLY_WORDS 7 +#define NFP_USTORE_OP_BITS 45 + +static const u64 nfp_ustore_ecc_polynomials[NFP_USTORE_ECC_POLY_WORDS] = { + 0x0ff800007fffULL, + 0x11f801ff801fULL, + 0x1e387e0781e1ULL, + 0x17cb8e388e22ULL, + 0x1af5b2c93244ULL, + 0x1f56d5525488ULL, + 0x0daf69a46910ULL, +}; + +static bool parity(u64 value) +{ + return hweight64(value) & 1; +} + +int nfp_ustore_check_valid_no_ecc(u64 insn) +{ + if (insn & ~GENMASK_ULL(NFP_USTORE_OP_BITS, 0)) + return -EINVAL; + + return 0; +} + +u64 nfp_ustore_calc_ecc_insn(u64 insn) +{ + u8 ecc = 0; + int i; + + for (i = 0; i < NFP_USTORE_ECC_POLY_WORDS; i++) + ecc |= parity(nfp_ustore_ecc_polynomials[i] & insn) << i; + + return insn | (u64)ecc << NFP_USTORE_OP_BITS; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 000000000..648c2810e --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,437 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include <linux/bitfield.h> +#include <linux/bug.h> +#include <linux/types.h> + +#define REG_NONE 0 +#define REG_WIDTH 4 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_LM 0x050 +#define RE_REG_LM_IDX 0x008 +#define RE_REG_LM_IDX_MAX 0x7 +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_LM 0x200 +#define UR_REG_LM_IDX 0x020 +#define UR_REG_LM_POST_MOD 0x010 +#define UR_REG_LM_POST_MOD_DEC 0x001 +#define UR_REG_LM_IDX_MAX 0xf +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define OP_BR_BIT_BASE 0x0d000000000ULL +#define OP_BR_BIT_BASE_MASK 0x0f800080300ULL +#define OP_BR_BIT_A_SRC 0x000000000ffULL +#define OP_BR_BIT_B_SRC 0x0000003fc00ULL +#define OP_BR_BIT_BV 0x00000040000ULL +#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL +#define OP_BR_BIT_DEFBR OP_BR_DEFBR +#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO +#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI + +#define OP_BR_ALU_BASE 0x0e800000000ULL +#define OP_BR_ALU_BASE_MASK 0x0ff80000000ULL +#define OP_BR_ALU_A_SRC 0x000000003ffULL +#define OP_BR_ALU_B_SRC 0x000000ffc00ULL +#define OP_BR_ALU_DEFBR 0x00000300000ULL +#define OP_BR_ALU_IMM_HI 0x0007fc00000ULL +#define OP_BR_ALU_SRC_LMEXTN 0x40000000000ULL +#define OP_BR_ALU_DST_LMEXTN 0x80000000000ULL + +static inline bool nfp_is_br(u64 insn) +{ + return (insn & OP_BR_BASE_MASK) == OP_BR_BASE || + (insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE; +} + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BMI = 0x02, + BR_BHS = 0x04, + BR_BCC = 0x05, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_BLT = 0x09, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +u16 br_get_offset(u64 instr); +void br_set_offset(u64 *instr, u16 offset); +void br_add_offset(u64 *instr, u16 offset); + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL +#define OP_BB_SRC_LMEXTN 0x40000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL +#define OP_IMMED_SRC_LMEXTN 0x40000000000ULL +#define OP_IMMED_DST_LMEXTN 0x80000000000ULL + +enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +u16 immed_get_value(u64 instr); +void immed_set_value(u64 *instr, u16 immed); +void immed_add_value(u64 *instr, u16 offset); + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL +#define OP_SHF_SRC_LMEXTN 0x40000000000ULL +#define OP_SHF_DST_LMEXTN 0x80000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, + SHF_OP_ASHR = 6, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_NONE = SHF_SC_R_ROT, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL +#define OP_ALU_SRC_LMEXTN 0x40000000000ULL +#define OP_ALU_DST_LMEXTN 0x80000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NOT = 0x04, + ALU_OP_ADD_2B = 0x05, + ALU_OP_AND = 0x08, + ALU_OP_AND_NOT_A = 0x0c, + ALU_OP_SUB_C = 0x0d, + ALU_OP_AND_NOT_B = 0x10, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL +#define OP_LDF_SRC_LMEXTN 0x40000000000ULL +#define OP_LDF_DST_LMEXTN 0x80000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_INDIR 0x20000000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8_SWAP, + CMD_TGT_WRITE32_SWAP, + CMD_TGT_READ32, + CMD_TGT_READ32_LE, + CMD_TGT_READ32_SWAP, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + CMD_TGT_ADD, + CMD_TGT_ADD_IMM, + __CMD_TGT_MAP_SIZE, +}; + +extern const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE]; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_SWAP_DEFER1 = 1, + CMD_CTX_SWAP_DEFER2 = 2, + CMD_CTX_NO_SWAP = 3, +}; + +#define CMD_OVE_DATA GENMASK(5, 3) +#define CMD_OVE_LEN BIT(7) +#define CMD_OV_LEN GENMASK(12, 8) + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL +#define OP_LCSR_SRC_LMEXTN 0x40000000000ULL +#define OP_LCSR_DST_LMEXTN 0x80000000000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#define NFP_CSR_CTX_PTR 0x20 +#define NFP_CSR_ACT_LM_ADDR0 0x64 +#define NFP_CSR_ACT_LM_ADDR1 0x6c +#define NFP_CSR_ACT_LM_ADDR2 0x94 +#define NFP_CSR_ACT_LM_ADDR3 0x9c +#define NFP_CSR_PSEUDO_RND_NUM 0x148 + +/* Software register representation, independent of operand type */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_LM_IDX GENMASK(23, 22) +#define NN_REG_LM_IDX_HI BIT(23) +#define NN_REG_LM_IDX_LO BIT(22) +#define NN_REG_LM_MOD GENMASK(21, 20) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_GPR_BOTH = NN_REG_GPR_A | NN_REG_GPR_B, + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), + NN_REG_LMEM = BIT(6), +}; + +enum nfp_bpf_lm_mode { + NN_LM_MOD_NONE = 0, + NN_LM_MOD_INC, + NN_LM_MOD_DEC, +}; + +#define reg_both(x) __enc_swreg((x), NN_REG_GPR_BOTH) +#define reg_a(x) __enc_swreg((x), NN_REG_GPR_A) +#define reg_b(x) __enc_swreg((x), NN_REG_GPR_B) +#define reg_nnr(x) __enc_swreg((x), NN_REG_NNR) +#define reg_xfer(x) __enc_swreg((x), NN_REG_XFER) +#define reg_imm(x) __enc_swreg((x), NN_REG_IMM) +#define reg_none() __enc_swreg(0, NN_REG_NONE) +#define reg_lm(x, off) __enc_swreg_lm((x), NN_LM_MOD_NONE, (off)) +#define reg_lm_inc(x) __enc_swreg_lm((x), NN_LM_MOD_INC, 0) +#define reg_lm_dec(x) __enc_swreg_lm((x), NN_LM_MOD_DEC, 0) +#define __reg_lm(x, mod, off) __enc_swreg_lm((x), (mod), (off)) + +typedef __u32 __bitwise swreg; + +static inline swreg __enc_swreg(u16 id, u8 type) +{ + return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type)); +} + +static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off) +{ + WARN_ON(id > 3 || (off && mode != NN_LM_MOD_NONE)); + + return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) | + FIELD_PREP(NN_REG_LM_IDX, id) | + FIELD_PREP(NN_REG_LM_MOD, mode) | + off); +} + +static inline u32 swreg_raw(swreg reg) +{ + return (__force u32)reg; +} + +static inline enum nfp_bpf_reg_type swreg_type(swreg reg) +{ + return FIELD_GET(NN_REG_TYPE, swreg_raw(reg)); +} + +static inline u16 swreg_value(swreg reg) +{ + return FIELD_GET(NN_REG_VAL, swreg_raw(reg)); +} + +static inline bool swreg_lm_idx(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX_LO, swreg_raw(reg)); +} + +static inline bool swreg_lmextn(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX_HI, swreg_raw(reg)); +} + +static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg) +{ + return FIELD_GET(NN_REG_LM_MOD, swreg_raw(reg)); +} + +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; + bool dst_lmextn; + bool src_lmextn; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; + bool dst_lmextn; + bool src_lmextn; +}; + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg); +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8); + +#define NFP_USTORE_PREFETCH_WINDOW 8 + +int nfp_ustore_check_valid_no_ecc(u64 insn); +u64 nfp_ustore_calc_ecc_insn(u64 insn); + +#define NFP_IND_ME_REFL_WR_SIG_INIT 3 +#define NFP_IND_ME_CTX_PTR_BASE_MASK GENMASK(9, 0) +#define NFP_IND_NUM_CONTEXTS 8 + +static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset) +{ + return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR; +} + +enum mul_type { + MUL_TYPE_START = 0x00, + MUL_TYPE_STEP_24x8 = 0x01, + MUL_TYPE_STEP_16x16 = 0x02, + MUL_TYPE_STEP_32x32 = 0x03, +}; + +enum mul_step { + MUL_STEP_1 = 0x00, + MUL_STEP_NONE = MUL_STEP_1, + MUL_STEP_2 = 0x01, + MUL_STEP_3 = 0x02, + MUL_STEP_4 = 0x03, + MUL_LAST = 0x04, + MUL_LAST_2 = 0x05, +}; + +#define OP_MUL_BASE 0x0f800000000ULL +#define OP_MUL_A_SRC 0x000000003ffULL +#define OP_MUL_B_SRC 0x000000ffc00ULL +#define OP_MUL_STEP 0x00000700000ULL +#define OP_MUL_DST_AB 0x00000800000ULL +#define OP_MUL_SW 0x00040000000ULL +#define OP_MUL_TYPE 0x00180000000ULL +#define OP_MUL_WR_AB 0x20000000000ULL +#define OP_MUL_SRC_LMEXTN 0x40000000000ULL +#define OP_MUL_DST_LMEXTN 0x80000000000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c new file mode 100644 index 000000000..cb08d7bf9 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/rtnetlink.h> +#include <net/devlink.h> + +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_port.h" + +static int +nfp_devlink_fill_eth_port(struct nfp_port *port, + struct nfp_eth_table_port *copy) +{ + struct nfp_eth_table_port *eth_port; + + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EINVAL; + + memcpy(copy, eth_port, sizeof(*eth_port)); + + return 0; +} + +static int +nfp_devlink_fill_eth_port_from_id(struct nfp_pf *pf, + struct devlink_port *dl_port, + struct nfp_eth_table_port *copy) +{ + struct nfp_port *port = container_of(dl_port, struct nfp_port, dl_port); + + return nfp_devlink_fill_eth_port(port, copy); +} + +static int +nfp_devlink_set_lanes(struct nfp_pf *pf, unsigned int idx, unsigned int lanes) +{ + struct nfp_nsp *nsp; + int ret; + + nsp = nfp_eth_config_start(pf->cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + ret = __nfp_eth_set_split(nsp, lanes); + if (ret) { + nfp_eth_config_cleanup_end(nsp); + return ret; + } + + ret = nfp_eth_config_commit_end(nsp); + if (ret < 0) + return ret; + if (ret) /* no change */ + return 0; + + return nfp_net_refresh_port_table_sync(pf); +} + +static int +nfp_devlink_port_split(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack) +{ + struct nfp_pf *pf = devlink_priv(devlink); + struct nfp_eth_table_port eth_port; + unsigned int lanes; + int ret; + + rtnl_lock(); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, ð_port); + rtnl_unlock(); + if (ret) + return ret; + + if (eth_port.port_lanes % count) + return -EINVAL; + + /* Special case the 100G CXP -> 2x40G split */ + lanes = eth_port.port_lanes / count; + if (eth_port.lanes == 10 && count == 2) + lanes = 8 / count; + + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); +} + +static int +nfp_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct nfp_pf *pf = devlink_priv(devlink); + struct nfp_eth_table_port eth_port; + unsigned int lanes; + int ret; + + rtnl_lock(); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, ð_port); + rtnl_unlock(); + if (ret) + return ret; + + if (!eth_port.is_split) + return -EINVAL; + + /* Special case the 100G CXP -> 2x40G unsplit */ + lanes = eth_port.port_lanes; + if (eth_port.port_lanes == 8) + lanes = 10; + + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); +} + +static int +nfp_devlink_sb_pool_get(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, struct devlink_sb_pool_info *pool_info) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_shared_buf_pool_get(pf, sb_index, pool_index, pool_info); +} + +static int +nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, + u32 size, enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_shared_buf_pool_set(pf, sb_index, pool_index, + size, threshold_type); +} + +static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_app_eswitch_mode_get(pf->app, mode); +} + +static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_app_eswitch_mode_set(pf->app, mode); +} + +static const struct nfp_devlink_versions_simple { + const char *key; + const char *hwinfo; +} nfp_devlink_versions_hwinfo[] = { + { DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, "assembly.partno", }, + { DEVLINK_INFO_VERSION_GENERIC_BOARD_REV, "assembly.revision", }, + { DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", }, + { "board.model", /* code name */ "assembly.model", }, +}; + +static int +nfp_devlink_versions_get_hwinfo(struct nfp_pf *pf, struct devlink_info_req *req) +{ + unsigned int i; + int err; + + for (i = 0; i < ARRAY_SIZE(nfp_devlink_versions_hwinfo); i++) { + const struct nfp_devlink_versions_simple *info; + const char *val; + + info = &nfp_devlink_versions_hwinfo[i]; + + val = nfp_hwinfo_lookup(pf->hwinfo, info->hwinfo); + if (!val) + continue; + + err = devlink_info_version_fixed_put(req, info->key, val); + if (err) + return err; + } + + return 0; +} + +static const struct nfp_devlink_versions { + enum nfp_nsp_versions id; + const char *key; +} nfp_devlink_versions_nsp[] = { + { NFP_VERSIONS_BUNDLE, DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, }, + { NFP_VERSIONS_BSP, DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, }, + { NFP_VERSIONS_CPLD, "fw.cpld", }, + { NFP_VERSIONS_APP, DEVLINK_INFO_VERSION_GENERIC_FW_APP, }, + { NFP_VERSIONS_UNDI, DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, }, + { NFP_VERSIONS_NCSI, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, }, + { NFP_VERSIONS_CFGR, "chip.init", }, +}; + +static int +nfp_devlink_versions_get_nsp(struct devlink_info_req *req, bool flash, + const u8 *buf, unsigned int size) +{ + unsigned int i; + int err; + + for (i = 0; i < ARRAY_SIZE(nfp_devlink_versions_nsp); i++) { + const struct nfp_devlink_versions *info; + const char *version; + + info = &nfp_devlink_versions_nsp[i]; + + version = nfp_nsp_versions_get(info->id, flash, buf, size); + if (IS_ERR(version)) { + if (PTR_ERR(version) == -ENOENT) + continue; + else + return PTR_ERR(version); + } + + if (flash) + err = devlink_info_version_stored_put(req, info->key, + version); + else + err = devlink_info_version_running_put(req, info->key, + version); + if (err) + return err; + } + + return 0; +} + +static int +nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct nfp_pf *pf = devlink_priv(devlink); + const char *sn, *vendor, *part; + struct nfp_nsp *nsp; + char *buf = NULL; + int err; + + err = devlink_info_driver_name_put(req, "nfp"); + if (err) + return err; + + vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); + part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); + sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); + if (vendor && part && sn) { + char *buf; + + buf = kmalloc(strlen(vendor) + strlen(part) + strlen(sn) + 1, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = '\0'; + strcat(buf, vendor); + strcat(buf, part); + strcat(buf, sn); + + err = devlink_info_serial_number_put(req, buf); + kfree(buf); + if (err) + return err; + } + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + NL_SET_ERR_MSG_MOD(extack, "can't access NSP"); + return PTR_ERR(nsp); + } + + if (nfp_nsp_has_versions(nsp)) { + buf = kzalloc(NFP_NSP_VERSION_BUFSZ, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_close_nsp; + } + + err = nfp_nsp_versions(nsp, buf, NFP_NSP_VERSION_BUFSZ); + if (err) + goto err_free_buf; + + err = nfp_devlink_versions_get_nsp(req, false, + buf, NFP_NSP_VERSION_BUFSZ); + if (err) + goto err_free_buf; + + err = nfp_devlink_versions_get_nsp(req, true, + buf, NFP_NSP_VERSION_BUFSZ); + if (err) + goto err_free_buf; + + kfree(buf); + } + + nfp_nsp_close(nsp); + + return nfp_devlink_versions_get_hwinfo(pf, req); + +err_free_buf: + kfree(buf); +err_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static int +nfp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + return nfp_flash_update_common(devlink_priv(devlink), params->fw, extack); +} + +const struct devlink_ops nfp_devlink_ops = { + .port_split = nfp_devlink_port_split, + .port_unsplit = nfp_devlink_port_unsplit, + .sb_pool_get = nfp_devlink_sb_pool_get, + .sb_pool_set = nfp_devlink_sb_pool_set, + .eswitch_mode_get = nfp_devlink_eswitch_mode_get, + .eswitch_mode_set = nfp_devlink_eswitch_mode_set, + .info_get = nfp_devlink_info_get, + .flash_update = nfp_devlink_flash_update, +}; + +int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) +{ + struct devlink_port_attrs attrs = {}; + struct nfp_eth_table_port eth_port; + struct devlink *devlink; + const u8 *serial; + int serial_len; + int ret; + + rtnl_lock(); + ret = nfp_devlink_fill_eth_port(port, ð_port); + rtnl_unlock(); + if (ret) + return ret; + + attrs.split = eth_port.is_split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; + attrs.lanes = eth_port.port_lanes; + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = eth_port.label_port; + attrs.phys.split_subport_number = eth_port.label_subport; + serial_len = nfp_cpp_serial(port->app->cpp, &serial); + memcpy(attrs.switch_id.id, serial, serial_len); + attrs.switch_id.id_len = serial_len; + devlink_port_attrs_set(&port->dl_port, &attrs); + + devlink = priv_to_devlink(app->pf); + + return devl_port_register(devlink, &port->dl_port, port->eth_id); +} + +void nfp_devlink_port_unregister(struct nfp_port *port) +{ + devl_port_unregister(&port->dl_port); +} + +void nfp_devlink_port_type_eth_set(struct nfp_port *port) +{ + devlink_port_type_eth_set(&port->dl_port, port->netdev); +} + +void nfp_devlink_port_type_clear(struct nfp_port *port) +{ + devlink_port_type_clear(&port->dl_port); +} + +struct devlink_port *nfp_devlink_get_devlink_port(struct net_device *netdev) +{ + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port) + return NULL; + + return &port->dl_port; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c new file mode 100644 index 000000000..5cabb1aa9 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017 Netronome Systems, Inc. */ + +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/hwmon.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_main.h" + +#define NFP_TEMP_MAX (95 * 1000) +#define NFP_TEMP_CRIT (105 * 1000) + +#define NFP_POWER_MAX (25 * 1000 * 1000) + +static int nfp_hwmon_sensor_id(enum hwmon_sensor_types type, int channel) +{ + if (type == hwmon_temp) + return NFP_SENSOR_CHIP_TEMPERATURE; + if (type == hwmon_power) + return NFP_SENSOR_ASSEMBLY_POWER + channel; + return -EINVAL; +} + +static int +nfp_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + static const struct { + enum hwmon_sensor_types type; + u32 attr; + long val; + } const_vals[] = { + { hwmon_temp, hwmon_temp_max, NFP_TEMP_MAX }, + { hwmon_temp, hwmon_temp_crit, NFP_TEMP_CRIT }, + { hwmon_power, hwmon_power_max, NFP_POWER_MAX }, + }; + struct nfp_pf *pf = dev_get_drvdata(dev); + enum nfp_nsp_sensor_id id; + int err, i; + + for (i = 0; i < ARRAY_SIZE(const_vals); i++) + if (const_vals[i].type == type && const_vals[i].attr == attr) { + *val = const_vals[i].val; + return 0; + } + + err = nfp_hwmon_sensor_id(type, channel); + if (err < 0) + return err; + id = err; + + if (!(pf->nspi->sensor_mask & BIT(id))) + return -EOPNOTSUPP; + + if (type == hwmon_temp && attr == hwmon_temp_input) + return nfp_hwmon_read_sensor(pf->cpp, id, val); + if (type == hwmon_power && attr == hwmon_power_input) + return nfp_hwmon_read_sensor(pf->cpp, id, val); + + return -EINVAL; +} + +static umode_t +nfp_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, + int channel) +{ + if (type == hwmon_temp) { + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_crit: + case hwmon_temp_max: + return 0444; + } + } else if (type == hwmon_power) { + switch (attr) { + case hwmon_power_input: + case hwmon_power_max: + return 0444; + } + } + return 0; +} + +static u32 nfp_chip_config[] = { + HWMON_C_REGISTER_TZ, + 0 +}; + +static const struct hwmon_channel_info nfp_chip = { + .type = hwmon_chip, + .config = nfp_chip_config, +}; + +static u32 nfp_temp_config[] = { + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT, + 0 +}; + +static const struct hwmon_channel_info nfp_temp = { + .type = hwmon_temp, + .config = nfp_temp_config, +}; + +static u32 nfp_power_config[] = { + HWMON_P_INPUT | HWMON_P_MAX, + HWMON_P_INPUT, + HWMON_P_INPUT, + 0 +}; + +static const struct hwmon_channel_info nfp_power = { + .type = hwmon_power, + .config = nfp_power_config, +}; + +static const struct hwmon_channel_info *nfp_hwmon_info[] = { + &nfp_chip, + &nfp_temp, + &nfp_power, + NULL +}; + +static const struct hwmon_ops nfp_hwmon_ops = { + .is_visible = nfp_hwmon_is_visible, + .read = nfp_hwmon_read, +}; + +static const struct hwmon_chip_info nfp_chip_info = { + .ops = &nfp_hwmon_ops, + .info = nfp_hwmon_info, +}; + +int nfp_hwmon_register(struct nfp_pf *pf) +{ + if (!IS_REACHABLE(CONFIG_HWMON)) + return 0; + + if (!pf->nspi) { + nfp_warn(pf->cpp, "not registering HWMON (no NSP info)\n"); + return 0; + } + if (!pf->nspi->sensor_mask) { + nfp_info(pf->cpp, + "not registering HWMON (NSP doesn't report sensors)\n"); + return 0; + } + + pf->hwmon_dev = hwmon_device_register_with_info(&pf->pdev->dev, "nfp", + pf, &nfp_chip_info, + NULL); + return PTR_ERR_OR_ZERO(pf->hwmon_dev); +} + +void nfp_hwmon_unregister(struct nfp_pf *pf) +{ + if (!IS_REACHABLE(CONFIG_HWMON) || !pf->hwmon_dev) + return; + + hwmon_device_unregister(pf->hwmon_dev); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c new file mode 100644 index 000000000..71301dbd8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -0,0 +1,1000 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_main.c + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Alejandro Lucero <alejandro.lucero@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/firmware.h> +#include <linux/vmalloc.h> +#include <net/devlink.h> + +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_dev.h" +#include "nfpcore/nfp_nffw.h" +#include "nfpcore/nfp_nsp.h" + +#include "nfpcore/nfp6000_pcie.h" + +#include "nfp_abi.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net.h" + +static const char nfp_driver_name[] = "nfp"; + +static const struct pci_device_id nfp_pci_device_ids[] = { + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP3800, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP4000, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP5000, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP6000, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP3800, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP4000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP5000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP6000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { 0, } /* Required last entry. */ +}; +MODULE_DEVICE_TABLE(pci, nfp_pci_device_ids); + +int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, + unsigned int default_val) +{ + char name[256]; + int err = 0; + u64 val; + + snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp)); + + val = nfp_rtsym_read_le(pf->rtbl, name, &err); + if (err) { + if (err == -ENOENT) + return default_val; + nfp_err(pf->cpp, "Unable to read symbol %s\n", name); + return err; + } + + return val; +} + +u8 __iomem * +nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, + unsigned int min_size, struct nfp_cpp_area **area) +{ + char pf_symbol[256]; + + snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt, + nfp_cppcore_pcie_unit(pf->cpp)); + + return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area); +} + +/* Callers should hold the devlink instance lock */ +int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length, + void *out_data, u64 out_length) +{ + unsigned long err_at; + u64 max_data_sz; + u32 val = 0; + int n, err; + + if (!pf->mbox) + return -EOPNOTSUPP; + + max_data_sz = nfp_rtsym_size(pf->mbox) - NFP_MBOX_SYM_MIN_SIZE; + + /* Check if cmd field is clear */ + err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_CMD, &val); + if (err || val) { + nfp_warn(pf->cpp, "failed to issue command (%u): %u, err: %d\n", + cmd, val, err); + return err ?: -EBUSY; + } + + in_length = min(in_length, max_data_sz); + n = nfp_rtsym_write(pf->cpp, pf->mbox, NFP_MBOX_DATA, in_data, + in_length); + if (n != in_length) + return -EIO; + /* Write data_len and wipe reserved */ + err = nfp_rtsym_writeq(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, in_length); + if (err) + return err; + + /* Read back for ordering */ + err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, &val); + if (err) + return err; + + /* Write cmd and wipe return value */ + err = nfp_rtsym_writeq(pf->cpp, pf->mbox, NFP_MBOX_CMD, cmd); + if (err) + return err; + + err_at = jiffies + 5 * HZ; + while (true) { + /* Wait for command to go to 0 (NFP_MBOX_NO_CMD) */ + err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_CMD, &val); + if (err) + return err; + if (!val) + break; + + if (time_is_before_eq_jiffies(err_at)) + return -ETIMEDOUT; + + msleep(5); + } + + /* Copy output if any (could be error info, do it before reading ret) */ + err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, &val); + if (err) + return err; + + out_length = min_t(u32, val, min(out_length, max_data_sz)); + n = nfp_rtsym_read(pf->cpp, pf->mbox, NFP_MBOX_DATA, + out_data, out_length); + if (n != out_length) + return -EIO; + + /* Check if there is an error */ + err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_RET, &val); + if (err) + return err; + if (val) + return -val; + + return out_length; +} + +static bool nfp_board_ready(struct nfp_pf *pf) +{ + const char *cp; + long state; + int err; + + cp = nfp_hwinfo_lookup(pf->hwinfo, "board.state"); + if (!cp) + return false; + + err = kstrtol(cp, 0, &state); + if (err < 0) + return false; + + return state == 15; +} + +static int nfp_pf_board_state_wait(struct nfp_pf *pf) +{ + const unsigned long wait_until = jiffies + 10 * HZ; + + while (!nfp_board_ready(pf)) { + if (time_is_before_eq_jiffies(wait_until)) { + nfp_err(pf->cpp, "NFP board initialization timeout\n"); + return -EINVAL; + } + + nfp_info(pf->cpp, "waiting for board initialization\n"); + if (msleep_interruptible(500)) + return -ERESTARTSYS; + + /* Refresh cached information */ + kfree(pf->hwinfo); + pf->hwinfo = nfp_hwinfo_read(pf->cpp); + } + + return 0; +} + +static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf) +{ + int err; + + pf->limit_vfs = nfp_rtsym_read_le(pf->rtbl, "nfd_vf_cfg_max_vfs", &err); + if (err) { + /* For backwards compatibility if symbol not found allow all */ + pf->limit_vfs = ~0; + if (err == -ENOENT) + return 0; + + nfp_warn(pf->cpp, "Warning: VF limit read failed: %d\n", err); + return err; + } + + err = pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs); + if (err) + nfp_warn(pf->cpp, "Failed to set VF count in sysfs: %d\n", err); + return 0; +} + +static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ +#ifdef CONFIG_PCI_IOV + struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; + int err; + + if (num_vfs > pf->limit_vfs) { + nfp_info(pf->cpp, "Firmware limits number of VFs to %u\n", + pf->limit_vfs); + return -EINVAL; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err); + return err; + } + + devlink = priv_to_devlink(pf); + devl_lock(devlink); + + err = nfp_app_sriov_enable(pf->app, num_vfs); + if (err) { + dev_warn(&pdev->dev, + "App specific PCI SR-IOV configuration failed: %d\n", + err); + goto err_sriov_disable; + } + + pf->num_vfs = num_vfs; + + dev_dbg(&pdev->dev, "Created %d VFs.\n", pf->num_vfs); + + devl_unlock(devlink); + return num_vfs; + +err_sriov_disable: + devl_unlock(devlink); + pci_disable_sriov(pdev); + return err; +#endif + return 0; +} + +static int nfp_pcie_sriov_disable(struct pci_dev *pdev) +{ +#ifdef CONFIG_PCI_IOV + struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; + + devlink = priv_to_devlink(pf); + devl_lock(devlink); + + /* If the VFs are assigned we cannot shut down SR-IOV without + * causing issues, so just leave the hardware available but + * disabled + */ + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n"); + devl_unlock(devlink); + return -EPERM; + } + + nfp_app_sriov_disable(pf->app); + + pf->num_vfs = 0; + + devl_unlock(devlink); + + pci_disable_sriov(pdev); + dev_dbg(&pdev->dev, "Removed VFs.\n"); +#endif + return 0; +} + +static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + if (!pci_get_drvdata(pdev)) + return -ENOENT; + + if (num_vfs == 0) + return nfp_pcie_sriov_disable(pdev); + else + return nfp_pcie_sriov_enable(pdev, num_vfs); +} + +int nfp_flash_update_common(struct nfp_pf *pf, const struct firmware *fw, + struct netlink_ext_ack *extack) +{ + struct device *dev = &pf->pdev->dev; + struct nfp_nsp *nsp; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + if (extack) + NL_SET_ERR_MSG_MOD(extack, "can't access NSP"); + else + dev_err(dev, "Failed to access the NSP: %d\n", err); + return err; + } + + err = nfp_nsp_write_flash(nsp, fw); + if (err < 0) + goto exit_close_nsp; + dev_info(dev, "Finished writing flash image\n"); + err = 0; + +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static const struct firmware * +nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name) +{ + const struct firmware *fw = NULL; + int err; + + err = request_firmware_direct(&fw, name, &pdev->dev); + nfp_info(pf->cpp, " %s: %s\n", + name, err ? "not found" : "found"); + if (err) + return NULL; + + return fw; +} + +/** + * nfp_net_fw_find() - Find the correct firmware image for netdev mode + * @pdev: PCI Device structure + * @pf: NFP PF Device structure + * + * Return: firmware if found and requested successfully. + */ +static const struct firmware * +nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) +{ + struct nfp_eth_table_port *port; + const struct firmware *fw; + const char *fw_model; + char fw_name[256]; + const u8 *serial; + u16 interface; + int spc, i, j; + + nfp_info(pf->cpp, "Looking for firmware file in order of priority:\n"); + + /* First try to find a firmware image specific for this device */ + interface = nfp_cpp_interface(pf->cpp); + nfp_cpp_serial(pf->cpp, &serial); + sprintf(fw_name, "netronome/serial-%pMF-%02x-%02x.nffw", + serial, interface >> 8, interface & 0xff); + fw = nfp_net_fw_request(pdev, pf, fw_name); + if (fw) + return fw; + + /* Then try the PCI name */ + sprintf(fw_name, "netronome/pci-%s.nffw", pci_name(pdev)); + fw = nfp_net_fw_request(pdev, pf, fw_name); + if (fw) + return fw; + + /* Finally try the card type and media */ + if (!pf->eth_tbl) { + dev_err(&pdev->dev, "Error: can't identify media config\n"); + return NULL; + } + + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"); + if (!fw_model) + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); + if (!fw_model) { + dev_err(&pdev->dev, "Error: can't read part number\n"); + return NULL; + } + + spc = ARRAY_SIZE(fw_name); + spc -= snprintf(fw_name, spc, "netronome/nic_%s", fw_model); + + for (i = 0; spc > 0 && i < pf->eth_tbl->count; i += j) { + port = &pf->eth_tbl->ports[i]; + j = 1; + while (i + j < pf->eth_tbl->count && + port->speed == port[j].speed) + j++; + + spc -= snprintf(&fw_name[ARRAY_SIZE(fw_name) - spc], spc, + "_%dx%d", j, port->speed / 1000); + } + + if (spc <= 0) + return NULL; + + spc -= snprintf(&fw_name[ARRAY_SIZE(fw_name) - spc], spc, ".nffw"); + if (spc <= 0) + return NULL; + + return nfp_net_fw_request(pdev, pf, fw_name); +} + +static int +nfp_get_fw_policy_value(struct pci_dev *pdev, struct nfp_nsp *nsp, + const char *key, const char *default_val, int max_val, + int *value) +{ + char hwinfo[64]; + long hi_val; + int err; + + snprintf(hwinfo, sizeof(hwinfo), key); + err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo), + default_val); + if (err) + return err; + + err = kstrtol(hwinfo, 0, &hi_val); + if (err || hi_val < 0 || hi_val > max_val) { + dev_warn(&pdev->dev, + "Invalid value '%s' from '%s', ignoring\n", + hwinfo, key); + err = kstrtol(default_val, 0, &hi_val); + } + + *value = hi_val; + return err; +} + +/** + * nfp_fw_load() - Load the firmware image + * @pdev: PCI Device structure + * @pf: NFP PF Device structure + * @nsp: NFP SP handle + * + * Return: -ERRNO, 0 for no firmware loaded, 1 for firmware loaded + */ +static int +nfp_fw_load(struct pci_dev *pdev, struct nfp_pf *pf, struct nfp_nsp *nsp) +{ + bool do_reset, fw_loaded = false; + const struct firmware *fw = NULL; + int err, reset, policy, ifcs = 0; + char *token, *ptr; + char hwinfo[64]; + u16 interface; + + snprintf(hwinfo, sizeof(hwinfo), "abi_drv_load_ifc"); + err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo), + NFP_NSP_DRV_LOAD_IFC_DEFAULT); + if (err) + return err; + + interface = nfp_cpp_interface(pf->cpp); + ptr = hwinfo; + while ((token = strsep(&ptr, ","))) { + unsigned long interface_hi; + + err = kstrtoul(token, 0, &interface_hi); + if (err) { + dev_err(&pdev->dev, + "Failed to parse interface '%s': %d\n", + token, err); + return err; + } + + ifcs++; + if (interface == interface_hi) + break; + } + + if (!token) { + dev_info(&pdev->dev, "Firmware will be loaded by partner\n"); + return 0; + } + + err = nfp_get_fw_policy_value(pdev, nsp, "abi_drv_reset", + NFP_NSP_DRV_RESET_DEFAULT, + NFP_NSP_DRV_RESET_NEVER, &reset); + if (err) + return err; + + err = nfp_get_fw_policy_value(pdev, nsp, "app_fw_from_flash", + NFP_NSP_APP_FW_LOAD_DEFAULT, + NFP_NSP_APP_FW_LOAD_PREF, &policy); + if (err) + return err; + + fw = nfp_net_fw_find(pdev, pf); + do_reset = reset == NFP_NSP_DRV_RESET_ALWAYS || + (fw && reset == NFP_NSP_DRV_RESET_DISK); + + if (do_reset) { + dev_info(&pdev->dev, "Soft-resetting the NFP\n"); + err = nfp_nsp_device_soft_reset(nsp); + if (err < 0) { + dev_err(&pdev->dev, + "Failed to soft reset the NFP: %d\n", err); + goto exit_release_fw; + } + } + + if (fw && policy != NFP_NSP_APP_FW_LOAD_FLASH) { + if (nfp_nsp_has_fw_loaded(nsp) && nfp_nsp_fw_loaded(nsp)) + goto exit_release_fw; + + err = nfp_nsp_load_fw(nsp, fw); + if (err < 0) { + dev_err(&pdev->dev, "FW loading failed: %d\n", + err); + goto exit_release_fw; + } + dev_info(&pdev->dev, "Finished loading FW image\n"); + fw_loaded = true; + } else if (policy != NFP_NSP_APP_FW_LOAD_DISK && + nfp_nsp_has_stored_fw_load(nsp)) { + + /* Don't propagate this error to stick with legacy driver + * behavior, failure will be detected later during init. + */ + if (!nfp_nsp_load_stored_fw(nsp)) + dev_info(&pdev->dev, "Finished loading stored FW image\n"); + + /* Don't flag the fw_loaded in this case since other devices + * may reuse the firmware when configured this way + */ + } else { + dev_warn(&pdev->dev, "Didn't load firmware, please update flash or reconfigure card\n"); + } + +exit_release_fw: + release_firmware(fw); + + /* We don't want to unload firmware when other devices may still be + * dependent on it, which could be the case if there are multiple + * devices that could load firmware. + */ + if (fw_loaded && ifcs == 1) + pf->unload_fw_on_remove = true; + + return err < 0 ? err : fw_loaded; +} + +static void +nfp_nsp_init_ports(struct pci_dev *pdev, struct nfp_pf *pf, + struct nfp_nsp *nsp) +{ + bool needs_reinit = false; + int i; + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + if (!pf->eth_tbl) + return; + + if (!nfp_nsp_has_mac_reinit(nsp)) + return; + + for (i = 0; i < pf->eth_tbl->count; i++) + needs_reinit |= pf->eth_tbl->ports[i].override_changed; + if (!needs_reinit) + return; + + kfree(pf->eth_tbl); + if (nfp_nsp_mac_reinit(nsp)) + dev_warn(&pdev->dev, "MAC reinit failed\n"); + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); +} + +static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) +{ + struct nfp_nsp *nsp; + int err; + + err = nfp_resource_wait(pf->cpp, NFP_RESOURCE_NSP, 30); + if (err) + return err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + dev_err(&pdev->dev, "Failed to access the NSP: %d\n", err); + return err; + } + + err = nfp_nsp_wait(nsp); + if (err < 0) + goto exit_close_nsp; + + nfp_nsp_init_ports(pdev, pf, nsp); + + pf->nspi = __nfp_nsp_identify(nsp); + if (pf->nspi) + dev_info(&pdev->dev, "BSP: %s\n", pf->nspi->version); + + err = nfp_fw_load(pdev, pf, nsp); + if (err < 0) { + kfree(pf->nspi); + kfree(pf->eth_tbl); + dev_err(&pdev->dev, "Failed to load FW\n"); + goto exit_close_nsp; + } + + pf->fw_loaded = !!err; + err = 0; + +exit_close_nsp: + nfp_nsp_close(nsp); + + return err; +} + +static void nfp_fw_unload(struct nfp_pf *pf) +{ + struct nfp_nsp *nsp; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + nfp_err(pf->cpp, "Reset failed, can't open NSP\n"); + return; + } + + err = nfp_nsp_device_soft_reset(nsp); + if (err < 0) + dev_warn(&pf->pdev->dev, "Couldn't unload firmware: %d\n", err); + else + dev_info(&pf->pdev->dev, "Firmware safely unloaded\n"); + + nfp_nsp_close(nsp); +} + +static int nfp_pf_find_rtsyms(struct nfp_pf *pf) +{ + char pf_symbol[256]; + unsigned int pf_id; + + pf_id = nfp_cppcore_pcie_unit(pf->cpp); + + /* Optional per-PCI PF mailbox */ + snprintf(pf_symbol, sizeof(pf_symbol), NFP_MBOX_SYM_NAME, pf_id); + pf->mbox = nfp_rtsym_lookup(pf->rtbl, pf_symbol); + if (pf->mbox && nfp_rtsym_size(pf->mbox) < NFP_MBOX_SYM_MIN_SIZE) { + nfp_err(pf->cpp, "PF mailbox symbol too small: %llu < %d\n", + nfp_rtsym_size(pf->mbox), NFP_MBOX_SYM_MIN_SIZE); + return -EINVAL; + } + + return 0; +} + +int nfp_net_pf_get_app_id(struct nfp_pf *pf) +{ + return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id", + NFP_APP_CORE_NIC); +} + +static u64 nfp_net_pf_get_app_cap(struct nfp_pf *pf) +{ + char name[32]; + int err = 0; + u64 val; + + snprintf(name, sizeof(name), "_pf%u_net_app_cap", nfp_cppcore_pcie_unit(pf->cpp)); + + val = nfp_rtsym_read_le(pf->rtbl, name, &err); + if (err) { + if (err != -ENOENT) + nfp_err(pf->cpp, "Unable to read symbol %s\n", name); + + return 0; + } + + return val; +} + +static void nfp_pf_cfg_hwinfo(struct nfp_pf *pf) +{ + struct nfp_nsp *nsp; + char hwinfo[32]; + bool sp_indiff; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) + return; + + if (!nfp_nsp_has_hwinfo_set(nsp)) + goto end; + + sp_indiff = (nfp_net_pf_get_app_id(pf) == NFP_APP_FLOWER_NIC) || + (nfp_net_pf_get_app_cap(pf) & NFP_NET_APP_CAP_SP_INDIFF); + + /* No need to clean `sp_indiff` in driver, management firmware + * will do it when application firmware is unloaded. + */ + snprintf(hwinfo, sizeof(hwinfo), "sp_indiff=%d", sp_indiff); + err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo)); + /* Not a fatal error, no need to return error to stop driver from loading */ + if (err) { + nfp_warn(pf->cpp, "HWinfo(sp_indiff=%d) set failed: %d\n", sp_indiff, err); + } else { + /* Need reinit eth_tbl since the eth table state may change + * after sp_indiff is configured. + */ + kfree(pf->eth_tbl); + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + } + +end: + nfp_nsp_close(nsp); +} + +static int nfp_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + const struct nfp_dev_info *dev_info; + struct devlink *devlink; + struct nfp_pf *pf; + int err; + + if ((pdev->vendor == PCI_VENDOR_ID_NETRONOME || + pdev->vendor == PCI_VENDOR_ID_CORIGINE) && + (pdev->device == PCI_DEVICE_ID_NFP3800_VF || + pdev->device == PCI_DEVICE_ID_NFP6000_VF)) + dev_warn(&pdev->dev, "Binding NFP VF device to the NFP PF driver, the VF driver is called 'nfp_netvf'\n"); + + dev_info = &nfp_dev_info[pci_id->driver_data]; + + err = pci_enable_device(pdev); + if (err < 0) + return err; + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, dev_info->dma_mask); + if (err) + goto err_pci_disable; + + err = pci_request_regions(pdev, nfp_driver_name); + if (err < 0) { + dev_err(&pdev->dev, "Unable to reserve pci resources.\n"); + goto err_pci_disable; + } + + devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev); + if (!devlink) { + err = -ENOMEM; + goto err_rel_regions; + } + pf = devlink_priv(devlink); + INIT_LIST_HEAD(&pf->vnics); + INIT_LIST_HEAD(&pf->ports); + pci_set_drvdata(pdev, pf); + pf->pdev = pdev; + pf->dev_info = dev_info; + + pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev)); + if (!pf->wq) { + err = -ENOMEM; + goto err_pci_priv_unset; + } + + pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev, dev_info); + if (IS_ERR(pf->cpp)) { + err = PTR_ERR(pf->cpp); + goto err_disable_msix; + } + + err = nfp_resource_table_init(pf->cpp); + if (err) + goto err_cpp_free; + + pf->hwinfo = nfp_hwinfo_read(pf->cpp); + + dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n", + nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"), + nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"), + nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"), + nfp_hwinfo_lookup(pf->hwinfo, "assembly.revision"), + nfp_hwinfo_lookup(pf->hwinfo, "cpld.version")); + + err = nfp_pf_board_state_wait(pf); + if (err) + goto err_hwinfo_free; + + err = nfp_nsp_init(pdev, pf); + if (err) + goto err_hwinfo_free; + + pf->mip = nfp_mip_open(pf->cpp); + pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip); + + err = nfp_pf_find_rtsyms(pf); + if (err) + goto err_fw_unload; + + pf->dump_flag = NFP_DUMP_NSP_DIAG; + pf->dumpspec = nfp_net_dump_load_dumpspec(pf->cpp, pf->rtbl); + + err = nfp_pcie_sriov_read_nfd_limit(pf); + if (err) + goto err_fw_unload; + + pf->num_vfs = pci_num_vf(pdev); + if (pf->num_vfs > pf->limit_vfs) { + dev_err(&pdev->dev, + "Error: %d VFs already enabled, but loaded FW can only support %d\n", + pf->num_vfs, pf->limit_vfs); + err = -EINVAL; + goto err_fw_unload; + } + + nfp_pf_cfg_hwinfo(pf); + + err = nfp_net_pci_probe(pf); + if (err) + goto err_fw_unload; + + err = nfp_hwmon_register(pf); + if (err) { + dev_err(&pdev->dev, "Failed to register hwmon info\n"); + goto err_net_remove; + } + + return 0; + +err_net_remove: + nfp_net_pci_remove(pf); +err_fw_unload: + kfree(pf->rtbl); + nfp_mip_close(pf->mip); + if (pf->unload_fw_on_remove) + nfp_fw_unload(pf); + kfree(pf->eth_tbl); + kfree(pf->nspi); + vfree(pf->dumpspec); +err_hwinfo_free: + kfree(pf->hwinfo); +err_cpp_free: + nfp_cpp_free(pf->cpp); +err_disable_msix: + destroy_workqueue(pf->wq); +err_pci_priv_unset: + pci_set_drvdata(pdev, NULL); + devlink_free(devlink); +err_rel_regions: + pci_release_regions(pdev); +err_pci_disable: + pci_disable_device(pdev); + + return err; +} + +static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw) +{ + struct nfp_pf *pf; + + pf = pci_get_drvdata(pdev); + if (!pf) + return; + + nfp_hwmon_unregister(pf); + + nfp_pcie_sriov_disable(pdev); + + nfp_net_pci_remove(pf); + + vfree(pf->dumpspec); + kfree(pf->rtbl); + nfp_mip_close(pf->mip); + if (unload_fw && pf->unload_fw_on_remove) + nfp_fw_unload(pf); + + destroy_workqueue(pf->wq); + pci_set_drvdata(pdev, NULL); + kfree(pf->hwinfo); + nfp_cpp_free(pf->cpp); + + kfree(pf->eth_tbl); + kfree(pf->nspi); + devlink_free(priv_to_devlink(pf)); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static void nfp_pci_remove(struct pci_dev *pdev) +{ + __nfp_pci_shutdown(pdev, true); +} + +static void nfp_pci_shutdown(struct pci_dev *pdev) +{ + __nfp_pci_shutdown(pdev, false); +} + +static struct pci_driver nfp_pci_driver = { + .name = nfp_driver_name, + .id_table = nfp_pci_device_ids, + .probe = nfp_pci_probe, + .remove = nfp_pci_remove, + .shutdown = nfp_pci_shutdown, + .sriov_configure = nfp_pcie_sriov_configure, +}; + +static int __init nfp_main_init(void) +{ + int err; + + pr_info("%s: NFP PCIe Driver, Copyright (C) 2014-2020 Netronome Systems\n", + nfp_driver_name); + pr_info("%s: NFP PCIe Driver, Copyright (C) 2021-2022 Corigine Inc.\n", + nfp_driver_name); + + nfp_net_debugfs_create(); + + err = pci_register_driver(&nfp_pci_driver); + if (err < 0) + goto err_destroy_debugfs; + + err = pci_register_driver(&nfp_netvf_pci_driver); + if (err) + goto err_unreg_pf; + + return err; + +err_unreg_pf: + pci_unregister_driver(&nfp_pci_driver); +err_destroy_debugfs: + nfp_net_debugfs_destroy(); + return err; +} + +static void __exit nfp_main_exit(void) +{ + pci_unregister_driver(&nfp_netvf_pci_driver); + pci_unregister_driver(&nfp_pci_driver); + nfp_net_debugfs_destroy(); +} + +module_init(nfp_main_init); +module_exit(nfp_main_exit); + +MODULE_FIRMWARE("netronome/nic_AMDA0058-0011_2x40.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0058-0012_2x40.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0081-0001_1x40.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0081-0001_4x10.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0096-0001_2x10.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_2x40.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_4x10_1x40.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw"); +MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw"); + +MODULE_AUTHOR("Corigine, Inc. <oss-drivers@corigine.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("The Network Flow Processor (NFP) driver."); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h new file mode 100644 index 000000000..b9266cf72 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_main.h + * Author: Jason McMullan <jason.mcmullan@netronome.com> + */ + +#ifndef NFP_MAIN_H +#define NFP_MAIN_H + +#include <linux/ethtool.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/msi.h> +#include <linux/pci.h> +#include <linux/workqueue.h> +#include <net/devlink.h> + +struct dentry; +struct device; +struct pci_dev; + +struct nfp_cpp; +struct nfp_cpp_area; +struct nfp_eth_table; +struct nfp_hwinfo; +struct nfp_mip; +struct nfp_net; +struct nfp_nsp_identify; +struct nfp_eth_media_buf; +struct nfp_port; +struct nfp_rtsym; +struct nfp_rtsym_table; +struct nfp_shared_buf; + +/** + * struct nfp_dumpspec - NFP FW dump specification structure + * @size: Size of the data + * @data: Sequence of TLVs, each being an instruction to dump some data + * from FW + */ +struct nfp_dumpspec { + u32 size; + u8 data[]; +}; + +/** + * struct nfp_pf - NFP PF-specific device structure + * @pdev: Backpointer to PCI device + * @dev_info: NFP ASIC params + * @cpp: Pointer to the CPP handle + * @app: Pointer to the APP handle + * @data_vnic_bar: Pointer to the CPP area for the data vNICs' BARs + * @ctrl_vnic_bar: Pointer to the CPP area for the ctrl vNIC's BAR + * @qc_area: Pointer to the CPP area for the queues + * @mac_stats_bar: Pointer to the CPP area for the MAC stats + * @mac_stats_mem: Pointer to mapped MAC stats area + * @vf_cfg_bar: Pointer to the CPP area for the VF configuration BAR + * @vf_cfg_mem: Pointer to mapped VF configuration area + * @vfcfg_tbl2_area: Pointer to the CPP area for the VF config table + * @vfcfg_tbl2: Pointer to mapped VF config table + * @mbox: RTSym of per-PCI PF mailbox (under devlink lock) + * @irq_entries: Array of MSI-X entries for all vNICs + * @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit) + * @num_vfs: Number of SR-IOV VFs enabled + * @fw_loaded: Is the firmware loaded? + * @unload_fw_on_remove:Do we need to unload firmware on driver removal? + * @ctrl_vnic: Pointer to the control vNIC if available + * @mip: MIP handle + * @rtbl: RTsym table + * @hwinfo: HWInfo table + * @dumpspec: Debug dump specification + * @dump_flag: Store dump flag between set_dump and get_dump_flag + * @dump_len: Store dump length between set_dump and get_dump_flag + * @eth_tbl: NSP ETH table + * @nspi: NSP identification info + * @hwmon_dev: pointer to hwmon device + * @ddir: Per-device debugfs directory + * @max_data_vnics: Number of data vNICs app firmware supports + * @num_vnics: Number of vNICs spawned + * @vnics: Linked list of vNIC structures (struct nfp_net) + * @ports: Linked list of port structures (struct nfp_port) + * @wq: Workqueue for running works which need to grab @lock + * @port_refresh_work: Work entry for taking netdevs out + * @shared_bufs: Array of shared buffer structures if FW has any SBs + * @num_shared_bufs: Number of elements in @shared_bufs + * + * Fields which may change after proble are protected by devlink instance lock. + */ +struct nfp_pf { + struct pci_dev *pdev; + const struct nfp_dev_info *dev_info; + + struct nfp_cpp *cpp; + + struct nfp_app *app; + + struct nfp_cpp_area *data_vnic_bar; + struct nfp_cpp_area *ctrl_vnic_bar; + struct nfp_cpp_area *qc_area; + struct nfp_cpp_area *mac_stats_bar; + u8 __iomem *mac_stats_mem; + struct nfp_cpp_area *vf_cfg_bar; + u8 __iomem *vf_cfg_mem; + struct nfp_cpp_area *vfcfg_tbl2_area; + u8 __iomem *vfcfg_tbl2; + + const struct nfp_rtsym *mbox; + + struct msix_entry *irq_entries; + + unsigned int limit_vfs; + unsigned int num_vfs; + + bool fw_loaded; + bool unload_fw_on_remove; + + struct nfp_net *ctrl_vnic; + + const struct nfp_mip *mip; + struct nfp_rtsym_table *rtbl; + struct nfp_hwinfo *hwinfo; + struct nfp_dumpspec *dumpspec; + u32 dump_flag; + u32 dump_len; + struct nfp_eth_table *eth_tbl; + struct nfp_nsp_identify *nspi; + + struct device *hwmon_dev; + + struct dentry *ddir; + + unsigned int max_data_vnics; + unsigned int num_vnics; + + struct list_head vnics; + struct list_head ports; + + struct workqueue_struct *wq; + struct work_struct port_refresh_work; + + struct nfp_shared_buf *shared_bufs; + unsigned int num_shared_bufs; +}; + +extern struct pci_driver nfp_netvf_pci_driver; + +extern const struct devlink_ops nfp_devlink_ops; + +int nfp_net_pci_probe(struct nfp_pf *pf); +void nfp_net_pci_remove(struct nfp_pf *pf); + +int nfp_hwmon_register(struct nfp_pf *pf); +void nfp_hwmon_unregister(struct nfp_pf *pf); + +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port); + +bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); + +int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, + unsigned int default_val); +int nfp_net_pf_get_app_id(struct nfp_pf *pf); +u8 __iomem * +nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, + unsigned int min_size, struct nfp_cpp_area **area); +int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length, + void *out_data, u64 out_length); +int nfp_flash_update_common(struct nfp_pf *pf, const struct firmware *fw, + struct netlink_ext_ack *extack); + +enum nfp_dump_diag { + NFP_DUMP_NSP_DIAG = 0, +}; + +struct nfp_dumpspec * +nfp_net_dump_load_dumpspec(struct nfp_cpp *cpp, struct nfp_rtsym_table *rtbl); +s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec, + u32 flag); +int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec, + struct ethtool_dump *dump_param, void *dest); + +int nfp_shared_buf_register(struct nfp_pf *pf); +void nfp_shared_buf_unregister(struct nfp_pf *pf); +int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + +int nfp_devlink_params_register(struct nfp_pf *pf); +void nfp_devlink_params_unregister(struct nfp_pf *pf); + +unsigned int nfp_net_lr2speed(unsigned int linkrate); +unsigned int nfp_net_speed2lr(unsigned int speed); +#endif /* NFP_MAIN_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h new file mode 100644 index 000000000..a101ff30a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -0,0 +1,984 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_net.h + * Declarations for Netronome network device driver. + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#ifndef _NFP_NET_H_ +#define _NFP_NET_H_ + +#include <linux/atomic.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/dim.h> +#include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> +#include <net/xdp.h> + +#include "nfp_net_ctrl.h" + +#define nn_pr(nn, lvl, fmt, args...) \ + ({ \ + struct nfp_net *__nn = (nn); \ + \ + if (__nn->dp.netdev) \ + netdev_printk(lvl, __nn->dp.netdev, fmt, ## args); \ + else \ + dev_printk(lvl, __nn->dp.dev, "ctrl: " fmt, ## args); \ + }) + +#define nn_err(nn, fmt, args...) nn_pr(nn, KERN_ERR, fmt, ## args) +#define nn_warn(nn, fmt, args...) nn_pr(nn, KERN_WARNING, fmt, ## args) +#define nn_info(nn, fmt, args...) nn_pr(nn, KERN_INFO, fmt, ## args) +#define nn_dbg(nn, fmt, args...) nn_pr(nn, KERN_DEBUG, fmt, ## args) + +#define nn_dp_warn(dp, fmt, args...) \ + ({ \ + struct nfp_net_dp *__dp = (dp); \ + \ + if (unlikely(net_ratelimit())) { \ + if (__dp->netdev) \ + netdev_warn(__dp->netdev, fmt, ## args); \ + else \ + dev_warn(__dp->dev, fmt, ## args); \ + } \ + }) + +/* Max time to wait for NFP to respond on updates (in seconds) */ +#define NFP_NET_POLL_TIMEOUT 5 + +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + +/* Bar allocation */ +#define NFP_NET_CTRL_BAR 0 +#define NFP_NET_Q0_BAR 2 +#define NFP_NET_Q1_BAR 4 /* OBSOLETE */ + +/* Default size for MTU and freelist buffer sizes */ +#define NFP_NET_DEFAULT_MTU 1500U + +/* Maximum number of bytes prepended to a packet */ +#define NFP_NET_MAX_PREPEND 64 + +/* Interrupt definitions */ +#define NFP_NET_NON_Q_VECTORS 2 +#define NFP_NET_IRQ_LSC_IDX 0 +#define NFP_NET_IRQ_EXN_IDX 1 +#define NFP_NET_MIN_VNIC_IRQS (NFP_NET_NON_Q_VECTORS + 1) + +/* Queue/Ring definitions */ +#define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ +#define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ +#define NFP_NET_MAX_R_VECS (NFP_NET_MAX_TX_RINGS > NFP_NET_MAX_RX_RINGS ? \ + NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS) +#define NFP_NET_MAX_IRQS (NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS) + +#define NFP_NET_TX_DESCS_DEFAULT 4096 /* Default # of Tx descs per ring */ +#define NFP_NET_RX_DESCS_DEFAULT 4096 /* Default # of Rx descs per ring */ + +#define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */ +#define NFP_NET_XDP_MAX_COMPLETE 2048 /* XDP bufs to reclaim in NAPI poll */ + +/* Offload definitions */ +#define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) + +#define NFP_NET_RX_BUF_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define NFP_NET_RX_BUF_NON_DATA (NFP_NET_RX_BUF_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +/* Forward declarations */ +struct nfp_cpp; +struct nfp_dev_info; +struct nfp_dp_ops; +struct nfp_eth_table_port; +struct nfp_net; +struct nfp_net_r_vector; +struct nfp_port; +struct xsk_buff_pool; + +struct nfp_nfd3_tx_desc; +struct nfp_nfd3_tx_buf; + +struct nfp_nfdk_tx_desc; +struct nfp_nfdk_tx_buf; + +/* Convenience macro for wrapping descriptor index on ring size */ +#define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) + +/* Convenience macro for writing dma address into RX/TX descriptors */ +#define nfp_desc_set_dma_addr_40b(desc, dma_addr) \ + do { \ + __typeof__(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ + } while (0) + +#define nfp_desc_set_dma_addr_48b(desc, dma_addr) \ + do { \ + __typeof__(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr)); \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + } while (0) + +/** + * struct nfp_net_tx_ring - TX ring structure + * @r_vec: Back pointer to ring vector structure + * @idx: Ring index from Linux's perspective + * @data_pending: number of bytes added to current block (NFDK only) + * @qcp_q: Pointer to base of the QCP TX queue + * @txrwb: TX pointer write back area + * @cnt: Size of the queue in number of descriptors + * @wr_p: TX ring write pointer (free running) + * @rd_p: TX ring read pointer (free running) + * @qcp_rd_p: Local copy of QCP TX queue read pointer + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for .xmit_more delayed kick) + * @txbufs: Array of transmitted TX buffers, to free on transmit (NFD3) + * @ktxbufs: Array of transmitted TX buffers, to free on transmit (NFDK) + * @txds: Virtual address of TX ring in host memory (NFD3) + * @ktxds: Virtual address of TX ring in host memory (NFDK) + * + * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue + * @dma: DMA address of the TX ring + * @size: Size, in bytes, of the TX ring (needed to free) + * @is_xdp: Is this a XDP TX ring? + */ +struct nfp_net_tx_ring { + struct nfp_net_r_vector *r_vec; + + u16 idx; + u16 data_pending; + u8 __iomem *qcp_q; + u64 *txrwb; + + u32 cnt; + u32 wr_p; + u32 rd_p; + u32 qcp_rd_p; + + u32 wr_ptr_add; + + union { + struct nfp_nfd3_tx_buf *txbufs; + struct nfp_nfdk_tx_buf *ktxbufs; + }; + union { + struct nfp_nfd3_tx_desc *txds; + struct nfp_nfdk_tx_desc *ktxds; + }; + + /* Cold data follows */ + int qcidx; + + dma_addr_t dma; + size_t size; + bool is_xdp; +} ____cacheline_aligned; + +/* RX and freelist descriptor format */ + +#define PCIE_DESC_RX_DD BIT(7) +#define PCIE_DESC_RX_META_LEN_MASK GENMASK(6, 0) + +/* Flags in the RX descriptor */ +#define PCIE_DESC_RX_RSS cpu_to_le16(BIT(15)) +#define PCIE_DESC_RX_I_IP4_CSUM cpu_to_le16(BIT(14)) +#define PCIE_DESC_RX_I_IP4_CSUM_OK cpu_to_le16(BIT(13)) +#define PCIE_DESC_RX_I_TCP_CSUM cpu_to_le16(BIT(12)) +#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) +#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) +#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) +#define PCIE_DESC_RX_DECRYPTED cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) +#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) +#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) +#define PCIE_DESC_RX_TCP_CSUM cpu_to_le16(BIT(4)) +#define PCIE_DESC_RX_TCP_CSUM_OK cpu_to_le16(BIT(3)) +#define PCIE_DESC_RX_UDP_CSUM cpu_to_le16(BIT(2)) +#define PCIE_DESC_RX_UDP_CSUM_OK cpu_to_le16(BIT(1)) +#define PCIE_DESC_RX_VLAN cpu_to_le16(BIT(0)) + +#define PCIE_DESC_RX_CSUM_ALL (PCIE_DESC_RX_IP4_CSUM | \ + PCIE_DESC_RX_TCP_CSUM | \ + PCIE_DESC_RX_UDP_CSUM | \ + PCIE_DESC_RX_I_IP4_CSUM | \ + PCIE_DESC_RX_I_TCP_CSUM | \ + PCIE_DESC_RX_I_UDP_CSUM) +#define PCIE_DESC_RX_CSUM_OK_SHIFT 1 +#define __PCIE_DESC_RX_CSUM_ALL le16_to_cpu(PCIE_DESC_RX_CSUM_ALL) +#define __PCIE_DESC_RX_CSUM_ALL_OK (__PCIE_DESC_RX_CSUM_ALL >> \ + PCIE_DESC_RX_CSUM_OK_SHIFT) + +struct nfp_net_rx_desc { + union { + struct { + __le16 dma_addr_hi; /* High bits of the buf address */ + u8 reserved; /* Must be zero */ + u8 meta_len_dd; /* Must be zero */ + + __le32 dma_addr_lo; /* Low bits of the buffer address */ + } __packed fld; + + struct { + __le16 data_len; /* Length of the frame + meta data */ + u8 reserved; + u8 meta_len_dd; /* Length of meta data prepended + + * descriptor done flag. + */ + + __le16 flags; /* RX flags. See @PCIE_DESC_RX_* */ + __le16 vlan; /* VLAN if stripped */ + } __packed rxd; + + __le32 vals[2]; + }; +}; + +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) +#define NFP_NET_VLAN_CTAG 0 +#define NFP_NET_VLAN_STAG 1 + +struct nfp_meta_parsed { + u8 hash_type; + u8 csum_type; + u32 hash; + u32 mark; + u32 portid; + __wsum csum; + struct { + bool stripped; + u8 tpid; + u16 tci; + } vlan; +}; + +struct nfp_net_rx_hash { + __be32 hash_type; + __be32 hash; +}; + +/** + * struct nfp_net_rx_buf - software RX buffer descriptor + * @frag: page fragment buffer + * @dma_addr: DMA mapping address of the buffer + */ +struct nfp_net_rx_buf { + void *frag; + dma_addr_t dma_addr; +}; + +/** + * struct nfp_net_xsk_rx_buf - software RX XSK buffer descriptor + * @dma_addr: DMA mapping address of the buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + */ +struct nfp_net_xsk_rx_buf { + dma_addr_t dma_addr; + struct xdp_buff *xdp; +}; + +/** + * struct nfp_net_rx_ring - RX ring structure + * @r_vec: Back pointer to ring vector structure + * @cnt: Size of the queue in number of descriptors + * @wr_p: FL/RX ring write pointer (free running) + * @rd_p: FL/RX ring read pointer (free running) + * @idx: Ring index from Linux's perspective + * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist + * @qcp_fl: Pointer to base of the QCP freelist queue + * @rxbufs: Array of transmitted FL/RX buffers + * @xsk_rxbufs: Array of transmitted FL/RX buffers (for AF_XDP) + * @rxds: Virtual address of FL/RX ring in host memory + * @xdp_rxq: RX-ring info avail for XDP + * @dma: DMA address of the FL/RX ring + * @size: Size, in bytes, of the FL/RX ring (needed to free) + */ +struct nfp_net_rx_ring { + struct nfp_net_r_vector *r_vec; + + u32 cnt; + u32 wr_p; + u32 rd_p; + + u32 idx; + + int fl_qcidx; + u8 __iomem *qcp_fl; + + struct nfp_net_rx_buf *rxbufs; + struct nfp_net_xsk_rx_buf *xsk_rxbufs; + struct nfp_net_rx_desc *rxds; + + struct xdp_rxq_info xdp_rxq; + + dma_addr_t dma; + size_t size; +} ____cacheline_aligned; + +/** + * struct nfp_net_r_vector - Per ring interrupt vector configuration + * @nfp_net: Backpointer to nfp_net structure + * @napi: NAPI structure for this ring vec + * @tasklet: ctrl vNIC, tasklet for servicing the r_vec + * @queue: ctrl vNIC, send queue + * @lock: ctrl vNIC, r_vec lock protects @queue + * @tx_ring: Pointer to TX ring + * @rx_ring: Pointer to RX ring + * @xdp_ring: Pointer to an extra TX ring for XDP + * @xsk_pool: XSK buffer pool active on vector queue pair (for AF_XDP) + * @irq_entry: MSI-X table entry (use for talking to the device) + * @event_ctr: Number of interrupt + * @rx_dim: Dynamic interrupt moderation structure for RX + * @tx_dim: Dynamic interrupt moderation structure for TX + * @rx_sync: Seqlock for atomic updates of RX stats + * @rx_pkts: Number of received packets + * @rx_bytes: Number of received bytes + * @rx_drops: Number of packets dropped on RX due to lack of resources + * @hw_csum_rx_ok: Counter of packets where the HW checksum was OK + * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK + * @hw_csum_rx_complete: Counter of packets with CHECKSUM_COMPLETE reported + * @hw_csum_rx_error: Counter of packets with bad checksums + * @hw_tls_rx: Number of packets with TLS decrypted by hardware + * @tx_sync: Seqlock for atomic updates of TX stats + * @tx_pkts: Number of Transmitted packets + * @tx_bytes: Number of Transmitted bytes + * @hw_csum_tx: Counter of packets with TX checksum offload requested + * @hw_csum_tx_inner: Counter of inner TX checksum offload requests + * @tx_gather: Counter of packets with Gather DMA + * @tx_lso: Counter of LSO packets sent + * @hw_tls_tx: Counter of TLS packets sent with crypto offloaded to HW + * @tls_tx_fallback: Counter of TLS packets sent which had to be encrypted + * by the fallback path because packets came out of order + * @tls_tx_no_fallback: Counter of TLS packets not sent because the fallback + * path could not encrypt them + * @tx_errors: How many TX errors were encountered + * @tx_busy: How often was TX busy (no space)? + * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures + * @irq_vector: Interrupt vector number (use for talking to the OS) + * @handler: Interrupt handler for this ring vector + * @name: Name of the interrupt vector + * @affinity_mask: SMP affinity mask for this vector + * + * This structure ties RX and TX rings to interrupt vectors and a NAPI + * context. This currently only supports one RX and TX ring per + * interrupt vector but might be extended in the future to allow + * association of multiple rings per vector. + */ +struct nfp_net_r_vector { + struct nfp_net *nfp_net; + union { + struct napi_struct napi; + struct { + struct tasklet_struct tasklet; + struct sk_buff_head queue; + spinlock_t lock; + }; + }; + + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_rx_ring *rx_ring; + + u16 irq_entry; + + u16 event_ctr; + struct dim rx_dim; + struct dim tx_dim; + + struct u64_stats_sync rx_sync; + u64 rx_pkts; + u64 rx_bytes; + u64 rx_drops; + u64 hw_csum_rx_ok; + u64 hw_csum_rx_inner_ok; + u64 hw_csum_rx_complete; + u64 hw_tls_rx; + + u64 hw_csum_rx_error; + u64 rx_replace_buf_alloc_fail; + + struct nfp_net_tx_ring *xdp_ring; + struct xsk_buff_pool *xsk_pool; + + struct u64_stats_sync tx_sync; + u64 tx_pkts; + u64 tx_bytes; + + u64 ____cacheline_aligned_in_smp hw_csum_tx; + u64 hw_csum_tx_inner; + u64 tx_gather; + u64 tx_lso; + u64 hw_tls_tx; + + u64 tls_tx_fallback; + u64 tls_tx_no_fallback; + u64 tx_errors; + u64 tx_busy; + + /* Cold data follows */ + + u32 irq_vector; + irq_handler_t handler; + char name[IFNAMSIZ + 8]; + cpumask_t affinity_mask; +} ____cacheline_aligned; + +/* Firmware version as it is written in the 32bit value in the BAR */ +struct nfp_net_fw_version { + u8 minor; + u8 major; + u8 class; + + /* This byte can be exploited for more use, currently, + * BIT0: dp type, BIT[7:1]: reserved + */ + u8 extend; +} __packed; + +static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, + u8 extend, u8 class, u8 major, u8 minor) +{ + return fw_ver->extend == extend && + fw_ver->class == class && + fw_ver->major == major && + fw_ver->minor == minor; +} + +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + +/** + * struct nfp_net_dp - NFP network device datapath data structure + * @dev: Backpointer to struct device + * @netdev: Backpointer to net_device structure + * @is_vf: Is the driver attached to a VF? + * @chained_metadata_format: Firemware will use new metadata format + * @ktls_tx: Is kTLS TX enabled? + * @rx_dma_dir: Mapping direction for RX buffers + * @rx_dma_off: Offset at which DMA packets (for XDP headroom) + * @rx_offset: Offset in the RX buffers where packet data starts + * @ctrl: Local copy of the control register/word. + * @fl_bufsz: Currently configured size of the freelist buffers + * @xdp_prog: Installed XDP program + * @tx_rings: Array of pre-allocated TX ring structures + * @rx_rings: Array of pre-allocated RX ring structures + * @ctrl_bar: Pointer to mapped control BAR + * + * @ops: Callbacks and parameters for this vNIC's NFD version + * @txrwb: TX pointer write back area (indexed by queue id) + * @txrwb_dma: TX pointer write back area DMA address + * @txd_cnt: Size of the TX ring in number of min size packets + * @rxd_cnt: Size of the RX ring in number of min size packets + * @num_r_vecs: Number of used ring vectors + * @num_tx_rings: Currently configured number of TX rings + * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) + * @num_rx_rings: Currently configured number of RX rings + * @mtu: Device MTU + * @xsk_pools: XSK buffer pools, @max_r_vecs in size (for AF_XDP). + */ +struct nfp_net_dp { + struct device *dev; + struct net_device *netdev; + + u8 is_vf:1; + u8 chained_metadata_format:1; + u8 ktls_tx:1; + + u8 rx_dma_dir; + u8 rx_offset; + + u32 rx_dma_off; + + u32 ctrl; + u32 fl_bufsz; + + struct bpf_prog *xdp_prog; + + struct nfp_net_tx_ring *tx_rings; + struct nfp_net_rx_ring *rx_rings; + + u8 __iomem *ctrl_bar; + + /* Cold data follows */ + + const struct nfp_dp_ops *ops; + + u64 *txrwb; + dma_addr_t txrwb_dma; + + unsigned int txd_cnt; + unsigned int rxd_cnt; + + unsigned int num_r_vecs; + + unsigned int num_tx_rings; + unsigned int num_stack_tx_rings; + unsigned int num_rx_rings; + + unsigned int mtu; + + struct xsk_buff_pool **xsk_pools; +}; + +/** + * struct nfp_net - NFP network device structure + * @dp: Datapath structure + * @dev_info: NFP ASIC params + * @id: vNIC id within the PF (0 for VFs) + * @fw_ver: Firmware version + * @cap: Capabilities advertised by the Firmware + * @max_mtu: Maximum support MTU advertised by the Firmware + * @rss_hfunc: RSS selected hash function + * @rss_cfg: RSS configuration + * @rss_key: RSS secret key + * @rss_itbl: RSS indirection table + * @xdp: Information about the driver XDP program + * @xdp_hw: Information about the HW XDP program + * @max_r_vecs: Number of allocated interrupt vectors for RX/TX + * @max_tx_rings: Maximum number of TX rings supported by the Firmware + * @max_rx_rings: Maximum number of RX rings supported by the Firmware + * @stride_rx: Queue controller RX queue spacing + * @stride_tx: Queue controller TX queue spacing + * @r_vecs: Pre-allocated array of ring vectors + * @irq_entries: Pre-allocated array of MSI-X entries + * @lsc_handler: Handler for Link State Change interrupt + * @lsc_name: Name for Link State Change interrupt + * @exn_handler: Handler for Exception interrupt + * @exn_name: Name for Exception interrupt + * @shared_handler: Handler for shared interrupts + * @shared_name: Name for shared interrupt + * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active, + * @reconfig_sync_present and HW reconfiguration request + * regs/machinery from async requests (sync must take + * @bar_lock) + * @reconfig_posted: Pending reconfig bits coming from async sources + * @reconfig_timer_active: Timer for reading reconfiguration results is pending + * @reconfig_sync_present: Some thread is performing synchronous reconfig + * @reconfig_timer: Timer for async reading of reconfig results + * @reconfig_in_progress_update: Update FW is processing now (debug only) + * @bar_lock: vNIC config BAR access lock, protects: update, + * mailbox area, crypto TLV + * @link_up: Is the link up? + * @link_status_lock: Protects @link_* and ensures atomicity with BAR reading + * @rx_coalesce_adapt_on: Is RX interrupt moderation adaptive? + * @tx_coalesce_adapt_on: Is TX interrupt moderation adaptive? + * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter + * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter + * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter + * @tx_coalesce_max_frames: TX interrupt moderation frame count parameter + * @qcp_cfg: Pointer to QCP queue used for configuration notification + * @tx_bar: Pointer to mapped TX queues + * @rx_bar: Pointer to mapped FL/RX queues + * @tlv_caps: Parsed TLV capabilities + * @ktls_tx_conn_cnt: Number of offloaded kTLS TX connections + * @ktls_rx_conn_cnt: Number of offloaded kTLS RX connections + * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) + * @ktls_no_space: Counter of firmware rejecting kTLS connection due to + * lack of space + * @ktls_rx_resync_req: Counter of TLS RX resync requested + * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored + * @ktls_rx_resync_sent: Counter of TLS RX resync completed + * @mbox_cmsg: Common Control Message via vNIC mailbox state + * @mbox_cmsg.queue: CCM mbox queue of pending messages + * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes + * @mbox_cmsg.workq: CCM mbox work queue for @wait_work and @runq_work + * @mbox_cmsg.wait_work: CCM mbox posted msg reconfig wait work + * @mbox_cmsg.runq_work: CCM mbox posted msg queue runner work + * @mbox_cmsg.tag: CCM mbox message tag allocator + * @debugfs_dir: Device directory in debugfs + * @vnic_list: Entry on device vNIC list + * @pdev: Backpointer to PCI device + * @app: APP handle if available + * @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return + * -EOPNOTSUPP to keep backwards compatibility (set by app) + * @port: Pointer to nfp_port structure if vNIC is a port + * @app_priv: APP private data for this vNIC + */ +struct nfp_net { + struct nfp_net_dp dp; + + const struct nfp_dev_info *dev_info; + struct nfp_net_fw_version fw_ver; + + u32 id; + + u32 cap; + u32 max_mtu; + + u8 rss_hfunc; + u32 rss_cfg; + u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; + u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + + struct xdp_attachment_info xdp; + struct xdp_attachment_info xdp_hw; + + unsigned int max_tx_rings; + unsigned int max_rx_rings; + + int stride_tx; + int stride_rx; + + unsigned int max_r_vecs; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS]; + struct msix_entry irq_entries[NFP_NET_MAX_IRQS]; + + irq_handler_t lsc_handler; + char lsc_name[IFNAMSIZ + 8]; + + irq_handler_t exn_handler; + char exn_name[IFNAMSIZ + 8]; + + irq_handler_t shared_handler; + char shared_name[IFNAMSIZ + 8]; + + bool link_up; + spinlock_t link_status_lock; + + spinlock_t reconfig_lock; + u32 reconfig_posted; + bool reconfig_timer_active; + bool reconfig_sync_present; + struct timer_list reconfig_timer; + u32 reconfig_in_progress_update; + + struct semaphore bar_lock; + + bool rx_coalesce_adapt_on; + bool tx_coalesce_adapt_on; + u32 rx_coalesce_usecs; + u32 rx_coalesce_max_frames; + u32 tx_coalesce_usecs; + u32 tx_coalesce_max_frames; + + u8 __iomem *qcp_cfg; + + u8 __iomem *tx_bar; + u8 __iomem *rx_bar; + + struct nfp_net_tlv_caps tlv_caps; + + unsigned int ktls_tx_conn_cnt; + unsigned int ktls_rx_conn_cnt; + + atomic64_t ktls_conn_id_gen; + + atomic_t ktls_no_space; + atomic_t ktls_rx_resync_req; + atomic_t ktls_rx_resync_ign; + atomic_t ktls_rx_resync_sent; + + struct { + struct sk_buff_head queue; + wait_queue_head_t wq; + struct workqueue_struct *workq; + struct work_struct wait_work; + struct work_struct runq_work; + u16 tag; + } mbox_cmsg; + + struct dentry *debugfs_dir; + + struct list_head vnic_list; + + struct pci_dev *pdev; + struct nfp_app *app; + + bool vnic_no_name; + + struct nfp_port *port; + + void *app_priv; +}; + +/* Functions to read/write from/to a BAR + * Performs any endian conversion necessary. + */ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->dp.ctrl_bar + off); +} + +static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) +{ + writeb(val, nn->dp.ctrl_bar + off); +} + +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + return readw(nn->dp.ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + writew(val, nn->dp.ctrl_bar + off); +} + +static inline u32 nn_readl(struct nfp_net *nn, int off) +{ + return readl(nn->dp.ctrl_bar + off); +} + +static inline void nn_writel(struct nfp_net *nn, int off, u32 val) +{ + writel(val, nn->dp.ctrl_bar + off); +} + +static inline u64 nn_readq(struct nfp_net *nn, int off) +{ + return readq(nn->dp.ctrl_bar + off); +} + +static inline void nn_writeq(struct nfp_net *nn, int off, u64 val) +{ + writeq(val, nn->dp.ctrl_bar + off); +} + +/* Flush posted PCI writes by reading something without side effects */ +static inline void nn_pci_flush(struct nfp_net *nn) +{ + nn_readl(nn, NFP_NET_CFG_VERSION); +} + +/* Queue Controller Peripheral access functions and definitions. + * + * Some of the BARs of the NFP are mapped to portions of the Queue + * Controller Peripheral (QCP) address space on the NFP. A QCP queue + * has a read and a write pointer (as well as a size and flags, + * indicating overflow etc). The QCP offers a number of different + * operation on queue pointers, but here we only offer function to + * either add to a pointer or to read the pointer value. + */ +#define NFP_QCP_QUEUE_ADDR_SZ 0x800 +#define NFP_QCP_QUEUE_OFF(_x) ((_x) * NFP_QCP_QUEUE_ADDR_SZ) +#define NFP_QCP_QUEUE_ADD_RPTR 0x0000 +#define NFP_QCP_QUEUE_ADD_WPTR 0x0004 +#define NFP_QCP_QUEUE_STS_LO 0x0008 +#define NFP_QCP_QUEUE_STS_LO_READPTR_mask 0x3ffff +#define NFP_QCP_QUEUE_STS_HI 0x000c +#define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff + +/* nfp_qcp_ptr - Read or Write Pointer of a queue */ +enum nfp_qcp_ptr { + NFP_QCP_READ_PTR = 0, + NFP_QCP_WRITE_PTR +}; + +/** + * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + */ +static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) +{ + writel(val, q + NFP_QCP_QUEUE_ADD_RPTR); +} + +/** + * nfp_qcp_wr_ptr_add() - Add the value to the write pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + */ +static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val) +{ + writel(val, q + NFP_QCP_QUEUE_ADD_WPTR); +} + +static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr) +{ + u32 off; + u32 val; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_STS_LO; + else + off = NFP_QCP_QUEUE_STS_HI; + + val = readl(q + off); + + if (ptr == NFP_QCP_READ_PTR) + return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; + else + return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; +} + +/** + * nfp_qcp_rd_ptr_read() - Read the current read pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_rd_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_READ_PTR); +} + +/** + * nfp_qcp_wr_ptr_read() - Read the current write pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR); +} + +u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue); + +static inline bool nfp_net_is_data_vnic(struct nfp_net *nn) +{ + WARN_ON_ONCE(!nn->dp.netdev && nn->port); + return !!nn->dp.netdev; +} + +static inline bool nfp_net_running(struct nfp_net *nn) +{ + return nn->dp.ctrl & NFP_NET_CFG_CTRL_ENABLE; +} + +static inline const char *nfp_net_name(struct nfp_net *nn) +{ + return nn->dp.netdev ? nn->dp.netdev->name : "ctrl"; +} + +static inline void nfp_ctrl_lock(struct nfp_net *nn) + __acquires(&nn->r_vecs[0].lock) +{ + spin_lock_bh(&nn->r_vecs[0].lock); +} + +static inline void nfp_ctrl_unlock(struct nfp_net *nn) + __releases(&nn->r_vecs[0].lock) +{ + spin_unlock_bh(&nn->r_vecs[0].lock); +} + +static inline void nn_ctrl_bar_lock(struct nfp_net *nn) +{ + down(&nn->bar_lock); +} + +static inline bool nn_ctrl_bar_trylock(struct nfp_net *nn) +{ + return !down_trylock(&nn->bar_lock); +} + +static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) +{ + up(&nn->bar_lock); +} + +/* Globals */ +extern const char nfp_driver_version[]; + +extern const struct net_device_ops nfp_nfd3_netdev_ops; +extern const struct net_device_ops nfp_nfdk_netdev_ops; + +static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev) +{ + return netdev->netdev_ops == &nfp_nfd3_netdev_ops || + netdev->netdev_ops == &nfp_nfdk_netdev_ops; +} + +static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts) +{ + if ((usecs >= ((1 << 16) - 1)) || (pkts >= ((1 << 16) - 1))) + return -EINVAL; + + return 0; +} + +/* Prototypes */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar); + +struct nfp_net * +nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, + void __iomem *ctrl_bar, bool needs_netdev, + unsigned int max_tx_rings, unsigned int max_rx_rings); +void nfp_net_free(struct nfp_net *nn); + +int nfp_net_init(struct nfp_net *nn); +void nfp_net_clean(struct nfp_net *nn); + +int nfp_ctrl_open(struct nfp_net *nn); +void nfp_ctrl_close(struct nfp_net *nn); + +void nfp_net_set_ethtool_ops(struct net_device *netdev); +void nfp_net_info(struct nfp_net *nn); +int __nfp_net_reconfig(struct nfp_net *nn, u32 update); +int nfp_net_reconfig(struct nfp_net *nn, u32 update); +unsigned int nfp_net_rss_key_sz(struct nfp_net *nn); +void nfp_net_rss_write_itbl(struct nfp_net *nn); +void nfp_net_rss_write_key(struct nfp_net *nn); +void nfp_net_coalesce_write_cfg(struct nfp_net *nn); +int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size); +int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd); +int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); + +unsigned int +nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, + unsigned int min_irqs, unsigned int want_irqs); +void nfp_net_irqs_disable(struct pci_dev *pdev); +void +nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, + unsigned int n); +struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags); +void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle); + +struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); +int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, + struct netlink_ext_ack *extack); + +#ifdef CONFIG_NFP_DEBUG +void nfp_net_debugfs_create(void); +void nfp_net_debugfs_destroy(void); +struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev); +void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir); +void nfp_net_debugfs_dir_clean(struct dentry **dir); +#else +static inline void nfp_net_debugfs_create(void) +{ +} + +static inline void nfp_net_debugfs_destroy(void) +{ +} + +static inline struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev) +{ + return NULL; +} + +static inline void +nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir) +{ +} + +static inline void nfp_net_debugfs_dir_clean(struct dentry **dir) +{ +} +#endif /* CONFIG_NFP_DEBUG */ + +#endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c new file mode 100644 index 000000000..27f4786ac --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -0,0 +1,2593 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +/* + * nfp_net_common.c + * Netronome network device driver: Common functions between PF and VF + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + * Chris Telfer <chris.telfer@netronome.com> + */ + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/mm.h> +#include <linux/overflow.h> +#include <linux/page_ref.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/msi.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/random.h> +#include <linux/vmalloc.h> +#include <linux/ktime.h> + +#include <net/tls.h> +#include <net/vxlan.h> +#include <net/xdp_sock_drv.h> + +#include "nfpcore/nfp_dev.h" +#include "nfpcore/nfp_nsp.h" +#include "ccm.h" +#include "nfp_app.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" +#include "nfp_net_dp.h" +#include "nfp_net_sriov.h" +#include "nfp_net_xsk.h" +#include "nfp_port.h" +#include "crypto/crypto.h" +#include "crypto/fw.h" + +/** + * nfp_net_get_fw_version() - Read and parse the FW version + * @fw_ver: Output fw_version structure to read to + * @ctrl_bar: Mapped address of the control BAR + */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar) +{ + u32 reg; + + reg = readl(ctrl_bar + NFP_NET_CFG_VERSION); + put_unaligned_le32(reg, fw_ver); +} + +u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue) +{ + queue &= dev_info->qc_idx_mask; + return dev_info->qc_addr_offset + NFP_QCP_QUEUE_ADDR_SZ * queue; +} + +/* Firmware reconfig + * + * Firmware reconfig may take a while so we have two versions of it - + * synchronous and asynchronous (posted). All synchronous callers are holding + * RTNL so we don't have to worry about serializing them. + */ +static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update) +{ + nn_writel(nn, NFP_NET_CFG_UPDATE, update); + /* ensure update is written before pinging HW */ + nn_pci_flush(nn); + nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); + nn->reconfig_in_progress_update = update; +} + +/* Pass 0 as update to run posted reconfigs. */ +static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update) +{ + update |= nn->reconfig_posted; + nn->reconfig_posted = 0; + + nfp_net_reconfig_start(nn, update); + + nn->reconfig_timer_active = true; + mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ); +} + +static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check) +{ + u32 reg; + + reg = nn_readl(nn, NFP_NET_CFG_UPDATE); + if (reg == 0) + return true; + if (reg & NFP_NET_CFG_UPDATE_ERR) { + nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n", + reg, nn->reconfig_in_progress_update, + nn_readl(nn, NFP_NET_CFG_CTRL)); + return true; + } else if (last_check) { + nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n", + reg, nn->reconfig_in_progress_update, + nn_readl(nn, NFP_NET_CFG_CTRL)); + return true; + } + + return false; +} + +static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline) +{ + bool timed_out = false; + int i; + + /* Poll update field, waiting for NFP to ack the config. + * Do an opportunistic wait-busy loop, afterward sleep. + */ + for (i = 0; i < 50; i++) { + if (nfp_net_reconfig_check_done(nn, false)) + return false; + udelay(4); + } + + while (!nfp_net_reconfig_check_done(nn, timed_out)) { + usleep_range(250, 500); + timed_out = time_is_before_eq_jiffies(deadline); + } + + return timed_out; +} + +static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline) +{ + if (__nfp_net_reconfig_wait(nn, deadline)) + return -EIO; + + if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR) + return -EIO; + + return 0; +} + +static void nfp_net_reconfig_timer(struct timer_list *t) +{ + struct nfp_net *nn = from_timer(nn, t, reconfig_timer); + + spin_lock_bh(&nn->reconfig_lock); + + nn->reconfig_timer_active = false; + + /* If sync caller is present it will take over from us */ + if (nn->reconfig_sync_present) + goto done; + + /* Read reconfig status and report errors */ + nfp_net_reconfig_check_done(nn, true); + + if (nn->reconfig_posted) + nfp_net_reconfig_start_async(nn, 0); +done: + spin_unlock_bh(&nn->reconfig_lock); +} + +/** + * nfp_net_reconfig_post() - Post async reconfig request + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Record FW reconfiguration request. Reconfiguration will be kicked off + * whenever reconfiguration machinery is idle. Multiple requests can be + * merged together! + */ +static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update) +{ + spin_lock_bh(&nn->reconfig_lock); + + /* Sync caller will kick off async reconf when it's done, just post */ + if (nn->reconfig_sync_present) { + nn->reconfig_posted |= update; + goto done; + } + + /* Opportunistically check if the previous command is done */ + if (!nn->reconfig_timer_active || + nfp_net_reconfig_check_done(nn, false)) + nfp_net_reconfig_start_async(nn, update); + else + nn->reconfig_posted |= update; +done: + spin_unlock_bh(&nn->reconfig_lock); +} + +static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) +{ + bool cancelled_timer = false; + u32 pre_posted_requests; + + spin_lock_bh(&nn->reconfig_lock); + + WARN_ON(nn->reconfig_sync_present); + nn->reconfig_sync_present = true; + + if (nn->reconfig_timer_active) { + nn->reconfig_timer_active = false; + cancelled_timer = true; + } + pre_posted_requests = nn->reconfig_posted; + nn->reconfig_posted = 0; + + spin_unlock_bh(&nn->reconfig_lock); + + if (cancelled_timer) { + del_timer_sync(&nn->reconfig_timer); + nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); + } + + /* Run the posted reconfigs which were issued before we started */ + if (pre_posted_requests) { + nfp_net_reconfig_start(nn, pre_posted_requests); + nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); + } +} + +static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) +{ + nfp_net_reconfig_sync_enter(nn); + + spin_lock_bh(&nn->reconfig_lock); + nn->reconfig_sync_present = false; + spin_unlock_bh(&nn->reconfig_lock); +} + +/** + * __nfp_net_reconfig() - Reconfigure the firmware + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Write the update word to the BAR and ping the reconfig queue. The + * poll until the firmware has acknowledged the update by zeroing the + * update word. + * + * Return: Negative errno on error, 0 on success + */ +int __nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int ret; + + nfp_net_reconfig_sync_enter(nn); + + nfp_net_reconfig_start(nn, update); + ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); + + spin_lock_bh(&nn->reconfig_lock); + + if (nn->reconfig_posted) + nfp_net_reconfig_start_async(nn, 0); + + nn->reconfig_sync_present = false; + + spin_unlock_bh(&nn->reconfig_lock); + + return ret; +} + +int nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int ret; + + nn_ctrl_bar_lock(nn); + ret = __nfp_net_reconfig(nn, update); + nn_ctrl_bar_unlock(nn); + + return ret; +} + +int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size) +{ + if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) { + nn_err(nn, "mailbox too small for %u of data (%u)\n", + data_size, nn->tlv_caps.mbox_len); + return -EIO; + } + + nn_ctrl_bar_lock(nn); + return 0; +} + +/** + * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox + * @nn: NFP Net device to reconfigure + * @mbox_cmd: The value for the mailbox command + * + * Helper function for mailbox updates + * + * Return: Negative errno on error, 0 on success + */ +int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) +{ + u32 mbox = nn->tlv_caps.mbox_off; + int ret; + + nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); + + ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX); + if (ret) { + nn_err(nn, "Mailbox update error\n"); + return ret; + } + + return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); +} + +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); + + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX); +} + +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nfp_net_reconfig_wait_posted(nn); + + return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); +} + +int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) +{ + int ret; + + ret = nfp_net_mbox_reconfig(nn, mbox_cmd); + nn_ctrl_bar_unlock(nn); + return ret; +} + +/* Interrupt configuration and handling + */ + +/** + * nfp_net_irqs_alloc() - allocates MSI-X irqs + * @pdev: PCI device structure + * @irq_entries: Array to be initialized and used to hold the irq entries + * @min_irqs: Minimal acceptable number of interrupts + * @wanted_irqs: Target number of interrupts to allocate + * + * Return: Number of irqs obtained or 0 on error. + */ +unsigned int +nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, + unsigned int min_irqs, unsigned int wanted_irqs) +{ + unsigned int i; + int got_irqs; + + for (i = 0; i < wanted_irqs; i++) + irq_entries[i].entry = i; + + got_irqs = pci_enable_msix_range(pdev, irq_entries, + min_irqs, wanted_irqs); + if (got_irqs < 0) { + dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n", + min_irqs, wanted_irqs, got_irqs); + return 0; + } + + if (got_irqs < wanted_irqs) + dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n", + wanted_irqs, got_irqs); + + return got_irqs; +} + +/** + * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev + * @nn: NFP Network structure + * @irq_entries: Table of allocated interrupts + * @n: Size of @irq_entries (number of entries to grab) + * + * After interrupts are allocated with nfp_net_irqs_alloc() this function + * should be called to assign them to a specific netdev (port). + */ +void +nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, + unsigned int n) +{ + struct nfp_net_dp *dp = &nn->dp; + + nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS; + dp->num_r_vecs = nn->max_r_vecs; + + memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n); + + if (dp->num_rx_rings > dp->num_r_vecs || + dp->num_tx_rings > dp->num_r_vecs) + dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n", + dp->num_rx_rings, dp->num_tx_rings, + dp->num_r_vecs); + + dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings); + dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings); + dp->num_stack_tx_rings = dp->num_tx_rings; +} + +/** + * nfp_net_irqs_disable() - Disable interrupts + * @pdev: PCI device structure + * + * Undoes what @nfp_net_irqs_alloc() does. + */ +void nfp_net_irqs_disable(struct pci_dev *pdev) +{ + pci_disable_msix(pdev); +} + +/** + * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings. + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) +{ + struct nfp_net_r_vector *r_vec = data; + + /* Currently we cannot tell if it's a rx or tx interrupt, + * since dim does not need accurate event_ctr to calculate, + * we just use this counter for both rx and tx dim. + */ + r_vec->event_ctr++; + + napi_schedule_irqoff(&r_vec->napi); + + /* The FW auto-masks any interrupt, either via the MASK bit in + * the MSI-X table or via the per entry ICR field. So there + * is no need to disable interrupts here. + */ + return IRQ_HANDLED; +} + +static irqreturn_t nfp_ctrl_irq_rxtx(int irq, void *data) +{ + struct nfp_net_r_vector *r_vec = data; + + tasklet_schedule(&r_vec->tasklet); + + return IRQ_HANDLED; +} + +/** + * nfp_net_read_link_status() - Reread link status from control BAR + * @nn: NFP Network structure + */ +static void nfp_net_read_link_status(struct nfp_net *nn) +{ + unsigned long flags; + bool link_up; + u16 sts; + + spin_lock_irqsave(&nn->link_status_lock, flags); + + sts = nn_readw(nn, NFP_NET_CFG_STS); + link_up = !!(sts & NFP_NET_CFG_STS_LINK); + + if (nn->link_up == link_up) + goto out; + + nn->link_up = link_up; + if (nn->port) { + set_bit(NFP_PORT_CHANGED, &nn->port->flags); + if (nn->port->link_cb) + nn->port->link_cb(nn->port); + } + + if (nn->link_up) { + netif_carrier_on(nn->dp.netdev); + netdev_info(nn->dp.netdev, "NIC Link is Up\n"); + } else { + netif_carrier_off(nn->dp.netdev); + netdev_info(nn->dp.netdev, "NIC Link is Down\n"); + } +out: + spin_unlock_irqrestore(&nn->link_status_lock, flags); +} + +/** + * nfp_net_irq_lsc() - Interrupt service routine for link state changes + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_lsc(int irq, void *data) +{ + struct nfp_net *nn = data; + struct msix_entry *entry; + + entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX]; + + nfp_net_read_link_status(nn); + + nfp_net_irq_unmask(nn, entry->entry); + + return IRQ_HANDLED; +} + +/** + * nfp_net_irq_exn() - Interrupt service routine for exceptions + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_exn(int irq, void *data) +{ + struct nfp_net *nn = data; + + nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__); + /* XXX TO BE IMPLEMENTED */ + return IRQ_HANDLED; +} + +/** + * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @format: printf-style format to construct the interrupt name + * @name: Pointer to allocated space for interrupt name + * @name_sz: Size of space for interrupt name + * @vector_idx: Index of MSI-X vector used for this interrupt + * @handler: IRQ handler to register for this interrupt + */ +static int +nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, + const char *format, char *name, size_t name_sz, + unsigned int vector_idx, irq_handler_t handler) +{ + struct msix_entry *entry; + int err; + + entry = &nn->irq_entries[vector_idx]; + + snprintf(name, name_sz, format, nfp_net_name(nn)); + err = request_irq(entry->vector, handler, 0, name, nn); + if (err) { + nn_err(nn, "Failed to request IRQ %d (err=%d).\n", + entry->vector, err); + return err; + } + nn_writeb(nn, ctrl_offset, entry->entry); + nfp_net_irq_unmask(nn, entry->entry); + + return 0; +} + +/** + * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @vector_idx: Index of MSI-X vector used for this interrupt + */ +static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, + unsigned int vector_idx) +{ + nn_writeb(nn, ctrl_offset, 0xff); + nn_pci_flush(nn); + free_irq(nn->irq_entries[vector_idx].vector, nn); +} + +struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + struct sk_buff *nskb; + bool resync_pending; + u32 datalen, seq; + + if (likely(!dp->ktls_tx)) + return skb; + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + return skb; + + datalen = skb->len - skb_tcp_all_headers(skb); + seq = ntohl(tcp_hdr(skb)->seq); + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + resync_pending = tls_offload_tx_resync_pending(skb->sk); + if (unlikely(resync_pending || ntls->next_seq != seq)) { + /* Pure ACK out of order already */ + if (!datalen) + return skb; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + + nskb = tls_encrypt_skb(skb); + if (!nskb) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_no_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + return NULL; + } + /* encryption wasn't necessary */ + if (nskb == skb) + return skb; + /* we don't re-check ring space */ + if (unlikely(skb_is_nonlinear(nskb))) { + nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(nskb); + return NULL; + } + + /* jump forward, a TX may have gotten lost, need to sync TX */ + if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4) + tls_offload_tx_resync_request(nskb->sk, seq, + ntls->next_seq); + + *nr_frags = 0; + return nskb; + } + + if (datalen) { + u64_stats_update_begin(&r_vec->tx_sync); + if (!skb_is_gso(skb)) + r_vec->hw_tls_tx++; + else + r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs; + u64_stats_update_end(&r_vec->tx_sync); + } + + memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + ntls->next_seq += datalen; +#endif + return skb; +} + +void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + u32 datalen, seq; + + if (!tls_handle) + return; + if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) + return; + + datalen = skb->len - skb_tcp_all_headers(skb); + seq = ntohl(tcp_hdr(skb)->seq); + + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + if (ntls->next_seq == seq + datalen) + ntls->next_seq = seq; + else + WARN_ON_ONCE(1); +#endif +} + +static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct nfp_net *nn = netdev_priv(netdev); + + nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); +} + +/* Receive processing */ +static unsigned int +nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz = 0; + + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + fl_bufsz += NFP_NET_MAX_PREPEND; + else + fl_bufsz += dp->rx_offset; + fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu; + + return fl_bufsz; +} + +static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; + fl_bufsz += dp->rx_dma_off; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); + fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + return fl_bufsz; +} + +static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = XDP_PACKET_HEADROOM; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + + return fl_bufsz; +} + +/* Setup and Configuration + */ + +/** + * nfp_net_vecs_init() - Assign IRQs and setup rvecs. + * @nn: NFP Network structure + */ +static void nfp_net_vecs_init(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + int r; + + nn->lsc_handler = nfp_net_irq_lsc; + nn->exn_handler = nfp_net_irq_exn; + + for (r = 0; r < nn->max_r_vecs; r++) { + struct msix_entry *entry; + + entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r]; + + r_vec = &nn->r_vecs[r]; + r_vec->nfp_net = nn; + r_vec->irq_entry = entry->entry; + r_vec->irq_vector = entry->vector; + + if (nn->dp.netdev) { + r_vec->handler = nfp_net_irq_rxtx; + } else { + r_vec->handler = nfp_ctrl_irq_rxtx; + + __skb_queue_head_init(&r_vec->queue); + spin_lock_init(&r_vec->lock); + tasklet_setup(&r_vec->tasklet, nn->dp.ops->ctrl_poll); + tasklet_disable(&r_vec->tasklet); + } + + cpumask_set_cpu(r, &r_vec->affinity_mask); + } +} + +static void +nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) +{ + if (dp->netdev) + netif_napi_add(dp->netdev, &r_vec->napi, + nfp_net_has_xsk_pool_slow(dp, idx) ? dp->ops->xsk_poll : dp->ops->poll); + else + tasklet_enable(&r_vec->tasklet); +} + +static void +nfp_net_napi_del(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec) +{ + if (dp->netdev) + netif_napi_del(&r_vec->napi); + else + tasklet_disable(&r_vec->tasklet); +} + +static void +nfp_net_vector_assign_rings(struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, int idx) +{ + r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL; + r_vec->tx_ring = + idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL; + + r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ? + &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL; + + if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) { + r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL; + + if (r_vec->xsk_pool) + xsk_pool_set_rxq_info(r_vec->xsk_pool, + &r_vec->rx_ring->xdp_rxq); + + nfp_net_napi_del(dp, r_vec); + nfp_net_napi_add(dp, r_vec, idx); + } +} + +static int +nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + int idx) +{ + int err; + + nfp_net_napi_add(&nn->dp, r_vec, idx); + + snprintf(r_vec->name, sizeof(r_vec->name), + "%s-rxtx-%d", nfp_net_name(nn), idx); + err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name, + r_vec); + if (err) { + nfp_net_napi_del(&nn->dp, r_vec); + nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector); + return err; + } + disable_irq(r_vec->irq_vector); + + irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask); + + nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector, + r_vec->irq_entry); + + return 0; +} + +static void +nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) +{ + irq_set_affinity_hint(r_vec->irq_vector, NULL); + nfp_net_napi_del(&nn->dp, r_vec); + free_irq(r_vec->irq_vector, r_vec); +} + +/** + * nfp_net_rss_write_itbl() - Write RSS indirection table to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_itbl(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i, + get_unaligned_le32(nn->rss_itbl + i)); +} + +/** + * nfp_net_rss_write_key() - Write RSS hash key to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_key(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_KEY + i, + get_unaligned_le32(nn->rss_key + i)); +} + +/** + * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW + * @nn: NFP Net device to reconfigure + */ +void nfp_net_coalesce_write_cfg(struct nfp_net *nn) +{ + u8 i; + u32 factor; + u32 value; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + + /* copy RX interrupt coalesce parameters */ + value = (nn->rx_coalesce_max_frames << 16) | + (factor * nn->rx_coalesce_usecs); + for (i = 0; i < nn->dp.num_rx_rings; i++) + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); + + /* copy TX interrupt coalesce parameters */ + value = (nn->tx_coalesce_max_frames << 16) | + (factor * nn->tx_coalesce_usecs); + for (i = 0; i < nn->dp.num_tx_rings; i++) + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); +} + +/** + * nfp_net_write_mac_addr() - Write mac address to the device control BAR + * @nn: NFP Net device to reconfigure + * @addr: MAC address to write + * + * Writes the MAC address from the netdev to the device control BAR. Does not + * perform the required reconfig. We do a bit of byte swapping dance because + * firmware is LE. + */ +static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr) +{ + nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr)); + nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4)); +} + +/** + * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP + * @nn: NFP Net device to reconfigure + * + * Warning: must be fully idempotent. + */ +static void nfp_net_clear_config_and_disable(struct nfp_net *nn) +{ + u32 new_ctrl, update; + unsigned int r; + int err; + + new_ctrl = nn->dp.ctrl; + new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE; + update = NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG; + + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) + nn_err(nn, "Could not disable device: %d\n", err); + + for (r = 0; r < nn->dp.num_rx_rings; r++) { + nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]); + if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx)) + nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]); + } + for (r = 0; r < nn->dp.num_tx_rings; r++) + nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]); + for (r = 0; r < nn->dp.num_r_vecs; r++) + nfp_net_vec_clear_ring_data(nn, r); + + nn->dp.ctrl = new_ctrl; +} + +/** + * nfp_net_set_config_and_enable() - Write control BAR and enable NFP + * @nn: NFP Net device to reconfigure + */ +static int nfp_net_set_config_and_enable(struct nfp_net *nn) +{ + u32 bufsz, new_ctrl, update = 0; + unsigned int r; + int err; + + new_ctrl = nn->dp.ctrl; + + if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) { + nfp_net_rss_write_key(nn); + nfp_net_rss_write_itbl(nn); + nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg); + update |= NFP_NET_CFG_UPDATE_RSS; + } + + if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_coalesce_write_cfg(nn); + update |= NFP_NET_CFG_UPDATE_IRQMOD; + } + + for (r = 0; r < nn->dp.num_tx_rings; r++) + nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r); + for (r = 0; r < nn->dp.num_rx_rings; r++) + nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r); + + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, + U64_MAX >> (64 - nn->dp.num_tx_rings)); + + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, + U64_MAX >> (64 - nn->dp.num_rx_rings)); + + if (nn->dp.netdev) + nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr); + + nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.mtu); + + bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA; + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz); + + /* Enable device */ + new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; + update |= NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) { + nfp_net_clear_config_and_disable(nn); + return err; + } + + nn->dp.ctrl = new_ctrl; + + for (r = 0; r < nn->dp.num_rx_rings; r++) + nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]); + + return 0; +} + +/** + * nfp_net_close_stack() - Quiesce the stack (part of close) + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_close_stack(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + unsigned int r; + + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); + netif_carrier_off(nn->dp.netdev); + nn->link_up = false; + + for (r = 0; r < nn->dp.num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + + disable_irq(r_vec->irq_vector); + napi_disable(&r_vec->napi); + + if (r_vec->rx_ring) + cancel_work_sync(&r_vec->rx_dim.work); + + if (r_vec->tx_ring) + cancel_work_sync(&r_vec->tx_dim.work); + } + + netif_tx_disable(nn->dp.netdev); +} + +/** + * nfp_net_close_free_all() - Free all runtime resources + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_close_free_all(struct nfp_net *nn) +{ + unsigned int r; + + nfp_net_tx_rings_free(&nn->dp); + nfp_net_rx_rings_free(&nn->dp); + + for (r = 0; r < nn->dp.num_r_vecs; r++) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + + nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); +} + +/** + * nfp_net_netdev_close() - Called when the device is downed + * @netdev: netdev structure + */ +static int nfp_net_netdev_close(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + /* Step 1: Disable RX and TX rings from the Linux kernel perspective + */ + nfp_net_close_stack(nn); + + /* Step 2: Tell NFP + */ + nfp_net_clear_config_and_disable(nn); + nfp_port_configure(netdev, false); + + /* Step 3: Free resources + */ + nfp_net_close_free_all(nn); + + nn_dbg(nn, "%s down", netdev->name); + return 0; +} + +void nfp_ctrl_close(struct nfp_net *nn) +{ + int r; + + rtnl_lock(); + + for (r = 0; r < nn->dp.num_r_vecs; r++) { + disable_irq(nn->r_vecs[r].irq_vector); + tasklet_disable(&nn->r_vecs[r].tasklet); + } + + nfp_net_clear_config_and_disable(nn); + + nfp_net_close_free_all(nn); + + rtnl_unlock(); +} + +static void nfp_net_rx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy RX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + + dim->state = DIM_START_MEASURE; +} + +static void nfp_net_tx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy TX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + + dim->state = DIM_START_MEASURE; +} + +/** + * nfp_net_open_stack() - Start the device from stack's perspective + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_open_stack(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + unsigned int r; + + for (r = 0; r < nn->dp.num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + + if (r_vec->rx_ring) { + INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work); + r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + if (r_vec->tx_ring) { + INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work); + r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + napi_enable(&r_vec->napi); + enable_irq(r_vec->irq_vector); + } + + netif_tx_wake_all_queues(nn->dp.netdev); + + enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); + nfp_net_read_link_status(nn); +} + +static int nfp_net_open_alloc_all(struct nfp_net *nn) +{ + int err, r; + + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn", + nn->exn_name, sizeof(nn->exn_name), + NFP_NET_IRQ_EXN_IDX, nn->exn_handler); + if (err) + return err; + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc", + nn->lsc_name, sizeof(nn->lsc_name), + NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); + if (err) + goto err_free_exn; + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); + + for (r = 0; r < nn->dp.num_r_vecs; r++) { + err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); + if (err) + goto err_cleanup_vec_p; + } + + err = nfp_net_rx_rings_prepare(nn, &nn->dp); + if (err) + goto err_cleanup_vec; + + err = nfp_net_tx_rings_prepare(nn, &nn->dp); + if (err) + goto err_free_rx_rings; + + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r); + + return 0; + +err_free_rx_rings: + nfp_net_rx_rings_free(&nn->dp); +err_cleanup_vec: + r = nn->dp.num_r_vecs; +err_cleanup_vec_p: + while (r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); +err_free_exn: + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + return err; +} + +static int nfp_net_netdev_open(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Step 1: Allocate resources for rings and the like + * - Request interrupts + * - Allocate RX and TX ring resources + * - Setup initial RSS table + */ + err = nfp_net_open_alloc_all(nn); + if (err) + return err; + + err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings); + if (err) + goto err_free_all; + + err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings); + if (err) + goto err_free_all; + + /* Step 2: Configure the NFP + * - Ifup the physical interface if it exists + * - Enable rings from 0 to tx_rings/rx_rings - 1. + * - Write MAC address (in case it changed) + * - Set the MTU + * - Set the Freelist buffer size + * - Enable the FW + */ + err = nfp_port_configure(netdev, true); + if (err) + goto err_free_all; + + err = nfp_net_set_config_and_enable(nn); + if (err) + goto err_port_disable; + + /* Step 3: Enable for kernel + * - put some freelist descriptors on each RX ring + * - enable NAPI on each ring + * - enable all TX queues + * - set link state + */ + nfp_net_open_stack(nn); + + return 0; + +err_port_disable: + nfp_port_configure(netdev, false); +err_free_all: + nfp_net_close_free_all(nn); + return err; +} + +int nfp_ctrl_open(struct nfp_net *nn) +{ + int err, r; + + /* ring dumping depends on vNICs being opened/closed under rtnl */ + rtnl_lock(); + + err = nfp_net_open_alloc_all(nn); + if (err) + goto err_unlock; + + err = nfp_net_set_config_and_enable(nn); + if (err) + goto err_free_all; + + for (r = 0; r < nn->dp.num_r_vecs; r++) + enable_irq(nn->r_vecs[r].irq_vector); + + rtnl_unlock(); + + return 0; + +err_free_all: + nfp_net_close_free_all(nn); +err_unlock: + rtnl_unlock(); + return err; +} + +static void nfp_net_set_rx_mode(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + + new_ctrl = nn->dp.ctrl; + + if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC; + + if (netdev->flags & IFF_PROMISC) { + if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) + new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; + else + nn_warn(nn, "FW does not support promiscuous mode\n"); + } else { + new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; + } + + if (new_ctrl == nn->dp.ctrl) + return; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN); + + nn->dp.ctrl = new_ctrl; +} + +static void nfp_net_rss_init_itbl(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings); +} + +static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + struct nfp_net_dp new_dp = *dp; + + *dp = nn->dp; + nn->dp = new_dp; + + nn->dp.netdev->mtu = new_dp.mtu; + + if (!netif_is_rxfh_configured(nn->dp.netdev)) + nfp_net_rss_init_itbl(nn); +} + +static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + unsigned int r; + int err; + + nfp_net_dp_swap(nn, dp); + + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r); + + err = netif_set_real_num_queues(nn->dp.netdev, + nn->dp.num_stack_tx_rings, + nn->dp.num_rx_rings); + if (err) + return err; + + return nfp_net_set_config_and_enable(nn); +} + +struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) +{ + struct nfp_net_dp *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + *new = nn->dp; + + new->xsk_pools = kmemdup(new->xsk_pools, + array_size(nn->max_r_vecs, + sizeof(new->xsk_pools)), + GFP_KERNEL); + if (!new->xsk_pools) { + kfree(new); + return NULL; + } + + /* Clear things which need to be recomputed */ + new->fl_bufsz = 0; + new->tx_rings = NULL; + new->rx_rings = NULL; + new->num_r_vecs = 0; + new->num_stack_tx_rings = 0; + new->txrwb = NULL; + new->txrwb_dma = 0; + + return new; +} + +static void nfp_net_free_dp(struct nfp_net_dp *dp) +{ + kfree(dp->xsk_pools); + kfree(dp); +} + +static int +nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, + struct netlink_ext_ack *extack) +{ + unsigned int r, xsk_min_fl_bufsz; + + /* XDP-enabled tests */ + if (!dp->xdp_prog) + return 0; + if (dp->fl_bufsz > PAGE_SIZE) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled"); + return -EINVAL; + } + if (dp->num_tx_rings > nn->max_tx_rings) { + NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled"); + return -EINVAL; + } + + xsk_min_fl_bufsz = nfp_net_calc_fl_bufsz_xsk(dp); + for (r = 0; r < nn->max_r_vecs; r++) { + if (!dp->xsk_pools[r]) + continue; + + if (xsk_pool_get_rx_frame_size(dp->xsk_pools[r]) < xsk_min_fl_bufsz) { + NL_SET_ERR_MSG_MOD(extack, + "XSK buffer pool chunk size too small"); + return -EINVAL; + } + } + + return 0; +} + +int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp, + struct netlink_ext_ack *extack) +{ + int r, err; + + dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp); + + dp->num_stack_tx_rings = dp->num_tx_rings; + if (dp->xdp_prog) + dp->num_stack_tx_rings -= dp->num_rx_rings; + + dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings); + + err = nfp_net_check_config(nn, dp, extack); + if (err) + goto exit_free_dp; + + if (!netif_running(dp->netdev)) { + nfp_net_dp_swap(nn, dp); + err = 0; + goto exit_free_dp; + } + + /* Prepare new rings */ + for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) { + err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); + if (err) { + dp->num_r_vecs = r; + goto err_cleanup_vecs; + } + } + + err = nfp_net_rx_rings_prepare(nn, dp); + if (err) + goto err_cleanup_vecs; + + err = nfp_net_tx_rings_prepare(nn, dp); + if (err) + goto err_free_rx; + + /* Stop device, swap in new rings, try to start the firmware */ + nfp_net_close_stack(nn); + nfp_net_clear_config_and_disable(nn); + + err = nfp_net_dp_swap_enable(nn, dp); + if (err) { + int err2; + + nfp_net_clear_config_and_disable(nn); + + /* Try with old configuration and old rings */ + err2 = nfp_net_dp_swap_enable(nn, dp); + if (err2) + nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n", + err, err2); + } + for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + + nfp_net_rx_rings_free(dp); + nfp_net_tx_rings_free(dp); + + nfp_net_open_stack(nn); +exit_free_dp: + nfp_net_free_dp(dp); + + return err; + +err_free_rx: + nfp_net_rx_rings_free(dp); +err_cleanup_vecs: + for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + nfp_net_free_dp(dp); + return err; +} + +static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_dp *dp; + int err; + + err = nfp_app_check_mtu(nn->app, netdev, new_mtu); + if (err) + return err; + + dp = nfp_net_clone_dp(nn); + if (!dp) + return -ENOMEM; + + dp->mtu = new_mtu; + + return nfp_net_ring_reconfig(nn, dp, NULL); +} + +static int +nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) +{ + const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD; + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Priority tagged packets with vlan id 0 are processed by the + * NFP as untagged packets + */ + if (!vid) + return 0; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ); + if (err) + return err; + + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO, + ETH_P_8021Q); + + return nfp_net_mbox_reconfig_and_unlock(nn, cmd); +} + +static int +nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +{ + const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL; + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Priority tagged packets with vlan id 0 are processed by the + * NFP as untagged packets + */ + if (!vid) + return 0; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ); + if (err) + return err; + + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO, + ETH_P_8021Q); + + return nfp_net_mbox_reconfig_and_unlock(nn, cmd); +} + +static void nfp_net_stat64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + /* Collect software stats */ + for (r = 0; r < nn->max_r_vecs; r++) { + struct nfp_net_r_vector *r_vec = &nn->r_vecs[r]; + u64 data[3]; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; + } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { + start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; + } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; + } + + /* Add in device stats */ + stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES); + stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS); + stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS); + + stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS); + stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS); +} + +static int nfp_net_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + int err; + + /* Assume this is not called with features we have not advertised */ + + new_ctrl = nn->dp.ctrl; + + if (changed & NETIF_F_RXCSUM) { + if (features & NETIF_F_RXCSUM) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY; + } + + if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { + if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) + new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM; + } + + if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?: + NFP_NET_CFG_CTRL_LSO; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?: + NFP_NET_CFG_CTRL_RXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN_ANY; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?: + NFP_NET_CFG_CTRL_TXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN_ANY; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + new_ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER; + } + + if (changed & NETIF_F_HW_VLAN_STAG_RX) { + if (features & NETIF_F_HW_VLAN_STAG_RX) + new_ctrl |= NFP_NET_CFG_CTRL_RXQINQ; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; + } + + if (changed & NETIF_F_SG) { + if (features & NETIF_F_SG) + new_ctrl |= NFP_NET_CFG_CTRL_GATHER; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; + } + + err = nfp_port_set_features(netdev, features); + if (err) + return err; + + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", + netdev->features, features, changed); + + if (new_ctrl == nn->dp.ctrl) + return 0; + + nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl); + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + nn->dp.ctrl = new_ctrl; + + return 0; +} + +static netdev_features_t +nfp_net_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_STAG_RX)) { + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + netdev->wanted_features &= ~NETIF_F_HW_VLAN_CTAG_RX; + netdev_warn(netdev, + "S-tag and C-tag stripping can't be enabled at the same time. Enabling S-tag stripping and disabling C-tag stripping\n"); + } else if (netdev->features & NETIF_F_HW_VLAN_STAG_RX) { + features &= ~NETIF_F_HW_VLAN_STAG_RX; + netdev->wanted_features &= ~NETIF_F_HW_VLAN_STAG_RX; + netdev_warn(netdev, + "S-tag and C-tag stripping can't be enabled at the same time. Enabling C-tag stripping and disabling S-tag stripping\n"); + } + } + return features; +} + +static netdev_features_t +nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + u8 l4_hdr; + + /* We can't do TSO over double tagged packets (802.1AD) */ + features &= vlan_features_check(skb, features); + + if (!skb->encapsulation) + return features; + + /* Ensure that inner L4 header offset fits into TX descriptor field */ + if (skb_is_gso(skb)) { + u32 hdrlen; + + hdrlen = skb_inner_tcp_all_headers(skb); + + /* Assume worst case scenario of having longest possible + * metadata prepend - 8B + */ + if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8)) + features &= ~NETIF_F_GSO_MASK; + } + + /* VXLAN/GRE check */ + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) || + (l4_hdr == IPPROTO_UDP && + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} + +static int +nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len) +{ + struct nfp_net *nn = netdev_priv(netdev); + int n; + + /* If port is defined, devlink_port is registered and devlink core + * is taking care of name formatting. + */ + if (nn->port) + return -EOPNOTSUPP; + + if (nn->dp.is_vf || nn->vnic_no_name) + return -EOPNOTSUPP; + + n = snprintf(name, len, "n%d", nn->id); + if (n >= len) + return -EINVAL; + + return 0; +} + +static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog; + struct nfp_net_dp *dp; + int err; + + if (!prog == !nn->dp.xdp_prog) { + WRITE_ONCE(nn->dp.xdp_prog, prog); + xdp_attachment_setup(&nn->xdp, bpf); + return 0; + } + + dp = nfp_net_clone_dp(nn); + if (!dp) + return -ENOMEM; + + dp->xdp_prog = prog; + dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings; + dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0; + + /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */ + err = nfp_net_ring_reconfig(nn, dp, bpf->extack); + if (err) + return err; + + xdp_attachment_setup(&nn->xdp, bpf); + return 0; +} + +static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf) +{ + int err; + + err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack); + if (err) + return err; + + xdp_attachment_setup(&nn->xdp_hw, bpf); + return 0; +} + +static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nfp_net_xdp_setup_drv(nn, xdp); + case XDP_SETUP_PROG_HW: + return nfp_net_xdp_setup_hw(nn, xdp); + case XDP_SETUP_XSK_POOL: + return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return nfp_app_bpf(nn->app, nn, xdp); + } +} + +static int nfp_net_set_mac_address(struct net_device *netdev, void *addr) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct sockaddr *saddr = addr; + int err; + + err = eth_prepare_mac_addr_change(netdev, addr); + if (err) + return err; + + nfp_net_write_mac_addr(nn, saddr->sa_data); + + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR); + if (err) + return err; + + eth_commit_mac_addr_change(netdev, addr); + + return 0; +} + +static int nfp_net_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct nfp_net *nn = netdev_priv(dev); + u16 mode; + + if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA)) + return -EOPNOTSUPP; + + mode = (nn->dp.ctrl & NFP_NET_CFG_CTRL_VEPA) ? + BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, + nlflags, filter_mask, NULL); +} + +static int nfp_net_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct nfp_net *nn = netdev_priv(dev); + struct nlattr *attr, *br_spec; + int rem, err; + u32 new_ctrl; + u16 mode; + + if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA)) + return -EOPNOTSUPP; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + new_ctrl = nn->dp.ctrl; + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) + new_ctrl |= NFP_NET_CFG_CTRL_VEPA; + else if (mode == BRIDGE_MODE_VEB) + new_ctrl &= ~NFP_NET_CFG_CTRL_VEPA; + else + return -EOPNOTSUPP; + + if (new_ctrl == nn->dp.ctrl) + return 0; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (!err) + nn->dp.ctrl = new_ctrl; + + return err; + } + + return -EINVAL; +} + +const struct net_device_ops nfp_nfd3_netdev_ops = { + .ndo_init = nfp_app_ndo_init, + .ndo_uninit = nfp_app_ndo_uninit, + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, + .ndo_set_vf_mac = nfp_app_set_vf_mac, + .ndo_set_vf_vlan = nfp_app_set_vf_vlan, + .ndo_set_vf_rate = nfp_app_set_vf_rate, + .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, + .ndo_set_vf_trust = nfp_app_set_vf_trust, + .ndo_get_vf_config = nfp_app_get_vf_config, + .ndo_set_vf_link_state = nfp_app_set_vf_link_state, + .ndo_setup_tc = nfp_port_setup_tc, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = nfp_net_set_mac_address, + .ndo_set_features = nfp_net_set_features, + .ndo_fix_features = nfp_net_fix_features, + .ndo_features_check = nfp_net_features_check, + .ndo_get_phys_port_name = nfp_net_get_phys_port_name, + .ndo_bpf = nfp_net_xdp, + .ndo_xsk_wakeup = nfp_net_xsk_wakeup, + .ndo_get_devlink_port = nfp_devlink_get_devlink_port, + .ndo_bridge_getlink = nfp_net_bridge_getlink, + .ndo_bridge_setlink = nfp_net_bridge_setlink, +}; + +const struct net_device_ops nfp_nfdk_netdev_ops = { + .ndo_init = nfp_app_ndo_init, + .ndo_uninit = nfp_app_ndo_uninit, + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, + .ndo_set_vf_mac = nfp_app_set_vf_mac, + .ndo_set_vf_vlan = nfp_app_set_vf_vlan, + .ndo_set_vf_rate = nfp_app_set_vf_rate, + .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, + .ndo_set_vf_trust = nfp_app_set_vf_trust, + .ndo_get_vf_config = nfp_app_get_vf_config, + .ndo_set_vf_link_state = nfp_app_set_vf_link_state, + .ndo_setup_tc = nfp_port_setup_tc, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = nfp_net_set_mac_address, + .ndo_set_features = nfp_net_set_features, + .ndo_fix_features = nfp_net_fix_features, + .ndo_features_check = nfp_net_features_check, + .ndo_get_phys_port_name = nfp_net_get_phys_port_name, + .ndo_bpf = nfp_net_xdp, + .ndo_get_devlink_port = nfp_devlink_get_devlink_port, + .ndo_bridge_getlink = nfp_net_bridge_getlink, + .ndo_bridge_setlink = nfp_net_bridge_setlink, +}; + +static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1); + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) { + struct udp_tunnel_info ti0, ti1; + + udp_tunnel_nic_get_port(netdev, table, i, &ti0); + udp_tunnel_nic_get_port(netdev, table, i + 1, &ti1); + + nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(ti0.port), + be16_to_cpu(ti1.port) << 16 | be16_to_cpu(ti0.port)); + } + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN); +} + +static const struct udp_tunnel_nic_info nfp_udp_tunnels = { + .sync_table = nfp_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | + UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .tables = { + { + .n_entries = NFP_NET_N_VXLAN_PORTS, + .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, + }, + }, +}; + +/** + * nfp_net_info() - Print general info about the NIC + * @nn: NFP Net device to reconfigure + */ +void nfp_net_info(struct nfp_net *nn) +{ + nn_info(nn, "NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", + nn->dp.is_vf ? "VF " : "", + nn->dp.num_tx_rings, nn->max_tx_rings, + nn->dp.num_rx_rings, nn->max_rx_rings); + nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", + nn->fw_ver.extend, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor, + nn->max_mtu); + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn->cap, + nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", + nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_RXQINQ ? "RXQINQ " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ? "RXVLANv2 " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ? "TXVLANv2 " : "", + nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", + nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", + nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO1 " : "", + nn->cap & NFP_NET_CFG_CTRL_LSO2 ? "TSO2 " : "", + nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS1 " : "", + nn->cap & NFP_NET_CFG_CTRL_RSS2 ? "RSS2 " : "", + nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "", + nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", + nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "", + nn->cap & NFP_NET_CFG_CTRL_VEPA ? "VEPA " : "", + nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ? + "RXCSUM_COMPLETE " : "", + nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "", + nfp_app_extra_cap(nn->app, nn)); +} + +/** + * nfp_net_alloc() - Allocate netdev and related structure + * @pdev: PCI device + * @dev_info: NFP ASIC params + * @ctrl_bar: PCI IOMEM with vNIC config memory + * @needs_netdev: Whether to allocate a netdev for this vNIC + * @max_tx_rings: Maximum number of TX rings supported by device + * @max_rx_rings: Maximum number of RX rings supported by device + * + * This function allocates a netdev device and fills in the initial + * part of the @struct nfp_net structure. In case of control device + * nfp_net structure is allocated without the netdev. + * + * Return: NFP Net device structure, or ERR_PTR on error. + */ +struct nfp_net * +nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, + void __iomem *ctrl_bar, bool needs_netdev, + unsigned int max_tx_rings, unsigned int max_rx_rings) +{ + u64 dma_mask = dma_get_mask(&pdev->dev); + struct nfp_net *nn; + int err; + + if (needs_netdev) { + struct net_device *netdev; + + netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), + max_tx_rings, max_rx_rings); + if (!netdev) + return ERR_PTR(-ENOMEM); + + SET_NETDEV_DEV(netdev, &pdev->dev); + nn = netdev_priv(netdev); + nn->dp.netdev = netdev; + } else { + nn = vzalloc(sizeof(*nn)); + if (!nn) + return ERR_PTR(-ENOMEM); + } + + nn->dp.dev = &pdev->dev; + nn->dp.ctrl_bar = ctrl_bar; + nn->dev_info = dev_info; + nn->pdev = pdev; + nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar); + + switch (FIELD_GET(NFP_NET_CFG_VERSION_DP_MASK, nn->fw_ver.extend)) { + case NFP_NET_CFG_VERSION_DP_NFD3: + nn->dp.ops = &nfp_nfd3_ops; + break; + case NFP_NET_CFG_VERSION_DP_NFDK: + if (nn->fw_ver.major < 5) { + dev_err(&pdev->dev, + "NFDK must use ABI 5 or newer, found: %d\n", + nn->fw_ver.major); + err = -EINVAL; + goto err_free_nn; + } + nn->dp.ops = &nfp_nfdk_ops; + break; + default: + err = -EINVAL; + goto err_free_nn; + } + + if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) { + dev_err(&pdev->dev, + "DMA mask of loaded firmware: %llx, required DMA mask: %llx\n", + nn->dp.ops->dma_mask, dma_mask); + err = -EINVAL; + goto err_free_nn; + } + + nn->max_tx_rings = max_tx_rings; + nn->max_rx_rings = max_rx_rings; + + nn->dp.num_tx_rings = min_t(unsigned int, + max_tx_rings, num_online_cpus()); + nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings, + netif_get_num_default_rss_queues()); + + nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings); + nn->dp.num_r_vecs = min_t(unsigned int, + nn->dp.num_r_vecs, num_online_cpus()); + nn->max_r_vecs = nn->dp.num_r_vecs; + + nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools), + GFP_KERNEL); + if (!nn->dp.xsk_pools) { + err = -ENOMEM; + goto err_free_nn; + } + + nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; + nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; + + sema_init(&nn->bar_lock, 1); + + spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->link_status_lock); + + timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0); + + err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, + &nn->tlv_caps); + if (err) + goto err_free_nn; + + err = nfp_ccm_mbox_alloc(nn); + if (err) + goto err_free_nn; + + return nn; + +err_free_nn: + if (nn->dp.netdev) + free_netdev(nn->dp.netdev); + else + vfree(nn); + return ERR_PTR(err); +} + +/** + * nfp_net_free() - Undo what @nfp_net_alloc() did + * @nn: NFP Net device to reconfigure + */ +void nfp_net_free(struct nfp_net *nn) +{ + WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); + nfp_ccm_mbox_free(nn); + + kfree(nn->dp.xsk_pools); + if (nn->dp.netdev) + free_netdev(nn->dp.netdev); + else + vfree(nn); +} + +/** + * nfp_net_rss_key_sz() - Get current size of the RSS key + * @nn: NFP Net device instance + * + * Return: size of the RSS key for currently selected hash function. + */ +unsigned int nfp_net_rss_key_sz(struct nfp_net *nn) +{ + switch (nn->rss_hfunc) { + case ETH_RSS_HASH_TOP: + return NFP_NET_CFG_RSS_KEY_SZ; + case ETH_RSS_HASH_XOR: + return 0; + case ETH_RSS_HASH_CRC32: + return 4; + } + + nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc); + return 0; +} + +/** + * nfp_net_rss_init() - Set the initial RSS parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_rss_init(struct nfp_net *nn) +{ + unsigned long func_bit, rss_cap_hfunc; + u32 reg; + + /* Read the RSS function capability and select first supported func */ + reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP); + rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg); + if (!rss_cap_hfunc) + rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, + NFP_NET_CFG_RSS_TOEPLITZ); + + func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS); + if (func_bit == NFP_NET_CFG_RSS_HFUNCS) { + dev_warn(nn->dp.dev, + "Bad RSS config, defaulting to Toeplitz hash\n"); + func_bit = ETH_RSS_HASH_TOP_BIT; + } + nn->rss_hfunc = 1 << func_bit; + + netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn)); + + nfp_net_rss_init_itbl(nn); + + /* Enable IPv4/IPv6 TCP by default */ + nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | + NFP_NET_CFG_RSS_IPV6_TCP | + FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) | + NFP_NET_CFG_RSS_MASK; +} + +/** + * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_irqmod_init(struct nfp_net *nn) +{ + nn->rx_coalesce_usecs = 50; + nn->rx_coalesce_max_frames = 64; + nn->tx_coalesce_usecs = 50; + nn->tx_coalesce_max_frames = 64; + + nn->rx_coalesce_adapt_on = true; + nn->tx_coalesce_adapt_on = true; +} + +static void nfp_net_netdev_init(struct nfp_net *nn) +{ + struct net_device *netdev = nn->dp.netdev; + + nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr); + + netdev->mtu = nn->dp.mtu; + + /* Advertise/enable offloads based on capabilities + * + * Note: netdev->features show the currently enabled features + * and netdev->hw_features advertises which features are + * supported. By default we enable most features. + */ + if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR) + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev->hw_features = NETIF_F_HIGHDMA; + if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) { + netdev->hw_features |= NETIF_F_RXCSUM; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_GATHER) { + netdev->hw_features |= NETIF_F_SG; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER; + } + if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) || + nn->cap & NFP_NET_CFG_CTRL_LSO2) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?: + NFP_NET_CFG_CTRL_LSO; + } + if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) + netdev->hw_features |= NETIF_F_RXHASH; + if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + netdev->udp_tunnel_nic_info = &nfp_udp_tunnels; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN; + } + if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_GRE; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE; + } + if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE)) + netdev->hw_enc_features = netdev->hw_features; + + netdev->vlan_features = netdev->hw_features; + + if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN_ANY) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?: + NFP_NET_CFG_CTRL_RXVLAN; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN_ANY) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO2) { + nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n"); + } else { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?: + NFP_NET_CFG_CTRL_TXVLAN; + } + } + if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER; + } + if (nn->cap & NFP_NET_CFG_CTRL_RXQINQ) { + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXQINQ; + } + + netdev->features = netdev->hw_features; + + if (nfp_app_has_tc(nn->app) && nn->port) + netdev->hw_features |= NETIF_F_HW_TC; + + /* C-Tag strip and S-Tag strip can't be supported simultaneously, + * so enable C-Tag strip and disable S-Tag strip by default. + */ + netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; + nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; + + /* Finalise the netdev setup */ + switch (nn->dp.ops->version) { + case NFP_NFD_VER_NFD3: + netdev->netdev_ops = &nfp_nfd3_netdev_ops; + break; + case NFP_NFD_VER_NFDK: + netdev->netdev_ops = &nfp_nfdk_netdev_ops; + break; + } + + netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = nn->max_mtu; + + netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); + + netif_carrier_off(netdev); + + nfp_net_set_ethtool_ops(netdev); +} + +static int nfp_net_read_caps(struct nfp_net *nn) +{ + /* Get some of the read-only fields from the BAR */ + nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); + nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); + + /* ABI 4.x and ctrl vNIC always use chained metadata, in other cases + * we allow use of non-chained metadata if RSS(v1) is the only + * advertised capability requiring metadata. + */ + nn->dp.chained_metadata_format = nn->fw_ver.major == 4 || + !nn->dp.netdev || + !(nn->cap & NFP_NET_CFG_CTRL_RSS) || + nn->cap & NFP_NET_CFG_CTRL_CHAIN_META; + /* RSS(v1) uses non-chained metadata format, except in ABI 4.x where + * it has the same meaning as RSSv2. + */ + if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4) + nn->cap &= ~NFP_NET_CFG_CTRL_RSS; + + /* Determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) { + u32 reg; + + reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + if (reg > NFP_NET_MAX_PREPEND) { + nn_err(nn, "Invalid rx offset: %d\n", reg); + return -EINVAL; + } + nn->dp.rx_offset = reg; + } else { + nn->dp.rx_offset = NFP_NET_RX_OFFSET; + } + + /* Mask out NFD-version-specific features */ + nn->cap &= nn->dp.ops->cap_mask; + + /* For control vNICs mask out the capabilities app doesn't want. */ + if (!nn->dp.netdev) + nn->cap &= nn->app->type->ctrl_cap_mask; + + return 0; +} + +/** + * nfp_net_init() - Initialise/finalise the nfp_net structure + * @nn: NFP Net device structure + * + * Return: 0 on success or negative errno on error. + */ +int nfp_net_init(struct nfp_net *nn) +{ + int err; + + nn->dp.rx_dma_dir = DMA_FROM_DEVICE; + + err = nfp_net_read_caps(nn); + if (err) + return err; + + /* Set default MTU and Freelist buffer size */ + if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) { + nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu); + } else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) { + nn->dp.mtu = nn->max_mtu; + } else { + nn->dp.mtu = NFP_NET_DEFAULT_MTU; + } + nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp); + + if (nfp_app_ctrl_uses_data_vnics(nn->app)) + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA; + + if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) { + nfp_net_rss_init(nn); + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?: + NFP_NET_CFG_CTRL_RSS; + } + + /* Allow L2 Broadcast and Multicast through by default, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_L2BC) + nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC; + + /* Allow IRQ moderation, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_irqmod_init(nn); + nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + } + + /* Enable TX pointer writeback, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_TXRWB) + nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB; + + /* Stash the re-configuration queue away. First odd queue in TX Bar */ + nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; + + /* Make sure the FW knows the netdev is supposed to be disabled here */ + nn_writel(nn, NFP_NET_CFG_CTRL, 0); + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING | + NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + if (nn->dp.netdev) { + nfp_net_netdev_init(nn); + + err = nfp_ccm_mbox_init(nn); + if (err) + return err; + + err = nfp_net_tls_init(nn); + if (err) + goto err_clean_mbox; + } + + nfp_net_vecs_init(nn); + + if (!nn->dp.netdev) + return 0; + return register_netdev(nn->dp.netdev); + +err_clean_mbox: + nfp_ccm_mbox_clean(nn); + return err; +} + +/** + * nfp_net_clean() - Undo what nfp_net_init() did. + * @nn: NFP Net device structure + */ +void nfp_net_clean(struct nfp_net *nn) +{ + if (!nn->dp.netdev) + return; + + unregister_netdev(nn->dp.netdev); + nfp_ccm_mbox_clean(nn); + nfp_net_reconfig_wait_posted(nn); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c new file mode 100644 index 000000000..c3a763134 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) +{ + memset(caps, 0, sizeof(*caps)); + caps->me_freq_mhz = 1200; + caps->mbox_off = NFP_NET_CFG_MBOX_BASE; + caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; +} + +static bool +nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps, + u8 __iomem *ctrl_mem, u8 __iomem *data, + unsigned int length, unsigned int offset, + bool rx_stream_scan) +{ + /* Ignore the legacy TLV if new one was already parsed */ + if (caps->tls_resync_ss && !rx_stream_scan) + return true; + + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return false; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + caps->tls_resync_ss = rx_stream_scan; + + return true; +} + +int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, + struct nfp_net_tlv_caps *caps) +{ + u8 __iomem *data = ctrl_mem + NFP_NET_CFG_TLV_BASE; + u8 __iomem *end = ctrl_mem + NFP_NET_CFG_BAR_SZ; + u32 hdr; + + nfp_net_tlv_caps_reset(caps); + + hdr = readl(data); + if (!hdr) + return 0; + + while (true) { + unsigned int length, offset; + u32 hdr = readl(data); + + length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr); + offset = data - ctrl_mem; + + /* Advance past the header */ + data += 4; + + if (length % NFP_NET_CFG_TLV_LENGTH_INC) { + dev_err(dev, "TLV size not multiple of %u offset:%u len:%u\n", + NFP_NET_CFG_TLV_LENGTH_INC, offset, length); + return -EINVAL; + } + if (data + length > end) { + dev_err(dev, "oversized TLV offset:%u len:%u\n", + offset, length); + return -EINVAL; + } + + switch (FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr)) { + case NFP_NET_CFG_TLV_TYPE_UNKNOWN: + dev_err(dev, "NULL TLV at offset:%u\n", offset); + return -EINVAL; + case NFP_NET_CFG_TLV_TYPE_RESERVED: + break; + case NFP_NET_CFG_TLV_TYPE_END: + if (!length) + return 0; + + dev_err(dev, "END TLV should be empty, has offset:%u len:%d\n", + offset, length); + return -EINVAL; + case NFP_NET_CFG_TLV_TYPE_ME_FREQ: + if (length != 4) { + dev_err(dev, + "ME FREQ TLV should be 4B, is %dB offset:%u\n", + length, offset); + return -EINVAL; + } + + caps->me_freq_mhz = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_MBOX: + if (!length) { + caps->mbox_off = 0; + caps->mbox_len = 0; + } else { + caps->mbox_off = data - ctrl_mem; + caps->mbox_len = length; + } + break; + case NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0: + case NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1: + dev_warn(dev, + "experimental TLV type:%u offset:%u len:%u\n", + FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr), + offset, length); + break; + case NFP_NET_CFG_TLV_TYPE_REPR_CAP: + if (length < 4) { + dev_err(dev, "REPR CAP TLV short %dB < 4B offset:%u\n", + length, offset); + return -EINVAL; + } + + caps->repr_cap = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + if (length >= 4) + caps->mbox_cmsg_types = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + false)) + return -EINVAL; + break; + case NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + if ((data - ctrl_mem) % 8) { + dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n", + offset, length); + break; + } + caps->vnic_stats_off = data - ctrl_mem; + caps->vnic_stats_cnt = length / 10; + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + true)) + return -EINVAL; + break; + default: + if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) + break; + + dev_err(dev, "unknown TLV type:%u offset:%u len:%u\n", + FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr), + offset, length); + return -EINVAL; + } + + data += length; + if (data + 4 > end) { + dev_err(dev, "reached end of BAR without END TLV\n"); + return -EINVAL; + } + } + + /* Not reached */ + return -EINVAL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h new file mode 100644 index 000000000..6714d5e8f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* nfp_net_ctrl.h + * Netronome network device driver: Control BAR layout + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#ifndef _NFP_NET_CTRL_H_ +#define _NFP_NET_CTRL_H_ + +#include <linux/types.h> + +/* 64-bit per app capabilities */ +#define NFP_NET_APP_CAP_SP_INDIFF BIT_ULL(0) /* indifferent to port speed */ + +/* Configuration BAR size. + * + * The configuration BAR is 8K in size, but due to + * THB-350, 32k needs to be reserved. + */ +#define NFP_NET_CFG_BAR_SZ (32 * 1024) + +/* Offset in Freelist buffer where packet starts on RX */ +#define NFP_NET_RX_OFFSET 32 + +/* LSO parameters + * %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames + * %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce + */ +#define NFP_NET_LSO_MAX_HDR_SZ 255 +#define NFP_NET_LSO_MAX_SEGS 64 + +/* working with metadata vlan api (NFD version >= 2.0) */ +#define NFP_NET_META_VLAN_STRIP BIT(31) +#define NFP_NET_META_VLAN_TPID_MASK GENMASK(19, 16) +#define NFP_NET_META_VLAN_TCI_MASK GENMASK(15, 0) + +/* Prepend field types */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 +#define NFP_NET_META_VLAN 4 /* ctag or stag type */ +#define NFP_NET_META_PORTID 5 +#define NFP_NET_META_CSUM 6 /* checksum complete type */ +#define NFP_NET_META_CONN_HANDLE 7 +#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */ + +#define NFP_META_PORT_ID_CTRL ~0U + +/* Prepend field sizes */ +#define NFP_NET_META_VLAN_SIZE 4 +#define NFP_NET_META_PORTID_SIZE 4 +#define NFP_NET_META_CONN_HANDLE_SIZE 8 +/* Hash type pre-pended when a RSS hash was computed */ +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 + +/* Ring counts + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings + */ +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_RXR_MAX 64 + +/* Read/Write config words (0x0000 - 0x002c) + * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_UPDATE: Indicate which fields are updated + * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_MACADDR: MAC address + * + * TODO: + * - define Error details in UPDATE + */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ +#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ +#define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */ +#define NFP_NET_CFG_CTRL_RXQINQ (0x1 << 13) /* Enable S-tag strip */ +#define NFP_NET_CFG_CTRL_RXVLAN_V2 (0x1 << 15) /* Enable C-tag strip */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_VEPA (0x1 << 22) /* Enable VEPA mode */ +#define NFP_NET_CFG_CTRL_TXVLAN_V2 (0x1 << 23) /* Enable VLAN C-tag insert*/ +#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ +#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ +#define NFP_NET_CFG_CTRL_LSO2 (0x1 << 28) /* LSO/TSO (version 2) */ +#define NFP_NET_CFG_CTRL_RSS2 (0x1 << 29) /* RSS (version 2) */ +#define NFP_NET_CFG_CTRL_CSUM_COMPLETE (0x1 << 30) /* Checksum complete */ +#define NFP_NET_CFG_CTRL_LIVE_ADDR (0x1 << 31) /* live MAC addr change */ + +#define NFP_NET_CFG_CTRL_LSO_ANY (NFP_NET_CFG_CTRL_LSO | \ + NFP_NET_CFG_CTRL_LSO2) +#define NFP_NET_CFG_CTRL_RSS_ANY (NFP_NET_CFG_CTRL_RSS | \ + NFP_NET_CFG_CTRL_RSS2) +#define NFP_NET_CFG_CTRL_RXCSUM_ANY (NFP_NET_CFG_CTRL_RXCSUM | \ + NFP_NET_CFG_CTRL_CSUM_COMPLETE) +#define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \ + NFP_NET_CFG_CTRL_CSUM_COMPLETE) +#define NFP_NET_CFG_CTRL_RXVLAN_ANY (NFP_NET_CFG_CTRL_RXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2) +#define NFP_NET_CFG_CTRL_TXVLAN_ANY (NFP_NET_CFG_CTRL_TXVLAN | \ + NFP_NET_CFG_CTRL_TXVLAN_V2) + +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ +#define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ +#define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ +#define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */ +#define NFP_NET_CFG_UPDATE_CRYPTO (0x1 << 14) /* Crypto on/off */ +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 + +/* Read-only words (0x0030 - 0x0050): + * %NFP_NET_CFG_VERSION: Firmware version number + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings + * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings + * %NFP_NET_CFG_MAX_MTU: Maximum support MTU + * %NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * %NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * + * TODO: + * - define more STS bits + */ +#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xfe << 24) +#define NFP_NET_CFG_VERSION_DP_NFD3 0 +#define NFP_NET_CFG_VERSION_DP_NFDK 1 +#define NFP_NET_CFG_VERSION_DP_MASK 1 +#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 +#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +/* Link rate */ +#define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1 +#define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF +#define NFP_NET_CFG_STS_LINK_RATE \ + (NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT) +#define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0 +#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1 +#define NFP_NET_CFG_STS_LINK_RATE_1G 2 +#define NFP_NET_CFG_STS_LINK_RATE_10G 3 +#define NFP_NET_CFG_STS_LINK_RATE_25G 4 +#define NFP_NET_CFG_STS_LINK_RATE_40G 5 +#define NFP_NET_CFG_STS_LINK_RATE_50G 6 +#define NFP_NET_CFG_STS_LINK_RATE_100G 7 +/* NSP Link rate is a 16-bit word. It's determined by NSP and + * written to CFG BAR by NFP driver. + */ +#define NFP_NET_CFG_STS_NSP_LINK_RATE 0x0036 +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 +/* Next two words are being used by VFs for solving THB350 issue */ +#define NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c + +/* Prepend configuration + */ +#define NFP_NET_CFG_RX_OFFSET 0x0050 +#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ + +/* RSS capabilities + * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as + * %NFP_NET_CFG_RSS_HFUNC) + */ +#define NFP_NET_CFG_RSS_CAP 0x0054 +#define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 + +/* TLV area start + * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area + */ +#define NFP_NET_CFG_TLV_BASE 0x0058 + +/* VXLAN/UDP encap configuration + * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + */ +#define NFP_NET_CFG_VXLAN_PORT 0x0060 +#define NFP_NET_CFG_VXLAN_SZ 0x0008 + +/* BPF section + * %NFP_NET_CFG_BPF_ABI: BPF ABI version + * %NFP_NET_CFG_BPF_CAP: BPF capabilities + * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * %NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * %NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * %NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * %NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * %NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code + */ +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/* 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 + +/* RSS configuration (0x0100 - 0x01ac): + * Used only when NFP_NET_CFG_CTRL_RSS is enabled + * %NFP_NET_CFG_RSS_CFG: RSS configuration word + * %NFP_NET_CFG_RSS_KEY: RSS "secret" key + * %NFP_NET_CFG_RSS_ITBL: RSS indirection table + */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_HFUNC 0xff000000 +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */ +#define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */ +#define NFP_NET_CFG_RSS_HFUNCS 3 +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ + NFP_NET_CFG_RSS_KEY_SZ) +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 + +/* TX ring configuration (0x200 - 0x800) + * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * %NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * %NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + */ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ + ((_x) * 0x4)) + +/* RX ring configuration (0x0800 - 0x0c00) + * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * %NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + */ +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ + ((_x) * 0x4)) + +/* Interrupt Control/Cause registers (0x0c00 - 0x0d00) + * These registers are only used when MSI-X auto-masking is not + * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * by MSI-X entry and are 1B in size. If an entry is zero, the + * corresponding entry is enabled. If the FW generates an interrupt, + * it writes a cause into the corresponding field. This also masks + * the MSI-X entry and the host driver must clear the register to + * re-enable the interrupt. + */ +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 + +/* General device stats (0x0d00 - 0x0d90) + * all counters are 64bit. + */ +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) + +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + +/* Per ring stats (0x1000 - 0x1800) + * options, 64bit per entry + * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + */ +#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ + ((_x) * 0x10)) +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ + ((_x) * 0x10)) + +/* General use mailbox area (0x1800 - 0x19ff) + * 4B used for update command and 4B return code + * followed by a max of 504B of variable length value + */ +#define NFP_NET_CFG_MBOX_BASE 0x1800 +#define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 + +#define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 +#define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 +#define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8 + +#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1 +#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 + +#define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 +#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 + +/* VLAN filtering using general use mailbox + * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox + * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter + * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter + * %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes + */ +#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_SIMPLE_VAL +#define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER +#define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) +#define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 + +/* TLV capabilities + * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV + * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV + * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV + * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments + * %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV + * + * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE. + * Last structure must be of type %NFP_NET_CFG_TLV_TYPE_END. Presence of TLVs + * is indicated by %NFP_NET_CFG_TLV_BASE being non-zero. TLV structures may + * fill the entire remainder of the BAR or be shorter. FW must make sure TLVs + * don't conflict with other features which allocate space beyond + * %NFP_NET_CFG_TLV_BASE. %NFP_NET_CFG_TLV_TYPE_RESERVED should be used to wrap + * space used by such features. + * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH. + */ +#define NFP_NET_CFG_TLV_TYPE 0x00 +#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 +#define NFP_NET_CFG_TLV_LENGTH 0x02 +#define NFP_NET_CFG_TLV_LENGTH_INC 4 +#define NFP_NET_CFG_TLV_VALUE 0x04 + +#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 +#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 +#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff + +/* Capability TLV types + * + * %NFP_NET_CFG_TLV_TYPE_UNKNOWN: + * Special TLV type to catch bugs, should never be encountered. Drivers should + * treat encountering this type as error and refuse to probe. + * + * %NFP_NET_CFG_TLV_TYPE_RESERVED: + * Reserved space, may contain legacy fixed-offset fields, or be used for + * padding. The use of this type should be otherwise avoided. + * + * %NFP_NET_CFG_TLV_TYPE_END: + * Empty, end of TLV list. Must be the last TLV. Drivers will stop processing + * further TLVs when encountered. + * + * %NFP_NET_CFG_TLV_TYPE_ME_FREQ: + * Single word, ME frequency in MHz as used in calculation for + * %NFP_NET_CFG_RXR_IRQ_MOD and %NFP_NET_CFG_TXR_IRQ_MOD. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX: + * Variable, mailbox area. Overwrites the default location which is + * %NFP_NET_CFG_MBOX_BASE and length %NFP_NET_CFG_MBOX_VAL_MAX_SZ. + * + * %NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0: + * %NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1: + * Variable, experimental IDs. IDs designated for internal development and + * experiments before a stable TLV ID has been allocated to a feature. Should + * never be present in production firmware. + * + * %NFP_NET_CFG_TLV_TYPE_REPR_CAP: + * Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which + * can be used on representors. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + * Variable, bitmap of control message types supported by the mailbox handler. + * Bit 0 corresponds to message type 0, bit 1 to 1, etc. Control messages are + * encapsulated into simple TLVs, with an end TLV and written to the Mailbox. + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + * 8 words, bitmaps of supported and enabled crypto operations. + * First 16B (4 words) contains a bitmap of supported crypto operations, + * and next 16B contain the enabled operations. + * This capability is made obsolete by ones with better sync methods. + * + * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert + * zero-length RESERVED TLV to pad). + * TLV data has two sections. First is an array of statistics' IDs (2B each). + * Second 8B statistics themselves. Statistics are 8B aligned, meaning there + * may be a padding between sections. + * Number of statistics can be determined as floor(tlv.length / (2 + 8)). + * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV + * duplicate the old ones, so driver should be careful not to unnecessarily + * render both). + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan + * RX sync, rather than kernel-assisted sync. + */ +#define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 +#define NFP_NET_CFG_TLV_TYPE_RESERVED 1 +#define NFP_NET_CFG_TLV_TYPE_END 2 +#define NFP_NET_CFG_TLV_TYPE_ME_FREQ 3 +#define NFP_NET_CFG_TLV_TYPE_MBOX 4 +#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0 5 +#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1 6 +#define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 +#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ +#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13 + +struct device; + +/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities + * @me_freq_mhz: ME clock_freq (MHz) + * @mbox_off: vNIC mailbox area offset + * @mbox_len: vNIC mailbox area length + * @repr_cap: capabilities for representors + * @mbox_cmsg_types: cmsgs which can be passed through the mailbox + * @crypto_ops: supported crypto operations + * @crypto_enable_off: offset of crypto ops enable region + * @vnic_stats_off: offset of vNIC stats area + * @vnic_stats_cnt: number of vNIC stats + * @tls_resync_ss: TLS resync will be performed via stream scan + */ +struct nfp_net_tlv_caps { + u32 me_freq_mhz; + unsigned int mbox_off; + unsigned int mbox_len; + u32 repr_cap; + u32 mbox_cmsg_types; + u32 crypto_ops; + unsigned int crypto_enable_off; + unsigned int vnic_stats_off; + unsigned int vnic_stats_cnt; + unsigned int tls_resync_ss:1; +}; + +int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, + struct nfp_net_tlv_caps *caps); +#endif /* _NFP_NET_CTRL_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c new file mode 100644 index 000000000..a614df095 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/ethtool.h> +#include <linux/vmalloc.h> + +#include "nfp_asm.h" +#include "nfp_main.h" +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_nffw.h" +#include "nfpcore/nfp6000/nfp6000.h" + +#define NFP_DUMP_SPEC_RTSYM "_abi_dump_spec" + +#define ALIGN8(x) ALIGN(x, 8) + +enum nfp_dumpspec_type { + NFP_DUMPSPEC_TYPE_CPP_CSR = 0, + NFP_DUMPSPEC_TYPE_XPB_CSR = 1, + NFP_DUMPSPEC_TYPE_ME_CSR = 2, + NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR = 3, + NFP_DUMPSPEC_TYPE_RTSYM = 4, + NFP_DUMPSPEC_TYPE_HWINFO = 5, + NFP_DUMPSPEC_TYPE_FWNAME = 6, + NFP_DUMPSPEC_TYPE_HWINFO_FIELD = 7, + NFP_DUMPSPEC_TYPE_PROLOG = 10000, + NFP_DUMPSPEC_TYPE_ERROR = 10001, +}; + +/* The following structs must be carefully aligned so that they can be used to + * interpret the binary dumpspec and populate the dump data in a deterministic + * way. + */ + +/* generic type plus length */ +struct nfp_dump_tl { + __be32 type; + __be32 length; /* chunk length to follow, aligned to 8 bytes */ + char data[]; +}; + +/* NFP CPP parameters */ +struct nfp_dumpspec_cpp_isl_id { + u8 target; + u8 action; + u8 token; + u8 island; +}; + +struct nfp_dump_common_cpp { + struct nfp_dumpspec_cpp_isl_id cpp_id; + __be32 offset; /* address to start dump */ + __be32 dump_length; /* total bytes to dump, aligned to reg size */ +}; + +/* CSR dumpables */ +struct nfp_dumpspec_csr { + struct nfp_dump_tl tl; + struct nfp_dump_common_cpp cpp; + __be32 register_width; /* in bits */ +}; + +struct nfp_dumpspec_rtsym { + struct nfp_dump_tl tl; + char rtsym[]; +}; + +/* header for register dumpable */ +struct nfp_dump_csr { + struct nfp_dump_tl tl; + struct nfp_dump_common_cpp cpp; + __be32 register_width; /* in bits */ + __be32 error; /* error code encountered while reading */ + __be32 error_offset; /* offset being read when error occurred */ +}; + +struct nfp_dump_rtsym { + struct nfp_dump_tl tl; + struct nfp_dump_common_cpp cpp; + __be32 error; /* error code encountered while reading */ + u8 padded_name_length; /* pad so data starts at 8 byte boundary */ + char rtsym[]; + /* after padded_name_length, there is dump_length data */ +}; + +struct nfp_dump_prolog { + struct nfp_dump_tl tl; + __be32 dump_level; +}; + +struct nfp_dump_error { + struct nfp_dump_tl tl; + __be32 error; + char padding[4]; + char spec[]; +}; + +/* to track state through debug size calculation TLV traversal */ +struct nfp_level_size { + __be32 requested_level; /* input */ + u32 total_size; /* output */ +}; + +/* to track state during debug dump creation TLV traversal */ +struct nfp_dump_state { + __be32 requested_level; /* input param */ + u32 dumped_size; /* adds up to size of dumped data */ + u32 buf_size; /* size of buffer pointer to by p */ + void *p; /* current point in dump buffer */ +}; + +typedef int (*nfp_tlv_visit)(struct nfp_pf *pf, struct nfp_dump_tl *tl, + void *param); + +static int +nfp_traverse_tlvs(struct nfp_pf *pf, void *data, u32 data_length, void *param, + nfp_tlv_visit tlv_visit) +{ + long long remaining = data_length; + struct nfp_dump_tl *tl; + u32 total_tlv_size; + void *p = data; + int err; + + while (remaining >= sizeof(*tl)) { + tl = p; + if (!tl->type && !tl->length) + break; + + if (be32_to_cpu(tl->length) > remaining - sizeof(*tl)) + return -EINVAL; + + total_tlv_size = sizeof(*tl) + be32_to_cpu(tl->length); + + /* Spec TLVs should be aligned to 4 bytes. */ + if (total_tlv_size % 4 != 0) + return -EINVAL; + + p += total_tlv_size; + remaining -= total_tlv_size; + err = tlv_visit(pf, tl, param); + if (err) + return err; + } + + return 0; +} + +static u32 nfp_get_numeric_cpp_id(struct nfp_dumpspec_cpp_isl_id *cpp_id) +{ + return NFP_CPP_ISLAND_ID(cpp_id->target, cpp_id->action, cpp_id->token, + cpp_id->island); +} + +struct nfp_dumpspec * +nfp_net_dump_load_dumpspec(struct nfp_cpp *cpp, struct nfp_rtsym_table *rtbl) +{ + const struct nfp_rtsym *specsym; + struct nfp_dumpspec *dumpspec; + int bytes_read; + u64 sym_size; + + specsym = nfp_rtsym_lookup(rtbl, NFP_DUMP_SPEC_RTSYM); + if (!specsym) + return NULL; + sym_size = nfp_rtsym_size(specsym); + + /* expected size of this buffer is in the order of tens of kilobytes */ + dumpspec = vmalloc(sizeof(*dumpspec) + sym_size); + if (!dumpspec) + return NULL; + dumpspec->size = sym_size; + + bytes_read = nfp_rtsym_read(cpp, specsym, 0, dumpspec->data, sym_size); + if (bytes_read != sym_size) { + vfree(dumpspec); + nfp_warn(cpp, "Debug dump specification read failed.\n"); + return NULL; + } + + return dumpspec; +} + +static int nfp_dump_error_tlv_size(struct nfp_dump_tl *spec) +{ + return ALIGN8(sizeof(struct nfp_dump_error) + sizeof(*spec) + + be32_to_cpu(spec->length)); +} + +static int nfp_calc_fwname_tlv_size(struct nfp_pf *pf) +{ + u32 fwname_len = strlen(nfp_mip_name(pf->mip)); + + return sizeof(struct nfp_dump_tl) + ALIGN8(fwname_len + 1); +} + +static int nfp_calc_hwinfo_field_sz(struct nfp_pf *pf, struct nfp_dump_tl *spec) +{ + u32 tl_len, key_len; + const char *value; + + tl_len = be32_to_cpu(spec->length); + key_len = strnlen(spec->data, tl_len); + if (key_len == tl_len) + return nfp_dump_error_tlv_size(spec); + + value = nfp_hwinfo_lookup(pf->hwinfo, spec->data); + if (!value) + return nfp_dump_error_tlv_size(spec); + + return sizeof(struct nfp_dump_tl) + ALIGN8(key_len + strlen(value) + 2); +} + +static bool nfp_csr_spec_valid(struct nfp_dumpspec_csr *spec_csr) +{ + u32 required_read_sz = sizeof(*spec_csr) - sizeof(spec_csr->tl); + u32 available_sz = be32_to_cpu(spec_csr->tl.length); + u32 reg_width; + + if (available_sz < required_read_sz) + return false; + + reg_width = be32_to_cpu(spec_csr->register_width); + + return reg_width == 32 || reg_width == 64; +} + +static int +nfp_calc_rtsym_dump_sz(struct nfp_pf *pf, struct nfp_dump_tl *spec) +{ + struct nfp_rtsym_table *rtbl = pf->rtbl; + struct nfp_dumpspec_rtsym *spec_rtsym; + const struct nfp_rtsym *sym; + u32 tl_len, key_len; + + spec_rtsym = (struct nfp_dumpspec_rtsym *)spec; + tl_len = be32_to_cpu(spec->length); + key_len = strnlen(spec_rtsym->rtsym, tl_len); + if (key_len == tl_len) + return nfp_dump_error_tlv_size(spec); + + sym = nfp_rtsym_lookup(rtbl, spec_rtsym->rtsym); + if (!sym) + return nfp_dump_error_tlv_size(spec); + + return ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1) + + ALIGN8(nfp_rtsym_size(sym)); +} + +static int +nfp_add_tlv_size(struct nfp_pf *pf, struct nfp_dump_tl *tl, void *param) +{ + struct nfp_dumpspec_csr *spec_csr; + u32 *size = param; + u32 hwinfo_size; + + switch (be32_to_cpu(tl->type)) { + case NFP_DUMPSPEC_TYPE_FWNAME: + *size += nfp_calc_fwname_tlv_size(pf); + break; + case NFP_DUMPSPEC_TYPE_CPP_CSR: + case NFP_DUMPSPEC_TYPE_XPB_CSR: + case NFP_DUMPSPEC_TYPE_ME_CSR: + spec_csr = (struct nfp_dumpspec_csr *)tl; + if (!nfp_csr_spec_valid(spec_csr)) + *size += nfp_dump_error_tlv_size(tl); + else + *size += ALIGN8(sizeof(struct nfp_dump_csr)) + + ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length)); + break; + case NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR: + spec_csr = (struct nfp_dumpspec_csr *)tl; + if (!nfp_csr_spec_valid(spec_csr)) + *size += nfp_dump_error_tlv_size(tl); + else + *size += ALIGN8(sizeof(struct nfp_dump_csr)) + + ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length) * + NFP_IND_NUM_CONTEXTS); + break; + case NFP_DUMPSPEC_TYPE_RTSYM: + *size += nfp_calc_rtsym_dump_sz(pf, tl); + break; + case NFP_DUMPSPEC_TYPE_HWINFO: + hwinfo_size = nfp_hwinfo_get_packed_str_size(pf->hwinfo); + *size += sizeof(struct nfp_dump_tl) + ALIGN8(hwinfo_size); + break; + case NFP_DUMPSPEC_TYPE_HWINFO_FIELD: + *size += nfp_calc_hwinfo_field_sz(pf, tl); + break; + default: + *size += nfp_dump_error_tlv_size(tl); + break; + } + + return 0; +} + +static int +nfp_calc_specific_level_size(struct nfp_pf *pf, struct nfp_dump_tl *dump_level, + void *param) +{ + struct nfp_level_size *lev_sz = param; + + if (dump_level->type != lev_sz->requested_level) + return 0; + + return nfp_traverse_tlvs(pf, dump_level->data, + be32_to_cpu(dump_level->length), + &lev_sz->total_size, nfp_add_tlv_size); +} + +s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec, + u32 flag) +{ + struct nfp_level_size lev_sz; + int err; + + lev_sz.requested_level = cpu_to_be32(flag); + lev_sz.total_size = ALIGN8(sizeof(struct nfp_dump_prolog)); + + err = nfp_traverse_tlvs(pf, spec->data, spec->size, &lev_sz, + nfp_calc_specific_level_size); + if (err) + return err; + + return lev_sz.total_size; +} + +static int nfp_add_tlv(u32 type, u32 total_tlv_sz, struct nfp_dump_state *dump) +{ + struct nfp_dump_tl *tl = dump->p; + + if (total_tlv_sz > dump->buf_size) + return -ENOSPC; + + if (dump->buf_size - total_tlv_sz < dump->dumped_size) + return -ENOSPC; + + tl->type = cpu_to_be32(type); + tl->length = cpu_to_be32(total_tlv_sz - sizeof(*tl)); + + dump->dumped_size += total_tlv_sz; + dump->p += total_tlv_sz; + + return 0; +} + +static int +nfp_dump_error_tlv(struct nfp_dump_tl *spec, int error, + struct nfp_dump_state *dump) +{ + struct nfp_dump_error *dump_header = dump->p; + u32 total_spec_size, total_size; + int err; + + total_spec_size = sizeof(*spec) + be32_to_cpu(spec->length); + total_size = ALIGN8(sizeof(*dump_header) + total_spec_size); + + err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_ERROR, total_size, dump); + if (err) + return err; + + dump_header->error = cpu_to_be32(error); + memcpy(dump_header->spec, spec, total_spec_size); + + return 0; +} + +static int nfp_dump_fwname(struct nfp_pf *pf, struct nfp_dump_state *dump) +{ + struct nfp_dump_tl *dump_header = dump->p; + u32 fwname_len, total_size; + const char *fwname; + int err; + + fwname = nfp_mip_name(pf->mip); + fwname_len = strlen(fwname); + total_size = sizeof(*dump_header) + ALIGN8(fwname_len + 1); + + err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_FWNAME, total_size, dump); + if (err) + return err; + + memcpy(dump_header->data, fwname, fwname_len); + + return 0; +} + +static int +nfp_dump_hwinfo(struct nfp_pf *pf, struct nfp_dump_tl *spec, + struct nfp_dump_state *dump) +{ + struct nfp_dump_tl *dump_header = dump->p; + u32 hwinfo_size, total_size; + char *hwinfo; + int err; + + hwinfo = nfp_hwinfo_get_packed_strings(pf->hwinfo); + hwinfo_size = nfp_hwinfo_get_packed_str_size(pf->hwinfo); + total_size = sizeof(*dump_header) + ALIGN8(hwinfo_size); + + err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_HWINFO, total_size, dump); + if (err) + return err; + + memcpy(dump_header->data, hwinfo, hwinfo_size); + + return 0; +} + +static int nfp_dump_hwinfo_field(struct nfp_pf *pf, struct nfp_dump_tl *spec, + struct nfp_dump_state *dump) +{ + struct nfp_dump_tl *dump_header = dump->p; + u32 tl_len, key_len, val_len; + const char *key, *value; + u32 total_size; + int err; + + tl_len = be32_to_cpu(spec->length); + key_len = strnlen(spec->data, tl_len); + if (key_len == tl_len) + return nfp_dump_error_tlv(spec, -EINVAL, dump); + + key = spec->data; + value = nfp_hwinfo_lookup(pf->hwinfo, key); + if (!value) + return nfp_dump_error_tlv(spec, -ENOENT, dump); + + val_len = strlen(value); + total_size = sizeof(*dump_header) + ALIGN8(key_len + val_len + 2); + err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_HWINFO_FIELD, total_size, dump); + if (err) + return err; + + memcpy(dump_header->data, key, key_len + 1); + memcpy(dump_header->data + key_len + 1, value, val_len + 1); + + return 0; +} + +static bool is_xpb_read(struct nfp_dumpspec_cpp_isl_id *cpp_id) +{ + return cpp_id->target == NFP_CPP_TARGET_ISLAND_XPB && + cpp_id->action == 0 && cpp_id->token == 0; +} + +static int +nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, + struct nfp_dump_state *dump) +{ + struct nfp_dump_csr *dump_header = dump->p; + u32 reg_sz, header_size, total_size; + u32 cpp_rd_addr, max_rd_addr; + int bytes_read; + void *dest; + u32 cpp_id; + int err; + + if (!nfp_csr_spec_valid(spec_csr)) + return nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump); + + reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE; + header_size = ALIGN8(sizeof(*dump_header)); + total_size = header_size + + ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length)); + dest = dump->p + header_size; + + err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump); + if (err) + return err; + + dump_header->cpp = spec_csr->cpp; + dump_header->register_width = spec_csr->register_width; + + cpp_id = nfp_get_numeric_cpp_id(&spec_csr->cpp.cpp_id); + cpp_rd_addr = be32_to_cpu(spec_csr->cpp.offset); + max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length); + + while (cpp_rd_addr < max_rd_addr) { + if (is_xpb_read(&spec_csr->cpp.cpp_id)) { + err = nfp_xpb_readl(pf->cpp, cpp_rd_addr, (u32 *)dest); + } else { + bytes_read = nfp_cpp_read(pf->cpp, cpp_id, cpp_rd_addr, + dest, reg_sz); + err = bytes_read == reg_sz ? 0 : -EIO; + } + if (err) { + dump_header->error = cpu_to_be32(err); + dump_header->error_offset = cpu_to_be32(cpp_rd_addr); + break; + } + cpp_rd_addr += reg_sz; + dest += reg_sz; + } + + return 0; +} + +/* Write context to CSRCtxPtr, then read from it. Then the value can be read + * from IndCtxStatus. + */ +static int +nfp_read_indirect_csr(struct nfp_cpp *cpp, + struct nfp_dumpspec_cpp_isl_id cpp_params, u32 offset, + u32 reg_sz, u32 context, void *dest) +{ + u32 csr_ctx_ptr_offs; + u32 cpp_id; + int result; + + csr_ctx_ptr_offs = nfp_get_ind_csr_ctx_ptr_offs(offset); + cpp_id = NFP_CPP_ISLAND_ID(cpp_params.target, + NFP_IND_ME_REFL_WR_SIG_INIT, + cpp_params.token, cpp_params.island); + result = nfp_cpp_writel(cpp, cpp_id, csr_ctx_ptr_offs, context); + if (result) + return result; + + cpp_id = nfp_get_numeric_cpp_id(&cpp_params); + result = nfp_cpp_read(cpp, cpp_id, csr_ctx_ptr_offs, dest, reg_sz); + if (result != reg_sz) + return result < 0 ? result : -EIO; + + result = nfp_cpp_read(cpp, cpp_id, offset, dest, reg_sz); + if (result != reg_sz) + return result < 0 ? result : -EIO; + + return 0; +} + +static int +nfp_read_all_indirect_csr_ctx(struct nfp_cpp *cpp, + struct nfp_dumpspec_csr *spec_csr, u32 address, + u32 reg_sz, void *dest) +{ + u32 ctx; + int err; + + for (ctx = 0; ctx < NFP_IND_NUM_CONTEXTS; ctx++) { + err = nfp_read_indirect_csr(cpp, spec_csr->cpp.cpp_id, address, + reg_sz, ctx, dest + ctx * reg_sz); + if (err) + return err; + } + + return 0; +} + +static int +nfp_dump_indirect_csr_range(struct nfp_pf *pf, + struct nfp_dumpspec_csr *spec_csr, + struct nfp_dump_state *dump) +{ + struct nfp_dump_csr *dump_header = dump->p; + u32 reg_sz, header_size, total_size; + u32 cpp_rd_addr, max_rd_addr; + u32 reg_data_length; + void *dest; + int err; + + if (!nfp_csr_spec_valid(spec_csr)) + return nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump); + + reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE; + header_size = ALIGN8(sizeof(*dump_header)); + reg_data_length = be32_to_cpu(spec_csr->cpp.dump_length) * + NFP_IND_NUM_CONTEXTS; + total_size = header_size + ALIGN8(reg_data_length); + dest = dump->p + header_size; + + err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump); + if (err) + return err; + + dump_header->cpp = spec_csr->cpp; + dump_header->register_width = spec_csr->register_width; + + cpp_rd_addr = be32_to_cpu(spec_csr->cpp.offset); + max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length); + while (cpp_rd_addr < max_rd_addr) { + err = nfp_read_all_indirect_csr_ctx(pf->cpp, spec_csr, + cpp_rd_addr, reg_sz, dest); + if (err) { + dump_header->error = cpu_to_be32(err); + dump_header->error_offset = cpu_to_be32(cpp_rd_addr); + break; + } + cpp_rd_addr += reg_sz; + dest += reg_sz * NFP_IND_NUM_CONTEXTS; + } + + return 0; +} + +static int +nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, + struct nfp_dump_state *dump) +{ + struct nfp_dump_rtsym *dump_header = dump->p; + struct nfp_dumpspec_cpp_isl_id cpp_params; + struct nfp_rtsym_table *rtbl = pf->rtbl; + u32 header_size, total_size, sym_size; + const struct nfp_rtsym *sym; + u32 tl_len, key_len; + int bytes_read; + void *dest; + int err; + + tl_len = be32_to_cpu(spec->tl.length); + key_len = strnlen(spec->rtsym, tl_len); + if (key_len == tl_len) + return nfp_dump_error_tlv(&spec->tl, -EINVAL, dump); + + sym = nfp_rtsym_lookup(rtbl, spec->rtsym); + if (!sym) + return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump); + + sym_size = nfp_rtsym_size(sym); + header_size = + ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1); + total_size = header_size + ALIGN8(sym_size); + dest = dump->p + header_size; + + err = nfp_add_tlv(be32_to_cpu(spec->tl.type), total_size, dump); + if (err) + return err; + + dump_header->padded_name_length = + header_size - offsetof(struct nfp_dump_rtsym, rtsym); + memcpy(dump_header->rtsym, spec->rtsym, key_len + 1); + dump_header->cpp.dump_length = cpu_to_be32(sym_size); + + if (sym->type != NFP_RTSYM_TYPE_ABS) { + cpp_params.target = sym->target; + cpp_params.action = NFP_CPP_ACTION_RW; + cpp_params.token = 0; + cpp_params.island = sym->domain; + dump_header->cpp.cpp_id = cpp_params; + dump_header->cpp.offset = cpu_to_be32(sym->addr); + } + + bytes_read = nfp_rtsym_read(pf->cpp, sym, 0, dest, sym_size); + if (bytes_read != sym_size) { + if (bytes_read >= 0) + bytes_read = -EIO; + dump_header->error = cpu_to_be32(bytes_read); + } + + return 0; +} + +static int +nfp_dump_for_tlv(struct nfp_pf *pf, struct nfp_dump_tl *tl, void *param) +{ + struct nfp_dumpspec_rtsym *spec_rtsym; + struct nfp_dump_state *dump = param; + struct nfp_dumpspec_csr *spec_csr; + int err; + + switch (be32_to_cpu(tl->type)) { + case NFP_DUMPSPEC_TYPE_FWNAME: + err = nfp_dump_fwname(pf, dump); + if (err) + return err; + break; + case NFP_DUMPSPEC_TYPE_CPP_CSR: + case NFP_DUMPSPEC_TYPE_XPB_CSR: + case NFP_DUMPSPEC_TYPE_ME_CSR: + spec_csr = (struct nfp_dumpspec_csr *)tl; + err = nfp_dump_csr_range(pf, spec_csr, dump); + if (err) + return err; + break; + case NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR: + spec_csr = (struct nfp_dumpspec_csr *)tl; + err = nfp_dump_indirect_csr_range(pf, spec_csr, dump); + if (err) + return err; + break; + case NFP_DUMPSPEC_TYPE_RTSYM: + spec_rtsym = (struct nfp_dumpspec_rtsym *)tl; + err = nfp_dump_single_rtsym(pf, spec_rtsym, dump); + if (err) + return err; + break; + case NFP_DUMPSPEC_TYPE_HWINFO: + err = nfp_dump_hwinfo(pf, tl, dump); + if (err) + return err; + break; + case NFP_DUMPSPEC_TYPE_HWINFO_FIELD: + err = nfp_dump_hwinfo_field(pf, tl, dump); + if (err) + return err; + break; + default: + err = nfp_dump_error_tlv(tl, -EOPNOTSUPP, dump); + if (err) + return err; + } + + return 0; +} + +static int +nfp_dump_specific_level(struct nfp_pf *pf, struct nfp_dump_tl *dump_level, + void *param) +{ + struct nfp_dump_state *dump = param; + + if (dump_level->type != dump->requested_level) + return 0; + + return nfp_traverse_tlvs(pf, dump_level->data, + be32_to_cpu(dump_level->length), dump, + nfp_dump_for_tlv); +} + +static int nfp_dump_populate_prolog(struct nfp_dump_state *dump) +{ + struct nfp_dump_prolog *prolog = dump->p; + u32 total_size; + int err; + + total_size = ALIGN8(sizeof(*prolog)); + + err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_PROLOG, total_size, dump); + if (err) + return err; + + prolog->dump_level = dump->requested_level; + + return 0; +} + +int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec, + struct ethtool_dump *dump_param, void *dest) +{ + struct nfp_dump_state dump; + int err; + + dump.requested_level = cpu_to_be32(dump_param->flag); + dump.dumped_size = 0; + dump.p = dest; + dump.buf_size = dump_param->len; + + err = nfp_dump_populate_prolog(&dump); + if (err) + return err; + + err = nfp_traverse_tlvs(pf, spec->data, spec->size, &dump, + nfp_dump_specific_level); + if (err) + return err; + + /* Set size of actual dump, to trigger warning if different from + * calculated size. + */ + dump_param->len = dump.dumped_size; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c new file mode 100644 index 000000000..d8b735ccf --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> + +#include "nfp_net.h" +#include "nfp_net_dp.h" + +static struct dentry *nfp_dir; + +static int nfp_rx_q_show(struct seq_file *file, void *data) +{ + struct nfp_net_r_vector *r_vec = file->private; + struct nfp_net_rx_ring *rx_ring; + int fl_rd_p, fl_wr_p, rxd_cnt; + struct nfp_net_rx_desc *rxd; + struct nfp_net *nn; + void *frag; + int i; + + rtnl_lock(); + + if (!r_vec->nfp_net || !r_vec->rx_ring) + goto out; + nn = r_vec->nfp_net; + rx_ring = r_vec->rx_ring; + if (!nfp_net_running(nn)) + goto out; + + rxd_cnt = rx_ring->cnt; + + fl_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_fl); + fl_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_fl); + + seq_printf(file, "RX[%02d,%02d]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u FL_RD=%u FL_WR=%u\n", + rx_ring->idx, rx_ring->fl_qcidx, + rx_ring->cnt, &rx_ring->dma, rx_ring->rxds, + rx_ring->rd_p, rx_ring->wr_p, fl_rd_p, fl_wr_p); + + for (i = 0; i < rxd_cnt; i++) { + rxd = &rx_ring->rxds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x", i, + rxd->vals[0], rxd->vals[1]); + + if (!r_vec->xsk_pool) { + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + } else { + if (rx_ring->xsk_rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->xsk_rxbufs[i].dma_addr); + } + + if (i == rx_ring->rd_p % rxd_cnt) + seq_puts(file, " H_RD "); + if (i == rx_ring->wr_p % rxd_cnt) + seq_puts(file, " H_WR "); + if (i == fl_rd_p % rxd_cnt) + seq_puts(file, " FL_RD"); + if (i == fl_wr_p % rxd_cnt) + seq_puts(file, " FL_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(nfp_rx_q); + +static int nfp_tx_q_show(struct seq_file *file, void *data); +DEFINE_SHOW_ATTRIBUTE(nfp_tx_q); + +static int nfp_tx_q_show(struct seq_file *file, void *data) +{ + struct nfp_net_r_vector *r_vec = file->private; + struct nfp_net_tx_ring *tx_ring; + struct nfp_net *nn; + int d_rd_p, d_wr_p; + + rtnl_lock(); + + if (debugfs_real_fops(file->file) == &nfp_tx_q_fops) + tx_ring = r_vec->tx_ring; + else + tx_ring = r_vec->xdp_ring; + if (!r_vec->nfp_net || !tx_ring) + goto out; + nn = r_vec->nfp_net; + if (!nfp_net_running(nn)) + goto out; + + d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); + + seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u", + tx_ring->idx, tx_ring->qcidx, + tx_ring == r_vec->tx_ring ? "" : "xdp", + tx_ring->cnt, &tx_ring->dma, tx_ring->txds, + tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + if (tx_ring->txrwb) + seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb); + seq_putc(file, '\n'); + + nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring, + d_rd_p, d_wr_p); +out: + rtnl_unlock(); + return 0; +} + +static int nfp_xdp_q_show(struct seq_file *file, void *data) +{ + return nfp_tx_q_show(file, data); +} +DEFINE_SHOW_ATTRIBUTE(nfp_xdp_q); + +void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir) +{ + struct dentry *queues, *tx, *rx, *xdp; + char name[20]; + int i; + + if (IS_ERR_OR_NULL(nfp_dir)) + return; + + if (nfp_net_is_data_vnic(nn)) + sprintf(name, "vnic%d", nn->id); + else + strcpy(name, "ctrl-vnic"); + nn->debugfs_dir = debugfs_create_dir(name, ddir); + + /* Create queue debugging sub-tree */ + queues = debugfs_create_dir("queue", nn->debugfs_dir); + + rx = debugfs_create_dir("rx", queues); + tx = debugfs_create_dir("tx", queues); + xdp = debugfs_create_dir("xdp", queues); + + for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) { + sprintf(name, "%d", i); + debugfs_create_file(name, 0400, rx, + &nn->r_vecs[i], &nfp_rx_q_fops); + debugfs_create_file(name, 0400, xdp, + &nn->r_vecs[i], &nfp_xdp_q_fops); + } + + for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) { + sprintf(name, "%d", i); + debugfs_create_file(name, 0400, tx, + &nn->r_vecs[i], &nfp_tx_q_fops); + } +} + +struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev) +{ + return debugfs_create_dir(pci_name(pdev), nfp_dir); +} + +void nfp_net_debugfs_dir_clean(struct dentry **dir) +{ + debugfs_remove_recursive(*dir); + *dir = NULL; +} + +void nfp_net_debugfs_create(void) +{ + nfp_dir = debugfs_create_dir("nfp_net", NULL); +} + +void nfp_net_debugfs_destroy(void) +{ + debugfs_remove_recursive(nfp_dir); + nfp_dir = NULL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c new file mode 100644 index 000000000..550df83b7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#include "nfp_app.h" +#include "nfp_net_dp.h" +#include "nfp_net_xsk.h" + +/** + * nfp_net_rx_alloc_one() - Allocate and map page frag for RX + * @dp: NFP Net data path struct + * @dma_addr: Pointer to storage for DMA address (output param) + * + * This function will allcate a new page frag, map it for DMA. + * + * Return: allocated page frag or NULL on failure. + */ +void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = netdev_alloc_frag(dp->fl_bufsz); + } else { + struct page *page; + + page = alloc_page(GFP_KERNEL); + frag = page ? page_address(page) : NULL; + } + if (!frag) { + nn_dp_warn(dp, "Failed to alloc receive page frag\n"); + return NULL; + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring + * @tx_ring: TX ring structure + * @dp: NFP Net data path struct + * @r_vec: IRQ vector servicing this ring + * @idx: Ring index + * @is_xdp: Is this an XDP TX ring? + */ +static void +nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, unsigned int idx, + bool is_xdp) +{ + struct nfp_net *nn = r_vec->nfp_net; + + tx_ring->idx = idx; + tx_ring->r_vec = r_vec; + tx_ring->is_xdp = is_xdp; + u64_stats_init(&tx_ring->r_vec->tx_sync); + + tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->txrwb = dp->txrwb ? &dp->txrwb[idx] : NULL; + tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); +} + +/** + * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring + * @rx_ring: RX ring structure + * @r_vec: IRQ vector servicing this ring + * @idx: Ring index + */ +static void +nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring, + struct nfp_net_r_vector *r_vec, unsigned int idx) +{ + struct nfp_net *nn = r_vec->nfp_net; + + rx_ring->idx = idx; + rx_ring->r_vec = r_vec; + u64_stats_init(&rx_ring->r_vec->rx_sync); + + rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; + rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); +} + +/** + * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable + * @rx_ring: RX ring structure + * + * Assumes that the device is stopped, must be idempotent. + */ +void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) +{ + unsigned int wr_idx, last_idx; + + /* wr_p == rd_p means ring was never fed FL bufs. RX rings are always + * kept at cnt - 1 FL bufs. + */ + if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0) + return; + + /* Move the empty entry to the end of the list */ + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + last_idx = rx_ring->cnt - 1; + if (rx_ring->r_vec->xsk_pool) { + rx_ring->xsk_rxbufs[wr_idx] = rx_ring->xsk_rxbufs[last_idx]; + memset(&rx_ring->xsk_rxbufs[last_idx], 0, + sizeof(*rx_ring->xsk_rxbufs)); + } else { + rx_ring->rxbufs[wr_idx] = rx_ring->rxbufs[last_idx]; + memset(&rx_ring->rxbufs[last_idx], 0, sizeof(*rx_ring->rxbufs)); + } + + memset(rx_ring->rxds, 0, rx_ring->size); + rx_ring->wr_p = 0; + rx_ring->rd_p = 0; +} + +/** + * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring + * @dp: NFP Net data path struct + * @rx_ring: RX ring to remove buffers from + * + * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1) + * entries. After device is disabled nfp_net_rx_ring_reset() must be called + * to restore required ring geometry. + */ +static void +nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) { + /* NULL skb can only happen when initial filling of the ring + * fails to allocate enough buffers and calls here to free + * already allocated ones. + */ + if (!rx_ring->rxbufs[i].frag) + continue; + + nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr); + nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog); + rx_ring->rxbufs[i].dma_addr = 0; + rx_ring->rxbufs[i].frag = NULL; + } +} + +/** + * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW) + * @dp: NFP Net data path struct + * @rx_ring: RX ring to remove buffers from + */ +static int +nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_rx_buf *rxbufs; + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return 0; + + rxbufs = rx_ring->rxbufs; + + for (i = 0; i < rx_ring->cnt - 1; i++) { + rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr); + if (!rxbufs[i].frag) { + nfp_net_rx_ring_bufs_free(dp, rx_ring); + return -ENOMEM; + } + } + + return 0; +} + +int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + unsigned int r; + + dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings), + GFP_KERNEL); + if (!dp->tx_rings) + return -ENOMEM; + + if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) { + dp->txrwb = dma_alloc_coherent(dp->dev, + dp->num_tx_rings * sizeof(u64), + &dp->txrwb_dma, GFP_KERNEL); + if (!dp->txrwb) + goto err_free_rings; + } + + for (r = 0; r < dp->num_tx_rings; r++) { + int bias = 0; + + if (r >= dp->num_stack_tx_rings) + bias = dp->num_stack_tx_rings; + + nfp_net_tx_ring_init(&dp->tx_rings[r], dp, + &nn->r_vecs[r - bias], r, bias); + + if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r])) + goto err_free_prev; + + if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r])) + goto err_free_ring; + } + + return 0; + +err_free_prev: + while (r--) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); +err_free_ring: + nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); + } + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); +err_free_rings: + kfree(dp->tx_rings); + return -ENOMEM; +} + +void nfp_net_tx_rings_free(struct nfp_net_dp *dp) +{ + unsigned int r; + + for (r = 0; r < dp->num_tx_rings; r++) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); + nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); + } + + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); + kfree(dp->tx_rings); +} + +/** + * nfp_net_rx_ring_free() - Free resources allocated to a RX ring + * @rx_ring: RX ring to free + */ +static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + kvfree(rx_ring->xsk_rxbufs); + else + kvfree(rx_ring->rxbufs); + + if (rx_ring->rxds) + dma_free_coherent(dp->dev, rx_ring->size, + rx_ring->rxds, rx_ring->dma); + + rx_ring->cnt = 0; + rx_ring->rxbufs = NULL; + rx_ring->xsk_rxbufs = NULL; + rx_ring->rxds = NULL; + rx_ring->dma = 0; + rx_ring->size = 0; +} + +/** + * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring + * @dp: NFP Net data path struct + * @rx_ring: RX ring to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int +nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) +{ + enum xdp_mem_type mem_type; + size_t rxbuf_sw_desc_sz; + int err; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + mem_type = MEM_TYPE_XSK_BUFF_POOL; + rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs); + } else { + mem_type = MEM_TYPE_PAGE_ORDER0; + rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs); + } + + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx, rx_ring->r_vec->napi.napi_id); + if (err < 0) + return err; + + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL); + if (err) + goto err_alloc; + } + + rx_ring->cnt = dp->rxd_cnt; + rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds)); + rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size, + &rx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!rx_ring->rxds) { + netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + rx_ring->cnt); + goto err_alloc; + } + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->xsk_rxbufs) + goto err_alloc; + } else { + rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + } + + return 0; + +err_alloc: + nfp_net_rx_ring_free(rx_ring); + return -ENOMEM; +} + +int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + unsigned int r; + + dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings), + GFP_KERNEL); + if (!dp->rx_rings) + return -ENOMEM; + + for (r = 0; r < dp->num_rx_rings; r++) { + nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r); + + if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r])) + goto err_free_prev; + + if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r])) + goto err_free_ring; + } + + return 0; + +err_free_prev: + while (r--) { + nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); +err_free_ring: + nfp_net_rx_ring_free(&dp->rx_rings[r]); + } + kfree(dp->rx_rings); + return -ENOMEM; +} + +void nfp_net_rx_rings_free(struct nfp_net_dp *dp) +{ + unsigned int r; + + for (r = 0; r < dp->num_rx_rings; r++) { + nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); + nfp_net_rx_ring_free(&dp->rx_rings[r]); + } + + kfree(dp->rx_rings); +} + +void +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, unsigned int idx) +{ + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry); +} + +void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); + if (tx_ring->txrwb) { + *tx_ring->txrwb = 0; + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), + nn->dp.txrwb_dma + idx * sizeof(u64)); + } + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry); +} + +void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0); + + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0); + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0); +} + +netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + return nn->dp.ops->xmit(skb, netdev); +} + +bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; + + return nn->dp.ops->ctrl_tx_one(nn, r_vec, skb, false); +} + +bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; + bool ret; + + spin_lock_bh(&r_vec->lock); + ret = nn->dp.ops->ctrl_tx_one(nn, r_vec, skb, false); + spin_unlock_bh(&r_vec->lock); + + return ret; +} + +bool nfp_net_vlan_strip(struct sk_buff *skb, const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta) +{ + u16 tpid = 0, tci = 0; + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) { + tpid = ETH_P_8021Q; + tci = le16_to_cpu(rxd->rxd.vlan); + } else if (meta->vlan.stripped) { + if (meta->vlan.tpid == NFP_NET_VLAN_CTAG) + tpid = ETH_P_8021Q; + else if (meta->vlan.tpid == NFP_NET_VLAN_STAG) + tpid = ETH_P_8021AD; + else + return false; + + tci = meta->vlan.tci; + } + if (tpid) + __vlan_hwaccel_put_tag(skb, htons(tpid), tci); + + return true; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h new file mode 100644 index 000000000..831c83ce0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef _NFP_NET_DP_ +#define _NFP_NET_DP_ + +#include "nfp_net.h" + +static inline dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag) +{ + return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +static inline void +nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr) +{ + dma_sync_single_for_device(dp->dev, dma_addr, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir); +} + +static inline void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, + dma_addr_t dma_addr) +{ + dma_unmap_single_attrs(dp->dev, dma_addr, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +static inline void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, + dma_addr_t dma_addr, + unsigned int len) +{ + dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM, + len, dp->rx_dma_dir); +} + +/** + * nfp_net_tx_full() - check if the TX ring is full + * @tx_ring: TX ring to check + * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) + * + * This function checks, based on the *host copy* of read/write + * pointer if a given TX ring is full. The real TX queue may have + * some newly made available slots. + * + * Return: True if the ring is full. + */ +static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +{ + return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); +} + +static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +{ + wmb(); /* drain writebuffer */ + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; +} + +static inline u32 +nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp) +{ + if (tx_ring->txrwb) + return *tx_ring->txrwb; + return nfp_qcp_rd_ptr_read(tx_ring->qcp_q); +} + +static inline void nfp_net_free_frag(void *frag, bool xdp) +{ + if (!xdp) + skb_free_frag(frag); + else + __free_page(virt_to_page(frag)); +} + +/** + * nfp_net_irq_unmask() - Unmask automasked interrupt + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * Clear the ICR for the IRQ entry. + */ +static inline void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +{ + nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); + nn_pci_flush(nn); +} + +struct seq_file; + +/* Common */ +void +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, unsigned int idx); +void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx); +void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx); + +void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr); +int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp); +int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp); +void nfp_net_rx_rings_free(struct nfp_net_dp *dp); +void nfp_net_tx_rings_free(struct nfp_net_dp *dp); +void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring); +bool nfp_net_vlan_strip(struct sk_buff *skb, const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta); + +enum nfp_nfd_version { + NFP_NFD_VER_NFD3, + NFP_NFD_VER_NFDK, +}; + +/** + * struct nfp_dp_ops - Hooks to wrap different implementation of different dp + * @version: Indicate dp type + * @tx_min_desc_per_pkt: Minimal TX descs needed for each packet + * @cap_mask: Mask of supported features + * @dma_mask: DMA addressing capability + * @poll: Napi poll for normal rx/tx + * @xsk_poll: Napi poll when xsk is enabled + * @ctrl_poll: Tasklet poll for ctrl rx/tx + * @xmit: Xmit for normal path + * @ctrl_tx_one: Xmit for ctrl path + * @rx_ring_fill_freelist: Give buffers from the ring to FW + * @tx_ring_alloc: Allocate resource for a TX ring + * @tx_ring_reset: Free any untransmitted buffers and reset pointers + * @tx_ring_free: Free resources allocated to a TX ring + * @tx_ring_bufs_alloc: Allocate resource for each TX buffer + * @tx_ring_bufs_free: Free resources allocated to each TX buffer + * @print_tx_descs: Show TX ring's info for debug purpose + */ +struct nfp_dp_ops { + enum nfp_nfd_version version; + unsigned int tx_min_desc_per_pkt; + u32 cap_mask; + u64 dma_mask; + + int (*poll)(struct napi_struct *napi, int budget); + int (*xsk_poll)(struct napi_struct *napi, int budget); + void (*ctrl_poll)(struct tasklet_struct *t); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *netdev); + bool (*ctrl_tx_one)(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); + void (*rx_ring_fill_freelist)(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); + int (*tx_ring_alloc)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_reset)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_free)(struct nfp_net_tx_ring *tx_ring); + int (*tx_ring_bufs_alloc)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_bufs_free)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + + void (*print_tx_descs)(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p); +}; + +static inline void +nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_reset(dp, tx_ring); +} + +static inline void +nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + dp->ops->rx_ring_fill_freelist(dp, rx_ring); +} + +static inline int +nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_alloc(dp, tx_ring); +} + +static inline void +nfp_net_tx_ring_free(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + dp->ops->tx_ring_free(tx_ring); +} + +static inline int +nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_bufs_alloc(dp, tx_ring); +} + +static inline void +nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + dp->ops->tx_ring_bufs_free(dp, tx_ring); +} + +static inline void +nfp_net_debugfs_print_tx_descs(struct seq_file *file, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + dp->ops->print_tx_descs(file, r_vec, tx_ring, d_rd_p, d_wr_p); +} + +extern const struct nfp_dp_ops nfp_nfd3_ops; +extern const struct nfp_dp_ops nfp_nfdk_ops; + +netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev); + +#endif /* _NFP_NET_DP_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c new file mode 100644 index 000000000..af376b900 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -0,0 +1,2133 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_net_ethtool.c + * Netronome network device driver: ethtool support + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#include <linux/bitfield.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/firmware.h> +#include <linux/sfp.h> + +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_dev.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net_ctrl.h" +#include "nfp_net_dp.h" +#include "nfp_net.h" +#include "nfp_port.h" +#include "nfpcore/nfp_cpp.h" + +struct nfp_et_stat { + char name[ETH_GSTRING_LEN]; + int off; +}; + +static const struct nfp_et_stat nfp_net_et_stats[] = { + /* Stats from the device */ + { "dev_rx_discards", NFP_NET_CFG_STATS_RX_DISCARDS }, + { "dev_rx_errors", NFP_NET_CFG_STATS_RX_ERRORS }, + { "dev_rx_bytes", NFP_NET_CFG_STATS_RX_OCTETS }, + { "dev_rx_uc_bytes", NFP_NET_CFG_STATS_RX_UC_OCTETS }, + { "dev_rx_mc_bytes", NFP_NET_CFG_STATS_RX_MC_OCTETS }, + { "dev_rx_bc_bytes", NFP_NET_CFG_STATS_RX_BC_OCTETS }, + { "dev_rx_pkts", NFP_NET_CFG_STATS_RX_FRAMES }, + { "dev_rx_mc_pkts", NFP_NET_CFG_STATS_RX_MC_FRAMES }, + { "dev_rx_bc_pkts", NFP_NET_CFG_STATS_RX_BC_FRAMES }, + + { "dev_tx_discards", NFP_NET_CFG_STATS_TX_DISCARDS }, + { "dev_tx_errors", NFP_NET_CFG_STATS_TX_ERRORS }, + { "dev_tx_bytes", NFP_NET_CFG_STATS_TX_OCTETS }, + { "dev_tx_uc_bytes", NFP_NET_CFG_STATS_TX_UC_OCTETS }, + { "dev_tx_mc_bytes", NFP_NET_CFG_STATS_TX_MC_OCTETS }, + { "dev_tx_bc_bytes", NFP_NET_CFG_STATS_TX_BC_OCTETS }, + { "dev_tx_pkts", NFP_NET_CFG_STATS_TX_FRAMES }, + { "dev_tx_mc_pkts", NFP_NET_CFG_STATS_TX_MC_FRAMES }, + { "dev_tx_bc_pkts", NFP_NET_CFG_STATS_TX_BC_FRAMES }, + + { "bpf_pass_pkts", NFP_NET_CFG_STATS_APP0_FRAMES }, + { "bpf_pass_bytes", NFP_NET_CFG_STATS_APP0_BYTES }, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + { "bpf_app1_pkts", NFP_NET_CFG_STATS_APP1_FRAMES }, + { "bpf_app1_bytes", NFP_NET_CFG_STATS_APP1_BYTES }, + { "bpf_app2_pkts", NFP_NET_CFG_STATS_APP2_FRAMES }, + { "bpf_app2_bytes", NFP_NET_CFG_STATS_APP2_BYTES }, + { "bpf_app3_pkts", NFP_NET_CFG_STATS_APP3_FRAMES }, + { "bpf_app3_bytes", NFP_NET_CFG_STATS_APP3_BYTES }, +}; + +static const struct nfp_et_stat nfp_mac_et_stats[] = { + { "rx_octets", NFP_MAC_STATS_RX_IN_OCTETS, }, + { "rx_frame_too_long_errors", + NFP_MAC_STATS_RX_FRAME_TOO_LONG_ERRORS, }, + { "rx_range_length_errors", NFP_MAC_STATS_RX_RANGE_LENGTH_ERRORS, }, + { "rx_vlan_received_ok", NFP_MAC_STATS_RX_VLAN_RECEIVED_OK, }, + { "rx_errors", NFP_MAC_STATS_RX_IN_ERRORS, }, + { "rx_broadcast_pkts", NFP_MAC_STATS_RX_IN_BROADCAST_PKTS, }, + { "rx_drop_events", NFP_MAC_STATS_RX_DROP_EVENTS, }, + { "rx_alignment_errors", NFP_MAC_STATS_RX_ALIGNMENT_ERRORS, }, + { "rx_pause_mac_ctrl_frames", + NFP_MAC_STATS_RX_PAUSE_MAC_CTRL_FRAMES, }, + { "rx_frames_received_ok", NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK, }, + { "rx_frame_check_sequence_errors", + NFP_MAC_STATS_RX_FRAME_CHECK_SEQUENCE_ERRORS, }, + { "rx_unicast_pkts", NFP_MAC_STATS_RX_UNICAST_PKTS, }, + { "rx_multicast_pkts", NFP_MAC_STATS_RX_MULTICAST_PKTS, }, + { "rx_pkts", NFP_MAC_STATS_RX_PKTS, }, + { "rx_undersize_pkts", NFP_MAC_STATS_RX_UNDERSIZE_PKTS, }, + { "rx_pkts_64_octets", NFP_MAC_STATS_RX_PKTS_64_OCTETS, }, + { "rx_pkts_65_to_127_octets", + NFP_MAC_STATS_RX_PKTS_65_TO_127_OCTETS, }, + { "rx_pkts_128_to_255_octets", + NFP_MAC_STATS_RX_PKTS_128_TO_255_OCTETS, }, + { "rx_pkts_256_to_511_octets", + NFP_MAC_STATS_RX_PKTS_256_TO_511_OCTETS, }, + { "rx_pkts_512_to_1023_octets", + NFP_MAC_STATS_RX_PKTS_512_TO_1023_OCTETS, }, + { "rx_pkts_1024_to_1518_octets", + NFP_MAC_STATS_RX_PKTS_1024_TO_1518_OCTETS, }, + { "rx_pkts_1519_to_max_octets", + NFP_MAC_STATS_RX_PKTS_1519_TO_MAX_OCTETS, }, + { "rx_jabbers", NFP_MAC_STATS_RX_JABBERS, }, + { "rx_fragments", NFP_MAC_STATS_RX_FRAGMENTS, }, + { "rx_oversize_pkts", NFP_MAC_STATS_RX_OVERSIZE_PKTS, }, + { "rx_pause_frames_class0", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS0, }, + { "rx_pause_frames_class1", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS1, }, + { "rx_pause_frames_class2", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS2, }, + { "rx_pause_frames_class3", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS3, }, + { "rx_pause_frames_class4", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS4, }, + { "rx_pause_frames_class5", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS5, }, + { "rx_pause_frames_class6", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS6, }, + { "rx_pause_frames_class7", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS7, }, + { "rx_mac_ctrl_frames_received", + NFP_MAC_STATS_RX_MAC_CTRL_FRAMES_RECEIVED, }, + { "rx_mac_head_drop", NFP_MAC_STATS_RX_MAC_HEAD_DROP, }, + { "tx_queue_drop", NFP_MAC_STATS_TX_QUEUE_DROP, }, + { "tx_octets", NFP_MAC_STATS_TX_OUT_OCTETS, }, + { "tx_vlan_transmitted_ok", NFP_MAC_STATS_TX_VLAN_TRANSMITTED_OK, }, + { "tx_errors", NFP_MAC_STATS_TX_OUT_ERRORS, }, + { "tx_broadcast_pkts", NFP_MAC_STATS_TX_BROADCAST_PKTS, }, + { "tx_pause_mac_ctrl_frames", + NFP_MAC_STATS_TX_PAUSE_MAC_CTRL_FRAMES, }, + { "tx_frames_transmitted_ok", + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK, }, + { "tx_unicast_pkts", NFP_MAC_STATS_TX_UNICAST_PKTS, }, + { "tx_multicast_pkts", NFP_MAC_STATS_TX_MULTICAST_PKTS, }, + { "tx_pkts_64_octets", NFP_MAC_STATS_TX_PKTS_64_OCTETS, }, + { "tx_pkts_65_to_127_octets", + NFP_MAC_STATS_TX_PKTS_65_TO_127_OCTETS, }, + { "tx_pkts_128_to_255_octets", + NFP_MAC_STATS_TX_PKTS_128_TO_255_OCTETS, }, + { "tx_pkts_256_to_511_octets", + NFP_MAC_STATS_TX_PKTS_256_TO_511_OCTETS, }, + { "tx_pkts_512_to_1023_octets", + NFP_MAC_STATS_TX_PKTS_512_TO_1023_OCTETS, }, + { "tx_pkts_1024_to_1518_octets", + NFP_MAC_STATS_TX_PKTS_1024_TO_1518_OCTETS, }, + { "tx_pkts_1519_to_max_octets", + NFP_MAC_STATS_TX_PKTS_1519_TO_MAX_OCTETS, }, + { "tx_pause_frames_class0", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS0, }, + { "tx_pause_frames_class1", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS1, }, + { "tx_pause_frames_class2", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS2, }, + { "tx_pause_frames_class3", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS3, }, + { "tx_pause_frames_class4", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS4, }, + { "tx_pause_frames_class5", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS5, }, + { "tx_pause_frames_class6", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS6, }, + { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, }, +}; + +static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = { + [1] = "dev_rx_discards", + [2] = "dev_rx_errors", + [3] = "dev_rx_bytes", + [4] = "dev_rx_uc_bytes", + [5] = "dev_rx_mc_bytes", + [6] = "dev_rx_bc_bytes", + [7] = "dev_rx_pkts", + [8] = "dev_rx_mc_pkts", + [9] = "dev_rx_bc_pkts", + + [10] = "dev_tx_discards", + [11] = "dev_tx_errors", + [12] = "dev_tx_bytes", + [13] = "dev_tx_uc_bytes", + [14] = "dev_tx_mc_bytes", + [15] = "dev_tx_bc_bytes", + [16] = "dev_tx_pkts", + [17] = "dev_tx_mc_pkts", + [18] = "dev_tx_bc_pkts", +}; + +#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) +#define NN_ET_SWITCH_STATS_LEN 9 +#define NN_RVEC_GATHER_STATS 13 +#define NN_RVEC_PER_Q_STATS 3 +#define NN_CTRL_PATH_STATS 4 + +#define SFP_SFF_REV_COMPLIANCE 1 + +static void nfp_net_get_nspinfo(struct nfp_app *app, char *version) +{ + struct nfp_nsp *nsp; + + if (!app) + return; + + nsp = nfp_nsp_open(app->cpp); + if (IS_ERR(nsp)) + return; + + snprintf(version, ETHTOOL_FWVERS_LEN, "%hu.%hu", + nfp_nsp_get_abi_ver_major(nsp), + nfp_nsp_get_abi_ver_minor(nsp)); + + nfp_nsp_close(nsp); +} + +static void +nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev, + const char *vnic_version, struct ethtool_drvinfo *drvinfo) +{ + char nsp_version[ETHTOOL_FWVERS_LEN] = {}; + + strscpy(drvinfo->driver, dev_driver_string(&pdev->dev), + sizeof(drvinfo->driver)); + nfp_net_get_nspinfo(app, nsp_version); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%s %s %s %s", vnic_version, nsp_version, + nfp_app_mip_name(app), nfp_app_name(app)); +} + +static void +nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + char vnic_version[ETHTOOL_FWVERS_LEN] = {}; + struct nfp_net *nn = netdev_priv(netdev); + + snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d", + nn->fw_ver.extend, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor); + strscpy(drvinfo->bus_info, pci_name(nn->pdev), + sizeof(drvinfo->bus_info)); + + nfp_get_drvinfo(nn->app, nn->pdev, vnic_version, drvinfo); +} + +static int +nfp_net_nway_reset(struct net_device *netdev) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!netif_running(netdev)) + return 0; + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, false); + if (err) { + netdev_info(netdev, "Link down failed: %d\n", err); + return err; + } + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, true); + if (err) { + netdev_info(netdev, "Link up failed: %d\n", err); + return err; + } + + netdev_info(netdev, "Link reset succeeded\n"); + return 0; +} + +static void +nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + strscpy(drvinfo->bus_info, pci_name(app->pdev), + sizeof(drvinfo->bus_info)); + nfp_get_drvinfo(app, app->pdev, "*", drvinfo); +} + +static void +nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *c) +{ + unsigned int modes; + + ethtool_link_ksettings_add_link_mode(c, supported, FEC_NONE); + if (!nfp_eth_can_support_fec(eth_port)) { + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_NONE); + return; + } + + modes = nfp_eth_supported_fec_modes(eth_port); + if (modes & NFP_FEC_BASER) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_BASER); + } + + if (modes & NFP_FEC_REED_SOLOMON) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_RS); + } +} + +static const struct nfp_eth_media_link_mode { + u16 ethtool_link_mode; + u16 speed; +} nfp_eth_media_table[NFP_MEDIA_LINK_MODES_NUMBER] = { + [NFP_MEDIA_1000BASE_CX] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = NFP_SPEED_1G, + }, + [NFP_MEDIA_1000BASE_KX] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = NFP_SPEED_1G, + }, + [NFP_MEDIA_10GBASE_KX4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_10GBASE_KR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_10GBASE_CX4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_10GBASE_CR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_10GBASE_SR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_10GBASE_ER] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + .speed = NFP_SPEED_10G, + }, + [NFP_MEDIA_25GBASE_KR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = NFP_SPEED_25G, + }, + [NFP_MEDIA_25GBASE_KR_S] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = NFP_SPEED_25G, + }, + [NFP_MEDIA_25GBASE_CR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = NFP_SPEED_25G, + }, + [NFP_MEDIA_25GBASE_CR_S] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = NFP_SPEED_25G, + }, + [NFP_MEDIA_25GBASE_SR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = NFP_SPEED_25G, + }, + [NFP_MEDIA_40GBASE_CR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = NFP_SPEED_40G, + }, + [NFP_MEDIA_40GBASE_KR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = NFP_SPEED_40G, + }, + [NFP_MEDIA_40GBASE_SR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = NFP_SPEED_40G, + }, + [NFP_MEDIA_40GBASE_LR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + .speed = NFP_SPEED_40G, + }, + [NFP_MEDIA_50GBASE_KR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_50GBASE_SR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_50GBASE_CR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_50GBASE_LR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_50GBASE_ER] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_50GBASE_FR] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + .speed = NFP_SPEED_50G, + }, + [NFP_MEDIA_100GBASE_KR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = NFP_SPEED_100G, + }, + [NFP_MEDIA_100GBASE_SR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = NFP_SPEED_100G, + }, + [NFP_MEDIA_100GBASE_CR4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = NFP_SPEED_100G, + }, + [NFP_MEDIA_100GBASE_KP4] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = NFP_SPEED_100G, + }, + [NFP_MEDIA_100GBASE_CR10] = { + .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = NFP_SPEED_100G, + }, +}; + +static const unsigned int nfp_eth_speed_map[NFP_SUP_SPEED_NUMBER] = { + [NFP_SPEED_1G] = SPEED_1000, + [NFP_SPEED_10G] = SPEED_10000, + [NFP_SPEED_25G] = SPEED_25000, + [NFP_SPEED_40G] = SPEED_40000, + [NFP_SPEED_50G] = SPEED_50000, + [NFP_SPEED_100G] = SPEED_100000, +}; + +static void nfp_add_media_link_mode(struct nfp_port *port, + struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *cmd) +{ + u64 supported_modes[2], advertised_modes[2]; + struct nfp_eth_media_buf ethm = { + .eth_index = eth_port->eth_index, + }; + struct nfp_cpp *cpp = port->app->cpp; + + if (nfp_eth_read_media(cpp, ðm)) { + bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); + return; + } + + bitmap_zero(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); + + for (u32 i = 0; i < 2; i++) { + supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]); + advertised_modes[i] = le64_to_cpu(ethm.advertised_modes[i]); + } + + for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) { + if (i < 64) { + if (supported_modes[0] & BIT_ULL(i)) { + __set_bit(nfp_eth_media_table[i].ethtool_link_mode, + cmd->link_modes.supported); + __set_bit(nfp_eth_media_table[i].speed, + port->speed_bitmap); + } + + if (advertised_modes[0] & BIT_ULL(i)) + __set_bit(nfp_eth_media_table[i].ethtool_link_mode, + cmd->link_modes.advertising); + } else { + if (supported_modes[1] & BIT_ULL(i - 64)) { + __set_bit(nfp_eth_media_table[i].ethtool_link_mode, + cmd->link_modes.supported); + __set_bit(nfp_eth_media_table[i].speed, + port->speed_bitmap); + } + + if (advertised_modes[1] & BIT_ULL(i - 64)) + __set_bit(nfp_eth_media_table[i].ethtool_link_mode, + cmd->link_modes.advertising); + } + } +} + +/** + * nfp_net_get_link_ksettings - Get Link Speed settings + * @netdev: network interface device structure + * @cmd: ethtool command + * + * Reports speed settings based on info in the BAR provided by the fw. + */ +static int +nfp_net_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + struct nfp_net *nn; + unsigned int speed; + u16 sts; + + /* Init to unknowns */ + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + cmd->base.port = PORT_OTHER; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (eth_port) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + nfp_add_media_link_mode(port, eth_port, cmd); + if (eth_port->supp_aneg) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + if (eth_port->aneg == NFP_ANEG_AUTO) { + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + cmd->base.autoneg = AUTONEG_ENABLE; + } + } + nfp_net_set_fec_link_mode(eth_port, cmd); + } + + if (!netif_carrier_ok(netdev)) + return 0; + + /* Use link speed from ETH table if available, otherwise try the BAR */ + if (eth_port) { + cmd->base.port = eth_port->port_type; + cmd->base.speed = eth_port->speed; + cmd->base.duplex = DUPLEX_FULL; + return 0; + } + + if (!nfp_netdev_is_nfp_net(netdev)) + return -EOPNOTSUPP; + nn = netdev_priv(netdev); + + sts = nn_readw(nn, NFP_NET_CFG_STS); + speed = nfp_net_lr2speed(FIELD_GET(NFP_NET_CFG_STS_LINK_RATE, sts)); + if (!speed) + return -EOPNOTSUPP; + + if (speed != SPEED_UNKNOWN) { + cmd->base.speed = speed; + cmd->base.duplex = DUPLEX_FULL; + } + + return 0; +} + +static int +nfp_net_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + bool req_aneg = (cmd->base.autoneg == AUTONEG_ENABLE); + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + struct nfp_nsp *nsp; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (netif_running(netdev)) { + netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until driver reload.\n"); + return -EBUSY; + } + + nsp = nfp_eth_config_start(port->app->cpp, eth_port->index); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + if (req_aneg && !eth_port->supp_aneg) { + netdev_warn(netdev, "Autoneg is not supported.\n"); + err = -EOPNOTSUPP; + goto err_bad_set; + } + + err = __nfp_eth_set_aneg(nsp, req_aneg ? NFP_ANEG_AUTO : NFP_ANEG_DISABLED); + if (err) + goto err_bad_set; + + if (cmd->base.speed != SPEED_UNKNOWN) { + u32 speed = cmd->base.speed / eth_port->lanes; + bool is_supported = false; + + for (u32 i = 0; i < NFP_SUP_SPEED_NUMBER; i++) { + if (cmd->base.speed == nfp_eth_speed_map[i] && + test_bit(i, port->speed_bitmap)) { + is_supported = true; + break; + } + } + + if (!is_supported) { + netdev_err(netdev, "Speed %u is not supported.\n", + cmd->base.speed); + err = -EINVAL; + goto err_bad_set; + } + + if (req_aneg) { + netdev_err(netdev, "Speed changing is not allowed when working on autoneg mode.\n"); + err = -EINVAL; + goto err_bad_set; + } + + err = __nfp_eth_set_speed(nsp, speed); + if (err) + goto err_bad_set; + } + + err = nfp_eth_config_commit_end(nsp); + if (err > 0) + return 0; /* no change */ + + nfp_net_refresh_port_table(port); + + return err; + +err_bad_set: + nfp_eth_config_cleanup_end(nsp); + return err; +} + +static void nfp_net_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 qc_max = nn->dev_info->max_qc_size; + + ring->rx_max_pending = qc_max; + ring->tx_max_pending = qc_max / nn->dp.ops->tx_min_desc_per_pkt; + ring->rx_pending = nn->dp.rxd_cnt; + ring->tx_pending = nn->dp.txd_cnt; +} + +static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) +{ + struct nfp_net_dp *dp; + + dp = nfp_net_clone_dp(nn); + if (!dp) + return -ENOMEM; + + dp->rxd_cnt = rxd_cnt; + dp->txd_cnt = txd_cnt; + + return nfp_net_ring_reconfig(nn, dp, NULL); +} + +static int nfp_net_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + u32 tx_dpp, qc_min, qc_max, rxd_cnt, txd_cnt; + struct nfp_net *nn = netdev_priv(netdev); + + /* We don't have separate queues/rings for small/large frames. */ + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + qc_min = nn->dev_info->min_qc_size; + qc_max = nn->dev_info->max_qc_size; + tx_dpp = nn->dp.ops->tx_min_desc_per_pkt; + /* Round up to supported values */ + rxd_cnt = roundup_pow_of_two(ring->rx_pending); + txd_cnt = roundup_pow_of_two(ring->tx_pending); + + if (rxd_cnt < qc_min || rxd_cnt > qc_max || + txd_cnt < qc_min / tx_dpp || txd_cnt > qc_max / tx_dpp) + return -EINVAL; + + if (nn->dp.rxd_cnt == rxd_cnt && nn->dp.txd_cnt == txd_cnt) + return 0; + + nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n", + nn->dp.rxd_cnt, rxd_cnt, nn->dp.txd_cnt, txd_cnt); + + return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt); +} + +static int nfp_test_link(struct net_device *netdev) +{ + if (!netif_carrier_ok(netdev) || !(netdev->flags & IFF_UP)) + return 1; + + return 0; +} + +static int nfp_test_nsp(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + struct nfp_nsp_identify *nspi; + struct nfp_nsp *nsp; + int err; + + nsp = nfp_nsp_open(app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_info(netdev, "NSP Test: failed to access the NSP: %d\n", err); + goto exit; + } + + if (nfp_nsp_get_abi_ver_minor(nsp) < 15) { + err = -EOPNOTSUPP; + goto exit_close_nsp; + } + + nspi = kzalloc(sizeof(*nspi), GFP_KERNEL); + if (!nspi) { + err = -ENOMEM; + goto exit_close_nsp; + } + + err = nfp_nsp_read_identify(nsp, nspi, sizeof(*nspi)); + if (err < 0) + netdev_info(netdev, "NSP Test: reading bsp version failed %d\n", err); + + kfree(nspi); +exit_close_nsp: + nfp_nsp_close(nsp); +exit: + return err; +} + +static int nfp_test_fw(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + netdev_info(netdev, "FW Test: update failed %d\n", err); + + return err; +} + +static int nfp_test_reg(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + struct nfp_cpp *cpp = app->cpp; + u32 model = nfp_cpp_model(cpp); + u32 value; + int err; + + err = nfp_cpp_model_autodetect(cpp, &value); + if (err < 0) { + netdev_info(netdev, "REG Test: NFP model detection failed %d\n", err); + return err; + } + + return (value == model) ? 0 : 1; +} + +static bool link_test_supported(struct net_device *netdev) +{ + return true; +} + +static bool nsp_test_supported(struct net_device *netdev) +{ + if (nfp_app_from_netdev(netdev)) + return true; + + return false; +} + +static bool fw_test_supported(struct net_device *netdev) +{ + if (nfp_netdev_is_nfp_net(netdev)) + return true; + + return false; +} + +static bool reg_test_supported(struct net_device *netdev) +{ + if (nfp_app_from_netdev(netdev)) + return true; + + return false; +} + +static struct nfp_self_test_item { + char name[ETH_GSTRING_LEN]; + bool (*is_supported)(struct net_device *dev); + int (*func)(struct net_device *dev); +} nfp_self_test[] = { + {"Link Test", link_test_supported, nfp_test_link}, + {"NSP Test", nsp_test_supported, nfp_test_nsp}, + {"Firmware Test", fw_test_supported, nfp_test_fw}, + {"Register Test", reg_test_supported, nfp_test_reg} +}; + +#define NFP_TEST_TOTAL_NUM ARRAY_SIZE(nfp_self_test) + +static void nfp_get_self_test_strings(struct net_device *netdev, u8 *data) +{ + int i; + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) + if (nfp_self_test[i].is_supported(netdev)) + ethtool_sprintf(&data, nfp_self_test[i].name); +} + +static int nfp_get_self_test_count(struct net_device *netdev) +{ + int i, count = 0; + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) + if (nfp_self_test[i].is_supported(netdev)) + count++; + + return count; +} + +static void nfp_net_self_test(struct net_device *netdev, struct ethtool_test *eth_test, + u64 *data) +{ + int i, ret, count = 0; + + netdev_info(netdev, "Start self test\n"); + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) { + if (nfp_self_test[i].is_supported(netdev)) { + ret = nfp_self_test[i].func(netdev); + if (ret) + eth_test->flags |= ETH_TEST_FL_FAILED; + data[count++] = ret; + } + } + + netdev_info(netdev, "Test end\n"); +} + +static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS + + NN_CTRL_PATH_STATS; +} + +static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + for (i = 0; i < nn->max_r_vecs; i++) { + ethtool_sprintf(&data, "rvec_%u_rx_pkts", i); + ethtool_sprintf(&data, "rvec_%u_tx_pkts", i); + ethtool_sprintf(&data, "rvec_%u_tx_busy", i); + } + + ethtool_sprintf(&data, "hw_rx_csum_ok"); + ethtool_sprintf(&data, "hw_rx_csum_inner_ok"); + ethtool_sprintf(&data, "hw_rx_csum_complete"); + ethtool_sprintf(&data, "hw_rx_csum_err"); + ethtool_sprintf(&data, "rx_replace_buf_alloc_fail"); + ethtool_sprintf(&data, "rx_tls_decrypted_packets"); + ethtool_sprintf(&data, "hw_tx_csum"); + ethtool_sprintf(&data, "hw_tx_inner_csum"); + ethtool_sprintf(&data, "tx_gather"); + ethtool_sprintf(&data, "tx_lso"); + ethtool_sprintf(&data, "tx_tls_encrypted_packets"); + ethtool_sprintf(&data, "tx_tls_ooo"); + ethtool_sprintf(&data, "tx_tls_drop_no_sync_data"); + + ethtool_sprintf(&data, "hw_tls_no_space"); + ethtool_sprintf(&data, "rx_tls_resync_req_ok"); + ethtool_sprintf(&data, "rx_tls_resync_req_ign"); + ethtool_sprintf(&data, "rx_tls_resync_sent"); + + return data; +} + +static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) +{ + u64 gathered_stats[NN_RVEC_GATHER_STATS] = {}; + struct nfp_net *nn = netdev_priv(netdev); + u64 tmp[NN_RVEC_GATHER_STATS]; + unsigned int i, j; + + for (i = 0; i < nn->max_r_vecs; i++) { + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); + data[0] = nn->r_vecs[i].rx_pkts; + tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; + tmp[2] = nn->r_vecs[i].hw_csum_rx_complete; + tmp[3] = nn->r_vecs[i].hw_csum_rx_error; + tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); + + do { + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; + tmp[6] = nn->r_vecs[i].hw_csum_tx; + tmp[7] = nn->r_vecs[i].hw_csum_tx_inner; + tmp[8] = nn->r_vecs[i].tx_gather; + tmp[9] = nn->r_vecs[i].tx_lso; + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); + + data += NN_RVEC_PER_Q_STATS; + + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) + gathered_stats[j] += tmp[j]; + } + + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) + *data++ = gathered_stats[j]; + + *data++ = atomic_read(&nn->ktls_no_space); + *data++ = atomic_read(&nn->ktls_rx_resync_req); + *data++ = atomic_read(&nn->ktls_rx_resync_ign); + *data++ = atomic_read(&nn->ktls_rx_resync_sent); + + return data; +} + +static unsigned int nfp_vnic_get_hw_stats_count(unsigned int num_vecs) +{ + return NN_ET_GLOBAL_STATS_LEN + num_vecs * 4; +} + +static u8 * +nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr) +{ + int swap_off, i; + + BUILD_BUG_ON(NN_ET_GLOBAL_STATS_LEN < NN_ET_SWITCH_STATS_LEN * 2); + /* If repr is true first add SWITCH_STATS_LEN and then subtract it + * effectively swapping the RX and TX statistics (giving us the RX + * and TX from perspective of the switch). + */ + swap_off = repr * NN_ET_SWITCH_STATS_LEN; + + for (i = 0; i < NN_ET_SWITCH_STATS_LEN; i++) + ethtool_sprintf(&data, nfp_net_et_stats[i + swap_off].name); + + for (i = NN_ET_SWITCH_STATS_LEN; i < NN_ET_SWITCH_STATS_LEN * 2; i++) + ethtool_sprintf(&data, nfp_net_et_stats[i - swap_off].name); + + for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++) + ethtool_sprintf(&data, nfp_net_et_stats[i].name); + + for (i = 0; i < num_vecs; i++) { + ethtool_sprintf(&data, "rxq_%u_pkts", i); + ethtool_sprintf(&data, "rxq_%u_bytes", i); + ethtool_sprintf(&data, "txq_%u_pkts", i); + ethtool_sprintf(&data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 * +nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) +{ + unsigned int i; + + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) + *data++ = readq(mem + nfp_net_et_stats[i].off); + + for (i = 0; i < num_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + +static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn) +{ + return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4; +} + +static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) +{ + unsigned int i, id; + u8 __iomem *mem; + u64 id_word = 0; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) { + if (!(i % 4)) + id_word = readq(mem + i * 2); + + id = (u16)id_word; + id_word >>= 16; + + if (id < ARRAY_SIZE(nfp_tlv_stat_names) && + nfp_tlv_stat_names[id][0]) { + memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } else { + ethtool_sprintf(&data, "dev_unknown_stat%u", id); + } + } + + for (i = 0; i < nn->max_r_vecs; i++) { + ethtool_sprintf(&data, "rxq_%u_pkts", i); + ethtool_sprintf(&data, "rxq_%u_bytes", i); + ethtool_sprintf(&data, "txq_%u_pkts", i); + ethtool_sprintf(&data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data) +{ + u8 __iomem *mem; + unsigned int i; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8); + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) + *data++ = readq(mem + i * 8); + + mem = nn->dp.ctrl_bar; + for (i = 0; i < nn->max_r_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + +static unsigned int nfp_mac_get_stats_count(struct net_device *netdev) +{ + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!__nfp_port_get_eth_port(port) || !port->eth_stats) + return 0; + + return ARRAY_SIZE(nfp_mac_et_stats); +} + +static u8 *nfp_mac_get_stats_strings(struct net_device *netdev, u8 *data) +{ + struct nfp_port *port; + unsigned int i; + + port = nfp_port_from_netdev(netdev); + if (!__nfp_port_get_eth_port(port) || !port->eth_stats) + return data; + + for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++) + ethtool_sprintf(&data, "mac.%s", nfp_mac_et_stats[i].name); + + return data; +} + +static u64 *nfp_mac_get_stats(struct net_device *netdev, u64 *data) +{ + struct nfp_port *port; + unsigned int i; + + port = nfp_port_from_netdev(netdev); + if (!__nfp_port_get_eth_port(port) || !port->eth_stats) + return data; + + for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++) + *data++ = readq(port->eth_stats + nfp_mac_et_stats[i].off); + + return data; +} + +static void nfp_net_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (stringset) { + case ETH_SS_STATS: + data = nfp_vnic_get_sw_stats_strings(netdev, data); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats_strings(data, + nn->max_r_vecs, + false); + else + data = nfp_vnic_get_tlv_stats_strings(nn, data); + data = nfp_mac_get_stats_strings(netdev, data); + data = nfp_app_port_get_stats_strings(nn->port, data); + break; + case ETH_SS_TEST: + nfp_get_self_test_strings(netdev, data); + break; + } +} + +static void +nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, + u64 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + + data = nfp_vnic_get_sw_stats(netdev, data); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, + nn->max_r_vecs); + else + data = nfp_vnic_get_tlv_stats(nn, data); + data = nfp_mac_get_stats(netdev, data); + data = nfp_app_port_get_stats(nn->port, data); +} + +static int nfp_net_get_sset_count(struct net_device *netdev, int sset) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int cnt; + + switch (sset) { + case ETH_SS_STATS: + cnt = nfp_vnic_get_sw_stats_count(netdev); + if (!nn->tlv_caps.vnic_stats_off) + cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs); + else + cnt += nfp_vnic_get_tlv_stats_count(nn); + cnt += nfp_mac_get_stats_count(netdev); + cnt += nfp_app_port_get_stats_count(nn->port); + return cnt; + case ETH_SS_TEST: + return nfp_get_self_test_count(netdev); + default: + return -EOPNOTSUPP; + } +} + +static void nfp_port_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + + switch (stringset) { + case ETH_SS_STATS: + if (nfp_port_is_vnic(port)) + data = nfp_vnic_get_hw_stats_strings(data, 0, true); + else + data = nfp_mac_get_stats_strings(netdev, data); + data = nfp_app_port_get_stats_strings(port, data); + break; + case ETH_SS_TEST: + nfp_get_self_test_strings(netdev, data); + break; + } +} + +static void +nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats, + u64 *data) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + + if (nfp_port_is_vnic(port)) + data = nfp_vnic_get_hw_stats(data, port->vnic, 0); + else + data = nfp_mac_get_stats(netdev, data); + data = nfp_app_port_get_stats(port, data); +} + +static int nfp_port_get_sset_count(struct net_device *netdev, int sset) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + unsigned int count; + + switch (sset) { + case ETH_SS_STATS: + if (nfp_port_is_vnic(port)) + count = nfp_vnic_get_hw_stats_count(0); + else + count = nfp_mac_get_stats_count(netdev); + count += nfp_app_port_get_stats_count(port); + return count; + case ETH_SS_TEST: + return nfp_get_self_test_count(netdev); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_port_fec_ethtool_to_nsp(u32 fec) +{ + switch (fec) { + case ETHTOOL_FEC_AUTO: + return NFP_FEC_AUTO_BIT; + case ETHTOOL_FEC_OFF: + return NFP_FEC_DISABLED_BIT; + case ETHTOOL_FEC_RS: + return NFP_FEC_REED_SOLOMON_BIT; + case ETHTOOL_FEC_BASER: + return NFP_FEC_BASER_BIT; + default: + /* NSP only supports a single mode at a time */ + return -EOPNOTSUPP; + } +} + +static u32 nfp_port_fec_nsp_to_ethtool(u32 fec) +{ + u32 result = 0; + + if (fec & NFP_FEC_AUTO) + result |= ETHTOOL_FEC_AUTO; + if (fec & NFP_FEC_BASER) + result |= ETHTOOL_FEC_BASER; + if (fec & NFP_FEC_REED_SOLOMON) + result |= ETHTOOL_FEC_RS; + if (fec & NFP_FEC_DISABLED) + result |= ETHTOOL_FEC_OFF; + + return result ?: ETHTOOL_FEC_NONE; +} + +static int +nfp_port_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + param->active_fec = ETHTOOL_FEC_NONE; + param->fec = ETHTOOL_FEC_NONE; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return 0; + + param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported); + param->active_fec = nfp_port_fec_nsp_to_ethtool(BIT(eth_port->act_fec)); + + return 0; +} + +static int +nfp_port_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err, fec; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return -EOPNOTSUPP; + + fec = nfp_port_fec_ethtool_to_nsp(param->fec); + if (fec < 0) + return fec; + + err = nfp_eth_set_fec(port->app->cpp, eth_port->index, fec); + if (!err) + /* Only refresh if we did something */ + nfp_net_refresh_port_table(port); + + return err < 0 ? err : 0; +} + +/* RX network flow classification (RSS, filters, etc) + */ +static u32 ethtool_flow_to_nfp_flag(u32 flow_type) +{ + static const u32 xlate_ethtool_to_nfp[IPV6_FLOW + 1] = { + [TCP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_TCP, + [TCP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_TCP, + [UDP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_UDP, + [UDP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_UDP, + [IPV4_FLOW] = NFP_NET_CFG_RSS_IPV4, + [IPV6_FLOW] = NFP_NET_CFG_RSS_IPV6, + }; + + if (flow_type >= ARRAY_SIZE(xlate_ethtool_to_nfp)) + return 0; + + return xlate_ethtool_to_nfp[flow_type]; +} + +static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, + struct ethtool_rxnfc *cmd) +{ + u32 nfp_rss_flag; + + cmd->data = 0; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) + return -EOPNOTSUPP; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(cmd->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + if (nn->rss_cfg & nfp_rss_flag) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + return 0; +} + +static int nfp_net_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nn->dp.num_rx_rings; + return 0; + case ETHTOOL_GRXFH: + return nfp_net_get_rss_hash_opts(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, + struct ethtool_rxnfc *nfc) +{ + u32 new_rss_cfg = nn->rss_cfg; + u32 nfp_rss_flag; + int err; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) + return -EOPNOTSUPP; + + /* RSS only supports IP SA/DA and L4 src/dst ports */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SA/DA fields for hashing */ + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(nfc->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + new_rss_cfg &= ~nfp_rss_flag; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + new_rss_cfg |= nfp_rss_flag; + break; + default: + return -EINVAL; + } + + new_rss_cfg |= FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc); + new_rss_cfg |= NFP_NET_CFG_RSS_MASK; + + if (new_rss_cfg == nn->rss_cfg) + return 0; + + writel(new_rss_cfg, nn->dp.ctrl_bar + NFP_NET_CFG_RSS_CTRL); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); + if (err) + return err; + + nn->rss_cfg = new_rss_cfg; + + nn_dbg(nn, "Changed RSS config to 0x%x\n", nn->rss_cfg); + return 0; +} + +static int nfp_net_set_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return nfp_net_set_rss_hash_opt(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) + return 0; + + return ARRAY_SIZE(nn->rss_itbl); +} + +static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) + return -EOPNOTSUPP; + + return nfp_net_rss_key_sz(nn); +} + +static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) + return -EOPNOTSUPP; + + if (indir) + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + indir[i] = nn->rss_itbl[i]; + if (key) + memcpy(key, nn->rss_key, nfp_net_rss_key_sz(nn)); + if (hfunc) { + *hfunc = nn->rss_hfunc; + if (*hfunc >= 1 << ETH_RSS_HASH_FUNCS_COUNT) + *hfunc = ETH_RSS_HASH_UNKNOWN; + } + + return 0; +} + +static int nfp_net_set_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) || + !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == nn->rss_hfunc)) + return -EOPNOTSUPP; + + if (!key && !indir) + return 0; + + if (key) { + memcpy(nn->rss_key, key, nfp_net_rss_key_sz(nn)); + nfp_net_rss_write_key(nn); + } + if (indir) { + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + nn->rss_itbl[i] = indir[i]; + + nfp_net_rss_write_itbl(nn); + } + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); +} + +/* Dump BAR registers + */ +static int nfp_net_get_regs_len(struct net_device *netdev) +{ + return NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 *regs_buf = p; + int i; + + regs->version = nn_readl(nn, NFP_NET_CFG_VERSION); + + for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++) + regs_buf[i] = readl(nn->dp.ctrl_bar + (i * sizeof(u32))); +} + +static int nfp_net_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + ec->use_adaptive_rx_coalesce = nn->rx_coalesce_adapt_on; + ec->use_adaptive_tx_coalesce = nn->tx_coalesce_adapt_on; + + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; + ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; + ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; + ec->tx_max_coalesced_frames = nn->tx_coalesce_max_frames; + + return 0; +} + +/* Other debug dumps + */ +static int +nfp_dump_nsp_diag(struct nfp_app *app, struct ethtool_dump *dump, void *buffer) +{ + struct nfp_resource *res; + int ret; + + if (!app) + return -EOPNOTSUPP; + + dump->version = 1; + dump->flag = NFP_DUMP_NSP_DIAG; + + res = nfp_resource_acquire(app->cpp, NFP_RESOURCE_NSP_DIAG); + if (IS_ERR(res)) + return PTR_ERR(res); + + if (buffer) { + if (dump->len != nfp_resource_size(res)) { + ret = -EINVAL; + goto exit_release; + } + + ret = nfp_cpp_read(app->cpp, nfp_resource_cpp_id(res), + nfp_resource_address(res), + buffer, dump->len); + if (ret != dump->len) + ret = ret < 0 ? ret : -EIO; + else + ret = 0; + } else { + dump->len = nfp_resource_size(res); + ret = 0; + } +exit_release: + nfp_resource_release(res); + + return ret; +} + +/* Set the dump flag/level. Calculate the dump length for flag > 0 only (new TLV + * based dumps), since flag 0 (default) calculates the length in + * nfp_app_get_dump_flag(), and we need to support triggering a level 0 dump + * without setting the flag first, for backward compatibility. + */ +static int nfp_app_set_dump(struct net_device *netdev, struct ethtool_dump *val) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + s64 len; + + if (!app) + return -EOPNOTSUPP; + + if (val->flag == NFP_DUMP_NSP_DIAG) { + app->pf->dump_flag = val->flag; + return 0; + } + + if (!app->pf->dumpspec) + return -EOPNOTSUPP; + + len = nfp_net_dump_calculate_size(app->pf, app->pf->dumpspec, + val->flag); + if (len < 0) + return len; + + app->pf->dump_flag = val->flag; + app->pf->dump_len = len; + + return 0; +} + +static int +nfp_app_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (!app) + return -EOPNOTSUPP; + + if (app->pf->dump_flag == NFP_DUMP_NSP_DIAG) + return nfp_dump_nsp_diag(app, dump, NULL); + + dump->flag = app->pf->dump_flag; + dump->len = app->pf->dump_len; + + return 0; +} + +static int +nfp_app_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump, + void *buffer) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (!app) + return -EOPNOTSUPP; + + if (app->pf->dump_flag == NFP_DUMP_NSP_DIAG) + return nfp_dump_nsp_diag(app, dump, buffer); + + dump->flag = app->pf->dump_flag; + dump->len = app->pf->dump_len; + + return nfp_net_dump_populate_buffer(app->pf, app->pf->dumpspec, dump, + buffer); +} + +static int +nfp_port_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + unsigned int read_len; + struct nfp_nsp *nsp; + int err = 0; + u8 data; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + /* update port state to get latest interface */ + set_bit(NFP_PORT_CHANGED, &port->flags); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + nsp = nfp_nsp_open(port->app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_err(netdev, "Failed to access the NSP: %d\n", err); + return err; + } + + if (!nfp_nsp_has_read_module_eeprom(nsp)) { + netdev_info(netdev, "reading module EEPROM not supported. Please update flash\n"); + err = -EOPNOTSUPP; + goto exit_close_nsp; + } + + switch (eth_port->interface) { + case NFP_INTERFACE_SFP: + case NFP_INTERFACE_SFP28: + err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index, + SFP_SFF8472_COMPLIANCE, &data, + 1, &read_len); + if (err < 0) + goto exit_close_nsp; + + if (!data) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + case NFP_INTERFACE_QSFP: + err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index, + SFP_SFF_REV_COMPLIANCE, &data, + 1, &read_len); + if (err < 0) + goto exit_close_nsp; + + if (data < 0x3) { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + } + break; + case NFP_INTERFACE_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + break; + default: + netdev_err(netdev, "Unsupported module 0x%x detected\n", + eth_port->interface); + err = -EINVAL; + } + +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static int +nfp_port_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + struct nfp_nsp *nsp; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + nsp = nfp_nsp_open(port->app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_err(netdev, "Failed to access the NSP: %d\n", err); + return err; + } + + if (!nfp_nsp_has_read_module_eeprom(nsp)) { + netdev_info(netdev, "reading module EEPROM not supported. Please update flash\n"); + err = -EOPNOTSUPP; + goto exit_close_nsp; + } + + err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index, + eeprom->offset, data, eeprom->len, + &eeprom->len); + if (err < 0) { + if (eeprom->len) { + netdev_warn(netdev, + "Incomplete read from module EEPROM: %d\n", + err); + err = 0; + } else { + netdev_err(netdev, + "Reading from module EEPROM failed: %d\n", + err); + } + } + +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + +static int nfp_net_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int factor; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + + /* Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this would + * cause interrupts to never be generated. To disable coalescing, set + * usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + */ + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + /* ensure valid configuration */ + if (!ec->rx_coalesce_usecs && !ec->rx_max_coalesced_frames) + return -EINVAL; + + if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) + return -EINVAL; + + if (nfp_net_coalesce_para_check(ec->rx_coalesce_usecs * factor, + ec->rx_max_coalesced_frames)) + return -EINVAL; + + if (nfp_net_coalesce_para_check(ec->tx_coalesce_usecs * factor, + ec->tx_max_coalesced_frames)) + return -EINVAL; + + /* configuration is valid */ + nn->rx_coalesce_adapt_on = !!ec->use_adaptive_rx_coalesce; + nn->tx_coalesce_adapt_on = !!ec->use_adaptive_tx_coalesce; + + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; + nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; + nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; + nn->tx_coalesce_max_frames = ec->tx_max_coalesced_frames; + + /* write configuration to device */ + nfp_net_coalesce_write_cfg(nn); + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); +} + +static void nfp_net_get_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int num_tx_rings; + + num_tx_rings = nn->dp.num_tx_rings; + if (nn->dp.xdp_prog) + num_tx_rings -= nn->dp.num_rx_rings; + + channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs); + channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs); + channel->max_combined = min(channel->max_rx, channel->max_tx); + channel->max_other = NFP_NET_NON_Q_VECTORS; + channel->combined_count = min(nn->dp.num_rx_rings, num_tx_rings); + channel->rx_count = nn->dp.num_rx_rings - channel->combined_count; + channel->tx_count = num_tx_rings - channel->combined_count; + channel->other_count = NFP_NET_NON_Q_VECTORS; +} + +static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx, + unsigned int total_tx) +{ + struct nfp_net_dp *dp; + + dp = nfp_net_clone_dp(nn); + if (!dp) + return -ENOMEM; + + dp->num_rx_rings = total_rx; + dp->num_tx_rings = total_tx; + /* nfp_net_check_config() will catch num_tx_rings > nn->max_tx_rings */ + if (dp->xdp_prog) + dp->num_tx_rings += total_rx; + + return nfp_net_ring_reconfig(nn, dp, NULL); +} + +static int nfp_net_set_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int total_rx, total_tx; + + /* Reject unsupported */ + if (channel->other_count != NFP_NET_NON_Q_VECTORS || + (channel->rx_count && channel->tx_count)) + return -EINVAL; + + total_rx = channel->combined_count + channel->rx_count; + total_tx = channel->combined_count + channel->tx_count; + + if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) || + total_tx > min(nn->max_tx_rings, nn->max_r_vecs)) + return -EINVAL; + + return nfp_net_set_num_rings(nn, total_rx, total_tx); +} + +static void nfp_port_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return; + + /* Currently pause frame support is fixed */ + pause->autoneg = AUTONEG_DISABLE; + pause->rx_pause = 1; + pause->tx_pause = 1; +} + +static int nfp_net_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + /* Control LED to blink */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 1); + break; + + case ETHTOOL_ID_INACTIVE: + /* Control LED to normal mode */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 0); + break; + + case ETHTOOL_ID_ON: + case ETHTOOL_ID_OFF: + default: + return -EOPNOTSUPP; + } + + return err; +} + +#define NFP_EEPROM_LEN ETH_ALEN + +static int +nfp_net_get_eeprom_len(struct net_device *netdev) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return 0; + + return NFP_EEPROM_LEN; +} + +static int +nfp_net_get_nsp_hwindex(struct net_device *netdev, + struct nfp_nsp **nspptr, + u32 *index) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + struct nfp_nsp *nsp; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + nsp = nfp_nsp_open(port->app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_err(netdev, "Failed to access the NSP: %d\n", err); + return err; + } + + if (!nfp_nsp_has_hwinfo_lookup(nsp)) { + netdev_err(netdev, "NSP doesn't support PF MAC generation\n"); + nfp_nsp_close(nsp); + return -EOPNOTSUPP; + } + + *nspptr = nsp; + *index = eth_port->eth_index; + + return 0; +} + +static int +nfp_net_get_port_mac_by_hwinfo(struct net_device *netdev, + u8 *mac_addr) +{ + char hwinfo[32] = {}; + struct nfp_nsp *nsp; + u32 index; + int err; + + err = nfp_net_get_nsp_hwindex(netdev, &nsp, &index); + if (err) + return err; + + snprintf(hwinfo, sizeof(hwinfo), "eth%u.mac", index); + err = nfp_nsp_hwinfo_lookup(nsp, hwinfo, sizeof(hwinfo)); + nfp_nsp_close(nsp); + if (err) { + netdev_err(netdev, "Reading persistent MAC address failed: %d\n", + err); + return -EOPNOTSUPP; + } + + if (sscanf(hwinfo, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) { + netdev_err(netdev, "Can't parse persistent MAC address (%s)\n", + hwinfo); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_net_set_port_mac_by_hwinfo(struct net_device *netdev, + u8 *mac_addr) +{ + char hwinfo[32] = {}; + struct nfp_nsp *nsp; + u32 index; + int err; + + err = nfp_net_get_nsp_hwindex(netdev, &nsp, &index); + if (err) + return err; + + snprintf(hwinfo, sizeof(hwinfo), + "eth%u.mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + index, mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], + mac_addr[4], mac_addr[5]); + + err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo)); + nfp_nsp_close(nsp); + if (err) { + netdev_err(netdev, "HWinfo set failed: %d, hwinfo: %s\n", + err, hwinfo); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_net_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 buf[NFP_EEPROM_LEN] = {}; + + if (eeprom->len == 0) + return -EINVAL; + + if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + eeprom->magic = nn->pdev->vendor | (nn->pdev->device << 16); + memcpy(bytes, buf + eeprom->offset, eeprom->len); + + return 0; +} + +static int +nfp_net_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 buf[NFP_EEPROM_LEN] = {}; + + if (eeprom->len == 0) + return -EINVAL; + + if (eeprom->magic != (nn->pdev->vendor | nn->pdev->device << 16)) + return -EINVAL; + + if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + memcpy(buf + eeprom->offset, bytes, eeprom->len); + if (nfp_net_set_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + return 0; +} + +static const struct ethtool_ops nfp_net_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = nfp_net_get_drvinfo, + .nway_reset = nfp_net_nway_reset, + .get_link = ethtool_op_get_link, + .get_ringparam = nfp_net_get_ringparam, + .set_ringparam = nfp_net_set_ringparam, + .self_test = nfp_net_self_test, + .get_strings = nfp_net_get_strings, + .get_ethtool_stats = nfp_net_get_stats, + .get_sset_count = nfp_net_get_sset_count, + .get_rxnfc = nfp_net_get_rxnfc, + .set_rxnfc = nfp_net_set_rxnfc, + .get_rxfh_indir_size = nfp_net_get_rxfh_indir_size, + .get_rxfh_key_size = nfp_net_get_rxfh_key_size, + .get_rxfh = nfp_net_get_rxfh, + .set_rxfh = nfp_net_set_rxfh, + .get_regs_len = nfp_net_get_regs_len, + .get_regs = nfp_net_get_regs, + .set_dump = nfp_app_set_dump, + .get_dump_flag = nfp_app_get_dump_flag, + .get_dump_data = nfp_app_get_dump_data, + .get_eeprom_len = nfp_net_get_eeprom_len, + .get_eeprom = nfp_net_get_eeprom, + .set_eeprom = nfp_net_set_eeprom, + .get_module_info = nfp_port_get_module_info, + .get_module_eeprom = nfp_port_get_module_eeprom, + .get_coalesce = nfp_net_get_coalesce, + .set_coalesce = nfp_net_set_coalesce, + .get_channels = nfp_net_get_channels, + .set_channels = nfp_net_set_channels, + .get_link_ksettings = nfp_net_get_link_ksettings, + .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, +}; + +const struct ethtool_ops nfp_port_ethtool_ops = { + .get_drvinfo = nfp_app_get_drvinfo, + .nway_reset = nfp_net_nway_reset, + .get_link = ethtool_op_get_link, + .get_strings = nfp_port_get_strings, + .get_ethtool_stats = nfp_port_get_stats, + .self_test = nfp_net_self_test, + .get_sset_count = nfp_port_get_sset_count, + .set_dump = nfp_app_set_dump, + .get_dump_flag = nfp_app_get_dump_flag, + .get_dump_data = nfp_app_get_dump_data, + .get_module_info = nfp_port_get_module_info, + .get_module_eeprom = nfp_port_get_module_eeprom, + .get_link_ksettings = nfp_net_get_link_ksettings, + .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, +}; + +void nfp_net_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &nfp_net_ethtool_ops; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c new file mode 100644 index 000000000..3bae92dc8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -0,0 +1,842 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_net_main.c + * Netronome network device driver: Main entry point + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Alejandro Lucero <alejandro.lucero@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/etherdevice.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/lockdep.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/msi.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> + +#include "nfpcore/nfp.h" +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_dev.h" +#include "nfpcore/nfp_nffw.h" +#include "nfpcore/nfp_nsp.h" +#include "nfpcore/nfp6000_pcie.h" +#include "nfp_app.h" +#include "nfp_net_ctrl.h" +#include "nfp_net_sriov.h" +#include "nfp_net.h" +#include "nfp_main.h" +#include "nfp_port.h" + +#define NFP_PF_CSR_SLICE_SIZE (32 * 1024) + +/** + * nfp_net_get_mac_addr() - Get the MAC address. + * @pf: NFP PF handle + * @netdev: net_device to set MAC address on + * @port: NFP port structure + * + * First try to get the MAC address from NSP ETH table. If that + * fails generate a random address. + */ +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port) +{ + struct nfp_eth_table_port *eth_port; + + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) { + eth_hw_addr_random(netdev); + return; + } + + eth_hw_addr_set(netdev, eth_port->mac_addr); + ether_addr_copy(netdev->perm_addr, eth_port->mac_addr); +} + +static struct nfp_eth_table_port * +nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned int index) +{ + int i; + + for (i = 0; eth_tbl && i < eth_tbl->count; i++) + if (eth_tbl->ports[i].index == index) + return ð_tbl->ports[i]; + + return NULL; +} + +static int nfp_net_pf_get_num_ports(struct nfp_pf *pf) +{ + return nfp_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1); +} + +static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn) +{ + if (nfp_net_is_data_vnic(nn)) + nfp_app_vnic_free(pf->app, nn); + nfp_port_free(nn->port); + list_del(&nn->vnic_list); + pf->num_vnics--; + nfp_net_free(nn); +} + +static void nfp_net_pf_free_vnics(struct nfp_pf *pf) +{ + struct nfp_net *nn, *next; + + list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) + if (nfp_net_is_data_vnic(nn)) + nfp_net_pf_free_vnic(pf, nn); +} + +static struct nfp_net * +nfp_net_pf_alloc_vnic(struct nfp_pf *pf, bool needs_netdev, + void __iomem *ctrl_bar, void __iomem *qc_bar, + int stride, unsigned int id) +{ + u32 tx_base, rx_base, n_tx_rings, n_rx_rings; + struct nfp_net *nn; + int err; + + tx_base = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + rx_base = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + n_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS); + n_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); + + /* Allocate and initialise the vNIC */ + nn = nfp_net_alloc(pf->pdev, pf->dev_info, ctrl_bar, needs_netdev, + n_tx_rings, n_rx_rings); + if (IS_ERR(nn)) + return nn; + + nn->app = pf->app; + nn->tx_bar = qc_bar + tx_base * NFP_QCP_QUEUE_ADDR_SZ; + nn->rx_bar = qc_bar + rx_base * NFP_QCP_QUEUE_ADDR_SZ; + nn->dp.is_vf = 0; + nn->stride_rx = stride; + nn->stride_tx = stride; + + if (needs_netdev) { + err = nfp_app_vnic_alloc(pf->app, nn, id); + if (err) { + nfp_net_free(nn); + return ERR_PTR(err); + } + } + + pf->num_vnics++; + list_add_tail(&nn->vnic_list, &pf->vnics); + + return nn; +} + +static int +nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id) +{ + int err; + + nn->id = id; + + if (nn->port) { + err = nfp_devlink_port_register(pf->app, nn->port); + if (err) + return err; + } + + err = nfp_net_init(nn); + if (err) + goto err_devlink_port_clean; + + nfp_net_debugfs_vnic_add(nn, pf->ddir); + + if (nn->port) + nfp_devlink_port_type_eth_set(nn->port); + + nfp_net_info(nn); + + if (nfp_net_is_data_vnic(nn)) { + err = nfp_app_vnic_init(pf->app, nn); + if (err) + goto err_devlink_port_type_clean; + } + + return 0; + +err_devlink_port_type_clean: + if (nn->port) + nfp_devlink_port_type_clear(nn->port); + nfp_net_debugfs_dir_clean(&nn->debugfs_dir); + nfp_net_clean(nn); +err_devlink_port_clean: + if (nn->port) + nfp_devlink_port_unregister(nn->port); + return err; +} + +static int +nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem *ctrl_bar, + void __iomem *qc_bar, int stride) +{ + struct nfp_net *nn; + unsigned int i; + int err; + + for (i = 0; i < pf->max_data_vnics; i++) { + nn = nfp_net_pf_alloc_vnic(pf, true, ctrl_bar, qc_bar, + stride, i); + if (IS_ERR(nn)) { + err = PTR_ERR(nn); + goto err_free_prev; + } + + if (nn->port) + nn->port->link_cb = nfp_net_refresh_port_table; + + ctrl_bar += NFP_PF_CSR_SLICE_SIZE; + + /* Kill the vNIC if app init marked it as invalid */ + if (nn->port && nn->port->type == NFP_PORT_INVALID) + nfp_net_pf_free_vnic(pf, nn); + } + + if (list_empty(&pf->vnics)) + return -ENODEV; + + return 0; + +err_free_prev: + nfp_net_pf_free_vnics(pf); + return err; +} + +static void nfp_net_pf_clean_vnic(struct nfp_pf *pf, struct nfp_net *nn) +{ + if (nfp_net_is_data_vnic(nn)) + nfp_app_vnic_clean(pf->app, nn); + if (nn->port) + nfp_devlink_port_type_clear(nn->port); + nfp_net_debugfs_dir_clean(&nn->debugfs_dir); + nfp_net_clean(nn); + if (nn->port) + nfp_devlink_port_unregister(nn->port); +} + +static int nfp_net_pf_alloc_irqs(struct nfp_pf *pf) +{ + unsigned int wanted_irqs, num_irqs, vnics_left, irqs_left; + struct nfp_net *nn; + + /* Get MSI-X vectors */ + wanted_irqs = 0; + list_for_each_entry(nn, &pf->vnics, vnic_list) + wanted_irqs += NFP_NET_NON_Q_VECTORS + nn->dp.num_r_vecs; + pf->irq_entries = kcalloc(wanted_irqs, sizeof(*pf->irq_entries), + GFP_KERNEL); + if (!pf->irq_entries) + return -ENOMEM; + + num_irqs = nfp_net_irqs_alloc(pf->pdev, pf->irq_entries, + NFP_NET_MIN_VNIC_IRQS * pf->num_vnics, + wanted_irqs); + if (!num_irqs) { + nfp_warn(pf->cpp, "Unable to allocate MSI-X vectors\n"); + kfree(pf->irq_entries); + return -ENOMEM; + } + + /* Distribute IRQs to vNICs */ + irqs_left = num_irqs; + vnics_left = pf->num_vnics; + list_for_each_entry(nn, &pf->vnics, vnic_list) { + unsigned int n; + + n = min(NFP_NET_NON_Q_VECTORS + nn->dp.num_r_vecs, + DIV_ROUND_UP(irqs_left, vnics_left)); + nfp_net_irqs_assign(nn, &pf->irq_entries[num_irqs - irqs_left], + n); + irqs_left -= n; + vnics_left--; + } + + return 0; +} + +static void nfp_net_pf_free_irqs(struct nfp_pf *pf) +{ + nfp_net_irqs_disable(pf->pdev); + kfree(pf->irq_entries); +} + +static int nfp_net_pf_init_vnics(struct nfp_pf *pf) +{ + struct nfp_net *nn; + unsigned int id; + int err; + + /* Finish vNIC init and register */ + id = 0; + list_for_each_entry(nn, &pf->vnics, vnic_list) { + if (!nfp_net_is_data_vnic(nn)) + continue; + err = nfp_net_pf_init_vnic(pf, nn, id); + if (err) + goto err_prev_deinit; + + id++; + } + + return 0; + +err_prev_deinit: + list_for_each_entry_continue_reverse(nn, &pf->vnics, vnic_list) + if (nfp_net_is_data_vnic(nn)) + nfp_net_pf_clean_vnic(pf, nn); + return err; +} + +static int +nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) +{ + struct devlink *devlink = priv_to_devlink(pf); + u8 __iomem *ctrl_bar; + int err; + + pf->app = nfp_app_alloc(pf, nfp_net_pf_get_app_id(pf)); + if (IS_ERR(pf->app)) + return PTR_ERR(pf->app); + + devl_lock(devlink); + err = nfp_app_init(pf->app); + devl_unlock(devlink); + if (err) + goto err_free; + + if (!nfp_app_needs_ctrl_vnic(pf->app)) + return 0; + + ctrl_bar = nfp_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar", + NFP_PF_CSR_SLICE_SIZE, &pf->ctrl_vnic_bar); + if (IS_ERR(ctrl_bar)) { + nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n"); + err = PTR_ERR(ctrl_bar); + goto err_app_clean; + } + + pf->ctrl_vnic = nfp_net_pf_alloc_vnic(pf, false, ctrl_bar, qc_bar, + stride, 0); + if (IS_ERR(pf->ctrl_vnic)) { + err = PTR_ERR(pf->ctrl_vnic); + goto err_unmap; + } + + return 0; + +err_unmap: + nfp_cpp_area_release_free(pf->ctrl_vnic_bar); +err_app_clean: + devl_lock(devlink); + nfp_app_clean(pf->app); + devl_unlock(devlink); +err_free: + nfp_app_free(pf->app); + pf->app = NULL; + return err; +} + +static void nfp_net_pf_app_clean(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + + if (pf->ctrl_vnic) { + nfp_net_pf_free_vnic(pf, pf->ctrl_vnic); + nfp_cpp_area_release_free(pf->ctrl_vnic_bar); + } + + devl_lock(devlink); + nfp_app_clean(pf->app); + devl_unlock(devlink); + + nfp_app_free(pf->app); + pf->app = NULL; +} + +static int nfp_net_pf_app_start_ctrl(struct nfp_pf *pf) +{ + int err; + + if (!pf->ctrl_vnic) + return 0; + err = nfp_net_pf_init_vnic(pf, pf->ctrl_vnic, 0); + if (err) + return err; + + err = nfp_ctrl_open(pf->ctrl_vnic); + if (err) + goto err_clean_ctrl; + + return 0; + +err_clean_ctrl: + nfp_net_pf_clean_vnic(pf, pf->ctrl_vnic); + return err; +} + +static void nfp_net_pf_app_stop_ctrl(struct nfp_pf *pf) +{ + if (!pf->ctrl_vnic) + return; + nfp_ctrl_close(pf->ctrl_vnic); + nfp_net_pf_clean_vnic(pf, pf->ctrl_vnic); +} + +static int nfp_net_pf_app_start(struct nfp_pf *pf) +{ + int err; + + err = nfp_net_pf_app_start_ctrl(pf); + if (err) + return err; + + err = nfp_app_start(pf->app, pf->ctrl_vnic); + if (err) + goto err_ctrl_stop; + + if (pf->num_vfs) { + err = nfp_app_sriov_enable(pf->app, pf->num_vfs); + if (err) + goto err_app_stop; + } + + return 0; + +err_app_stop: + nfp_app_stop(pf->app); +err_ctrl_stop: + nfp_net_pf_app_stop_ctrl(pf); + return err; +} + +static void nfp_net_pf_app_stop(struct nfp_pf *pf) +{ + if (pf->num_vfs) + nfp_app_sriov_disable(pf->app); + nfp_app_stop(pf->app); + nfp_net_pf_app_stop_ctrl(pf); +} + +static void nfp_net_pci_unmap_mem(struct nfp_pf *pf) +{ + if (pf->vfcfg_tbl2_area) + nfp_cpp_area_release_free(pf->vfcfg_tbl2_area); + if (pf->vf_cfg_bar) + nfp_cpp_area_release_free(pf->vf_cfg_bar); + if (pf->mac_stats_bar) + nfp_cpp_area_release_free(pf->mac_stats_bar); + nfp_cpp_area_release_free(pf->qc_area); + nfp_cpp_area_release_free(pf->data_vnic_bar); +} + +static int nfp_net_pci_map_mem(struct nfp_pf *pf) +{ + u32 min_size, cpp_id; + u8 __iomem *mem; + int err; + + min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE; + mem = nfp_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0", + min_size, &pf->data_vnic_bar); + if (IS_ERR(mem)) { + nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n"); + return PTR_ERR(mem); + } + + if (pf->eth_tbl) { + min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); + pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", + "net.macstats", min_size, + &pf->mac_stats_bar); + if (IS_ERR(pf->mac_stats_mem)) { + if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { + err = PTR_ERR(pf->mac_stats_mem); + goto err_unmap_ctrl; + } + pf->mac_stats_mem = NULL; + } + } + + pf->vf_cfg_mem = nfp_pf_map_rtsym(pf, "net.vfcfg", "_pf%d_net_vf_bar", + NFP_NET_CFG_BAR_SZ * pf->limit_vfs, + &pf->vf_cfg_bar); + if (IS_ERR(pf->vf_cfg_mem)) { + if (PTR_ERR(pf->vf_cfg_mem) != -ENOENT) { + err = PTR_ERR(pf->vf_cfg_mem); + goto err_unmap_mac_stats; + } + pf->vf_cfg_mem = NULL; + } + + min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ; + pf->vfcfg_tbl2 = nfp_pf_map_rtsym(pf, "net.vfcfg_tbl2", + "_pf%d_net_vf_cfg2", + min_size, &pf->vfcfg_tbl2_area); + if (IS_ERR(pf->vfcfg_tbl2)) { + if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) { + err = PTR_ERR(pf->vfcfg_tbl2); + goto err_unmap_vf_cfg; + } + pf->vfcfg_tbl2 = NULL; + } + + cpp_id = NFP_CPP_ISLAND_ID(0, NFP_CPP_ACTION_RW, 0, 0); + mem = nfp_cpp_map_area(pf->cpp, "net.qc", cpp_id, + nfp_qcp_queue_offset(pf->dev_info, 0), + pf->dev_info->qc_area_sz, &pf->qc_area); + if (IS_ERR(mem)) { + nfp_err(pf->cpp, "Failed to map Queue Controller area.\n"); + err = PTR_ERR(mem); + goto err_unmap_vfcfg_tbl2; + } + + return 0; + +err_unmap_vfcfg_tbl2: + if (pf->vfcfg_tbl2_area) + nfp_cpp_area_release_free(pf->vfcfg_tbl2_area); +err_unmap_vf_cfg: + if (pf->vf_cfg_bar) + nfp_cpp_area_release_free(pf->vf_cfg_bar); +err_unmap_mac_stats: + if (pf->mac_stats_bar) + nfp_cpp_area_release_free(pf->mac_stats_bar); +err_unmap_ctrl: + nfp_cpp_area_release_free(pf->data_vnic_bar); + return err; +} + +static const unsigned int lr_to_speed[] = { + [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = 0, + [NFP_NET_CFG_STS_LINK_RATE_UNKNOWN] = SPEED_UNKNOWN, + [NFP_NET_CFG_STS_LINK_RATE_1G] = SPEED_1000, + [NFP_NET_CFG_STS_LINK_RATE_10G] = SPEED_10000, + [NFP_NET_CFG_STS_LINK_RATE_25G] = SPEED_25000, + [NFP_NET_CFG_STS_LINK_RATE_40G] = SPEED_40000, + [NFP_NET_CFG_STS_LINK_RATE_50G] = SPEED_50000, + [NFP_NET_CFG_STS_LINK_RATE_100G] = SPEED_100000, +}; + +unsigned int nfp_net_lr2speed(unsigned int linkrate) +{ + if (linkrate < ARRAY_SIZE(lr_to_speed)) + return lr_to_speed[linkrate]; + + return SPEED_UNKNOWN; +} + +unsigned int nfp_net_speed2lr(unsigned int speed) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lr_to_speed); i++) { + if (speed == lr_to_speed[i]) + return i; + } + + return NFP_NET_CFG_STS_LINK_RATE_UNKNOWN; +} + +static void nfp_net_notify_port_speed(struct nfp_port *port) +{ + struct net_device *netdev = port->netdev; + struct nfp_net *nn; + u16 sts; + + if (!nfp_netdev_is_nfp_net(netdev)) + return; + + nn = netdev_priv(netdev); + sts = nn_readw(nn, NFP_NET_CFG_STS); + + if (!(sts & NFP_NET_CFG_STS_LINK)) { + nn_writew(nn, NFP_NET_CFG_STS_NSP_LINK_RATE, NFP_NET_CFG_STS_LINK_RATE_UNKNOWN); + return; + } + + nn_writew(nn, NFP_NET_CFG_STS_NSP_LINK_RATE, nfp_net_speed2lr(port->eth_port->speed)); +} + +static int +nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, + struct nfp_eth_table *eth_table) +{ + struct nfp_eth_table_port *eth_port; + + ASSERT_RTNL(); + + eth_port = nfp_net_find_port(eth_table, port->eth_id); + if (!eth_port) { + set_bit(NFP_PORT_CHANGED, &port->flags); + nfp_warn(cpp, "Warning: port #%d not present after reconfig\n", + port->eth_id); + return -EIO; + } + if (eth_port->override_changed) { + nfp_warn(cpp, "Port #%d config changed, unregistering. Driver reload required before port will be operational again.\n", port->eth_id); + port->type = NFP_PORT_INVALID; + } + + memcpy(port->eth_port, eth_port, sizeof(*eth_port)); + nfp_net_notify_port_speed(port); + + return 0; +} + +int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct nfp_eth_table *eth_table; + struct nfp_net *nn, *next; + struct nfp_port *port; + int err; + + devl_assert_locked(devlink); + + /* Check for nfp_net_pci_remove() racing against us */ + if (list_empty(&pf->vnics)) + return 0; + + /* Update state of all ports */ + rtnl_lock(); + list_for_each_entry(port, &pf->ports, port_list) + clear_bit(NFP_PORT_CHANGED, &port->flags); + + eth_table = nfp_eth_read_ports(pf->cpp); + if (!eth_table) { + list_for_each_entry(port, &pf->ports, port_list) + if (__nfp_port_get_eth_port(port)) + set_bit(NFP_PORT_CHANGED, &port->flags); + rtnl_unlock(); + nfp_err(pf->cpp, "Error refreshing port config!\n"); + return -EIO; + } + + list_for_each_entry(port, &pf->ports, port_list) + if (__nfp_port_get_eth_port(port)) + nfp_net_eth_port_update(pf->cpp, port, eth_table); + rtnl_unlock(); + + kfree(eth_table); + + /* Resync repr state. This may cause reprs to be removed. */ + err = nfp_reprs_resync_phys_ports(pf->app); + if (err) + return err; + + /* Shoot off the ports which became invalid */ + list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { + if (!nn->port || nn->port->type != NFP_PORT_INVALID) + continue; + + nfp_net_pf_clean_vnic(pf, nn); + nfp_net_pf_free_vnic(pf, nn); + } + + return 0; +} + +static void nfp_net_refresh_vnics(struct work_struct *work) +{ + struct nfp_pf *pf = container_of(work, struct nfp_pf, + port_refresh_work); + struct devlink *devlink = priv_to_devlink(pf); + + devl_lock(devlink); + nfp_net_refresh_port_table_sync(pf); + devl_unlock(devlink); +} + +void nfp_net_refresh_port_table(struct nfp_port *port) +{ + struct nfp_pf *pf = port->app->pf; + + set_bit(NFP_PORT_CHANGED, &port->flags); + + queue_work(pf->wq, &pf->port_refresh_work); +} + +int nfp_net_refresh_eth_port(struct nfp_port *port) +{ + struct nfp_cpp *cpp = port->app->cpp; + struct nfp_eth_table *eth_table; + int ret; + + clear_bit(NFP_PORT_CHANGED, &port->flags); + + eth_table = nfp_eth_read_ports(cpp); + if (!eth_table) { + set_bit(NFP_PORT_CHANGED, &port->flags); + nfp_err(cpp, "Error refreshing port state table!\n"); + return -EIO; + } + + ret = nfp_net_eth_port_update(cpp, port, eth_table); + + kfree(eth_table); + + return ret; +} + +/* + * PCI device functions + */ +int nfp_net_pci_probe(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct nfp_net_fw_version fw_ver; + u8 __iomem *ctrl_bar, *qc_bar; + int stride; + int err; + + INIT_WORK(&pf->port_refresh_work, nfp_net_refresh_vnics); + + if (!pf->rtbl) { + nfp_err(pf->cpp, "No %s, giving up.\n", + pf->fw_loaded ? "symbol table" : "firmware found"); + return -EINVAL; + } + + pf->max_data_vnics = nfp_net_pf_get_num_ports(pf); + if ((int)pf->max_data_vnics < 0) + return pf->max_data_vnics; + + err = nfp_net_pci_map_mem(pf); + if (err) + return err; + + ctrl_bar = nfp_cpp_area_iomem(pf->data_vnic_bar); + qc_bar = nfp_cpp_area_iomem(pf->qc_area); + if (!ctrl_bar || !qc_bar) { + err = -EIO; + goto err_unmap; + } + + nfp_net_get_fw_version(&fw_ver, ctrl_bar); + if (fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK || + fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + nfp_err(pf->cpp, "Unknown Firmware ABI %d.%d.%d.%d\n", + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_unmap; + } + + /* Determine stride */ + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1)) { + stride = 2; + nfp_warn(pf->cpp, "OBSOLETE Firmware detected - VF isolation not available\n"); + } else { + switch (fw_ver.major) { + case 1 ... 5: + stride = 4; + break; + default: + nfp_err(pf->cpp, "Unsupported Firmware ABI %d.%d.%d.%d\n", + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_unmap; + } + } + + err = nfp_net_pf_app_init(pf, qc_bar, stride); + if (err) + goto err_unmap; + + err = nfp_shared_buf_register(pf); + if (err) + goto err_devlink_unreg; + + err = nfp_devlink_params_register(pf); + if (err) + goto err_shared_buf_unreg; + + devl_lock(devlink); + pf->ddir = nfp_net_debugfs_device_add(pf->pdev); + + /* Allocate the vnics and do basic init */ + err = nfp_net_pf_alloc_vnics(pf, ctrl_bar, qc_bar, stride); + if (err) + goto err_clean_ddir; + + err = nfp_net_pf_alloc_irqs(pf); + if (err) + goto err_free_vnics; + + err = nfp_net_pf_app_start(pf); + if (err) + goto err_free_irqs; + + err = nfp_net_pf_init_vnics(pf); + if (err) + goto err_stop_app; + + devl_unlock(devlink); + devlink_register(devlink); + + return 0; + +err_stop_app: + nfp_net_pf_app_stop(pf); +err_free_irqs: + nfp_net_pf_free_irqs(pf); +err_free_vnics: + nfp_net_pf_free_vnics(pf); +err_clean_ddir: + nfp_net_debugfs_dir_clean(&pf->ddir); + devl_unlock(devlink); + nfp_devlink_params_unregister(pf); +err_shared_buf_unreg: + nfp_shared_buf_unregister(pf); +err_devlink_unreg: + cancel_work_sync(&pf->port_refresh_work); + nfp_net_pf_app_clean(pf); +err_unmap: + nfp_net_pci_unmap_mem(pf); + return err; +} + +void nfp_net_pci_remove(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct nfp_net *nn, *next; + + devlink_unregister(priv_to_devlink(pf)); + devl_lock(devlink); + list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { + if (!nfp_net_is_data_vnic(nn)) + continue; + nfp_net_pf_clean_vnic(pf, nn); + nfp_net_pf_free_vnic(pf, nn); + } + + nfp_net_pf_app_stop(pf); + /* stop app first, to avoid double free of ctrl vNIC's ddir */ + nfp_net_debugfs_dir_clean(&pf->ddir); + + devl_unlock(devlink); + + nfp_devlink_params_unregister(pf); + nfp_shared_buf_unregister(pf); + + nfp_net_pf_free_irqs(pf); + nfp_net_pf_app_clean(pf); + nfp_net_pci_unmap_mem(pf); + + cancel_work_sync(&pf->port_refresh_work); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c new file mode 100644 index 000000000..8b77582bd --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/etherdevice.h> +#include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/lockdep.h> +#include <net/dst_metadata.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net.h" +#include "nfp_net_ctrl.h" +#include "nfp_net_repr.h" +#include "nfp_net_sriov.h" +#include "nfp_port.h" + +struct net_device * +nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id) +{ + return rcu_dereference_protected(set->reprs[id], + nfp_app_is_locked(app)); +} + +static void +nfp_repr_inc_tx_stats(struct net_device *netdev, unsigned int len, + int tx_status) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_repr_pcpu_stats *stats; + + if (unlikely(tx_status != NET_XMIT_SUCCESS && + tx_status != NET_XMIT_CN)) { + this_cpu_inc(repr->stats->tx_drops); + return; + } + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + +void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_repr_pcpu_stats *stats; + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + +static void +nfp_repr_phy_port_get_stats64(struct nfp_port *port, + struct rtnl_link_stats64 *stats) +{ + u8 __iomem *mem = port->eth_stats; + + stats->tx_packets = readq(mem + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK); + stats->tx_bytes = readq(mem + NFP_MAC_STATS_TX_OUT_OCTETS); + stats->tx_dropped = readq(mem + NFP_MAC_STATS_TX_OUT_ERRORS); + + stats->rx_packets = readq(mem + NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK); + stats->rx_bytes = readq(mem + NFP_MAC_STATS_RX_IN_OCTETS); + stats->rx_dropped = readq(mem + NFP_MAC_STATS_RX_IN_ERRORS); +} + +static void +nfp_repr_vnic_get_stats64(struct nfp_port *port, + struct rtnl_link_stats64 *stats) +{ + /* TX and RX stats are flipped as we are returning the stats as seen + * at the switch port corresponding to the VF. + */ + stats->tx_packets = readq(port->vnic + NFP_NET_CFG_STATS_RX_FRAMES); + stats->tx_bytes = readq(port->vnic + NFP_NET_CFG_STATS_RX_OCTETS); + stats->tx_dropped = readq(port->vnic + NFP_NET_CFG_STATS_RX_DISCARDS); + + stats->rx_packets = readq(port->vnic + NFP_NET_CFG_STATS_TX_FRAMES); + stats->rx_bytes = readq(port->vnic + NFP_NET_CFG_STATS_TX_OCTETS); + stats->rx_dropped = readq(port->vnic + NFP_NET_CFG_STATS_TX_DISCARDS); +} + +static void +nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + if (WARN_ON(!repr->port)) + return; + + switch (repr->port->type) { + case NFP_PORT_PHYS_PORT: + if (!__nfp_port_get_eth_port(repr->port)) + break; + nfp_repr_phy_port_get_stats64(repr->port, stats); + break; + case NFP_PORT_PF_PORT: + case NFP_PORT_VF_PORT: + nfp_repr_vnic_get_stats64(repr->port, stats); + break; + default: + break; + } +} + +static bool +nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +static int +nfp_repr_get_host_stats64(const struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nfp_repr *repr = netdev_priv(netdev); + int i; + + for_each_possible_cpu(i) { + u64 tbytes, tpkts, tdrops, rbytes, rpkts; + struct nfp_repr_pcpu_stats *repr_stats; + unsigned int start; + + repr_stats = per_cpu_ptr(repr->stats, i); + do { + start = u64_stats_fetch_begin_irq(&repr_stats->syncp); + tbytes = repr_stats->tx_bytes; + tpkts = repr_stats->tx_packets; + tdrops = repr_stats->tx_drops; + rbytes = repr_stats->rx_bytes; + rpkts = repr_stats->rx_packets; + } while (u64_stats_fetch_retry_irq(&repr_stats->syncp, start)); + + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; + stats->tx_dropped += tdrops; + stats->rx_bytes += rbytes; + stats->rx_packets += rpkts; + } + + return 0; +} + +static int +nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, + void *stats) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return nfp_repr_get_host_stats64(dev, stats); + } + + return -EINVAL; +} + +static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_repr *repr = netdev_priv(netdev); + int err; + + err = nfp_app_check_mtu(repr->app, netdev, new_mtu); + if (err) + return err; + + err = nfp_app_repr_change_mtu(repr->app, netdev, new_mtu); + if (err) + return err; + + netdev->mtu = new_mtu; + + return 0; +} + +static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + unsigned int len = skb->len; + int ret; + + skb_dst_drop(skb); + dst_hold((struct dst_entry *)repr->dst); + skb_dst_set(skb, (struct dst_entry *)repr->dst); + skb->dev = repr->dst->u.port_info.lower_dev; + + ret = dev_queue_xmit(skb); + nfp_repr_inc_tx_stats(netdev, len, ret); + + return NETDEV_TX_OK; +} + +static int nfp_repr_stop(struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + int err; + + err = nfp_app_repr_stop(repr->app, repr); + if (err) + return err; + + nfp_port_configure(netdev, false); + return 0; +} + +static int nfp_repr_open(struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + int err; + + err = nfp_port_configure(netdev, true); + if (err) + return err; + + err = nfp_app_repr_open(repr->app, repr); + if (err) + goto err_port_disable; + + return 0; + +err_port_disable: + nfp_port_configure(netdev, false); + return err; +} + +static netdev_features_t +nfp_repr_fix_features(struct net_device *netdev, netdev_features_t features) +{ + struct nfp_repr *repr = netdev_priv(netdev); + netdev_features_t old_features = features; + netdev_features_t lower_features; + struct net_device *lower_dev; + + lower_dev = repr->dst->u.port_info.lower_dev; + + lower_features = lower_dev->features; + if (lower_features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) + lower_features |= NETIF_F_HW_CSUM; + + features = netdev_intersect_features(features, lower_features); + features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_HW_TC); + features |= NETIF_F_LLTX; + + return features; +} + +const struct net_device_ops nfp_repr_netdev_ops = { + .ndo_init = nfp_app_ndo_init, + .ndo_uninit = nfp_app_ndo_uninit, + .ndo_open = nfp_repr_open, + .ndo_stop = nfp_repr_stop, + .ndo_start_xmit = nfp_repr_xmit, + .ndo_change_mtu = nfp_repr_change_mtu, + .ndo_get_stats64 = nfp_repr_get_stats64, + .ndo_has_offload_stats = nfp_repr_has_offload_stats, + .ndo_get_offload_stats = nfp_repr_get_offload_stats, + .ndo_get_phys_port_name = nfp_port_get_phys_port_name, + .ndo_setup_tc = nfp_port_setup_tc, + .ndo_set_vf_mac = nfp_app_set_vf_mac, + .ndo_set_vf_vlan = nfp_app_set_vf_vlan, + .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, + .ndo_set_vf_trust = nfp_app_set_vf_trust, + .ndo_get_vf_config = nfp_app_get_vf_config, + .ndo_set_vf_link_state = nfp_app_set_vf_link_state, + .ndo_fix_features = nfp_repr_fix_features, + .ndo_set_features = nfp_port_set_features, + .ndo_set_mac_address = eth_mac_addr, + .ndo_get_port_parent_id = nfp_port_get_port_parent_id, + .ndo_get_devlink_port = nfp_devlink_get_devlink_port, +}; + +void +nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + if (repr->dst->u.port_info.lower_dev != lower) + return; + + netif_inherit_tso_max(netdev, lower); + + netdev_update_features(netdev); +} + +static void nfp_repr_clean(struct nfp_repr *repr) +{ + unregister_netdev(repr->netdev); + nfp_app_repr_clean(repr->app, repr->netdev); + dst_release((struct dst_entry *)repr->dst); + nfp_port_free(repr->port); +} + +static struct lock_class_key nfp_repr_netdev_xmit_lock_key; + +static void nfp_repr_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &nfp_repr_netdev_xmit_lock_key); +} + +static void nfp_repr_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL); +} + +int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, + u32 cmsg_port_id, struct nfp_port *port, + struct net_device *pf_netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_net *nn = netdev_priv(pf_netdev); + u32 repr_cap = nn->tlv_caps.repr_cap; + int err; + + nfp_repr_set_lockdep_class(netdev); + + repr->port = port; + repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); + if (!repr->dst) + return -ENOMEM; + repr->dst->u.port_info.port_id = cmsg_port_id; + repr->dst->u.port_info.lower_dev = pf_netdev; + + netdev->netdev_ops = &nfp_repr_netdev_ops; + netdev->ethtool_ops = &nfp_port_ethtool_ops; + + netdev->max_mtu = pf_netdev->max_mtu; + + /* Set features the lower device can support with representors */ + if (repr_cap & NFP_NET_CFG_CTRL_LIVE_ADDR) + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev->hw_features = NETIF_F_HIGHDMA; + if (repr_cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) + netdev->hw_features |= NETIF_F_RXCSUM; + if (repr_cap & NFP_NET_CFG_CTRL_TXCSUM) + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + if (repr_cap & NFP_NET_CFG_CTRL_GATHER) + netdev->hw_features |= NETIF_F_SG; + if ((repr_cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) || + repr_cap & NFP_NET_CFG_CTRL_LSO2) + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + if (repr_cap & NFP_NET_CFG_CTRL_RSS_ANY) + netdev->hw_features |= NETIF_F_RXHASH; + if (repr_cap & NFP_NET_CFG_CTRL_VXLAN) { + if (repr_cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + } + if (repr_cap & NFP_NET_CFG_CTRL_NVGRE) { + if (repr_cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_GRE; + } + if (repr_cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE)) + netdev->hw_enc_features = netdev->hw_features; + + netdev->vlan_features = netdev->hw_features; + + if (repr_cap & NFP_NET_CFG_CTRL_RXVLAN_ANY) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + if (repr_cap & NFP_NET_CFG_CTRL_TXVLAN_ANY) { + if (repr_cap & NFP_NET_CFG_CTRL_LSO2) + netdev_warn(netdev, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n"); + else + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + } + if (repr_cap & NFP_NET_CFG_CTRL_CTAG_FILTER) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (repr_cap & NFP_NET_CFG_CTRL_RXQINQ) + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + + netdev->features = netdev->hw_features; + + /* C-Tag strip and S-Tag strip can't be supported simultaneously, + * so enable C-Tag strip and disable S-Tag strip by default. + */ + netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; + netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); + + netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; + netdev->features |= NETIF_F_LLTX; + + if (nfp_app_has_tc(app)) { + netdev->features |= NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; + } + + err = nfp_app_repr_init(app, netdev); + if (err) + goto err_clean; + + err = register_netdev(netdev); + if (err) + goto err_repr_clean; + + return 0; + +err_repr_clean: + nfp_app_repr_clean(app, netdev); +err_clean: + dst_release((struct dst_entry *)repr->dst); + return err; +} + +static void __nfp_repr_free(struct nfp_repr *repr) +{ + free_percpu(repr->stats); + free_netdev(repr->netdev); +} + +void nfp_repr_free(struct net_device *netdev) +{ + __nfp_repr_free(netdev_priv(netdev)); +} + +struct net_device * +nfp_repr_alloc_mqs(struct nfp_app *app, unsigned int txqs, unsigned int rxqs) +{ + struct net_device *netdev; + struct nfp_repr *repr; + + netdev = alloc_etherdev_mqs(sizeof(*repr), txqs, rxqs); + if (!netdev) + return NULL; + + netif_carrier_off(netdev); + + repr = netdev_priv(netdev); + repr->netdev = netdev; + repr->app = app; + + repr->stats = netdev_alloc_pcpu_stats(struct nfp_repr_pcpu_stats); + if (!repr->stats) + goto err_free_netdev; + + return netdev; + +err_free_netdev: + free_netdev(netdev); + return NULL; +} + +void nfp_repr_clean_and_free(struct nfp_repr *repr) +{ + nfp_info(repr->app->cpp, "Destroying Representor(%s)\n", + repr->netdev->name); + nfp_repr_clean(repr); + __nfp_repr_free(repr); +} + +void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs) +{ + struct net_device *netdev; + unsigned int i; + + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) + nfp_repr_clean_and_free(netdev_priv(netdev)); + } + + kfree(reprs); +} + +void +nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type) +{ + struct net_device *netdev; + struct nfp_reprs *reprs; + int i; + + reprs = rcu_dereference_protected(app->reprs[type], + nfp_app_is_locked(app)); + if (!reprs) + return; + + /* Preclean must happen before we remove the reprs reference from the + * app below. + */ + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) + nfp_app_repr_preclean(app, netdev); + } + + reprs = nfp_app_reprs_set(app, type, NULL); + + synchronize_rcu(); + nfp_reprs_clean_and_free(app, reprs); +} + +struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) +{ + struct nfp_reprs *reprs; + + reprs = kzalloc(struct_size(reprs, reprs, num_reprs), GFP_KERNEL); + if (!reprs) + return NULL; + reprs->num_reprs = num_reprs; + + return reprs; +} + +int nfp_reprs_resync_phys_ports(struct nfp_app *app) +{ + struct net_device *netdev; + struct nfp_reprs *reprs; + struct nfp_repr *repr; + int i; + + reprs = nfp_reprs_get_locked(app, NFP_REPR_TYPE_PHYS_PORT); + if (!reprs) + return 0; + + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (!netdev) + continue; + + repr = netdev_priv(netdev); + if (repr->port->type != NFP_PORT_INVALID) + continue; + + nfp_app_repr_preclean(app, netdev); + rtnl_lock(); + rcu_assign_pointer(reprs->reprs[i], NULL); + rtnl_unlock(); + synchronize_rcu(); + nfp_repr_clean(repr); + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h new file mode 100644 index 000000000..48a74accb --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef NFP_NET_REPR_H +#define NFP_NET_REPR_H + +struct metadata_dst; +struct nfp_app; +struct nfp_net; +struct nfp_port; + +#include <net/dst_metadata.h> + +/** + * struct nfp_reprs - container for representor netdevs + * @num_reprs: Number of elements in reprs array + * @reprs: Array of representor netdevs + */ +struct nfp_reprs { + unsigned int num_reprs; + struct net_device __rcu *reprs[]; +}; + +/** + * struct nfp_repr_pcpu_stats + * @rx_packets: Received packets + * @rx_bytes: Received bytes + * @tx_packets: Transmitted packets + * @tx_bytes: Transmitted dropped + * @tx_drops: Packets dropped on transmit + * @syncp: Reference count + */ +struct nfp_repr_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +}; + +/** + * struct nfp_repr - priv data for representor netdevs + * @netdev: Back pointer to netdev + * @dst: Destination for packet TX + * @port: Port of representor + * @app: APP handle + * @stats: Statistic of packets hitting CPU + * @app_priv: Pointer for APP data + */ +struct nfp_repr { + struct net_device *netdev; + struct metadata_dst *dst; + struct nfp_port *port; + struct nfp_app *app; + struct nfp_repr_pcpu_stats __percpu *stats; + void *app_priv; +}; + +/** + * enum nfp_repr_type - type of representor + * @NFP_REPR_TYPE_PHYS_PORT: external NIC port + * @NFP_REPR_TYPE_PF: physical function + * @NFP_REPR_TYPE_VF: virtual function + * @__NFP_REPR_TYPE_MAX: number of representor types + */ +enum nfp_repr_type { + NFP_REPR_TYPE_PHYS_PORT, + NFP_REPR_TYPE_PF, + NFP_REPR_TYPE_VF, + + __NFP_REPR_TYPE_MAX, +}; +#define NFP_REPR_TYPE_MAX (__NFP_REPR_TYPE_MAX - 1) + +extern const struct net_device_ops nfp_repr_netdev_ops; + +static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev) +{ + return netdev->netdev_ops == &nfp_repr_netdev_ops; +} + +static inline int nfp_repr_get_port_id(struct net_device *netdev) +{ + struct nfp_repr *priv = netdev_priv(netdev); + + return priv->dst->u.port_info.port_id; +} + +struct net_device * +nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, + unsigned int id); + +void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len); +void +nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower); +int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, + u32 cmsg_port_id, struct nfp_port *port, + struct net_device *pf_netdev); +void nfp_repr_free(struct net_device *netdev); +struct net_device * +nfp_repr_alloc_mqs(struct nfp_app *app, unsigned int txqs, unsigned int rxqs); +void nfp_repr_clean_and_free(struct nfp_repr *repr); +void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs); +void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, + enum nfp_repr_type type); +struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs); +int nfp_reprs_resync_phys_ports(struct nfp_app *app); + +static inline struct net_device *nfp_repr_alloc(struct nfp_app *app) +{ + return nfp_repr_alloc_mqs(app, 1, 1); +} +#endif /* NFP_NET_REPR_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c new file mode 100644 index 000000000..6eeeb0fda --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2019 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/errno.h> +#include <linux/etherdevice.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" +#include "nfp_net_sriov.h" + +static int +nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg, bool warn) +{ + u16 cap_vf; + + if (!app || !app->pf->vfcfg_tbl2) + return -EOPNOTSUPP; + + cap_vf = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_CAP); + if ((cap_vf & cap) != cap) { + if (warn) + nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg); + return -EOPNOTSUPP; + } + + if (vf < 0 || vf >= app->pf->num_vfs) { + if (warn) + nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf); + return -EINVAL; + } + + return 0; +} + +static int +nfp_net_sriov_update(struct nfp_app *app, int vf, u16 update, const char *msg) +{ + struct nfp_net *nn; + int ret; + + /* Write update info to mailbox in VF config symbol */ + writeb(vf, app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_VF_NUM); + writew(update, app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_UPD); + + nn = list_first_entry(&app->pf->vnics, struct nfp_net, vnic_list); + /* Signal VF reconfiguration */ + ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VF); + if (ret) + return ret; + + ret = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_RET); + if (ret) + nfp_warn(app->pf->cpp, + "FW refused VF %s update with errno: %d\n", msg, ret); + return -ret; +} + +int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + unsigned int vf_offset; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac", true); + if (err) + return err; + + if (is_multicast_ether_addr(mac)) { + nfp_warn(app->pf->cpp, + "invalid Ethernet address %pM for VF id %d\n", + mac, vf); + return -EINVAL; + } + + /* Write MAC to VF entry in VF config symbol */ + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; + writel(get_unaligned_be32(mac), app->pf->vfcfg_tbl2 + vf_offset); + writew(get_unaligned_be16(mac + 4), + app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); + + err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + if (!err) + nfp_info(app->pf->cpp, + "MAC %pM set on VF %d, reload the VF driver to make this change effective.\n", + mac, vf); + + return err; +} + +int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + u16 update = NFP_NET_VF_CFG_MB_UPD_VLAN; + bool is_proto_sup = true; + unsigned int vf_offset; + u32 vlan_tag; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan", true); + if (err) + return err; + + if (!eth_type_vlan(vlan_proto)) + return -EOPNOTSUPP; + + if (vlan > 4095 || qos > 7) { + nfp_warn(app->pf->cpp, + "invalid vlan id or qos for VF id %d\n", vf); + return -EINVAL; + } + + /* Check if fw supports or not */ + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", true); + if (err) + is_proto_sup = false; + + if (vlan_proto != htons(ETH_P_8021Q)) { + if (!is_proto_sup) + return -EOPNOTSUPP; + update |= NFP_NET_VF_CFG_MB_UPD_VLAN_PROTO; + } + + /* Write VLAN tag to VF entry in VF config symbol */ + vlan_tag = FIELD_PREP(NFP_NET_VF_CFG_VLAN_VID, vlan) | + FIELD_PREP(NFP_NET_VF_CFG_VLAN_QOS, qos); + + /* vlan_tag of 0 means that the configuration should be cleared and in + * such circumstances setting the TPID has no meaning when + * configuring firmware. + */ + if (vlan_tag && is_proto_sup) + vlan_tag |= FIELD_PREP(NFP_NET_VF_CFG_VLAN_PROT, ntohs(vlan_proto)); + + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; + writel(vlan_tag, app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); + + return nfp_net_sriov_update(app, vf, update, "vlan"); +} + +int nfp_app_set_vf_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + u32 vf_offset, ratevalue; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", true); + if (err) + return err; + + if (max_tx_rate >= NFP_NET_VF_RATE_MAX || + min_tx_rate >= NFP_NET_VF_RATE_MAX) { + nfp_warn(app->cpp, "tx-rate exceeds %d.\n", + NFP_NET_VF_RATE_MAX); + return -EINVAL; + } + + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; + ratevalue = FIELD_PREP(NFP_NET_VF_CFG_MAX_RATE, + max_tx_rate ? max_tx_rate : + NFP_NET_VF_RATE_MAX) | + FIELD_PREP(NFP_NET_VF_CFG_MIN_RATE, min_tx_rate); + + writel(ratevalue, + app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_RATE); + + return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_RATE, + "rate"); +} + +int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + unsigned int vf_offset; + u8 vf_ctrl; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_SPOOF, + "spoofchk", true); + if (err) + return err; + + /* Write spoof check control bit to VF entry in VF config symbol */ + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ + + NFP_NET_VF_CFG_CTRL; + vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset); + vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_SPOOF; + vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_SPOOF, enable); + writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset); + + return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_SPOOF, + "spoofchk"); +} + +int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool enable) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + unsigned int vf_offset; + u8 vf_ctrl; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_TRUST, + "trust", true); + if (err) + return err; + + /* Write trust control bit to VF entry in VF config symbol */ + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ + + NFP_NET_VF_CFG_CTRL; + vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset); + vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_TRUST; + vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_TRUST, enable); + writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset); + + return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_TRUST, + "trust"); +} + +int nfp_app_set_vf_link_state(struct net_device *netdev, int vf, + int link_state) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + unsigned int vf_offset; + u8 vf_ctrl; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_LINK_STATE, + "link_state", true); + if (err) + return err; + + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + case IFLA_VF_LINK_STATE_ENABLE: + case IFLA_VF_LINK_STATE_DISABLE: + break; + default: + return -EINVAL; + } + + /* Write link state to VF entry in VF config symbol */ + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ + + NFP_NET_VF_CFG_CTRL; + vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset); + vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_LINK_STATE; + vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_LINK_STATE, link_state); + writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset); + + return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_LINK_STATE, + "link state"); +} + +int nfp_app_get_vf_config(struct net_device *netdev, int vf, + struct ifla_vf_info *ivi) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + u32 vf_offset, mac_hi, rate; + u32 vlan_tag; + u16 mac_lo; + u8 flags; + int err; + + err = nfp_net_sriov_check(app, vf, 0, "", true); + if (err) + return err; + + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; + + mac_hi = readl(app->pf->vfcfg_tbl2 + vf_offset); + mac_lo = readw(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); + + flags = readb(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_CTRL); + vlan_tag = readl(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vf; + + put_unaligned_be32(mac_hi, &ivi->mac[0]); + put_unaligned_be16(mac_lo, &ivi->mac[4]); + + ivi->vlan = FIELD_GET(NFP_NET_VF_CFG_VLAN_VID, vlan_tag); + ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tag); + if (!nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", false)) + ivi->vlan_proto = htons(FIELD_GET(NFP_NET_VF_CFG_VLAN_PROT, vlan_tag)); + ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags); + ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags); + ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags); + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", false); + if (!err) { + rate = readl(app->pf->vfcfg_tbl2 + vf_offset + + NFP_NET_VF_CFG_RATE); + + ivi->max_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MAX_RATE, rate); + ivi->min_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MIN_RATE, rate); + + if (ivi->max_tx_rate == NFP_NET_VF_RATE_MAX) + ivi->max_tx_rate = 0; + if (ivi->min_tx_rate == NFP_NET_VF_RATE_MAX) + ivi->min_tx_rate = 0; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h new file mode 100644 index 000000000..2d445fa19 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2019 Netronome Systems, Inc. */ + +#ifndef _NFP_NET_SRIOV_H_ +#define _NFP_NET_SRIOV_H_ + +/* SRIOV VF configuration. + * The configuration memory begins with a mailbox region for communication with + * the firmware followed by individual VF entries. + */ +#define NFP_NET_VF_CFG_SZ 16 +#define NFP_NET_VF_CFG_MB_SZ 16 + +/* VF config mailbox */ +#define NFP_NET_VF_CFG_MB 0x0 +#define NFP_NET_VF_CFG_MB_CAP 0x0 +#define NFP_NET_VF_CFG_MB_CAP_MAC (0x1 << 0) +#define NFP_NET_VF_CFG_MB_CAP_VLAN (0x1 << 1) +#define NFP_NET_VF_CFG_MB_CAP_SPOOF (0x1 << 2) +#define NFP_NET_VF_CFG_MB_CAP_LINK_STATE (0x1 << 3) +#define NFP_NET_VF_CFG_MB_CAP_TRUST (0x1 << 4) +#define NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO (0x1 << 5) +#define NFP_NET_VF_CFG_MB_CAP_RATE (0x1 << 6) +#define NFP_NET_VF_CFG_MB_RET 0x2 +#define NFP_NET_VF_CFG_MB_UPD 0x4 +#define NFP_NET_VF_CFG_MB_UPD_MAC (0x1 << 0) +#define NFP_NET_VF_CFG_MB_UPD_VLAN (0x1 << 1) +#define NFP_NET_VF_CFG_MB_UPD_SPOOF (0x1 << 2) +#define NFP_NET_VF_CFG_MB_UPD_LINK_STATE (0x1 << 3) +#define NFP_NET_VF_CFG_MB_UPD_TRUST (0x1 << 4) +#define NFP_NET_VF_CFG_MB_UPD_VLAN_PROTO (0x1 << 5) +#define NFP_NET_VF_CFG_MB_UPD_RATE (0x1 << 6) +#define NFP_NET_VF_CFG_MB_VF_NUM 0x7 + +/* VF config entry + * MAC_LO is set that the MAC address can be read in a single 6 byte read + * by the NFP + */ +#define NFP_NET_VF_CFG_MAC 0x0 +#define NFP_NET_VF_CFG_MAC_HI 0x0 +#define NFP_NET_VF_CFG_MAC_LO 0x6 +#define NFP_NET_VF_CFG_CTRL 0x4 +#define NFP_NET_VF_CFG_CTRL_TRUST 0x8 +#define NFP_NET_VF_CFG_CTRL_SPOOF 0x4 +#define NFP_NET_VF_CFG_CTRL_LINK_STATE 0x3 +#define NFP_NET_VF_CFG_LS_MODE_AUTO 0 +#define NFP_NET_VF_CFG_LS_MODE_ENABLE 1 +#define NFP_NET_VF_CFG_LS_MODE_DISABLE 2 +#define NFP_NET_VF_CFG_VLAN 0x8 +#define NFP_NET_VF_CFG_VLAN_PROT 0xffff0000 +#define NFP_NET_VF_CFG_VLAN_QOS 0xe000 +#define NFP_NET_VF_CFG_VLAN_VID 0x0fff +#define NFP_NET_VF_CFG_RATE 0xc +#define NFP_NET_VF_CFG_MIN_RATE 0x0000ffff +#define NFP_NET_VF_CFG_MAX_RATE 0xffff0000 + +#define NFP_NET_VF_RATE_MAX 0xffff + +int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); +int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto); +int nfp_app_set_vf_rate(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate); +int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); +int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool setting); +int nfp_app_set_vf_link_state(struct net_device *netdev, int vf, + int link_state); +int nfp_app_get_vf_config(struct net_device *netdev, int vf, + struct ifla_vf_info *ivi); + +#endif /* _NFP_NET_SRIOV_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c new file mode 100644 index 000000000..aea507aed --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <net/xdp_sock_drv.h> +#include <trace/events/xdp.h> + +#include "nfp_app.h" +#include "nfp_net.h" +#include "nfp_net_dp.h" +#include "nfp_net_xsk.h" + +static void +nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, + struct xdp_buff *xdp) +{ + unsigned int headroom; + + headroom = xsk_pool_get_headroom(rx_ring->r_vec->xsk_pool); + + rx_ring->rxds[idx].fld.reserved = 0; + rx_ring->rxds[idx].fld.meta_len_dd = 0; + + rx_ring->xsk_rxbufs[idx].xdp = xdp; + rx_ring->xsk_rxbufs[idx].dma_addr = + xsk_buff_xdp_get_frame_dma(xdp) + headroom; +} + +void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf) +{ + rxbuf->dma_addr = 0; + rxbuf->xdp = NULL; +} + +void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf) +{ + if (rxbuf->xdp) + xsk_buff_free(rxbuf->xdp); + + nfp_net_xsk_rx_unstash(rxbuf); +} + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (!rx_ring->cnt) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_net_xsk_rx_free(&rx_ring->xsk_rxbufs[i]); +} + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct xsk_buff_pool *pool = r_vec->xsk_pool; + unsigned int wr_idx, wr_ptr_add = 0; + struct xdp_buff *xdp; + + while (nfp_net_rx_space(rx_ring)) { + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + xdp = xsk_buff_alloc(pool); + if (!xdp) + break; + + nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp); + + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly, it's also OK to fill + * a 40-bit address since the top 8 bits are get set to 0. + */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + rx_ring->xsk_rxbufs[wr_idx].dma_addr); + + rx_ring->wr_p++; + wr_ptr_add++; + } + + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add); +} + +void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_xsk_rx_free(xrxbuf); +} + +static void nfp_net_xsk_pool_unmap(struct device *dev, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, 0); +} + +static int nfp_net_xsk_pool_map(struct device *dev, struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_map(pool, dev, 0); +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct nfp_net *nn = netdev_priv(netdev); + + struct xsk_buff_pool *prev_pool; + struct nfp_net_dp *dp; + int err; + + /* NFDK doesn't implement xsk yet. */ + if (nn->dp.ops->version == NFP_NFD_VER_NFDK) + return -EOPNOTSUPP; + + /* Reject on old FWs so we can drop some checks on datapath. */ + if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC) + return -EOPNOTSUPP; + if (!nn->dp.chained_metadata_format) + return -EOPNOTSUPP; + + /* Install */ + if (pool) { + err = nfp_net_xsk_pool_map(nn->dp.dev, pool); + if (err) + return err; + } + + /* Reconfig/swap */ + dp = nfp_net_clone_dp(nn); + if (!dp) { + err = -ENOMEM; + goto err_unmap; + } + + prev_pool = dp->xsk_pools[queue_id]; + dp->xsk_pools[queue_id] = pool; + + err = nfp_net_ring_reconfig(nn, dp, NULL); + if (err) + goto err_unmap; + + /* Uninstall */ + if (prev_pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, prev_pool); + + return 0; +err_unmap: + if (pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, pool); + + return err; +} + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct nfp_net *nn = netdev_priv(netdev); + + /* queue_id comes from a zero-copy socket, installed with XDP_SETUP_XSK_POOL, + * so it must be within our vector range. Moreover, our napi structs + * are statically allocated, so we can always kick them without worrying + * if reconfig is in progress or interface down. + */ + napi_schedule(&nn->r_vecs[queue_id].napi); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h new file mode 100644 index 000000000..6d281eb2f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#ifndef _NFP_XSK_H_ +#define _NFP_XSK_H_ + +#include <net/xdp_sock_drv.h> + +#define NFP_NET_XSK_TX_BATCH 16 /* XSK TX transmission batch size. */ + +static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp, + unsigned int qid) +{ + return dp->xdp_prog && dp->xsk_pools[qid]; +} + +static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1; +} + +static inline int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring) +{ + return tx_ring->cnt - tx_ring->wr_p + tx_ring->rd_p - 1; +} + +void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf); +void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf); +void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf); +int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool, + u16 queue_id); + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring); + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring); + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); + +#endif /* _NFP_XSK_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c new file mode 100644 index 000000000..e19bb0150 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_netvf_main.c + * Netronome virtual function network device driver: Main entry point + * Author: Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/etherdevice.h> + +#include "nfpcore/nfp_dev.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" +#include "nfp_main.h" + +/** + * struct nfp_net_vf - NFP VF-specific device structure + * @nn: NFP Net structure for this device + * @irq_entries: Pre-allocated array of MSI-X entries + * @q_bar: Pointer to mapped QC memory (NULL if TX/RX mapped directly) + * @ddir: Per-device debugfs directory + */ +struct nfp_net_vf { + struct nfp_net *nn; + + struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + + NFP_NET_MAX_TX_RINGS]; + u8 __iomem *q_bar; + + struct dentry *ddir; +}; + +static const char nfp_net_driver_name[] = "nfp_netvf"; + +static const struct pci_device_id nfp_netvf_pci_device_ids[] = { + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP3800_VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800_VF, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP6000_VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000_VF, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP3800_VF, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800_VF, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP6000_VF, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000_VF, + }, + { 0, } /* Required last entry. */ +}; +MODULE_DEVICE_TABLE(pci, nfp_netvf_pci_device_ids); + +static void nfp_netvf_get_mac_addr(struct nfp_net *nn) +{ + u8 mac_addr[ETH_ALEN]; + + put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); + put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]); + + if (!is_valid_ether_addr(mac_addr)) { + eth_hw_addr_random(nn->dp.netdev); + return; + } + + eth_hw_addr_set(nn->dp.netdev, mac_addr); + ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr); +} + +static int nfp_netvf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + const struct nfp_dev_info *dev_info; + struct nfp_net_fw_version fw_ver; + int max_tx_rings, max_rx_rings; + u32 tx_bar_off, rx_bar_off; + u32 tx_bar_sz, rx_bar_sz; + int tx_bar_no, rx_bar_no; + struct nfp_net_vf *vf; + unsigned int num_irqs; + u8 __iomem *ctrl_bar; + struct nfp_net *nn; + u32 startq; + int stride; + int err; + + dev_info = &nfp_dev_info[pci_id->driver_data]; + + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) + return -ENOMEM; + pci_set_drvdata(pdev, vf); + + err = pci_enable_device_mem(pdev); + if (err) + goto err_free_vf; + + err = pci_request_regions(pdev, nfp_net_driver_name); + if (err) { + dev_err(&pdev->dev, "Unable to allocate device memory.\n"); + goto err_pci_disable; + } + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, dev_info->dma_mask); + if (err) + goto err_pci_regions; + + /* Map the Control BAR. + * + * Irrespective of the advertised BAR size we only map the + * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code + * the identical for PF and VF drivers. + */ + ctrl_bar = ioremap(pci_resource_start(pdev, NFP_NET_CTRL_BAR), + NFP_NET_CFG_BAR_SZ); + if (!ctrl_bar) { + dev_err(&pdev->dev, + "Failed to map resource %d\n", NFP_NET_CTRL_BAR); + err = -EIO; + goto err_pci_regions; + } + + nfp_net_get_fw_version(&fw_ver, ctrl_bar); + if (fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK || + fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + + /* Determine stride */ + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1)) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); + } else { + switch (fw_ver.major) { + case 1 ... 5: + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; + break; + default: + dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + } + + /* Find out how many rings are supported */ + max_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS); + max_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); + + tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * stride; + rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * stride; + + /* Sanity checks */ + if (tx_bar_sz > pci_resource_len(pdev, tx_bar_no)) { + dev_err(&pdev->dev, + "TX BAR too small for number of TX rings. Adjusting\n"); + tx_bar_sz = pci_resource_len(pdev, tx_bar_no); + max_tx_rings = (tx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + if (rx_bar_sz > pci_resource_len(pdev, rx_bar_no)) { + dev_err(&pdev->dev, + "RX BAR too small for number of RX rings. Adjusting\n"); + rx_bar_sz = pci_resource_len(pdev, rx_bar_no); + max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = nfp_qcp_queue_offset(dev_info, startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = nfp_qcp_queue_offset(dev_info, startq); + + /* Allocate and initialise the netdev */ + nn = nfp_net_alloc(pdev, dev_info, ctrl_bar, true, + max_tx_rings, max_rx_rings); + if (IS_ERR(nn)) { + err = PTR_ERR(nn); + goto err_ctrl_unmap; + } + vf->nn = nn; + + nn->dp.is_vf = 1; + nn->stride_tx = stride; + nn->stride_rx = stride; + + if (rx_bar_no == tx_bar_no) { + u32 bar_off, bar_sz; + resource_size_t map_addr; + + /* Make a single overlapping BAR mapping */ + if (tx_bar_off < rx_bar_off) + bar_off = tx_bar_off; + else + bar_off = rx_bar_off; + + if ((tx_bar_off + tx_bar_sz) > (rx_bar_off + rx_bar_sz)) + bar_sz = (tx_bar_off + tx_bar_sz) - bar_off; + else + bar_sz = (rx_bar_off + rx_bar_sz) - bar_off; + + map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off; + vf->q_bar = ioremap(map_addr, bar_sz); + if (!vf->q_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* TX queues */ + nn->tx_bar = vf->q_bar + (tx_bar_off - bar_off); + /* RX queues */ + nn->rx_bar = vf->q_bar + (rx_bar_off - bar_off); + } else { + resource_size_t map_addr; + + /* TX queues */ + map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off; + nn->tx_bar = ioremap(map_addr, tx_bar_sz); + if (!nn->tx_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* RX queues */ + map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off; + nn->rx_bar = ioremap(map_addr, rx_bar_sz); + if (!nn->rx_bar) { + nn_err(nn, "Failed to map resource %d\n", rx_bar_no); + err = -EIO; + goto err_unmap_tx; + } + } + + nfp_netvf_get_mac_addr(nn); + + num_irqs = nfp_net_irqs_alloc(pdev, vf->irq_entries, + NFP_NET_MIN_VNIC_IRQS, + NFP_NET_NON_Q_VECTORS + + nn->dp.num_r_vecs); + if (!num_irqs) { + nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n"); + err = -EIO; + goto err_unmap_rx; + } + nfp_net_irqs_assign(nn, vf->irq_entries, num_irqs); + + err = nfp_net_init(nn); + if (err) + goto err_irqs_disable; + + nfp_net_info(nn); + vf->ddir = nfp_net_debugfs_device_add(pdev); + nfp_net_debugfs_vnic_add(nn, vf->ddir); + + return 0; + +err_irqs_disable: + nfp_net_irqs_disable(pdev); +err_unmap_rx: + if (!vf->q_bar) + iounmap(nn->rx_bar); +err_unmap_tx: + if (!vf->q_bar) + iounmap(nn->tx_bar); + else + iounmap(vf->q_bar); +err_netdev_free: + nfp_net_free(nn); +err_ctrl_unmap: + iounmap(ctrl_bar); +err_pci_regions: + pci_release_regions(pdev); +err_pci_disable: + pci_disable_device(pdev); +err_free_vf: + pci_set_drvdata(pdev, NULL); + kfree(vf); + return err; +} + +static void nfp_netvf_pci_remove(struct pci_dev *pdev) +{ + struct nfp_net_vf *vf; + struct nfp_net *nn; + + vf = pci_get_drvdata(pdev); + if (!vf) + return; + + nn = vf->nn; + + /* Note, the order is slightly different from above as we need + * to keep the nn pointer around till we have freed everything. + */ + nfp_net_debugfs_dir_clean(&nn->debugfs_dir); + nfp_net_debugfs_dir_clean(&vf->ddir); + + nfp_net_clean(nn); + + nfp_net_irqs_disable(pdev); + + if (!vf->q_bar) { + iounmap(nn->rx_bar); + iounmap(nn->tx_bar); + } else { + iounmap(vf->q_bar); + } + iounmap(nn->dp.ctrl_bar); + + nfp_net_free(nn); + + pci_release_regions(pdev); + pci_disable_device(pdev); + + pci_set_drvdata(pdev, NULL); + kfree(vf); +} + +struct pci_driver nfp_netvf_pci_driver = { + .name = nfp_net_driver_name, + .id_table = nfp_netvf_pci_device_ids, + .probe = nfp_netvf_pci_probe, + .remove = nfp_netvf_pci_remove, + .shutdown = nfp_netvf_pci_remove, +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c new file mode 100644 index 000000000..4f2308570 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/lockdep.h> +#include <linux/netdevice.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nsp.h" +#include "nfp_app.h" +#include "nfp_main.h" +#include "nfp_net.h" +#include "nfp_port.h" + +struct nfp_port *nfp_port_from_netdev(struct net_device *netdev) +{ + if (nfp_netdev_is_nfp_net(netdev)) { + struct nfp_net *nn = netdev_priv(netdev); + + return nn->port; + } + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_repr *repr = netdev_priv(netdev); + + return repr->port; + } + + WARN(1, "Unknown netdev type for nfp_port\n"); + + return NULL; +} + +int nfp_port_get_port_parent_id(struct net_device *netdev, + struct netdev_phys_item_id *ppid) +{ + struct nfp_port *port; + const u8 *serial; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + ppid->id_len = nfp_cpp_serial(port->app->cpp, &serial); + memcpy(&ppid->id, serial, ppid->id_len); + + return 0; +} + +int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + return nfp_app_setup_tc(port->app, netdev, type, type_data); +} + +int nfp_port_set_features(struct net_device *netdev, netdev_features_t features) +{ + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port) + return 0; + + if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + port->tc_offload_cnt) { + netdev_err(netdev, "Cannot disable HW TC offload while offloads active\n"); + return -EBUSY; + } + + return 0; +} + +struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port) +{ + if (!port) + return NULL; + if (port->type != NFP_PORT_PHYS_PORT) + return NULL; + + return port->eth_port; +} + +struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port) +{ + if (!__nfp_port_get_eth_port(port)) + return NULL; + + if (test_bit(NFP_PORT_CHANGED, &port->flags)) + if (nfp_net_refresh_eth_port(port)) + return NULL; + + return __nfp_port_get_eth_port(port); +} + +int +nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int n; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + switch (port->type) { + case NFP_PORT_PHYS_PORT: + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!eth_port->is_split) + n = snprintf(name, len, "p%d", eth_port->label_port); + else + n = snprintf(name, len, "p%ds%d", eth_port->label_port, + eth_port->label_subport); + break; + case NFP_PORT_PF_PORT: + if (!port->pf_split) + n = snprintf(name, len, "pf%d", port->pf_id); + else + n = snprintf(name, len, "pf%ds%d", port->pf_id, + port->pf_split_id); + break; + case NFP_PORT_VF_PORT: + n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id); + break; + default: + return -EOPNOTSUPP; + } + + if (n >= len) + return -EINVAL; + + return 0; +} + +/** + * nfp_port_configure() - helper to set the interface configured bit + * @netdev: net_device instance + * @configed: Desired state + * + * Helper to set the ifup/ifdown state on the PHY only if there is a physical + * interface associated with the netdev. + * + * Return: + * 0 - configuration successful (or no change); + * -ERRNO - configuration failed. + */ +int nfp_port_configure(struct net_device *netdev, bool configed) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return 0; + if (port->eth_forced) + return 0; + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed); + return err < 0 && err != -EOPNOTSUPP ? err : 0; +} + +int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_port *port, unsigned int id) +{ + /* Check if vNIC has external port associated and cfg is OK */ + if (!pf->eth_tbl || id >= pf->eth_tbl->count) { + nfp_err(app->cpp, + "NSP port entries don't match vNICs (no entry %d)\n", + id); + return -EINVAL; + } + if (pf->eth_tbl->ports[id].override_changed) { + nfp_warn(app->cpp, + "Config changed for port #%d, reboot required before port will be operational\n", + pf->eth_tbl->ports[id].index); + port->type = NFP_PORT_INVALID; + return 0; + } + + port->eth_port = &pf->eth_tbl->ports[id]; + port->eth_id = pf->eth_tbl->ports[id].index; + if (pf->mac_stats_mem) + port->eth_stats = + pf->mac_stats_mem + port->eth_id * NFP_MAC_STATS_SIZE; + + return 0; +} + +struct nfp_port * +nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type, + struct net_device *netdev) +{ + struct nfp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + port->netdev = netdev; + port->type = type; + port->app = app; + + list_add_tail(&port->port_list, &app->pf->ports); + + return port; +} + +void nfp_port_free(struct nfp_port *port) +{ + if (!port) + return; + list_del(&port->port_list); + kfree(port); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h new file mode 100644 index 000000000..c31812287 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef _NFP_PORT_H_ +#define _NFP_PORT_H_ + +#include <net/devlink.h> + +struct net_device; +struct netdev_phys_item_id; +struct nfp_app; +struct nfp_pf; +struct nfp_port; + +/** + * enum nfp_port_type - type of port NFP can switch traffic to + * @NFP_PORT_INVALID: port is invalid, %NFP_PORT_PHYS_PORT transitions to this + * state when port disappears because of FW fault or config + * change + * @NFP_PORT_PHYS_PORT: external NIC port + * @NFP_PORT_PF_PORT: logical port of PCI PF + * @NFP_PORT_VF_PORT: logical port of PCI VF + */ +enum nfp_port_type { + NFP_PORT_INVALID, + NFP_PORT_PHYS_PORT, + NFP_PORT_PF_PORT, + NFP_PORT_VF_PORT, +}; + +/** + * enum nfp_port_flags - port flags (can be type-specific) + * @NFP_PORT_CHANGED: port state has changed since last eth table refresh; + * for NFP_PORT_PHYS_PORT, never set otherwise; must hold + * rtnl_lock to clear + */ +enum nfp_port_flags { + NFP_PORT_CHANGED = 0, +}; + +enum { + NFP_SPEED_1G, + NFP_SPEED_10G, + NFP_SPEED_25G, + NFP_SPEED_40G, + NFP_SPEED_50G, + NFP_SPEED_100G, + NFP_SUP_SPEED_NUMBER +}; + +/** + * struct nfp_port - structure representing NFP port + * @netdev: backpointer to associated netdev + * @type: what port type does the entity represent + * @flags: port flags + * @tc_offload_cnt: number of active TC offloads, how offloads are counted + * is not defined, use as a boolean + * @app: backpointer to the app structure + * @link_cb: callback when link status changed + * @dl_port: devlink port structure + * @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme + * @eth_forced: for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change + * @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry + * @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available + * @speed_bitmap: for %NFP_PORT_PHYS_PORT supported speed bitmap + * @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3) + * @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id + * @pf_split: for %NFP_PORT_PF_PORT %true if PCI PF has more than one vNIC + * @pf_split_id:for %NFP_PORT_PF_PORT ID of PCI PF vNIC (valid if @pf_split) + * @vnic: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT vNIC ctrl memory + * @port_list: entry on pf's list of ports + */ +struct nfp_port { + struct net_device *netdev; + enum nfp_port_type type; + + unsigned long flags; + unsigned long tc_offload_cnt; + + struct nfp_app *app; + void (*link_cb)(struct nfp_port *port); + + struct devlink_port dl_port; + + union { + /* NFP_PORT_PHYS_PORT */ + struct { + unsigned int eth_id; + bool eth_forced; + struct nfp_eth_table_port *eth_port; + u8 __iomem *eth_stats; + DECLARE_BITMAP(speed_bitmap, NFP_SUP_SPEED_NUMBER); + }; + /* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */ + struct { + unsigned int pf_id; + unsigned int vf_id; + bool pf_split; + unsigned int pf_split_id; + u8 __iomem *vnic; + }; + }; + + struct list_head port_list; +}; + +extern const struct ethtool_ops nfp_port_ethtool_ops; + +int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data); + +static inline bool nfp_port_is_vnic(const struct nfp_port *port) +{ + return port->type == NFP_PORT_PF_PORT || port->type == NFP_PORT_VF_PORT; +} + +int +nfp_port_set_features(struct net_device *netdev, netdev_features_t features); + +struct nfp_port *nfp_port_from_netdev(struct net_device *netdev); +int nfp_port_get_port_parent_id(struct net_device *netdev, + struct netdev_phys_item_id *ppid); +struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port); +struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port); + +int +nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len); +int nfp_port_configure(struct net_device *netdev, bool configed); + +struct nfp_port * +nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type, + struct net_device *netdev); +void nfp_port_free(struct nfp_port *port); + +int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_port *port, unsigned int id); + +int nfp_net_refresh_eth_port(struct nfp_port *port); +void nfp_net_refresh_port_table(struct nfp_port *port); +int nfp_net_refresh_port_table_sync(struct nfp_pf *pf); + +int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port); +void nfp_devlink_port_unregister(struct nfp_port *port); +void nfp_devlink_port_type_eth_set(struct nfp_port *port); +void nfp_devlink_port_type_clear(struct nfp_port *port); + +/* Mac stats (0x0000 - 0x0200) + * all counters are 64bit. + */ +#define NFP_MAC_STATS_BASE 0x0000 +#define NFP_MAC_STATS_SIZE 0x0200 + +#define NFP_MAC_STATS_RX_IN_OCTETS (NFP_MAC_STATS_BASE + 0x000) + /* unused 0x008 */ +#define NFP_MAC_STATS_RX_FRAME_TOO_LONG_ERRORS (NFP_MAC_STATS_BASE + 0x010) +#define NFP_MAC_STATS_RX_RANGE_LENGTH_ERRORS (NFP_MAC_STATS_BASE + 0x018) +#define NFP_MAC_STATS_RX_VLAN_RECEIVED_OK (NFP_MAC_STATS_BASE + 0x020) +#define NFP_MAC_STATS_RX_IN_ERRORS (NFP_MAC_STATS_BASE + 0x028) +#define NFP_MAC_STATS_RX_IN_BROADCAST_PKTS (NFP_MAC_STATS_BASE + 0x030) +#define NFP_MAC_STATS_RX_DROP_EVENTS (NFP_MAC_STATS_BASE + 0x038) +#define NFP_MAC_STATS_RX_ALIGNMENT_ERRORS (NFP_MAC_STATS_BASE + 0x040) +#define NFP_MAC_STATS_RX_PAUSE_MAC_CTRL_FRAMES (NFP_MAC_STATS_BASE + 0x048) +#define NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK (NFP_MAC_STATS_BASE + 0x050) +#define NFP_MAC_STATS_RX_FRAME_CHECK_SEQUENCE_ERRORS (NFP_MAC_STATS_BASE + 0x058) +#define NFP_MAC_STATS_RX_UNICAST_PKTS (NFP_MAC_STATS_BASE + 0x060) +#define NFP_MAC_STATS_RX_MULTICAST_PKTS (NFP_MAC_STATS_BASE + 0x068) +#define NFP_MAC_STATS_RX_PKTS (NFP_MAC_STATS_BASE + 0x070) +#define NFP_MAC_STATS_RX_UNDERSIZE_PKTS (NFP_MAC_STATS_BASE + 0x078) +#define NFP_MAC_STATS_RX_PKTS_64_OCTETS (NFP_MAC_STATS_BASE + 0x080) +#define NFP_MAC_STATS_RX_PKTS_65_TO_127_OCTETS (NFP_MAC_STATS_BASE + 0x088) +#define NFP_MAC_STATS_RX_PKTS_512_TO_1023_OCTETS (NFP_MAC_STATS_BASE + 0x090) +#define NFP_MAC_STATS_RX_PKTS_1024_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x098) +#define NFP_MAC_STATS_RX_JABBERS (NFP_MAC_STATS_BASE + 0x0a0) +#define NFP_MAC_STATS_RX_FRAGMENTS (NFP_MAC_STATS_BASE + 0x0a8) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS2 (NFP_MAC_STATS_BASE + 0x0b0) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS3 (NFP_MAC_STATS_BASE + 0x0b8) +#define NFP_MAC_STATS_RX_PKTS_128_TO_255_OCTETS (NFP_MAC_STATS_BASE + 0x0c0) +#define NFP_MAC_STATS_RX_PKTS_256_TO_511_OCTETS (NFP_MAC_STATS_BASE + 0x0c8) +#define NFP_MAC_STATS_RX_PKTS_1519_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x0d0) +#define NFP_MAC_STATS_RX_OVERSIZE_PKTS (NFP_MAC_STATS_BASE + 0x0d8) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS0 (NFP_MAC_STATS_BASE + 0x0e0) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS1 (NFP_MAC_STATS_BASE + 0x0e8) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS4 (NFP_MAC_STATS_BASE + 0x0f0) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS5 (NFP_MAC_STATS_BASE + 0x0f8) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS6 (NFP_MAC_STATS_BASE + 0x100) +#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS7 (NFP_MAC_STATS_BASE + 0x108) +#define NFP_MAC_STATS_RX_MAC_CTRL_FRAMES_RECEIVED (NFP_MAC_STATS_BASE + 0x110) +#define NFP_MAC_STATS_RX_MAC_HEAD_DROP (NFP_MAC_STATS_BASE + 0x118) + /* unused 0x120 */ + /* unused 0x128 */ + /* unused 0x130 */ +#define NFP_MAC_STATS_TX_QUEUE_DROP (NFP_MAC_STATS_BASE + 0x138) +#define NFP_MAC_STATS_TX_OUT_OCTETS (NFP_MAC_STATS_BASE + 0x140) + /* unused 0x148 */ +#define NFP_MAC_STATS_TX_VLAN_TRANSMITTED_OK (NFP_MAC_STATS_BASE + 0x150) +#define NFP_MAC_STATS_TX_OUT_ERRORS (NFP_MAC_STATS_BASE + 0x158) +#define NFP_MAC_STATS_TX_BROADCAST_PKTS (NFP_MAC_STATS_BASE + 0x160) +#define NFP_MAC_STATS_TX_PKTS_64_OCTETS (NFP_MAC_STATS_BASE + 0x168) +#define NFP_MAC_STATS_TX_PKTS_256_TO_511_OCTETS (NFP_MAC_STATS_BASE + 0x170) +#define NFP_MAC_STATS_TX_PKTS_512_TO_1023_OCTETS (NFP_MAC_STATS_BASE + 0x178) +#define NFP_MAC_STATS_TX_PAUSE_MAC_CTRL_FRAMES (NFP_MAC_STATS_BASE + 0x180) +#define NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK (NFP_MAC_STATS_BASE + 0x188) +#define NFP_MAC_STATS_TX_UNICAST_PKTS (NFP_MAC_STATS_BASE + 0x190) +#define NFP_MAC_STATS_TX_MULTICAST_PKTS (NFP_MAC_STATS_BASE + 0x198) +#define NFP_MAC_STATS_TX_PKTS_65_TO_127_OCTETS (NFP_MAC_STATS_BASE + 0x1a0) +#define NFP_MAC_STATS_TX_PKTS_128_TO_255_OCTETS (NFP_MAC_STATS_BASE + 0x1a8) +#define NFP_MAC_STATS_TX_PKTS_1024_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x1b0) +#define NFP_MAC_STATS_TX_PKTS_1519_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x1b8) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS0 (NFP_MAC_STATS_BASE + 0x1c0) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS1 (NFP_MAC_STATS_BASE + 0x1c8) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS4 (NFP_MAC_STATS_BASE + 0x1d0) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS5 (NFP_MAC_STATS_BASE + 0x1d8) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS2 (NFP_MAC_STATS_BASE + 0x1e0) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS3 (NFP_MAC_STATS_BASE + 0x1e8) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS6 (NFP_MAC_STATS_BASE + 0x1f0) +#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7 (NFP_MAC_STATS_BASE + 0x1f8) + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c new file mode 100644 index 000000000..ea2e3f829 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include <linux/kernel.h> +#include <net/devlink.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nffw.h" +#include "nfp_abi.h" +#include "nfp_app.h" +#include "nfp_main.h" + +static u32 nfp_shared_buf_pool_unit(struct nfp_pf *pf, unsigned int sb) +{ + __le32 sb_id = cpu_to_le32(sb); + unsigned int i; + + for (i = 0; i < pf->num_shared_bufs; i++) + if (pf->shared_bufs[i].id == sb_id) + return le32_to_cpu(pf->shared_bufs[i].pool_size_unit); + + WARN_ON_ONCE(1); + return 0; +} + +int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct nfp_shared_buf_pool_info_get get_data; + struct nfp_shared_buf_pool_id id = { + .shared_buf = cpu_to_le32(sb), + .pool = cpu_to_le32(pool_index), + }; + unsigned int unit_size; + int n; + + unit_size = nfp_shared_buf_pool_unit(pf, sb); + if (!unit_size) + return -EINVAL; + + n = nfp_mbox_cmd(pf, NFP_MBOX_POOL_GET, &id, sizeof(id), + &get_data, sizeof(get_data)); + if (n < 0) + return n; + if (n < sizeof(get_data)) + return -EIO; + + pool_info->pool_type = le32_to_cpu(get_data.pool_type); + pool_info->threshold_type = le32_to_cpu(get_data.threshold_type); + pool_info->size = le32_to_cpu(get_data.size) * unit_size; + pool_info->cell_size = unit_size; + + return 0; +} + +int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct nfp_shared_buf_pool_info_set set_data = { + .id = { + .shared_buf = cpu_to_le32(sb), + .pool = cpu_to_le32(pool_index), + }, + .threshold_type = cpu_to_le32(threshold_type), + }; + unsigned int unit_size; + + unit_size = nfp_shared_buf_pool_unit(pf, sb); + if (!unit_size || size % unit_size) + return -EINVAL; + set_data.size = cpu_to_le32(size / unit_size); + + return nfp_mbox_cmd(pf, NFP_MBOX_POOL_SET, &set_data, sizeof(set_data), + NULL, 0); +} + +int nfp_shared_buf_register(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + unsigned int i, num_entries, entry_sz; + struct nfp_cpp_area *sb_desc_area; + u8 __iomem *sb_desc; + int n, err; + + if (!pf->mbox) + return 0; + + n = nfp_pf_rtsym_read_optional(pf, NFP_SHARED_BUF_COUNT_SYM_NAME, 0); + if (n <= 0) + return n; + num_entries = n; + + sb_desc = nfp_pf_map_rtsym(pf, "sb_tbl", NFP_SHARED_BUF_TABLE_SYM_NAME, + num_entries * sizeof(pf->shared_bufs[0]), + &sb_desc_area); + if (IS_ERR(sb_desc)) + return PTR_ERR(sb_desc); + + entry_sz = nfp_cpp_area_size(sb_desc_area) / num_entries; + + pf->shared_bufs = kmalloc_array(num_entries, sizeof(pf->shared_bufs[0]), + GFP_KERNEL); + if (!pf->shared_bufs) { + err = -ENOMEM; + goto err_release_area; + } + + for (i = 0; i < num_entries; i++) { + struct nfp_shared_buf *sb = &pf->shared_bufs[i]; + + /* Entries may be larger in future FW */ + memcpy_fromio(sb, sb_desc + i * entry_sz, sizeof(*sb)); + + err = devlink_sb_register(devlink, + le32_to_cpu(sb->id), + le32_to_cpu(sb->size), + le16_to_cpu(sb->ingress_pools_count), + le16_to_cpu(sb->egress_pools_count), + le16_to_cpu(sb->ingress_tc_count), + le16_to_cpu(sb->egress_tc_count)); + if (err) + goto err_unreg_prev; + } + pf->num_shared_bufs = num_entries; + + nfp_cpp_area_release_free(sb_desc_area); + + return 0; + +err_unreg_prev: + while (i--) + devlink_sb_unregister(devlink, + le32_to_cpu(pf->shared_bufs[i].id)); + kfree(pf->shared_bufs); +err_release_area: + nfp_cpp_area_release_free(sb_desc_area); + return err; +} + +void nfp_shared_buf_unregister(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + unsigned int i; + + for (i = 0; i < pf->num_shared_bufs; i++) + devlink_sb_unregister(devlink, + le32_to_cpu(pf->shared_bufs[i].id)); + kfree(pf->shared_bufs); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h new file mode 100644 index 000000000..6ad43c7ce --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +#ifndef NFP_CRC32_H +#define NFP_CRC32_H + +#include <linux/crc32.h> + +/** + * crc32_posix_end() - Finalize POSIX CRC32 working state + * @crc: Current CRC32 working state + * @total_len: Total length of data that was CRC32'd + * + * Return: Final POSIX CRC32 value + */ +static inline u32 crc32_posix_end(u32 crc, size_t total_len) +{ + /* Extend with the length of the string. */ + while (total_len != 0) { + u8 c = total_len & 0xff; + + crc = crc32_be(crc, &c, 1); + total_len >>= 8; + } + + return ~crc; +} + +static inline u32 crc32_posix(const void *buff, size_t len) +{ + return crc32_posix_end(crc32_be(0, buff, len), len); +} + +#endif /* NFP_CRC32_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h new file mode 100644 index 000000000..db94b0bdd --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp.h + * Interface for NFP device access and query functions. + */ + +#ifndef __NFP_H__ +#define __NFP_H__ + +#include <linux/device.h> +#include <linux/types.h> + +#include "nfp_cpp.h" + +/* Implemented in nfp_hwinfo.c */ + +struct nfp_hwinfo; +struct nfp_hwinfo *nfp_hwinfo_read(struct nfp_cpp *cpp); +const char *nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup); +char *nfp_hwinfo_get_packed_strings(struct nfp_hwinfo *hwinfo); +u32 nfp_hwinfo_get_packed_str_size(struct nfp_hwinfo *hwinfo); + +/* Implemented in nfp_nsp.c, low level functions */ + +struct nfp_nsp; + +struct nfp_cpp *nfp_nsp_cpp(struct nfp_nsp *state); +bool nfp_nsp_config_modified(struct nfp_nsp *state); +void nfp_nsp_config_set_modified(struct nfp_nsp *state, bool modified); +void *nfp_nsp_config_entries(struct nfp_nsp *state); +unsigned int nfp_nsp_config_idx(struct nfp_nsp *state); +void nfp_nsp_config_set_state(struct nfp_nsp *state, void *entries, + unsigned int idx); +void nfp_nsp_config_clear_state(struct nfp_nsp *state); +int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_nsp_write_eth_table(struct nfp_nsp *state, + const void *buf, unsigned int size); +int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, + void *buf, unsigned int size); + +/* Implemented in nfp_resource.c */ + +/* All keys are CRC32-POSIX of the 8-byte identification string */ + +/* ARM/PCI vNIC Interfaces 0..3 */ +#define NFP_RESOURCE_VNIC_PCI_0 "vnic.p0" +#define NFP_RESOURCE_VNIC_PCI_1 "vnic.p1" +#define NFP_RESOURCE_VNIC_PCI_2 "vnic.p2" +#define NFP_RESOURCE_VNIC_PCI_3 "vnic.p3" + +/* NFP Hardware Info Database */ +#define NFP_RESOURCE_NFP_HWINFO "nfp.info" + +/* Service Processor */ +#define NFP_RESOURCE_NSP "nfp.sp" +#define NFP_RESOURCE_NSP_DIAG "arm.diag" + +/* Netronone Flow Firmware Table */ +#define NFP_RESOURCE_NFP_NFFW "nfp.nffw" + +/* MAC Statistics Accumulator */ +#define NFP_RESOURCE_MAC_STATISTICS "mac.stat" + +int nfp_resource_table_init(struct nfp_cpp *cpp); + +struct nfp_resource * +nfp_resource_acquire(struct nfp_cpp *cpp, const char *name); + +void nfp_resource_release(struct nfp_resource *res); + +int nfp_resource_wait(struct nfp_cpp *cpp, const char *name, unsigned int secs); + +u32 nfp_resource_cpp_id(struct nfp_resource *res); + +const char *nfp_resource_name(struct nfp_resource *res); + +u64 nfp_resource_address(struct nfp_resource *res); + +u64 nfp_resource_size(struct nfp_resource *res); + +#endif /* !__NFP_H__ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h new file mode 100644 index 000000000..4a1213385 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +#ifndef NFP6000_NFP6000_H +#define NFP6000_NFP6000_H + +#include <linux/errno.h> +#include <linux/types.h> + +/* CPP Target IDs */ +#define NFP_CPP_TARGET_INVALID 0 +#define NFP_CPP_TARGET_NBI 1 +#define NFP_CPP_TARGET_QDR 2 +#define NFP_CPP_TARGET_ILA 6 +#define NFP_CPP_TARGET_MU 7 +#define NFP_CPP_TARGET_PCIE 9 +#define NFP_CPP_TARGET_ARM 10 +#define NFP_CPP_TARGET_CRYPTO 12 +#define NFP_CPP_TARGET_ISLAND_XPB 14 /* Shared with CAP */ +#define NFP_CPP_TARGET_ISLAND_CAP 14 /* Shared with XPB */ +#define NFP_CPP_TARGET_CT_XPB 14 +#define NFP_CPP_TARGET_LOCAL_SCRATCH 15 +#define NFP_CPP_TARGET_CLS NFP_CPP_TARGET_LOCAL_SCRATCH + +#define NFP_ISL_EMEM0 24 + +#define NFP_MU_ADDR_ACCESS_TYPE_MASK 3ULL +#define NFP_MU_ADDR_ACCESS_TYPE_DIRECT 2ULL + +#define PUSHPULL(_pull, _push) ((_pull) << 4 | (_push) << 0) +#define PUSH_WIDTH(_pushpull) pushpull_width((_pushpull) >> 0) +#define PULL_WIDTH(_pushpull) pushpull_width((_pushpull) >> 4) + +static inline int pushpull_width(int pp) +{ + pp &= 0xf; + + if (pp == 0) + return -EINVAL; + return 2 << pp; +} + +static inline int nfp_cppat_mu_locality_lsb(int mode, bool addr40) +{ + switch (mode) { + case 0 ... 3: + return addr40 ? 38 : 30; + default: + return -EINVAL; + } +} + +int nfp_target_pushpull(u32 cpp_id, u64 address); +int nfp_target_cpp(u32 cpp_island_id, u64 cpp_island_address, + u32 *cpp_target_id, u64 *cpp_target_address, + const u32 *imb_table); + +#endif /* NFP6000_NFP6000_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h new file mode 100644 index 000000000..9a86ec11c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* + * nfp_xpb.h + * Author: Jason McMullan <jason.mcmullan@netronome.com> + */ + +#ifndef NFP6000_XPB_H +#define NFP6000_XPB_H + +/* For use with NFP6000 Databook "XPB Addressing" section + */ +#define NFP_XPB_OVERLAY(island) (((island) & 0x3f) << 24) + +#define NFP_XPB_ISLAND(island) (NFP_XPB_OVERLAY(island) + 0x60000) + +#define NFP_XPB_ISLAND_of(offset) (((offset) >> 24) & 0x3F) + +/* For use with NFP6000 Databook "XPB Island and Device IDs" chapter + */ +#define NFP_XPB_DEVICE(island, slave, device) \ + (NFP_XPB_OVERLAY(island) | \ + (((slave) & 3) << 22) | \ + (((device) & 0x3f) << 16)) + +#endif /* NFP6000_XPB_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c new file mode 100644 index 000000000..33b4c2856 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -0,0 +1,1366 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp6000_pcie.c + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * + * Multiplexes the NFP BARs between NFP internal resources and + * implements the PCIe specific interface for generic CPP bus access. + * + * The BARs are managed with refcounts and are allocated/acquired + * using target, token and offset/size matching. The generic CPP bus + * abstraction builds upon this BAR interface. + */ + +#include <asm/unaligned.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kref.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/sort.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/pci.h> + +#include "nfp_cpp.h" +#include "nfp_dev.h" + +#include "nfp6000/nfp6000.h" + +#include "nfp6000_pcie.h" + +#define NFP_PCIE_BAR(_pf) (0x30000 + ((_pf) & 7) * 0xc0) +#define NFP_PCIE_BAR_EXPLICIT_BAR0(_x, _y) \ + (0x00000080 + (0x40 * ((_x) & 0x3)) + (0x10 * ((_y) & 0x3))) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_SignalType(_x) (((_x) & 0x3) << 30) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_SignalType_of(_x) (((_x) >> 30) & 0x3) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_Token(_x) (((_x) & 0x3) << 28) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_Token_of(_x) (((_x) >> 28) & 0x3) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_Address(_x) (((_x) & 0xffffff) << 0) +#define NFP_PCIE_BAR_EXPLICIT_BAR0_Address_of(_x) (((_x) >> 0) & 0xffffff) +#define NFP_PCIE_BAR_EXPLICIT_BAR1(_x, _y) \ + (0x00000084 + (0x40 * ((_x) & 0x3)) + (0x10 * ((_y) & 0x3))) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_SignalRef(_x) (((_x) & 0x7f) << 24) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_SignalRef_of(_x) (((_x) >> 24) & 0x7f) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_DataMaster(_x) (((_x) & 0x3ff) << 14) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_DataMaster_of(_x) (((_x) >> 14) & 0x3ff) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_DataRef(_x) (((_x) & 0x3fff) << 0) +#define NFP_PCIE_BAR_EXPLICIT_BAR1_DataRef_of(_x) (((_x) >> 0) & 0x3fff) +#define NFP_PCIE_BAR_EXPLICIT_BAR2(_x, _y) \ + (0x00000088 + (0x40 * ((_x) & 0x3)) + (0x10 * ((_y) & 0x3))) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Target(_x) (((_x) & 0xf) << 28) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Target_of(_x) (((_x) >> 28) & 0xf) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Action(_x) (((_x) & 0x1f) << 23) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Action_of(_x) (((_x) >> 23) & 0x1f) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Length(_x) (((_x) & 0x1f) << 18) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_Length_of(_x) (((_x) >> 18) & 0x1f) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_ByteMask(_x) (((_x) & 0xff) << 10) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_ByteMask_of(_x) (((_x) >> 10) & 0xff) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_SignalMaster(_x) (((_x) & 0x3ff) << 0) +#define NFP_PCIE_BAR_EXPLICIT_BAR2_SignalMaster_of(_x) (((_x) >> 0) & 0x3ff) + +#define NFP_PCIE_BAR_PCIE2CPP_Action_BaseAddress(_x) (((_x) & 0x1f) << 16) +#define NFP_PCIE_BAR_PCIE2CPP_Action_BaseAddress_of(_x) (((_x) >> 16) & 0x1f) +#define NFP_PCIE_BAR_PCIE2CPP_BaseAddress(_x) (((_x) & 0xffff) << 0) +#define NFP_PCIE_BAR_PCIE2CPP_BaseAddress_of(_x) (((_x) >> 0) & 0xffff) +#define NFP_PCIE_BAR_PCIE2CPP_LengthSelect(_x) (((_x) & 0x3) << 27) +#define NFP_PCIE_BAR_PCIE2CPP_LengthSelect_of(_x) (((_x) >> 27) & 0x3) +#define NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT 0 +#define NFP_PCIE_BAR_PCIE2CPP_LengthSelect_64BIT 1 +#define NFP_PCIE_BAR_PCIE2CPP_LengthSelect_0BYTE 3 +#define NFP_PCIE_BAR_PCIE2CPP_MapType(_x) (((_x) & 0x7) << 29) +#define NFP_PCIE_BAR_PCIE2CPP_MapType_of(_x) (((_x) >> 29) & 0x7) +#define NFP_PCIE_BAR_PCIE2CPP_MapType_FIXED 0 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_BULK 1 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_TARGET 2 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL 3 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT0 4 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT1 5 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT2 6 +#define NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT3 7 +#define NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(_x) (((_x) & 0xf) << 23) +#define NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress_of(_x) (((_x) >> 23) & 0xf) +#define NFP_PCIE_BAR_PCIE2CPP_Token_BaseAddress(_x) (((_x) & 0x3) << 21) +#define NFP_PCIE_BAR_PCIE2CPP_Token_BaseAddress_of(_x) (((_x) >> 21) & 0x3) +#define NFP_PCIE_EM 0x020000 +#define NFP_PCIE_SRAM 0x000000 + +/* Minimal size of the PCIe cfg memory we depend on being mapped, + * queue controller and DMA controller don't have to be covered. + */ +#define NFP_PCI_MIN_MAP_SIZE 0x080000 + +#define NFP_PCIE_P2C_FIXED_SIZE(bar) (1 << (bar)->bitsize) +#define NFP_PCIE_P2C_BULK_SIZE(bar) (1 << (bar)->bitsize) +#define NFP_PCIE_P2C_GENERAL_TARGET_OFFSET(bar, x) ((x) << ((bar)->bitsize - 2)) +#define NFP_PCIE_P2C_GENERAL_TOKEN_OFFSET(bar, x) ((x) << ((bar)->bitsize - 4)) +#define NFP_PCIE_P2C_GENERAL_SIZE(bar) (1 << ((bar)->bitsize - 4)) + +#define NFP_PCIE_P2C_EXPBAR_OFFSET(bar_index) ((bar_index) * 4) + +/* The number of explicit BARs to reserve. + * Minimum is 0, maximum is 4 on the NFP6000. + * The NFP3800 can have only one per PF. + */ +#define NFP_PCIE_EXPLICIT_BARS 2 + +struct nfp6000_pcie; +struct nfp6000_area_priv; + +/** + * struct nfp_bar - describes BAR configuration and usage + * @nfp: backlink to owner + * @barcfg: cached contents of BAR config CSR + * @base: the BAR's base CPP offset + * @mask: mask for the BAR aperture (read only) + * @bitsize: bitsize of BAR aperture (read only) + * @index: index of the BAR + * @refcnt: number of current users + * @iomem: mapped IO memory + * @resource: iomem resource window + */ +struct nfp_bar { + struct nfp6000_pcie *nfp; + u32 barcfg; + u64 base; /* CPP address base */ + u64 mask; /* Bit mask of the bar */ + u32 bitsize; /* Bit size of the bar */ + int index; + atomic_t refcnt; + + void __iomem *iomem; + struct resource *resource; +}; + +#define NFP_PCI_BAR_MAX (PCI_64BIT_BAR_COUNT * 8) + +struct nfp6000_pcie { + struct pci_dev *pdev; + struct device *dev; + const struct nfp_dev_info *dev_info; + + /* PCI BAR management */ + spinlock_t bar_lock; /* Protect the PCI2CPP BAR cache */ + int bars; + struct nfp_bar bar[NFP_PCI_BAR_MAX]; + wait_queue_head_t bar_waiters; + + /* Reserved BAR access */ + struct { + void __iomem *csr; + void __iomem *em; + void __iomem *expl[4]; + } iomem; + + /* Explicit IO access */ + struct { + struct mutex mutex; /* Lock access to this explicit group */ + u8 master_id; + u8 signal_ref; + void __iomem *data; + struct { + void __iomem *addr; + int bitsize; + int free[4]; + } group[4]; + } expl; +}; + +static u32 nfp_bar_maptype(struct nfp_bar *bar) +{ + return NFP_PCIE_BAR_PCIE2CPP_MapType_of(bar->barcfg); +} + +static resource_size_t nfp_bar_resource_len(struct nfp_bar *bar) +{ + return pci_resource_len(bar->nfp->pdev, (bar->index / 8) * 2) / 8; +} + +static resource_size_t nfp_bar_resource_start(struct nfp_bar *bar) +{ + return pci_resource_start(bar->nfp->pdev, (bar->index / 8) * 2) + + nfp_bar_resource_len(bar) * (bar->index & 7); +} + +#define TARGET_WIDTH_32 4 +#define TARGET_WIDTH_64 8 + +static int +compute_bar(const struct nfp6000_pcie *nfp, const struct nfp_bar *bar, + u32 *bar_config, u64 *bar_base, + int tgt, int act, int tok, u64 offset, size_t size, int width) +{ + int bitsize; + u32 newcfg; + + if (tgt >= NFP_CPP_NUM_TARGETS) + return -EINVAL; + + switch (width) { + case 8: + newcfg = NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_64BIT); + break; + case 4: + newcfg = NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); + break; + case 0: + newcfg = NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_0BYTE); + break; + default: + return -EINVAL; + } + + if (act != NFP_CPP_ACTION_RW && act != 0) { + /* Fixed CPP mapping with specific action */ + u64 mask = ~(NFP_PCIE_P2C_FIXED_SIZE(bar) - 1); + + newcfg |= NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_FIXED); + newcfg |= NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(tgt); + newcfg |= NFP_PCIE_BAR_PCIE2CPP_Action_BaseAddress(act); + newcfg |= NFP_PCIE_BAR_PCIE2CPP_Token_BaseAddress(tok); + + if ((offset & mask) != ((offset + size - 1) & mask)) + return -EINVAL; + offset &= mask; + + bitsize = 40 - 16; + } else { + u64 mask = ~(NFP_PCIE_P2C_BULK_SIZE(bar) - 1); + + /* Bulk mapping */ + newcfg |= NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_BULK); + newcfg |= NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(tgt); + newcfg |= NFP_PCIE_BAR_PCIE2CPP_Token_BaseAddress(tok); + + if ((offset & mask) != ((offset + size - 1) & mask)) + return -EINVAL; + + offset &= mask; + + bitsize = 40 - 21; + } + + if (bar->bitsize < bitsize) + return -EINVAL; + + newcfg |= offset >> bitsize; + + if (bar_base) + *bar_base = offset; + + if (bar_config) + *bar_config = newcfg; + + return 0; +} + +static int +nfp6000_bar_write(struct nfp6000_pcie *nfp, struct nfp_bar *bar, u32 newcfg) +{ + unsigned int xbar; + + xbar = NFP_PCIE_P2C_EXPBAR_OFFSET(bar->index); + + if (nfp->iomem.csr) { + writel(newcfg, nfp->iomem.csr + xbar); + /* Readback to ensure BAR is flushed */ + readl(nfp->iomem.csr + xbar); + } else { + xbar += nfp->dev_info->pcie_cfg_expbar_offset; + pci_write_config_dword(nfp->pdev, xbar, newcfg); + } + + bar->barcfg = newcfg; + + return 0; +} + +static int +reconfigure_bar(struct nfp6000_pcie *nfp, struct nfp_bar *bar, + int tgt, int act, int tok, u64 offset, size_t size, int width) +{ + u64 newbase; + u32 newcfg; + int err; + + err = compute_bar(nfp, bar, &newcfg, &newbase, + tgt, act, tok, offset, size, width); + if (err) + return err; + + bar->base = newbase; + + return nfp6000_bar_write(nfp, bar, newcfg); +} + +/* Check if BAR can be used with the given parameters. */ +static int matching_bar(struct nfp_bar *bar, u32 tgt, u32 act, u32 tok, + u64 offset, size_t size, int width) +{ + int bartgt, baract, bartok; + int barwidth; + u32 maptype; + + maptype = NFP_PCIE_BAR_PCIE2CPP_MapType_of(bar->barcfg); + bartgt = NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress_of(bar->barcfg); + bartok = NFP_PCIE_BAR_PCIE2CPP_Token_BaseAddress_of(bar->barcfg); + baract = NFP_PCIE_BAR_PCIE2CPP_Action_BaseAddress_of(bar->barcfg); + + barwidth = NFP_PCIE_BAR_PCIE2CPP_LengthSelect_of(bar->barcfg); + switch (barwidth) { + case NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT: + barwidth = 4; + break; + case NFP_PCIE_BAR_PCIE2CPP_LengthSelect_64BIT: + barwidth = 8; + break; + case NFP_PCIE_BAR_PCIE2CPP_LengthSelect_0BYTE: + barwidth = 0; + break; + default: + barwidth = -1; + break; + } + + switch (maptype) { + case NFP_PCIE_BAR_PCIE2CPP_MapType_TARGET: + bartok = -1; + fallthrough; + case NFP_PCIE_BAR_PCIE2CPP_MapType_BULK: + baract = NFP_CPP_ACTION_RW; + if (act == 0) + act = NFP_CPP_ACTION_RW; + fallthrough; + case NFP_PCIE_BAR_PCIE2CPP_MapType_FIXED: + break; + default: + /* We don't match explicit bars through the area interface */ + return 0; + } + + /* Make sure to match up the width */ + if (barwidth != width) + return 0; + + if ((bartgt < 0 || bartgt == tgt) && + (bartok < 0 || bartok == tok) && + (baract == act) && + bar->base <= offset && + (bar->base + (1 << bar->bitsize)) >= (offset + size)) + return 1; + + /* No match */ + return 0; +} + +static int +find_matching_bar(struct nfp6000_pcie *nfp, + u32 tgt, u32 act, u32 tok, u64 offset, size_t size, int width) +{ + int n; + + for (n = 0; n < nfp->bars; n++) { + struct nfp_bar *bar = &nfp->bar[n]; + + if (matching_bar(bar, tgt, act, tok, offset, size, width)) + return n; + } + + return -1; +} + +/* Return EAGAIN if no resource is available */ +static int +find_unused_bar_noblock(const struct nfp6000_pcie *nfp, + int tgt, int act, int tok, + u64 offset, size_t size, int width) +{ + int n, busy = 0; + + for (n = 0; n < nfp->bars; n++) { + const struct nfp_bar *bar = &nfp->bar[n]; + int err; + + if (!bar->bitsize) + continue; + + /* Just check to see if we can make it fit... */ + err = compute_bar(nfp, bar, NULL, NULL, + tgt, act, tok, offset, size, width); + if (err) + continue; + + if (!atomic_read(&bar->refcnt)) + return n; + + busy++; + } + + if (WARN(!busy, "No suitable BAR found for request tgt:0x%x act:0x%x tok:0x%x off:0x%llx size:%zd width:%d\n", + tgt, act, tok, offset, size, width)) + return -EINVAL; + + return -EAGAIN; +} + +static int +find_unused_bar_and_lock(struct nfp6000_pcie *nfp, + int tgt, int act, int tok, + u64 offset, size_t size, int width) +{ + unsigned long flags; + int n; + + spin_lock_irqsave(&nfp->bar_lock, flags); + + n = find_unused_bar_noblock(nfp, tgt, act, tok, offset, size, width); + if (n < 0) + spin_unlock_irqrestore(&nfp->bar_lock, flags); + else + __release(&nfp->bar_lock); + + return n; +} + +static void nfp_bar_get(struct nfp6000_pcie *nfp, struct nfp_bar *bar) +{ + atomic_inc(&bar->refcnt); +} + +static void nfp_bar_put(struct nfp6000_pcie *nfp, struct nfp_bar *bar) +{ + if (atomic_dec_and_test(&bar->refcnt)) + wake_up_interruptible(&nfp->bar_waiters); +} + +static int +nfp_wait_for_bar(struct nfp6000_pcie *nfp, int *barnum, + u32 tgt, u32 act, u32 tok, u64 offset, size_t size, int width) +{ + return wait_event_interruptible(nfp->bar_waiters, + (*barnum = find_unused_bar_and_lock(nfp, tgt, act, tok, + offset, size, width)) + != -EAGAIN); +} + +static int +nfp_alloc_bar(struct nfp6000_pcie *nfp, + u32 tgt, u32 act, u32 tok, + u64 offset, size_t size, int width, int nonblocking) +{ + unsigned long irqflags; + int barnum, retval; + + if (size > (1 << 24)) + return -EINVAL; + + spin_lock_irqsave(&nfp->bar_lock, irqflags); + barnum = find_matching_bar(nfp, tgt, act, tok, offset, size, width); + if (barnum >= 0) { + /* Found a perfect match. */ + nfp_bar_get(nfp, &nfp->bar[barnum]); + spin_unlock_irqrestore(&nfp->bar_lock, irqflags); + return barnum; + } + + barnum = find_unused_bar_noblock(nfp, tgt, act, tok, + offset, size, width); + if (barnum < 0) { + if (nonblocking) + goto err_nobar; + + /* Wait until a BAR becomes available. The + * find_unused_bar function will reclaim the bar_lock + * if a free BAR is found. + */ + spin_unlock_irqrestore(&nfp->bar_lock, irqflags); + retval = nfp_wait_for_bar(nfp, &barnum, tgt, act, tok, + offset, size, width); + if (retval) + return retval; + __acquire(&nfp->bar_lock); + } + + nfp_bar_get(nfp, &nfp->bar[barnum]); + retval = reconfigure_bar(nfp, &nfp->bar[barnum], + tgt, act, tok, offset, size, width); + if (retval < 0) { + nfp_bar_put(nfp, &nfp->bar[barnum]); + barnum = retval; + } + +err_nobar: + spin_unlock_irqrestore(&nfp->bar_lock, irqflags); + return barnum; +} + +static void disable_bars(struct nfp6000_pcie *nfp); + +static int bar_cmp(const void *aptr, const void *bptr) +{ + const struct nfp_bar *a = aptr, *b = bptr; + + if (a->bitsize == b->bitsize) + return a->index - b->index; + else + return a->bitsize - b->bitsize; +} + +/* Map all PCI bars and fetch the actual BAR configurations from the + * board. We assume that the BAR with the PCIe config block is + * already mapped. + * + * BAR0.0: Reserved for General Mapping (for MSI-X access to PCIe SRAM) + * BAR0.1: Reserved for XPB access (for MSI-X access to PCIe PBA) + * BAR0.2: -- + * BAR0.3: -- + * BAR0.4: Reserved for Explicit 0.0-0.3 access + * BAR0.5: Reserved for Explicit 1.0-1.3 access + * BAR0.6: Reserved for Explicit 2.0-2.3 access + * BAR0.7: Reserved for Explicit 3.0-3.3 access + * + * BAR1.0-BAR1.7: -- + * BAR2.0-BAR2.7: -- + */ +static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) +{ + const u32 barcfg_msix_general = + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + const u32 barcfg_msix_xpb = + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( + NFP_CPP_TARGET_ISLAND_XPB); + const u32 barcfg_explicit[4] = { + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT0), + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT1), + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT2), + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT3), + }; + char status_msg[196] = {}; + int i, err, bars_free; + struct nfp_bar *bar; + int expl_groups; + char *msg, *end; + + msg = status_msg + + snprintf(status_msg, sizeof(status_msg) - 1, "RESERVED BARs: "); + end = status_msg + sizeof(status_msg) - 1; + + bar = &nfp->bar[0]; + for (i = 0; i < ARRAY_SIZE(nfp->bar); i++, bar++) { + struct resource *res; + + res = &nfp->pdev->resource[(i >> 3) * 2]; + + /* Skip over BARs that are not IORESOURCE_MEM */ + if (!(resource_type(res) & IORESOURCE_MEM)) { + bar--; + continue; + } + + bar->resource = res; + bar->barcfg = 0; + + bar->nfp = nfp; + bar->index = i; + bar->mask = nfp_bar_resource_len(bar) - 1; + bar->bitsize = fls(bar->mask); + bar->base = 0; + bar->iomem = NULL; + } + + nfp->bars = bar - &nfp->bar[0]; + if (nfp->bars < 8) { + dev_err(nfp->dev, "No usable BARs found!\n"); + return -EINVAL; + } + + bars_free = nfp->bars; + + /* Convert unit ID (0..3) to signal master/data master ID (0x40..0x70) + */ + mutex_init(&nfp->expl.mutex); + + nfp->expl.master_id = ((NFP_CPP_INTERFACE_UNIT_of(interface) & 3) + 4) + << 4; + nfp->expl.signal_ref = 0x10; + + /* Configure, and lock, BAR0.0 for General Target use (MSI-X SRAM) */ + bar = &nfp->bar[0]; + if (nfp_bar_resource_len(bar) >= NFP_PCI_MIN_MAP_SIZE) + bar->iomem = ioremap(nfp_bar_resource_start(bar), + nfp_bar_resource_len(bar)); + if (bar->iomem) { + int pf; + + msg += scnprintf(msg, end - msg, "0.0: General/MSI-X SRAM, "); + atomic_inc(&bar->refcnt); + bars_free--; + + nfp6000_bar_write(nfp, bar, barcfg_msix_general); + + nfp->expl.data = bar->iomem + NFP_PCIE_SRAM + + nfp->dev_info->pcie_expl_offset; + + switch (nfp->pdev->device) { + case PCI_DEVICE_ID_NFP3800: + pf = nfp->pdev->devfn & 7; + nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(pf); + break; + case PCI_DEVICE_ID_NFP4000: + case PCI_DEVICE_ID_NFP5000: + case PCI_DEVICE_ID_NFP6000: + nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(0); + break; + default: + dev_err(nfp->dev, "Unsupported device ID: %04hx!\n", + nfp->pdev->device); + err = -EINVAL; + goto err_unmap_bar0; + } + nfp->iomem.em = bar->iomem + NFP_PCIE_EM; + } + + switch (nfp->pdev->device) { + case PCI_DEVICE_ID_NFP3800: + expl_groups = 1; + break; + case PCI_DEVICE_ID_NFP4000: + case PCI_DEVICE_ID_NFP5000: + case PCI_DEVICE_ID_NFP6000: + expl_groups = 4; + break; + default: + dev_err(nfp->dev, "Unsupported device ID: %04hx!\n", + nfp->pdev->device); + err = -EINVAL; + goto err_unmap_bar0; + } + + /* Configure, and lock, BAR0.1 for PCIe XPB (MSI-X PBA) */ + bar = &nfp->bar[1]; + msg += scnprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, "); + atomic_inc(&bar->refcnt); + bars_free--; + + nfp6000_bar_write(nfp, bar, barcfg_msix_xpb); + + /* Use BAR0.4..BAR0.7 for EXPL IO */ + for (i = 0; i < 4; i++) { + int j; + + if (i >= NFP_PCIE_EXPLICIT_BARS || i >= expl_groups) { + nfp->expl.group[i].bitsize = 0; + continue; + } + + bar = &nfp->bar[4 + i]; + bar->iomem = ioremap(nfp_bar_resource_start(bar), + nfp_bar_resource_len(bar)); + if (bar->iomem) { + msg += scnprintf(msg, end - msg, + "0.%d: Explicit%d, ", 4 + i, i); + atomic_inc(&bar->refcnt); + bars_free--; + + nfp->expl.group[i].bitsize = bar->bitsize; + nfp->expl.group[i].addr = bar->iomem; + nfp6000_bar_write(nfp, bar, barcfg_explicit[i]); + + for (j = 0; j < 4; j++) + nfp->expl.group[i].free[j] = true; + } + nfp->iomem.expl[i] = bar->iomem; + } + + /* Sort bars by bit size - use the smallest possible first. */ + sort(&nfp->bar[0], nfp->bars, sizeof(nfp->bar[0]), + bar_cmp, NULL); + + dev_info(nfp->dev, "%sfree: %d/%d\n", status_msg, bars_free, nfp->bars); + + return 0; + +err_unmap_bar0: + if (nfp->bar[0].iomem) + iounmap(nfp->bar[0].iomem); + return err; +} + +static void disable_bars(struct nfp6000_pcie *nfp) +{ + struct nfp_bar *bar = &nfp->bar[0]; + int n; + + for (n = 0; n < nfp->bars; n++, bar++) { + if (bar->iomem) { + iounmap(bar->iomem); + bar->iomem = NULL; + } + } +} + +/* + * Generic CPP bus access interface. + */ + +struct nfp6000_area_priv { + atomic_t refcnt; + + struct nfp_bar *bar; + u32 bar_offset; + + u32 target; + u32 action; + u32 token; + u64 offset; + struct { + int read; + int write; + int bar; + } width; + size_t size; + + void __iomem *iomem; + phys_addr_t phys; + struct resource resource; +}; + +static int nfp6000_area_init(struct nfp_cpp_area *area, u32 dest, + unsigned long long address, unsigned long size) +{ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + u32 target = NFP_CPP_ID_TARGET_of(dest); + u32 action = NFP_CPP_ID_ACTION_of(dest); + u32 token = NFP_CPP_ID_TOKEN_of(dest); + int pp; + + pp = nfp_target_pushpull(NFP_CPP_ID(target, action, token), address); + if (pp < 0) + return pp; + + priv->width.read = PUSH_WIDTH(pp); + priv->width.write = PULL_WIDTH(pp); + if (priv->width.read > 0 && + priv->width.write > 0 && + priv->width.read != priv->width.write) { + return -EINVAL; + } + + if (priv->width.read > 0) + priv->width.bar = priv->width.read; + else + priv->width.bar = priv->width.write; + + atomic_set(&priv->refcnt, 0); + priv->bar = NULL; + + priv->target = target; + priv->action = action; + priv->token = token; + priv->offset = address; + priv->size = size; + memset(&priv->resource, 0, sizeof(priv->resource)); + + return 0; +} + +static void nfp6000_area_cleanup(struct nfp_cpp_area *area) +{ +} + +static void priv_area_get(struct nfp_cpp_area *area) +{ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + atomic_inc(&priv->refcnt); +} + +static int priv_area_put(struct nfp_cpp_area *area) +{ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + if (WARN_ON(!atomic_read(&priv->refcnt))) + return 0; + + return atomic_dec_and_test(&priv->refcnt); +} + +static int nfp6000_area_acquire(struct nfp_cpp_area *area) +{ + struct nfp6000_pcie *nfp = nfp_cpp_priv(nfp_cpp_area_cpp(area)); + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + int barnum, err; + + if (priv->bar) { + /* Already allocated. */ + priv_area_get(area); + return 0; + } + + barnum = nfp_alloc_bar(nfp, priv->target, priv->action, priv->token, + priv->offset, priv->size, priv->width.bar, 1); + + if (barnum < 0) { + err = barnum; + goto err_alloc_bar; + } + priv->bar = &nfp->bar[barnum]; + + /* Calculate offset into BAR. */ + if (nfp_bar_maptype(priv->bar) == + NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) { + priv->bar_offset = priv->offset & + (NFP_PCIE_P2C_GENERAL_SIZE(priv->bar) - 1); + priv->bar_offset += NFP_PCIE_P2C_GENERAL_TARGET_OFFSET( + priv->bar, priv->target); + priv->bar_offset += NFP_PCIE_P2C_GENERAL_TOKEN_OFFSET( + priv->bar, priv->token); + } else { + priv->bar_offset = priv->offset & priv->bar->mask; + } + + /* We don't actually try to acquire the resource area using + * request_resource. This would prevent sharing the mapped + * BAR between multiple CPP areas and prevent us from + * effectively utilizing the limited amount of BAR resources. + */ + priv->phys = nfp_bar_resource_start(priv->bar) + priv->bar_offset; + priv->resource.name = nfp_cpp_area_name(area); + priv->resource.start = priv->phys; + priv->resource.end = priv->resource.start + priv->size - 1; + priv->resource.flags = IORESOURCE_MEM; + + /* If the bar is already mapped in, use its mapping */ + if (priv->bar->iomem) + priv->iomem = priv->bar->iomem + priv->bar_offset; + else + /* Must have been too big. Sub-allocate. */ + priv->iomem = ioremap(priv->phys, priv->size); + + if (IS_ERR_OR_NULL(priv->iomem)) { + dev_err(nfp->dev, "Can't ioremap() a %d byte region of BAR %d\n", + (int)priv->size, priv->bar->index); + err = !priv->iomem ? -ENOMEM : PTR_ERR(priv->iomem); + priv->iomem = NULL; + goto err_iomem_remap; + } + + priv_area_get(area); + return 0; + +err_iomem_remap: + nfp_bar_put(nfp, priv->bar); + priv->bar = NULL; +err_alloc_bar: + return err; +} + +static void nfp6000_area_release(struct nfp_cpp_area *area) +{ + struct nfp6000_pcie *nfp = nfp_cpp_priv(nfp_cpp_area_cpp(area)); + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + if (!priv_area_put(area)) + return; + + if (!priv->bar->iomem) + iounmap(priv->iomem); + + nfp_bar_put(nfp, priv->bar); + + priv->bar = NULL; + priv->iomem = NULL; +} + +static phys_addr_t nfp6000_area_phys(struct nfp_cpp_area *area) +{ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + return priv->phys; +} + +static void __iomem *nfp6000_area_iomem(struct nfp_cpp_area *area) +{ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + return priv->iomem; +} + +static struct resource *nfp6000_area_resource(struct nfp_cpp_area *area) +{ + /* Use the BAR resource as the resource for the CPP area. + * This enables us to share the BAR among multiple CPP areas + * without resource conflicts. + */ + struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); + + return priv->bar->resource; +} + +static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr, + unsigned long offset, unsigned int length) +{ + u64 __maybe_unused *wrptr64 = kernel_vaddr; + const u64 __iomem __maybe_unused *rdptr64; + struct nfp6000_area_priv *priv; + u32 *wrptr32 = kernel_vaddr; + const u32 __iomem *rdptr32; + int n, width; + + priv = nfp_cpp_area_priv(area); + rdptr64 = priv->iomem + offset; + rdptr32 = priv->iomem + offset; + + if (offset + length > priv->size) + return -EFAULT; + + width = priv->width.read; + if (width <= 0) + return -EINVAL; + + /* MU reads via a PCIe2CPP BAR support 32bit (and other) lengths */ + if (priv->target == (NFP_CPP_TARGET_MU & NFP_CPP_TARGET_ID_MASK) && + priv->action == NFP_CPP_ACTION_RW && + (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4)) + width = TARGET_WIDTH_32; + + /* Unaligned? Translate to an explicit access */ + if ((priv->offset + offset) & (width - 1)) + return nfp_cpp_explicit_read(nfp_cpp_area_cpp(area), + NFP_CPP_ID(priv->target, + priv->action, + priv->token), + priv->offset + offset, + kernel_vaddr, length, width); + + if (WARN_ON(!priv->bar)) + return -EFAULT; + + switch (width) { + case TARGET_WIDTH_32: + if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0) + return -EINVAL; + + for (n = 0; n < length; n += sizeof(u32)) + *wrptr32++ = __raw_readl(rdptr32++); + return n; +#ifdef __raw_readq + case TARGET_WIDTH_64: + if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) + return -EINVAL; + + for (n = 0; n < length; n += sizeof(u64)) + *wrptr64++ = __raw_readq(rdptr64++); + return n; +#endif + default: + return -EINVAL; + } +} + +static int +nfp6000_area_write(struct nfp_cpp_area *area, + const void *kernel_vaddr, + unsigned long offset, unsigned int length) +{ + const u64 __maybe_unused *rdptr64 = kernel_vaddr; + u64 __iomem __maybe_unused *wrptr64; + const u32 *rdptr32 = kernel_vaddr; + struct nfp6000_area_priv *priv; + u32 __iomem *wrptr32; + int n, width; + + priv = nfp_cpp_area_priv(area); + wrptr64 = priv->iomem + offset; + wrptr32 = priv->iomem + offset; + + if (offset + length > priv->size) + return -EFAULT; + + width = priv->width.write; + if (width <= 0) + return -EINVAL; + + /* MU writes via a PCIe2CPP BAR support 32bit (and other) lengths */ + if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && + priv->action == NFP_CPP_ACTION_RW && + (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4)) + width = TARGET_WIDTH_32; + + /* Unaligned? Translate to an explicit access */ + if ((priv->offset + offset) & (width - 1)) + return nfp_cpp_explicit_write(nfp_cpp_area_cpp(area), + NFP_CPP_ID(priv->target, + priv->action, + priv->token), + priv->offset + offset, + kernel_vaddr, length, width); + + if (WARN_ON(!priv->bar)) + return -EFAULT; + + switch (width) { + case TARGET_WIDTH_32: + if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0) + return -EINVAL; + + for (n = 0; n < length; n += sizeof(u32)) { + __raw_writel(*rdptr32++, wrptr32++); + wmb(); + } + return n; +#ifdef __raw_writeq + case TARGET_WIDTH_64: + if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) + return -EINVAL; + + for (n = 0; n < length; n += sizeof(u64)) { + __raw_writeq(*rdptr64++, wrptr64++); + wmb(); + } + return n; +#endif + default: + return -EINVAL; + } +} + +struct nfp6000_explicit_priv { + struct nfp6000_pcie *nfp; + struct { + int group; + int area; + } bar; + int bitsize; + void __iomem *data; + void __iomem *addr; +}; + +static int nfp6000_explicit_acquire(struct nfp_cpp_explicit *expl) +{ + struct nfp6000_pcie *nfp = nfp_cpp_priv(nfp_cpp_explicit_cpp(expl)); + struct nfp6000_explicit_priv *priv = nfp_cpp_explicit_priv(expl); + int i, j; + + mutex_lock(&nfp->expl.mutex); + for (i = 0; i < ARRAY_SIZE(nfp->expl.group); i++) { + if (!nfp->expl.group[i].bitsize) + continue; + + for (j = 0; j < ARRAY_SIZE(nfp->expl.group[i].free); j++) { + u16 data_offset; + + if (!nfp->expl.group[i].free[j]) + continue; + + priv->nfp = nfp; + priv->bar.group = i; + priv->bar.area = j; + priv->bitsize = nfp->expl.group[i].bitsize - 2; + + data_offset = (priv->bar.group << 9) + + (priv->bar.area << 7); + priv->data = nfp->expl.data + data_offset; + priv->addr = nfp->expl.group[i].addr + + (priv->bar.area << priv->bitsize); + nfp->expl.group[i].free[j] = false; + + mutex_unlock(&nfp->expl.mutex); + return 0; + } + } + mutex_unlock(&nfp->expl.mutex); + + return -EAGAIN; +} + +static void nfp6000_explicit_release(struct nfp_cpp_explicit *expl) +{ + struct nfp6000_explicit_priv *priv = nfp_cpp_explicit_priv(expl); + struct nfp6000_pcie *nfp = priv->nfp; + + mutex_lock(&nfp->expl.mutex); + nfp->expl.group[priv->bar.group].free[priv->bar.area] = true; + mutex_unlock(&nfp->expl.mutex); +} + +static int nfp6000_explicit_put(struct nfp_cpp_explicit *expl, + const void *buff, size_t len) +{ + struct nfp6000_explicit_priv *priv = nfp_cpp_explicit_priv(expl); + const u32 *src = buff; + size_t i; + + for (i = 0; i < len; i += sizeof(u32)) + writel(*(src++), priv->data + i); + + return i; +} + +static int +nfp6000_explicit_do(struct nfp_cpp_explicit *expl, + const struct nfp_cpp_explicit_command *cmd, u64 address) +{ + struct nfp6000_explicit_priv *priv = nfp_cpp_explicit_priv(expl); + u8 signal_master, signal_ref, data_master; + struct nfp6000_pcie *nfp = priv->nfp; + int sigmask = 0; + u16 data_ref; + u32 csr[3]; + + if (cmd->siga_mode) + sigmask |= 1 << cmd->siga; + if (cmd->sigb_mode) + sigmask |= 1 << cmd->sigb; + + signal_master = cmd->signal_master; + if (!signal_master) + signal_master = nfp->expl.master_id; + + signal_ref = cmd->signal_ref; + if (signal_master == nfp->expl.master_id) + signal_ref = nfp->expl.signal_ref + + ((priv->bar.group * 4 + priv->bar.area) << 1); + + data_master = cmd->data_master; + if (!data_master) + data_master = nfp->expl.master_id; + + data_ref = cmd->data_ref; + if (data_master == nfp->expl.master_id) + data_ref = 0x1000 + + (priv->bar.group << 9) + (priv->bar.area << 7); + + csr[0] = NFP_PCIE_BAR_EXPLICIT_BAR0_SignalType(sigmask) | + NFP_PCIE_BAR_EXPLICIT_BAR0_Token( + NFP_CPP_ID_TOKEN_of(cmd->cpp_id)) | + NFP_PCIE_BAR_EXPLICIT_BAR0_Address(address >> 16); + + csr[1] = NFP_PCIE_BAR_EXPLICIT_BAR1_SignalRef(signal_ref) | + NFP_PCIE_BAR_EXPLICIT_BAR1_DataMaster(data_master) | + NFP_PCIE_BAR_EXPLICIT_BAR1_DataRef(data_ref); + + csr[2] = NFP_PCIE_BAR_EXPLICIT_BAR2_Target( + NFP_CPP_ID_TARGET_of(cmd->cpp_id)) | + NFP_PCIE_BAR_EXPLICIT_BAR2_Action( + NFP_CPP_ID_ACTION_of(cmd->cpp_id)) | + NFP_PCIE_BAR_EXPLICIT_BAR2_Length(cmd->len) | + NFP_PCIE_BAR_EXPLICIT_BAR2_ByteMask(cmd->byte_mask) | + NFP_PCIE_BAR_EXPLICIT_BAR2_SignalMaster(signal_master); + + if (nfp->iomem.csr) { + writel(csr[0], nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR0(priv->bar.group, + priv->bar.area)); + writel(csr[1], nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR1(priv->bar.group, + priv->bar.area)); + writel(csr[2], nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR2(priv->bar.group, + priv->bar.area)); + /* Readback to ensure BAR is flushed */ + readl(nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR0(priv->bar.group, + priv->bar.area)); + readl(nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR1(priv->bar.group, + priv->bar.area)); + readl(nfp->iomem.csr + + NFP_PCIE_BAR_EXPLICIT_BAR2(priv->bar.group, + priv->bar.area)); + } else { + pci_write_config_dword(nfp->pdev, 0x400 + + NFP_PCIE_BAR_EXPLICIT_BAR0( + priv->bar.group, priv->bar.area), + csr[0]); + + pci_write_config_dword(nfp->pdev, 0x400 + + NFP_PCIE_BAR_EXPLICIT_BAR1( + priv->bar.group, priv->bar.area), + csr[1]); + + pci_write_config_dword(nfp->pdev, 0x400 + + NFP_PCIE_BAR_EXPLICIT_BAR2( + priv->bar.group, priv->bar.area), + csr[2]); + } + + /* Issue the 'kickoff' transaction */ + readb(priv->addr + (address & ((1 << priv->bitsize) - 1))); + + return sigmask; +} + +static int nfp6000_explicit_get(struct nfp_cpp_explicit *expl, + void *buff, size_t len) +{ + struct nfp6000_explicit_priv *priv = nfp_cpp_explicit_priv(expl); + u32 *dst = buff; + size_t i; + + for (i = 0; i < len; i += sizeof(u32)) + *(dst++) = readl(priv->data + i); + + return i; +} + +static int nfp6000_init(struct nfp_cpp *cpp) +{ + nfp_cpp_area_cache_add(cpp, SZ_64K); + nfp_cpp_area_cache_add(cpp, SZ_64K); + nfp_cpp_area_cache_add(cpp, SZ_256K); + + return 0; +} + +static void nfp6000_free(struct nfp_cpp *cpp) +{ + struct nfp6000_pcie *nfp = nfp_cpp_priv(cpp); + + disable_bars(nfp); + kfree(nfp); +} + +static int nfp6000_read_serial(struct device *dev, u8 *serial) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u64 dsn; + + dsn = pci_get_dsn(pdev); + if (!dsn) { + dev_err(dev, "can't find PCIe Serial Number Capability\n"); + return -EINVAL; + } + + put_unaligned_be32((u32)(dsn >> 32), serial); + put_unaligned_be16((u16)(dsn >> 16), serial + 4); + + return 0; +} + +static int nfp6000_get_interface(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u64 dsn; + + dsn = pci_get_dsn(pdev); + if (!dsn) { + dev_err(dev, "can't find PCIe Serial Number Capability\n"); + return -EINVAL; + } + + return dsn & 0xffff; +} + +static const struct nfp_cpp_operations nfp6000_pcie_ops = { + .owner = THIS_MODULE, + + .init = nfp6000_init, + .free = nfp6000_free, + + .read_serial = nfp6000_read_serial, + .get_interface = nfp6000_get_interface, + + .area_priv_size = sizeof(struct nfp6000_area_priv), + .area_init = nfp6000_area_init, + .area_cleanup = nfp6000_area_cleanup, + .area_acquire = nfp6000_area_acquire, + .area_release = nfp6000_area_release, + .area_phys = nfp6000_area_phys, + .area_iomem = nfp6000_area_iomem, + .area_resource = nfp6000_area_resource, + .area_read = nfp6000_area_read, + .area_write = nfp6000_area_write, + + .explicit_priv_size = sizeof(struct nfp6000_explicit_priv), + .explicit_acquire = nfp6000_explicit_acquire, + .explicit_release = nfp6000_explicit_release, + .explicit_put = nfp6000_explicit_put, + .explicit_do = nfp6000_explicit_do, + .explicit_get = nfp6000_explicit_get, +}; + +/** + * nfp_cpp_from_nfp6000_pcie() - Build a NFP CPP bus from a NFP6000 PCI device + * @pdev: NFP6000 PCI device + * @dev_info: NFP ASIC params + * + * Return: NFP CPP handle + */ +struct nfp_cpp * +nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev, const struct nfp_dev_info *dev_info) +{ + struct nfp6000_pcie *nfp; + u16 interface; + int err; + + /* Finished with card initialization. */ + dev_info(&pdev->dev, "Network Flow Processor %s PCIe Card Probe\n", + dev_info->chip_names); + pcie_print_link_status(pdev); + + nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) { + err = -ENOMEM; + goto err_ret; + } + + nfp->dev = &pdev->dev; + nfp->pdev = pdev; + nfp->dev_info = dev_info; + init_waitqueue_head(&nfp->bar_waiters); + spin_lock_init(&nfp->bar_lock); + + interface = nfp6000_get_interface(&pdev->dev); + + if (NFP_CPP_INTERFACE_TYPE_of(interface) != + NFP_CPP_INTERFACE_TYPE_PCI) { + dev_err(&pdev->dev, + "Interface type %d is not the expected %d\n", + NFP_CPP_INTERFACE_TYPE_of(interface), + NFP_CPP_INTERFACE_TYPE_PCI); + err = -ENODEV; + goto err_free_nfp; + } + + if (NFP_CPP_INTERFACE_CHANNEL_of(interface) != + NFP_CPP_INTERFACE_CHANNEL_PEROPENER) { + dev_err(&pdev->dev, "Interface channel %d is not the expected %d\n", + NFP_CPP_INTERFACE_CHANNEL_of(interface), + NFP_CPP_INTERFACE_CHANNEL_PEROPENER); + err = -ENODEV; + goto err_free_nfp; + } + + err = enable_bars(nfp, interface); + if (err) + goto err_free_nfp; + + /* Probe for all the common NFP devices */ + return nfp_cpp_from_operations(&nfp6000_pcie_ops, &pdev->dev, nfp); + +err_free_nfp: + kfree(nfp); +err_ret: + dev_err(&pdev->dev, "NFP6000 PCI setup failed\n"); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h new file mode 100644 index 000000000..097660b67 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* + * nfp6000_pcie.h + * Author: Jason McMullan <jason.mcmullan@netronome.com> + */ + +#ifndef NFP6000_PCIE_H +#define NFP6000_PCIE_H + +#include "nfp_cpp.h" + +struct nfp_cpp * +nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev, const struct nfp_dev_info *dev_info); + +#endif /* NFP6000_PCIE_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h new file mode 100644 index 000000000..3d172e255 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* + * nfp_arm.h + * Definitions for ARM-based registers and memory spaces + */ + +#ifndef NFP_ARM_H +#define NFP_ARM_H + +#define NFP_ARM_QUEUE(_q) (0x100000 + (0x800 * ((_q) & 0xff))) +#define NFP_ARM_IM 0x200000 +#define NFP_ARM_EM 0x300000 +#define NFP_ARM_GCSR 0x400000 +#define NFP_ARM_MPCORE 0x800000 +#define NFP_ARM_PL310 0xa00000 +/* Register Type: BulkBARConfig */ +#define NFP_ARM_GCSR_BULK_BAR(_bar) (0x0 + (0x4 * ((_bar) & 0x7))) +#define NFP_ARM_GCSR_BULK_BAR_TYPE (0x1 << 31) +#define NFP_ARM_GCSR_BULK_BAR_TYPE_BULK (0x0) +#define NFP_ARM_GCSR_BULK_BAR_TYPE_EXPA (0x80000000) +#define NFP_ARM_GCSR_BULK_BAR_TGT(_x) (((_x) & 0xf) << 27) +#define NFP_ARM_GCSR_BULK_BAR_TGT_of(_x) (((_x) >> 27) & 0xf) +#define NFP_ARM_GCSR_BULK_BAR_TOK(_x) (((_x) & 0x3) << 25) +#define NFP_ARM_GCSR_BULK_BAR_TOK_of(_x) (((_x) >> 25) & 0x3) +#define NFP_ARM_GCSR_BULK_BAR_LEN (0x1 << 24) +#define NFP_ARM_GCSR_BULK_BAR_LEN_32BIT (0x0) +#define NFP_ARM_GCSR_BULK_BAR_LEN_64BIT (0x1000000) +#define NFP_ARM_GCSR_BULK_BAR_ADDR(_x) ((_x) & 0x7ff) +#define NFP_ARM_GCSR_BULK_BAR_ADDR_of(_x) ((_x) & 0x7ff) +/* Register Type: ExpansionBARConfig */ +#define NFP_ARM_GCSR_EXPA_BAR(_bar) (0x20 + (0x4 * ((_bar) & 0xf))) +#define NFP_ARM_GCSR_EXPA_BAR_TYPE (0x1 << 31) +#define NFP_ARM_GCSR_EXPA_BAR_TYPE_EXPA (0x0) +#define NFP_ARM_GCSR_EXPA_BAR_TYPE_EXPL (0x80000000) +#define NFP_ARM_GCSR_EXPA_BAR_TGT(_x) (((_x) & 0xf) << 27) +#define NFP_ARM_GCSR_EXPA_BAR_TGT_of(_x) (((_x) >> 27) & 0xf) +#define NFP_ARM_GCSR_EXPA_BAR_TOK(_x) (((_x) & 0x3) << 25) +#define NFP_ARM_GCSR_EXPA_BAR_TOK_of(_x) (((_x) >> 25) & 0x3) +#define NFP_ARM_GCSR_EXPA_BAR_LEN (0x1 << 24) +#define NFP_ARM_GCSR_EXPA_BAR_LEN_32BIT (0x0) +#define NFP_ARM_GCSR_EXPA_BAR_LEN_64BIT (0x1000000) +#define NFP_ARM_GCSR_EXPA_BAR_ACT(_x) (((_x) & 0x1f) << 19) +#define NFP_ARM_GCSR_EXPA_BAR_ACT_of(_x) (((_x) >> 19) & 0x1f) +#define NFP_ARM_GCSR_EXPA_BAR_ACT_DERIVED (0) +#define NFP_ARM_GCSR_EXPA_BAR_ADDR(_x) ((_x) & 0x7fff) +#define NFP_ARM_GCSR_EXPA_BAR_ADDR_of(_x) ((_x) & 0x7fff) +/* Register Type: ExplicitBARConfig0_Reg */ +#define NFP_ARM_GCSR_EXPL0_BAR(_bar) (0x60 + (0x4 * ((_bar) & 0x7))) +#define NFP_ARM_GCSR_EXPL0_BAR_ADDR(_x) ((_x) & 0x3ffff) +#define NFP_ARM_GCSR_EXPL0_BAR_ADDR_of(_x) ((_x) & 0x3ffff) +/* Register Type: ExplicitBARConfig1_Reg */ +#define NFP_ARM_GCSR_EXPL1_BAR(_bar) (0x80 + (0x4 * ((_bar) & 0x7))) +#define NFP_ARM_GCSR_EXPL1_BAR_POSTED (0x1 << 31) +#define NFP_ARM_GCSR_EXPL1_BAR_SIGNAL_REF(_x) (((_x) & 0x7f) << 24) +#define NFP_ARM_GCSR_EXPL1_BAR_SIGNAL_REF_of(_x) (((_x) >> 24) & 0x7f) +#define NFP_ARM_GCSR_EXPL1_BAR_DATA_MASTER(_x) (((_x) & 0xff) << 16) +#define NFP_ARM_GCSR_EXPL1_BAR_DATA_MASTER_of(_x) (((_x) >> 16) & 0xff) +#define NFP_ARM_GCSR_EXPL1_BAR_DATA_REF(_x) ((_x) & 0x3fff) +#define NFP_ARM_GCSR_EXPL1_BAR_DATA_REF_of(_x) ((_x) & 0x3fff) +/* Register Type: ExplicitBARConfig2_Reg */ +#define NFP_ARM_GCSR_EXPL2_BAR(_bar) (0xa0 + (0x4 * ((_bar) & 0x7))) +#define NFP_ARM_GCSR_EXPL2_BAR_TGT(_x) (((_x) & 0xf) << 28) +#define NFP_ARM_GCSR_EXPL2_BAR_TGT_of(_x) (((_x) >> 28) & 0xf) +#define NFP_ARM_GCSR_EXPL2_BAR_ACT(_x) (((_x) & 0x1f) << 23) +#define NFP_ARM_GCSR_EXPL2_BAR_ACT_of(_x) (((_x) >> 23) & 0x1f) +#define NFP_ARM_GCSR_EXPL2_BAR_LEN(_x) (((_x) & 0x1f) << 18) +#define NFP_ARM_GCSR_EXPL2_BAR_LEN_of(_x) (((_x) >> 18) & 0x1f) +#define NFP_ARM_GCSR_EXPL2_BAR_BYTE_MASK(_x) (((_x) & 0xff) << 10) +#define NFP_ARM_GCSR_EXPL2_BAR_BYTE_MASK_of(_x) (((_x) >> 10) & 0xff) +#define NFP_ARM_GCSR_EXPL2_BAR_TOK(_x) (((_x) & 0x3) << 8) +#define NFP_ARM_GCSR_EXPL2_BAR_TOK_of(_x) (((_x) >> 8) & 0x3) +#define NFP_ARM_GCSR_EXPL2_BAR_SIGNAL_MASTER(_x) ((_x) & 0xff) +#define NFP_ARM_GCSR_EXPL2_BAR_SIGNAL_MASTER_of(_x) ((_x) & 0xff) +/* Register Type: PostedCommandSignal */ +#define NFP_ARM_GCSR_EXPL_POST(_bar) (0xc0 + (0x4 * ((_bar) & 0x7))) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B(_x) (((_x) & 0x7f) << 25) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_of(_x) (((_x) >> 25) & 0x7f) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_BUS (0x1 << 24) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_BUS_PULL (0x0) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_BUS_PUSH (0x1000000) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A(_x) (((_x) & 0x7f) << 17) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_of(_x) (((_x) >> 17) & 0x7f) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_BUS (0x1 << 16) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_BUS_PULL (0x0) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_BUS_PUSH (0x10000) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_RCVD (0x1 << 7) +#define NFP_ARM_GCSR_EXPL_POST_SIG_B_VALID (0x1 << 6) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_RCVD (0x1 << 5) +#define NFP_ARM_GCSR_EXPL_POST_SIG_A_VALID (0x1 << 4) +#define NFP_ARM_GCSR_EXPL_POST_CMD_COMPLETE (0x1) +/* Register Type: MPCoreBaseAddress */ +#define NFP_ARM_GCSR_MPCORE_BASE 0x00e0 +#define NFP_ARM_GCSR_MPCORE_BASE_ADDR(_x) (((_x) & 0x7ffff) << 13) +#define NFP_ARM_GCSR_MPCORE_BASE_ADDR_of(_x) (((_x) >> 13) & 0x7ffff) +/* Register Type: PL310BaseAddress */ +#define NFP_ARM_GCSR_PL310_BASE 0x00e4 +#define NFP_ARM_GCSR_PL310_BASE_ADDR(_x) (((_x) & 0xfffff) << 12) +#define NFP_ARM_GCSR_PL310_BASE_ADDR_of(_x) (((_x) >> 12) & 0xfffff) +/* Register Type: MPCoreConfig */ +#define NFP_ARM_GCSR_MP0_CFG 0x00e8 +#define NFP_ARM_GCSR_MP0_CFG_SPI_BOOT (0x1 << 14) +#define NFP_ARM_GCSR_MP0_CFG_ENDIAN(_x) (((_x) & 0x3) << 12) +#define NFP_ARM_GCSR_MP0_CFG_ENDIAN_of(_x) (((_x) >> 12) & 0x3) +#define NFP_ARM_GCSR_MP0_CFG_ENDIAN_LITTLE (0) +#define NFP_ARM_GCSR_MP0_CFG_ENDIAN_BIG (1) +#define NFP_ARM_GCSR_MP0_CFG_RESET_VECTOR (0x1 << 8) +#define NFP_ARM_GCSR_MP0_CFG_RESET_VECTOR_LO (0x0) +#define NFP_ARM_GCSR_MP0_CFG_RESET_VECTOR_HI (0x100) +#define NFP_ARM_GCSR_MP0_CFG_OUTCLK_EN(_x) (((_x) & 0xf) << 4) +#define NFP_ARM_GCSR_MP0_CFG_OUTCLK_EN_of(_x) (((_x) >> 4) & 0xf) +#define NFP_ARM_GCSR_MP0_CFG_ARMID(_x) ((_x) & 0xf) +#define NFP_ARM_GCSR_MP0_CFG_ARMID_of(_x) ((_x) & 0xf) +/* Register Type: MPCoreIDCacheDataError */ +#define NFP_ARM_GCSR_MP0_CACHE_ERR 0x00ec +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D7 (0x1 << 15) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D6 (0x1 << 14) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D5 (0x1 << 13) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D4 (0x1 << 12) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D3 (0x1 << 11) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D2 (0x1 << 10) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D1 (0x1 << 9) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_D0 (0x1 << 8) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I7 (0x1 << 7) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I6 (0x1 << 6) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I5 (0x1 << 5) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I4 (0x1 << 4) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I3 (0x1 << 3) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I2 (0x1 << 2) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I1 (0x1 << 1) +#define NFP_ARM_GCSR_MP0_CACHE_ERR_MP0_I0 (0x1) +/* Register Type: ARMDFT */ +#define NFP_ARM_GCSR_DFT 0x0100 +#define NFP_ARM_GCSR_DFT_DBG_REQ (0x1 << 20) +#define NFP_ARM_GCSR_DFT_DBG_EN (0x1 << 19) +#define NFP_ARM_GCSR_DFT_WFE_EVT_TRG (0x1 << 18) +#define NFP_ARM_GCSR_DFT_ETM_WFI_RDY (0x1 << 17) +#define NFP_ARM_GCSR_DFT_ETM_PWR_ON (0x1 << 16) +#define NFP_ARM_GCSR_DFT_BIST_FAIL_of(_x) (((_x) >> 8) & 0xf) +#define NFP_ARM_GCSR_DFT_BIST_DONE_of(_x) (((_x) >> 4) & 0xf) +#define NFP_ARM_GCSR_DFT_BIST_RUN(_x) ((_x) & 0x7) +#define NFP_ARM_GCSR_DFT_BIST_RUN_of(_x) ((_x) & 0x7) + +/* Gasket CSRs */ +/* NOTE: These cannot be remapped, and are always at this location. + */ +#define NFP_ARM_GCSR_START (0xd6000000 + NFP_ARM_GCSR) +#define NFP_ARM_GCSR_SIZE SZ_64K + +/* BAR CSRs + */ +#define NFP_ARM_GCSR_BULK_BITS 11 +#define NFP_ARM_GCSR_EXPA_BITS 15 +#define NFP_ARM_GCSR_EXPL_BITS 18 + +#define NFP_ARM_GCSR_BULK_SHIFT (40 - 11) +#define NFP_ARM_GCSR_EXPA_SHIFT (40 - 15) +#define NFP_ARM_GCSR_EXPL_SHIFT (40 - 18) + +#define NFP_ARM_GCSR_BULK_SIZE (1 << NFP_ARM_GCSR_BULK_SHIFT) +#define NFP_ARM_GCSR_EXPA_SIZE (1 << NFP_ARM_GCSR_EXPA_SHIFT) +#define NFP_ARM_GCSR_EXPL_SIZE (1 << NFP_ARM_GCSR_EXPL_SHIFT) + +#define NFP_ARM_GCSR_EXPL2_CSR(target, action, length, \ + byte_mask, token, signal_master) \ + (NFP_ARM_GCSR_EXPL2_BAR_TGT(target) | \ + NFP_ARM_GCSR_EXPL2_BAR_ACT(action) | \ + NFP_ARM_GCSR_EXPL2_BAR_LEN(length) | \ + NFP_ARM_GCSR_EXPL2_BAR_BYTE_MASK(byte_mask) | \ + NFP_ARM_GCSR_EXPL2_BAR_TOK(token) | \ + NFP_ARM_GCSR_EXPL2_BAR_SIGNAL_MASTER(signal_master)) +#define NFP_ARM_GCSR_EXPL1_CSR(posted, signal_ref, data_master, data_ref) \ + (((posted) ? NFP_ARM_GCSR_EXPL1_BAR_POSTED : 0) | \ + NFP_ARM_GCSR_EXPL1_BAR_SIGNAL_REF(signal_ref) | \ + NFP_ARM_GCSR_EXPL1_BAR_DATA_MASTER(data_master) | \ + NFP_ARM_GCSR_EXPL1_BAR_DATA_REF(data_ref)) +#define NFP_ARM_GCSR_EXPL0_CSR(address) \ + NFP_ARM_GCSR_EXPL0_BAR_ADDR((address) >> NFP_ARM_GCSR_EXPL_SHIFT) +#define NFP_ARM_GCSR_EXPL_POST_EXPECT_A(sig_ref, is_push, is_required) \ + (NFP_ARM_GCSR_EXPL_POST_SIG_A(sig_ref) | \ + ((is_push) ? NFP_ARM_GCSR_EXPL_POST_SIG_A_BUS_PUSH : \ + NFP_ARM_GCSR_EXPL_POST_SIG_A_BUS_PULL) | \ + ((is_required) ? NFP_ARM_GCSR_EXPL_POST_SIG_A_VALID : 0)) +#define NFP_ARM_GCSR_EXPL_POST_EXPECT_B(sig_ref, is_push, is_required) \ + (NFP_ARM_GCSR_EXPL_POST_SIG_B(sig_ref) | \ + ((is_push) ? NFP_ARM_GCSR_EXPL_POST_SIG_B_BUS_PUSH : \ + NFP_ARM_GCSR_EXPL_POST_SIG_B_BUS_PULL) | \ + ((is_required) ? NFP_ARM_GCSR_EXPL_POST_SIG_B_VALID : 0)) + +#define NFP_ARM_GCSR_EXPA_CSR(mode, target, token, is_64, action, address) \ + (((mode) ? NFP_ARM_GCSR_EXPA_BAR_TYPE_EXPL : \ + NFP_ARM_GCSR_EXPA_BAR_TYPE_EXPA) | \ + NFP_ARM_GCSR_EXPA_BAR_TGT(target) | \ + NFP_ARM_GCSR_EXPA_BAR_TOK(token) | \ + ((is_64) ? NFP_ARM_GCSR_EXPA_BAR_LEN_64BIT : \ + NFP_ARM_GCSR_EXPA_BAR_LEN_32BIT) | \ + NFP_ARM_GCSR_EXPA_BAR_ACT(action) | \ + NFP_ARM_GCSR_EXPA_BAR_ADDR((address) >> NFP_ARM_GCSR_EXPA_SHIFT)) + +#define NFP_ARM_GCSR_BULK_CSR(mode, target, token, is_64, address) \ + (((mode) ? NFP_ARM_GCSR_BULK_BAR_TYPE_EXPA : \ + NFP_ARM_GCSR_BULK_BAR_TYPE_BULK) | \ + NFP_ARM_GCSR_BULK_BAR_TGT(target) | \ + NFP_ARM_GCSR_BULK_BAR_TOK(token) | \ + ((is_64) ? NFP_ARM_GCSR_BULK_BAR_LEN_64BIT : \ + NFP_ARM_GCSR_BULK_BAR_LEN_32BIT) | \ + NFP_ARM_GCSR_BULK_BAR_ADDR((address) >> NFP_ARM_GCSR_BULK_SHIFT)) + + /* MP Core CSRs */ +#define NFP_ARM_MPCORE_SIZE SZ_128K + + /* PL320 CSRs */ +#define NFP_ARM_PCSR_SIZE SZ_64K + +#endif /* NFP_ARM_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h new file mode 100644 index 000000000..3d379e937 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -0,0 +1,432 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_cpp.h + * Interface for low-level NFP CPP access. + * Authors: Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ +#ifndef __NFP_CPP_H__ +#define __NFP_CPP_H__ + +#include <linux/ctype.h> +#include <linux/types.h> +#include <linux/sizes.h> + +#ifndef NFP_SUBSYS +#define NFP_SUBSYS "nfp" +#endif + +#define nfp_err(cpp, fmt, args...) \ + dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) +#define nfp_warn(cpp, fmt, args...) \ + dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) +#define nfp_info(cpp, fmt, args...) \ + dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) +#define nfp_dbg(cpp, fmt, args...) \ + dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) +#define nfp_printk(level, cpp, fmt, args...) \ + dev_printk(level, nfp_cpp_device(cpp)->parent, \ + NFP_SUBSYS ": " fmt, ## args) + +#define PCI_64BIT_BAR_COUNT 3 + +#define NFP_CPP_NUM_TARGETS 16 +/* Max size of area it should be safe to request */ +#define NFP_CPP_SAFE_AREA_SIZE SZ_2M + +/* NFP_MUTEX_WAIT_* are timeouts in seconds when waiting for a mutex */ +#define NFP_MUTEX_WAIT_FIRST_WARN 15 +#define NFP_MUTEX_WAIT_NEXT_WARN 5 +#define NFP_MUTEX_WAIT_ERROR 60 + +struct device; + +struct nfp_cpp_area; +struct nfp_cpp; +struct resource; + +/* Wildcard indicating a CPP read or write action + * + * The action used will be either read or write depending on whether a + * read or write instruction/call is performed on the NFP_CPP_ID. It + * is recomended that the RW action is used even if all actions to be + * performed on a NFP_CPP_ID are known to be only reads or writes. + * Doing so will in many cases save NFP CPP internal software + * resources. + */ +#define NFP_CPP_ACTION_RW 32 + +#define NFP_CPP_TARGET_ID_MASK 0x1f + +#define NFP_CPP_ATOMIC_RD(target, island) \ + NFP_CPP_ISLAND_ID((target), 3, 0, (island)) +#define NFP_CPP_ATOMIC_WR(target, island) \ + NFP_CPP_ISLAND_ID((target), 4, 0, (island)) + +/** + * NFP_CPP_ID() - pack target, token, and action into a CPP ID. + * @target: NFP CPP target id + * @action: NFP CPP action id + * @token: NFP CPP token id + * + * Create a 32-bit CPP identifier representing the access to be made. + * These identifiers are used as parameters to other NFP CPP + * functions. Some CPP devices may allow wildcard identifiers to be + * specified. + * + * Return: NFP CPP ID + */ +#define NFP_CPP_ID(target, action, token) \ + ((((target) & 0x7f) << 24) | (((token) & 0xff) << 16) | \ + (((action) & 0xff) << 8)) + +/** + * NFP_CPP_ISLAND_ID() - pack target, token, action, and island into a CPP ID. + * @target: NFP CPP target id + * @action: NFP CPP action id + * @token: NFP CPP token id + * @island: NFP CPP island id + * + * Create a 32-bit CPP identifier representing the access to be made. + * These identifiers are used as parameters to other NFP CPP + * functions. Some CPP devices may allow wildcard identifiers to be + * specified. + * + * Return: NFP CPP ID + */ +#define NFP_CPP_ISLAND_ID(target, action, token, island) \ + ((((target) & 0x7f) << 24) | (((token) & 0xff) << 16) | \ + (((action) & 0xff) << 8) | (((island) & 0xff) << 0)) + +/** + * NFP_CPP_ID_TARGET_of() - Return the NFP CPP target of a NFP CPP ID + * @id: NFP CPP ID + * + * Return: NFP CPP target + */ +static inline u8 NFP_CPP_ID_TARGET_of(u32 id) +{ + return (id >> 24) & NFP_CPP_TARGET_ID_MASK; +} + +/** + * NFP_CPP_ID_TOKEN_of() - Return the NFP CPP token of a NFP CPP ID + * @id: NFP CPP ID + * Return: NFP CPP token + */ +static inline u8 NFP_CPP_ID_TOKEN_of(u32 id) +{ + return (id >> 16) & 0xff; +} + +/** + * NFP_CPP_ID_ACTION_of() - Return the NFP CPP action of a NFP CPP ID + * @id: NFP CPP ID + * + * Return: NFP CPP action + */ +static inline u8 NFP_CPP_ID_ACTION_of(u32 id) +{ + return (id >> 8) & 0xff; +} + +/** + * NFP_CPP_ID_ISLAND_of() - Return the NFP CPP island of a NFP CPP ID + * @id: NFP CPP ID + * + * Return: NFP CPP island + */ +static inline u8 NFP_CPP_ID_ISLAND_of(u32 id) +{ + return (id >> 0) & 0xff; +} + +/* NFP Interface types - logical interface for this CPP connection + * 4 bits are reserved for interface type. + */ +#define NFP_CPP_INTERFACE_TYPE_INVALID 0x0 +#define NFP_CPP_INTERFACE_TYPE_PCI 0x1 +#define NFP_CPP_INTERFACE_TYPE_ARM 0x2 +#define NFP_CPP_INTERFACE_TYPE_RPC 0x3 +#define NFP_CPP_INTERFACE_TYPE_ILA 0x4 + +/** + * NFP_CPP_INTERFACE() - Construct a 16-bit NFP Interface ID + * @type: NFP Interface Type + * @unit: Unit identifier for the interface type + * @channel: Channel identifier for the interface unit + * + * Interface IDs consists of 4 bits of interface type, + * 4 bits of unit identifier, and 8 bits of channel identifier. + * + * The NFP Interface ID is used in the implementation of + * NFP CPP API mutexes, which use the MU Atomic CompareAndWrite + * operation - hence the limit to 16 bits to be able to + * use the NFP Interface ID as a lock owner. + * + * Return: Interface ID + */ +#define NFP_CPP_INTERFACE(type, unit, channel) \ + ((((type) & 0xf) << 12) | \ + (((unit) & 0xf) << 8) | \ + (((channel) & 0xff) << 0)) + +/** + * NFP_CPP_INTERFACE_TYPE_of() - Get the interface type + * @interface: NFP Interface ID + * Return: NFP Interface ID's type + */ +#define NFP_CPP_INTERFACE_TYPE_of(interface) (((interface) >> 12) & 0xf) + +/** + * NFP_CPP_INTERFACE_UNIT_of() - Get the interface unit + * @interface: NFP Interface ID + * Return: NFP Interface ID's unit + */ +#define NFP_CPP_INTERFACE_UNIT_of(interface) (((interface) >> 8) & 0xf) + +/** + * NFP_CPP_INTERFACE_CHANNEL_of() - Get the interface channel + * @interface: NFP Interface ID + * Return: NFP Interface ID's channel + */ +#define NFP_CPP_INTERFACE_CHANNEL_of(interface) (((interface) >> 0) & 0xff) + +/* Implemented in nfp_cppcore.c */ +void nfp_cpp_free(struct nfp_cpp *cpp); +u32 nfp_cpp_model(struct nfp_cpp *cpp); +u16 nfp_cpp_interface(struct nfp_cpp *cpp); +int nfp_cpp_serial(struct nfp_cpp *cpp, const u8 **serial); +unsigned int nfp_cpp_mu_locality_lsb(struct nfp_cpp *cpp); + +struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp, + u32 cpp_id, + const char *name, + unsigned long long address, + unsigned long size); +struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, + unsigned long size); +struct nfp_cpp_area * +nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 cpp_id, + unsigned long long address, unsigned long size); +void nfp_cpp_area_free(struct nfp_cpp_area *area); +int nfp_cpp_area_acquire(struct nfp_cpp_area *area); +int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area); +void nfp_cpp_area_release(struct nfp_cpp_area *area); +void nfp_cpp_area_release_free(struct nfp_cpp_area *area); +int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset, + void *buffer, size_t length); +int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset, + const void *buffer, size_t length); +size_t nfp_cpp_area_size(struct nfp_cpp_area *area); +const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area); +void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area); +struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area); +struct resource *nfp_cpp_area_resource(struct nfp_cpp_area *area); +phys_addr_t nfp_cpp_area_phys(struct nfp_cpp_area *area); +void __iomem *nfp_cpp_area_iomem(struct nfp_cpp_area *area); + +int nfp_cpp_area_readl(struct nfp_cpp_area *area, unsigned long offset, + u32 *value); +int nfp_cpp_area_writel(struct nfp_cpp_area *area, unsigned long offset, + u32 value); +int nfp_cpp_area_readq(struct nfp_cpp_area *area, unsigned long offset, + u64 *value); +int nfp_cpp_area_writeq(struct nfp_cpp_area *area, unsigned long offset, + u64 value); +int nfp_cpp_area_fill(struct nfp_cpp_area *area, unsigned long offset, + u32 value, size_t length); + +int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_tgt, u32 *value); +int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_tgt, u32 value); +int nfp_xpb_writelm(struct nfp_cpp *cpp, u32 xpb_tgt, u32 mask, u32 value); + +/* Implemented in nfp_cpplib.c */ +int nfp_cpp_read(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, void *kernel_vaddr, size_t length); +int nfp_cpp_write(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, const void *kernel_vaddr, + size_t length); +int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u32 *value); +int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u32 value); +int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u64 *value); +int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u64 value); + +u8 __iomem * +nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, u32 cpp_id, u64 addr, + unsigned long size, struct nfp_cpp_area **area); + +struct nfp_cpp_mutex; + +int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target, + unsigned long long address, u32 key_id); +struct nfp_cpp_mutex *nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target, + unsigned long long address, + u32 key_id); +void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address); + +/** + * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle + * @cpp: CPP handle + * + * Return: PCI unit for the NFP CPP handle + */ +static inline u8 nfp_cppcore_pcie_unit(struct nfp_cpp *cpp) +{ + return NFP_CPP_INTERFACE_UNIT_of(nfp_cpp_interface(cpp)); +} + +struct nfp_cpp_explicit; + +struct nfp_cpp_explicit_command { + u32 cpp_id; + u16 data_ref; + u8 data_master; + u8 len; + u8 byte_mask; + u8 signal_master; + u8 signal_ref; + u8 posted; + u8 siga; + u8 sigb; + s8 siga_mode; + s8 sigb_mode; +}; + +#define NFP_SERIAL_LEN 6 + +/** + * struct nfp_cpp_operations - NFP CPP operations structure + * @area_priv_size: Size of the nfp_cpp_area private data + * @owner: Owner module + * @init: Initialize the NFP CPP bus + * @free: Free the bus + * @read_serial: Read serial number to memory provided + * @get_interface: Return CPP interface + * @area_init: Initialize a new NFP CPP area (not serialized) + * @area_cleanup: Clean up a NFP CPP area (not serialized) + * @area_acquire: Acquire the NFP CPP area (serialized) + * @area_release: Release area (serialized) + * @area_resource: Get resource range of area (not serialized) + * @area_phys: Get physical address of area (not serialized) + * @area_iomem: Get iomem of area (not serialized) + * @area_read: Perform a read from a NFP CPP area (serialized) + * @area_write: Perform a write to a NFP CPP area (serialized) + * @explicit_priv_size: Size of an explicit's private area + * @explicit_acquire: Acquire an explicit area + * @explicit_release: Release an explicit area + * @explicit_put: Write data to send + * @explicit_get: Read data received + * @explicit_do: Perform the transaction + */ +struct nfp_cpp_operations { + size_t area_priv_size; + struct module *owner; + + int (*init)(struct nfp_cpp *cpp); + void (*free)(struct nfp_cpp *cpp); + + int (*read_serial)(struct device *dev, u8 *serial); + int (*get_interface)(struct device *dev); + + int (*area_init)(struct nfp_cpp_area *area, + u32 dest, unsigned long long address, + unsigned long size); + void (*area_cleanup)(struct nfp_cpp_area *area); + int (*area_acquire)(struct nfp_cpp_area *area); + void (*area_release)(struct nfp_cpp_area *area); + struct resource *(*area_resource)(struct nfp_cpp_area *area); + phys_addr_t (*area_phys)(struct nfp_cpp_area *area); + void __iomem *(*area_iomem)(struct nfp_cpp_area *area); + int (*area_read)(struct nfp_cpp_area *area, void *kernel_vaddr, + unsigned long offset, unsigned int length); + int (*area_write)(struct nfp_cpp_area *area, const void *kernel_vaddr, + unsigned long offset, unsigned int length); + + size_t explicit_priv_size; + int (*explicit_acquire)(struct nfp_cpp_explicit *expl); + void (*explicit_release)(struct nfp_cpp_explicit *expl); + int (*explicit_put)(struct nfp_cpp_explicit *expl, + const void *buff, size_t len); + int (*explicit_get)(struct nfp_cpp_explicit *expl, + void *buff, size_t len); + int (*explicit_do)(struct nfp_cpp_explicit *expl, + const struct nfp_cpp_explicit_command *cmd, + u64 address); +}; + +struct nfp_cpp * +nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, + struct device *parent, void *priv); +void *nfp_cpp_priv(struct nfp_cpp *priv); + +int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size); + +/* The following section contains extensions to the + * NFP CPP API, to be used in a Linux kernel-space context. + */ + +/* Use this channel ID for multiple virtual channel interfaces + * (ie ARM and PCIe) when setting up the interface field. + */ +#define NFP_CPP_INTERFACE_CHANNEL_PEROPENER 255 +struct device *nfp_cpp_device(struct nfp_cpp *cpp); + +/* Return code masks for nfp_cpp_explicit_do() + */ +#define NFP_SIGNAL_MASK_A BIT(0) /* Signal A fired */ +#define NFP_SIGNAL_MASK_B BIT(1) /* Signal B fired */ + +enum nfp_cpp_explicit_signal_mode { + NFP_SIGNAL_NONE = 0, + NFP_SIGNAL_PUSH = 1, + NFP_SIGNAL_PUSH_OPTIONAL = -1, + NFP_SIGNAL_PULL = 2, + NFP_SIGNAL_PULL_OPTIONAL = -2, +}; + +struct nfp_cpp_explicit *nfp_cpp_explicit_acquire(struct nfp_cpp *cpp); +int nfp_cpp_explicit_set_target(struct nfp_cpp_explicit *expl, u32 cpp_id, + u8 len, u8 mask); +int nfp_cpp_explicit_set_data(struct nfp_cpp_explicit *expl, + u8 data_master, u16 data_ref); +int nfp_cpp_explicit_set_signal(struct nfp_cpp_explicit *expl, + u8 signal_master, u8 signal_ref); +int nfp_cpp_explicit_set_posted(struct nfp_cpp_explicit *expl, int posted, + u8 siga, + enum nfp_cpp_explicit_signal_mode siga_mode, + u8 sigb, + enum nfp_cpp_explicit_signal_mode sigb_mode); +int nfp_cpp_explicit_put(struct nfp_cpp_explicit *expl, + const void *buff, size_t len); +int nfp_cpp_explicit_do(struct nfp_cpp_explicit *expl, u64 address); +int nfp_cpp_explicit_get(struct nfp_cpp_explicit *expl, void *buff, size_t len); +void nfp_cpp_explicit_release(struct nfp_cpp_explicit *expl); +struct nfp_cpp *nfp_cpp_explicit_cpp(struct nfp_cpp_explicit *expl); +void *nfp_cpp_explicit_priv(struct nfp_cpp_explicit *cpp_explicit); + +/* Implemented in nfp_cpplib.c */ + +int nfp_cpp_model_autodetect(struct nfp_cpp *cpp, u32 *model); + +int nfp_cpp_explicit_read(struct nfp_cpp *cpp, u32 cpp_id, + u64 addr, void *buff, size_t len, + int width_read); + +int nfp_cpp_explicit_write(struct nfp_cpp *cpp, u32 cpp_id, + u64 addr, const void *buff, size_t len, + int width_write); + +#endif /* !__NFP_CPP_H__ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c new file mode 100644 index 000000000..a8286d003 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -0,0 +1,1499 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_cppcore.c + * Provides low-level access to the NFP's internal CPP bus + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <asm/unaligned.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/wait.h> + +#include "nfp_arm.h" +#include "nfp_cpp.h" +#include "nfp6000/nfp6000.h" + +#define NFP_ARM_GCSR_SOFTMODEL2 0x0000014c +#define NFP_ARM_GCSR_SOFTMODEL3 0x00000150 + +struct nfp_cpp_resource { + struct list_head list; + const char *name; + u32 cpp_id; + u64 start; + u64 end; +}; + +/** + * struct nfp_cpp - main nfpcore device structure + * Following fields are read-only after probe() exits or netdevs are spawned. + * @dev: embedded device structure + * @op: low-level implementation ops + * @priv: private data of the low-level implementation + * @model: chip model + * @interface: chip interface id we are using to reach it + * @serial: chip serial number + * @imb_cat_table: CPP Mapping Table + * @mu_locality_lsb: MU access type bit offset + * + * Following fields use explicit locking: + * @resource_list: NFP CPP resource list + * @resource_lock: protects @resource_list + * + * @area_cache_list: cached areas for cpp/xpb read/write speed up + * @area_cache_mutex: protects @area_cache_list + * + * @waitq: area wait queue + */ +struct nfp_cpp { + struct device dev; + + void *priv; + + u32 model; + u16 interface; + u8 serial[NFP_SERIAL_LEN]; + + const struct nfp_cpp_operations *op; + struct list_head resource_list; + rwlock_t resource_lock; + wait_queue_head_t waitq; + + u32 imb_cat_table[16]; + unsigned int mu_locality_lsb; + + struct mutex area_cache_mutex; + struct list_head area_cache_list; +}; + +/* Element of the area_cache_list */ +struct nfp_cpp_area_cache { + struct list_head entry; + u32 id; + u64 addr; + u32 size; + struct nfp_cpp_area *area; +}; + +struct nfp_cpp_area { + struct nfp_cpp *cpp; + struct kref kref; + atomic_t refcount; + struct mutex mutex; /* Lock for the area's refcount */ + unsigned long long offset; + unsigned long size; + struct nfp_cpp_resource resource; + void __iomem *iomem; + /* Here follows the 'priv' part of nfp_cpp_area. */ +}; + +struct nfp_cpp_explicit { + struct nfp_cpp *cpp; + struct nfp_cpp_explicit_command cmd; + /* Here follows the 'priv' part of nfp_cpp_area. */ +}; + +static void __resource_add(struct list_head *head, struct nfp_cpp_resource *res) +{ + struct nfp_cpp_resource *tmp; + struct list_head *pos; + + list_for_each(pos, head) { + tmp = container_of(pos, struct nfp_cpp_resource, list); + + if (tmp->cpp_id > res->cpp_id) + break; + + if (tmp->cpp_id == res->cpp_id && tmp->start > res->start) + break; + } + + list_add_tail(&res->list, pos); +} + +static void __resource_del(struct nfp_cpp_resource *res) +{ + list_del_init(&res->list); +} + +static void __release_cpp_area(struct kref *kref) +{ + struct nfp_cpp_area *area = + container_of(kref, struct nfp_cpp_area, kref); + struct nfp_cpp *cpp = nfp_cpp_area_cpp(area); + + if (area->cpp->op->area_cleanup) + area->cpp->op->area_cleanup(area); + + write_lock(&cpp->resource_lock); + __resource_del(&area->resource); + write_unlock(&cpp->resource_lock); + kfree(area); +} + +static void nfp_cpp_area_put(struct nfp_cpp_area *area) +{ + kref_put(&area->kref, __release_cpp_area); +} + +static struct nfp_cpp_area *nfp_cpp_area_get(struct nfp_cpp_area *area) +{ + kref_get(&area->kref); + + return area; +} + +/** + * nfp_cpp_free() - free the CPP handle + * @cpp: CPP handle + */ +void nfp_cpp_free(struct nfp_cpp *cpp) +{ + struct nfp_cpp_area_cache *cache, *ctmp; + struct nfp_cpp_resource *res, *rtmp; + + /* Remove all caches */ + list_for_each_entry_safe(cache, ctmp, &cpp->area_cache_list, entry) { + list_del(&cache->entry); + if (cache->id) + nfp_cpp_area_release(cache->area); + nfp_cpp_area_free(cache->area); + kfree(cache); + } + + /* There should be no dangling areas at this point */ + WARN_ON(!list_empty(&cpp->resource_list)); + + /* .. but if they weren't, try to clean up. */ + list_for_each_entry_safe(res, rtmp, &cpp->resource_list, list) { + struct nfp_cpp_area *area = container_of(res, + struct nfp_cpp_area, + resource); + + dev_err(cpp->dev.parent, "Dangling area: %d:%d:%d:0x%0llx-0x%0llx%s%s\n", + NFP_CPP_ID_TARGET_of(res->cpp_id), + NFP_CPP_ID_ACTION_of(res->cpp_id), + NFP_CPP_ID_TOKEN_of(res->cpp_id), + res->start, res->end, + res->name ? " " : "", + res->name ? res->name : ""); + + if (area->cpp->op->area_release) + area->cpp->op->area_release(area); + + __release_cpp_area(&area->kref); + } + + if (cpp->op->free) + cpp->op->free(cpp); + + device_unregister(&cpp->dev); + + kfree(cpp); +} + +/** + * nfp_cpp_model() - Retrieve the Model ID of the NFP + * @cpp: NFP CPP handle + * + * Return: NFP CPP Model ID + */ +u32 nfp_cpp_model(struct nfp_cpp *cpp) +{ + return cpp->model; +} + +/** + * nfp_cpp_interface() - Retrieve the Interface ID of the NFP + * @cpp: NFP CPP handle + * + * Return: NFP CPP Interface ID + */ +u16 nfp_cpp_interface(struct nfp_cpp *cpp) +{ + return cpp->interface; +} + +/** + * nfp_cpp_serial() - Retrieve the Serial ID of the NFP + * @cpp: NFP CPP handle + * @serial: Pointer to NFP serial number + * + * Return: Length of NFP serial number + */ +int nfp_cpp_serial(struct nfp_cpp *cpp, const u8 **serial) +{ + *serial = &cpp->serial[0]; + return sizeof(cpp->serial); +} + +#define NFP_IMB_TGTADDRESSMODECFG_MODE_of(_x) (((_x) >> 13) & 0x7) +#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE BIT(12) +#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_32_BIT 0 +#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_40_BIT BIT(12) + +static int nfp_cpp_set_mu_locality_lsb(struct nfp_cpp *cpp) +{ + unsigned int mode, addr40; + u32 imbcppat; + int res; + + imbcppat = cpp->imb_cat_table[NFP_CPP_TARGET_MU]; + mode = NFP_IMB_TGTADDRESSMODECFG_MODE_of(imbcppat); + addr40 = !!(imbcppat & NFP_IMB_TGTADDRESSMODECFG_ADDRMODE); + + res = nfp_cppat_mu_locality_lsb(mode, addr40); + if (res < 0) + return res; + cpp->mu_locality_lsb = res; + + return 0; +} + +unsigned int nfp_cpp_mu_locality_lsb(struct nfp_cpp *cpp) +{ + return cpp->mu_locality_lsb; +} + +/** + * nfp_cpp_area_alloc_with_name() - allocate a new CPP area + * @cpp: CPP device handle + * @dest: NFP CPP ID + * @name: Name of region + * @address: Address of region + * @size: Size of region + * + * Allocate and initialize a CPP area structure. The area must later + * be locked down with an 'acquire' before it can be safely accessed. + * + * NOTE: @address and @size must be 32-bit aligned values. + * + * Return: NFP CPP area handle, or NULL + */ +struct nfp_cpp_area * +nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp, u32 dest, const char *name, + unsigned long long address, unsigned long size) +{ + struct nfp_cpp_area *area; + u64 tmp64 = address; + int err, name_len; + + /* Remap from cpp_island to cpp_target */ + err = nfp_target_cpp(dest, tmp64, &dest, &tmp64, cpp->imb_cat_table); + if (err < 0) + return NULL; + + address = tmp64; + + if (!name) + name = "(reserved)"; + + name_len = strlen(name) + 1; + area = kzalloc(sizeof(*area) + cpp->op->area_priv_size + name_len, + GFP_KERNEL); + if (!area) + return NULL; + + area->cpp = cpp; + area->resource.name = (void *)area + sizeof(*area) + + cpp->op->area_priv_size; + memcpy((char *)area->resource.name, name, name_len); + + area->resource.cpp_id = dest; + area->resource.start = address; + area->resource.end = area->resource.start + size - 1; + INIT_LIST_HEAD(&area->resource.list); + + atomic_set(&area->refcount, 0); + kref_init(&area->kref); + mutex_init(&area->mutex); + + if (cpp->op->area_init) { + int err; + + err = cpp->op->area_init(area, dest, address, size); + if (err < 0) { + kfree(area); + return NULL; + } + } + + write_lock(&cpp->resource_lock); + __resource_add(&cpp->resource_list, &area->resource); + write_unlock(&cpp->resource_lock); + + area->offset = address; + area->size = size; + + return area; +} + +/** + * nfp_cpp_area_alloc() - allocate a new CPP area + * @cpp: CPP handle + * @dest: CPP id + * @address: Start address on CPP target + * @size: Size of area in bytes + * + * Allocate and initialize a CPP area structure. The area must later + * be locked down with an 'acquire' before it can be safely accessed. + * + * NOTE: @address and @size must be 32-bit aligned values. + * + * Return: NFP CPP Area handle, or NULL + */ +struct nfp_cpp_area * +nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest, + unsigned long long address, unsigned long size) +{ + return nfp_cpp_area_alloc_with_name(cpp, dest, NULL, address, size); +} + +/** + * nfp_cpp_area_alloc_acquire() - allocate a new CPP area and lock it down + * @cpp: CPP handle + * @name: Name of region + * @dest: CPP id + * @address: Start address on CPP target + * @size: Size of area + * + * Allocate and initialize a CPP area structure, and lock it down so + * that it can be accessed directly. + * + * NOTE: @address and @size must be 32-bit aligned values. + * The area must also be 'released' when the structure is freed. + * + * Return: NFP CPP Area handle, or NULL + */ +struct nfp_cpp_area * +nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 dest, + unsigned long long address, unsigned long size) +{ + struct nfp_cpp_area *area; + + area = nfp_cpp_area_alloc_with_name(cpp, dest, name, address, size); + if (!area) + return NULL; + + if (nfp_cpp_area_acquire(area)) { + nfp_cpp_area_free(area); + return NULL; + } + + return area; +} + +/** + * nfp_cpp_area_free() - free up the CPP area + * @area: CPP area handle + * + * Frees up memory resources held by the CPP area. + */ +void nfp_cpp_area_free(struct nfp_cpp_area *area) +{ + if (atomic_read(&area->refcount)) + nfp_warn(area->cpp, "Warning: freeing busy area\n"); + nfp_cpp_area_put(area); +} + +static bool nfp_cpp_area_acquire_try(struct nfp_cpp_area *area, int *status) +{ + *status = area->cpp->op->area_acquire(area); + + return *status != -EAGAIN; +} + +static int __nfp_cpp_area_acquire(struct nfp_cpp_area *area) +{ + int err, status; + + if (atomic_inc_return(&area->refcount) > 1) + return 0; + + if (!area->cpp->op->area_acquire) + return 0; + + err = wait_event_interruptible(area->cpp->waitq, + nfp_cpp_area_acquire_try(area, &status)); + if (!err) + err = status; + if (err) { + nfp_warn(area->cpp, "Warning: area wait failed: %d\n", err); + atomic_dec(&area->refcount); + return err; + } + + nfp_cpp_area_get(area); + + return 0; +} + +/** + * nfp_cpp_area_acquire() - lock down a CPP area for access + * @area: CPP area handle + * + * Locks down the CPP area for a potential long term activity. Area + * must always be locked down before being accessed. + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_area_acquire(struct nfp_cpp_area *area) +{ + int ret; + + mutex_lock(&area->mutex); + ret = __nfp_cpp_area_acquire(area); + mutex_unlock(&area->mutex); + + return ret; +} + +/** + * nfp_cpp_area_acquire_nonblocking() - lock down a CPP area for access + * @area: CPP area handle + * + * Locks down the CPP area for a potential long term activity. Area + * must always be locked down before being accessed. + * + * NOTE: Returns -EAGAIN is no area is available + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area) +{ + mutex_lock(&area->mutex); + if (atomic_inc_return(&area->refcount) == 1) { + if (area->cpp->op->area_acquire) { + int err; + + err = area->cpp->op->area_acquire(area); + if (err < 0) { + atomic_dec(&area->refcount); + mutex_unlock(&area->mutex); + return err; + } + } + } + mutex_unlock(&area->mutex); + + nfp_cpp_area_get(area); + return 0; +} + +/** + * nfp_cpp_area_release() - release a locked down CPP area + * @area: CPP area handle + * + * Releases a previously locked down CPP area. + */ +void nfp_cpp_area_release(struct nfp_cpp_area *area) +{ + mutex_lock(&area->mutex); + /* Only call the release on refcount == 0 */ + if (atomic_dec_and_test(&area->refcount)) { + if (area->cpp->op->area_release) { + area->cpp->op->area_release(area); + /* Let anyone waiting for a BAR try to get one.. */ + wake_up_interruptible_all(&area->cpp->waitq); + } + } + mutex_unlock(&area->mutex); + + nfp_cpp_area_put(area); +} + +/** + * nfp_cpp_area_release_free() - release CPP area and free it + * @area: CPP area handle + * + * Releases CPP area and frees up memory resources held by the it. + */ +void nfp_cpp_area_release_free(struct nfp_cpp_area *area) +{ + nfp_cpp_area_release(area); + nfp_cpp_area_free(area); +} + +/** + * nfp_cpp_area_read() - read data from CPP area + * @area: CPP area handle + * @offset: offset into CPP area + * @kernel_vaddr: kernel address to put data into + * @length: number of bytes to read + * + * Read data from indicated CPP region. + * + * NOTE: @offset and @length must be 32-bit aligned values. + * Area must have been locked down with an 'acquire'. + * + * Return: length of io, or -ERRNO + */ +int nfp_cpp_area_read(struct nfp_cpp_area *area, + unsigned long offset, void *kernel_vaddr, + size_t length) +{ + return area->cpp->op->area_read(area, kernel_vaddr, offset, length); +} + +/** + * nfp_cpp_area_write() - write data to CPP area + * @area: CPP area handle + * @offset: offset into CPP area + * @kernel_vaddr: kernel address to read data from + * @length: number of bytes to write + * + * Write data to indicated CPP region. + * + * NOTE: @offset and @length must be 32-bit aligned values. + * Area must have been locked down with an 'acquire'. + * + * Return: length of io, or -ERRNO + */ +int nfp_cpp_area_write(struct nfp_cpp_area *area, + unsigned long offset, const void *kernel_vaddr, + size_t length) +{ + return area->cpp->op->area_write(area, kernel_vaddr, offset, length); +} + +/** + * nfp_cpp_area_size() - return size of a CPP area + * @cpp_area: CPP area handle + * + * Return: Size of the area + */ +size_t nfp_cpp_area_size(struct nfp_cpp_area *cpp_area) +{ + return cpp_area->size; +} + +/** + * nfp_cpp_area_name() - return name of a CPP area + * @cpp_area: CPP area handle + * + * Return: Name of the area, or NULL + */ +const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area) +{ + return cpp_area->resource.name; +} + +/** + * nfp_cpp_area_priv() - return private struct for CPP area + * @cpp_area: CPP area handle + * + * Return: Private data for the CPP area + */ +void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area) +{ + return &cpp_area[1]; +} + +/** + * nfp_cpp_area_cpp() - return CPP handle for CPP area + * @cpp_area: CPP area handle + * + * Return: NFP CPP handle + */ +struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area) +{ + return cpp_area->cpp; +} + +/** + * nfp_cpp_area_resource() - get resource + * @area: CPP area handle + * + * NOTE: Area must have been locked down with an 'acquire'. + * + * Return: struct resource pointer, or NULL + */ +struct resource *nfp_cpp_area_resource(struct nfp_cpp_area *area) +{ + struct resource *res = NULL; + + if (area->cpp->op->area_resource) + res = area->cpp->op->area_resource(area); + + return res; +} + +/** + * nfp_cpp_area_phys() - get physical address of CPP area + * @area: CPP area handle + * + * NOTE: Area must have been locked down with an 'acquire'. + * + * Return: phy_addr_t of the area, or NULL + */ +phys_addr_t nfp_cpp_area_phys(struct nfp_cpp_area *area) +{ + phys_addr_t addr = ~0; + + if (area->cpp->op->area_phys) + addr = area->cpp->op->area_phys(area); + + return addr; +} + +/** + * nfp_cpp_area_iomem() - get IOMEM region for CPP area + * @area: CPP area handle + * + * Returns an iomem pointer for use with readl()/writel() style + * operations. + * + * NOTE: Area must have been locked down with an 'acquire'. + * + * Return: __iomem pointer to the area, or NULL + */ +void __iomem *nfp_cpp_area_iomem(struct nfp_cpp_area *area) +{ + void __iomem *iomem = NULL; + + if (area->cpp->op->area_iomem) + iomem = area->cpp->op->area_iomem(area); + + return iomem; +} + +/** + * nfp_cpp_area_readl() - Read a u32 word from an area + * @area: CPP Area handle + * @offset: Offset into area + * @value: Pointer to read buffer + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_area_readl(struct nfp_cpp_area *area, + unsigned long offset, u32 *value) +{ + u8 tmp[4]; + int n; + + n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; + + *value = get_unaligned_le32(tmp); + return 0; +} + +/** + * nfp_cpp_area_writel() - Write a u32 word to an area + * @area: CPP Area handle + * @offset: Offset into area + * @value: Value to write + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_area_writel(struct nfp_cpp_area *area, + unsigned long offset, u32 value) +{ + u8 tmp[4]; + int n; + + put_unaligned_le32(value, tmp); + n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; +} + +/** + * nfp_cpp_area_readq() - Read a u64 word from an area + * @area: CPP Area handle + * @offset: Offset into area + * @value: Pointer to read buffer + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_area_readq(struct nfp_cpp_area *area, + unsigned long offset, u64 *value) +{ + u8 tmp[8]; + int n; + + n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; + + *value = get_unaligned_le64(tmp); + return 0; +} + +/** + * nfp_cpp_area_writeq() - Write a u64 word to an area + * @area: CPP Area handle + * @offset: Offset into area + * @value: Value to write + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_area_writeq(struct nfp_cpp_area *area, + unsigned long offset, u64 value) +{ + u8 tmp[8]; + int n; + + put_unaligned_le64(value, tmp); + n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; +} + +/** + * nfp_cpp_area_fill() - fill a CPP area with a value + * @area: CPP area + * @offset: offset into CPP area + * @value: value to fill with + * @length: length of area to fill + * + * Fill indicated area with given value. + * + * Return: length of io, or -ERRNO + */ +int nfp_cpp_area_fill(struct nfp_cpp_area *area, + unsigned long offset, u32 value, size_t length) +{ + u8 tmp[4]; + size_t i; + int k; + + put_unaligned_le32(value, tmp); + + if (offset % sizeof(tmp) || length % sizeof(tmp)) + return -EINVAL; + + for (i = 0; i < length; i += sizeof(tmp)) { + k = nfp_cpp_area_write(area, offset + i, &tmp, sizeof(tmp)); + if (k < 0) + return k; + } + + return i; +} + +/** + * nfp_cpp_area_cache_add() - Permanently reserve and area for the hot cache + * @cpp: NFP CPP handle + * @size: Size of the area - MUST BE A POWER OF 2. + */ +int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) +{ + struct nfp_cpp_area_cache *cache; + struct nfp_cpp_area *area; + + /* Allocate an area - we use the MU target's base as a placeholder, + * as all supported chips have a MU. + */ + area = nfp_cpp_area_alloc(cpp, NFP_CPP_ID(7, NFP_CPP_ACTION_RW, 0), + 0, size); + if (!area) + return -ENOMEM; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) { + nfp_cpp_area_free(area); + return -ENOMEM; + } + + cache->id = 0; + cache->addr = 0; + cache->size = size; + cache->area = area; + mutex_lock(&cpp->area_cache_mutex); + list_add_tail(&cache->entry, &cpp->area_cache_list); + mutex_unlock(&cpp->area_cache_mutex); + + return 0; +} + +static struct nfp_cpp_area_cache * +area_cache_get(struct nfp_cpp *cpp, u32 id, + u64 addr, unsigned long *offset, size_t length) +{ + struct nfp_cpp_area_cache *cache; + int err; + + /* Early exit when length == 0, which prevents + * the need for special case code below when + * checking against available cache size. + */ + if (length == 0 || id == 0) + return NULL; + + /* Remap from cpp_island to cpp_target */ + err = nfp_target_cpp(id, addr, &id, &addr, cpp->imb_cat_table); + if (err < 0) + return NULL; + + mutex_lock(&cpp->area_cache_mutex); + + if (list_empty(&cpp->area_cache_list)) { + mutex_unlock(&cpp->area_cache_mutex); + return NULL; + } + + addr += *offset; + + /* See if we have a match */ + list_for_each_entry(cache, &cpp->area_cache_list, entry) { + if (id == cache->id && + addr >= cache->addr && + addr + length <= cache->addr + cache->size) + goto exit; + } + + /* No matches - inspect the tail of the LRU */ + cache = list_entry(cpp->area_cache_list.prev, + struct nfp_cpp_area_cache, entry); + + /* Can we fit in the cache entry? */ + if (round_down(addr + length - 1, cache->size) != + round_down(addr, cache->size)) { + mutex_unlock(&cpp->area_cache_mutex); + return NULL; + } + + /* If id != 0, we will need to release it */ + if (cache->id) { + nfp_cpp_area_release(cache->area); + cache->id = 0; + cache->addr = 0; + } + + /* Adjust the start address to be cache size aligned */ + cache->addr = addr & ~(u64)(cache->size - 1); + + /* Re-init to the new ID and address */ + if (cpp->op->area_init) { + err = cpp->op->area_init(cache->area, + id, cache->addr, cache->size); + if (err < 0) { + mutex_unlock(&cpp->area_cache_mutex); + return NULL; + } + } + + /* Attempt to acquire */ + err = nfp_cpp_area_acquire(cache->area); + if (err < 0) { + mutex_unlock(&cpp->area_cache_mutex); + return NULL; + } + + cache->id = id; + +exit: + /* Adjust offset */ + *offset = addr - cache->addr; + return cache; +} + +static void +area_cache_put(struct nfp_cpp *cpp, struct nfp_cpp_area_cache *cache) +{ + if (!cache) + return; + + /* Move to front of LRU */ + list_move(&cache->entry, &cpp->area_cache_list); + + mutex_unlock(&cpp->area_cache_mutex); +} + +static int __nfp_cpp_read(struct nfp_cpp *cpp, u32 destination, + unsigned long long address, void *kernel_vaddr, + size_t length) +{ + struct nfp_cpp_area_cache *cache; + struct nfp_cpp_area *area; + unsigned long offset = 0; + int err; + + cache = area_cache_get(cpp, destination, address, &offset, length); + if (cache) { + area = cache->area; + } else { + area = nfp_cpp_area_alloc(cpp, destination, address, length); + if (!area) + return -ENOMEM; + + err = nfp_cpp_area_acquire(area); + if (err) { + nfp_cpp_area_free(area); + return err; + } + } + + err = nfp_cpp_area_read(area, offset, kernel_vaddr, length); + + if (cache) + area_cache_put(cpp, cache); + else + nfp_cpp_area_release_free(area); + + return err; +} + +/** + * nfp_cpp_read() - read from CPP target + * @cpp: CPP handle + * @destination: CPP id + * @address: offset into CPP target + * @kernel_vaddr: kernel buffer for result + * @length: number of bytes to read + * + * Return: length of io, or -ERRNO + */ +int nfp_cpp_read(struct nfp_cpp *cpp, u32 destination, + unsigned long long address, void *kernel_vaddr, + size_t length) +{ + size_t n, offset; + int ret; + + for (offset = 0; offset < length; offset += n) { + unsigned long long r_addr = address + offset; + + /* make first read smaller to align to safe window */ + n = min_t(size_t, length - offset, + ALIGN(r_addr + 1, NFP_CPP_SAFE_AREA_SIZE) - r_addr); + + ret = __nfp_cpp_read(cpp, destination, address + offset, + kernel_vaddr + offset, n); + if (ret < 0) + return ret; + if (ret != n) + return offset + n; + } + + return length; +} + +static int __nfp_cpp_write(struct nfp_cpp *cpp, u32 destination, + unsigned long long address, + const void *kernel_vaddr, size_t length) +{ + struct nfp_cpp_area_cache *cache; + struct nfp_cpp_area *area; + unsigned long offset = 0; + int err; + + cache = area_cache_get(cpp, destination, address, &offset, length); + if (cache) { + area = cache->area; + } else { + area = nfp_cpp_area_alloc(cpp, destination, address, length); + if (!area) + return -ENOMEM; + + err = nfp_cpp_area_acquire(area); + if (err) { + nfp_cpp_area_free(area); + return err; + } + } + + err = nfp_cpp_area_write(area, offset, kernel_vaddr, length); + + if (cache) + area_cache_put(cpp, cache); + else + nfp_cpp_area_release_free(area); + + return err; +} + +/** + * nfp_cpp_write() - write to CPP target + * @cpp: CPP handle + * @destination: CPP id + * @address: offset into CPP target + * @kernel_vaddr: kernel buffer to read from + * @length: number of bytes to write + * + * Return: length of io, or -ERRNO + */ +int nfp_cpp_write(struct nfp_cpp *cpp, u32 destination, + unsigned long long address, + const void *kernel_vaddr, size_t length) +{ + size_t n, offset; + int ret; + + for (offset = 0; offset < length; offset += n) { + unsigned long long w_addr = address + offset; + + /* make first write smaller to align to safe window */ + n = min_t(size_t, length - offset, + ALIGN(w_addr + 1, NFP_CPP_SAFE_AREA_SIZE) - w_addr); + + ret = __nfp_cpp_write(cpp, destination, address + offset, + kernel_vaddr + offset, n); + if (ret < 0) + return ret; + if (ret != n) + return offset + n; + } + + return length; +} + +/* Return the correct CPP address, and fixup xpb_addr as needed. */ +static u32 nfp_xpb_to_cpp(struct nfp_cpp *cpp, u32 *xpb_addr) +{ + int island; + u32 xpb; + + xpb = NFP_CPP_ID(14, NFP_CPP_ACTION_RW, 0); + /* Ensure that non-local XPB accesses go + * out through the global XPBM bus. + */ + island = (*xpb_addr >> 24) & 0x3f; + if (!island) + return xpb; + + if (island != 1) { + *xpb_addr |= 1 << 30; + return xpb; + } + + /* Accesses to the ARM Island overlay uses Island 0 / Global Bit */ + *xpb_addr &= ~0x7f000000; + if (*xpb_addr < 0x60000) { + *xpb_addr |= 1 << 30; + } else { + /* And only non-ARM interfaces use the island id = 1 */ + if (NFP_CPP_INTERFACE_TYPE_of(nfp_cpp_interface(cpp)) + != NFP_CPP_INTERFACE_TYPE_ARM) + *xpb_addr |= 1 << 24; + } + + return xpb; +} + +/** + * nfp_xpb_readl() - Read a u32 word from a XPB location + * @cpp: CPP device handle + * @xpb_addr: Address for operation + * @value: Pointer to read buffer + * + * Return: 0 on success, or -ERRNO + */ +int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_addr, u32 *value) +{ + u32 cpp_dest = nfp_xpb_to_cpp(cpp, &xpb_addr); + + return nfp_cpp_readl(cpp, cpp_dest, xpb_addr, value); +} + +/** + * nfp_xpb_writel() - Write a u32 word to a XPB location + * @cpp: CPP device handle + * @xpb_addr: Address for operation + * @value: Value to write + * + * Return: 0 on success, or -ERRNO + */ +int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_addr, u32 value) +{ + u32 cpp_dest = nfp_xpb_to_cpp(cpp, &xpb_addr); + + return nfp_cpp_writel(cpp, cpp_dest, xpb_addr, value); +} + +/** + * nfp_xpb_writelm() - Modify bits of a 32-bit value from the XPB bus + * @cpp: NFP CPP device handle + * @xpb_tgt: XPB target and address + * @mask: mask of bits to alter + * @value: value to modify + * + * KERNEL: This operation is safe to call in interrupt or softirq context. + * + * Return: 0 on success, or -ERRNO + */ +int nfp_xpb_writelm(struct nfp_cpp *cpp, u32 xpb_tgt, + u32 mask, u32 value) +{ + int err; + u32 tmp; + + err = nfp_xpb_readl(cpp, xpb_tgt, &tmp); + if (err < 0) + return err; + + tmp &= ~mask; + tmp |= mask & value; + return nfp_xpb_writel(cpp, xpb_tgt, tmp); +} + +/* Lockdep markers */ +static struct lock_class_key nfp_cpp_resource_lock_key; + +static void nfp_cpp_dev_release(struct device *dev) +{ + /* Nothing to do here - it just makes the kernel happy */ +} + +/** + * nfp_cpp_from_operations() - Create a NFP CPP handle + * from an operations structure + * @ops: NFP CPP operations structure + * @parent: Parent device + * @priv: Private data of low-level implementation + * + * NOTE: On failure, cpp_ops->free will be called! + * + * Return: NFP CPP handle on success, ERR_PTR on failure + */ +struct nfp_cpp * +nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, + struct device *parent, void *priv) +{ + const u32 arm = NFP_CPP_ID(NFP_CPP_TARGET_ARM, NFP_CPP_ACTION_RW, 0); + struct nfp_cpp *cpp; + int ifc, err; + u32 mask[2]; + u32 xpbaddr; + size_t tgt; + + cpp = kzalloc(sizeof(*cpp), GFP_KERNEL); + if (!cpp) { + err = -ENOMEM; + goto err_malloc; + } + + cpp->op = ops; + cpp->priv = priv; + + ifc = ops->get_interface(parent); + if (ifc < 0) { + err = ifc; + goto err_free_cpp; + } + cpp->interface = ifc; + if (ops->read_serial) { + err = ops->read_serial(parent, cpp->serial); + if (err) + goto err_free_cpp; + } + + rwlock_init(&cpp->resource_lock); + init_waitqueue_head(&cpp->waitq); + lockdep_set_class(&cpp->resource_lock, &nfp_cpp_resource_lock_key); + INIT_LIST_HEAD(&cpp->resource_list); + INIT_LIST_HEAD(&cpp->area_cache_list); + mutex_init(&cpp->area_cache_mutex); + cpp->dev.init_name = "cpp"; + cpp->dev.parent = parent; + cpp->dev.release = nfp_cpp_dev_release; + err = device_register(&cpp->dev); + if (err < 0) { + put_device(&cpp->dev); + goto err_free_cpp; + } + + dev_set_drvdata(&cpp->dev, cpp); + + /* NOTE: cpp_lock is NOT locked for op->init, + * since it may call NFP CPP API operations + */ + if (cpp->op->init) { + err = cpp->op->init(cpp); + if (err < 0) { + dev_err(parent, + "NFP interface initialization failed\n"); + goto err_out; + } + } + + err = nfp_cpp_model_autodetect(cpp, &cpp->model); + if (err < 0) { + dev_err(parent, "NFP model detection failed\n"); + goto err_out; + } + + for (tgt = 0; tgt < ARRAY_SIZE(cpp->imb_cat_table); tgt++) { + /* Hardcoded XPB IMB Base, island 0 */ + xpbaddr = 0x000a0000 + (tgt * 4); + err = nfp_xpb_readl(cpp, xpbaddr, + &cpp->imb_cat_table[tgt]); + if (err < 0) { + dev_err(parent, + "Can't read CPP mapping from device\n"); + goto err_out; + } + } + + nfp_cpp_readl(cpp, arm, NFP_ARM_GCSR + NFP_ARM_GCSR_SOFTMODEL2, + &mask[0]); + nfp_cpp_readl(cpp, arm, NFP_ARM_GCSR + NFP_ARM_GCSR_SOFTMODEL3, + &mask[1]); + + err = nfp_cpp_set_mu_locality_lsb(cpp); + if (err < 0) { + dev_err(parent, "Can't calculate MU locality bit offset\n"); + goto err_out; + } + + dev_info(cpp->dev.parent, "Model: 0x%08x, SN: %pM, Ifc: 0x%04x\n", + nfp_cpp_model(cpp), cpp->serial, nfp_cpp_interface(cpp)); + + return cpp; + +err_out: + device_unregister(&cpp->dev); +err_free_cpp: + kfree(cpp); +err_malloc: + return ERR_PTR(err); +} + +/** + * nfp_cpp_priv() - Get the operations private data of a CPP handle + * @cpp: CPP handle + * + * Return: Private data for the NFP CPP handle + */ +void *nfp_cpp_priv(struct nfp_cpp *cpp) +{ + return cpp->priv; +} + +/** + * nfp_cpp_device() - Get the Linux device handle of a CPP handle + * @cpp: CPP handle + * + * Return: Device for the NFP CPP bus + */ +struct device *nfp_cpp_device(struct nfp_cpp *cpp) +{ + return &cpp->dev; +} + +#define NFP_EXPL_OP(func, expl, args...) \ + ({ \ + struct nfp_cpp *cpp = nfp_cpp_explicit_cpp(expl); \ + int err = -ENODEV; \ + \ + if (cpp->op->func) \ + err = cpp->op->func(expl, ##args); \ + err; \ + }) + +#define NFP_EXPL_OP_NR(func, expl, args...) \ + ({ \ + struct nfp_cpp *cpp = nfp_cpp_explicit_cpp(expl); \ + \ + if (cpp->op->func) \ + cpp->op->func(expl, ##args); \ + \ + }) + +/** + * nfp_cpp_explicit_acquire() - Acquire explicit access handle + * @cpp: NFP CPP handle + * + * The 'data_ref' and 'signal_ref' values are useful when + * constructing the NFP_EXPL_CSR1 and NFP_EXPL_POST values. + * + * Return: NFP CPP explicit handle + */ +struct nfp_cpp_explicit *nfp_cpp_explicit_acquire(struct nfp_cpp *cpp) +{ + struct nfp_cpp_explicit *expl; + int err; + + expl = kzalloc(sizeof(*expl) + cpp->op->explicit_priv_size, GFP_KERNEL); + if (!expl) + return NULL; + + expl->cpp = cpp; + err = NFP_EXPL_OP(explicit_acquire, expl); + if (err < 0) { + kfree(expl); + return NULL; + } + + return expl; +} + +/** + * nfp_cpp_explicit_set_target() - Set target fields for explicit + * @expl: Explicit handle + * @cpp_id: CPP ID field + * @len: CPP Length field + * @mask: CPP Mask field + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_set_target(struct nfp_cpp_explicit *expl, + u32 cpp_id, u8 len, u8 mask) +{ + expl->cmd.cpp_id = cpp_id; + expl->cmd.len = len; + expl->cmd.byte_mask = mask; + + return 0; +} + +/** + * nfp_cpp_explicit_set_data() - Set data fields for explicit + * @expl: Explicit handle + * @data_master: CPP Data Master field + * @data_ref: CPP Data Ref field + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_set_data(struct nfp_cpp_explicit *expl, + u8 data_master, u16 data_ref) +{ + expl->cmd.data_master = data_master; + expl->cmd.data_ref = data_ref; + + return 0; +} + +/** + * nfp_cpp_explicit_set_signal() - Set signal fields for explicit + * @expl: Explicit handle + * @signal_master: CPP Signal Master field + * @signal_ref: CPP Signal Ref field + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_set_signal(struct nfp_cpp_explicit *expl, + u8 signal_master, u8 signal_ref) +{ + expl->cmd.signal_master = signal_master; + expl->cmd.signal_ref = signal_ref; + + return 0; +} + +/** + * nfp_cpp_explicit_set_posted() - Set completion fields for explicit + * @expl: Explicit handle + * @posted: True for signaled completion, false otherwise + * @siga: CPP Signal A field + * @siga_mode: CPP Signal A Mode field + * @sigb: CPP Signal B field + * @sigb_mode: CPP Signal B Mode field + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_set_posted(struct nfp_cpp_explicit *expl, int posted, + u8 siga, + enum nfp_cpp_explicit_signal_mode siga_mode, + u8 sigb, + enum nfp_cpp_explicit_signal_mode sigb_mode) +{ + expl->cmd.posted = posted; + expl->cmd.siga = siga; + expl->cmd.sigb = sigb; + expl->cmd.siga_mode = siga_mode; + expl->cmd.sigb_mode = sigb_mode; + + return 0; +} + +/** + * nfp_cpp_explicit_put() - Set up the write (pull) data for a explicit access + * @expl: NFP CPP Explicit handle + * @buff: Data to have the target pull in the transaction + * @len: Length of data, in bytes + * + * The 'len' parameter must be less than or equal to 128 bytes. + * + * If this function is called before the configuration + * registers are set, it will return -EINVAL. + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_put(struct nfp_cpp_explicit *expl, + const void *buff, size_t len) +{ + return NFP_EXPL_OP(explicit_put, expl, buff, len); +} + +/** + * nfp_cpp_explicit_do() - Execute a transaction, and wait for it to complete + * @expl: NFP CPP Explicit handle + * @address: Address to send in the explicit transaction + * + * If this function is called before the configuration + * registers are set, it will return -1, with an errno of EINVAL. + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_do(struct nfp_cpp_explicit *expl, u64 address) +{ + return NFP_EXPL_OP(explicit_do, expl, &expl->cmd, address); +} + +/** + * nfp_cpp_explicit_get() - Get the 'push' (read) data from a explicit access + * @expl: NFP CPP Explicit handle + * @buff: Data that the target pushed in the transaction + * @len: Length of data, in bytes + * + * The 'len' parameter must be less than or equal to 128 bytes. + * + * If this function is called before all three configuration + * registers are set, it will return -1, with an errno of EINVAL. + * + * If this function is called before nfp_cpp_explicit_do() + * has completed, it will return -1, with an errno of EBUSY. + * + * Return: 0, or -ERRNO + */ +int nfp_cpp_explicit_get(struct nfp_cpp_explicit *expl, void *buff, size_t len) +{ + return NFP_EXPL_OP(explicit_get, expl, buff, len); +} + +/** + * nfp_cpp_explicit_release() - Release explicit access handle + * @expl: NFP CPP Explicit handle + * + */ +void nfp_cpp_explicit_release(struct nfp_cpp_explicit *expl) +{ + NFP_EXPL_OP_NR(explicit_release, expl); + kfree(expl); +} + +/** + * nfp_cpp_explicit_cpp() - return CPP handle for CPP explicit + * @cpp_explicit: CPP explicit handle + * + * Return: NFP CPP handle of the explicit + */ +struct nfp_cpp *nfp_cpp_explicit_cpp(struct nfp_cpp_explicit *cpp_explicit) +{ + return cpp_explicit->cpp; +} + +/** + * nfp_cpp_explicit_priv() - return private struct for CPP explicit + * @cpp_explicit: CPP explicit handle + * + * Return: private data of the explicit, or NULL + */ +void *nfp_cpp_explicit_priv(struct nfp_cpp_explicit *cpp_explicit) +{ + return &cpp_explicit[1]; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c new file mode 100644 index 000000000..508ae6b57 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_cpplib.c + * Library of functions to access the NFP's CPP bus + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <asm/unaligned.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sched.h> + +#include "nfp_cpp.h" +#include "nfp6000/nfp6000.h" +#include "nfp6000/nfp_xpb.h" + +/* NFP6000 PL */ +#define NFP_PL_DEVICE_PART_NFP6000 0x6200 +#define NFP_PL_DEVICE_ID 0x00000004 +#define NFP_PL_DEVICE_ID_MASK GENMASK(7, 0) +#define NFP_PL_DEVICE_PART_MASK GENMASK(31, 16) +#define NFP_PL_DEVICE_MODEL_MASK (NFP_PL_DEVICE_PART_MASK | \ + NFP_PL_DEVICE_ID_MASK) + +/** + * nfp_cpp_readl() - Read a u32 word from a CPP location + * @cpp: CPP device handle + * @cpp_id: CPP ID for operation + * @address: Address for operation + * @value: Pointer to read buffer + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u32 *value) +{ + u8 tmp[4]; + int n; + + n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; + + *value = get_unaligned_le32(tmp); + return 0; +} + +/** + * nfp_cpp_writel() - Write a u32 word to a CPP location + * @cpp: CPP device handle + * @cpp_id: CPP ID for operation + * @address: Address for operation + * @value: Value to write + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u32 value) +{ + u8 tmp[4]; + int n; + + put_unaligned_le32(value, tmp); + n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; +} + +/** + * nfp_cpp_readq() - Read a u64 word from a CPP location + * @cpp: CPP device handle + * @cpp_id: CPP ID for operation + * @address: Address for operation + * @value: Pointer to read buffer + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u64 *value) +{ + u8 tmp[8]; + int n; + + n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; + + *value = get_unaligned_le64(tmp); + return 0; +} + +/** + * nfp_cpp_writeq() - Write a u64 word to a CPP location + * @cpp: CPP device handle + * @cpp_id: CPP ID for operation + * @address: Address for operation + * @value: Value to write + * + * Return: 0 on success, or -ERRNO + */ +int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id, + unsigned long long address, u64 value) +{ + u8 tmp[8]; + int n; + + put_unaligned_le64(value, tmp); + n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; +} + +/* NOTE: This code should not use nfp_xpb_* functions, + * as those are model-specific + */ +int nfp_cpp_model_autodetect(struct nfp_cpp *cpp, u32 *model) +{ + u32 reg; + int err; + + err = nfp_xpb_readl(cpp, NFP_XPB_DEVICE(1, 1, 16) + NFP_PL_DEVICE_ID, + ®); + if (err < 0) + return err; + + *model = reg & NFP_PL_DEVICE_MODEL_MASK; + /* Disambiguate the NFP4000/NFP5000/NFP6000 chips */ + if (FIELD_GET(NFP_PL_DEVICE_PART_MASK, reg) == + NFP_PL_DEVICE_PART_NFP6000) { + if (*model & NFP_PL_DEVICE_ID_MASK) + *model -= 0x10; + } + + return 0; +} + +static u8 nfp_bytemask(int width, u64 addr) +{ + if (width == 8) + return 0xff; + else if (width == 4) + return 0x0f << (addr & 4); + else if (width == 2) + return 0x03 << (addr & 6); + else if (width == 1) + return 0x01 << (addr & 7); + else + return 0; +} + +int nfp_cpp_explicit_read(struct nfp_cpp *cpp, u32 cpp_id, + u64 addr, void *buff, size_t len, int width_read) +{ + struct nfp_cpp_explicit *expl; + char *tmp = buff; + int err, i, incr; + u8 byte_mask; + + if (len & (width_read - 1)) + return -EINVAL; + + expl = nfp_cpp_explicit_acquire(cpp); + if (!expl) + return -EBUSY; + + incr = min_t(int, 16 * width_read, 128); + incr = min_t(int, incr, len); + + /* Translate a NFP_CPP_ACTION_RW to action 0 */ + if (NFP_CPP_ID_ACTION_of(cpp_id) == NFP_CPP_ACTION_RW) + cpp_id = NFP_CPP_ID(NFP_CPP_ID_TARGET_of(cpp_id), 0, + NFP_CPP_ID_TOKEN_of(cpp_id)); + + byte_mask = nfp_bytemask(width_read, addr); + + nfp_cpp_explicit_set_target(expl, cpp_id, + incr / width_read - 1, byte_mask); + nfp_cpp_explicit_set_posted(expl, 1, 0, NFP_SIGNAL_PUSH, + 0, NFP_SIGNAL_NONE); + + for (i = 0; i < len; i += incr, addr += incr, tmp += incr) { + if (i + incr > len) { + incr = len - i; + nfp_cpp_explicit_set_target(expl, cpp_id, + incr / width_read - 1, + 0xff); + } + + err = nfp_cpp_explicit_do(expl, addr); + if (err < 0) + goto exit_release; + + err = nfp_cpp_explicit_get(expl, tmp, incr); + if (err < 0) + goto exit_release; + } + err = len; +exit_release: + nfp_cpp_explicit_release(expl); + + return err; +} + +int nfp_cpp_explicit_write(struct nfp_cpp *cpp, u32 cpp_id, u64 addr, + const void *buff, size_t len, int width_write) +{ + struct nfp_cpp_explicit *expl; + const char *tmp = buff; + int err, i, incr; + u8 byte_mask; + + if (len & (width_write - 1)) + return -EINVAL; + + expl = nfp_cpp_explicit_acquire(cpp); + if (!expl) + return -EBUSY; + + incr = min_t(int, 16 * width_write, 128); + incr = min_t(int, incr, len); + + /* Translate a NFP_CPP_ACTION_RW to action 1 */ + if (NFP_CPP_ID_ACTION_of(cpp_id) == NFP_CPP_ACTION_RW) + cpp_id = NFP_CPP_ID(NFP_CPP_ID_TARGET_of(cpp_id), 1, + NFP_CPP_ID_TOKEN_of(cpp_id)); + + byte_mask = nfp_bytemask(width_write, addr); + + nfp_cpp_explicit_set_target(expl, cpp_id, + incr / width_write - 1, byte_mask); + nfp_cpp_explicit_set_posted(expl, 1, 0, NFP_SIGNAL_PULL, + 0, NFP_SIGNAL_NONE); + + for (i = 0; i < len; i += incr, addr += incr, tmp += incr) { + if (i + incr > len) { + incr = len - i; + nfp_cpp_explicit_set_target(expl, cpp_id, + incr / width_write - 1, + 0xff); + } + + err = nfp_cpp_explicit_put(expl, tmp, incr); + if (err < 0) + goto exit_release; + + err = nfp_cpp_explicit_do(expl, addr); + if (err < 0) + goto exit_release; + } + err = len; +exit_release: + nfp_cpp_explicit_release(expl); + + return err; +} + +/** + * nfp_cpp_map_area() - Helper function to map an area + * @cpp: NFP CPP handler + * @name: Name for the area + * @cpp_id: CPP ID for operation + * @addr: CPP address + * @size: Size of the area + * @area: Area handle (output) + * + * Map an area of IOMEM access. To undo the effect of this function call + * @nfp_cpp_area_release_free(*area). + * + * Return: Pointer to memory mapped area or ERR_PTR + */ +u8 __iomem * +nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, u32 cpp_id, u64 addr, + unsigned long size, struct nfp_cpp_area **area) +{ + u8 __iomem *res; + + *area = nfp_cpp_area_alloc_acquire(cpp, name, cpp_id, addr, size); + if (!*area) + goto err_eio; + + res = nfp_cpp_area_iomem(*area); + if (!res) + goto err_release_free; + + return res; + +err_release_free: + nfp_cpp_area_release_free(*area); +err_eio: + return (u8 __iomem *)ERR_PTR(-EIO); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c new file mode 100644 index 000000000..0725b51c2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/sizes.h> + +#include "nfp_dev.h" + +const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = { + [NFP_DEV_NFP3800] = { + .dma_mask = DMA_BIT_MASK(48), + .qc_idx_mask = GENMASK(8, 0), + .qc_addr_offset = 0x400000, + .min_qc_size = 512, + .max_qc_size = SZ_64K, + + .chip_names = "NFP3800", + .pcie_cfg_expbar_offset = 0x0a00, + .pcie_expl_offset = 0xd000, + .qc_area_sz = 0x100000, + }, + [NFP_DEV_NFP3800_VF] = { + .dma_mask = DMA_BIT_MASK(48), + .qc_idx_mask = GENMASK(8, 0), + .qc_addr_offset = 0, + .min_qc_size = 512, + .max_qc_size = SZ_64K, + }, + [NFP_DEV_NFP6000] = { + .dma_mask = DMA_BIT_MASK(40), + .qc_idx_mask = GENMASK(7, 0), + .qc_addr_offset = 0x80000, + .min_qc_size = 256, + .max_qc_size = SZ_256K, + + .chip_names = "NFP4000/NFP5000/NFP6000", + .pcie_cfg_expbar_offset = 0x0400, + .pcie_expl_offset = 0x1000, + .qc_area_sz = 0x80000, + }, + [NFP_DEV_NFP6000_VF] = { + .dma_mask = DMA_BIT_MASK(40), + .qc_idx_mask = GENMASK(7, 0), + .qc_addr_offset = 0, + .min_qc_size = 256, + .max_qc_size = SZ_256K, + }, +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h new file mode 100644 index 000000000..e4d38178d --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef _NFP_DEV_H_ +#define _NFP_DEV_H_ + +#include <linux/types.h> + +#define PCI_VENDOR_ID_CORIGINE 0x1da8 +#define PCI_DEVICE_ID_NFP3800 0x3800 +#define PCI_DEVICE_ID_NFP4000 0x4000 +#define PCI_DEVICE_ID_NFP5000 0x5000 +#define PCI_DEVICE_ID_NFP6000 0x6000 +#define PCI_DEVICE_ID_NFP3800_VF 0x3803 +#define PCI_DEVICE_ID_NFP6000_VF 0x6003 + +enum nfp_dev_id { + NFP_DEV_NFP3800, + NFP_DEV_NFP3800_VF, + NFP_DEV_NFP6000, + NFP_DEV_NFP6000_VF, + NFP_DEV_CNT, +}; + +struct nfp_dev_info { + /* Required fields */ + u64 dma_mask; + u32 qc_idx_mask; + u32 qc_addr_offset; + u32 min_qc_size; + u32 max_qc_size; + + /* PF-only fields */ + const char *chip_names; + u32 pcie_cfg_expbar_offset; + u32 pcie_expl_offset; + u32 qc_area_sz; +}; + +extern const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT]; + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c new file mode 100644 index 000000000..f05dd34ab --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* Parse the hwinfo table that the ARM firmware builds in the ARM scratch SRAM + * after chip reset. + * + * Examples of the fields: + * me.count = 40 + * me.mask = 0x7f_ffff_ffff + * + * me.count is the total number of MEs on the system. + * me.mask is the bitmask of MEs that are available for application usage. + * + * (ie, in this example, ME 39 has been reserved by boardconfig.) + */ + +#include <asm/byteorder.h> +#include <asm/unaligned.h> +#include <linux/delay.h> +#include <linux/log2.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define NFP_SUBSYS "nfp_hwinfo" + +#include "crc32.h" +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp6000/nfp6000.h" + +#define HWINFO_SIZE_MIN 0x100 +#define HWINFO_WAIT 20 /* seconds */ + +/* The Hardware Info Table defines the properties of the system. + * + * HWInfo v1 Table (fixed size) + * + * 0x0000: u32 version Hardware Info Table version (1.0) + * 0x0004: u32 size Total size of the table, including + * the CRC32 (IEEE 802.3) + * 0x0008: u32 jumptab Offset of key/value table + * 0x000c: u32 keys Total number of keys in the key/value table + * NNNNNN: Key/value jump table and string data + * (size - 4): u32 crc32 CRC32 (same as IEEE 802.3, POSIX csum, etc) + * CRC32("",0) = ~0, CRC32("a",1) = 0x48C279FE + * + * HWInfo v2 Table (variable size) + * + * 0x0000: u32 version Hardware Info Table version (2.0) + * 0x0004: u32 size Current size of the data area, excluding CRC32 + * 0x0008: u32 limit Maximum size of the table + * 0x000c: u32 reserved Unused, set to zero + * NNNNNN: Key/value data + * (size - 4): u32 crc32 CRC32 (same as IEEE 802.3, POSIX csum, etc) + * CRC32("",0) = ~0, CRC32("a",1) = 0x48C279FE + * + * If the HWInfo table is in the process of being updated, the low bit + * of version will be set. + * + * HWInfo v1 Key/Value Table + * ------------------------- + * + * The key/value table is a set of offsets to ASCIIZ strings which have + * been strcmp(3) sorted (yes, please use bsearch(3) on the table). + * + * All keys are guaranteed to be unique. + * + * N+0: u32 key_1 Offset to the first key + * N+4: u32 val_1 Offset to the first value + * N+8: u32 key_2 Offset to the second key + * N+c: u32 val_2 Offset to the second value + * ... + * + * HWInfo v2 Key/Value Table + * ------------------------- + * + * Packed UTF8Z strings, ie 'key1\000value1\000key2\000value2\000' + * + * Unsorted. + */ + +#define NFP_HWINFO_VERSION_1 ('H' << 24 | 'I' << 16 | 1 << 8 | 0 << 1 | 0) +#define NFP_HWINFO_VERSION_2 ('H' << 24 | 'I' << 16 | 2 << 8 | 0 << 1 | 0) +#define NFP_HWINFO_VERSION_UPDATING BIT(0) + +struct nfp_hwinfo { + u8 start[0]; + + __le32 version; + __le32 size; + + /* v2 specific fields */ + __le32 limit; + __le32 resv; + + char data[]; +}; + +static bool nfp_hwinfo_is_updating(struct nfp_hwinfo *hwinfo) +{ + return le32_to_cpu(hwinfo->version) & NFP_HWINFO_VERSION_UPDATING; +} + +static int +hwinfo_db_walk(struct nfp_cpp *cpp, struct nfp_hwinfo *hwinfo, u32 size) +{ + const char *key, *val, *end = hwinfo->data + size; + + for (key = hwinfo->data; *key && key < end; + key = val + strlen(val) + 1) { + + val = key + strlen(key) + 1; + if (val >= end) { + nfp_warn(cpp, "Bad HWINFO - overflowing key\n"); + return -EINVAL; + } + + if (val + strlen(val) + 1 > end) { + nfp_warn(cpp, "Bad HWINFO - overflowing value\n"); + return -EINVAL; + } + } + + return 0; +} + +static int +hwinfo_db_validate(struct nfp_cpp *cpp, struct nfp_hwinfo *db, u32 len) +{ + u32 size, crc; + + size = le32_to_cpu(db->size); + if (size > len) { + nfp_err(cpp, "Unsupported hwinfo size %u > %u\n", size, len); + return -EINVAL; + } + + size -= sizeof(u32); + crc = crc32_posix(db, size); + if (crc != get_unaligned_le32(db->start + size)) { + nfp_err(cpp, "Corrupt hwinfo table (CRC mismatch), calculated 0x%x, expected 0x%x\n", + crc, get_unaligned_le32(db->start + size)); + + return -EINVAL; + } + + return hwinfo_db_walk(cpp, db, size); +} + +static struct nfp_hwinfo * +hwinfo_try_fetch(struct nfp_cpp *cpp, size_t *cpp_size) +{ + struct nfp_hwinfo *header; + struct nfp_resource *res; + u64 cpp_addr; + u32 cpp_id; + int err; + u8 *db; + + res = nfp_resource_acquire(cpp, NFP_RESOURCE_NFP_HWINFO); + if (!IS_ERR(res)) { + cpp_id = nfp_resource_cpp_id(res); + cpp_addr = nfp_resource_address(res); + *cpp_size = nfp_resource_size(res); + + nfp_resource_release(res); + + if (*cpp_size < HWINFO_SIZE_MIN) + return NULL; + } else if (PTR_ERR(res) == -ENOENT) { + /* Try getting the HWInfo table from the 'classic' location */ + cpp_id = NFP_CPP_ISLAND_ID(NFP_CPP_TARGET_MU, + NFP_CPP_ACTION_RW, 0, 1); + cpp_addr = 0x30000; + *cpp_size = 0x0e000; + } else { + return NULL; + } + + db = kmalloc(*cpp_size + 1, GFP_KERNEL); + if (!db) + return NULL; + + err = nfp_cpp_read(cpp, cpp_id, cpp_addr, db, *cpp_size); + if (err != *cpp_size) + goto exit_free; + + header = (void *)db; + if (nfp_hwinfo_is_updating(header)) + goto exit_free; + + if (le32_to_cpu(header->version) != NFP_HWINFO_VERSION_2) { + nfp_err(cpp, "Unknown HWInfo version: 0x%08x\n", + le32_to_cpu(header->version)); + goto exit_free; + } + + /* NULL-terminate for safety */ + db[*cpp_size] = '\0'; + + return (void *)db; +exit_free: + kfree(db); + return NULL; +} + +static struct nfp_hwinfo *hwinfo_fetch(struct nfp_cpp *cpp, size_t *hwdb_size) +{ + const unsigned long wait_until = jiffies + HWINFO_WAIT * HZ; + struct nfp_hwinfo *db; + int err; + + for (;;) { + const unsigned long start_time = jiffies; + + db = hwinfo_try_fetch(cpp, hwdb_size); + if (db) + return db; + + err = msleep_interruptible(100); + if (err || time_after(start_time, wait_until)) { + nfp_err(cpp, "NFP access error\n"); + return NULL; + } + } +} + +struct nfp_hwinfo *nfp_hwinfo_read(struct nfp_cpp *cpp) +{ + struct nfp_hwinfo *db; + size_t hwdb_size = 0; + int err; + + db = hwinfo_fetch(cpp, &hwdb_size); + if (!db) + return NULL; + + err = hwinfo_db_validate(cpp, db, hwdb_size); + if (err) { + kfree(db); + return NULL; + } + + return db; +} + +/** + * nfp_hwinfo_lookup() - Find a value in the HWInfo table by name + * @hwinfo: NFP HWinfo table + * @lookup: HWInfo name to search for + * + * Return: Value of the HWInfo name, or NULL + */ +const char *nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup) +{ + const char *key, *val, *end; + + if (!hwinfo || !lookup) + return NULL; + + end = hwinfo->data + le32_to_cpu(hwinfo->size) - sizeof(u32); + + for (key = hwinfo->data; *key && key < end; + key = val + strlen(val) + 1) { + + val = key + strlen(key) + 1; + + if (strcmp(key, lookup) == 0) + return val; + } + + return NULL; +} + +char *nfp_hwinfo_get_packed_strings(struct nfp_hwinfo *hwinfo) +{ + return hwinfo->data; +} + +u32 nfp_hwinfo_get_packed_str_size(struct nfp_hwinfo *hwinfo) +{ + return le32_to_cpu(hwinfo->size) - sizeof(u32); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c new file mode 100644 index 000000000..79e179435 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* + * nfp_mip.c + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Espen Skoglund <espen.skoglund@netronome.com> + */ +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp_nffw.h" + +#define NFP_MIP_SIGNATURE cpu_to_le32(0x0050494d) /* "MIP\0" */ +#define NFP_MIP_VERSION cpu_to_le32(1) +#define NFP_MIP_MAX_OFFSET (256 * 1024) + +struct nfp_mip { + __le32 signature; + __le32 mip_version; + __le32 mip_size; + __le32 first_entry; + + __le32 version; + __le32 buildnum; + __le32 buildtime; + __le32 loadtime; + + __le32 symtab_addr; + __le32 symtab_size; + __le32 strtab_addr; + __le32 strtab_size; + + char name[16]; + char toolchain[32]; +}; + +/* Read memory and check if it could be a valid MIP */ +static int +nfp_mip_try_read(struct nfp_cpp *cpp, u32 cpp_id, u64 addr, struct nfp_mip *mip) +{ + int ret; + + ret = nfp_cpp_read(cpp, cpp_id, addr, mip, sizeof(*mip)); + if (ret != sizeof(*mip)) { + nfp_err(cpp, "Failed to read MIP data (%d, %zu)\n", + ret, sizeof(*mip)); + return -EIO; + } + if (mip->signature != NFP_MIP_SIGNATURE) { + nfp_warn(cpp, "Incorrect MIP signature (0x%08x)\n", + le32_to_cpu(mip->signature)); + return -EINVAL; + } + if (mip->mip_version != NFP_MIP_VERSION) { + nfp_warn(cpp, "Unsupported MIP version (%d)\n", + le32_to_cpu(mip->mip_version)); + return -EINVAL; + } + + return 0; +} + +/* Try to locate MIP using the resource table */ +static int nfp_mip_read_resource(struct nfp_cpp *cpp, struct nfp_mip *mip) +{ + struct nfp_nffw_info *nffw_info; + u32 cpp_id; + u64 addr; + int err; + + nffw_info = nfp_nffw_info_open(cpp); + if (IS_ERR(nffw_info)) + return PTR_ERR(nffw_info); + + err = nfp_nffw_info_mip_first(nffw_info, &cpp_id, &addr); + if (err) + goto exit_close_nffw; + + err = nfp_mip_try_read(cpp, cpp_id, addr, mip); +exit_close_nffw: + nfp_nffw_info_close(nffw_info); + return err; +} + +/** + * nfp_mip_open() - Get device MIP structure + * @cpp: NFP CPP Handle + * + * Copy MIP structure from NFP device and return it. The returned + * structure is handled internally by the library and should be + * freed by calling nfp_mip_close(). + * + * Return: pointer to mip, NULL on failure. + */ +const struct nfp_mip *nfp_mip_open(struct nfp_cpp *cpp) +{ + struct nfp_mip *mip; + int err; + + mip = kmalloc(sizeof(*mip), GFP_KERNEL); + if (!mip) + return NULL; + + err = nfp_mip_read_resource(cpp, mip); + if (err) { + kfree(mip); + return NULL; + } + + mip->name[sizeof(mip->name) - 1] = 0; + + return mip; +} + +void nfp_mip_close(const struct nfp_mip *mip) +{ + kfree(mip); +} + +const char *nfp_mip_name(const struct nfp_mip *mip) +{ + return mip->name; +} + +/** + * nfp_mip_symtab() - Get the address and size of the MIP symbol table + * @mip: MIP handle + * @addr: Location for NFP DDR address of MIP symbol table + * @size: Location for size of MIP symbol table + */ +void nfp_mip_symtab(const struct nfp_mip *mip, u32 *addr, u32 *size) +{ + *addr = le32_to_cpu(mip->symtab_addr); + *size = le32_to_cpu(mip->symtab_size); +} + +/** + * nfp_mip_strtab() - Get the address and size of the MIP symbol name table + * @mip: MIP handle + * @addr: Location for NFP DDR address of MIP symbol name table + * @size: Location for size of MIP symbol name table + */ +void nfp_mip_strtab(const struct nfp_mip *mip, u32 *addr, u32 *size) +{ + *addr = le32_to_cpu(mip->strtab_addr); + *size = le32_to_cpu(mip->strtab_size); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c new file mode 100644 index 000000000..7bc17b94a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/jiffies.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/wait.h> + +#include "nfp_cpp.h" +#include "nfp6000/nfp6000.h" + +struct nfp_cpp_mutex { + struct nfp_cpp *cpp; + int target; + u16 depth; + unsigned long long address; + u32 key; +}; + +static u32 nfp_mutex_locked(u16 interface) +{ + return (u32)interface << 16 | 0x000f; +} + +static u32 nfp_mutex_unlocked(u16 interface) +{ + return (u32)interface << 16 | 0x0000; +} + +static u32 nfp_mutex_owner(u32 val) +{ + return val >> 16; +} + +static bool nfp_mutex_is_locked(u32 val) +{ + return (val & 0xffff) == 0x000f; +} + +static bool nfp_mutex_is_unlocked(u32 val) +{ + return (val & 0xffff) == 0000; +} + +/* If you need more than 65536 recursive locks, please rethink your code. */ +#define NFP_MUTEX_DEPTH_MAX 0xffff + +static int +nfp_cpp_mutex_validate(u16 interface, int *target, unsigned long long address) +{ + /* Not permitted on invalid interfaces */ + if (NFP_CPP_INTERFACE_TYPE_of(interface) == + NFP_CPP_INTERFACE_TYPE_INVALID) + return -EINVAL; + + /* Address must be 64-bit aligned */ + if (address & 7) + return -EINVAL; + + if (*target != NFP_CPP_TARGET_MU) + return -EINVAL; + + return 0; +} + +/** + * nfp_cpp_mutex_init() - Initialize a mutex location + * @cpp: NFP CPP handle + * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU) + * @address: Offset into the address space of the NFP CPP target ID + * @key: Unique 32-bit value for this mutex + * + * The CPP target:address must point to a 64-bit aligned location, and + * will initialize 64 bits of data at the location. + * + * This creates the initial mutex state, as locked by this + * nfp_cpp_interface(). + * + * This function should only be called when setting up + * the initial lock state upon boot-up of the system. + * + * Return: 0 on success, or -errno on failure + */ +int nfp_cpp_mutex_init(struct nfp_cpp *cpp, + int target, unsigned long long address, u32 key) +{ + const u32 muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */ + u16 interface = nfp_cpp_interface(cpp); + int err; + + err = nfp_cpp_mutex_validate(interface, &target, address); + if (err) + return err; + + err = nfp_cpp_writel(cpp, muw, address + 4, key); + if (err) + return err; + + err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_locked(interface)); + if (err) + return err; + + return 0; +} + +/** + * nfp_cpp_mutex_alloc() - Create a mutex handle + * @cpp: NFP CPP handle + * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU) + * @address: Offset into the address space of the NFP CPP target ID + * @key: 32-bit unique key (must match the key at this location) + * + * The CPP target:address must point to a 64-bit aligned location, and + * reserve 64 bits of data at the location for use by the handle. + * + * Only target/address pairs that point to entities that support the + * MU Atomic Engine's CmpAndSwap32 command are supported. + * + * Return: A non-NULL struct nfp_cpp_mutex * on success, NULL on failure. + */ +struct nfp_cpp_mutex *nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target, + unsigned long long address, u32 key) +{ + const u32 mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */ + u16 interface = nfp_cpp_interface(cpp); + struct nfp_cpp_mutex *mutex; + int err; + u32 tmp; + + err = nfp_cpp_mutex_validate(interface, &target, address); + if (err) + return NULL; + + err = nfp_cpp_readl(cpp, mur, address + 4, &tmp); + if (err < 0) + return NULL; + + if (tmp != key) + return NULL; + + mutex = kzalloc(sizeof(*mutex), GFP_KERNEL); + if (!mutex) + return NULL; + + mutex->cpp = cpp; + mutex->target = target; + mutex->address = address; + mutex->key = key; + mutex->depth = 0; + + return mutex; +} + +/** + * nfp_cpp_mutex_free() - Free a mutex handle - does not alter the lock state + * @mutex: NFP CPP Mutex handle + */ +void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex) +{ + kfree(mutex); +} + +/** + * nfp_cpp_mutex_lock() - Lock a mutex handle, using the NFP MU Atomic Engine + * @mutex: NFP CPP Mutex handle + * + * Return: 0 on success, or -errno on failure + */ +int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex) +{ + unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ; + unsigned long err_at = jiffies + NFP_MUTEX_WAIT_ERROR * HZ; + unsigned int timeout_ms = 1; + int err; + + /* We can't use a waitqueue here, because the unlocker + * might be on a separate CPU. + * + * So just wait for now. + */ + for (;;) { + err = nfp_cpp_mutex_trylock(mutex); + if (err != -EBUSY) + break; + + err = msleep_interruptible(timeout_ms); + if (err != 0) { + nfp_info(mutex->cpp, + "interrupted waiting for NFP mutex\n"); + return -ERESTARTSYS; + } + + if (time_is_before_eq_jiffies(warn_at)) { + warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ; + nfp_warn(mutex->cpp, + "Warning: waiting for NFP mutex [depth:%hd target:%d addr:%llx key:%08x]\n", + mutex->depth, + mutex->target, mutex->address, mutex->key); + } + if (time_is_before_eq_jiffies(err_at)) { + nfp_err(mutex->cpp, "Error: mutex wait timed out\n"); + return -EBUSY; + } + } + + return err; +} + +/** + * nfp_cpp_mutex_unlock() - Unlock a mutex handle, using the MU Atomic Engine + * @mutex: NFP CPP Mutex handle + * + * Return: 0 on success, or -errno on failure + */ +int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex) +{ + const u32 muw = NFP_CPP_ID(mutex->target, 4, 0); /* atomic_write */ + const u32 mur = NFP_CPP_ID(mutex->target, 3, 0); /* atomic_read */ + struct nfp_cpp *cpp = mutex->cpp; + u32 key, value; + u16 interface; + int err; + + interface = nfp_cpp_interface(cpp); + + if (mutex->depth > 1) { + mutex->depth--; + return 0; + } + + err = nfp_cpp_readl(mutex->cpp, mur, mutex->address + 4, &key); + if (err < 0) + return err; + + if (key != mutex->key) + return -EPERM; + + err = nfp_cpp_readl(mutex->cpp, mur, mutex->address, &value); + if (err < 0) + return err; + + if (value != nfp_mutex_locked(interface)) + return -EACCES; + + err = nfp_cpp_writel(cpp, muw, mutex->address, + nfp_mutex_unlocked(interface)); + if (err < 0) + return err; + + mutex->depth = 0; + return 0; +} + +/** + * nfp_cpp_mutex_trylock() - Attempt to lock a mutex handle + * @mutex: NFP CPP Mutex handle + * + * Return: 0 if the lock succeeded, -errno on failure + */ +int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex) +{ + const u32 muw = NFP_CPP_ID(mutex->target, 4, 0); /* atomic_write */ + const u32 mus = NFP_CPP_ID(mutex->target, 5, 3); /* test_set_imm */ + const u32 mur = NFP_CPP_ID(mutex->target, 3, 0); /* atomic_read */ + struct nfp_cpp *cpp = mutex->cpp; + u32 key, value, tmp; + int err; + + if (mutex->depth > 0) { + if (mutex->depth == NFP_MUTEX_DEPTH_MAX) + return -E2BIG; + mutex->depth++; + return 0; + } + + /* Verify that the lock marker is not damaged */ + err = nfp_cpp_readl(cpp, mur, mutex->address + 4, &key); + if (err < 0) + return err; + + if (key != mutex->key) + return -EPERM; + + /* Compare against the unlocked state, and if true, + * write the interface id into the top 16 bits, and + * mark as locked. + */ + value = nfp_mutex_locked(nfp_cpp_interface(cpp)); + + /* We use test_set_imm here, as it implies a read + * of the current state, and sets the bits in the + * bytemask of the command to 1s. Since the mutex + * is guaranteed to be 64-bit aligned, the bytemask + * of this 32-bit command is ensured to be 8'b00001111, + * which implies that the lower 4 bits will be set to + * ones regardless of the initial state. + * + * Since this is a 'Readback' operation, with no Pull + * data, we can treat this as a normal Push (read) + * atomic, which returns the original value. + */ + err = nfp_cpp_readl(cpp, mus, mutex->address, &tmp); + if (err < 0) + return err; + + /* Was it unlocked? */ + if (nfp_mutex_is_unlocked(tmp)) { + /* The read value can only be 0x....0000 in the unlocked state. + * If there was another contending for this lock, then + * the lock state would be 0x....000f + */ + + /* Write our owner ID into the lock + * While not strictly necessary, this helps with + * debug and bookkeeping. + */ + err = nfp_cpp_writel(cpp, muw, mutex->address, value); + if (err < 0) + return err; + + mutex->depth = 1; + return 0; + } + + return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL; +} + +/** + * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint + * @cpp: NFP CPP handle + * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU) + * @address: Offset into the address space of the NFP CPP target ID + * + * Release lock if held by local system. Extreme care is advised, call only + * when no local lock users can exist. + * + * Return: 0 if the lock was OK, 1 if locked by us, -errno on invalid mutex + */ +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address) +{ + const u32 mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */ + const u32 muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */ + u16 interface = nfp_cpp_interface(cpp); + int err; + u32 tmp; + + err = nfp_cpp_mutex_validate(interface, &target, address); + if (err) + return err; + + /* Check lock */ + err = nfp_cpp_readl(cpp, mur, address, &tmp); + if (err < 0) + return err; + + if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface) + return 0; + + /* Bust the lock */ + err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface)); + if (err < 0) + return err; + + return 1; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c new file mode 100644 index 000000000..e2e5fd003 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_nffw.c + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Francois H. Theron <francois.theron@netronome.com> + */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp_nffw.h" +#include "nfp6000/nfp6000.h" + +/* Init-CSR owner IDs for firmware map to firmware IDs which start at 4. + * Lower IDs are reserved for target and loader IDs. + */ +#define NFFW_FWID_EXT 3 /* For active MEs that we didn't load. */ +#define NFFW_FWID_BASE 4 + +#define NFFW_FWID_ALL 255 + +/* + * NFFW_INFO_VERSION history: + * 0: This was never actually used (before versioning), but it refers to + * the previous struct which had FWINFO_CNT = MEINFO_CNT = 120 that later + * changed to 200. + * 1: First versioned struct, with + * FWINFO_CNT = 120 + * MEINFO_CNT = 120 + * 2: FWINFO_CNT = 200 + * MEINFO_CNT = 200 + */ +#define NFFW_INFO_VERSION_CURRENT 2 + +/* Enough for all current chip families */ +#define NFFW_MEINFO_CNT_V1 120 +#define NFFW_FWINFO_CNT_V1 120 +#define NFFW_MEINFO_CNT_V2 200 +#define NFFW_FWINFO_CNT_V2 200 + +/* Work in 32-bit words to make cross-platform endianness easier to handle */ + +/** nfp.nffw meinfo **/ +struct nffw_meinfo { + __le32 ctxmask__fwid__meid; +}; + +struct nffw_fwinfo { + __le32 loaded__mu_da__mip_off_hi; + __le32 mip_cppid; /* 0 means no MIP */ + __le32 mip_offset_lo; +}; + +struct nfp_nffw_info_v1 { + struct nffw_meinfo meinfo[NFFW_MEINFO_CNT_V1]; + struct nffw_fwinfo fwinfo[NFFW_FWINFO_CNT_V1]; +}; + +struct nfp_nffw_info_v2 { + struct nffw_meinfo meinfo[NFFW_MEINFO_CNT_V2]; + struct nffw_fwinfo fwinfo[NFFW_FWINFO_CNT_V2]; +}; + +/** Resource: nfp.nffw main **/ +struct nfp_nffw_info_data { + __le32 flags[2]; + union { + struct nfp_nffw_info_v1 v1; + struct nfp_nffw_info_v2 v2; + } info; +}; + +struct nfp_nffw_info { + struct nfp_cpp *cpp; + struct nfp_resource *res; + + struct nfp_nffw_info_data fwinf; +}; + +/* flg_info_version = flags[0]<27:16> + * This is a small version counter intended only to detect if the current + * implementation can read the current struct. Struct changes should be very + * rare and as such a 12-bit counter should cover large spans of time. By the + * time it wraps around, we don't expect to have 4096 versions of this struct + * to be in use at the same time. + */ +static u32 nffw_res_info_version_get(const struct nfp_nffw_info_data *res) +{ + return (le32_to_cpu(res->flags[0]) >> 16) & 0xfff; +} + +/* flg_init = flags[0]<0> */ +static u32 nffw_res_flg_init_get(const struct nfp_nffw_info_data *res) +{ + return (le32_to_cpu(res->flags[0]) >> 0) & 1; +} + +/* loaded = loaded__mu_da__mip_off_hi<31:31> */ +static u32 nffw_fwinfo_loaded_get(const struct nffw_fwinfo *fi) +{ + return (le32_to_cpu(fi->loaded__mu_da__mip_off_hi) >> 31) & 1; +} + +/* mip_cppid = mip_cppid */ +static u32 nffw_fwinfo_mip_cppid_get(const struct nffw_fwinfo *fi) +{ + return le32_to_cpu(fi->mip_cppid); +} + +/* loaded = loaded__mu_da__mip_off_hi<8:8> */ +static u32 nffw_fwinfo_mip_mu_da_get(const struct nffw_fwinfo *fi) +{ + return (le32_to_cpu(fi->loaded__mu_da__mip_off_hi) >> 8) & 1; +} + +/* mip_offset = (loaded__mu_da__mip_off_hi<7:0> << 8) | mip_offset_lo */ +static u64 nffw_fwinfo_mip_offset_get(const struct nffw_fwinfo *fi) +{ + u64 mip_off_hi = le32_to_cpu(fi->loaded__mu_da__mip_off_hi); + + return (mip_off_hi & 0xFF) << 32 | le32_to_cpu(fi->mip_offset_lo); +} + +static unsigned int +nffw_res_fwinfos(struct nfp_nffw_info_data *fwinf, struct nffw_fwinfo **arr) +{ + /* For the this code, version 0 is most likely to be + * version 1 in this case. Since the kernel driver + * does not take responsibility for initialising the + * nfp.nffw resource, any previous code (CA firmware or + * userspace) that left the version 0 and did set + * the init flag is going to be version 1. + */ + switch (nffw_res_info_version_get(fwinf)) { + case 0: + case 1: + *arr = &fwinf->info.v1.fwinfo[0]; + return NFFW_FWINFO_CNT_V1; + case 2: + *arr = &fwinf->info.v2.fwinfo[0]; + return NFFW_FWINFO_CNT_V2; + default: + *arr = NULL; + return 0; + } +} + +/** + * nfp_nffw_info_open() - Acquire the lock on the NFFW table + * @cpp: NFP CPP handle + * + * Return: pointer to nfp_nffw_info object or ERR_PTR() + */ +struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp) +{ + struct nfp_nffw_info_data *fwinf; + struct nfp_nffw_info *state; + u32 info_ver; + int err; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return ERR_PTR(-ENOMEM); + + state->res = nfp_resource_acquire(cpp, NFP_RESOURCE_NFP_NFFW); + if (IS_ERR(state->res)) + goto err_free; + + fwinf = &state->fwinf; + + if (sizeof(*fwinf) > nfp_resource_size(state->res)) + goto err_release; + + err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res), + nfp_resource_address(state->res), + fwinf, sizeof(*fwinf)); + if (err < (int)sizeof(*fwinf)) + goto err_release; + + if (!nffw_res_flg_init_get(fwinf)) + goto err_release; + + info_ver = nffw_res_info_version_get(fwinf); + if (info_ver > NFFW_INFO_VERSION_CURRENT) + goto err_release; + + state->cpp = cpp; + return state; + +err_release: + nfp_resource_release(state->res); +err_free: + kfree(state); + return ERR_PTR(-EIO); +} + +/** + * nfp_nffw_info_close() - Release the lock on the NFFW table and free state + * @state: NFP FW info state + */ +void nfp_nffw_info_close(struct nfp_nffw_info *state) +{ + nfp_resource_release(state->res); + kfree(state); +} + +/** + * nfp_nffw_info_fwid_first() - Return the first firmware ID in the NFFW + * @state: NFP FW info state + * + * Return: First NFFW firmware info, NULL on failure + */ +static struct nffw_fwinfo *nfp_nffw_info_fwid_first(struct nfp_nffw_info *state) +{ + struct nffw_fwinfo *fwinfo; + unsigned int cnt, i; + + cnt = nffw_res_fwinfos(&state->fwinf, &fwinfo); + if (!cnt) + return NULL; + + for (i = 0; i < cnt; i++) + if (nffw_fwinfo_loaded_get(&fwinfo[i])) + return &fwinfo[i]; + + return NULL; +} + +/** + * nfp_nffw_info_mip_first() - Retrieve the location of the first FW's MIP + * @state: NFP FW info state + * @cpp_id: Pointer to the CPP ID of the MIP + * @off: Pointer to the CPP Address of the MIP + * + * Return: 0, or -ERRNO + */ +int nfp_nffw_info_mip_first(struct nfp_nffw_info *state, u32 *cpp_id, u64 *off) +{ + struct nffw_fwinfo *fwinfo; + + fwinfo = nfp_nffw_info_fwid_first(state); + if (!fwinfo) + return -EINVAL; + + *cpp_id = nffw_fwinfo_mip_cppid_get(fwinfo); + *off = nffw_fwinfo_mip_offset_get(fwinfo); + + if (nffw_fwinfo_mip_mu_da_get(fwinfo)) { + int locality_off = nfp_cpp_mu_locality_lsb(state->cpp); + + *off &= ~(NFP_MU_ADDR_ACCESS_TYPE_MASK << locality_off); + *off |= NFP_MU_ADDR_ACCESS_TYPE_DIRECT << locality_off; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h new file mode 100644 index 000000000..49a4d3f56 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_nffw.h + * Authors: Jason McMullan <jason.mcmullan@netronome.com> + * Francois H. Theron <francois.theron@netronome.com> + */ + +#ifndef NFP_NFFW_H +#define NFP_NFFW_H + +/* Implemented in nfp_nffw.c */ + +struct nfp_nffw_info; + +struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp); +void nfp_nffw_info_close(struct nfp_nffw_info *state); +int nfp_nffw_info_mip_first(struct nfp_nffw_info *state, u32 *cpp_id, u64 *off); + +/* Implemented in nfp_mip.c */ + +struct nfp_mip; + +const struct nfp_mip *nfp_mip_open(struct nfp_cpp *cpp); +void nfp_mip_close(const struct nfp_mip *mip); + +const char *nfp_mip_name(const struct nfp_mip *mip); +void nfp_mip_symtab(const struct nfp_mip *mip, u32 *addr, u32 *size); +void nfp_mip_strtab(const struct nfp_mip *mip, u32 *addr, u32 *size); + +/* Implemented in nfp_rtsym.c */ + +enum nfp_rtsym_type { + NFP_RTSYM_TYPE_NONE = 0, + NFP_RTSYM_TYPE_OBJECT = 1, + NFP_RTSYM_TYPE_FUNCTION = 2, + NFP_RTSYM_TYPE_ABS = 3, +}; + +#define NFP_RTSYM_TARGET_NONE 0 +#define NFP_RTSYM_TARGET_LMEM -1 +#define NFP_RTSYM_TARGET_EMU_CACHE -7 + +/** + * struct nfp_rtsym - RTSYM descriptor + * @name: Symbol name + * @addr: Address in the domain/target's address space + * @size: Size (in bytes) of the symbol + * @type: NFP_RTSYM_TYPE_* of the symbol + * @target: CPP Target identifier, or NFP_RTSYM_TARGET_* + * @domain: CPP Target Domain (island) + */ +struct nfp_rtsym { + const char *name; + u64 addr; + u64 size; + enum nfp_rtsym_type type; + int target; + int domain; +}; + +struct nfp_rtsym_table; + +struct nfp_rtsym_table *nfp_rtsym_table_read(struct nfp_cpp *cpp); +struct nfp_rtsym_table * +__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip); +int nfp_rtsym_count(struct nfp_rtsym_table *rtbl); +const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx); +const struct nfp_rtsym * +nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name); + +u64 nfp_rtsym_size(const struct nfp_rtsym *rtsym); +int __nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, void *buf, size_t len); +int nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + void *buf, size_t len); +int __nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u32 *value); +int nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u32 *value); +int __nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u64 *value); +int nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u64 *value); +int __nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, void *buf, size_t len); +int nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + void *buf, size_t len); +int __nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u32 value); +int nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u32 value); +int __nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u64 value); +int nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u64 value); + +u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, + int *error); +int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name, + u64 value); +u8 __iomem * +nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, + unsigned int min_size, struct nfp_cpp_area **area); + +#endif /* NFP_NFFW_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c new file mode 100644 index 000000000..7136bc485 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -0,0 +1,1120 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_nsp.c + * Author: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + */ + +#include <asm/unaligned.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/overflow.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#define NFP_SUBSYS "nfp_nsp" + +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp_nsp.h" + +#define NFP_NSP_TIMEOUT_DEFAULT 30 +#define NFP_NSP_TIMEOUT_BOOT 30 + +/* Offsets relative to the CSR base */ +#define NSP_STATUS 0x00 +#define NSP_STATUS_MAGIC GENMASK_ULL(63, 48) +#define NSP_STATUS_MAJOR GENMASK_ULL(47, 44) +#define NSP_STATUS_MINOR GENMASK_ULL(43, 32) +#define NSP_STATUS_CODE GENMASK_ULL(31, 16) +#define NSP_STATUS_RESULT GENMASK_ULL(15, 8) +#define NSP_STATUS_BUSY BIT_ULL(0) + +#define NSP_COMMAND 0x08 +#define NSP_COMMAND_OPTION GENMASK_ULL(63, 32) +#define NSP_COMMAND_CODE GENMASK_ULL(31, 16) +#define NSP_COMMAND_DMA_BUF BIT_ULL(1) +#define NSP_COMMAND_START BIT_ULL(0) + +/* CPP address to retrieve the data from */ +#define NSP_BUFFER 0x10 +#define NSP_BUFFER_CPP GENMASK_ULL(63, 40) +#define NSP_BUFFER_ADDRESS GENMASK_ULL(39, 0) + +#define NSP_DFLT_BUFFER 0x18 +#define NSP_DFLT_BUFFER_CPP GENMASK_ULL(63, 40) +#define NSP_DFLT_BUFFER_ADDRESS GENMASK_ULL(39, 0) + +#define NSP_DFLT_BUFFER_CONFIG 0x20 +#define NSP_DFLT_BUFFER_DMA_CHUNK_ORDER GENMASK_ULL(63, 58) +#define NSP_DFLT_BUFFER_SIZE_4KB GENMASK_ULL(15, 8) +#define NSP_DFLT_BUFFER_SIZE_MB GENMASK_ULL(7, 0) + +#define NFP_CAP_CMD_DMA_SG 0x28 + +#define NSP_MAGIC 0xab10 +#define NSP_MAJOR 0 +#define NSP_MINOR 8 + +#define NSP_CODE_MAJOR GENMASK(15, 12) +#define NSP_CODE_MINOR GENMASK(11, 0) + +#define NFP_FW_LOAD_RET_MAJOR GENMASK(15, 8) +#define NFP_FW_LOAD_RET_MINOR GENMASK(23, 16) + +#define NFP_HWINFO_LOOKUP_SIZE GENMASK(11, 0) + +#define NFP_VERSIONS_SIZE GENMASK(11, 0) +#define NFP_VERSIONS_CNT_OFF 0 +#define NFP_VERSIONS_BSP_OFF 2 +#define NFP_VERSIONS_CPLD_OFF 6 +#define NFP_VERSIONS_APP_OFF 10 +#define NFP_VERSIONS_BUNDLE_OFF 14 +#define NFP_VERSIONS_UNDI_OFF 18 +#define NFP_VERSIONS_NCSI_OFF 22 +#define NFP_VERSIONS_CFGR_OFF 26 + +#define NSP_SFF_EEPROM_BLOCK_LEN 8 + +enum nfp_nsp_cmd { + SPCODE_NOOP = 0, /* No operation */ + SPCODE_SOFT_RESET = 1, /* Soft reset the NFP */ + SPCODE_FW_DEFAULT = 2, /* Load default (UNDI) FW */ + SPCODE_PHY_INIT = 3, /* Initialize the PHY */ + SPCODE_MAC_INIT = 4, /* Initialize the MAC */ + SPCODE_PHY_RXADAPT = 5, /* Re-run PHY RX Adaptation */ + SPCODE_FW_LOAD = 6, /* Load fw from buffer, len in option */ + SPCODE_ETH_RESCAN = 7, /* Rescan ETHs, write ETH_TABLE to buf */ + SPCODE_ETH_CONTROL = 8, /* Update media config from buffer */ + SPCODE_NSP_WRITE_FLASH = 11, /* Load and flash image from buffer */ + SPCODE_NSP_SENSORS = 12, /* Read NSP sensor(s) */ + SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */ + SPCODE_FW_STORED = 16, /* If no FW loaded, load flash app FW */ + SPCODE_HWINFO_LOOKUP = 17, /* Lookup HWinfo with overwrites etc. */ + SPCODE_HWINFO_SET = 18, /* Set HWinfo entry */ + SPCODE_FW_LOADED = 19, /* Is application firmware loaded */ + SPCODE_VERSIONS = 21, /* Report FW versions */ + SPCODE_READ_SFF_EEPROM = 22, /* Read module EEPROM */ + SPCODE_READ_MEDIA = 23, /* Get either the supported or advertised media for a port */ +}; + +struct nfp_nsp_dma_buf { + __le32 chunk_cnt; + __le32 reserved[3]; + struct { + __le32 size; + __le32 reserved; + __le64 addr; + } descs[]; +}; + +static const struct { + int code; + const char *msg; +} nsp_errors[] = { + { 6010, "could not map to phy for port" }, + { 6011, "not an allowed rate/lanes for port" }, + { 6012, "not an allowed rate/lanes for port" }, + { 6013, "high/low error, change other port first" }, + { 6014, "config not found in flash" }, +}; + +struct nfp_nsp { + struct nfp_cpp *cpp; + struct nfp_resource *res; + struct { + u16 major; + u16 minor; + } ver; + + /* Eth table config state */ + bool modified; + unsigned int idx; + void *entries; +}; + +/** + * struct nfp_nsp_command_arg - NFP command argument structure + * @code: NFP SP Command Code + * @dma: @buf points to a host buffer, not NSP buffer + * @timeout_sec:Timeout value to wait for completion in seconds + * @option: NFP SP Command Argument + * @buf: NFP SP Buffer Address + * @error_cb: Callback for interpreting option if error occurred + * @error_quiet:Don't print command error/warning. Protocol errors are still + * logged. + */ +struct nfp_nsp_command_arg { + u16 code; + bool dma; + unsigned int timeout_sec; + u32 option; + u64 buf; + void (*error_cb)(struct nfp_nsp *state, u32 ret_val); + bool error_quiet; +}; + +/** + * struct nfp_nsp_command_buf_arg - NFP command with buffer argument structure + * @arg: NFP command argument structure + * @in_buf: Buffer with data for input + * @in_size: Size of @in_buf + * @out_buf: Buffer for output data + * @out_size: Size of @out_buf + */ +struct nfp_nsp_command_buf_arg { + struct nfp_nsp_command_arg arg; + const void *in_buf; + unsigned int in_size; + void *out_buf; + unsigned int out_size; +}; + +struct nfp_cpp *nfp_nsp_cpp(struct nfp_nsp *state) +{ + return state->cpp; +} + +bool nfp_nsp_config_modified(struct nfp_nsp *state) +{ + return state->modified; +} + +void nfp_nsp_config_set_modified(struct nfp_nsp *state, bool modified) +{ + state->modified = modified; +} + +void *nfp_nsp_config_entries(struct nfp_nsp *state) +{ + return state->entries; +} + +unsigned int nfp_nsp_config_idx(struct nfp_nsp *state) +{ + return state->idx; +} + +void +nfp_nsp_config_set_state(struct nfp_nsp *state, void *entries, unsigned int idx) +{ + state->entries = entries; + state->idx = idx; +} + +void nfp_nsp_config_clear_state(struct nfp_nsp *state) +{ + state->entries = NULL; + state->idx = 0; +} + +static void nfp_nsp_print_extended_error(struct nfp_nsp *state, u32 ret_val) +{ + int i; + + if (!ret_val) + return; + + for (i = 0; i < ARRAY_SIZE(nsp_errors); i++) + if (ret_val == nsp_errors[i].code) + nfp_err(state->cpp, "err msg: %s\n", nsp_errors[i].msg); +} + +static int nfp_nsp_check(struct nfp_nsp *state) +{ + struct nfp_cpp *cpp = state->cpp; + u64 nsp_status, reg; + u32 nsp_cpp; + int err; + + nsp_cpp = nfp_resource_cpp_id(state->res); + nsp_status = nfp_resource_address(state->res) + NSP_STATUS; + + err = nfp_cpp_readq(cpp, nsp_cpp, nsp_status, ®); + if (err < 0) + return err; + + if (FIELD_GET(NSP_STATUS_MAGIC, reg) != NSP_MAGIC) { + nfp_err(cpp, "Cannot detect NFP Service Processor\n"); + return -ENODEV; + } + + state->ver.major = FIELD_GET(NSP_STATUS_MAJOR, reg); + state->ver.minor = FIELD_GET(NSP_STATUS_MINOR, reg); + + if (state->ver.major != NSP_MAJOR) { + nfp_err(cpp, "Unsupported ABI %hu.%hu\n", + state->ver.major, state->ver.minor); + return -EINVAL; + } + if (state->ver.minor < NSP_MINOR) { + nfp_err(cpp, "ABI too old to support NIC operation (%u.%hu < %u.%u), please update the management FW on the flash\n", + NSP_MAJOR, state->ver.minor, NSP_MAJOR, NSP_MINOR); + return -EINVAL; + } + + if (reg & NSP_STATUS_BUSY) { + nfp_err(cpp, "Service processor busy!\n"); + return -EBUSY; + } + + return 0; +} + +/** + * nfp_nsp_open() - Prepare for communication and lock the NSP resource. + * @cpp: NFP CPP Handle + */ +struct nfp_nsp *nfp_nsp_open(struct nfp_cpp *cpp) +{ + struct nfp_resource *res; + struct nfp_nsp *state; + int err; + + res = nfp_resource_acquire(cpp, NFP_RESOURCE_NSP); + if (IS_ERR(res)) + return (void *)res; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) { + nfp_resource_release(res); + return ERR_PTR(-ENOMEM); + } + state->cpp = cpp; + state->res = res; + + err = nfp_nsp_check(state); + if (err) { + nfp_nsp_close(state); + return ERR_PTR(err); + } + + return state; +} + +/** + * nfp_nsp_close() - Clean up and unlock the NSP resource. + * @state: NFP SP state + */ +void nfp_nsp_close(struct nfp_nsp *state) +{ + nfp_resource_release(state->res); + kfree(state); +} + +u16 nfp_nsp_get_abi_ver_major(struct nfp_nsp *state) +{ + return state->ver.major; +} + +u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state) +{ + return state->ver.minor; +} + +static int +nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg, u32 nsp_cpp, u64 addr, + u64 mask, u64 val, u32 timeout_sec) +{ + const unsigned long wait_until = jiffies + timeout_sec * HZ; + int err; + + for (;;) { + const unsigned long start_time = jiffies; + + err = nfp_cpp_readq(cpp, nsp_cpp, addr, reg); + if (err < 0) + return err; + + if ((*reg & mask) == val) + return 0; + + msleep(25); + + if (time_after(start_time, wait_until)) + return -ETIMEDOUT; + } +} + +/** + * __nfp_nsp_command() - Execute a command on the NFP Service Processor + * @state: NFP SP state + * @arg: NFP command argument structure + * + * Return: 0 for success with no result + * + * positive value for NSP completion with a result code + * + * -EAGAIN if the NSP is not yet present + * -ENODEV if the NSP is not a supported model + * -EBUSY if the NSP is stuck + * -EINTR if interrupted while waiting for completion + * -ETIMEDOUT if the NSP took longer than @timeout_sec seconds to complete + */ +static int +__nfp_nsp_command(struct nfp_nsp *state, const struct nfp_nsp_command_arg *arg) +{ + u64 reg, ret_val, nsp_base, nsp_buffer, nsp_status, nsp_command; + struct nfp_cpp *cpp = state->cpp; + u32 nsp_cpp; + int err; + + nsp_cpp = nfp_resource_cpp_id(state->res); + nsp_base = nfp_resource_address(state->res); + nsp_status = nsp_base + NSP_STATUS; + nsp_command = nsp_base + NSP_COMMAND; + nsp_buffer = nsp_base + NSP_BUFFER; + + err = nfp_nsp_check(state); + if (err) + return err; + + err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_buffer, arg->buf); + if (err < 0) + return err; + + err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command, + FIELD_PREP(NSP_COMMAND_OPTION, arg->option) | + FIELD_PREP(NSP_COMMAND_CODE, arg->code) | + FIELD_PREP(NSP_COMMAND_DMA_BUF, arg->dma) | + FIELD_PREP(NSP_COMMAND_START, 1)); + if (err < 0) + return err; + + /* Wait for NSP_COMMAND_START to go to 0 */ + err = nfp_nsp_wait_reg(cpp, ®, nsp_cpp, nsp_command, + NSP_COMMAND_START, 0, NFP_NSP_TIMEOUT_DEFAULT); + if (err) { + nfp_err(cpp, "Error %d waiting for code 0x%04x to start\n", + err, arg->code); + return err; + } + + /* Wait for NSP_STATUS_BUSY to go to 0 */ + err = nfp_nsp_wait_reg(cpp, ®, nsp_cpp, nsp_status, NSP_STATUS_BUSY, + 0, arg->timeout_sec ?: NFP_NSP_TIMEOUT_DEFAULT); + if (err) { + nfp_err(cpp, "Error %d waiting for code 0x%04x to complete\n", + err, arg->code); + return err; + } + + err = nfp_cpp_readq(cpp, nsp_cpp, nsp_command, &ret_val); + if (err < 0) + return err; + ret_val = FIELD_GET(NSP_COMMAND_OPTION, ret_val); + + err = FIELD_GET(NSP_STATUS_RESULT, reg); + if (err) { + if (!arg->error_quiet) + nfp_warn(cpp, "Result (error) code set: %d (%d) command: %d\n", + -err, (int)ret_val, arg->code); + + if (arg->error_cb) + arg->error_cb(state, ret_val); + else + nfp_nsp_print_extended_error(state, ret_val); + return -err; + } + + return ret_val; +} + +static int nfp_nsp_command(struct nfp_nsp *state, u16 code) +{ + const struct nfp_nsp_command_arg arg = { + .code = code, + }; + + return __nfp_nsp_command(state, &arg); +} + +static int +nfp_nsp_command_buf_def(struct nfp_nsp *nsp, + struct nfp_nsp_command_buf_arg *arg) +{ + struct nfp_cpp *cpp = nsp->cpp; + u64 reg, cpp_buf; + int err, ret; + u32 cpp_id; + + err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), + nfp_resource_address(nsp->res) + + NSP_DFLT_BUFFER, + ®); + if (err < 0) + return err; + + cpp_id = FIELD_GET(NSP_DFLT_BUFFER_CPP, reg) << 8; + cpp_buf = FIELD_GET(NSP_DFLT_BUFFER_ADDRESS, reg); + + if (arg->in_buf && arg->in_size) { + err = nfp_cpp_write(cpp, cpp_id, cpp_buf, + arg->in_buf, arg->in_size); + if (err < 0) + return err; + } + /* Zero out remaining part of the buffer */ + if (arg->out_buf && arg->out_size && arg->out_size > arg->in_size) { + err = nfp_cpp_write(cpp, cpp_id, cpp_buf + arg->in_size, + arg->out_buf, arg->out_size - arg->in_size); + if (err < 0) + return err; + } + + if (!FIELD_FIT(NSP_BUFFER_CPP, cpp_id >> 8) || + !FIELD_FIT(NSP_BUFFER_ADDRESS, cpp_buf)) { + nfp_err(cpp, "Buffer out of reach %08x %016llx\n", + cpp_id, cpp_buf); + return -EINVAL; + } + + arg->arg.buf = FIELD_PREP(NSP_BUFFER_CPP, cpp_id >> 8) | + FIELD_PREP(NSP_BUFFER_ADDRESS, cpp_buf); + ret = __nfp_nsp_command(nsp, &arg->arg); + if (ret < 0) + return ret; + + if (arg->out_buf && arg->out_size) { + err = nfp_cpp_read(cpp, cpp_id, cpp_buf, + arg->out_buf, arg->out_size); + if (err < 0) + return err; + } + + return ret; +} + +static int +nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp, + struct nfp_nsp_command_buf_arg *arg, + unsigned int max_size, unsigned int chunk_order, + unsigned int dma_order) +{ + struct nfp_cpp *cpp = nsp->cpp; + struct nfp_nsp_dma_buf *desc; + struct { + dma_addr_t dma_addr; + unsigned long len; + void *chunk; + } *chunks; + size_t chunk_size, dma_size; + dma_addr_t dma_desc; + struct device *dev; + unsigned long off; + int i, ret, nseg; + size_t desc_sz; + + chunk_size = BIT_ULL(chunk_order); + dma_size = BIT_ULL(dma_order); + nseg = DIV_ROUND_UP(max_size, chunk_size); + + chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL); + if (!chunks) + return -ENOMEM; + + off = 0; + ret = -ENOMEM; + for (i = 0; i < nseg; i++) { + unsigned long coff; + + chunks[i].chunk = kmalloc(chunk_size, + GFP_KERNEL | __GFP_NOWARN); + if (!chunks[i].chunk) + goto exit_free_prev; + + chunks[i].len = min_t(u64, chunk_size, max_size - off); + + coff = 0; + if (arg->in_size > off) { + coff = min_t(u64, arg->in_size - off, chunk_size); + memcpy(chunks[i].chunk, arg->in_buf + off, coff); + } + memset(chunks[i].chunk + coff, 0, chunk_size - coff); + + off += chunks[i].len; + } + + dev = nfp_cpp_device(cpp)->parent; + + for (i = 0; i < nseg; i++) { + dma_addr_t addr; + + addr = dma_map_single(dev, chunks[i].chunk, chunks[i].len, + DMA_BIDIRECTIONAL); + chunks[i].dma_addr = addr; + + ret = dma_mapping_error(dev, addr); + if (ret) + goto exit_unmap_prev; + + if (WARN_ONCE(round_down(addr, dma_size) != + round_down(addr + chunks[i].len - 1, dma_size), + "unaligned DMA address: %pad %lu %zd\n", + &addr, chunks[i].len, dma_size)) { + ret = -EFAULT; + i++; + goto exit_unmap_prev; + } + } + + desc_sz = struct_size(desc, descs, nseg); + desc = kmalloc(desc_sz, GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto exit_unmap_all; + } + + desc->chunk_cnt = cpu_to_le32(nseg); + for (i = 0; i < nseg; i++) { + desc->descs[i].size = cpu_to_le32(chunks[i].len); + desc->descs[i].addr = cpu_to_le64(chunks[i].dma_addr); + } + + dma_desc = dma_map_single(dev, desc, desc_sz, DMA_TO_DEVICE); + ret = dma_mapping_error(dev, dma_desc); + if (ret) + goto exit_free_desc; + + arg->arg.dma = true; + arg->arg.buf = dma_desc; + ret = __nfp_nsp_command(nsp, &arg->arg); + if (ret < 0) + goto exit_unmap_desc; + + i = 0; + off = 0; + while (off < arg->out_size) { + unsigned int len; + + len = min_t(u64, chunks[i].len, arg->out_size - off); + memcpy(arg->out_buf + off, chunks[i].chunk, len); + off += len; + i++; + } + +exit_unmap_desc: + dma_unmap_single(dev, dma_desc, desc_sz, DMA_TO_DEVICE); +exit_free_desc: + kfree(desc); +exit_unmap_all: + i = nseg; +exit_unmap_prev: + while (--i >= 0) + dma_unmap_single(dev, chunks[i].dma_addr, chunks[i].len, + DMA_BIDIRECTIONAL); + i = nseg; +exit_free_prev: + while (--i >= 0) + kfree(chunks[i].chunk); + kfree(chunks); + if (ret < 0) + nfp_err(cpp, "NSP: SG DMA failed for command 0x%04x: %d (sz:%d cord:%d)\n", + arg->arg.code, ret, max_size, chunk_order); + return ret; +} + +static int +nfp_nsp_command_buf_dma(struct nfp_nsp *nsp, + struct nfp_nsp_command_buf_arg *arg, + unsigned int max_size, unsigned int dma_order) +{ + unsigned int chunk_order, buf_order; + struct nfp_cpp *cpp = nsp->cpp; + bool sg_ok; + u64 reg; + int err; + + buf_order = order_base_2(roundup_pow_of_two(max_size)); + + err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), + nfp_resource_address(nsp->res) + NFP_CAP_CMD_DMA_SG, + ®); + if (err < 0) + return err; + sg_ok = reg & BIT_ULL(arg->arg.code - 1); + + if (!sg_ok) { + if (buf_order > dma_order) { + nfp_err(cpp, "NSP: can't service non-SG DMA for command 0x%04x\n", + arg->arg.code); + return -ENOMEM; + } + chunk_order = buf_order; + } else { + chunk_order = min_t(unsigned int, dma_order, PAGE_SHIFT); + } + + return nfp_nsp_command_buf_dma_sg(nsp, arg, max_size, chunk_order, + dma_order); +} + +static int +nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg) +{ + unsigned int dma_order, def_size, max_size; + struct nfp_cpp *cpp = nsp->cpp; + u64 reg; + int err; + + if (nsp->ver.minor < 13) { + nfp_err(cpp, "NSP: Code 0x%04x with buffer not supported (ABI %hu.%hu)\n", + arg->arg.code, nsp->ver.major, nsp->ver.minor); + return -EOPNOTSUPP; + } + + err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), + nfp_resource_address(nsp->res) + + NSP_DFLT_BUFFER_CONFIG, + ®); + if (err < 0) + return err; + + /* Zero out undefined part of the out buffer */ + if (arg->out_buf && arg->out_size && arg->out_size > arg->in_size) + memset(arg->out_buf, 0, arg->out_size - arg->in_size); + + max_size = max(arg->in_size, arg->out_size); + def_size = FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M + + FIELD_GET(NSP_DFLT_BUFFER_SIZE_4KB, reg) * SZ_4K; + dma_order = FIELD_GET(NSP_DFLT_BUFFER_DMA_CHUNK_ORDER, reg); + if (def_size >= max_size) { + return nfp_nsp_command_buf_def(nsp, arg); + } else if (!dma_order) { + nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n", + arg->arg.code, def_size, max_size); + return -EINVAL; + } + + return nfp_nsp_command_buf_dma(nsp, arg, max_size, dma_order); +} + +int nfp_nsp_wait(struct nfp_nsp *state) +{ + const unsigned long wait_until = jiffies + NFP_NSP_TIMEOUT_BOOT * HZ; + int err; + + nfp_dbg(state->cpp, "Waiting for NSP to respond (%u sec max).\n", + NFP_NSP_TIMEOUT_BOOT); + + for (;;) { + const unsigned long start_time = jiffies; + + err = nfp_nsp_command(state, SPCODE_NOOP); + if (err != -EAGAIN) + break; + + if (msleep_interruptible(25)) { + err = -ERESTARTSYS; + break; + } + + if (time_after(start_time, wait_until)) { + err = -ETIMEDOUT; + break; + } + } + if (err) + nfp_err(state->cpp, "NSP failed to respond %d\n", err); + + return err; +} + +int nfp_nsp_device_soft_reset(struct nfp_nsp *state) +{ + return nfp_nsp_command(state, SPCODE_SOFT_RESET); +} + +int nfp_nsp_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_command(state, SPCODE_MAC_INIT); +} + +static void nfp_nsp_load_fw_extended_msg(struct nfp_nsp *state, u32 ret_val) +{ + static const char * const major_msg[] = { + /* 0 */ "Firmware from driver loaded", + /* 1 */ "Firmware from flash loaded", + /* 2 */ "Firmware loading failure", + }; + static const char * const minor_msg[] = { + /* 0 */ "", + /* 1 */ "no named partition on flash", + /* 2 */ "error reading from flash", + /* 3 */ "can not deflate", + /* 4 */ "not a trusted file", + /* 5 */ "can not parse FW file", + /* 6 */ "MIP not found in FW file", + /* 7 */ "null firmware name in MIP", + /* 8 */ "FW version none", + /* 9 */ "FW build number none", + /* 10 */ "no FW selection policy HWInfo key found", + /* 11 */ "static FW selection policy", + /* 12 */ "FW version has precedence", + /* 13 */ "different FW application load requested", + /* 14 */ "development build", + }; + unsigned int major, minor; + const char *level; + + major = FIELD_GET(NFP_FW_LOAD_RET_MAJOR, ret_val); + minor = FIELD_GET(NFP_FW_LOAD_RET_MINOR, ret_val); + + if (!nfp_nsp_has_stored_fw_load(state)) + return; + + /* Lower the message level in legacy case */ + if (major == 0 && (minor == 0 || minor == 10)) + level = KERN_DEBUG; + else if (major == 2) + level = KERN_ERR; + else + level = KERN_INFO; + + if (major >= ARRAY_SIZE(major_msg)) + nfp_printk(level, state->cpp, "FW loading status: %x\n", + ret_val); + else if (minor >= ARRAY_SIZE(minor_msg)) + nfp_printk(level, state->cpp, "%s, reason code: %d\n", + major_msg[major], minor); + else + nfp_printk(level, state->cpp, "%s%c %s\n", + major_msg[major], minor ? ',' : '.', + minor_msg[minor]); +} + +int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw) +{ + struct nfp_nsp_command_buf_arg load_fw = { + { + .code = SPCODE_FW_LOAD, + .option = fw->size, + .error_cb = nfp_nsp_load_fw_extended_msg, + }, + .in_buf = fw->data, + .in_size = fw->size, + }; + int ret; + + ret = nfp_nsp_command_buf(state, &load_fw); + if (ret < 0) + return ret; + + nfp_nsp_load_fw_extended_msg(state, ret); + return 0; +} + +int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw) +{ + struct nfp_nsp_command_buf_arg write_flash = { + { + .code = SPCODE_NSP_WRITE_FLASH, + .option = fw->size, + .timeout_sec = 900, + }, + .in_buf = fw->data, + .in_size = fw->size, + }; + + return nfp_nsp_command_buf(state, &write_flash); +} + +int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg eth_rescan = { + { + .code = SPCODE_ETH_RESCAN, + .option = size, + }, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, ð_rescan); +} + +int nfp_nsp_write_eth_table(struct nfp_nsp *state, + const void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg eth_ctrl = { + { + .code = SPCODE_ETH_CONTROL, + .option = size, + }, + .in_buf = buf, + .in_size = size, + }; + + return nfp_nsp_command_buf(state, ð_ctrl); +} + +int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg identify = { + { + .code = SPCODE_NSP_IDENTIFY, + .option = size, + }, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &identify); +} + +int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, + void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg sensors = { + { + .code = SPCODE_NSP_SENSORS, + .option = sensor_mask, + }, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &sensors); +} + +int nfp_nsp_load_stored_fw(struct nfp_nsp *state) +{ + const struct nfp_nsp_command_arg arg = { + .code = SPCODE_FW_STORED, + .error_cb = nfp_nsp_load_fw_extended_msg, + }; + int ret; + + ret = __nfp_nsp_command(state, &arg); + if (ret < 0) + return ret; + + nfp_nsp_load_fw_extended_msg(state, ret); + return 0; +} + +static int +__nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size, + bool optional) +{ + struct nfp_nsp_command_buf_arg hwinfo_lookup = { + { + .code = SPCODE_HWINFO_LOOKUP, + .option = size, + .error_quiet = optional, + }, + .in_buf = buf, + .in_size = size, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &hwinfo_lookup); +} + +int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size) +{ + int err; + + size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE); + + err = __nfp_nsp_hwinfo_lookup(state, buf, size, false); + if (err) + return err; + + if (strnlen(buf, size) == size) { + nfp_err(state->cpp, "NSP HWinfo value not NULL-terminated\n"); + return -EINVAL; + } + + return 0; +} + +int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf, + unsigned int size, const char *default_val) +{ + int err; + + /* Ensure that the default value is usable irrespective of whether + * it is actually going to be used. + */ + if (strnlen(default_val, size) == size) + return -EINVAL; + + if (!nfp_nsp_has_hwinfo_lookup(state)) { + strcpy(buf, default_val); + return 0; + } + + size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE); + + err = __nfp_nsp_hwinfo_lookup(state, buf, size, true); + if (err) { + if (err == -ENOENT) { + strcpy(buf, default_val); + return 0; + } + + nfp_err(state->cpp, "NSP HWinfo lookup failed: %d\n", err); + return err; + } + + if (strnlen(buf, size) == size) { + nfp_err(state->cpp, "NSP HWinfo value not NULL-terminated\n"); + return -EINVAL; + } + + return 0; +} + +int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg hwinfo_set = { + { + .code = SPCODE_HWINFO_SET, + .option = size, + }, + .in_buf = buf, + .in_size = size, + }; + + return nfp_nsp_command_buf(state, &hwinfo_set); +} + +int nfp_nsp_fw_loaded(struct nfp_nsp *state) +{ + const struct nfp_nsp_command_arg arg = { + .code = SPCODE_FW_LOADED, + }; + + return __nfp_nsp_command(state, &arg); +} + +int nfp_nsp_versions(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg versions = { + { + .code = SPCODE_VERSIONS, + .option = min_t(u32, size, NFP_VERSIONS_SIZE), + }, + .out_buf = buf, + .out_size = min_t(u32, size, NFP_VERSIONS_SIZE), + }; + + return nfp_nsp_command_buf(state, &versions); +} + +const char *nfp_nsp_versions_get(enum nfp_nsp_versions id, bool flash, + const u8 *buf, unsigned int size) +{ + static const u32 id2off[] = { + [NFP_VERSIONS_BSP] = NFP_VERSIONS_BSP_OFF, + [NFP_VERSIONS_CPLD] = NFP_VERSIONS_CPLD_OFF, + [NFP_VERSIONS_APP] = NFP_VERSIONS_APP_OFF, + [NFP_VERSIONS_BUNDLE] = NFP_VERSIONS_BUNDLE_OFF, + [NFP_VERSIONS_UNDI] = NFP_VERSIONS_UNDI_OFF, + [NFP_VERSIONS_NCSI] = NFP_VERSIONS_NCSI_OFF, + [NFP_VERSIONS_CFGR] = NFP_VERSIONS_CFGR_OFF, + }; + unsigned int field, buf_field_cnt, buf_off; + + if (id >= ARRAY_SIZE(id2off) || !id2off[id]) + return ERR_PTR(-EINVAL); + + field = id * 2 + flash; + + buf_field_cnt = get_unaligned_le16(buf); + if (buf_field_cnt <= field) + return ERR_PTR(-ENOENT); + + buf_off = get_unaligned_le16(buf + id2off[id] + flash * 2); + if (!buf_off) + return ERR_PTR(-ENOENT); + + if (buf_off >= size) + return ERR_PTR(-EINVAL); + if (strnlen(&buf[buf_off], size - buf_off) == size - buf_off) + return ERR_PTR(-EINVAL); + + return (const char *)&buf[buf_off]; +} + +static int +__nfp_nsp_module_eeprom(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg module_eeprom = { + { + .code = SPCODE_READ_SFF_EEPROM, + .option = size, + }, + .in_buf = buf, + .in_size = size, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &module_eeprom); +} + +int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index, + unsigned int offset, void *data, + unsigned int len, unsigned int *read_len) +{ + struct eeprom_buf { + u8 metalen; + __le16 length; + __le16 offset; + __le16 readlen; + u8 eth_index; + u8 data[]; + } __packed *buf; + int bufsz, ret; + + BUILD_BUG_ON(offsetof(struct eeprom_buf, data) % 8); + + /* Buffer must be large enough and rounded to the next block size. */ + bufsz = struct_size(buf, data, round_up(len, NSP_SFF_EEPROM_BLOCK_LEN)); + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->metalen = + offsetof(struct eeprom_buf, data) / NSP_SFF_EEPROM_BLOCK_LEN; + buf->length = cpu_to_le16(len); + buf->offset = cpu_to_le16(offset); + buf->eth_index = eth_index; + + ret = __nfp_nsp_module_eeprom(state, buf, bufsz); + + *read_len = min_t(unsigned int, len, le16_to_cpu(buf->readlen)); + if (*read_len) + memcpy(data, buf->data, *read_len); + + if (!ret && *read_len < len) + ret = -EIO; + + kfree(buf); + + return ret; +}; + +int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct nfp_nsp_command_buf_arg media = { + { + .code = SPCODE_READ_MEDIA, + .option = size, + }, + .in_buf = buf, + .in_size = size, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &media); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h new file mode 100644 index 000000000..8f5cab003 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +#ifndef NSP_NSP_H +#define NSP_NSP_H 1 + +#include <linux/types.h> +#include <linux/if_ether.h> + +struct firmware; +struct nfp_cpp; +struct nfp_nsp; + +struct nfp_nsp *nfp_nsp_open(struct nfp_cpp *cpp); +void nfp_nsp_close(struct nfp_nsp *state); +u16 nfp_nsp_get_abi_ver_major(struct nfp_nsp *state); +u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state); +int nfp_nsp_wait(struct nfp_nsp *state); +int nfp_nsp_device_soft_reset(struct nfp_nsp *state); +int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw); +int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw); +int nfp_nsp_mac_reinit(struct nfp_nsp *state); +int nfp_nsp_load_stored_fw(struct nfp_nsp *state); +int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf, + unsigned int size, const char *default_val); +int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_nsp_fw_loaded(struct nfp_nsp *state); +int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index, + unsigned int offset, void *data, + unsigned int len, unsigned int *read_len); + +static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 20; +} + +static inline bool nfp_nsp_has_stored_fw_load(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 23; +} + +static inline bool nfp_nsp_has_hwinfo_lookup(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 24; +} + +static inline bool nfp_nsp_has_hwinfo_set(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 25; +} + +static inline bool nfp_nsp_has_fw_loaded(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 25; +} + +static inline bool nfp_nsp_has_versions(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 27; +} + +static inline bool nfp_nsp_has_read_module_eeprom(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 28; +} + +static inline bool nfp_nsp_has_read_media(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 33; +} + +enum nfp_eth_interface { + NFP_INTERFACE_NONE = 0, + NFP_INTERFACE_SFP = 1, + NFP_INTERFACE_SFPP = 10, + NFP_INTERFACE_SFP28 = 28, + NFP_INTERFACE_QSFP = 40, + NFP_INTERFACE_RJ45 = 45, + NFP_INTERFACE_CXP = 100, + NFP_INTERFACE_QSFP28 = 112, +}; + +enum nfp_eth_media { + NFP_MEDIA_DAC_PASSIVE = 0, + NFP_MEDIA_DAC_ACTIVE, + NFP_MEDIA_FIBRE, +}; + +enum nfp_eth_aneg { + NFP_ANEG_AUTO = 0, + NFP_ANEG_SEARCH, + NFP_ANEG_25G_CONSORTIUM, + NFP_ANEG_25G_IEEE, + NFP_ANEG_DISABLED, +}; + +enum nfp_eth_fec { + NFP_FEC_AUTO_BIT = 0, + NFP_FEC_BASER_BIT, + NFP_FEC_REED_SOLOMON_BIT, + NFP_FEC_DISABLED_BIT, +}; + +/* link modes about RJ45 haven't been used, so there's no mapping to them */ +enum nfp_ethtool_link_mode_list { + NFP_MEDIA_W0_RJ45_10M, + NFP_MEDIA_W0_RJ45_10M_HD, + NFP_MEDIA_W0_RJ45_100M, + NFP_MEDIA_W0_RJ45_100M_HD, + NFP_MEDIA_W0_RJ45_1G, + NFP_MEDIA_W0_RJ45_2P5G, + NFP_MEDIA_W0_RJ45_5G, + NFP_MEDIA_W0_RJ45_10G, + NFP_MEDIA_1000BASE_CX, + NFP_MEDIA_1000BASE_KX, + NFP_MEDIA_10GBASE_KX4, + NFP_MEDIA_10GBASE_KR, + NFP_MEDIA_10GBASE_CX4, + NFP_MEDIA_10GBASE_CR, + NFP_MEDIA_10GBASE_SR, + NFP_MEDIA_10GBASE_ER, + NFP_MEDIA_25GBASE_KR, + NFP_MEDIA_25GBASE_KR_S, + NFP_MEDIA_25GBASE_CR, + NFP_MEDIA_25GBASE_CR_S, + NFP_MEDIA_25GBASE_SR, + NFP_MEDIA_40GBASE_CR4, + NFP_MEDIA_40GBASE_KR4, + NFP_MEDIA_40GBASE_SR4, + NFP_MEDIA_40GBASE_LR4, + NFP_MEDIA_50GBASE_KR, + NFP_MEDIA_50GBASE_SR, + NFP_MEDIA_50GBASE_CR, + NFP_MEDIA_50GBASE_LR, + NFP_MEDIA_50GBASE_ER, + NFP_MEDIA_50GBASE_FR, + NFP_MEDIA_100GBASE_KR4, + NFP_MEDIA_100GBASE_SR4, + NFP_MEDIA_100GBASE_CR4, + NFP_MEDIA_100GBASE_KP4, + NFP_MEDIA_100GBASE_CR10, + NFP_MEDIA_LINK_MODES_NUMBER +}; + +#define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) +#define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) +#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) +#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT) + +/* Defines the valid values of the 'abi_drv_reset' hwinfo key */ +#define NFP_NSP_DRV_RESET_DISK 0 +#define NFP_NSP_DRV_RESET_ALWAYS 1 +#define NFP_NSP_DRV_RESET_NEVER 2 +#define NFP_NSP_DRV_RESET_DEFAULT "0" + +/* Defines the valid values of the 'app_fw_from_flash' hwinfo key */ +#define NFP_NSP_APP_FW_LOAD_DISK 0 +#define NFP_NSP_APP_FW_LOAD_FLASH 1 +#define NFP_NSP_APP_FW_LOAD_PREF 2 +#define NFP_NSP_APP_FW_LOAD_DEFAULT "2" + +/* Define the default value for the 'abi_drv_load_ifc' key */ +#define NFP_NSP_DRV_LOAD_IFC_DEFAULT "0x10ff" + +/** + * struct nfp_eth_table - ETH table information + * @count: number of table entries + * @max_index: max of @index fields of all @ports + * @ports: table of ports + * + * @ports.eth_index: port index according to legacy ethX numbering + * @ports.index: chip-wide first channel index + * @ports.nbi: NBI index + * @ports.base: first channel index (within NBI) + * @ports.lanes: number of channels + * @ports.speed: interface speed (in Mbps) + * @ports.interface: interface (module) plugged in + * @ports.media: media type of the @interface + * @ports.fec: forward error correction mode + * @ports.act_fec: active forward error correction mode + * @ports.aneg: auto negotiation mode + * @ports.mac_addr: interface MAC address + * @ports.label_port: port id + * @ports.label_subport: id of interface within port (for split ports) + * @ports.enabled: is enabled? + * @ports.tx_enabled: is TX enabled? + * @ports.rx_enabled: is RX enabled? + * @ports.override_changed: is media reconfig pending? + * + * @ports.port_type: one of %PORT_* defines for ethtool + * @ports.port_lanes: total number of lanes on the port (sum of lanes of all + * subports) + * @ports.is_split: is interface part of a split port + * @ports.fec_modes_supported: bitmap of FEC modes supported + */ +struct nfp_eth_table { + unsigned int count; + unsigned int max_index; + struct nfp_eth_table_port { + unsigned int eth_index; + unsigned int index; + unsigned int nbi; + unsigned int base; + unsigned int lanes; + unsigned int speed; + + unsigned int interface; + enum nfp_eth_media media; + + enum nfp_eth_fec fec; + enum nfp_eth_fec act_fec; + enum nfp_eth_aneg aneg; + + u8 mac_addr[ETH_ALEN]; + + u8 label_port; + u8 label_subport; + + bool enabled; + bool tx_enabled; + bool rx_enabled; + bool supp_aneg; + + bool override_changed; + + /* Computed fields */ + u8 port_type; + + unsigned int port_lanes; + + bool is_split; + + unsigned int fec_modes_supported; + } ports[]; +}; + +struct nfp_eth_table *nfp_eth_read_ports(struct nfp_cpp *cpp); +struct nfp_eth_table * +__nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp); + +int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable); +int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, + bool configed); +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); + +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state); + +static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) +{ + return !!eth_port->fec_modes_supported; +} + +static inline unsigned int +nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port) +{ + return eth_port->fec_modes_supported; +} + +struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx); +int nfp_eth_config_commit_end(struct nfp_nsp *nsp); +void nfp_eth_config_cleanup_end(struct nfp_nsp *nsp); + +int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode); +int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed); +int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes); + +/** + * struct nfp_nsp_identify - NSP static information + * @version: opaque version string + * @flags: version flags + * @br_primary: branch id of primary bootloader + * @br_secondary: branch id of secondary bootloader + * @br_nsp: branch id of NSP + * @primary: version of primarary bootloader + * @secondary: version id of secondary bootloader + * @nsp: version id of NSP + * @sensor_mask: mask of present sensors available on NIC + */ +struct nfp_nsp_identify { + char version[40]; + u8 flags; + u8 br_primary; + u8 br_secondary; + u8 br_nsp; + u16 primary; + u16 secondary; + u16 nsp; + u64 sensor_mask; +}; + +struct nfp_nsp_identify *__nfp_nsp_identify(struct nfp_nsp *nsp); + +enum nfp_nsp_sensor_id { + NFP_SENSOR_CHIP_TEMPERATURE, + NFP_SENSOR_ASSEMBLY_POWER, + NFP_SENSOR_ASSEMBLY_12V_POWER, + NFP_SENSOR_ASSEMBLY_3V3_POWER, +}; + +int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id, + long *val); + +struct nfp_eth_media_buf { + u8 eth_index; + u8 reserved[7]; + __le64 supported_modes[2]; + __le64 advertised_modes[2]; +}; + +int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm); + +#define NFP_NSP_VERSION_BUFSZ 1024 /* reasonable size, not in the ABI */ + +enum nfp_nsp_versions { + NFP_VERSIONS_BSP, + NFP_VERSIONS_CPLD, + NFP_VERSIONS_APP, + NFP_VERSIONS_BUNDLE, + NFP_VERSIONS_UNDI, + NFP_VERSIONS_NCSI, + NFP_VERSIONS_CFGR, +}; + +int nfp_nsp_versions(struct nfp_nsp *state, void *buf, unsigned int size); +const char *nfp_nsp_versions_get(enum nfp_nsp_versions id, bool flash, + const u8 *buf, unsigned int size); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c new file mode 100644 index 000000000..0997d1271 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017 Netronome Systems, Inc. */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "nfp.h" +#include "nfp_nsp.h" + +struct nsp_identify { + u8 version[40]; + u8 flags; + u8 br_primary; + u8 br_secondary; + u8 br_nsp; + __le16 primary; + __le16 secondary; + __le16 nsp; + u8 reserved[6]; + __le64 sensor_mask; +}; + +struct nfp_nsp_identify *__nfp_nsp_identify(struct nfp_nsp *nsp) +{ + struct nfp_nsp_identify *nspi = NULL; + struct nsp_identify *ni; + int ret; + + if (nfp_nsp_get_abi_ver_minor(nsp) < 15) + return NULL; + + ni = kzalloc(sizeof(*ni), GFP_KERNEL); + if (!ni) + return NULL; + + ret = nfp_nsp_read_identify(nsp, ni, sizeof(*ni)); + if (ret < 0) { + nfp_err(nfp_nsp_cpp(nsp), "reading bsp version failed %d\n", + ret); + goto exit_free; + } + + nspi = kzalloc(sizeof(*nspi), GFP_KERNEL); + if (!nspi) + goto exit_free; + + memcpy(nspi->version, ni->version, sizeof(nspi->version)); + nspi->version[sizeof(nspi->version) - 1] = '\0'; + nspi->flags = ni->flags; + nspi->br_primary = ni->br_primary; + nspi->br_secondary = ni->br_secondary; + nspi->br_nsp = ni->br_nsp; + nspi->primary = le16_to_cpu(ni->primary); + nspi->secondary = le16_to_cpu(ni->secondary); + nspi->nsp = le16_to_cpu(ni->nsp); + nspi->sensor_mask = le64_to_cpu(ni->sensor_mask); + +exit_free: + kfree(ni); + return nspi; +} + +struct nfp_sensors { + __le32 chip_temp; + __le32 assembly_power; + __le32 assembly_12v_power; + __le32 assembly_3v3_power; +}; + +int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id, + long *val) +{ + struct nfp_sensors s; + struct nfp_nsp *nsp; + int ret; + + nsp = nfp_nsp_open(cpp); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + ret = nfp_nsp_read_sensors(nsp, BIT(id), &s, sizeof(s)); + nfp_nsp_close(nsp); + + if (ret < 0) + return ret; + + switch (id) { + case NFP_SENSOR_CHIP_TEMPERATURE: + *val = le32_to_cpu(s.chip_temp); + break; + case NFP_SENSOR_ASSEMBLY_POWER: + *val = le32_to_cpu(s.assembly_power); + break; + case NFP_SENSOR_ASSEMBLY_12V_POWER: + *val = le32_to_cpu(s.assembly_12v_power); + break; + case NFP_SENSOR_ASSEMBLY_3V3_POWER: + *val = le32_to_cpu(s.assembly_3v3_power); + break; + default: + return -EINVAL; + } + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c new file mode 100644 index 000000000..570ac1bb2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -0,0 +1,675 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2017 Netronome Systems, Inc. */ + +/* Authors: David Brunecz <david.brunecz@netronome.com> + * Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason Mcmullan <jason.mcmullan@netronome.com> + */ + +#include <linux/bitfield.h> +#include <linux/ethtool.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "nfp.h" +#include "nfp_nsp.h" +#include "nfp6000/nfp6000.h" + +#define NSP_ETH_NBI_PORT_COUNT 24 +#define NSP_ETH_MAX_COUNT (2 * NSP_ETH_NBI_PORT_COUNT) +#define NSP_ETH_TABLE_SIZE (NSP_ETH_MAX_COUNT * \ + sizeof(union eth_table_entry)) + +#define NSP_ETH_PORT_LANES GENMASK_ULL(3, 0) +#define NSP_ETH_PORT_INDEX GENMASK_ULL(15, 8) +#define NSP_ETH_PORT_LABEL GENMASK_ULL(53, 48) +#define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) +#define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) +#define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) +#define NSP_ETH_PORT_SUPP_ANEG BIT_ULL(63) + +#define NSP_ETH_PORT_LANES_MASK cpu_to_le64(NSP_ETH_PORT_LANES) + +#define NSP_ETH_STATE_CONFIGURED BIT_ULL(0) +#define NSP_ETH_STATE_ENABLED BIT_ULL(1) +#define NSP_ETH_STATE_TX_ENABLED BIT_ULL(2) +#define NSP_ETH_STATE_RX_ENABLED BIT_ULL(3) +#define NSP_ETH_STATE_RATE GENMASK_ULL(11, 8) +#define NSP_ETH_STATE_INTERFACE GENMASK_ULL(19, 12) +#define NSP_ETH_STATE_MEDIA GENMASK_ULL(21, 20) +#define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) +#define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) +#define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) +#define NSP_ETH_STATE_ACT_FEC GENMASK_ULL(29, 28) + +#define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) +#define NSP_ETH_CTRL_ENABLED BIT_ULL(1) +#define NSP_ETH_CTRL_TX_ENABLED BIT_ULL(2) +#define NSP_ETH_CTRL_RX_ENABLED BIT_ULL(3) +#define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) +#define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) +#define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) +#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) +#define NSP_ETH_CTRL_SET_IDMODE BIT_ULL(8) + +enum nfp_eth_raw { + NSP_ETH_RAW_PORT = 0, + NSP_ETH_RAW_STATE, + NSP_ETH_RAW_MAC, + NSP_ETH_RAW_CONTROL, + + NSP_ETH_NUM_RAW +}; + +enum nfp_eth_rate { + RATE_INVALID = 0, + RATE_10M, + RATE_100M, + RATE_1G, + RATE_10G, + RATE_25G, +}; + +union eth_table_entry { + struct { + __le64 port; + __le64 state; + u8 mac_addr[6]; + u8 resv[2]; + __le64 control; + }; + __le64 raw[NSP_ETH_NUM_RAW]; +}; + +static const struct { + enum nfp_eth_rate rate; + unsigned int speed; +} nsp_eth_rate_tbl[] = { + { RATE_INVALID, 0, }, + { RATE_10M, SPEED_10, }, + { RATE_100M, SPEED_100, }, + { RATE_1G, SPEED_1000, }, + { RATE_10G, SPEED_10000, }, + { RATE_25G, SPEED_25000, }, +}; + +static unsigned int nfp_eth_rate2speed(enum nfp_eth_rate rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nsp_eth_rate_tbl); i++) + if (nsp_eth_rate_tbl[i].rate == rate) + return nsp_eth_rate_tbl[i].speed; + + return 0; +} + +static unsigned int nfp_eth_speed2rate(unsigned int speed) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nsp_eth_rate_tbl); i++) + if (nsp_eth_rate_tbl[i].speed == speed) + return nsp_eth_rate_tbl[i].rate; + + return RATE_INVALID; +} + +static void nfp_eth_copy_mac_reverse(u8 *dst, const u8 *src) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) + dst[ETH_ALEN - i - 1] = src[i]; +} + +static void +nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, + unsigned int index, struct nfp_eth_table_port *dst) +{ + unsigned int rate; + unsigned int fec; + u64 port, state; + + port = le64_to_cpu(src->port); + state = le64_to_cpu(src->state); + + dst->eth_index = FIELD_GET(NSP_ETH_PORT_INDEX, port); + dst->index = index; + dst->nbi = index / NSP_ETH_NBI_PORT_COUNT; + dst->base = index % NSP_ETH_NBI_PORT_COUNT; + dst->lanes = FIELD_GET(NSP_ETH_PORT_LANES, port); + + dst->enabled = FIELD_GET(NSP_ETH_STATE_ENABLED, state); + dst->tx_enabled = FIELD_GET(NSP_ETH_STATE_TX_ENABLED, state); + dst->rx_enabled = FIELD_GET(NSP_ETH_STATE_RX_ENABLED, state); + + rate = nfp_eth_rate2speed(FIELD_GET(NSP_ETH_STATE_RATE, state)); + dst->speed = dst->lanes * rate; + + dst->interface = FIELD_GET(NSP_ETH_STATE_INTERFACE, state); + dst->media = FIELD_GET(NSP_ETH_STATE_MEDIA, state); + + nfp_eth_copy_mac_reverse(dst->mac_addr, src->mac_addr); + + dst->label_port = FIELD_GET(NSP_ETH_PORT_PHYLABEL, port); + dst->label_subport = FIELD_GET(NSP_ETH_PORT_LABEL, port); + + if (nfp_nsp_get_abi_ver_minor(nsp) < 17) + return; + + dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state); + dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state); + + if (nfp_nsp_get_abi_ver_minor(nsp) < 22) + return; + + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port); + dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT; + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port); + dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT; + if (dst->fec_modes_supported) + dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; + + dst->fec = FIELD_GET(NSP_ETH_STATE_FEC, state); + dst->act_fec = dst->fec; + + if (nfp_nsp_get_abi_ver_minor(nsp) < 33) + return; + + dst->act_fec = FIELD_GET(NSP_ETH_STATE_ACT_FEC, state); + dst->supp_aneg = FIELD_GET(NSP_ETH_PORT_SUPP_ANEG, port); +} + +static void +nfp_eth_calc_port_geometry(struct nfp_cpp *cpp, struct nfp_eth_table *table) +{ + unsigned int i, j; + + for (i = 0; i < table->count; i++) { + table->max_index = max(table->max_index, table->ports[i].index); + + for (j = 0; j < table->count; j++) { + if (table->ports[i].label_port != + table->ports[j].label_port) + continue; + table->ports[i].port_lanes += table->ports[j].lanes; + + if (i == j) + continue; + if (table->ports[i].label_subport == + table->ports[j].label_subport) + nfp_warn(cpp, + "Port %d subport %d is a duplicate\n", + table->ports[i].label_port, + table->ports[i].label_subport); + + table->ports[i].is_split = true; + } + } +} + +static void +nfp_eth_calc_port_type(struct nfp_cpp *cpp, struct nfp_eth_table_port *entry) +{ + if (entry->interface == NFP_INTERFACE_NONE) { + entry->port_type = PORT_NONE; + return; + } else if (entry->interface == NFP_INTERFACE_RJ45) { + entry->port_type = PORT_TP; + return; + } + + if (entry->media == NFP_MEDIA_FIBRE) + entry->port_type = PORT_FIBRE; + else + entry->port_type = PORT_DA; +} + +/** + * nfp_eth_read_ports() - retrieve port information + * @cpp: NFP CPP handle + * + * Read the port information from the device. Returned structure should + * be freed with kfree() once no longer needed. + * + * Return: populated ETH table or NULL on error. + */ +struct nfp_eth_table *nfp_eth_read_ports(struct nfp_cpp *cpp) +{ + struct nfp_eth_table *ret; + struct nfp_nsp *nsp; + + nsp = nfp_nsp_open(cpp); + if (IS_ERR(nsp)) + return NULL; + + ret = __nfp_eth_read_ports(cpp, nsp); + nfp_nsp_close(nsp); + + return ret; +} + +struct nfp_eth_table * +__nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp) +{ + union eth_table_entry *entries; + struct nfp_eth_table *table; + int i, j, ret, cnt = 0; + + entries = kzalloc(NSP_ETH_TABLE_SIZE, GFP_KERNEL); + if (!entries) + return NULL; + + ret = nfp_nsp_read_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE); + if (ret < 0) { + nfp_err(cpp, "reading port table failed %d\n", ret); + goto err; + } + + for (i = 0; i < NSP_ETH_MAX_COUNT; i++) + if (entries[i].port & NSP_ETH_PORT_LANES_MASK) + cnt++; + + /* Some versions of flash will give us 0 instead of port count. + * For those that give a port count, verify it against the value + * calculated above. + */ + if (ret && ret != cnt) { + nfp_err(cpp, "table entry count reported (%d) does not match entries present (%d)\n", + ret, cnt); + goto err; + } + + table = kzalloc(struct_size(table, ports, cnt), GFP_KERNEL); + if (!table) + goto err; + + table->count = cnt; + for (i = 0, j = 0; i < NSP_ETH_MAX_COUNT; i++) + if (entries[i].port & NSP_ETH_PORT_LANES_MASK) + nfp_eth_port_translate(nsp, &entries[i], i, + &table->ports[j++]); + + nfp_eth_calc_port_geometry(cpp, table); + for (i = 0; i < table->count; i++) + nfp_eth_calc_port_type(cpp, &table->ports[i]); + + kfree(entries); + + return table; + +err: + kfree(entries); + return NULL; +} + +struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + int ret; + + entries = kzalloc(NSP_ETH_TABLE_SIZE, GFP_KERNEL); + if (!entries) + return ERR_PTR(-ENOMEM); + + nsp = nfp_nsp_open(cpp); + if (IS_ERR(nsp)) { + kfree(entries); + return nsp; + } + + ret = nfp_nsp_read_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE); + if (ret < 0) { + nfp_err(cpp, "reading port table failed %d\n", ret); + goto err; + } + + if (!(entries[idx].port & NSP_ETH_PORT_LANES_MASK)) { + nfp_warn(cpp, "trying to set port state on disabled port %d\n", + idx); + goto err; + } + + nfp_nsp_config_set_state(nsp, entries, idx); + return nsp; + +err: + nfp_nsp_close(nsp); + kfree(entries); + return ERR_PTR(-EIO); +} + +void nfp_eth_config_cleanup_end(struct nfp_nsp *nsp) +{ + union eth_table_entry *entries = nfp_nsp_config_entries(nsp); + + nfp_nsp_config_set_modified(nsp, false); + nfp_nsp_config_clear_state(nsp); + nfp_nsp_close(nsp); + kfree(entries); +} + +/** + * nfp_eth_config_commit_end() - perform recorded configuration changes + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * + * Perform the configuration which was requested with __nfp_eth_set_*() + * helpers and recorded in @nsp state. If device was already configured + * as requested or no __nfp_eth_set_*() operations were made no NSP command + * will be performed. + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int nfp_eth_config_commit_end(struct nfp_nsp *nsp) +{ + union eth_table_entry *entries = nfp_nsp_config_entries(nsp); + int ret = 1; + + if (nfp_nsp_config_modified(nsp)) { + ret = nfp_nsp_write_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE); + ret = ret < 0 ? ret : 0; + } + + nfp_eth_config_cleanup_end(nsp); + + return ret; +} + +/** + * nfp_eth_set_mod_enable() - set PHY module enable control bit + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @enable: Desired state + * + * Enable or disable PHY module (this usually means setting the TX lanes + * disable bits). + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + u64 reg; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + entries = nfp_nsp_config_entries(nsp); + + /* Check if we are already in requested state */ + reg = le64_to_cpu(entries[idx].state); + if (enable != FIELD_GET(NSP_ETH_CTRL_ENABLED, reg)) { + reg = le64_to_cpu(entries[idx].control); + reg &= ~NSP_ETH_CTRL_ENABLED; + reg |= FIELD_PREP(NSP_ETH_CTRL_ENABLED, enable); + entries[idx].control = cpu_to_le64(reg); + + nfp_nsp_config_set_modified(nsp, true); + } + + return nfp_eth_config_commit_end(nsp); +} + +/** + * nfp_eth_set_configured() - set PHY module configured control bit + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @configed: Desired state + * + * Set the ifup/ifdown state on the PHY. + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + u64 reg; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + /* Older ABI versions did support this feature, however this has only + * been reliable since ABI 20. + */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 20) { + nfp_eth_config_cleanup_end(nsp); + return -EOPNOTSUPP; + } + + entries = nfp_nsp_config_entries(nsp); + + /* Check if we are already in requested state */ + reg = le64_to_cpu(entries[idx].state); + if (configed != FIELD_GET(NSP_ETH_STATE_CONFIGURED, reg)) { + reg = le64_to_cpu(entries[idx].control); + reg &= ~NSP_ETH_CTRL_CONFIGURED; + reg |= FIELD_PREP(NSP_ETH_CTRL_CONFIGURED, configed); + entries[idx].control = cpu_to_le64(reg); + + nfp_nsp_config_set_modified(nsp, true); + } + + return nfp_eth_config_commit_end(nsp); +} + +static int +nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, + const u64 mask, const unsigned int shift, + unsigned int val, const u64 ctrl_bit) +{ + union eth_table_entry *entries = nfp_nsp_config_entries(nsp); + unsigned int idx = nfp_nsp_config_idx(nsp); + u64 reg; + + /* Note: set features were added in ABI 0.14 but the error + * codes were initially not populated correctly. + */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 17) { + nfp_err(nfp_nsp_cpp(nsp), + "set operations not supported, please update flash\n"); + return -EOPNOTSUPP; + } + + /* Check if we are already in requested state */ + reg = le64_to_cpu(entries[idx].raw[raw_idx]); + if (val == (reg & mask) >> shift) + return 0; + + reg &= ~mask; + reg |= (val << shift) & mask; + entries[idx].raw[raw_idx] = cpu_to_le64(reg); + + entries[idx].control |= cpu_to_le64(ctrl_bit); + + nfp_nsp_config_set_modified(nsp, true); + + return 0; +} + +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + u64 reg; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + /* Set this features were added in ABI 0.32 */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 32) { + nfp_err(nfp_nsp_cpp(nsp), + "set id mode operation not supported, please update flash\n"); + nfp_eth_config_cleanup_end(nsp); + return -EOPNOTSUPP; + } + + entries = nfp_nsp_config_entries(nsp); + + reg = le64_to_cpu(entries[idx].control); + reg &= ~NSP_ETH_CTRL_SET_IDMODE; + reg |= FIELD_PREP(NSP_ETH_CTRL_SET_IDMODE, state); + entries[idx].control = cpu_to_le64(reg); + + nfp_nsp_config_set_modified(nsp, true); + + return nfp_eth_config_commit_end(nsp); +} + +#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ + ({ \ + __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ + nfp_eth_set_bit_config(nsp, raw_idx, mask, __bf_shf(mask), \ + val, ctrl_bit); \ + }) + +/** + * __nfp_eth_set_aneg() - set PHY autonegotiation control bit + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @mode: Desired autonegotiation mode + * + * Allow/disallow PHY module to advertise/perform autonegotiation. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_ANEG, mode, + NSP_ETH_CTRL_SET_ANEG); +} + +/** + * __nfp_eth_set_fec() - set PHY forward error correction control bit + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @mode: Desired fec mode + * + * Set the PHY module forward error correction mode. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +static int __nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_FEC, mode, + NSP_ETH_CTRL_SET_FEC); +} + +/** + * nfp_eth_set_fec() - set PHY forward error correction control mode + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @mode: Desired fec mode + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode) +{ + struct nfp_nsp *nsp; + int err; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + err = __nfp_eth_set_fec(nsp, mode); + if (err) { + nfp_eth_config_cleanup_end(nsp); + return err; + } + + return nfp_eth_config_commit_end(nsp); +} + +/** + * __nfp_eth_set_speed() - set interface speed/rate + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @speed: Desired speed (per lane) + * + * Set lane speed. Provided @speed value should be subport speed divided + * by number of lanes this subport is spanning (i.e. 10000 for 40G, 25000 for + * 50G, etc.) + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) +{ + enum nfp_eth_rate rate; + + rate = nfp_eth_speed2rate(speed); + if (rate == RATE_INVALID) { + nfp_warn(nfp_nsp_cpp(nsp), + "could not find matching lane rate for speed %u\n", + speed); + return -EINVAL; + } + + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_RATE, rate, + NSP_ETH_CTRL_SET_RATE); +} + +/** + * __nfp_eth_set_split() - set interface lane split + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @lanes: Desired lanes per port + * + * Set number of lanes in the port. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, + lanes, NSP_ETH_CTRL_SET_LANES); +} + +int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm) +{ + struct nfp_nsp *nsp; + int ret; + + nsp = nfp_nsp_open(cpp); + if (IS_ERR(nsp)) { + nfp_err(cpp, "Failed to access the NSP: %pe\n", nsp); + return PTR_ERR(nsp); + } + + if (!nfp_nsp_has_read_media(nsp)) { + nfp_warn(cpp, "Reading media link modes not supported. Please update flash\n"); + ret = -EOPNOTSUPP; + goto exit_close_nsp; + } + + ret = nfp_nsp_read_media(nsp, ethm, sizeof(*ethm)); + if (ret) + nfp_err(cpp, "Reading media link modes failed: %pe\n", ERR_PTR(ret)); + +exit_close_nsp: + nfp_nsp_close(nsp); + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c new file mode 100644 index 000000000..ce7492a6a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_resource.c + * Author: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + */ +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "crc32.h" +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp6000/nfp6000.h" + +#define NFP_RESOURCE_TBL_TARGET NFP_CPP_TARGET_MU +#define NFP_RESOURCE_TBL_BASE 0x8100000000ULL + +/* NFP Resource Table self-identifier */ +#define NFP_RESOURCE_TBL_NAME "nfp.res" +#define NFP_RESOURCE_TBL_KEY 0x00000000 /* Special key for entry 0 */ + +#define NFP_RESOURCE_ENTRY_NAME_SZ 8 + +/** + * struct nfp_resource_entry - Resource table entry + * @mutex: NFP CPP Lock + * @mutex.owner: NFP CPP Lock, interface owner + * @mutex.key: NFP CPP Lock, posix_crc32(name, 8) + * @region: Memory region descriptor + * @region.name: ASCII, zero padded name + * @region.reserved: padding + * @region.cpp_action: CPP Action + * @region.cpp_token: CPP Token + * @region.cpp_target: CPP Target ID + * @region.page_offset: 256-byte page offset into target's CPP address + * @region.page_size: size, in 256-byte pages + */ +struct nfp_resource_entry { + struct nfp_resource_entry_mutex { + u32 owner; + u32 key; + } mutex; + struct nfp_resource_entry_region { + u8 name[NFP_RESOURCE_ENTRY_NAME_SZ]; + u8 reserved[5]; + u8 cpp_action; + u8 cpp_token; + u8 cpp_target; + u32 page_offset; + u32 page_size; + } region; +}; + +#define NFP_RESOURCE_TBL_SIZE 4096 +#define NFP_RESOURCE_TBL_ENTRIES (NFP_RESOURCE_TBL_SIZE / \ + sizeof(struct nfp_resource_entry)) + +struct nfp_resource { + char name[NFP_RESOURCE_ENTRY_NAME_SZ + 1]; + u32 cpp_id; + u64 addr; + u64 size; + struct nfp_cpp_mutex *mutex; +}; + +static int nfp_cpp_resource_find(struct nfp_cpp *cpp, struct nfp_resource *res) +{ + struct nfp_resource_entry entry; + u32 cpp_id, key; + int ret, i; + + cpp_id = NFP_CPP_ID(NFP_RESOURCE_TBL_TARGET, 3, 0); /* Atomic read */ + + /* Search for a matching entry */ + if (!strcmp(res->name, NFP_RESOURCE_TBL_NAME)) { + nfp_err(cpp, "Grabbing device lock not supported\n"); + return -EOPNOTSUPP; + } + key = crc32_posix(res->name, NFP_RESOURCE_ENTRY_NAME_SZ); + + for (i = 0; i < NFP_RESOURCE_TBL_ENTRIES; i++) { + u64 addr = NFP_RESOURCE_TBL_BASE + + sizeof(struct nfp_resource_entry) * i; + + ret = nfp_cpp_read(cpp, cpp_id, addr, &entry, sizeof(entry)); + if (ret != sizeof(entry)) + return -EIO; + + if (entry.mutex.key != key) + continue; + + /* Found key! */ + res->mutex = + nfp_cpp_mutex_alloc(cpp, + NFP_RESOURCE_TBL_TARGET, addr, key); + res->cpp_id = NFP_CPP_ID(entry.region.cpp_target, + entry.region.cpp_action, + entry.region.cpp_token); + res->addr = (u64)entry.region.page_offset << 8; + res->size = (u64)entry.region.page_size << 8; + + return 0; + } + + return -ENOENT; +} + +static int +nfp_resource_try_acquire(struct nfp_cpp *cpp, struct nfp_resource *res, + struct nfp_cpp_mutex *dev_mutex) +{ + int err; + + if (nfp_cpp_mutex_lock(dev_mutex)) + return -EINVAL; + + err = nfp_cpp_resource_find(cpp, res); + if (err) + goto err_unlock_dev; + + err = nfp_cpp_mutex_trylock(res->mutex); + if (err) + goto err_res_mutex_free; + + nfp_cpp_mutex_unlock(dev_mutex); + + return 0; + +err_res_mutex_free: + nfp_cpp_mutex_free(res->mutex); +err_unlock_dev: + nfp_cpp_mutex_unlock(dev_mutex); + + return err; +} + +/** + * nfp_resource_acquire() - Acquire a resource handle + * @cpp: NFP CPP handle + * @name: Name of the resource + * + * NOTE: This function locks the acquired resource + * + * Return: NFP Resource handle, or ERR_PTR() + */ +struct nfp_resource * +nfp_resource_acquire(struct nfp_cpp *cpp, const char *name) +{ + unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ; + unsigned long err_at = jiffies + NFP_MUTEX_WAIT_ERROR * HZ; + struct nfp_cpp_mutex *dev_mutex; + struct nfp_resource *res; + int err; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return ERR_PTR(-ENOMEM); + + strncpy(res->name, name, NFP_RESOURCE_ENTRY_NAME_SZ); + + dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE, + NFP_RESOURCE_TBL_KEY); + if (!dev_mutex) { + kfree(res); + return ERR_PTR(-ENOMEM); + } + + for (;;) { + err = nfp_resource_try_acquire(cpp, res, dev_mutex); + if (!err) + break; + if (err != -EBUSY) + goto err_free; + + err = msleep_interruptible(1); + if (err != 0) { + err = -ERESTARTSYS; + goto err_free; + } + + if (time_is_before_eq_jiffies(warn_at)) { + warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ; + nfp_warn(cpp, "Warning: waiting for NFP resource %s\n", + name); + } + if (time_is_before_eq_jiffies(err_at)) { + nfp_err(cpp, "Error: resource %s timed out\n", name); + err = -EBUSY; + goto err_free; + } + } + + nfp_cpp_mutex_free(dev_mutex); + + return res; + +err_free: + nfp_cpp_mutex_free(dev_mutex); + kfree(res); + return ERR_PTR(err); +} + +/** + * nfp_resource_release() - Release a NFP Resource handle + * @res: NFP Resource handle + * + * NOTE: This function implictly unlocks the resource handle + */ +void nfp_resource_release(struct nfp_resource *res) +{ + nfp_cpp_mutex_unlock(res->mutex); + nfp_cpp_mutex_free(res->mutex); + kfree(res); +} + +/** + * nfp_resource_wait() - Wait for resource to appear + * @cpp: NFP CPP handle + * @name: Name of the resource + * @secs: Number of seconds to wait + * + * Wait for resource to appear in the resource table, grab and release + * its lock. The wait is jiffies-based, don't expect fine granularity. + * + * Return: 0 on success, errno otherwise. + */ +int nfp_resource_wait(struct nfp_cpp *cpp, const char *name, unsigned int secs) +{ + unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ; + unsigned long err_at = jiffies + secs * HZ; + struct nfp_resource *res; + + while (true) { + res = nfp_resource_acquire(cpp, name); + if (!IS_ERR(res)) { + nfp_resource_release(res); + return 0; + } + + if (PTR_ERR(res) != -ENOENT) { + nfp_err(cpp, "error waiting for resource %s: %ld\n", + name, PTR_ERR(res)); + return PTR_ERR(res); + } + if (time_is_before_eq_jiffies(err_at)) { + nfp_err(cpp, "timeout waiting for resource %s\n", name); + return -ETIMEDOUT; + } + if (time_is_before_eq_jiffies(warn_at)) { + warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ; + nfp_info(cpp, "waiting for NFP resource %s\n", name); + } + if (msleep_interruptible(10)) { + nfp_err(cpp, "wait for resource %s interrupted\n", + name); + return -ERESTARTSYS; + } + } +} + +/** + * nfp_resource_cpp_id() - Return the cpp_id of a resource handle + * @res: NFP Resource handle + * + * Return: NFP CPP ID + */ +u32 nfp_resource_cpp_id(struct nfp_resource *res) +{ + return res->cpp_id; +} + +/** + * nfp_resource_name() - Return the name of a resource handle + * @res: NFP Resource handle + * + * Return: const char pointer to the name of the resource + */ +const char *nfp_resource_name(struct nfp_resource *res) +{ + return res->name; +} + +/** + * nfp_resource_address() - Return the address of a resource handle + * @res: NFP Resource handle + * + * Return: Address of the resource + */ +u64 nfp_resource_address(struct nfp_resource *res) +{ + return res->addr; +} + +/** + * nfp_resource_size() - Return the size in bytes of a resource handle + * @res: NFP Resource handle + * + * Return: Size of the resource in bytes + */ +u64 nfp_resource_size(struct nfp_resource *res) +{ + return res->size; +} + +/** + * nfp_resource_table_init() - Run initial checks on the resource table + * @cpp: NFP CPP handle + * + * Start-of-day init procedure for resource table. Must be called before + * any local resource table users may exist. + * + * Return: 0 on success, -errno on failure + */ +int nfp_resource_table_init(struct nfp_cpp *cpp) +{ + struct nfp_cpp_mutex *dev_mutex; + int i, err; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE); + if (err < 0) { + nfp_err(cpp, "Error: failed to reclaim resource table mutex\n"); + return err; + } + if (err) + nfp_warn(cpp, "Warning: busted main resource table mutex\n"); + + dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE, + NFP_RESOURCE_TBL_KEY); + if (!dev_mutex) + return -ENOMEM; + + if (nfp_cpp_mutex_lock(dev_mutex)) { + nfp_err(cpp, "Error: failed to claim resource table mutex\n"); + nfp_cpp_mutex_free(dev_mutex); + return -EINVAL; + } + + /* Resource 0 is the dev_mutex, start from 1 */ + for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) { + u64 addr = NFP_RESOURCE_TBL_BASE + + sizeof(struct nfp_resource_entry) * i; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr); + if (err < 0) { + nfp_err(cpp, + "Error: failed to reclaim resource %d mutex\n", + i); + goto err_unlock; + } + if (err) + nfp_warn(cpp, "Warning: busted resource %d mutex\n", i); + } + + err = 0; +err_unlock: + nfp_cpp_mutex_unlock(dev_mutex); + nfp_cpp_mutex_free(dev_mutex); + + return err; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c new file mode 100644 index 000000000..2260c2403 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_rtsym.c + * Interface for accessing run-time symbol table + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Espen Skoglund <espen.skoglund@netronome.com> + * Francois H. Theron <francois.theron@netronome.com> + */ + +#include <asm/unaligned.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/io-64-nonatomic-hi-lo.h> + +#include "nfp.h" +#include "nfp_cpp.h" +#include "nfp_nffw.h" +#include "nfp6000/nfp6000.h" + +/* These need to match the linker */ +#define SYM_TGT_LMEM 0 +#define SYM_TGT_EMU_CACHE 0x17 + +struct nfp_rtsym_entry { + u8 type; + u8 target; + u8 island; + u8 addr_hi; + __le32 addr_lo; + __le16 name; + u8 menum; + u8 size_hi; + __le32 size_lo; +}; + +struct nfp_rtsym_table { + struct nfp_cpp *cpp; + int num; + char *strtab; + struct nfp_rtsym symtab[]; +}; + +static int nfp_meid(u8 island_id, u8 menum) +{ + return (island_id & 0x3F) == island_id && menum < 12 ? + (island_id << 4) | (menum + 4) : -1; +} + +static void +nfp_rtsym_sw_entry_init(struct nfp_rtsym_table *cache, u32 strtab_size, + struct nfp_rtsym *sw, struct nfp_rtsym_entry *fw) +{ + sw->type = fw->type; + sw->name = cache->strtab + le16_to_cpu(fw->name) % strtab_size; + sw->addr = ((u64)fw->addr_hi << 32) | le32_to_cpu(fw->addr_lo); + sw->size = ((u64)fw->size_hi << 32) | le32_to_cpu(fw->size_lo); + + switch (fw->target) { + case SYM_TGT_LMEM: + sw->target = NFP_RTSYM_TARGET_LMEM; + break; + case SYM_TGT_EMU_CACHE: + sw->target = NFP_RTSYM_TARGET_EMU_CACHE; + break; + default: + sw->target = fw->target; + break; + } + + if (fw->menum != 0xff) + sw->domain = nfp_meid(fw->island, fw->menum); + else if (fw->island != 0xff) + sw->domain = fw->island; + else + sw->domain = -1; +} + +struct nfp_rtsym_table *nfp_rtsym_table_read(struct nfp_cpp *cpp) +{ + struct nfp_rtsym_table *rtbl; + const struct nfp_mip *mip; + + mip = nfp_mip_open(cpp); + rtbl = __nfp_rtsym_table_read(cpp, mip); + nfp_mip_close(mip); + + return rtbl; +} + +struct nfp_rtsym_table * +__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip) +{ + const u32 dram = NFP_CPP_ID(NFP_CPP_TARGET_MU, NFP_CPP_ACTION_RW, 0) | + NFP_ISL_EMEM0; + u32 strtab_addr, symtab_addr, strtab_size, symtab_size; + struct nfp_rtsym_entry *rtsymtab; + struct nfp_rtsym_table *cache; + int err, n, size; + + if (!mip) + return NULL; + + nfp_mip_strtab(mip, &strtab_addr, &strtab_size); + nfp_mip_symtab(mip, &symtab_addr, &symtab_size); + + if (!symtab_size || !strtab_size || symtab_size % sizeof(*rtsymtab)) + return NULL; + + /* Align to 64 bits */ + symtab_size = round_up(symtab_size, 8); + strtab_size = round_up(strtab_size, 8); + + rtsymtab = kmalloc(symtab_size, GFP_KERNEL); + if (!rtsymtab) + return NULL; + + size = sizeof(*cache); + size += symtab_size / sizeof(*rtsymtab) * sizeof(struct nfp_rtsym); + size += strtab_size + 1; + cache = kmalloc(size, GFP_KERNEL); + if (!cache) + goto exit_free_rtsym_raw; + + cache->cpp = cpp; + cache->num = symtab_size / sizeof(*rtsymtab); + cache->strtab = (void *)&cache->symtab[cache->num]; + + err = nfp_cpp_read(cpp, dram, symtab_addr, rtsymtab, symtab_size); + if (err != symtab_size) + goto exit_free_cache; + + err = nfp_cpp_read(cpp, dram, strtab_addr, cache->strtab, strtab_size); + if (err != strtab_size) + goto exit_free_cache; + cache->strtab[strtab_size] = '\0'; + + for (n = 0; n < cache->num; n++) + nfp_rtsym_sw_entry_init(cache, strtab_size, + &cache->symtab[n], &rtsymtab[n]); + + kfree(rtsymtab); + + return cache; + +exit_free_cache: + kfree(cache); +exit_free_rtsym_raw: + kfree(rtsymtab); + return NULL; +} + +/** + * nfp_rtsym_count() - Get the number of RTSYM descriptors + * @rtbl: NFP RTsym table + * + * Return: Number of RTSYM descriptors + */ +int nfp_rtsym_count(struct nfp_rtsym_table *rtbl) +{ + if (!rtbl) + return -EINVAL; + return rtbl->num; +} + +/** + * nfp_rtsym_get() - Get the Nth RTSYM descriptor + * @rtbl: NFP RTsym table + * @idx: Index (0-based) of the RTSYM descriptor + * + * Return: const pointer to a struct nfp_rtsym descriptor, or NULL + */ +const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx) +{ + if (!rtbl) + return NULL; + if (idx >= rtbl->num) + return NULL; + + return &rtbl->symtab[idx]; +} + +/** + * nfp_rtsym_lookup() - Return the RTSYM descriptor for a symbol name + * @rtbl: NFP RTsym table + * @name: Symbol name + * + * Return: const pointer to a struct nfp_rtsym descriptor, or NULL + */ +const struct nfp_rtsym * +nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name) +{ + int n; + + if (!rtbl) + return NULL; + + for (n = 0; n < rtbl->num; n++) + if (strcmp(name, rtbl->symtab[n].name) == 0) + return &rtbl->symtab[n]; + + return NULL; +} + +u64 nfp_rtsym_size(const struct nfp_rtsym *sym) +{ + switch (sym->type) { + case NFP_RTSYM_TYPE_NONE: + pr_err("rtsym '%s': type NONE\n", sym->name); + return 0; + default: + pr_warn("rtsym '%s': unknown type: %d\n", sym->name, sym->type); + fallthrough; + case NFP_RTSYM_TYPE_OBJECT: + case NFP_RTSYM_TYPE_FUNCTION: + return sym->size; + case NFP_RTSYM_TYPE_ABS: + return sizeof(u64); + } +} + +static int +nfp_rtsym_to_dest(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u32 *cpp_id, u64 *addr) +{ + if (sym->type != NFP_RTSYM_TYPE_OBJECT) { + nfp_err(cpp, "rtsym '%s': direct access to non-object rtsym\n", + sym->name); + return -EINVAL; + } + + *addr = sym->addr + off; + + if (sym->target == NFP_RTSYM_TARGET_EMU_CACHE) { + int locality_off = nfp_cpp_mu_locality_lsb(cpp); + + *addr &= ~(NFP_MU_ADDR_ACCESS_TYPE_MASK << locality_off); + *addr |= NFP_MU_ADDR_ACCESS_TYPE_DIRECT << locality_off; + + *cpp_id = NFP_CPP_ISLAND_ID(NFP_CPP_TARGET_MU, action, token, + sym->domain); + } else if (sym->target < 0) { + nfp_err(cpp, "rtsym '%s': unhandled target encoding: %d\n", + sym->name, sym->target); + return -EINVAL; + } else { + *cpp_id = NFP_CPP_ISLAND_ID(sym->target, action, token, + sym->domain); + } + + return 0; +} + +int __nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, void *buf, size_t len) +{ + u64 sym_size = nfp_rtsym_size(sym); + u32 cpp_id; + u64 addr; + int err; + + if (off > sym_size) { + nfp_err(cpp, "rtsym '%s': read out of bounds: off: %lld + len: %zd > size: %lld\n", + sym->name, off, len, sym_size); + return -ENXIO; + } + len = min_t(size_t, len, sym_size - off); + + if (sym->type == NFP_RTSYM_TYPE_ABS) { + u8 tmp[8]; + + put_unaligned_le64(sym->addr, tmp); + memcpy(buf, &tmp[off], len); + + return len; + } + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_read(cpp, cpp_id, addr, buf, len); +} + +int nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + void *buf, size_t len) +{ + return __nfp_rtsym_read(cpp, sym, NFP_CPP_ACTION_RW, 0, off, buf, len); +} + +int __nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u32 *value) +{ + u32 cpp_id; + u64 addr; + int err; + + if (off + 4 > nfp_rtsym_size(sym)) { + nfp_err(cpp, "rtsym '%s': readl out of bounds: off: %lld + 4 > size: %lld\n", + sym->name, off, nfp_rtsym_size(sym)); + return -ENXIO; + } + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_readl(cpp, cpp_id, addr, value); +} + +int nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u32 *value) +{ + return __nfp_rtsym_readl(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value); +} + +int __nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u64 *value) +{ + u32 cpp_id; + u64 addr; + int err; + + if (off + 8 > nfp_rtsym_size(sym)) { + nfp_err(cpp, "rtsym '%s': readq out of bounds: off: %lld + 8 > size: %lld\n", + sym->name, off, nfp_rtsym_size(sym)); + return -ENXIO; + } + + if (sym->type == NFP_RTSYM_TYPE_ABS) { + *value = sym->addr; + return 0; + } + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_readq(cpp, cpp_id, addr, value); +} + +int nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u64 *value) +{ + return __nfp_rtsym_readq(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value); +} + +int __nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, void *buf, size_t len) +{ + u64 sym_size = nfp_rtsym_size(sym); + u32 cpp_id; + u64 addr; + int err; + + if (off > sym_size) { + nfp_err(cpp, "rtsym '%s': write out of bounds: off: %lld + len: %zd > size: %lld\n", + sym->name, off, len, sym_size); + return -ENXIO; + } + len = min_t(size_t, len, sym_size - off); + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_write(cpp, cpp_id, addr, buf, len); +} + +int nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + void *buf, size_t len) +{ + return __nfp_rtsym_write(cpp, sym, NFP_CPP_ACTION_RW, 0, off, buf, len); +} + +int __nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u32 value) +{ + u32 cpp_id; + u64 addr; + int err; + + if (off + 4 > nfp_rtsym_size(sym)) { + nfp_err(cpp, "rtsym '%s': writel out of bounds: off: %lld + 4 > size: %lld\n", + sym->name, off, nfp_rtsym_size(sym)); + return -ENXIO; + } + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_writel(cpp, cpp_id, addr, value); +} + +int nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u32 value) +{ + return __nfp_rtsym_writel(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value); +} + +int __nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, + u8 action, u8 token, u64 off, u64 value) +{ + u32 cpp_id; + u64 addr; + int err; + + if (off + 8 > nfp_rtsym_size(sym)) { + nfp_err(cpp, "rtsym '%s': writeq out of bounds: off: %lld + 8 > size: %lld\n", + sym->name, off, nfp_rtsym_size(sym)); + return -ENXIO; + } + + err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr); + if (err) + return err; + + return nfp_cpp_writeq(cpp, cpp_id, addr, value); +} + +int nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off, + u64 value) +{ + return __nfp_rtsym_writeq(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value); +} + +/** + * nfp_rtsym_read_le() - Read a simple unsigned scalar value from symbol + * @rtbl: NFP RTsym table + * @name: Symbol name + * @error: Poniter to error code (optional) + * + * Lookup a symbol, map, read it and return it's value. Value of the symbol + * will be interpreted as a simple little-endian unsigned value. Symbol can + * be 4 or 8 bytes in size. + * + * Return: value read, on error sets the error and returns ~0ULL. + */ +u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, + int *error) +{ + const struct nfp_rtsym *sym; + u32 val32; + u64 val; + int err; + + sym = nfp_rtsym_lookup(rtbl, name); + if (!sym) { + err = -ENOENT; + goto exit; + } + + switch (nfp_rtsym_size(sym)) { + case 4: + err = nfp_rtsym_readl(rtbl->cpp, sym, 0, &val32); + val = val32; + break; + case 8: + err = nfp_rtsym_readq(rtbl->cpp, sym, 0, &val); + break; + default: + nfp_err(rtbl->cpp, + "rtsym '%s': unsupported or non-scalar size: %lld\n", + name, nfp_rtsym_size(sym)); + err = -EINVAL; + break; + } + +exit: + if (error) + *error = err; + + if (err) + return ~0ULL; + return val; +} + +/** + * nfp_rtsym_write_le() - Write an unsigned scalar value to a symbol + * @rtbl: NFP RTsym table + * @name: Symbol name + * @value: Value to write + * + * Lookup a symbol and write a value to it. Symbol can be 4 or 8 bytes in size. + * If 4 bytes then the lower 32-bits of 'value' are used. Value will be + * written as simple little-endian unsigned value. + * + * Return: 0 on success or error code. + */ +int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name, + u64 value) +{ + const struct nfp_rtsym *sym; + int err; + + sym = nfp_rtsym_lookup(rtbl, name); + if (!sym) + return -ENOENT; + + switch (nfp_rtsym_size(sym)) { + case 4: + err = nfp_rtsym_writel(rtbl->cpp, sym, 0, value); + break; + case 8: + err = nfp_rtsym_writeq(rtbl->cpp, sym, 0, value); + break; + default: + nfp_err(rtbl->cpp, + "rtsym '%s': unsupported or non-scalar size: %lld\n", + name, nfp_rtsym_size(sym)); + err = -EINVAL; + break; + } + + return err; +} + +u8 __iomem * +nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, + unsigned int min_size, struct nfp_cpp_area **area) +{ + const struct nfp_rtsym *sym; + u8 __iomem *mem; + u32 cpp_id; + u64 addr; + int err; + + sym = nfp_rtsym_lookup(rtbl, name); + if (!sym) + return (u8 __iomem *)ERR_PTR(-ENOENT); + + err = nfp_rtsym_to_dest(rtbl->cpp, sym, NFP_CPP_ACTION_RW, 0, 0, + &cpp_id, &addr); + if (err) { + nfp_err(rtbl->cpp, "rtsym '%s': mapping failed\n", name); + return (u8 __iomem *)ERR_PTR(err); + } + + if (sym->size < min_size) { + nfp_err(rtbl->cpp, "rtsym '%s': too small\n", name); + return (u8 __iomem *)ERR_PTR(-EINVAL); + } + + mem = nfp_cpp_map_area(rtbl->cpp, id, cpp_id, addr, sym->size, area); + if (IS_ERR(mem)) { + nfp_err(rtbl->cpp, "rtysm '%s': failed to map: %ld\n", + name, PTR_ERR(mem)); + return mem; + } + + return mem; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c new file mode 100644 index 000000000..79470f198 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c @@ -0,0 +1,742 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +/* + * nfp_target.c + * CPP Access Width Decoder + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Francois H. Theron <francois.theron@netronome.com> + */ + +#define pr_fmt(fmt) "NFP target: " fmt + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/printk.h> + +#include "nfp_cpp.h" + +#include "nfp6000/nfp6000.h" + +#define P32 1 +#define P64 2 + +/* This structure ONLY includes items that can be done with a read or write of + * 32-bit or 64-bit words. All others are not listed. + */ + +#define AT(_action, _token, _pull, _push) \ + case NFP_CPP_ID(0, (_action), (_token)): \ + return PUSHPULL((_pull), (_push)) + +static int target_rw(u32 cpp_id, int pp, int start, int len) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 0, 0, pp); + AT(1, 0, pp, 0); + AT(NFP_CPP_ACTION_RW, 0, pp, pp); + default: + return -EINVAL; + } +} + +static int nfp6000_nbi_dma(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 0, 0, P64); /* ReadNbiDma */ + AT(1, 0, P64, 0); /* WriteNbiDma */ + AT(NFP_CPP_ACTION_RW, 0, P64, P64); + default: + return -EINVAL; + } +} + +static int nfp6000_nbi_stats(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 0, 0, P32); /* ReadNbiStats */ + AT(1, 0, P32, 0); /* WriteNbiStats */ + AT(NFP_CPP_ACTION_RW, 0, P32, P32); + default: + return -EINVAL; + } +} + +static int nfp6000_nbi_tm(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 0, 0, P64); /* ReadNbiTM */ + AT(1, 0, P64, 0); /* WriteNbiTM */ + AT(NFP_CPP_ACTION_RW, 0, P64, P64); + default: + return -EINVAL; + } +} + +static int nfp6000_nbi_ppc(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 0, 0, P64); /* ReadNbiPreclassifier */ + AT(1, 0, P64, 0); /* WriteNbiPreclassifier */ + AT(NFP_CPP_ACTION_RW, 0, P64, P64); + default: + return -EINVAL; + } +} + +static int nfp6000_nbi(u32 cpp_id, u64 address) +{ + u64 rel_addr = address & 0x3fFFFF; + + if (rel_addr < (1 << 20)) + return nfp6000_nbi_dma(cpp_id); + if (rel_addr < (2 << 20)) + return nfp6000_nbi_stats(cpp_id); + if (rel_addr < (3 << 20)) + return nfp6000_nbi_tm(cpp_id); + return nfp6000_nbi_ppc(cpp_id); +} + +/* This structure ONLY includes items that can be done with a read or write of + * 32-bit or 64-bit words. All others are not listed. + */ +static int nfp6000_mu_common(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(NFP_CPP_ACTION_RW, 0, P64, P64); /* read_be/write_be */ + AT(NFP_CPP_ACTION_RW, 1, P64, P64); /* read_le/write_le */ + AT(NFP_CPP_ACTION_RW, 2, P64, P64); /* read_swap_be/write_swap_be */ + AT(NFP_CPP_ACTION_RW, 3, P64, P64); /* read_swap_le/write_swap_le */ + AT(0, 0, 0, P64); /* read_be */ + AT(0, 1, 0, P64); /* read_le */ + AT(0, 2, 0, P64); /* read_swap_be */ + AT(0, 3, 0, P64); /* read_swap_le */ + AT(1, 0, P64, 0); /* write_be */ + AT(1, 1, P64, 0); /* write_le */ + AT(1, 2, P64, 0); /* write_swap_be */ + AT(1, 3, P64, 0); /* write_swap_le */ + AT(3, 0, 0, P32); /* atomic_read */ + AT(3, 2, P32, 0); /* mask_compare_write */ + AT(4, 0, P32, 0); /* atomic_write */ + AT(4, 2, 0, 0); /* atomic_write_imm */ + AT(4, 3, 0, P32); /* swap_imm */ + AT(5, 0, P32, 0); /* set */ + AT(5, 3, 0, P32); /* test_set_imm */ + AT(6, 0, P32, 0); /* clr */ + AT(6, 3, 0, P32); /* test_clr_imm */ + AT(7, 0, P32, 0); /* add */ + AT(7, 3, 0, P32); /* test_add_imm */ + AT(8, 0, P32, 0); /* addsat */ + AT(8, 3, 0, P32); /* test_subsat_imm */ + AT(9, 0, P32, 0); /* sub */ + AT(9, 3, 0, P32); /* test_sub_imm */ + AT(10, 0, P32, 0); /* subsat */ + AT(10, 3, 0, P32); /* test_subsat_imm */ + AT(13, 0, 0, P32); /* microq128_get */ + AT(13, 1, 0, P32); /* microq128_pop */ + AT(13, 2, P32, 0); /* microq128_put */ + AT(15, 0, P32, 0); /* xor */ + AT(15, 3, 0, P32); /* test_xor_imm */ + AT(28, 0, 0, P32); /* read32_be */ + AT(28, 1, 0, P32); /* read32_le */ + AT(28, 2, 0, P32); /* read32_swap_be */ + AT(28, 3, 0, P32); /* read32_swap_le */ + AT(31, 0, P32, 0); /* write32_be */ + AT(31, 1, P32, 0); /* write32_le */ + AT(31, 2, P32, 0); /* write32_swap_be */ + AT(31, 3, P32, 0); /* write32_swap_le */ + default: + return -EINVAL; + } +} + +static int nfp6000_mu_ctm(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(16, 1, 0, P32); /* packet_read_packet_status */ + AT(17, 1, 0, P32); /* packet_credit_get */ + AT(17, 3, 0, P64); /* packet_add_thread */ + AT(18, 2, 0, P64); /* packet_free_and_return_pointer */ + AT(18, 3, 0, P64); /* packet_return_pointer */ + AT(21, 0, 0, P64); /* pe_dma_to_memory_indirect */ + AT(21, 1, 0, P64); /* pe_dma_to_memory_indirect_swap */ + AT(21, 2, 0, P64); /* pe_dma_to_memory_indirect_free */ + AT(21, 3, 0, P64); /* pe_dma_to_memory_indirect_free_swap */ + default: + return nfp6000_mu_common(cpp_id); + } +} + +static int nfp6000_mu_emu(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(18, 0, 0, P32); /* read_queue */ + AT(18, 1, 0, P32); /* read_queue_ring */ + AT(18, 2, P32, 0); /* write_queue */ + AT(18, 3, P32, 0); /* write_queue_ring */ + AT(20, 2, P32, 0); /* journal */ + AT(21, 0, 0, P32); /* get */ + AT(21, 1, 0, P32); /* get_eop */ + AT(21, 2, 0, P32); /* get_freely */ + AT(22, 0, 0, P32); /* pop */ + AT(22, 1, 0, P32); /* pop_eop */ + AT(22, 2, 0, P32); /* pop_freely */ + default: + return nfp6000_mu_common(cpp_id); + } +} + +static int nfp6000_mu_imu(u32 cpp_id) +{ + return nfp6000_mu_common(cpp_id); +} + +static int nfp6000_mu(u32 cpp_id, u64 address) +{ + int pp; + + if (address < 0x2000000000ULL) + pp = nfp6000_mu_ctm(cpp_id); + else if (address < 0x8000000000ULL) + pp = nfp6000_mu_emu(cpp_id); + else if (address < 0x9800000000ULL) + pp = nfp6000_mu_ctm(cpp_id); + else if (address < 0x9C00000000ULL) + pp = nfp6000_mu_emu(cpp_id); + else if (address < 0xA000000000ULL) + pp = nfp6000_mu_imu(cpp_id); + else + pp = nfp6000_mu_ctm(cpp_id); + + return pp; +} + +static int nfp6000_ila(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 1, 0, P32); /* read_check_error */ + AT(2, 0, 0, P32); /* read_int */ + AT(3, 0, P32, 0); /* write_int */ + default: + return target_rw(cpp_id, P32, 48, 4); + } +} + +static int nfp6000_pci(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(2, 0, 0, P32); + AT(3, 0, P32, 0); + default: + return target_rw(cpp_id, P32, 4, 4); + } +} + +static int nfp6000_crypto(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(2, 0, P64, 0); + default: + return target_rw(cpp_id, P64, 12, 4); + } +} + +static int nfp6000_cap_xpb(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 1, 0, P32); /* RingGet */ + AT(0, 2, P32, 0); /* Interthread Signal */ + AT(1, 1, P32, 0); /* RingPut */ + AT(1, 2, P32, 0); /* CTNNWr */ + AT(2, 0, 0, P32); /* ReflectRd, signal none */ + AT(2, 1, 0, P32); /* ReflectRd, signal self */ + AT(2, 2, 0, P32); /* ReflectRd, signal remote */ + AT(2, 3, 0, P32); /* ReflectRd, signal both */ + AT(3, 0, P32, 0); /* ReflectWr, signal none */ + AT(3, 1, P32, 0); /* ReflectWr, signal self */ + AT(3, 2, P32, 0); /* ReflectWr, signal remote */ + AT(3, 3, P32, 0); /* ReflectWr, signal both */ + AT(NFP_CPP_ACTION_RW, 1, P32, P32); + default: + return target_rw(cpp_id, P32, 1, 63); + } +} + +static int nfp6000_cls(u32 cpp_id) +{ + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { + AT(0, 3, P32, 0); /* xor */ + AT(2, 0, P32, 0); /* set */ + AT(2, 1, P32, 0); /* clr */ + AT(4, 0, P32, 0); /* add */ + AT(4, 1, P32, 0); /* add64 */ + AT(6, 0, P32, 0); /* sub */ + AT(6, 1, P32, 0); /* sub64 */ + AT(6, 2, P32, 0); /* subsat */ + AT(8, 2, P32, 0); /* hash_mask */ + AT(8, 3, P32, 0); /* hash_clear */ + AT(9, 0, 0, P32); /* ring_get */ + AT(9, 1, 0, P32); /* ring_pop */ + AT(9, 2, 0, P32); /* ring_get_freely */ + AT(9, 3, 0, P32); /* ring_pop_freely */ + AT(10, 0, P32, 0); /* ring_put */ + AT(10, 2, P32, 0); /* ring_journal */ + AT(14, 0, P32, 0); /* reflect_write_sig_local */ + AT(15, 1, 0, P32); /* reflect_read_sig_local */ + AT(17, 2, P32, 0); /* statisic */ + AT(24, 0, 0, P32); /* ring_read */ + AT(24, 1, P32, 0); /* ring_write */ + AT(25, 0, 0, P32); /* ring_workq_add_thread */ + AT(25, 1, P32, 0); /* ring_workq_add_work */ + default: + return target_rw(cpp_id, P32, 0, 64); + } +} + +int nfp_target_pushpull(u32 cpp_id, u64 address) +{ + switch (NFP_CPP_ID_TARGET_of(cpp_id)) { + case NFP_CPP_TARGET_NBI: + return nfp6000_nbi(cpp_id, address); + case NFP_CPP_TARGET_QDR: + return target_rw(cpp_id, P32, 24, 4); + case NFP_CPP_TARGET_ILA: + return nfp6000_ila(cpp_id); + case NFP_CPP_TARGET_MU: + return nfp6000_mu(cpp_id, address); + case NFP_CPP_TARGET_PCIE: + return nfp6000_pci(cpp_id); + case NFP_CPP_TARGET_ARM: + if (address < 0x10000) + return target_rw(cpp_id, P64, 1, 1); + else + return target_rw(cpp_id, P32, 1, 1); + case NFP_CPP_TARGET_CRYPTO: + return nfp6000_crypto(cpp_id); + case NFP_CPP_TARGET_CT_XPB: + return nfp6000_cap_xpb(cpp_id); + case NFP_CPP_TARGET_CLS: + return nfp6000_cls(cpp_id); + case 0: + return target_rw(cpp_id, P32, 4, 4); + default: + return -EINVAL; + } +} + +#undef AT +#undef P32 +#undef P64 + +/* All magic NFP-6xxx IMB 'mode' numbers here are from: + * Databook (1 August 2013) + * - System Overview and Connectivity + * -- Internal Connectivity + * --- Distributed Switch Fabric - Command Push/Pull (DSF-CPP) Bus + * ---- CPP addressing + * ----- Table 3.6. CPP Address Translation Mode Commands + */ + +#define _NIC_NFP6000_MU_LOCALITY_DIRECT 2 + +static int nfp_decode_basic(u64 addr, int *dest_island, int cpp_tgt, + int mode, bool addr40, int isld1, int isld0) +{ + int iid_lsb, idx_lsb; + + /* This function doesn't handle MU or CTXBP */ + if (cpp_tgt == NFP_CPP_TARGET_MU || cpp_tgt == NFP_CPP_TARGET_CT_XPB) + return -EINVAL; + + switch (mode) { + case 0: + /* For VQDR, in this mode for 32-bit addressing + * it would be islands 0, 16, 32 and 48 depending on channel + * and upper address bits. + * Since those are not all valid islands, most decode + * cases would result in bad island IDs, but we do them + * anyway since this is decoding an address that is already + * assumed to be used as-is to get to sram. + */ + iid_lsb = addr40 ? 34 : 26; + *dest_island = (addr >> iid_lsb) & 0x3F; + return 0; + case 1: + /* For VQDR 32-bit, this would decode as: + * Channel 0: island#0 + * Channel 1: island#0 + * Channel 2: island#1 + * Channel 3: island#1 + * That would be valid as long as both islands + * have VQDR. Let's allow this. + */ + idx_lsb = addr40 ? 39 : 31; + if (addr & BIT_ULL(idx_lsb)) + *dest_island = isld1; + else + *dest_island = isld0; + + return 0; + case 2: + /* For VQDR 32-bit: + * Channel 0: (island#0 | 0) + * Channel 1: (island#0 | 1) + * Channel 2: (island#1 | 0) + * Channel 3: (island#1 | 1) + * + * Make sure we compare against isldN values + * by clearing the LSB. + * This is what the silicon does. + */ + isld0 &= ~1; + isld1 &= ~1; + + idx_lsb = addr40 ? 39 : 31; + iid_lsb = idx_lsb - 1; + + if (addr & BIT_ULL(idx_lsb)) + *dest_island = isld1 | (int)((addr >> iid_lsb) & 1); + else + *dest_island = isld0 | (int)((addr >> iid_lsb) & 1); + + return 0; + case 3: + /* In this mode the data address starts to affect the island ID + * so rather not allow it. In some really specific case + * one could use this to send the upper half of the + * VQDR channel to another MU, but this is getting very + * specific. + * However, as above for mode 0, this is the decoder + * and the caller should validate the resulting IID. + * This blindly does what the silicon would do. + */ + isld0 &= ~3; + isld1 &= ~3; + + idx_lsb = addr40 ? 39 : 31; + iid_lsb = idx_lsb - 2; + + if (addr & BIT_ULL(idx_lsb)) + *dest_island = isld1 | (int)((addr >> iid_lsb) & 3); + else + *dest_island = isld0 | (int)((addr >> iid_lsb) & 3); + + return 0; + default: + return -EINVAL; + } +} + +static int nfp_encode_basic_qdr(u64 addr, int dest_island, int cpp_tgt, + int mode, bool addr40, int isld1, int isld0) +{ + int v, ret; + + /* Full Island ID and channel bits overlap? */ + ret = nfp_decode_basic(addr, &v, cpp_tgt, mode, addr40, isld1, isld0); + if (ret) + return ret; + + /* The current address won't go where expected? */ + if (dest_island != -1 && dest_island != v) + return -EINVAL; + + /* If dest_island was -1, we don't care where it goes. */ + return 0; +} + +/* Try each option, take first one that fits. + * Not sure if we would want to do some smarter + * searching and prefer 0 or non-0 island IDs. + */ +static int nfp_encode_basic_search(u64 *addr, int dest_island, int *isld, + int iid_lsb, int idx_lsb, int v_max) +{ + int i, v; + + for (i = 0; i < 2; i++) + for (v = 0; v < v_max; v++) { + if (dest_island != (isld[i] | v)) + continue; + + *addr &= ~GENMASK_ULL(idx_lsb, iid_lsb); + *addr |= ((u64)i << idx_lsb); + *addr |= ((u64)v << iid_lsb); + return 0; + } + + return -ENODEV; +} + +/* For VQDR, we may not modify the Channel bits, which might overlap + * with the Index bit. When it does, we need to ensure that isld0 == isld1. + */ +static int nfp_encode_basic(u64 *addr, int dest_island, int cpp_tgt, + int mode, bool addr40, int isld1, int isld0) +{ + int iid_lsb, idx_lsb; + int isld[2]; + u64 v64; + + isld[0] = isld0; + isld[1] = isld1; + + /* This function doesn't handle MU or CTXBP */ + if (cpp_tgt == NFP_CPP_TARGET_MU || cpp_tgt == NFP_CPP_TARGET_CT_XPB) + return -EINVAL; + + switch (mode) { + case 0: + if (cpp_tgt == NFP_CPP_TARGET_QDR && !addr40) + /* In this specific mode we'd rather not modify + * the address but we can verify if the existing + * contents will point to a valid island. + */ + return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, + mode, addr40, isld1, isld0); + + iid_lsb = addr40 ? 34 : 26; + /* <39:34> or <31:26> */ + v64 = GENMASK_ULL(iid_lsb + 5, iid_lsb); + *addr &= ~v64; + *addr |= ((u64)dest_island << iid_lsb) & v64; + return 0; + case 1: + if (cpp_tgt == NFP_CPP_TARGET_QDR && !addr40) + return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, + mode, addr40, isld1, isld0); + + idx_lsb = addr40 ? 39 : 31; + if (dest_island == isld0) { + /* Only need to clear the Index bit */ + *addr &= ~BIT_ULL(idx_lsb); + return 0; + } + + if (dest_island == isld1) { + /* Only need to set the Index bit */ + *addr |= BIT_ULL(idx_lsb); + return 0; + } + + return -ENODEV; + case 2: + /* iid<0> = addr<30> = channel<0> + * channel<1> = addr<31> = Index + */ + if (cpp_tgt == NFP_CPP_TARGET_QDR && !addr40) + /* Special case where we allow channel bits to + * be set before hand and with them select an island. + * So we need to confirm that it's at least plausible. + */ + return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, + mode, addr40, isld1, isld0); + + /* Make sure we compare against isldN values + * by clearing the LSB. + * This is what the silicon does. + */ + isld[0] &= ~1; + isld[1] &= ~1; + + idx_lsb = addr40 ? 39 : 31; + iid_lsb = idx_lsb - 1; + + return nfp_encode_basic_search(addr, dest_island, isld, + iid_lsb, idx_lsb, 2); + case 3: + if (cpp_tgt == NFP_CPP_TARGET_QDR && !addr40) + /* iid<0> = addr<29> = data + * iid<1> = addr<30> = channel<0> + * channel<1> = addr<31> = Index + */ + return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, + mode, addr40, isld1, isld0); + + isld[0] &= ~3; + isld[1] &= ~3; + + idx_lsb = addr40 ? 39 : 31; + iid_lsb = idx_lsb - 2; + + return nfp_encode_basic_search(addr, dest_island, isld, + iid_lsb, idx_lsb, 4); + default: + return -EINVAL; + } +} + +static int nfp_encode_mu(u64 *addr, int dest_island, int mode, + bool addr40, int isld1, int isld0) +{ + int iid_lsb, idx_lsb, locality_lsb; + int isld[2]; + u64 v64; + int da; + + isld[0] = isld0; + isld[1] = isld1; + locality_lsb = nfp_cppat_mu_locality_lsb(mode, addr40); + + if (((*addr >> locality_lsb) & 3) == _NIC_NFP6000_MU_LOCALITY_DIRECT) + da = 1; + else + da = 0; + + switch (mode) { + case 0: + iid_lsb = addr40 ? 32 : 24; + v64 = GENMASK_ULL(iid_lsb + 5, iid_lsb); + *addr &= ~v64; + *addr |= (((u64)dest_island) << iid_lsb) & v64; + return 0; + case 1: + if (da) { + iid_lsb = addr40 ? 32 : 24; + v64 = GENMASK_ULL(iid_lsb + 5, iid_lsb); + *addr &= ~v64; + *addr |= (((u64)dest_island) << iid_lsb) & v64; + return 0; + } + + idx_lsb = addr40 ? 37 : 29; + if (dest_island == isld0) { + *addr &= ~BIT_ULL(idx_lsb); + return 0; + } + + if (dest_island == isld1) { + *addr |= BIT_ULL(idx_lsb); + return 0; + } + + return -ENODEV; + case 2: + if (da) { + iid_lsb = addr40 ? 32 : 24; + v64 = GENMASK_ULL(iid_lsb + 5, iid_lsb); + *addr &= ~v64; + *addr |= (((u64)dest_island) << iid_lsb) & v64; + return 0; + } + + /* Make sure we compare against isldN values + * by clearing the LSB. + * This is what the silicon does. + */ + isld[0] &= ~1; + isld[1] &= ~1; + + idx_lsb = addr40 ? 37 : 29; + iid_lsb = idx_lsb - 1; + + return nfp_encode_basic_search(addr, dest_island, isld, + iid_lsb, idx_lsb, 2); + case 3: + /* Only the EMU will use 40 bit addressing. Silently + * set the direct locality bit for everyone else. + * The SDK toolchain uses dest_island <= 0 to test + * for atypical address encodings to support access + * to local-island CTM with a 32-but address (high-locality + * is effewctively ignored and just used for + * routing to island #0). + */ + if (dest_island > 0 && (dest_island < 24 || dest_island > 26)) { + *addr |= ((u64)_NIC_NFP6000_MU_LOCALITY_DIRECT) + << locality_lsb; + da = 1; + } + + if (da) { + iid_lsb = addr40 ? 32 : 24; + v64 = GENMASK_ULL(iid_lsb + 5, iid_lsb); + *addr &= ~v64; + *addr |= (((u64)dest_island) << iid_lsb) & v64; + return 0; + } + + isld[0] &= ~3; + isld[1] &= ~3; + + idx_lsb = addr40 ? 37 : 29; + iid_lsb = idx_lsb - 2; + + return nfp_encode_basic_search(addr, dest_island, isld, + iid_lsb, idx_lsb, 4); + default: + return -EINVAL; + } +} + +static int nfp_cppat_addr_encode(u64 *addr, int dest_island, int cpp_tgt, + int mode, bool addr40, int isld1, int isld0) +{ + switch (cpp_tgt) { + case NFP_CPP_TARGET_NBI: + case NFP_CPP_TARGET_QDR: + case NFP_CPP_TARGET_ILA: + case NFP_CPP_TARGET_PCIE: + case NFP_CPP_TARGET_ARM: + case NFP_CPP_TARGET_CRYPTO: + case NFP_CPP_TARGET_CLS: + return nfp_encode_basic(addr, dest_island, cpp_tgt, mode, + addr40, isld1, isld0); + + case NFP_CPP_TARGET_MU: + return nfp_encode_mu(addr, dest_island, mode, + addr40, isld1, isld0); + + case NFP_CPP_TARGET_CT_XPB: + if (mode != 1 || addr40) + return -EINVAL; + *addr &= ~GENMASK_ULL(29, 24); + *addr |= ((u64)dest_island << 24) & GENMASK_ULL(29, 24); + return 0; + default: + return -EINVAL; + } +} + +int nfp_target_cpp(u32 cpp_island_id, u64 cpp_island_address, + u32 *cpp_target_id, u64 *cpp_target_address, + const u32 *imb_table) +{ + const int island = NFP_CPP_ID_ISLAND_of(cpp_island_id); + const int target = NFP_CPP_ID_TARGET_of(cpp_island_id); + u32 imb; + int err; + + if (target < 0 || target >= 16) { + pr_err("Invalid CPP target: %d\n", target); + return -EINVAL; + } + + if (island == 0) { + /* Already translated */ + *cpp_target_id = cpp_island_id; + *cpp_target_address = cpp_island_address; + return 0; + } + + /* CPP + Island only allowed on systems with IMB tables */ + if (!imb_table) + return -EINVAL; + + imb = imb_table[target]; + + *cpp_target_address = cpp_island_address; + err = nfp_cppat_addr_encode(cpp_target_address, island, target, + ((imb >> 13) & 7), ((imb >> 12) & 1), + ((imb >> 6) & 0x3f), ((imb >> 0) & 0x3f)); + if (err) { + pr_err("Can't encode CPP address: %d\n", err); + return err; + } + + *cpp_target_id = NFP_CPP_ID(target, + NFP_CPP_ID_ACTION_of(cpp_island_id), + NFP_CPP_ID_TOKEN_of(cpp_island_id)); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nic/main.c b/drivers/net/ethernet/netronome/nfp/nic/main.c new file mode 100644 index 000000000..aea857920 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nic/main.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017 Netronome Systems, Inc. */ + +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" + +static int nfp_nic_init(struct nfp_app *app) +{ + struct nfp_pf *pf = app->pf; + + if (pf->eth_tbl && pf->max_data_vnics != pf->eth_tbl->count) { + nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n", + pf->max_data_vnics, pf->eth_tbl->count); + return -EINVAL; + } + + return 0; +} + +static int nfp_nic_sriov_enable(struct nfp_app *app, int num_vfs) +{ + return 0; +} + +static void nfp_nic_sriov_disable(struct nfp_app *app) +{ +} + +const struct nfp_app_type app_nic = { + .id = NFP_APP_CORE_NIC, + .name = "nic", + + .init = nfp_nic_init, + .vnic_alloc = nfp_app_nic_vnic_alloc, + + .sriov_enable = nfp_nic_sriov_enable, + .sriov_disable = nfp_nic_sriov_disable, +}; |