diff options
Diffstat (limited to '')
18 files changed, 6835 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/thunderx/Makefile b/src/seastar/dpdk/drivers/net/thunderx/Makefile new file mode 100644 index 00000000..706250b8 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/Makefile @@ -0,0 +1,68 @@ +# BSD LICENSE +# +# Copyright(c) 2016 Cavium Networks. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Cavium Networks nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#

include $(RTE_SDK)/mk/rte.vars.mk

#
# Name of the library archive built from this directory
#
LIB = librte_pmd_thunderx_nicvf.a

CFLAGS += $(WERROR_FLAGS)

# nicvf_hw.c calls log2(), so the math library is required at link time
LDLIBS += -lm

EXPORT_MAP := rte_pmd_thunderx_nicvf_version.map

LIBABIVER := 1

# Every object compiled from base/*.c additionally receives CFLAGS_BASE_DRIVER
OBJS_BASE_DRIVER=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
$(foreach obj, $(OBJS_BASE_DRIVER), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))

# Let make locate the base driver sources without a directory prefix
VPATH += $(SRCDIR)/base

#
# All sources are collected in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_hw.c
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_mbox.c
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_bsvf.c
SRCS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += nicvf_svf.c

# Extra optimisation flags applied only to the Rx/Tx object
ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
CFLAGS_nicvf_rxtx.o += -fno-prefetch-loop-arrays
endif
CFLAGS_nicvf_rxtx.o += -Ofast

include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.c b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.c new file mode 100644 index 00000000..49a2646d --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.c @@ -0,0 +1,72 @@
/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium networks Ltd. 2016.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
+ * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <assert.h> +#include <stddef.h> +#include <err.h> + +#include "nicvf_bsvf.h" +#include "nicvf_plat.h" + +static STAILQ_HEAD(, svf_entry) head = STAILQ_HEAD_INITIALIZER(head); + +void +nicvf_bsvf_push(struct svf_entry *entry) +{ + assert(entry != NULL); + assert(entry->vf != NULL); + + STAILQ_INSERT_TAIL(&head, entry, next); +} + +struct svf_entry * +nicvf_bsvf_pop(void) +{ + struct svf_entry *entry; + + assert(!STAILQ_EMPTY(&head)); + + entry = STAILQ_FIRST(&head); + + assert(entry != NULL); + assert(entry->vf != NULL); + + STAILQ_REMOVE_HEAD(&head, next); + + return entry; +} + +int +nicvf_bsvf_empty(void) +{ + return STAILQ_EMPTY(&head); +} diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.h b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.h new file mode 100644 index 00000000..fb9b2484 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_bsvf.h @@ -0,0 +1,76 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 
2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __THUNDERX_NICVF_BSVF_H__ +#define __THUNDERX_NICVF_BSVF_H__ + +#include <sys/queue.h> + +struct nicvf; + +/** + * The base queue structure to hold secondary qsets. + */ +struct svf_entry { + STAILQ_ENTRY(svf_entry) next; /**< Next element's pointer */ + struct nicvf *vf; /**< Holder of a secondary qset */ +}; + +/** + * Enqueue new entry to secondary qsets. + * + * @param entry + * Entry to be enqueued. 
+ */ +void +nicvf_bsvf_push(struct svf_entry *entry); + +/** + * Dequeue an entry from secondary qsets. + * + * @return + * Dequeued entry. + */ +struct svf_entry * +nicvf_bsvf_pop(void); + +/** + * Check if the queue of secondary qsets is empty. + * + * @return + * 0 on non-empty + * otherwise empty + */ +int +nicvf_bsvf_empty(void); + +#endif /* __THUNDERX_NICVF_BSVF_H__ */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.c b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.c new file mode 100644 index 00000000..04b3b69c --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.c @@ -0,0 +1,930 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <math.h> +#include <errno.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "nicvf_plat.h" + +struct nicvf_reg_info { + uint32_t offset; + const char *name; +}; + +#define NICVF_REG_POLL_ITER_NR (10) +#define NICVF_REG_POLL_DELAY_US (2000) +#define NICVF_REG_INFO(reg) {reg, #reg} + +static const struct nicvf_reg_info nicvf_reg_tbl[] = { + NICVF_REG_INFO(NIC_VF_CFG), + NICVF_REG_INFO(NIC_VF_PF_MAILBOX_0_1), + NICVF_REG_INFO(NIC_VF_INT), + NICVF_REG_INFO(NIC_VF_INT_W1S), + NICVF_REG_INFO(NIC_VF_ENA_W1C), + NICVF_REG_INFO(NIC_VF_ENA_W1S), + NICVF_REG_INFO(NIC_VNIC_RSS_CFG), + NICVF_REG_INFO(NIC_VNIC_RQ_GEN_CFG), +}; + +static const struct nicvf_reg_info nicvf_multi_reg_tbl[] = { + {NIC_VNIC_RSS_KEY_0_4 + 0, "NIC_VNIC_RSS_KEY_0"}, + {NIC_VNIC_RSS_KEY_0_4 + 8, "NIC_VNIC_RSS_KEY_1"}, + {NIC_VNIC_RSS_KEY_0_4 + 16, "NIC_VNIC_RSS_KEY_2"}, + {NIC_VNIC_RSS_KEY_0_4 + 24, "NIC_VNIC_RSS_KEY_3"}, + {NIC_VNIC_RSS_KEY_0_4 + 32, "NIC_VNIC_RSS_KEY_4"}, + {NIC_VNIC_TX_STAT_0_4 + 0, "NIC_VNIC_STAT_TX_OCTS"}, + {NIC_VNIC_TX_STAT_0_4 + 8, "NIC_VNIC_STAT_TX_UCAST"}, + {NIC_VNIC_TX_STAT_0_4 + 16, "NIC_VNIC_STAT_TX_BCAST"}, + {NIC_VNIC_TX_STAT_0_4 + 24, "NIC_VNIC_STAT_TX_MCAST"}, + {NIC_VNIC_TX_STAT_0_4 + 32, "NIC_VNIC_STAT_TX_DROP"}, + {NIC_VNIC_RX_STAT_0_13 + 0, "NIC_VNIC_STAT_RX_OCTS"}, + {NIC_VNIC_RX_STAT_0_13 + 8, 
"NIC_VNIC_STAT_RX_UCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 16, "NIC_VNIC_STAT_RX_BCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 24, "NIC_VNIC_STAT_RX_MCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 32, "NIC_VNIC_STAT_RX_RED"}, + {NIC_VNIC_RX_STAT_0_13 + 40, "NIC_VNIC_STAT_RX_RED_OCTS"}, + {NIC_VNIC_RX_STAT_0_13 + 48, "NIC_VNIC_STAT_RX_ORUN"}, + {NIC_VNIC_RX_STAT_0_13 + 56, "NIC_VNIC_STAT_RX_ORUN_OCTS"}, + {NIC_VNIC_RX_STAT_0_13 + 64, "NIC_VNIC_STAT_RX_FCS"}, + {NIC_VNIC_RX_STAT_0_13 + 72, "NIC_VNIC_STAT_RX_L2ERR"}, + {NIC_VNIC_RX_STAT_0_13 + 80, "NIC_VNIC_STAT_RX_DRP_BCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 88, "NIC_VNIC_STAT_RX_DRP_MCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 96, "NIC_VNIC_STAT_RX_DRP_L3BCAST"}, + {NIC_VNIC_RX_STAT_0_13 + 104, "NIC_VNIC_STAT_RX_DRP_L3MCAST"}, +}; + +static const struct nicvf_reg_info nicvf_qset_cq_reg_tbl[] = { + NICVF_REG_INFO(NIC_QSET_CQ_0_7_CFG), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_CFG2), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_THRESH), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_BASE), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_HEAD), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_TAIL), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_DOOR), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_STATUS), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_STATUS2), + NICVF_REG_INFO(NIC_QSET_CQ_0_7_DEBUG), +}; + +static const struct nicvf_reg_info nicvf_qset_rq_reg_tbl[] = { + NICVF_REG_INFO(NIC_QSET_RQ_0_7_CFG), + NICVF_REG_INFO(NIC_QSET_RQ_0_7_STATUS0), + NICVF_REG_INFO(NIC_QSET_RQ_0_7_STATUS1), +}; + +static const struct nicvf_reg_info nicvf_qset_sq_reg_tbl[] = { + NICVF_REG_INFO(NIC_QSET_SQ_0_7_CFG), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_THRESH), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_BASE), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_HEAD), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_TAIL), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_DOOR), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_STATUS), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_DEBUG), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_STATUS0), + NICVF_REG_INFO(NIC_QSET_SQ_0_7_STATUS1), +}; + +static const struct nicvf_reg_info nicvf_qset_rbdr_reg_tbl[] = { + 
NICVF_REG_INFO(NIC_QSET_RBDR_0_1_CFG), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_THRESH), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_BASE), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_HEAD), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_TAIL), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_DOOR), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_STATUS0), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_STATUS1), + NICVF_REG_INFO(NIC_QSET_RBDR_0_1_PRFCH_STATUS), +}; + +int +nicvf_base_init(struct nicvf *nic) +{ + nic->hwcap = 0; + if (nic->subsystem_device_id == 0) + return NICVF_ERR_BASE_INIT; + + if (nicvf_hw_version(nic) == PCI_SUB_DEVICE_ID_CN88XX_PASS2_NICVF) + nic->hwcap |= NICVF_CAP_TUNNEL_PARSING | NICVF_CAP_CQE_RX2; + + if (nicvf_hw_version(nic) == PCI_SUB_DEVICE_ID_CN81XX_NICVF) + nic->hwcap |= NICVF_CAP_TUNNEL_PARSING | NICVF_CAP_CQE_RX2; + + if (nicvf_hw_version(nic) == PCI_SUB_DEVICE_ID_CN83XX_NICVF) + nic->hwcap |= NICVF_CAP_TUNNEL_PARSING | NICVF_CAP_CQE_RX2 | + NICVF_CAP_DISABLE_APAD; + + return NICVF_OK; +} + +/* dump on stdout if data is NULL */ +int +nicvf_reg_dump(struct nicvf *nic, uint64_t *data) +{ + uint32_t i, q; + bool dump_stdout; + + dump_stdout = data ? 
0 : 1; + + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%24s = 0x%" PRIx64 "\n", + nicvf_reg_tbl[i].name, + nicvf_reg_read(nic, nicvf_reg_tbl[i].offset)); + else + *data++ = nicvf_reg_read(nic, nicvf_reg_tbl[i].offset); + + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_multi_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%24s = 0x%" PRIx64 "\n", + nicvf_multi_reg_tbl[i].name, + nicvf_reg_read(nic, + nicvf_multi_reg_tbl[i].offset)); + else + *data++ = nicvf_reg_read(nic, + nicvf_multi_reg_tbl[i].offset); + + for (q = 0; q < MAX_CMP_QUEUES_PER_QS; q++) + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_qset_cq_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%30s(%d) = 0x%" PRIx64 "\n", + nicvf_qset_cq_reg_tbl[i].name, q, + nicvf_queue_reg_read(nic, + nicvf_qset_cq_reg_tbl[i].offset, q)); + else + *data++ = nicvf_queue_reg_read(nic, + nicvf_qset_cq_reg_tbl[i].offset, q); + + for (q = 0; q < MAX_RCV_QUEUES_PER_QS; q++) + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_qset_rq_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%30s(%d) = 0x%" PRIx64 "\n", + nicvf_qset_rq_reg_tbl[i].name, q, + nicvf_queue_reg_read(nic, + nicvf_qset_rq_reg_tbl[i].offset, q)); + else + *data++ = nicvf_queue_reg_read(nic, + nicvf_qset_rq_reg_tbl[i].offset, q); + + for (q = 0; q < MAX_SND_QUEUES_PER_QS; q++) + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_qset_sq_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%30s(%d) = 0x%" PRIx64 "\n", + nicvf_qset_sq_reg_tbl[i].name, q, + nicvf_queue_reg_read(nic, + nicvf_qset_sq_reg_tbl[i].offset, q)); + else + *data++ = nicvf_queue_reg_read(nic, + nicvf_qset_sq_reg_tbl[i].offset, q); + + for (q = 0; q < MAX_RCV_BUF_DESC_RINGS_PER_QS; q++) + for (i = 0; i < NICVF_ARRAY_SIZE(nicvf_qset_rbdr_reg_tbl); i++) + if (dump_stdout) + nicvf_log("%30s(%d) = 0x%" PRIx64 "\n", + nicvf_qset_rbdr_reg_tbl[i].name, q, + nicvf_queue_reg_read(nic, + nicvf_qset_rbdr_reg_tbl[i].offset, q)); + else + *data++ = nicvf_queue_reg_read(nic, + nicvf_qset_rbdr_reg_tbl[i].offset, 
q); + return 0; +} + +int +nicvf_reg_get_count(void) +{ + int nr_regs; + + nr_regs = NICVF_ARRAY_SIZE(nicvf_reg_tbl); + nr_regs += NICVF_ARRAY_SIZE(nicvf_multi_reg_tbl); + nr_regs += NICVF_ARRAY_SIZE(nicvf_qset_cq_reg_tbl) * + MAX_CMP_QUEUES_PER_QS; + nr_regs += NICVF_ARRAY_SIZE(nicvf_qset_rq_reg_tbl) * + MAX_RCV_QUEUES_PER_QS; + nr_regs += NICVF_ARRAY_SIZE(nicvf_qset_sq_reg_tbl) * + MAX_SND_QUEUES_PER_QS; + nr_regs += NICVF_ARRAY_SIZE(nicvf_qset_rbdr_reg_tbl) * + MAX_RCV_BUF_DESC_RINGS_PER_QS; + + return nr_regs; +} + +static int +nicvf_qset_config_internal(struct nicvf *nic, bool enable) +{ + int ret; + struct pf_qs_cfg pf_qs_cfg = {.value = 0}; + + pf_qs_cfg.ena = enable ? 1 : 0; + pf_qs_cfg.vnic = nic->vf_id; + ret = nicvf_mbox_qset_config(nic, &pf_qs_cfg); + return ret ? NICVF_ERR_SET_QS : 0; +} + +/* Requests PF to assign and enable Qset */ +int +nicvf_qset_config(struct nicvf *nic) +{ + /* Enable Qset */ + return nicvf_qset_config_internal(nic, true); +} + +int +nicvf_qset_reclaim(struct nicvf *nic) +{ + /* Disable Qset */ + return nicvf_qset_config_internal(nic, false); +} + +static int +cmpfunc(const void *a, const void *b) +{ + return (*(const uint32_t *)a - *(const uint32_t *)b); +} + +static uint32_t +nicvf_roundup_list(uint32_t val, uint32_t list[], uint32_t entries) +{ + uint32_t i; + + qsort(list, entries, sizeof(uint32_t), cmpfunc); + for (i = 0; i < entries; i++) + if (val <= list[i]) + break; + /* Not in the list */ + if (i >= entries) + return 0; + else + return list[i]; +} + +static void +nicvf_handle_qset_err_intr(struct nicvf *nic) +{ + uint16_t qidx; + uint64_t status; + + nicvf_log("%s (VF%d)\n", __func__, nic->vf_id); + nicvf_reg_dump(nic, NULL); + + for (qidx = 0; qidx < MAX_CMP_QUEUES_PER_QS; qidx++) { + status = nicvf_queue_reg_read( + nic, NIC_QSET_CQ_0_7_STATUS, qidx); + if (!(status & NICVF_CQ_ERR_MASK)) + continue; + + if (status & NICVF_CQ_WR_FULL) + nicvf_log("[%d]NICVF_CQ_WR_FULL\n", qidx); + if (status & NICVF_CQ_WR_DISABLE) + 
nicvf_log("[%d]NICVF_CQ_WR_DISABLE\n", qidx); + if (status & NICVF_CQ_WR_FAULT) + nicvf_log("[%d]NICVF_CQ_WR_FAULT\n", qidx); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_STATUS, qidx, 0); + } + + for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + status = nicvf_queue_reg_read( + nic, NIC_QSET_SQ_0_7_STATUS, qidx); + if (!(status & NICVF_SQ_ERR_MASK)) + continue; + + if (status & NICVF_SQ_ERR_STOPPED) + nicvf_log("[%d]NICVF_SQ_ERR_STOPPED\n", qidx); + if (status & NICVF_SQ_ERR_SEND) + nicvf_log("[%d]NICVF_SQ_ERR_SEND\n", qidx); + if (status & NICVF_SQ_ERR_DPE) + nicvf_log("[%d]NICVF_SQ_ERR_DPE\n", qidx); + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_STATUS, qidx, 0); + } + + for (qidx = 0; qidx < MAX_RCV_BUF_DESC_RINGS_PER_QS; qidx++) { + status = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_STATUS0, qidx); + status &= NICVF_RBDR_FIFO_STATE_MASK; + status >>= NICVF_RBDR_FIFO_STATE_SHIFT; + + if (status == RBDR_FIFO_STATE_FAIL) + nicvf_log("[%d]RBDR_FIFO_STATE_FAIL\n", qidx); + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx, 0); + } + + nicvf_disable_all_interrupts(nic); + abort(); +} + +/* + * Handle poll mode driver interested "mbox" and "queue-set error" interrupts. + * This function is not re-entrant. + * The caller should provide proper serialization. 
+ */ +int +nicvf_reg_poll_interrupts(struct nicvf *nic) +{ + int msg = 0; + uint64_t intr; + + intr = nicvf_reg_read(nic, NIC_VF_INT); + if (intr & NICVF_INTR_MBOX_MASK) { + nicvf_reg_write(nic, NIC_VF_INT, NICVF_INTR_MBOX_MASK); + msg = nicvf_handle_mbx_intr(nic); + } + if (intr & NICVF_INTR_QS_ERR_MASK) { + nicvf_reg_write(nic, NIC_VF_INT, NICVF_INTR_QS_ERR_MASK); + nicvf_handle_qset_err_intr(nic); + } + return msg; +} + +static int +nicvf_qset_poll_reg(struct nicvf *nic, uint16_t qidx, uint32_t offset, + uint32_t bit_pos, uint32_t bits, uint64_t val) +{ + uint64_t bit_mask; + uint64_t reg_val; + int timeout = NICVF_REG_POLL_ITER_NR; + + bit_mask = (1ULL << bits) - 1; + bit_mask = (bit_mask << bit_pos); + + while (timeout) { + reg_val = nicvf_queue_reg_read(nic, offset, qidx); + if (((reg_val & bit_mask) >> bit_pos) == val) + return NICVF_OK; + nicvf_delay_us(NICVF_REG_POLL_DELAY_US); + timeout--; + } + return NICVF_ERR_REG_POLL; +} + +int +nicvf_qset_rbdr_reclaim(struct nicvf *nic, uint16_t qidx) +{ + uint64_t status; + int timeout = NICVF_REG_POLL_ITER_NR; + struct nicvf_rbdr *rbdr = nic->rbdr; + + /* Save head and tail pointers for freeing up buffers */ + if (rbdr) { + rbdr->head = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3; + rbdr->tail = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3; + rbdr->next_tail = rbdr->tail; + } + + /* Reset RBDR */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, + NICVF_RBDR_RESET); + + /* Disable RBDR */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0); + if (nicvf_qset_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, + 62, 2, 0x00)) + return NICVF_ERR_RBDR_DISABLE; + + while (1) { + status = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_PRFCH_STATUS, qidx); + if ((status & 0xFFFFFFFF) == ((status >> 32) & 0xFFFFFFFF)) + break; + nicvf_delay_us(NICVF_REG_POLL_DELAY_US); + timeout--; + if (!timeout) + return NICVF_ERR_RBDR_PREFETCH; + } + + nicvf_queue_reg_write(nic, 
NIC_QSET_RBDR_0_1_CFG, qidx, + NICVF_RBDR_RESET); + if (nicvf_qset_poll_reg(nic, qidx, + NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02)) + return NICVF_ERR_RBDR_RESET1; + + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00); + if (nicvf_qset_poll_reg(nic, qidx, + NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return NICVF_ERR_RBDR_RESET2; + + return NICVF_OK; +} + +static int +nicvf_qsize_regbit(uint32_t len, uint32_t len_shift) +{ + int val; + + val = ((uint32_t)log2(len) - len_shift); + assert(val >= NICVF_QSIZE_MIN_VAL); + assert(val <= NICVF_QSIZE_MAX_VAL); + return val; +} + +int +nicvf_qset_rbdr_config(struct nicvf *nic, uint16_t qidx) +{ + int ret; + uint64_t head, tail; + struct nicvf_rbdr *rbdr = nic->rbdr; + struct rbdr_cfg rbdr_cfg = {.value = 0}; + + ret = nicvf_qset_rbdr_reclaim(nic, qidx); + if (ret) + return ret; + + /* Set descriptor base address */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx, rbdr->phys); + + /* Enable RBDR & set queue size */ + rbdr_cfg.ena = 1; + rbdr_cfg.reset = 0; + rbdr_cfg.ldwb = 0; + rbdr_cfg.qsize = nicvf_qsize_regbit(rbdr->qlen_mask + 1, + RBDR_SIZE_SHIFT); + rbdr_cfg.avg_con = 0; + rbdr_cfg.lines = rbdr->buffsz / 128; + + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, rbdr_cfg.value); + + /* Verify proper RBDR reset */ + head = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx); + tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx); + + if (head | tail) + return NICVF_ERR_RBDR_RESET; + + return NICVF_OK; +} + +uint32_t +nicvf_qsize_rbdr_roundup(uint32_t val) +{ + uint32_t list[] = {RBDR_QUEUE_SZ_8K, RBDR_QUEUE_SZ_16K, + RBDR_QUEUE_SZ_32K, RBDR_QUEUE_SZ_64K, + RBDR_QUEUE_SZ_128K, RBDR_QUEUE_SZ_256K, + RBDR_QUEUE_SZ_512K}; + return nicvf_roundup_list(val, list, NICVF_ARRAY_SIZE(list)); +} + +int +nicvf_qset_rbdr_precharge(void *dev, struct nicvf *nic, + uint16_t ridx, rbdr_pool_get_handler handler, + uint32_t max_buffs) +{ + struct rbdr_entry_t *desc, *desc0; + struct nicvf_rbdr *rbdr 
= nic->rbdr; + uint32_t count; + nicvf_phys_addr_t phy; + + assert(rbdr != NULL); + desc = rbdr->desc; + count = 0; + /* Don't fill beyond max numbers of desc */ + while (count < rbdr->qlen_mask) { + if (count >= max_buffs) + break; + desc0 = desc + count; + phy = handler(dev, nic); + if (phy) { + desc0->full_addr = phy; + count++; + } else { + break; + } + } + nicvf_smp_wmb(); + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, ridx, count); + rbdr->tail = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_TAIL, ridx) >> 3; + rbdr->next_tail = rbdr->tail; + nicvf_smp_rmb(); + return 0; +} + +int +nicvf_qset_rbdr_active(struct nicvf *nic, uint16_t qidx) +{ + return nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx); +} + +int +nicvf_qset_sq_reclaim(struct nicvf *nic, uint16_t qidx) +{ + uint64_t head, tail; + struct sq_cfg sq_cfg; + + sq_cfg.value = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + + /* Disable send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0); + + /* Check if SQ is stopped */ + if (sq_cfg.ena && nicvf_qset_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, + NICVF_SQ_STATUS_STOPPED_BIT, 1, 0x01)) + return NICVF_ERR_SQ_DISABLE; + + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); + head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4; + tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4; + if (head | tail) + return NICVF_ERR_SQ_RESET; + + return 0; +} + +int +nicvf_qset_sq_config(struct nicvf *nic, uint16_t qidx, struct nicvf_txq *txq) +{ + int ret; + struct sq_cfg sq_cfg = {.value = 0}; + + ret = nicvf_qset_sq_reclaim(nic, qidx); + if (ret) + return ret; + + /* Send a mailbox msg to PF to config SQ */ + if (nicvf_mbox_sq_config(nic, qidx)) + return NICVF_ERR_SQ_PF_CFG; + + /* Set queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx, txq->phys); + + /* Enable send queue & set queue size */ + sq_cfg.ena = 1; + sq_cfg.reset = 
0; + sq_cfg.ldwb = 0; + sq_cfg.qsize = nicvf_qsize_regbit(txq->qlen_mask + 1, SND_QSIZE_SHIFT); + sq_cfg.tstmp_bgx_intf = 0; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg.value); + + /* Ring doorbell so that H/W restarts processing SQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0); + + return 0; +} + +uint32_t +nicvf_qsize_sq_roundup(uint32_t val) +{ + uint32_t list[] = {SND_QUEUE_SZ_1K, SND_QUEUE_SZ_2K, + SND_QUEUE_SZ_4K, SND_QUEUE_SZ_8K, + SND_QUEUE_SZ_16K, SND_QUEUE_SZ_32K, + SND_QUEUE_SZ_64K}; + return nicvf_roundup_list(val, list, NICVF_ARRAY_SIZE(list)); +} + +int +nicvf_qset_rq_reclaim(struct nicvf *nic, uint16_t qidx) +{ + /* Disable receive queue */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0); + return nicvf_mbox_rq_sync(nic); +} + +int +nicvf_qset_rq_config(struct nicvf *nic, uint16_t qidx, struct nicvf_rxq *rxq) +{ + struct pf_rq_cfg pf_rq_cfg = {.value = 0}; + struct rq_cfg rq_cfg = {.value = 0}; + + if (nicvf_qset_rq_reclaim(nic, qidx)) + return NICVF_ERR_RQ_CLAIM; + + pf_rq_cfg.strip_pre_l2 = 0; + /* First cache line of RBDR data will be allocated into L2C */ + pf_rq_cfg.caching = RQ_CACHE_ALLOC_FIRST; + pf_rq_cfg.cq_qs = nic->vf_id; + pf_rq_cfg.cq_idx = qidx; + pf_rq_cfg.rbdr_cont_qs = nic->vf_id; + pf_rq_cfg.rbdr_cont_idx = 0; + pf_rq_cfg.rbdr_strt_qs = nic->vf_id; + pf_rq_cfg.rbdr_strt_idx = 0; + + /* Send a mailbox msg to PF to config RQ */ + if (nicvf_mbox_rq_config(nic, qidx, &pf_rq_cfg)) + return NICVF_ERR_RQ_PF_CFG; + + /* Select Rx backpressure */ + if (nicvf_mbox_rq_bp_config(nic, qidx, rxq->rx_drop_en)) + return NICVF_ERR_RQ_BP_CFG; + + /* Send a mailbox msg to PF to config RQ drop */ + if (nicvf_mbox_rq_drop_config(nic, qidx, rxq->rx_drop_en)) + return NICVF_ERR_RQ_DROP_CFG; + + /* Enable Receive queue */ + rq_cfg.ena = 1; + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, rq_cfg.value); + + return 0; +} + +int +nicvf_qset_cq_reclaim(struct nicvf *nic, uint16_t qidx) +{ + uint64_t 
tail, head; + + /* Disable completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0); + if (nicvf_qset_poll_reg(nic, qidx, NIC_QSET_CQ_0_7_CFG, 42, 1, 0)) + return NICVF_ERR_CQ_DISABLE; + + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); + tail = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_TAIL, qidx) >> 9; + head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, qidx) >> 9; + if (head | tail) + return NICVF_ERR_CQ_RESET; + + /* Disable timer threshold (doesn't get reset upon CQ reset) */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0); + return 0; +} + +int +nicvf_qset_cq_config(struct nicvf *nic, uint16_t qidx, struct nicvf_rxq *rxq) +{ + int ret; + struct cq_cfg cq_cfg = {.value = 0}; + + ret = nicvf_qset_cq_reclaim(nic, qidx); + if (ret) + return ret; + + /* Set completion queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx, rxq->phys); + + cq_cfg.ena = 1; + cq_cfg.reset = 0; + /* Writes of CQE will be allocated into L2C */ + cq_cfg.caching = 1; + cq_cfg.qsize = nicvf_qsize_regbit(rxq->qlen_mask + 1, CMP_QSIZE_SHIFT); + cq_cfg.avg_con = 0; + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, cq_cfg.value); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, 0); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0); + return 0; +} + +uint32_t +nicvf_qsize_cq_roundup(uint32_t val) +{ + uint32_t list[] = {CMP_QUEUE_SZ_1K, CMP_QUEUE_SZ_2K, + CMP_QUEUE_SZ_4K, CMP_QUEUE_SZ_8K, + CMP_QUEUE_SZ_16K, CMP_QUEUE_SZ_32K, + CMP_QUEUE_SZ_64K}; + return nicvf_roundup_list(val, list, NICVF_ARRAY_SIZE(list)); +} + + +void +nicvf_vlan_hw_strip(struct nicvf *nic, bool enable) +{ + uint64_t val; + + val = nicvf_reg_read(nic, NIC_VNIC_RQ_GEN_CFG); + if (enable) + val |= (STRIP_FIRST_VLAN << 25); + else + val &= ~((STRIP_SECOND_VLAN | STRIP_FIRST_VLAN) << 25); + + nicvf_reg_write(nic, NIC_VNIC_RQ_GEN_CFG, 
val); +} + +void +nicvf_apad_config(struct nicvf *nic, bool enable) +{ + uint64_t val; + + /* APAD always enabled in this device */ + if (!(nic->hwcap & NICVF_CAP_DISABLE_APAD)) + return; + + val = nicvf_reg_read(nic, NIC_VNIC_RQ_GEN_CFG); + if (enable) + val &= ~(1ULL << NICVF_QS_RQ_DIS_APAD_SHIFT); + else + val |= (1ULL << NICVF_QS_RQ_DIS_APAD_SHIFT); + + nicvf_reg_write(nic, NIC_VNIC_RQ_GEN_CFG, val); +} + +void +nicvf_rss_set_key(struct nicvf *nic, uint8_t *key) +{ + int idx; + uint64_t addr, val; + uint64_t *keyptr = (uint64_t *)key; + + addr = NIC_VNIC_RSS_KEY_0_4; + for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { + val = nicvf_cpu_to_be_64(*keyptr); + nicvf_reg_write(nic, addr, val); + addr += sizeof(uint64_t); + keyptr++; + } +} + +void +nicvf_rss_get_key(struct nicvf *nic, uint8_t *key) +{ + int idx; + uint64_t addr, val; + uint64_t *keyptr = (uint64_t *)key; + + addr = NIC_VNIC_RSS_KEY_0_4; + for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { + val = nicvf_reg_read(nic, addr); + *keyptr = nicvf_be_to_cpu_64(val); + addr += sizeof(uint64_t); + keyptr++; + } +} + +void +nicvf_rss_set_cfg(struct nicvf *nic, uint64_t val) +{ + nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, val); +} + +uint64_t +nicvf_rss_get_cfg(struct nicvf *nic) +{ + return nicvf_reg_read(nic, NIC_VNIC_RSS_CFG); +} + +int +nicvf_rss_reta_update(struct nicvf *nic, uint8_t *tbl, uint32_t max_count) +{ + uint32_t idx; + struct nicvf_rss_reta_info *rss = &nic->rss_info; + + /* result will be stored in nic->rss_info.rss_size */ + if (nicvf_mbox_get_rss_size(nic)) + return NICVF_ERR_RSS_GET_SZ; + + assert(rss->rss_size > 0); + rss->hash_bits = (uint8_t)log2(rss->rss_size); + for (idx = 0; idx < rss->rss_size && idx < max_count; idx++) + rss->ind_tbl[idx] = tbl[idx]; + + if (nicvf_mbox_config_rss(nic)) + return NICVF_ERR_RSS_TBL_UPDATE; + + return NICVF_OK; +} + +int +nicvf_rss_reta_query(struct nicvf *nic, uint8_t *tbl, uint32_t max_count) +{ + uint32_t idx; + struct nicvf_rss_reta_info *rss = 
&nic->rss_info; + + /* result will be stored in nic->rss_info.rss_size */ + if (nicvf_mbox_get_rss_size(nic)) + return NICVF_ERR_RSS_GET_SZ; + + assert(rss->rss_size > 0); + rss->hash_bits = (uint8_t)log2(rss->rss_size); + for (idx = 0; idx < rss->rss_size && idx < max_count; idx++) + tbl[idx] = rss->ind_tbl[idx]; + + return NICVF_OK; +} + +int +nicvf_rss_config(struct nicvf *nic, uint32_t qcnt, uint64_t cfg) +{ + uint32_t idx; + uint8_t default_reta[NIC_MAX_RSS_IDR_TBL_SIZE]; + uint8_t default_key[RSS_HASH_KEY_BYTE_SIZE] = { + 0xFE, 0xED, 0x0B, 0xAD, 0xFE, 0xED, 0x0B, 0xAD, + 0xFE, 0xED, 0x0B, 0xAD, 0xFE, 0xED, 0x0B, 0xAD, + 0xFE, 0xED, 0x0B, 0xAD, 0xFE, 0xED, 0x0B, 0xAD, + 0xFE, 0xED, 0x0B, 0xAD, 0xFE, 0xED, 0x0B, 0xAD, + 0xFE, 0xED, 0x0B, 0xAD, 0xFE, 0xED, 0x0B, 0xAD + }; + + if (nic->cpi_alg != CPI_ALG_NONE) + return -EINVAL; + + if (cfg == 0) + return -EINVAL; + + /* Update default RSS key and cfg */ + nicvf_rss_set_key(nic, default_key); + nicvf_rss_set_cfg(nic, cfg); + + /* Update default RSS RETA */ + for (idx = 0; idx < NIC_MAX_RSS_IDR_TBL_SIZE; idx++) + default_reta[idx] = idx % qcnt; + + return nicvf_rss_reta_update(nic, default_reta, + NIC_MAX_RSS_IDR_TBL_SIZE); +} + +int +nicvf_rss_term(struct nicvf *nic) +{ + uint32_t idx; + uint8_t disable_rss[NIC_MAX_RSS_IDR_TBL_SIZE]; + + nicvf_rss_set_cfg(nic, 0); + /* Redirect the output to 0th queue */ + for (idx = 0; idx < NIC_MAX_RSS_IDR_TBL_SIZE; idx++) + disable_rss[idx] = 0; + + return nicvf_rss_reta_update(nic, disable_rss, + NIC_MAX_RSS_IDR_TBL_SIZE); +} + +int +nicvf_loopback_config(struct nicvf *nic, bool enable) +{ + if (enable && nic->loopback_supported == 0) + return NICVF_ERR_LOOPBACK_CFG; + + return nicvf_mbox_loopback_config(nic, enable); +} + +void +nicvf_hw_get_stats(struct nicvf *nic, struct nicvf_hw_stats *stats) +{ + stats->rx_bytes = NICVF_GET_RX_STATS(RX_OCTS); + stats->rx_ucast_frames = NICVF_GET_RX_STATS(RX_UCAST); + stats->rx_bcast_frames = NICVF_GET_RX_STATS(RX_BCAST); + 
stats->rx_mcast_frames = NICVF_GET_RX_STATS(RX_MCAST);
+	stats->rx_fcs_errors = NICVF_GET_RX_STATS(RX_FCS);
+	stats->rx_l2_errors = NICVF_GET_RX_STATS(RX_L2ERR);
+	stats->rx_drop_red = NICVF_GET_RX_STATS(RX_RED);
+	stats->rx_drop_red_bytes = NICVF_GET_RX_STATS(RX_RED_OCTS);
+	stats->rx_drop_overrun = NICVF_GET_RX_STATS(RX_ORUN);
+	stats->rx_drop_overrun_bytes = NICVF_GET_RX_STATS(RX_ORUN_OCTS);
+	stats->rx_drop_bcast = NICVF_GET_RX_STATS(RX_DRP_BCAST);
+	stats->rx_drop_mcast = NICVF_GET_RX_STATS(RX_DRP_MCAST);
+	stats->rx_drop_l3_bcast = NICVF_GET_RX_STATS(RX_DRP_L3BCAST);
+	stats->rx_drop_l3_mcast = NICVF_GET_RX_STATS(RX_DRP_L3MCAST);
+
+	stats->tx_bytes_ok = NICVF_GET_TX_STATS(TX_OCTS);
+	stats->tx_ucast_frames_ok = NICVF_GET_TX_STATS(TX_UCAST);
+	stats->tx_bcast_frames_ok = NICVF_GET_TX_STATS(TX_BCAST);
+	stats->tx_mcast_frames_ok = NICVF_GET_TX_STATS(TX_MCAST);
+	stats->tx_drops = NICVF_GET_TX_STATS(TX_DROP);
+}
+/*
+ * Fill qstats for receive queue qidx: q_rx_bytes is read from
+ * NIC_QSET_RQ_0_7_STATUS0 and q_rx_packets from NIC_QSET_RQ_0_7_STATUS1.
+ */
+void
+nicvf_hw_get_rx_qstats(struct nicvf *nic, struct nicvf_hw_rx_qstats *qstats,
+		       uint16_t qidx)
+{
+	qstats->q_rx_bytes =
+		nicvf_queue_reg_read(nic, NIC_QSET_RQ_0_7_STATUS0, qidx);
+	qstats->q_rx_packets =
+		nicvf_queue_reg_read(nic, NIC_QSET_RQ_0_7_STATUS1, qidx);
+}
+/*
+ * Fill qstats for send queue qidx: q_tx_bytes is read from
+ * NIC_QSET_SQ_0_7_STATUS0 and q_tx_packets from NIC_QSET_SQ_0_7_STATUS1.
+ */
+void
+nicvf_hw_get_tx_qstats(struct nicvf *nic, struct nicvf_hw_tx_qstats *qstats,
+		       uint16_t qidx)
+{
+	qstats->q_tx_bytes =
+		nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS0, qidx);
+	qstats->q_tx_packets =
+		nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS1, qidx);
+}
diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.h b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.h
new file mode 100644
index 00000000..14fb2feb
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw.h
@@ -0,0 +1,245 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2016.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef _THUNDERX_NICVF_HW_H
+#define _THUNDERX_NICVF_HW_H
+
+#include <stdint.h>
+
+#include "nicvf_hw_defs.h"
+
+#define PCI_VENDOR_ID_CAVIUM                       0x177D
+#define PCI_DEVICE_ID_THUNDERX_CN88XX_PASS1_NICVF  0x0011
+#define PCI_DEVICE_ID_THUNDERX_NICVF               0xA034
+#define PCI_SUB_DEVICE_ID_CN88XX_PASS1_NICVF       0xA11E
+#define PCI_SUB_DEVICE_ID_CN88XX_PASS2_NICVF       0xA134
+#define PCI_SUB_DEVICE_ID_CN81XX_NICVF             0xA234
+#define PCI_SUB_DEVICE_ID_CN83XX_NICVF             0xA334
+
+/* Element count of a true array (not valid for pointers) */
+#define NICVF_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/*
+ * Read one VF statistics counter. Both macros expand to a nicvf_reg_read()
+ * call on a variable named `nic`, which must exist in the calling scope.
+ * reg is a counter index (enum nic_stat_vnic_rx_e / nic_stat_vnic_tx_e); it
+ * is shifted left by 3 to select the matching 64-bit register. The argument
+ * is parenthesized so that an expression such as `base + i` is shifted as a
+ * whole.
+ */
+#define NICVF_GET_RX_STATS(reg) \
+	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | ((reg) << 3))
+#define NICVF_GET_TX_STATS(reg) \
+	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | ((reg) << 3))
+
+/* Capability bits kept in nic->hwcap */
+#define NICVF_CAP_TUNNEL_PARSING          (1ULL << 0)
+/* Additional word in Rx descriptor to hold optional tunneling extension info */
+#define NICVF_CAP_CQE_RX2                 (1ULL << 1)
+/* The device capable of setting NIC_CQE_RX_S[APAD] == 0 */
+#define NICVF_CAP_DISABLE_APAD            (1ULL << 2)
+
+enum nicvf_tns_mode {
+	NIC_TNS_BYPASS_MODE,
+	NIC_TNS_MODE,
+};
+
+/*
+ * Status codes returned by the base layer. NICVF_OK is 0; the error codes
+ * are consecutive negative values starting at -8191.
+ */
+enum nicvf_err_e {
+	NICVF_OK,
+	NICVF_ERR_SET_QS = -8191,/* -8191 */
+	NICVF_ERR_RESET_QS,      /* -8190 */
+	NICVF_ERR_REG_POLL,      /* -8189 */
+	NICVF_ERR_RBDR_RESET,    /* -8188 */
+	NICVF_ERR_RBDR_DISABLE,  /* -8187 */
+	NICVF_ERR_RBDR_PREFETCH, /* -8186 */
+	NICVF_ERR_RBDR_RESET1,   /* -8185 */
+	NICVF_ERR_RBDR_RESET2,   /* -8184 */
+	NICVF_ERR_RQ_CLAIM,      /* -8183 */
+	NICVF_ERR_RQ_PF_CFG,     /* -8182 */
+	NICVF_ERR_RQ_BP_CFG,     /* -8181 */
+	NICVF_ERR_RQ_DROP_CFG,   /* -8180 */
+	NICVF_ERR_CQ_DISABLE,    /* -8179 */
+	NICVF_ERR_CQ_RESET,      /* -8178 */
+	NICVF_ERR_SQ_DISABLE,    /* -8177 */
+	NICVF_ERR_SQ_RESET,      /* -8176 */
+	NICVF_ERR_SQ_PF_CFG,     /* -8175 */
+	NICVF_ERR_LOOPBACK_CFG,  /* -8174 */
+	NICVF_ERR_BASE_INIT,     /* -8173 */
+	NICVF_ERR_RSS_TBL_UPDATE,/* -8172 */
+	NICVF_ERR_RSS_GET_SZ,    /* -8171 */
+};
+
+/* Callback type taken by nicvf_qset_rbdr_precharge() */
+typedef nicvf_phys_addr_t (*rbdr_pool_get_handler)(void *dev, void *opaque);
+
+struct nicvf_hw_rx_qstats {
+	uint64_t q_rx_bytes;
+	uint64_t 
q_rx_packets;
+};
+/* Per-queue transmit counters, filled by nicvf_hw_get_tx_qstats(). */
+struct nicvf_hw_tx_qstats {
+	uint64_t q_tx_bytes;	/* from NIC_QSET_SQ_0_7_STATUS0 */
+	uint64_t q_tx_packets;	/* from NIC_QSET_SQ_0_7_STATUS1 */
+};
+/*
+ * VF-wide counters, filled by nicvf_hw_get_stats(): the rx_* fields via
+ * NICVF_GET_RX_STATS() and the tx_* fields via NICVF_GET_TX_STATS().
+ */
+struct nicvf_hw_stats {
+	uint64_t rx_bytes;
+	uint64_t rx_ucast_frames;
+	uint64_t rx_bcast_frames;
+	uint64_t rx_mcast_frames;
+	uint64_t rx_fcs_errors;
+	uint64_t rx_l2_errors;
+	uint64_t rx_drop_red;
+	uint64_t rx_drop_red_bytes;
+	uint64_t rx_drop_overrun;
+	uint64_t rx_drop_overrun_bytes;
+	uint64_t rx_drop_bcast;
+	uint64_t rx_drop_mcast;
+	uint64_t rx_drop_l3_bcast;
+	uint64_t rx_drop_l3_mcast;
+
+	uint64_t tx_bytes_ok;
+	uint64_t tx_ucast_frames_ok;
+	uint64_t tx_bcast_frames_ok;
+	uint64_t tx_mcast_frames_ok;
+	uint64_t tx_drops;
+};
+/* RSS indirection table state kept in nic->rss_info. */
+struct nicvf_rss_reta_info {
+	uint8_t hash_bits;	/* log2(rss_size), set by the reta update/query calls */
+	uint16_t rss_size;	/* refreshed by nicvf_mbox_get_rss_size() */
+	uint8_t ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE];	/* one entry per table slot */
+};
+
+/* Common structs used in DPDK and base layer are defined in DPDK layer */
+#include "../nicvf_struct.h"
+/* Compile-time guard: each of these structs must not exceed 128 bytes. */
+NICVF_STATIC_ASSERT(sizeof(struct nicvf_rbdr) <= 128);
+NICVF_STATIC_ASSERT(sizeof(struct nicvf_txq) <= 128);
+NICVF_STATIC_ASSERT(sizeof(struct nicvf_rxq) <= 128);
+/* Write val to the register at nic->reg_base + offset. */
+static inline void
+nicvf_reg_write(struct nicvf *nic, uint32_t offset, uint64_t val)
+{
+	nicvf_addr_write(nic->reg_base + offset, val);
+}
+/* Read the register at nic->reg_base + offset. */
+static inline uint64_t
+nicvf_reg_read(struct nicvf *nic, uint32_t offset)
+{
+	return nicvf_addr_read(nic->reg_base + offset);
+}
+/*
+ * Base address of the register window for queue qidx: each queue index is
+ * spaced (1 << NIC_Q_NUM_SHIFT) bytes from the previous one.
+ */
+static inline uintptr_t
+nicvf_qset_base(struct nicvf *nic, uint32_t qidx)
+{
+	return nic->reg_base + (qidx << NIC_Q_NUM_SHIFT);
+}
+/* Write val to the per-queue register `offset` of queue qidx. */
+static inline void
+nicvf_queue_reg_write(struct nicvf *nic, uint32_t offset, uint32_t qidx,
+		      uint64_t val)
+{
+	nicvf_addr_write(nicvf_qset_base(nic, qidx) + offset, val);
+}
+/* Read the per-queue register `offset` of queue qidx. */
+static inline uint64_t
+nicvf_queue_reg_read(struct nicvf *nic, uint32_t offset, uint32_t qidx)
+{
+	return nicvf_addr_read(nicvf_qset_base(nic, qidx) + offset);
+}
+/* Writes NICVF_INTR_ALL_MASK to NIC_VF_ENA_W1C and then to NIC_VF_INT. */
+static inline void
+nicvf_disable_all_interrupts(struct nicvf *nic)
+{
+	nicvf_reg_write(nic, NIC_VF_ENA_W1C, NICVF_INTR_ALL_MASK);
+	nicvf_reg_write(nic, NIC_VF_INT, 
NICVF_INTR_ALL_MASK);
+}
+/* Returns nic->subsystem_device_id. */
+static inline uint32_t
+nicvf_hw_version(struct nicvf *nic)
+{
+	return nic->subsystem_device_id;
+}
+/* Returns nic->hwcap, the field tested against the NICVF_CAP_* bits. */
+static inline uint64_t
+nicvf_hw_cap(struct nicvf *nic)
+{
+	return nic->hwcap;
+}
+/* Functions below are implemented by the base layer (nicvf_hw.c). */
+int nicvf_base_init(struct nicvf *nic);
+/* Register dump helpers */
+int nicvf_reg_get_count(void);
+int nicvf_reg_poll_interrupts(struct nicvf *nic);
+int nicvf_reg_dump(struct nicvf *nic, uint64_t *data);
+/* Queue set */
+int nicvf_qset_config(struct nicvf *nic);
+int nicvf_qset_reclaim(struct nicvf *nic);
+/* RBDR rings */
+int nicvf_qset_rbdr_config(struct nicvf *nic, uint16_t qidx);
+int nicvf_qset_rbdr_reclaim(struct nicvf *nic, uint16_t qidx);
+int nicvf_qset_rbdr_precharge(void *dev, struct nicvf *nic,
+			      uint16_t ridx, rbdr_pool_get_handler handler,
+			      uint32_t max_buffs);
+int nicvf_qset_rbdr_active(struct nicvf *nic, uint16_t qidx);
+/* RQ */
+int nicvf_qset_rq_config(struct nicvf *nic, uint16_t qidx,
+			 struct nicvf_rxq *rxq);
+int nicvf_qset_rq_reclaim(struct nicvf *nic, uint16_t qidx);
+/* CQ */
+int nicvf_qset_cq_config(struct nicvf *nic, uint16_t qidx,
+			 struct nicvf_rxq *rxq);
+int nicvf_qset_cq_reclaim(struct nicvf *nic, uint16_t qidx);
+/* SQ */
+int nicvf_qset_sq_config(struct nicvf *nic, uint16_t qidx,
+			 struct nicvf_txq *txq);
+int nicvf_qset_sq_reclaim(struct nicvf *nic, uint16_t qidx);
+/* Queue size rounding helpers */
+uint32_t nicvf_qsize_rbdr_roundup(uint32_t val);
+uint32_t nicvf_qsize_cq_roundup(uint32_t val);
+uint32_t nicvf_qsize_sq_roundup(uint32_t val);
+/* VLAN strip control */
+void nicvf_vlan_hw_strip(struct nicvf *nic, bool enable);
+/* APAD control; no effect without NICVF_CAP_DISABLE_APAD */
+void nicvf_apad_config(struct nicvf *nic, bool enable);
+/* RSS setup with default key and table / RSS teardown */
+int nicvf_rss_config(struct nicvf *nic, uint32_t qcnt, uint64_t cfg);
+int nicvf_rss_term(struct nicvf *nic);
+/* RSS indirection table; at most max_count entries of tbl are used */
+int nicvf_rss_reta_update(struct nicvf *nic, uint8_t *tbl, uint32_t max_count);
+int nicvf_rss_reta_query(struct nicvf *nic, uint8_t *tbl, uint32_t max_count);
+/* RSS hash key; key spans RSS_HASH_KEY_SIZE 64-bit registers */
+void nicvf_rss_set_key(struct nicvf *nic, uint8_t *key);
+void nicvf_rss_get_key(struct nicvf *nic, uint8_t *key);
+/* NIC_VNIC_RSS_CFG register access */
+void nicvf_rss_set_cfg(struct nicvf *nic, uint64_t val);
+uint64_t 
nicvf_rss_get_cfg(struct nicvf *nic); + +int nicvf_loopback_config(struct nicvf *nic, bool enable); + +void nicvf_hw_get_stats(struct nicvf *nic, struct nicvf_hw_stats *stats); +void nicvf_hw_get_rx_qstats(struct nicvf *nic, + struct nicvf_hw_rx_qstats *qstats, uint16_t qidx); +void nicvf_hw_get_tx_qstats(struct nicvf *nic, + struct nicvf_hw_tx_qstats *qstats, uint16_t qidx); + +#endif /* _THUNDERX_NICVF_HW_H */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h new file mode 100644 index 00000000..79f83c8d --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h @@ -0,0 +1,1223 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _THUNDERX_NICVF_HW_DEFS_H +#define _THUNDERX_NICVF_HW_DEFS_H + +#include <stdint.h> +#include <stdbool.h> + +#include "nicvf_plat.h" + +/* Virtual function register offsets */ + +#define NIC_VF_CFG (0x000020) +#define NIC_VF_PF_MAILBOX_0_1 (0x000130) +#define NIC_VF_INT (0x000200) +#define NIC_VF_INT_W1S (0x000220) +#define NIC_VF_ENA_W1C (0x000240) +#define NIC_VF_ENA_W1S (0x000260) + +#define NIC_VNIC_RSS_CFG (0x0020E0) +#define NIC_VNIC_RSS_KEY_0_4 (0x002200) +#define NIC_VNIC_TX_STAT_0_4 (0x004000) +#define NIC_VNIC_RX_STAT_0_13 (0x004100) +#define NIC_VNIC_RQ_GEN_CFG (0x010010) + +#define NIC_QSET_CQ_0_7_CFG (0x010400) +#define NIC_QSET_CQ_0_7_CFG2 (0x010408) +#define NIC_QSET_CQ_0_7_THRESH (0x010410) +#define NIC_QSET_CQ_0_7_BASE (0x010420) +#define NIC_QSET_CQ_0_7_HEAD (0x010428) +#define NIC_QSET_CQ_0_7_TAIL (0x010430) +#define NIC_QSET_CQ_0_7_DOOR (0x010438) +#define NIC_QSET_CQ_0_7_STATUS (0x010440) +#define NIC_QSET_CQ_0_7_STATUS2 (0x010448) +#define NIC_QSET_CQ_0_7_DEBUG (0x010450) + +#define NIC_QSET_RQ_0_7_CFG (0x010600) +#define NIC_QSET_RQ_0_7_STATUS0 (0x010700) +#define NIC_QSET_RQ_0_7_STATUS1 (0x010708) + +#define NIC_QSET_SQ_0_7_CFG (0x010800) +#define NIC_QSET_SQ_0_7_THRESH (0x010810) +#define NIC_QSET_SQ_0_7_BASE (0x010820) +#define NIC_QSET_SQ_0_7_HEAD (0x010828) +#define NIC_QSET_SQ_0_7_TAIL (0x010830) +#define NIC_QSET_SQ_0_7_DOOR (0x010838) +#define NIC_QSET_SQ_0_7_STATUS (0x010840) +#define 
NIC_QSET_SQ_0_7_DEBUG (0x010848) +#define NIC_QSET_SQ_0_7_STATUS0 (0x010900) +#define NIC_QSET_SQ_0_7_STATUS1 (0x010908) + +#define NIC_QSET_RBDR_0_1_CFG (0x010C00) +#define NIC_QSET_RBDR_0_1_THRESH (0x010C10) +#define NIC_QSET_RBDR_0_1_BASE (0x010C20) +#define NIC_QSET_RBDR_0_1_HEAD (0x010C28) +#define NIC_QSET_RBDR_0_1_TAIL (0x010C30) +#define NIC_QSET_RBDR_0_1_DOOR (0x010C38) +#define NIC_QSET_RBDR_0_1_STATUS0 (0x010C40) +#define NIC_QSET_RBDR_0_1_STATUS1 (0x010C48) +#define NIC_QSET_RBDR_0_1_PRFCH_STATUS (0x010C50) + +/* vNIC HW Constants */ + +#define NIC_Q_NUM_SHIFT 18 + +#define MAX_QUEUE_SET 128 +#define MAX_RCV_QUEUES_PER_QS 8 +#define MAX_RCV_BUF_DESC_RINGS_PER_QS 2 +#define MAX_SND_QUEUES_PER_QS 8 +#define MAX_CMP_QUEUES_PER_QS 8 + +#define NICVF_INTR_CQ_SHIFT 0 +#define NICVF_INTR_SQ_SHIFT 8 +#define NICVF_INTR_RBDR_SHIFT 16 +#define NICVF_INTR_PKT_DROP_SHIFT 20 +#define NICVF_INTR_TCP_TIMER_SHIFT 21 +#define NICVF_INTR_MBOX_SHIFT 22 +#define NICVF_INTR_QS_ERR_SHIFT 23 + +#define NICVF_QS_RQ_DIS_APAD_SHIFT 22 + +#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT) +#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT) +#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT) +#define NICVF_INTR_PKT_DROP_MASK (1 << NICVF_INTR_PKT_DROP_SHIFT) +#define NICVF_INTR_TCP_TIMER_MASK (1 << NICVF_INTR_TCP_TIMER_SHIFT) +#define NICVF_INTR_MBOX_MASK (1 << NICVF_INTR_MBOX_SHIFT) +#define NICVF_INTR_QS_ERR_MASK (1 << NICVF_INTR_QS_ERR_SHIFT) +#define NICVF_INTR_ALL_MASK (0x7FFFFF) + +#define NICVF_CQ_WR_FULL (1ULL << 26) +#define NICVF_CQ_WR_DISABLE (1ULL << 25) +#define NICVF_CQ_WR_FAULT (1ULL << 24) +#define NICVF_CQ_ERR_MASK (NICVF_CQ_WR_FULL |\ + NICVF_CQ_WR_DISABLE |\ + NICVF_CQ_WR_FAULT) +#define NICVF_CQ_CQE_COUNT_MASK (0xFFFF) + +#define NICVF_SQ_ERR_STOPPED (1ULL << 21) +#define NICVF_SQ_ERR_SEND (1ULL << 20) +#define NICVF_SQ_ERR_DPE (1ULL << 19) +#define NICVF_SQ_ERR_MASK (NICVF_SQ_ERR_STOPPED |\ + NICVF_SQ_ERR_SEND |\ + NICVF_SQ_ERR_DPE) 
+#define NICVF_SQ_STATUS_STOPPED_BIT (21) + +#define NICVF_RBDR_FIFO_STATE_SHIFT (62) +#define NICVF_RBDR_FIFO_STATE_MASK (3ULL << NICVF_RBDR_FIFO_STATE_SHIFT) +#define NICVF_RBDR_COUNT_MASK (0x7FFFF) + +/* Queue reset */ +#define NICVF_CQ_RESET (1ULL << 41) +#define NICVF_SQ_RESET (1ULL << 17) +#define NICVF_RBDR_RESET (1ULL << 43) + +/* RSS constants */ +#define NIC_MAX_RSS_HASH_BITS (8) +#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) +#define RSS_HASH_KEY_SIZE (5) /* 320 bit key */ +#define RSS_HASH_KEY_BYTE_SIZE (40) /* 320 bit key */ + +#define RSS_L2_EXTENDED_HASH_ENA (1 << 0) +#define RSS_IP_ENA (1 << 1) +#define RSS_TCP_ENA (1 << 2) +#define RSS_TCP_SYN_ENA (1 << 3) +#define RSS_UDP_ENA (1 << 4) +#define RSS_L4_EXTENDED_ENA (1 << 5) +#define RSS_L3_BI_DIRECTION_ENA (1 << 7) +#define RSS_L4_BI_DIRECTION_ENA (1 << 8) +#define RSS_TUN_VXLAN_ENA (1 << 9) +#define RSS_TUN_GENEVE_ENA (1 << 10) +#define RSS_TUN_NVGRE_ENA (1 << 11) + +#define RBDR_QUEUE_SZ_8K (8 * 1024) +#define RBDR_QUEUE_SZ_16K (16 * 1024) +#define RBDR_QUEUE_SZ_32K (32 * 1024) +#define RBDR_QUEUE_SZ_64K (64 * 1024) +#define RBDR_QUEUE_SZ_128K (128 * 1024) +#define RBDR_QUEUE_SZ_256K (256 * 1024) +#define RBDR_QUEUE_SZ_512K (512 * 1024) +#define RBDR_QUEUE_SZ_MAX RBDR_QUEUE_SZ_512K + +#define RBDR_SIZE_SHIFT (13) /* 8k */ + +#define SND_QUEUE_SZ_1K (1 * 1024) +#define SND_QUEUE_SZ_2K (2 * 1024) +#define SND_QUEUE_SZ_4K (4 * 1024) +#define SND_QUEUE_SZ_8K (8 * 1024) +#define SND_QUEUE_SZ_16K (16 * 1024) +#define SND_QUEUE_SZ_32K (32 * 1024) +#define SND_QUEUE_SZ_64K (64 * 1024) +#define SND_QUEUE_SZ_MAX SND_QUEUE_SZ_64K + +#define SND_QSIZE_SHIFT (10) /* 1k */ + +#define CMP_QUEUE_SZ_1K (1 * 1024) +#define CMP_QUEUE_SZ_2K (2 * 1024) +#define CMP_QUEUE_SZ_4K (4 * 1024) +#define CMP_QUEUE_SZ_8K (8 * 1024) +#define CMP_QUEUE_SZ_16K (16 * 1024) +#define CMP_QUEUE_SZ_32K (32 * 1024) +#define CMP_QUEUE_SZ_64K (64 * 1024) +#define CMP_QUEUE_SZ_MAX CMP_QUEUE_SZ_64K + +#define 
CMP_QSIZE_SHIFT (10) /* 1k */ + +#define NICVF_QSIZE_MIN_VAL (0) +#define NICVF_QSIZE_MAX_VAL (6) + +/* Min/Max packet size */ +#define NIC_HW_MIN_FRS (64) +#define NIC_HW_MAX_FRS (9200) /* 9216 max pkt including FCS */ +#define NIC_HW_MAX_SEGS (12) + +/* Descriptor alignments */ +#define NICVF_RBDR_BASE_ALIGN_BYTES (128) /* 7 bits */ +#define NICVF_CQ_BASE_ALIGN_BYTES (512) /* 9 bits */ +#define NICVF_SQ_BASE_ALIGN_BYTES (128) /* 7 bits */ + +#define NICVF_CQE_RBPTR_WORD (6) +#define NICVF_CQE_RX2_RBPTR_WORD (7) + +#define NICVF_STATIC_ASSERT(s) _Static_assert(s, #s) +#define assert_primary(nic) assert((nic)->sqs_mode == 0) + +typedef uint64_t nicvf_phys_addr_t; + +/* vNIC HW Enumerations */ + +enum nic_send_ld_type_e { + NIC_SEND_LD_TYPE_E_LDD, + NIC_SEND_LD_TYPE_E_LDT, + NIC_SEND_LD_TYPE_E_LDWB, + NIC_SEND_LD_TYPE_E_ENUM_LAST, +}; + +enum ether_type_algorithm { + ETYPE_ALG_NONE, + ETYPE_ALG_SKIP, + ETYPE_ALG_ENDPARSE, + ETYPE_ALG_VLAN, + ETYPE_ALG_VLAN_STRIP, +}; + +enum layer3_type { + L3TYPE_NONE, + L3TYPE_GRH, + L3TYPE_IPV4 = 0x4, + L3TYPE_IPV4_OPTIONS = 0x5, + L3TYPE_IPV6 = 0x6, + L3TYPE_IPV6_OPTIONS = 0x7, + L3TYPE_ET_STOP = 0xD, + L3TYPE_OTHER = 0xE, +}; + +#define NICVF_L3TYPE_OPTIONS_MASK ((uint8_t)1) +#define NICVF_L3TYPE_IPVX_MASK ((uint8_t)0x06) + +enum layer4_type { + L4TYPE_NONE, + L4TYPE_IPSEC_ESP, + L4TYPE_IPFRAG, + L4TYPE_IPCOMP, + L4TYPE_TCP, + L4TYPE_UDP, + L4TYPE_SCTP, + L4TYPE_GRE, + L4TYPE_ROCE_BTH, + L4TYPE_OTHER = 0xE, +}; + +/* CPI and RSSI configuration */ +enum cpi_algorithm_type { + CPI_ALG_NONE, + CPI_ALG_VLAN, + CPI_ALG_VLAN16, + CPI_ALG_DIFF, +}; + +enum rss_algorithm_type { + RSS_ALG_NONE, + RSS_ALG_PORT, + RSS_ALG_IP, + RSS_ALG_TCP_IP, + RSS_ALG_UDP_IP, + RSS_ALG_SCTP_IP, + RSS_ALG_GRE_IP, + RSS_ALG_ROCE, +}; + +enum rss_hash_cfg { + RSS_HASH_L2ETC, + RSS_HASH_IP, + RSS_HASH_TCP, + RSS_HASH_TCP_SYN_DIS, + RSS_HASH_UDP, + RSS_HASH_L4ETC, + RSS_HASH_ROCE, + RSS_L3_BIDI, + RSS_L4_BIDI, +}; + +/* Completion queue entry types */ +enum 
cqe_type { + CQE_TYPE_INVALID, + CQE_TYPE_RX = 0x2, + CQE_TYPE_RX_SPLIT = 0x3, + CQE_TYPE_RX_TCP = 0x4, + CQE_TYPE_SEND = 0x8, + CQE_TYPE_SEND_PTP = 0x9, +}; + +enum cqe_rx_tcp_status { + CQE_RX_STATUS_VALID_TCP_CNXT, + CQE_RX_STATUS_INVALID_TCP_CNXT = 0x0F, +}; + +enum cqe_send_status { + CQE_SEND_STATUS_GOOD, + CQE_SEND_STATUS_DESC_FAULT = 0x01, + CQE_SEND_STATUS_HDR_CONS_ERR = 0x11, + CQE_SEND_STATUS_SUBDESC_ERR = 0x12, + CQE_SEND_STATUS_IMM_SIZE_OFLOW = 0x80, + CQE_SEND_STATUS_CRC_SEQ_ERR = 0x81, + CQE_SEND_STATUS_DATA_SEQ_ERR = 0x82, + CQE_SEND_STATUS_MEM_SEQ_ERR = 0x83, + CQE_SEND_STATUS_LOCK_VIOL = 0x84, + CQE_SEND_STATUS_LOCK_UFLOW = 0x85, + CQE_SEND_STATUS_DATA_FAULT = 0x86, + CQE_SEND_STATUS_TSTMP_CONFLICT = 0x87, + CQE_SEND_STATUS_TSTMP_TIMEOUT = 0x88, + CQE_SEND_STATUS_MEM_FAULT = 0x89, + CQE_SEND_STATUS_CSUM_OVERLAP = 0x8A, + CQE_SEND_STATUS_CSUM_OVERFLOW = 0x8B, +}; + +enum cqe_rx_tcp_end_reason { + CQE_RX_TCP_END_FIN_FLAG_DET, + CQE_RX_TCP_END_INVALID_FLAG, + CQE_RX_TCP_END_TIMEOUT, + CQE_RX_TCP_END_OUT_OF_SEQ, + CQE_RX_TCP_END_PKT_ERR, + CQE_RX_TCP_END_QS_DISABLED = 0x0F, +}; + +/* Packet protocol level error enumeration */ +enum cqe_rx_err_level { + CQE_RX_ERRLVL_RE, + CQE_RX_ERRLVL_L2, + CQE_RX_ERRLVL_L3, + CQE_RX_ERRLVL_L4, +}; + +/* Packet protocol level error type enumeration */ +enum cqe_rx_err_opcode { + CQE_RX_ERR_RE_NONE, + CQE_RX_ERR_RE_PARTIAL, + CQE_RX_ERR_RE_JABBER, + CQE_RX_ERR_RE_FCS = 0x7, + CQE_RX_ERR_RE_TERMINATE = 0x9, + CQE_RX_ERR_RE_RX_CTL = 0xb, + CQE_RX_ERR_PREL2_ERR = 0x1f, + CQE_RX_ERR_L2_FRAGMENT = 0x20, + CQE_RX_ERR_L2_OVERRUN = 0x21, + CQE_RX_ERR_L2_PFCS = 0x22, + CQE_RX_ERR_L2_PUNY = 0x23, + CQE_RX_ERR_L2_MAL = 0x24, + CQE_RX_ERR_L2_OVERSIZE = 0x25, + CQE_RX_ERR_L2_UNDERSIZE = 0x26, + CQE_RX_ERR_L2_LENMISM = 0x27, + CQE_RX_ERR_L2_PCLP = 0x28, + CQE_RX_ERR_IP_NOT = 0x41, + CQE_RX_ERR_IP_CHK = 0x42, + CQE_RX_ERR_IP_MAL = 0x43, + CQE_RX_ERR_IP_MALD = 0x44, + CQE_RX_ERR_IP_HOP = 0x45, + CQE_RX_ERR_L3_ICRC = 0x46, + 
CQE_RX_ERR_L3_PCLP = 0x47, + CQE_RX_ERR_L4_MAL = 0x61, + CQE_RX_ERR_L4_CHK = 0x62, + CQE_RX_ERR_UDP_LEN = 0x63, + CQE_RX_ERR_L4_PORT = 0x64, + CQE_RX_ERR_TCP_FLAG = 0x65, + CQE_RX_ERR_TCP_OFFSET = 0x66, + CQE_RX_ERR_L4_PCLP = 0x67, + CQE_RX_ERR_RBDR_TRUNC = 0x70, +}; + +enum send_l4_csum_type { + SEND_L4_CSUM_DISABLE, + SEND_L4_CSUM_UDP, + SEND_L4_CSUM_TCP, +}; + +enum send_crc_alg { + SEND_CRCALG_CRC32, + SEND_CRCALG_CRC32C, + SEND_CRCALG_ICRC, +}; + +enum send_load_type { + SEND_LD_TYPE_LDD, + SEND_LD_TYPE_LDT, + SEND_LD_TYPE_LDWB, +}; + +enum send_mem_alg_type { + SEND_MEMALG_SET, + SEND_MEMALG_ADD = 0x08, + SEND_MEMALG_SUB = 0x09, + SEND_MEMALG_ADDLEN = 0x0A, + SEND_MEMALG_SUBLEN = 0x0B, +}; + +enum send_mem_dsz_type { + SEND_MEMDSZ_B64, + SEND_MEMDSZ_B32, + SEND_MEMDSZ_B8 = 0x03, +}; + +enum sq_subdesc_type { + SQ_DESC_TYPE_INVALID, + SQ_DESC_TYPE_HEADER, + SQ_DESC_TYPE_CRC, + SQ_DESC_TYPE_IMMEDIATE, + SQ_DESC_TYPE_GATHER, + SQ_DESC_TYPE_MEMORY, +}; + +enum l3_type_t { + L3_NONE, + L3_IPV4 = 0x04, + L3_IPV4_OPT = 0x05, + L3_IPV6 = 0x06, + L3_IPV6_OPT = 0x07, + L3_ET_STOP = 0x0D, + L3_OTHER = 0x0E +}; + +enum l4_type_t { + L4_NONE, + L4_IPSEC_ESP = 0x01, + L4_IPFRAG = 0x02, + L4_IPCOMP = 0x03, + L4_TCP = 0x04, + L4_UDP_PASS1 = 0x05, + L4_GRE = 0x07, + L4_UDP_PASS2 = 0x08, + L4_UDP_GENEVE = 0x09, + L4_UDP_VXLAN = 0x0A, + L4_NVGRE = 0x0C, + L4_OTHER = 0x0E +}; + +enum vlan_strip { + NO_STRIP, + STRIP_FIRST_VLAN, + STRIP_SECOND_VLAN, + STRIP_RESERV, +}; + +enum rbdr_state { + RBDR_FIFO_STATE_INACTIVE, + RBDR_FIFO_STATE_ACTIVE, + RBDR_FIFO_STATE_RESET, + RBDR_FIFO_STATE_FAIL, +}; + +enum rq_cache_allocation { + RQ_CACHE_ALLOC_OFF, + RQ_CACHE_ALLOC_ALL, + RQ_CACHE_ALLOC_FIRST, + RQ_CACHE_ALLOC_TWO, +}; + +enum cq_rx_errlvl_e { + CQ_ERRLVL_MAC, + CQ_ERRLVL_L2, + CQ_ERRLVL_L3, + CQ_ERRLVL_L4, +}; + +enum cq_rx_errop_e { + CQ_RX_ERROP_RE_NONE, + CQ_RX_ERROP_RE_PARTIAL = 0x1, + CQ_RX_ERROP_RE_JABBER = 0x2, + CQ_RX_ERROP_RE_FCS = 0x7, + CQ_RX_ERROP_RE_TERMINATE = 0x9, + 
CQ_RX_ERROP_RE_RX_CTL = 0xb, + CQ_RX_ERROP_PREL2_ERR = 0x1f, + CQ_RX_ERROP_L2_FRAGMENT = 0x20, + CQ_RX_ERROP_L2_OVERRUN = 0x21, + CQ_RX_ERROP_L2_PFCS = 0x22, + CQ_RX_ERROP_L2_PUNY = 0x23, + CQ_RX_ERROP_L2_MAL = 0x24, + CQ_RX_ERROP_L2_OVERSIZE = 0x25, + CQ_RX_ERROP_L2_UNDERSIZE = 0x26, + CQ_RX_ERROP_L2_LENMISM = 0x27, + CQ_RX_ERROP_L2_PCLP = 0x28, + CQ_RX_ERROP_IP_NOT = 0x41, + CQ_RX_ERROP_IP_CSUM_ERR = 0x42, + CQ_RX_ERROP_IP_MAL = 0x43, + CQ_RX_ERROP_IP_MALD = 0x44, + CQ_RX_ERROP_IP_HOP = 0x45, + CQ_RX_ERROP_L3_ICRC = 0x46, + CQ_RX_ERROP_L3_PCLP = 0x47, + CQ_RX_ERROP_L4_MAL = 0x61, + CQ_RX_ERROP_L4_CHK = 0x62, + CQ_RX_ERROP_UDP_LEN = 0x63, + CQ_RX_ERROP_L4_PORT = 0x64, + CQ_RX_ERROP_TCP_FLAG = 0x65, + CQ_RX_ERROP_TCP_OFFSET = 0x66, + CQ_RX_ERROP_L4_PCLP = 0x67, + CQ_RX_ERROP_RBDR_TRUNC = 0x70, +}; + +enum cq_tx_errop_e { + CQ_TX_ERROP_GOOD, + CQ_TX_ERROP_DESC_FAULT = 0x10, + CQ_TX_ERROP_HDR_CONS_ERR = 0x11, + CQ_TX_ERROP_SUBDC_ERR = 0x12, + CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80, + CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81, + CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82, + CQ_TX_ERROP_LOCK_VIOL = 0x83, + CQ_TX_ERROP_DATA_FAULT = 0x84, + CQ_TX_ERROP_TSTMP_CONFLICT = 0x85, + CQ_TX_ERROP_TSTMP_TIMEOUT = 0x86, + CQ_TX_ERROP_MEM_FAULT = 0x87, + CQ_TX_ERROP_CK_OVERLAP = 0x88, + CQ_TX_ERROP_CK_OFLOW = 0x89, + CQ_TX_ERROP_ENUM_LAST = 0x8a, +}; + +enum rq_sq_stats_reg_offset { + RQ_SQ_STATS_OCTS, + RQ_SQ_STATS_PKTS, +}; + +enum nic_stat_vnic_rx_e { + RX_OCTS, + RX_UCAST, + RX_BCAST, + RX_MCAST, + RX_RED, + RX_RED_OCTS, + RX_ORUN, + RX_ORUN_OCTS, + RX_FCS, + RX_L2ERR, + RX_DRP_BCAST, + RX_DRP_MCAST, + RX_DRP_L3BCAST, + RX_DRP_L3MCAST, +}; + +enum nic_stat_vnic_tx_e { + TX_OCTS, + TX_UCAST, + TX_BCAST, + TX_MCAST, + TX_DROP, +}; + +/* vNIC HW Register structures */ + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t cqe_type:4; + uint64_t stdn_fault:1; + uint64_t rsvd0:1; + uint64_t rq_qs:7; + uint64_t rq_idx:3; + uint64_t rsvd1:12; + uint64_t 
rss_alg:4; + uint64_t rsvd2:4; + uint64_t rb_cnt:4; + uint64_t vlan_found:1; + uint64_t vlan_stripped:1; + uint64_t vlan2_found:1; + uint64_t vlan2_stripped:1; + uint64_t l4_type:4; + uint64_t l3_type:4; + uint64_t l2_present:1; + uint64_t err_level:3; + uint64_t err_opcode:8; +#else + uint64_t err_opcode:8; + uint64_t err_level:3; + uint64_t l2_present:1; + uint64_t l3_type:4; + uint64_t l4_type:4; + uint64_t vlan2_stripped:1; + uint64_t vlan2_found:1; + uint64_t vlan_stripped:1; + uint64_t vlan_found:1; + uint64_t rb_cnt:4; + uint64_t rsvd2:4; + uint64_t rss_alg:4; + uint64_t rsvd1:12; + uint64_t rq_idx:3; + uint64_t rq_qs:7; + uint64_t rsvd0:1; + uint64_t stdn_fault:1; + uint64_t cqe_type:4; +#endif + }; +} cqe_rx_word0_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t pkt_len:16; + uint64_t l2_ptr:8; + uint64_t l3_ptr:8; + uint64_t l4_ptr:8; + uint64_t cq_pkt_len:8; + uint64_t align_pad:3; + uint64_t rsvd3:1; + uint64_t chan:12; +#else + uint64_t chan:12; + uint64_t rsvd3:1; + uint64_t align_pad:3; + uint64_t cq_pkt_len:8; + uint64_t l4_ptr:8; + uint64_t l3_ptr:8; + uint64_t l2_ptr:8; + uint64_t pkt_len:16; +#endif + }; +} cqe_rx_word1_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t rss_tag:32; + uint64_t vlan_tci:16; + uint64_t vlan_ptr:8; + uint64_t vlan2_ptr:8; +#else + uint64_t vlan2_ptr:8; + uint64_t vlan_ptr:8; + uint64_t vlan_tci:16; + uint64_t rss_tag:32; +#endif + }; +} cqe_rx_word2_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint16_t rb3_sz; + uint16_t rb2_sz; + uint16_t rb1_sz; + uint16_t rb0_sz; +#else + uint16_t rb0_sz; + uint16_t rb1_sz; + uint16_t rb2_sz; + uint16_t rb3_sz; +#endif + }; +} cqe_rx_word3_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint16_t rb7_sz; + uint16_t rb6_sz; + uint16_t rb5_sz; + uint16_t rb4_sz; +#else + uint16_t rb4_sz; + 
uint16_t rb5_sz; + uint16_t rb6_sz; + uint16_t rb7_sz; +#endif + }; +} cqe_rx_word4_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint16_t rb11_sz; + uint16_t rb10_sz; + uint16_t rb9_sz; + uint16_t rb8_sz; +#else + uint16_t rb8_sz; + uint16_t rb9_sz; + uint16_t rb10_sz; + uint16_t rb11_sz; +#endif + }; +} cqe_rx_word5_t; + +typedef union { + uint64_t u64; + struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t vlan_found:1; + uint64_t vlan_stripped:1; + uint64_t vlan2_found:1; + uint64_t vlan2_stripped:1; + uint64_t rsvd2:3; + uint64_t inner_l2:1; + uint64_t inner_l4type:4; + uint64_t inner_l3type:4; + uint64_t vlan_ptr:8; + uint64_t vlan2_ptr:8; + uint64_t rsvd1:8; + uint64_t rsvd0:8; + uint64_t inner_l3ptr:8; + uint64_t inner_l4ptr:8; +#else + uint64_t inner_l4ptr:8; + uint64_t inner_l3ptr:8; + uint64_t rsvd0:8; + uint64_t rsvd1:8; + uint64_t vlan2_ptr:8; + uint64_t vlan_ptr:8; + uint64_t inner_l3type:4; + uint64_t inner_l4type:4; + uint64_t inner_l2:1; + uint64_t rsvd2:3; + uint64_t vlan2_stripped:1; + uint64_t vlan2_found:1; + uint64_t vlan_stripped:1; + uint64_t vlan_found:1; +#endif + }; +} cqe_rx2_word6_t; + +struct cqe_rx_t { + cqe_rx_word0_t word0; + cqe_rx_word1_t word1; + cqe_rx_word2_t word2; + cqe_rx_word3_t word3; + cqe_rx_word4_t word4; + cqe_rx_word5_t word5; + cqe_rx2_word6_t word6; /* if NIC_PF_RX_CFG[CQE_RX2_ENA] set */ +}; + +struct cqe_rx_tcp_err_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t cqe_type:4; /* W0 */ + uint64_t rsvd0:60; + + uint64_t rsvd1:4; /* W1 */ + uint64_t partial_first:1; + uint64_t rsvd2:27; + uint64_t rbdr_bytes:8; + uint64_t rsvd3:24; +#else + uint64_t rsvd0:60; + uint64_t cqe_type:4; + + uint64_t rsvd3:24; + uint64_t rbdr_bytes:8; + uint64_t rsvd2:27; + uint64_t partial_first:1; + uint64_t rsvd1:4; +#endif +}; + +struct cqe_rx_tcp_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t cqe_type:4; /* W0 */ + uint64_t rsvd0:52; + uint64_t cq_tcp_status:8; + 
+ uint64_t rsvd1:32; /* W1 */ + uint64_t tcp_cntx_bytes:8; + uint64_t rsvd2:8; + uint64_t tcp_err_bytes:16; +#else + uint64_t cq_tcp_status:8; + uint64_t rsvd0:52; + uint64_t cqe_type:4; /* W0 */ + + uint64_t tcp_err_bytes:16; + uint64_t rsvd2:8; + uint64_t tcp_cntx_bytes:8; + uint64_t rsvd1:32; /* W1 */ +#endif +}; + +struct cqe_send_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t cqe_type:4; /* W0 */ + uint64_t rsvd0:4; + uint64_t sqe_ptr:16; + uint64_t rsvd1:4; + uint64_t rsvd2:10; + uint64_t sq_qs:7; + uint64_t sq_idx:3; + uint64_t rsvd3:8; + uint64_t send_status:8; + + uint64_t ptp_timestamp:64; /* W1 */ +#elif NICVF_BYTE_ORDER == NICVF_LITTLE_ENDIAN + uint64_t send_status:8; + uint64_t rsvd3:8; + uint64_t sq_idx:3; + uint64_t sq_qs:7; + uint64_t rsvd2:10; + uint64_t rsvd1:4; + uint64_t sqe_ptr:16; + uint64_t rsvd0:4; + uint64_t cqe_type:4; /* W0 */ + + uint64_t ptp_timestamp:64; +#endif +}; + +struct cq_entry_type_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t cqe_type:4; + uint64_t __pad:60; +#else + uint64_t __pad:60; + uint64_t cqe_type:4; +#endif +}; + +union cq_entry_t { + uint64_t u[64]; + struct cq_entry_type_t type; + struct cqe_rx_t rx_hdr; + struct cqe_rx_tcp_t rx_tcp_hdr; + struct cqe_rx_tcp_err_t rx_tcp_err_hdr; + struct cqe_send_t cqe_send; +}; + +NICVF_STATIC_ASSERT(sizeof(union cq_entry_t) == 512); + +struct rbdr_entry_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + union { + struct { + uint64_t rsvd0:15; + uint64_t buf_addr:42; + uint64_t cache_align:7; + }; + nicvf_phys_addr_t full_addr; + }; +#else + union { + struct { + uint64_t cache_align:7; + uint64_t buf_addr:42; + uint64_t rsvd0:15; + }; + nicvf_phys_addr_t full_addr; + }; +#endif +}; + +NICVF_STATIC_ASSERT(sizeof(struct rbdr_entry_t) == sizeof(uint64_t)); + +/* TCP reassembly context */ +struct rbe_tcp_cnxt_t { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t tcp_pkt_cnt:12; + uint64_t rsvd1:4; + uint64_t align_hdr_bytes:4; + uint64_t align_ptr_bytes:4; + 
uint64_t ptr_bytes:16; + uint64_t rsvd2:24; + uint64_t cqe_type:4; + uint64_t rsvd0:54; + uint64_t tcp_end_reason:2; + uint64_t tcp_status:4; +#else + uint64_t tcp_status:4; + uint64_t tcp_end_reason:2; + uint64_t rsvd0:54; + uint64_t cqe_type:4; + uint64_t rsvd2:24; + uint64_t ptr_bytes:16; + uint64_t align_ptr_bytes:4; + uint64_t align_hdr_bytes:4; + uint64_t rsvd1:4; + uint64_t tcp_pkt_cnt:12; +#endif +}; + +/* Always Big endian */ +struct rx_hdr_t { + uint64_t opaque:32; + uint64_t rss_flow:8; + uint64_t skip_length:6; + uint64_t disable_rss:1; + uint64_t disable_tcp_reassembly:1; + uint64_t nodrop:1; + uint64_t dest_alg:2; + uint64_t rsvd0:2; + uint64_t dest_rq:11; +}; + +struct sq_crc_subdesc { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t rsvd1:32; + uint64_t crc_ival:32; + uint64_t subdesc_type:4; + uint64_t crc_alg:2; + uint64_t rsvd0:10; + uint64_t crc_insert_pos:16; + uint64_t hdr_start:16; + uint64_t crc_len:16; +#else + uint64_t crc_len:16; + uint64_t hdr_start:16; + uint64_t crc_insert_pos:16; + uint64_t rsvd0:10; + uint64_t crc_alg:2; + uint64_t subdesc_type:4; + uint64_t crc_ival:32; + uint64_t rsvd1:32; +#endif +}; + +struct sq_gather_subdesc { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t subdesc_type:4; /* W0 */ + uint64_t ld_type:2; + uint64_t rsvd0:42; + uint64_t size:16; + + uint64_t rsvd1:15; /* W1 */ + uint64_t addr:49; +#else + uint64_t size:16; + uint64_t rsvd0:42; + uint64_t ld_type:2; + uint64_t subdesc_type:4; /* W0 */ + + uint64_t addr:49; + uint64_t rsvd1:15; /* W1 */ +#endif +}; + +/* SQ immediate subdescriptor */ +struct sq_imm_subdesc { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t subdesc_type:4; /* W0 */ + uint64_t rsvd0:46; + uint64_t len:14; + + uint64_t data:64; /* W1 */ +#else + uint64_t len:14; + uint64_t rsvd0:46; + uint64_t subdesc_type:4; /* W0 */ + + uint64_t data:64; /* W1 */ +#endif +}; + +struct sq_mem_subdesc { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t subdesc_type:4; /* W0 */ + 
uint64_t mem_alg:4; + uint64_t mem_dsz:2; + uint64_t wmem:1; + uint64_t rsvd0:21; + uint64_t offset:32; + + uint64_t rsvd1:15; /* W1 */ + uint64_t addr:49; +#else + uint64_t offset:32; + uint64_t rsvd0:21; + uint64_t wmem:1; + uint64_t mem_dsz:2; + uint64_t mem_alg:4; + uint64_t subdesc_type:4; /* W0 */ + + uint64_t addr:49; + uint64_t rsvd1:15; /* W1 */ +#endif +}; + +struct sq_hdr_subdesc { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t subdesc_type:4; + uint64_t tso:1; + uint64_t post_cqe:1; /* Post CQE on no error also */ + uint64_t dont_send:1; + uint64_t tstmp:1; + uint64_t subdesc_cnt:8; + uint64_t csum_l4:2; + uint64_t csum_l3:1; + uint64_t csum_inner_l4:2; + uint64_t csum_inner_l3:1; + uint64_t rsvd0:2; + uint64_t l4_offset:8; + uint64_t l3_offset:8; + uint64_t rsvd1:4; + uint64_t tot_len:20; /* W0 */ + + uint64_t rsvd2:24; + uint64_t inner_l4_offset:8; + uint64_t inner_l3_offset:8; + uint64_t tso_start:8; + uint64_t rsvd3:2; + uint64_t tso_max_paysize:14; /* W1 */ +#else + uint64_t tot_len:20; + uint64_t rsvd1:4; + uint64_t l3_offset:8; + uint64_t l4_offset:8; + uint64_t rsvd0:2; + uint64_t csum_inner_l3:1; + uint64_t csum_inner_l4:2; + uint64_t csum_l3:1; + uint64_t csum_l4:2; + uint64_t subdesc_cnt:8; + uint64_t tstmp:1; + uint64_t dont_send:1; + uint64_t post_cqe:1; /* Post CQE on no error also */ + uint64_t tso:1; + uint64_t subdesc_type:4; /* W0 */ + + uint64_t tso_max_paysize:14; + uint64_t rsvd3:2; + uint64_t tso_start:8; + uint64_t inner_l3_offset:8; + uint64_t inner_l4_offset:8; + uint64_t rsvd2:24; /* W1 */ +#endif +}; + +/* Each sq entry is 128 bits wide */ +union sq_entry_t { + uint64_t buff[2]; + struct sq_hdr_subdesc hdr; + struct sq_imm_subdesc imm; + struct sq_gather_subdesc gather; + struct sq_crc_subdesc crc; + struct sq_mem_subdesc mem; +}; + +NICVF_STATIC_ASSERT(sizeof(union sq_entry_t) == 16); + +/* Queue config register formats */ +struct rq_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t 
reserved_2_63:62; + uint64_t ena:1; + uint64_t reserved_0:1; +#else + uint64_t reserved_0:1; + uint64_t ena:1; + uint64_t reserved_2_63:62; +#endif + }; + uint64_t value; +}; }; + +struct cq_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t reserved_43_63:21; + uint64_t ena:1; + uint64_t reset:1; + uint64_t caching:1; + uint64_t reserved_35_39:5; + uint64_t qsize:3; + uint64_t reserved_25_31:7; + uint64_t avg_con:9; + uint64_t reserved_0_15:16; +#else + uint64_t reserved_0_15:16; + uint64_t avg_con:9; + uint64_t reserved_25_31:7; + uint64_t qsize:3; + uint64_t reserved_35_39:5; + uint64_t caching:1; + uint64_t reset:1; + uint64_t ena:1; + uint64_t reserved_43_63:21; +#endif + }; + uint64_t value; +}; }; + +struct sq_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t reserved_20_63:44; + uint64_t ena:1; + uint64_t reserved_18_18:1; + uint64_t reset:1; + uint64_t ldwb:1; + uint64_t reserved_11_15:5; + uint64_t qsize:3; + uint64_t reserved_3_7:5; + uint64_t tstmp_bgx_intf:3; +#else + uint64_t tstmp_bgx_intf:3; + uint64_t reserved_3_7:5; + uint64_t qsize:3; + uint64_t reserved_11_15:5; + uint64_t ldwb:1; + uint64_t reset:1; + uint64_t reserved_18_18:1; + uint64_t ena:1; + uint64_t reserved_20_63:44; +#endif + }; + uint64_t value; +}; }; + +struct rbdr_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t reserved_45_63:19; + uint64_t ena:1; + uint64_t reset:1; + uint64_t ldwb:1; + uint64_t reserved_36_41:6; + uint64_t qsize:4; + uint64_t reserved_25_31:7; + uint64_t avg_con:9; + uint64_t reserved_12_15:4; + uint64_t lines:12; +#else + uint64_t lines:12; + uint64_t reserved_12_15:4; + uint64_t avg_con:9; + uint64_t reserved_25_31:7; + uint64_t qsize:4; + uint64_t reserved_36_41:6; + uint64_t ldwb:1; + uint64_t reset:1; + uint64_t ena: 1; + uint64_t reserved_45_63:19; +#endif + }; + uint64_t value; +}; }; + +struct pf_qs_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t 
reserved_32_63:32; + uint64_t ena:1; + uint64_t reserved_27_30:4; + uint64_t sq_ins_ena:1; + uint64_t sq_ins_pos:6; + uint64_t lock_ena:1; + uint64_t lock_viol_cqe_ena:1; + uint64_t send_tstmp_ena:1; + uint64_t be:1; + uint64_t reserved_7_15:9; + uint64_t vnic:7; +#else + uint64_t vnic:7; + uint64_t reserved_7_15:9; + uint64_t be:1; + uint64_t send_tstmp_ena:1; + uint64_t lock_viol_cqe_ena:1; + uint64_t lock_ena:1; + uint64_t sq_ins_pos:6; + uint64_t sq_ins_ena:1; + uint64_t reserved_27_30:4; + uint64_t ena:1; + uint64_t reserved_32_63:32; +#endif + }; + uint64_t value; +}; }; + +struct pf_rq_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t reserved1:1; + uint64_t reserved0:34; + uint64_t strip_pre_l2:1; + uint64_t caching:2; + uint64_t cq_qs:7; + uint64_t cq_idx:3; + uint64_t rbdr_cont_qs:7; + uint64_t rbdr_cont_idx:1; + uint64_t rbdr_strt_qs:7; + uint64_t rbdr_strt_idx:1; +#else + uint64_t rbdr_strt_idx:1; + uint64_t rbdr_strt_qs:7; + uint64_t rbdr_cont_idx:1; + uint64_t rbdr_cont_qs:7; + uint64_t cq_idx:3; + uint64_t cq_qs:7; + uint64_t caching:2; + uint64_t strip_pre_l2:1; + uint64_t reserved0:34; + uint64_t reserved1:1; +#endif + }; + uint64_t value; +}; }; + +struct pf_rq_drop_cfg { union { struct { +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + uint64_t rbdr_red:1; + uint64_t cq_red:1; + uint64_t reserved3:14; + uint64_t rbdr_pass:8; + uint64_t rbdr_drop:8; + uint64_t reserved2:8; + uint64_t cq_pass:8; + uint64_t cq_drop:8; + uint64_t reserved1:8; +#else + uint64_t reserved1:8; + uint64_t cq_drop:8; + uint64_t cq_pass:8; + uint64_t reserved2:8; + uint64_t rbdr_drop:8; + uint64_t rbdr_pass:8; + uint64_t reserved3:14; + uint64_t cq_red:1; + uint64_t rbdr_red:1; +#endif + }; + uint64_t value; +}; }; + +#endif /* _THUNDERX_NICVF_HW_DEFS_H */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.c b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.c new file mode 100644 index 00000000..a072f19d --- /dev/null +++ 
b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.c @@ -0,0 +1,460 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> + +#include "nicvf_plat.h" + +#define NICVF_MBOX_PF_RESPONSE_DELAY_US (1000) + +static const char *mbox_message[NIC_MBOX_MSG_MAX] = { + [NIC_MBOX_MSG_INVALID] = "NIC_MBOX_MSG_INVALID", + [NIC_MBOX_MSG_READY] = "NIC_MBOX_MSG_READY", + [NIC_MBOX_MSG_ACK] = "NIC_MBOX_MSG_ACK", + [NIC_MBOX_MSG_NACK] = "NIC_MBOX_MSG_ACK", + [NIC_MBOX_MSG_QS_CFG] = "NIC_MBOX_MSG_QS_CFG", + [NIC_MBOX_MSG_RQ_CFG] = "NIC_MBOX_MSG_RQ_CFG", + [NIC_MBOX_MSG_SQ_CFG] = "NIC_MBOX_MSG_SQ_CFG", + [NIC_MBOX_MSG_RQ_DROP_CFG] = "NIC_MBOX_MSG_RQ_DROP_CFG", + [NIC_MBOX_MSG_SET_MAC] = "NIC_MBOX_MSG_SET_MAC", + [NIC_MBOX_MSG_SET_MAX_FRS] = "NIC_MBOX_MSG_SET_MAX_FRS", + [NIC_MBOX_MSG_CPI_CFG] = "NIC_MBOX_MSG_CPI_CFG", + [NIC_MBOX_MSG_RSS_SIZE] = "NIC_MBOX_MSG_RSS_SIZE", + [NIC_MBOX_MSG_RSS_CFG] = "NIC_MBOX_MSG_RSS_CFG", + [NIC_MBOX_MSG_RSS_CFG_CONT] = "NIC_MBOX_MSG_RSS_CFG_CONT", + [NIC_MBOX_MSG_RQ_BP_CFG] = "NIC_MBOX_MSG_RQ_BP_CFG", + [NIC_MBOX_MSG_RQ_SW_SYNC] = "NIC_MBOX_MSG_RQ_SW_SYNC", + [NIC_MBOX_MSG_BGX_LINK_CHANGE] = "NIC_MBOX_MSG_BGX_LINK_CHANGE", + [NIC_MBOX_MSG_ALLOC_SQS] = "NIC_MBOX_MSG_ALLOC_SQS", + [NIC_MBOX_MSG_LOOPBACK] = "NIC_MBOX_MSG_LOOPBACK", + [NIC_MBOX_MSG_RESET_STAT_COUNTER] = "NIC_MBOX_MSG_RESET_STAT_COUNTER", + [NIC_MBOX_MSG_CFG_DONE] = "NIC_MBOX_MSG_CFG_DONE", + [NIC_MBOX_MSG_SHUTDOWN] = "NIC_MBOX_MSG_SHUTDOWN", +}; + +static inline const char * __attribute__((unused)) +nicvf_mbox_msg_str(int msg) +{ + assert(msg >= 0 && msg < NIC_MBOX_MSG_MAX); + /* undefined messages */ + if (mbox_message[msg] == NULL) + msg = 0; + return mbox_message[msg]; +} + +static inline void +nicvf_mbox_send_msg_to_pf_raw(struct nicvf *nic, struct nic_mbx *mbx) +{ + uint64_t *mbx_data; + uint64_t mbx_addr; + int i; + + mbx_addr = NIC_VF_PF_MAILBOX_0_1; + mbx_data = (uint64_t *)mbx; + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + nicvf_reg_write(nic, mbx_addr, *mbx_data); + mbx_data++; + mbx_addr += 
sizeof(uint64_t); + } + nicvf_mbox_log("msg sent %s (VF%d)", + nicvf_mbox_msg_str(mbx->msg.msg), nic->vf_id); +} + +static inline void +nicvf_mbox_send_async_msg_to_pf(struct nicvf *nic, struct nic_mbx *mbx) +{ + nicvf_mbox_send_msg_to_pf_raw(nic, mbx); + /* Messages without ack are racy!*/ + nicvf_delay_us(NICVF_MBOX_PF_RESPONSE_DELAY_US); +} + +static inline int +nicvf_mbox_send_msg_to_pf(struct nicvf *nic, struct nic_mbx *mbx) +{ + long timeout; + long sleep = 10; + int i, retry = 5; + + for (i = 0; i < retry; i++) { + nic->pf_acked = false; + nic->pf_nacked = false; + nicvf_smp_wmb(); + + nicvf_mbox_send_msg_to_pf_raw(nic, mbx); + /* Give some time to get PF response */ + nicvf_delay_us(NICVF_MBOX_PF_RESPONSE_DELAY_US); + timeout = NIC_MBOX_MSG_TIMEOUT; + while (timeout > 0) { + /* Periodic poll happens from nicvf_interrupt() */ + nicvf_smp_rmb(); + + if (nic->pf_nacked) + return -EINVAL; + if (nic->pf_acked) + return 0; + + nicvf_delay_us(NICVF_MBOX_PF_RESPONSE_DELAY_US); + timeout -= sleep; + } + nicvf_log_error("PF didn't ack to msg 0x%02x %s VF%d (%d/%d)", + mbx->msg.msg, nicvf_mbox_msg_str(mbx->msg.msg), + nic->vf_id, i, retry); + } + return -EBUSY; +} + + +int +nicvf_handle_mbx_intr(struct nicvf *nic) +{ + struct nic_mbx mbx; + uint64_t *mbx_data = (uint64_t *)&mbx; + uint64_t mbx_addr = NIC_VF_PF_MAILBOX_0_1; + size_t i; + + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + *mbx_data = nicvf_reg_read(nic, mbx_addr); + mbx_data++; + mbx_addr += sizeof(uint64_t); + } + + /* Overwrite the message so we won't receive it again */ + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1, 0x0); + + nicvf_mbox_log("msg received id=0x%hhx %s (VF%d)", mbx.msg.msg, + nicvf_mbox_msg_str(mbx.msg.msg), nic->vf_id); + + switch (mbx.msg.msg) { + case NIC_MBOX_MSG_READY: + nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; + nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; + nic->node = mbx.nic_cfg.node_id; + nic->sqs_mode = mbx.nic_cfg.sqs_mode; + nic->loopback_supported = 
mbx.nic_cfg.loopback_supported; + ether_addr_copy((struct ether_addr *)mbx.nic_cfg.mac_addr, + (struct ether_addr *)nic->mac_addr); + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_ACK: + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_NACK: + nic->pf_nacked = true; + break; + case NIC_MBOX_MSG_RSS_SIZE: + nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_BGX_LINK_CHANGE: + nic->link_up = mbx.link_status.link_up; + nic->duplex = mbx.link_status.duplex; + nic->speed = mbx.link_status.speed; + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_ALLOC_SQS: + assert_primary(nic); + if (mbx.sqs_alloc.qs_count != nic->sqs_count) { + nicvf_log_error("Received %" PRIu8 "/%" PRIu8 + " secondary qsets", + mbx.sqs_alloc.qs_count, + nic->sqs_count); + abort(); + } + for (i = 0; i < mbx.sqs_alloc.qs_count; i++) { + if (mbx.sqs_alloc.svf[i] != nic->snicvf[i]->vf_id) { + nicvf_log_error("Received secondary qset[%zu] " + "ID %" PRIu8 " expected %" + PRIu8, i, mbx.sqs_alloc.svf[i], + nic->snicvf[i]->vf_id); + abort(); + } + } + nic->pf_acked = true; + break; + default: + nicvf_log_error("Invalid message from PF, msg_id=0x%hhx %s", + mbx.msg.msg, nicvf_mbox_msg_str(mbx.msg.msg)); + break; + } + nicvf_smp_wmb(); + + return mbx.msg.msg; +} + +/* + * Checks if VF is able to communicate with PF + * and also gets the VNIC number this VF is associated to. 
+ */ +int +nicvf_mbox_check_pf_ready(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = {.msg = NIC_MBOX_MSG_READY} }; + + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_set_mac_addr(struct nicvf *nic, + const uint8_t mac[NICVF_MAC_ADDR_SIZE]) +{ + struct nic_mbx mbx = { .msg = {0} }; + int i; + + mbx.msg.msg = NIC_MBOX_MSG_SET_MAC; + mbx.mac.vf_id = nic->vf_id; + for (i = 0; i < 6; i++) + mbx.mac.mac_addr[i] = mac[i]; + + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_config_cpi(struct nicvf *nic, uint32_t qcnt) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_CPI_CFG; + mbx.cpi_cfg.vf_id = nic->vf_id; + mbx.cpi_cfg.cpi_alg = nic->cpi_alg; + mbx.cpi_cfg.rq_cnt = qcnt; + + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_get_rss_size(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_RSS_SIZE; + mbx.rss_size.vf_id = nic->vf_id; + + /* Result will be stored in nic->rss_info.rss_size */ + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_config_rss(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + struct nicvf_rss_reta_info *rss = &nic->rss_info; + size_t tot_len = rss->rss_size; + size_t cur_len; + size_t cur_idx = 0; + size_t i; + + mbx.rss_cfg.vf_id = nic->vf_id; + mbx.rss_cfg.hash_bits = rss->hash_bits; + mbx.rss_cfg.tbl_len = 0; + mbx.rss_cfg.tbl_offset = 0; + + while (cur_idx < tot_len) { + cur_len = nicvf_min(tot_len - cur_idx, + (size_t)RSS_IND_TBL_LEN_PER_MBX_MSG); + mbx.msg.msg = (cur_idx > 0) ? 
+ NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; + mbx.rss_cfg.tbl_offset = cur_idx; + mbx.rss_cfg.tbl_len = cur_len; + for (i = 0; i < cur_len; i++) + mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[cur_idx++]; + + if (nicvf_mbox_send_msg_to_pf(nic, &mbx)) + return NICVF_ERR_RSS_TBL_UPDATE; + } + + return 0; +} + +int +nicvf_mbox_rq_config(struct nicvf *nic, uint16_t qidx, + struct pf_rq_cfg *pf_rq_cfg) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_RQ_CFG; + mbx.rq.qs_num = nic->vf_id; + mbx.rq.rq_num = qidx; + mbx.rq.cfg = pf_rq_cfg->value; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_sq_config(struct nicvf *nic, uint16_t qidx) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_SQ_CFG; + mbx.sq.qs_num = nic->vf_id; + mbx.sq.sq_num = qidx; + mbx.sq.sqs_mode = nic->sqs_mode; + mbx.sq.cfg = (nic->vf_id << 3) | qidx; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_qset_config(struct nicvf *nic, struct pf_qs_cfg *qs_cfg) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + +#if NICVF_BYTE_ORDER == NICVF_BIG_ENDIAN + qs_cfg->be = 1; +#endif + /* Send a mailbox msg to PF to config Qset */ + mbx.msg.msg = NIC_MBOX_MSG_QS_CFG; + mbx.qs.num = nic->vf_id; + mbx.qs.sqs_count = nic->sqs_count; + mbx.qs.cfg = qs_cfg->value; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_request_sqs(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + size_t i; + + assert_primary(nic); + assert(nic->sqs_count > 0); + assert(nic->sqs_count <= MAX_SQS_PER_VF); + + mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; + mbx.sqs_alloc.spec = 1; + mbx.sqs_alloc.qs_count = nic->sqs_count; + + /* Set no of Rx/Tx queues in each of the SQsets */ + for (i = 0; i < nic->sqs_count; i++) + mbx.sqs_alloc.svf[i] = nic->snicvf[i]->vf_id; + + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_rq_drop_config(struct nicvf *nic, uint16_t qidx, bool enable) +{ + struct nic_mbx mbx = { .msg = { 
0 } }; + struct pf_rq_drop_cfg *drop_cfg; + + /* Enable CQ drop to reserve sufficient CQEs for all tx packets */ + mbx.msg.msg = NIC_MBOX_MSG_RQ_DROP_CFG; + mbx.rq.qs_num = nic->vf_id; + mbx.rq.rq_num = qidx; + drop_cfg = (struct pf_rq_drop_cfg *)&mbx.rq.cfg; + drop_cfg->value = 0; + if (enable) { + drop_cfg->cq_red = 1; + drop_cfg->cq_drop = 2; + } + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_update_hw_max_frs(struct nicvf *nic, uint16_t mtu) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_SET_MAX_FRS; + mbx.frs.max_frs = mtu; + mbx.frs.vf_id = nic->vf_id; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_rq_sync(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + /* Make sure all packets in the pipeline are written back into mem */ + mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC; + mbx.rq.cfg = 0; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_rq_bp_config(struct nicvf *nic, uint16_t qidx, bool enable) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_RQ_BP_CFG; + mbx.rq.qs_num = nic->vf_id; + mbx.rq.rq_num = qidx; + mbx.rq.cfg = 0; + if (enable) + mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (nic->vf_id << 0); + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_loopback_config(struct nicvf *nic, bool enable) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK; + mbx.lbk.vf_id = nic->vf_id; + mbx.lbk.enable = enable; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +int +nicvf_mbox_reset_stat_counters(struct nicvf *nic, uint16_t rx_stat_mask, + uint8_t tx_stat_mask, uint16_t rq_stat_mask, + uint16_t sq_stat_mask) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rx_stat_mask = rx_stat_mask; + mbx.reset_stat.tx_stat_mask = tx_stat_mask; + mbx.reset_stat.rq_stat_mask = rq_stat_mask; + mbx.reset_stat.sq_stat_mask = 
sq_stat_mask; + return nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +void +nicvf_mbox_shutdown(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; + nicvf_mbox_send_msg_to_pf(nic, &mbx); +} + +void +nicvf_mbox_cfg_done(struct nicvf *nic) +{ + struct nic_mbx mbx = { .msg = { 0 } }; + + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + nicvf_mbox_send_async_msg_to_pf(nic, &mbx); +} diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.h b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.h new file mode 100644 index 00000000..8675fe8f --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_mbox.h @@ -0,0 +1,244 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __THUNDERX_NICVF_MBOX__ +#define __THUNDERX_NICVF_MBOX__ + +#include <stdint.h> + +#include "nicvf_plat.h" +#include "../nicvf_struct.h" + +/* PF <--> VF Mailbox communication + * Two 64bit registers are shared between PF and VF for each VF + * Writing into second register means end of message. + */ + +/* PF <--> VF mailbox communication */ +#define NIC_PF_VF_MAILBOX_SIZE 2 +#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */ + +/* Mailbox message types */ +#define NIC_MBOX_MSG_INVALID 0x00 /* Invalid message */ +#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */ +#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */ +#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */ +#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */ +#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */ +#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */ +#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */ +#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */ +#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */ +#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */ +#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */ +#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */ +#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */ +#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */ +#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush 
inflight pkts to RQ */ +#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */ +#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */ +#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */ +#define NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17 /* Reset statistics counters */ +#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */ +#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */ +#define NIC_MBOX_MSG_MAX 0x100 /* Maximum number of messages */ + +/* Get vNIC VF configuration */ +struct nic_cfg_msg { + uint8_t msg; + uint8_t vf_id; + uint8_t node_id; + bool tns_mode:1; + bool sqs_mode:1; + bool loopback_supported:1; + uint8_t mac_addr[NICVF_MAC_ADDR_SIZE]; +}; + +/* Qset configuration */ +struct qs_cfg_msg { + uint8_t msg; + uint8_t num; + uint8_t sqs_count; + uint64_t cfg; +}; + +/* Receive queue configuration */ +struct rq_cfg_msg { + uint8_t msg; + uint8_t qs_num; + uint8_t rq_num; + uint64_t cfg; +}; + +/* Send queue configuration */ +struct sq_cfg_msg { + uint8_t msg; + uint8_t qs_num; + uint8_t sq_num; + bool sqs_mode; + uint64_t cfg; +}; + +/* Set VF's MAC address */ +struct set_mac_msg { + uint8_t msg; + uint8_t vf_id; + uint8_t mac_addr[NICVF_MAC_ADDR_SIZE]; +}; + +/* Set Maximum frame size */ +struct set_frs_msg { + uint8_t msg; + uint8_t vf_id; + uint16_t max_frs; +}; + +/* Set CPI algorithm type */ +struct cpi_cfg_msg { + uint8_t msg; + uint8_t vf_id; + uint8_t rq_cnt; + uint8_t cpi_alg; +}; + +/* Get RSS table size */ +struct rss_sz_msg { + uint8_t msg; + uint8_t vf_id; + uint16_t ind_tbl_size; +}; + +/* Set RSS configuration */ +struct rss_cfg_msg { + uint8_t msg; + uint8_t vf_id; + uint8_t hash_bits; + uint8_t tbl_len; + uint8_t tbl_offset; +#define RSS_IND_TBL_LEN_PER_MBX_MSG 8 + uint8_t ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG]; +}; + +/* Physical interface link status */ +struct bgx_link_status { + uint8_t msg; + uint8_t mac_type; + uint8_t link_up; + uint8_t duplex; + uint32_t speed; +}; + +/* Allocate 
additional SQS to VF */ +struct sqs_alloc { + uint8_t msg; + uint8_t spec; + uint8_t qs_count; + uint8_t svf[MAX_SQS_PER_VF]; +}; + +/* Set interface in loopback mode */ +struct set_loopback { + uint8_t msg; + uint8_t vf_id; + bool enable; +}; + +/* Reset statistics counters */ +struct reset_stat_cfg { + uint8_t msg; + /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */ + uint16_t rx_stat_mask; + /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */ + uint8_t tx_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1) + */ + uint16_t rq_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1) + */ + uint16_t sq_stat_mask; +}; + +struct nic_mbx { +/* 128 bit shared memory between PF and each VF */ +union { + struct { uint8_t msg; } msg; + struct nic_cfg_msg nic_cfg; + struct qs_cfg_msg qs; + struct rq_cfg_msg rq; + struct sq_cfg_msg sq; + struct set_mac_msg mac; + struct set_frs_msg frs; + struct cpi_cfg_msg cpi_cfg; + struct rss_sz_msg rss_size; + struct rss_cfg_msg rss_cfg; + struct bgx_link_status link_status; + struct sqs_alloc sqs_alloc; + struct set_loopback lbk; + struct reset_stat_cfg reset_stat; +}; +}; + +NICVF_STATIC_ASSERT(sizeof(struct nic_mbx) <= 16); + +int nicvf_handle_mbx_intr(struct nicvf *nic); +int nicvf_mbox_check_pf_ready(struct nicvf *nic); +int nicvf_mbox_qset_config(struct nicvf *nic, struct pf_qs_cfg *qs_cfg); +int nicvf_mbox_request_sqs(struct nicvf *nic); +int nicvf_mbox_rq_config(struct nicvf *nic, uint16_t qidx, + struct pf_rq_cfg *pf_rq_cfg); +int nicvf_mbox_sq_config(struct nicvf *nic, uint16_t qidx); +int 
nicvf_mbox_rq_drop_config(struct nicvf *nic, uint16_t qidx, bool enable); +int nicvf_mbox_rq_bp_config(struct nicvf *nic, uint16_t qidx, bool enable); +int nicvf_mbox_set_mac_addr(struct nicvf *nic, + const uint8_t mac[NICVF_MAC_ADDR_SIZE]); +int nicvf_mbox_config_cpi(struct nicvf *nic, uint32_t qcnt); +int nicvf_mbox_get_rss_size(struct nicvf *nic); +int nicvf_mbox_config_rss(struct nicvf *nic); +int nicvf_mbox_update_hw_max_frs(struct nicvf *nic, uint16_t mtu); +int nicvf_mbox_rq_sync(struct nicvf *nic); +int nicvf_mbox_loopback_config(struct nicvf *nic, bool enable); +int nicvf_mbox_reset_stat_counters(struct nicvf *nic, uint16_t rx_stat_mask, + uint8_t tx_stat_mask, uint16_t rq_stat_mask, uint16_t sq_stat_mask); +void nicvf_mbox_shutdown(struct nicvf *nic); +void nicvf_mbox_cfg_done(struct nicvf *nic); + +#endif /* __THUNDERX_NICVF_MBOX__ */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_plat.h b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_plat.h new file mode 100644 index 00000000..36da1200 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/base/nicvf_plat.h @@ -0,0 +1,108 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _THUNDERX_NICVF_H +#define _THUNDERX_NICVF_H + +/* Platform/OS/arch specific abstractions */ + +/* log */ +#include <rte_log.h> +#include "../nicvf_logs.h" + +#define nicvf_log_error(s, ...) PMD_DRV_LOG(ERR, s, ##__VA_ARGS__) + +#define nicvf_log_debug(s, ...) PMD_DRV_LOG(DEBUG, s, ##__VA_ARGS__) + +#define nicvf_mbox_log(s, ...) PMD_MBOX_LOG(DEBUG, s, ##__VA_ARGS__) + +#define nicvf_log(s, ...) 
fprintf(stderr, s, ##__VA_ARGS__) + +/* delay */ +#include <rte_cycles.h> +#define nicvf_delay_us(x) rte_delay_us(x) + +/* barrier */ +#include <rte_atomic.h> +#define nicvf_smp_wmb() rte_smp_wmb() +#define nicvf_smp_rmb() rte_smp_rmb() + +/* utils */ +#include <rte_common.h> +#define nicvf_min(x, y) RTE_MIN(x, y) + +/* byte order */ +#include <rte_byteorder.h> +#define nicvf_cpu_to_be_64(x) rte_cpu_to_be_64(x) +#define nicvf_be_to_cpu_64(x) rte_be_to_cpu_64(x) + +#define NICVF_BYTE_ORDER RTE_BYTE_ORDER +#define NICVF_BIG_ENDIAN RTE_BIG_ENDIAN +#define NICVF_LITTLE_ENDIAN RTE_LITTLE_ENDIAN + +/* Constants */ +#include <rte_ether.h> +#define NICVF_MAC_ADDR_SIZE ETHER_ADDR_LEN + +#include <rte_io.h> +#define nicvf_addr_write(addr, val) rte_write64_relaxed((val), (void *)(addr)) +#define nicvf_addr_read(addr) rte_read64_relaxed((void *)(addr)) + +/* ARM64 specific functions */ +#if defined(RTE_ARCH_ARM64) +#define nicvf_prefetch_store_keep(_ptr) ({\ + asm volatile("prfm pstl1keep, %a0\n" : : "p" (_ptr)); }) + + +#define NICVF_LOAD_PAIR(reg1, reg2, addr) ({ \ + asm volatile( \ + "ldp %x[x1], %x[x0], [%x[p1]]" \ + : [x1]"=r"(reg1), [x0]"=r"(reg2)\ + : [p1]"r"(addr) \ + ); }) + +#else /* non optimized functions for building on non arm64 arch */ + +#define nicvf_prefetch_store_keep(_ptr) do {} while (0) + +#define NICVF_LOAD_PAIR(reg1, reg2, addr) \ +do { \ + reg1 = nicvf_addr_read((uintptr_t)addr); \ + reg2 = nicvf_addr_read((uintptr_t)addr + 8); \ +} while (0) + +#endif + +#include "nicvf_hw.h" +#include "nicvf_mbox.h" + +#endif /* _THUNDERX_NICVF_H */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.c b/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.c new file mode 100644 index 00000000..e4910c9b --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.c @@ -0,0 +1,2174 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <inttypes.h> +#include <netinet/in.h> +#include <sys/queue.h> + +#include <rte_alarm.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_eal.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ethdev_pci.h> +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_random.h> +#include <rte_pci.h> +#include <rte_tailq.h> + +#include "base/nicvf_plat.h" + +#include "nicvf_ethdev.h" +#include "nicvf_rxtx.h" +#include "nicvf_svf.h" +#include "nicvf_logs.h" + +static void nicvf_dev_stop(struct rte_eth_dev *dev); +static void nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup); +static void nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic, + bool cleanup); + +static inline int +nicvf_atomic_write_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = &dev->data->dev_link; + struct rte_eth_link *src = link; + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +static inline void +nicvf_set_eth_link_status(struct nicvf *nic, struct rte_eth_link *link) +{ + link->link_status = nic->link_up; + link->link_duplex = ETH_LINK_AUTONEG; + if (nic->duplex == NICVF_HALF_DUPLEX) + link->link_duplex = ETH_LINK_HALF_DUPLEX; + else if (nic->duplex == NICVF_FULL_DUPLEX) + link->link_duplex = ETH_LINK_FULL_DUPLEX; + link->link_speed = nic->speed; + link->link_autoneg = ETH_LINK_SPEED_AUTONEG; +} + +static void +nicvf_interrupt(void *arg) +{ + struct rte_eth_dev *dev = arg; + struct nicvf *nic = nicvf_pmd_priv(dev); + + if 
(nicvf_reg_poll_interrupts(nic) == NIC_MBOX_MSG_BGX_LINK_CHANGE) { + if (dev->data->dev_conf.intr_conf.lsc) + nicvf_set_eth_link_status(nic, &dev->data->dev_link); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + } + + rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000, + nicvf_interrupt, dev); +} + +static void +nicvf_vf_interrupt(void *arg) +{ + struct nicvf *nic = arg; + + nicvf_reg_poll_interrupts(nic); + + rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000, + nicvf_vf_interrupt, nic); +} + +static int +nicvf_periodic_alarm_start(void (fn)(void *), void *arg) +{ + return rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000, fn, arg); +} + +static int +nicvf_periodic_alarm_stop(void (fn)(void *), void *arg) +{ + return rte_eal_alarm_cancel(fn, arg); +} + +/* + * Return 0 means link status changed, -1 means not changed + */ +static int +nicvf_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + struct rte_eth_link link; + struct nicvf *nic = nicvf_pmd_priv(dev); + int i; + + PMD_INIT_FUNC_TRACE(); + + if (wait_to_complete) { + /* rte_eth_link_get() might need to wait up to 9 seconds */ + for (i = 0; i < MAX_CHECK_TIME; i++) { + memset(&link, 0, sizeof(link)); + nicvf_set_eth_link_status(nic, &link); + if (link.link_status) + break; + rte_delay_ms(CHECK_INTERVAL); + } + } else { + memset(&link, 0, sizeof(link)); + nicvf_set_eth_link_status(nic, &link); + } + return nicvf_atomic_write_link_status(dev, &link); +} + +static int +nicvf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint32_t buffsz, frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + size_t i; + + PMD_INIT_FUNC_TRACE(); + + if (frame_size > NIC_HW_MAX_FRS) + return -EINVAL; + + if (frame_size < NIC_HW_MIN_FRS) + return -EINVAL; + + buffsz = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM; + + /* + * Refuse mtu 
that requires the support of scattered packets + * when this feature has not been enabled before. + */ + if (!dev->data->scattered_rx && + (frame_size + 2 * VLAN_TAG_SIZE > buffsz)) + return -EINVAL; + + /* check <seg size> * <max_seg> >= max_frame */ + if (dev->data->scattered_rx && + (frame_size + 2 * VLAN_TAG_SIZE > buffsz * NIC_HW_MAX_SEGS)) + return -EINVAL; + + if (frame_size > ETHER_MAX_LEN) + dev->data->dev_conf.rxmode.jumbo_frame = 1; + else + dev->data->dev_conf.rxmode.jumbo_frame = 0; + + if (nicvf_mbox_update_hw_max_frs(nic, frame_size)) + return -EINVAL; + + /* Update max frame size */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = (uint32_t)frame_size; + nic->mtu = mtu; + + for (i = 0; i < nic->sqs_count; i++) + nic->snicvf[i]->mtu = mtu; + + return 0; +} + +static int +nicvf_dev_get_regs(struct rte_eth_dev *dev, struct rte_dev_reg_info *regs) +{ + uint64_t *data = regs->data; + struct nicvf *nic = nicvf_pmd_priv(dev); + + if (data == NULL) { + regs->length = nicvf_reg_get_count(); + regs->width = THUNDERX_REG_BYTES; + return 0; + } + + /* Support only full register dump */ + if ((regs->length == 0) || + (regs->length == (uint32_t)nicvf_reg_get_count())) { + regs->version = nic->vendor_id << 16 | nic->device_id; + nicvf_reg_dump(nic, data); + return 0; + } + return -ENOTSUP; +} + +static void +nicvf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + uint16_t qidx; + struct nicvf_hw_rx_qstats rx_qstats; + struct nicvf_hw_tx_qstats tx_qstats; + struct nicvf_hw_stats port_stats; + struct nicvf *nic = nicvf_pmd_priv(dev); + uint16_t rx_start, rx_end; + uint16_t tx_start, tx_end; + size_t i; + + /* RX queue indices for the first VF */ + nicvf_rx_range(dev, nic, &rx_start, &rx_end); + + /* Reading per RX ring stats */ + for (qidx = rx_start; qidx <= rx_end; qidx++) { + if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS) + break; + + nicvf_hw_get_rx_qstats(nic, &rx_qstats, qidx); + stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes; + 
stats->q_ipackets[qidx] = rx_qstats.q_rx_packets; + } + + /* TX queue indices for the first VF */ + nicvf_tx_range(dev, nic, &tx_start, &tx_end); + + /* Reading per TX ring stats */ + for (qidx = tx_start; qidx <= tx_end; qidx++) { + if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS) + break; + + nicvf_hw_get_tx_qstats(nic, &tx_qstats, qidx); + stats->q_obytes[qidx] = tx_qstats.q_tx_bytes; + stats->q_opackets[qidx] = tx_qstats.q_tx_packets; + } + + for (i = 0; i < nic->sqs_count; i++) { + struct nicvf *snic = nic->snicvf[i]; + + if (snic == NULL) + break; + + /* RX queue indices for a secondary VF */ + nicvf_rx_range(dev, snic, &rx_start, &rx_end); + + /* Reading per RX ring stats */ + for (qidx = rx_start; qidx <= rx_end; qidx++) { + if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS) + break; + + nicvf_hw_get_rx_qstats(snic, &rx_qstats, + qidx % MAX_RCV_QUEUES_PER_QS); + stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes; + stats->q_ipackets[qidx] = rx_qstats.q_rx_packets; + } + + /* TX queue indices for a secondary VF */ + nicvf_tx_range(dev, snic, &tx_start, &tx_end); + /* Reading per TX ring stats */ + for (qidx = tx_start; qidx <= tx_end; qidx++) { + if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS) + break; + + nicvf_hw_get_tx_qstats(snic, &tx_qstats, + qidx % MAX_SND_QUEUES_PER_QS); + stats->q_obytes[qidx] = tx_qstats.q_tx_bytes; + stats->q_opackets[qidx] = tx_qstats.q_tx_packets; + } + } + + nicvf_hw_get_stats(nic, &port_stats); + stats->ibytes = port_stats.rx_bytes; + stats->ipackets = port_stats.rx_ucast_frames; + stats->ipackets += port_stats.rx_bcast_frames; + stats->ipackets += port_stats.rx_mcast_frames; + stats->ierrors = port_stats.rx_l2_errors; + stats->imissed = port_stats.rx_drop_red; + stats->imissed += port_stats.rx_drop_overrun; + stats->imissed += port_stats.rx_drop_bcast; + stats->imissed += port_stats.rx_drop_mcast; + stats->imissed += port_stats.rx_drop_l3_bcast; + stats->imissed += port_stats.rx_drop_l3_mcast; + + stats->obytes = port_stats.tx_bytes_ok; + stats->opackets = 
port_stats.tx_ucast_frames_ok; + stats->opackets += port_stats.tx_bcast_frames_ok; + stats->opackets += port_stats.tx_mcast_frames_ok; + stats->oerrors = port_stats.tx_drops; +} + +static const uint32_t * +nicvf_dev_supported_ptypes_get(struct rte_eth_dev *dev) +{ + size_t copied; + static uint32_t ptypes[32]; + struct nicvf *nic = nicvf_pmd_priv(dev); + static const uint32_t ptypes_common[] = { + RTE_PTYPE_L3_IPV4, + RTE_PTYPE_L3_IPV4_EXT, + RTE_PTYPE_L3_IPV6, + RTE_PTYPE_L3_IPV6_EXT, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_FRAG, + }; + static const uint32_t ptypes_tunnel[] = { + RTE_PTYPE_TUNNEL_GRE, + RTE_PTYPE_TUNNEL_GENEVE, + RTE_PTYPE_TUNNEL_VXLAN, + RTE_PTYPE_TUNNEL_NVGRE, + }; + static const uint32_t ptypes_end = RTE_PTYPE_UNKNOWN; + + copied = sizeof(ptypes_common); + memcpy(ptypes, ptypes_common, copied); + if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) { + memcpy((char *)ptypes + copied, ptypes_tunnel, + sizeof(ptypes_tunnel)); + copied += sizeof(ptypes_tunnel); + } + + memcpy((char *)ptypes + copied, &ptypes_end, sizeof(ptypes_end)); + if (dev->rx_pkt_burst == nicvf_recv_pkts || + dev->rx_pkt_burst == nicvf_recv_pkts_multiseg) + return ptypes; + + return NULL; +} + +static void +nicvf_dev_stats_reset(struct rte_eth_dev *dev) +{ + int i; + uint16_t rxqs = 0, txqs = 0; + struct nicvf *nic = nicvf_pmd_priv(dev); + uint16_t rx_start, rx_end; + uint16_t tx_start, tx_end; + + /* Reset all primary nic counters */ + nicvf_rx_range(dev, nic, &rx_start, &rx_end); + for (i = rx_start; i <= rx_end; i++) + rxqs |= (0x3 << (i * 2)); + + nicvf_tx_range(dev, nic, &tx_start, &tx_end); + for (i = tx_start; i <= tx_end; i++) + txqs |= (0x3 << (i * 2)); + + nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, rxqs, txqs); + + /* Reset secondary nic queue counters */ + for (i = 0; i < nic->sqs_count; i++) { + struct nicvf *snic = nic->snicvf[i]; + if (snic == NULL) + break; + + nicvf_rx_range(dev, snic, &rx_start, &rx_end); + for (i = rx_start; i <= rx_end; 
i++) + rxqs |= (0x3 << ((i % MAX_CMP_QUEUES_PER_QS) * 2)); + + nicvf_tx_range(dev, snic, &tx_start, &tx_end); + for (i = tx_start; i <= tx_end; i++) + txqs |= (0x3 << ((i % MAX_SND_QUEUES_PER_QS) * 2)); + + nicvf_mbox_reset_stat_counters(snic, 0, 0, rxqs, txqs); + } +} + +/* Promiscuous mode enabled by default in LMAC to VF 1:1 map configuration */ +static void +nicvf_dev_promisc_enable(struct rte_eth_dev *dev __rte_unused) +{ +} + +static inline uint64_t +nicvf_rss_ethdev_to_nic(struct nicvf *nic, uint64_t ethdev_rss) +{ + uint64_t nic_rss = 0; + + if (ethdev_rss & ETH_RSS_IPV4) + nic_rss |= RSS_IP_ENA; + + if (ethdev_rss & ETH_RSS_IPV6) + nic_rss |= RSS_IP_ENA; + + if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_UDP) + nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA); + + if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_TCP) + nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA); + + if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_UDP) + nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA); + + if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_TCP) + nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA); + + if (ethdev_rss & ETH_RSS_PORT) + nic_rss |= RSS_L2_EXTENDED_HASH_ENA; + + if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) { + if (ethdev_rss & ETH_RSS_VXLAN) + nic_rss |= RSS_TUN_VXLAN_ENA; + + if (ethdev_rss & ETH_RSS_GENEVE) + nic_rss |= RSS_TUN_GENEVE_ENA; + + if (ethdev_rss & ETH_RSS_NVGRE) + nic_rss |= RSS_TUN_NVGRE_ENA; + } + + return nic_rss; +} + +static inline uint64_t +nicvf_rss_nic_to_ethdev(struct nicvf *nic, uint64_t nic_rss) +{ + uint64_t ethdev_rss = 0; + + if (nic_rss & RSS_IP_ENA) + ethdev_rss |= (ETH_RSS_IPV4 | ETH_RSS_IPV6); + + if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_TCP_ENA)) + ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV6_TCP); + + if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_UDP_ENA)) + ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV6_UDP); + + if (nic_rss & RSS_L2_EXTENDED_HASH_ENA) + ethdev_rss |= ETH_RSS_PORT; + + if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) { + if (nic_rss & 
RSS_TUN_VXLAN_ENA) + ethdev_rss |= ETH_RSS_VXLAN; + + if (nic_rss & RSS_TUN_GENEVE_ENA) + ethdev_rss |= ETH_RSS_GENEVE; + + if (nic_rss & RSS_TUN_NVGRE_ENA) + ethdev_rss |= ETH_RSS_NVGRE; + } + return ethdev_rss; +} + +static int +nicvf_dev_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; + int ret, i, j; + + if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) { + RTE_LOG(ERR, PMD, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " + "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); + return -EINVAL; + } + + ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE); + if (ret) + return ret; + + /* Copy RETA table */ + for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) { + for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) + if ((reta_conf[i].mask >> j) & 0x01) + reta_conf[i].reta[j] = tbl[j]; + } + + return 0; +} + +static int +nicvf_dev_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; + int ret, i, j; + + if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) { + RTE_LOG(ERR, PMD, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " + "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); + return -EINVAL; + } + + ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE); + if (ret) + return ret; + + /* Copy RETA table */ + for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) { + for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) + if ((reta_conf[i].mask >> j) & 0x01) + tbl[j] = reta_conf[i].reta[j]; + } + + return nicvf_rss_reta_update(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE); +} + +static int +nicvf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct 
nicvf *nic = nicvf_pmd_priv(dev); + + if (rss_conf->rss_key) + nicvf_rss_get_key(nic, rss_conf->rss_key); + + rss_conf->rss_key_len = RSS_HASH_KEY_BYTE_SIZE; + rss_conf->rss_hf = nicvf_rss_nic_to_ethdev(nic, nicvf_rss_get_cfg(nic)); + return 0; +} + +static int +nicvf_dev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint64_t nic_rss; + + if (rss_conf->rss_key && + rss_conf->rss_key_len != RSS_HASH_KEY_BYTE_SIZE) { + RTE_LOG(ERR, PMD, "Hash key size mismatch %d", + rss_conf->rss_key_len); + return -EINVAL; + } + + if (rss_conf->rss_key) + nicvf_rss_set_key(nic, rss_conf->rss_key); + + nic_rss = nicvf_rss_ethdev_to_nic(nic, rss_conf->rss_hf); + nicvf_rss_set_cfg(nic, nic_rss); + return 0; +} + +static int +nicvf_qset_cq_alloc(struct rte_eth_dev *dev, struct nicvf *nic, + struct nicvf_rxq *rxq, uint16_t qidx, uint32_t desc_cnt) +{ + const struct rte_memzone *rz; + uint32_t ring_size = CMP_QUEUE_SZ_MAX * sizeof(union cq_entry_t); + + rz = rte_eth_dma_zone_reserve(dev, "cq_ring", + nicvf_netdev_qidx(nic, qidx), ring_size, + NICVF_CQ_BASE_ALIGN_BYTES, nic->node); + if (rz == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate mem for cq hw ring"); + return -ENOMEM; + } + + memset(rz->addr, 0, ring_size); + + rxq->phys = rz->phys_addr; + rxq->desc = rz->addr; + rxq->qlen_mask = desc_cnt - 1; + + return 0; +} + +static int +nicvf_qset_sq_alloc(struct rte_eth_dev *dev, struct nicvf *nic, + struct nicvf_txq *sq, uint16_t qidx, uint32_t desc_cnt) +{ + const struct rte_memzone *rz; + uint32_t ring_size = SND_QUEUE_SZ_MAX * sizeof(union sq_entry_t); + + rz = rte_eth_dma_zone_reserve(dev, "sq", + nicvf_netdev_qidx(nic, qidx), ring_size, + NICVF_SQ_BASE_ALIGN_BYTES, nic->node); + if (rz == NULL) { + PMD_INIT_LOG(ERR, "Failed allocate mem for sq hw ring"); + return -ENOMEM; + } + + memset(rz->addr, 0, ring_size); + + sq->phys = rz->phys_addr; + sq->desc = rz->addr; + sq->qlen_mask = desc_cnt - 1; + + 
return 0; +} + +static int +nicvf_qset_rbdr_alloc(struct rte_eth_dev *dev, struct nicvf *nic, + uint32_t desc_cnt, uint32_t buffsz) +{ + struct nicvf_rbdr *rbdr; + const struct rte_memzone *rz; + uint32_t ring_size; + + assert(nic->rbdr == NULL); + rbdr = rte_zmalloc_socket("rbdr", sizeof(struct nicvf_rbdr), + RTE_CACHE_LINE_SIZE, nic->node); + if (rbdr == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr"); + return -ENOMEM; + } + + ring_size = sizeof(struct rbdr_entry_t) * RBDR_QUEUE_SZ_MAX; + rz = rte_eth_dma_zone_reserve(dev, "rbdr", + nicvf_netdev_qidx(nic, 0), ring_size, + NICVF_RBDR_BASE_ALIGN_BYTES, nic->node); + if (rz == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr desc ring"); + return -ENOMEM; + } + + memset(rz->addr, 0, ring_size); + + rbdr->phys = rz->phys_addr; + rbdr->tail = 0; + rbdr->next_tail = 0; + rbdr->desc = rz->addr; + rbdr->buffsz = buffsz; + rbdr->qlen_mask = desc_cnt - 1; + rbdr->rbdr_status = + nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_STATUS0; + rbdr->rbdr_door = + nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_DOOR; + + nic->rbdr = rbdr; + return 0; +} + +static void +nicvf_rbdr_release_mbuf(struct rte_eth_dev *dev, struct nicvf *nic, + nicvf_phys_addr_t phy) +{ + uint16_t qidx; + void *obj; + struct nicvf_rxq *rxq; + uint16_t rx_start, rx_end; + + /* Get queue ranges for this VF */ + nicvf_rx_range(dev, nic, &rx_start, &rx_end); + + for (qidx = rx_start; qidx <= rx_end; qidx++) { + rxq = dev->data->rx_queues[qidx]; + if (rxq->precharge_cnt) { + obj = (void *)nicvf_mbuff_phy2virt(phy, + rxq->mbuf_phys_off); + rte_mempool_put(rxq->pool, obj); + rxq->precharge_cnt--; + break; + } + } +} + +static inline void +nicvf_rbdr_release_mbufs(struct rte_eth_dev *dev, struct nicvf *nic) +{ + uint32_t qlen_mask, head; + struct rbdr_entry_t *entry; + struct nicvf_rbdr *rbdr = nic->rbdr; + + qlen_mask = rbdr->qlen_mask; + head = rbdr->head; + while (head != rbdr->tail) { + entry = rbdr->desc + head; + 
nicvf_rbdr_release_mbuf(dev, nic, entry->full_addr); + head++; + head = head & qlen_mask; + } +} + +static inline void +nicvf_tx_queue_release_mbufs(struct nicvf_txq *txq) +{ + uint32_t head; + + head = txq->head; + while (head != txq->tail) { + if (txq->txbuffs[head]) { + rte_pktmbuf_free_seg(txq->txbuffs[head]); + txq->txbuffs[head] = NULL; + } + head++; + head = head & txq->qlen_mask; + } +} + +static void +nicvf_tx_queue_reset(struct nicvf_txq *txq) +{ + uint32_t txq_desc_cnt = txq->qlen_mask + 1; + + memset(txq->desc, 0, sizeof(union sq_entry_t) * txq_desc_cnt); + memset(txq->txbuffs, 0, sizeof(struct rte_mbuf *) * txq_desc_cnt); + txq->tail = 0; + txq->head = 0; + txq->xmit_bufs = 0; +} + +static inline int +nicvf_vf_start_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic, + uint16_t qidx) +{ + struct nicvf_txq *txq; + int ret; + + assert(qidx < MAX_SND_QUEUES_PER_QS); + + if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] == + RTE_ETH_QUEUE_STATE_STARTED) + return 0; + + txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)]; + txq->pool = NULL; + ret = nicvf_qset_sq_config(nic, qidx, txq); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to configure sq VF%d %d %d", + nic->vf_id, qidx, ret); + goto config_sq_error; + } + + dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] = + RTE_ETH_QUEUE_STATE_STARTED; + return ret; + +config_sq_error: + nicvf_qset_sq_reclaim(nic, qidx); + return ret; +} + +static inline int +nicvf_vf_stop_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic, + uint16_t qidx) +{ + struct nicvf_txq *txq; + int ret; + + assert(qidx < MAX_SND_QUEUES_PER_QS); + + if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] == + RTE_ETH_QUEUE_STATE_STOPPED) + return 0; + + ret = nicvf_qset_sq_reclaim(nic, qidx); + if (ret) + PMD_INIT_LOG(ERR, "Failed to reclaim sq VF%d %d %d", + nic->vf_id, qidx, ret); + + txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)]; + nicvf_tx_queue_release_mbufs(txq); + nicvf_tx_queue_reset(txq); + + 
dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] = + RTE_ETH_QUEUE_STATE_STOPPED; + return ret; +} + +static inline int +nicvf_configure_cpi(struct rte_eth_dev *dev) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint16_t qidx, qcnt; + int ret; + + /* Count started rx queues */ + for (qidx = qcnt = 0; qidx < dev->data->nb_rx_queues; qidx++) + if (dev->data->rx_queue_state[qidx] == + RTE_ETH_QUEUE_STATE_STARTED) + qcnt++; + + nic->cpi_alg = CPI_ALG_NONE; + ret = nicvf_mbox_config_cpi(nic, qcnt); + if (ret) + PMD_INIT_LOG(ERR, "Failed to configure CPI %d", ret); + + return ret; +} + +static inline int +nicvf_configure_rss(struct rte_eth_dev *dev) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + uint64_t rsshf; + int ret = -EINVAL; + + rsshf = nicvf_rss_ethdev_to_nic(nic, + dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf); + PMD_DRV_LOG(INFO, "mode=%d rx_queues=%d loopback=%d rsshf=0x%" PRIx64, + dev->data->dev_conf.rxmode.mq_mode, + dev->data->nb_rx_queues, + dev->data->dev_conf.lpbk_mode, rsshf); + + if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_NONE) + ret = nicvf_rss_term(nic); + else if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) + ret = nicvf_rss_config(nic, dev->data->nb_rx_queues, rsshf); + if (ret) + PMD_INIT_LOG(ERR, "Failed to configure RSS %d", ret); + + return ret; +} + +static int +nicvf_configure_rss_reta(struct rte_eth_dev *dev) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + unsigned int idx, qmap_size; + uint8_t qmap[RTE_MAX_QUEUES_PER_PORT]; + uint8_t default_reta[NIC_MAX_RSS_IDR_TBL_SIZE]; + + if (nic->cpi_alg != CPI_ALG_NONE) + return -EINVAL; + + /* Prepare queue map */ + for (idx = 0, qmap_size = 0; idx < dev->data->nb_rx_queues; idx++) { + if (dev->data->rx_queue_state[idx] == + RTE_ETH_QUEUE_STATE_STARTED) + qmap[qmap_size++] = idx; + } + + /* Update default RSS RETA */ + for (idx = 0; idx < NIC_MAX_RSS_IDR_TBL_SIZE; idx++) + default_reta[idx] = qmap[idx % qmap_size]; + + return nicvf_rss_reta_update(nic, default_reta, + 
NIC_MAX_RSS_IDR_TBL_SIZE); +} + +static void +nicvf_dev_tx_queue_release(void *sq) +{ + struct nicvf_txq *txq; + + PMD_INIT_FUNC_TRACE(); + + txq = (struct nicvf_txq *)sq; + if (txq) { + if (txq->txbuffs != NULL) { + nicvf_tx_queue_release_mbufs(txq); + rte_free(txq->txbuffs); + txq->txbuffs = NULL; + } + rte_free(txq); + } +} + +static void +nicvf_set_tx_function(struct rte_eth_dev *dev) +{ + struct nicvf_txq *txq; + size_t i; + bool multiseg = false; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if ((txq->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) == 0) { + multiseg = true; + break; + } + } + + /* Use a simple Tx queue (no offloads, no multi segs) if possible */ + if (multiseg) { + PMD_DRV_LOG(DEBUG, "Using multi-segment tx callback"); + dev->tx_pkt_burst = nicvf_xmit_pkts_multiseg; + } else { + PMD_DRV_LOG(DEBUG, "Using single-segment tx callback"); + dev->tx_pkt_burst = nicvf_xmit_pkts; + } + + if (txq->pool_free == nicvf_single_pool_free_xmited_buffers) + PMD_DRV_LOG(DEBUG, "Using single-mempool tx free method"); + else + PMD_DRV_LOG(DEBUG, "Using multi-mempool tx free method"); +} + +static void +nicvf_set_rx_function(struct rte_eth_dev *dev) +{ + if (dev->data->scattered_rx) { + PMD_DRV_LOG(DEBUG, "Using multi-segment rx callback"); + dev->rx_pkt_burst = nicvf_recv_pkts_multiseg; + } else { + PMD_DRV_LOG(DEBUG, "Using single-segment rx callback"); + dev->rx_pkt_burst = nicvf_recv_pkts; + } +} + +static int +nicvf_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + uint16_t tx_free_thresh; + uint8_t is_single_pool; + struct nicvf_txq *txq; + struct nicvf *nic = nicvf_pmd_priv(dev); + + PMD_INIT_FUNC_TRACE(); + + if (qidx >= MAX_SND_QUEUES_PER_QS) + nic = nic->snicvf[qidx / MAX_SND_QUEUES_PER_QS - 1]; + + qidx = qidx % MAX_SND_QUEUES_PER_QS; + + /* Socket id check */ + if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != 
nic->node) + PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d", + socket_id, nic->node); + + /* Tx deferred start is not supported */ + if (tx_conf->tx_deferred_start) { + PMD_INIT_LOG(ERR, "Tx deferred start not supported"); + return -EINVAL; + } + + /* Roundup nb_desc to available qsize and validate max number of desc */ + nb_desc = nicvf_qsize_sq_roundup(nb_desc); + if (nb_desc == 0) { + PMD_INIT_LOG(ERR, "Value of nb_desc beyond available sq qsize"); + return -EINVAL; + } + + /* Validate tx_free_thresh */ + tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? + tx_conf->tx_free_thresh : + NICVF_DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh > (nb_desc) || + tx_free_thresh > NICVF_MAX_TX_FREE_THRESH) { + PMD_INIT_LOG(ERR, + "tx_free_thresh must be less than the number of TX " + "descriptors. (tx_free_thresh=%u port=%d " + "queue=%d)", (unsigned int)tx_free_thresh, + (int)dev->data->port_id, (int)qidx); + return -EINVAL; + } + + /* Free memory prior to re-allocation if needed. 
*/ + if (dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) { + PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d", + nicvf_netdev_qidx(nic, qidx)); + nicvf_dev_tx_queue_release( + dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)]); + dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL; + } + + /* Allocating tx queue data structure */ + txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nicvf_txq), + RTE_CACHE_LINE_SIZE, nic->node); + if (txq == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate txq=%d", + nicvf_netdev_qidx(nic, qidx)); + return -ENOMEM; + } + + txq->nic = nic; + txq->queue_id = qidx; + txq->tx_free_thresh = tx_free_thresh; + txq->txq_flags = tx_conf->txq_flags; + txq->sq_head = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_HEAD; + txq->sq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_DOOR; + is_single_pool = (txq->txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT && + txq->txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP); + + /* Choose optimum free threshold value for multipool case */ + if (!is_single_pool) { + txq->tx_free_thresh = (uint16_t) + (tx_conf->tx_free_thresh == NICVF_DEFAULT_TX_FREE_THRESH ? 
+ NICVF_TX_FREE_MPOOL_THRESH : + tx_conf->tx_free_thresh); + txq->pool_free = nicvf_multi_pool_free_xmited_buffers; + } else { + txq->pool_free = nicvf_single_pool_free_xmited_buffers; + } + + /* Allocate software ring */ + txq->txbuffs = rte_zmalloc_socket("txq->txbuffs", + nb_desc * sizeof(struct rte_mbuf *), + RTE_CACHE_LINE_SIZE, nic->node); + + if (txq->txbuffs == NULL) { + nicvf_dev_tx_queue_release(txq); + return -ENOMEM; + } + + if (nicvf_qset_sq_alloc(dev, nic, txq, qidx, nb_desc)) { + PMD_INIT_LOG(ERR, "Failed to allocate mem for sq %d", qidx); + nicvf_dev_tx_queue_release(txq); + return -ENOMEM; + } + + nicvf_tx_queue_reset(txq); + + PMD_TX_LOG(DEBUG, "[%d] txq=%p nb_desc=%d desc=%p phys=0x%" PRIx64, + nicvf_netdev_qidx(nic, qidx), txq, nb_desc, txq->desc, + txq->phys); + + dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = txq; + dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] = + RTE_ETH_QUEUE_STATE_STOPPED; + return 0; +} + +static inline void +nicvf_rx_queue_release_mbufs(struct rte_eth_dev *dev, struct nicvf_rxq *rxq) +{ + uint32_t rxq_cnt; + uint32_t nb_pkts, released_pkts = 0; + uint32_t refill_cnt = 0; + struct rte_mbuf *rx_pkts[NICVF_MAX_RX_FREE_THRESH]; + + if (dev->rx_pkt_burst == NULL) + return; + + while ((rxq_cnt = nicvf_dev_rx_queue_count(dev, + nicvf_netdev_qidx(rxq->nic, rxq->queue_id)))) { + nb_pkts = dev->rx_pkt_burst(rxq, rx_pkts, + NICVF_MAX_RX_FREE_THRESH); + PMD_DRV_LOG(INFO, "nb_pkts=%d rxq_cnt=%d", nb_pkts, rxq_cnt); + while (nb_pkts) { + rte_pktmbuf_free_seg(rx_pkts[--nb_pkts]); + released_pkts++; + } + } + + + refill_cnt += nicvf_dev_rbdr_refill(dev, + nicvf_netdev_qidx(rxq->nic, rxq->queue_id)); + + PMD_DRV_LOG(INFO, "free_cnt=%d refill_cnt=%d", + released_pkts, refill_cnt); +} + +static void +nicvf_rx_queue_reset(struct nicvf_rxq *rxq) +{ + rxq->head = 0; + rxq->available_space = 0; + rxq->recv_buffers = 0; +} + +static inline int +nicvf_vf_start_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic, + uint16_t qidx) 
+{ + struct nicvf_rxq *rxq; + int ret; + + assert(qidx < MAX_RCV_QUEUES_PER_QS); + + if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] == + RTE_ETH_QUEUE_STATE_STARTED) + return 0; + + /* Update rbdr pointer to all rxq */ + rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)]; + rxq->shared_rbdr = nic->rbdr; + + ret = nicvf_qset_rq_config(nic, qidx, rxq); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to configure rq VF%d %d %d", + nic->vf_id, qidx, ret); + goto config_rq_error; + } + ret = nicvf_qset_cq_config(nic, qidx, rxq); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to configure cq VF%d %d %d", + nic->vf_id, qidx, ret); + goto config_cq_error; + } + + dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] = + RTE_ETH_QUEUE_STATE_STARTED; + return 0; + +config_cq_error: + nicvf_qset_cq_reclaim(nic, qidx); +config_rq_error: + nicvf_qset_rq_reclaim(nic, qidx); + return ret; +} + +static inline int +nicvf_vf_stop_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic, + uint16_t qidx) +{ + struct nicvf_rxq *rxq; + int ret, other_error; + + if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] == + RTE_ETH_QUEUE_STATE_STOPPED) + return 0; + + ret = nicvf_qset_rq_reclaim(nic, qidx); + if (ret) + PMD_INIT_LOG(ERR, "Failed to reclaim rq VF%d %d %d", + nic->vf_id, qidx, ret); + + other_error = ret; + rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)]; + nicvf_rx_queue_release_mbufs(dev, rxq); + nicvf_rx_queue_reset(rxq); + + ret = nicvf_qset_cq_reclaim(nic, qidx); + if (ret) + PMD_INIT_LOG(ERR, "Failed to reclaim cq VF%d %d %d", + nic->vf_id, qidx, ret); + + other_error |= ret; + dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] = + RTE_ETH_QUEUE_STATE_STOPPED; + return other_error; +} + +static void +nicvf_dev_rx_queue_release(void *rx_queue) +{ + PMD_INIT_FUNC_TRACE(); + + rte_free(rx_queue); +} + +static int +nicvf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t qidx) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + int ret; + + if (qidx >= 
MAX_RCV_QUEUES_PER_QS) + nic = nic->snicvf[(qidx / MAX_RCV_QUEUES_PER_QS - 1)]; + + qidx = qidx % MAX_RCV_QUEUES_PER_QS; + + ret = nicvf_vf_start_rx_queue(dev, nic, qidx); + if (ret) + return ret; + + ret = nicvf_configure_cpi(dev); + if (ret) + return ret; + + return nicvf_configure_rss_reta(dev); +} + +static int +nicvf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx) +{ + int ret; + struct nicvf *nic = nicvf_pmd_priv(dev); + + if (qidx >= MAX_SND_QUEUES_PER_QS) + nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)]; + + qidx = qidx % MAX_RCV_QUEUES_PER_QS; + + ret = nicvf_vf_stop_rx_queue(dev, nic, qidx); + ret |= nicvf_configure_cpi(dev); + ret |= nicvf_configure_rss_reta(dev); + return ret; +} + +static int +nicvf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t qidx) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + + if (qidx >= MAX_SND_QUEUES_PER_QS) + nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)]; + + qidx = qidx % MAX_SND_QUEUES_PER_QS; + + return nicvf_vf_start_tx_queue(dev, nic, qidx); +} + +static int +nicvf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx) +{ + struct nicvf *nic = nicvf_pmd_priv(dev); + + if (qidx >= MAX_SND_QUEUES_PER_QS) + nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)]; + + qidx = qidx % MAX_SND_QUEUES_PER_QS; + + return nicvf_vf_stop_tx_queue(dev, nic, qidx); +} + +static inline void +nicvf_rxq_mbuf_setup(struct nicvf_rxq *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def; + + RTE_BUILD_BUG_ON(sizeof(union mbuf_initializer) != 8); + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* Prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer.value = *(uint64_t *)p; +} + +static int +nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_rxconf 
*rx_conf,
+			 struct rte_mempool *mp)
+{
+	uint16_t rx_free_thresh;
+	struct nicvf_rxq *rxq;
+	struct nicvf *nic = nicvf_pmd_priv(dev);
+
+	PMD_INIT_FUNC_TRACE();
+	/* Queues past the first qset belong to a secondary VF */
+	if (qidx >= MAX_RCV_QUEUES_PER_QS)
+		nic = nic->snicvf[qidx / MAX_RCV_QUEUES_PER_QS - 1];
+	/* From here on qidx is the index local to the selected VF */
+	qidx = qidx % MAX_RCV_QUEUES_PER_QS;
+
+	/* Socket id check: a mismatch is only logged, setup continues */
+	if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != nic->node)
+		PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d",
+		socket_id, nic->node);
+
+	/* Mempool memory must be contiguous, so must be one memory segment */
+	if (mp->nb_mem_chunks != 1) {
+		PMD_INIT_LOG(ERR, "Non-contiguous mempool, add more huge pages");
+		return -EINVAL;
+	}
+
+	/* Mempool memory must be physically contiguous */
+	if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) {
+		PMD_INIT_LOG(ERR, "Mempool memory must be physically contiguous");
+		return -EINVAL;
+	}
+
+	/* Rx deferred start is not supported */
+	if (rx_conf->rx_deferred_start) {
+		PMD_INIT_LOG(ERR, "Rx deferred start not supported");
+		return -EINVAL;
+	}
+
+	/* Roundup nb_desc to available qsize and validate max number of desc */
+	nb_desc = nicvf_qsize_cq_roundup(nb_desc);
+	if (nb_desc == 0) {
+		PMD_INIT_LOG(ERR, "Value nb_desc beyond available hw cq qsize");
+		return -EINVAL;
+	}
+
+	/*
+	 * Check rx_free_thresh upper bound: a zero value in rx_conf selects
+	 * the driver default; the result must not exceed
+	 * NICVF_MAX_RX_FREE_THRESH and must stay below 75% of nb_desc.
+	 */
+	rx_free_thresh = (uint16_t)((rx_conf->rx_free_thresh) ?
+				rx_conf->rx_free_thresh :
+				NICVF_DEFAULT_RX_FREE_THRESH);
+	if (rx_free_thresh > NICVF_MAX_RX_FREE_THRESH ||
+		rx_free_thresh >= nb_desc * .75) {
+		PMD_INIT_LOG(ERR, "rx_free_thresh greater than expected %d",
+				rx_free_thresh);
+		return -EINVAL;
+	}
+
+	/* Free memory prior to re-allocation if needed */
+	if (dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) {
+		PMD_RX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
+				nicvf_netdev_qidx(nic, qidx));
+		nicvf_dev_rx_queue_release(
+			dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)]);
+		dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL;
+	}
+
+	/* Allocate rxq memory (zeroed, cache-line aligned, on the VF's node) */
+	rxq = rte_zmalloc_socket("ethdev rx queue", sizeof(struct nicvf_rxq),
+					RTE_CACHE_LINE_SIZE, nic->node);
+	if (rxq == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate rxq=%d",
+			     nicvf_netdev_qidx(nic, qidx));
+		return -ENOMEM;
+	}
+
+	rxq->nic = nic;
+	rxq->pool = mp;
+	rxq->queue_id = qidx;
+	rxq->port_id = dev->data->port_id;
+	rxq->rx_free_thresh = rx_free_thresh;
+	rxq->rx_drop_en = rx_conf->rx_drop_en;
+	rxq->cq_status = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_STATUS;
+	rxq->cq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_DOOR;
+	rxq->precharge_cnt = 0;
+	/* The RBPTR word offset depends on the CQE_RX2 hardware capability */
+	if (nicvf_hw_cap(nic) & NICVF_CAP_CQE_RX2)
+		rxq->rbptr_offset = NICVF_CQE_RX2_RBPTR_WORD;
+	else
+		rxq->rbptr_offset = NICVF_CQE_RBPTR_WORD;
+
+	nicvf_rxq_mbuf_setup(rxq);
+
+	/* Alloc completion queue; on failure the new rxq is released again */
+	if (nicvf_qset_cq_alloc(dev, nic, rxq, rxq->queue_id, nb_desc)) {
+		PMD_INIT_LOG(ERR, "failed to allocate cq %u", rxq->queue_id);
+		nicvf_dev_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+
+	nicvf_rx_queue_reset(rxq);
+
+	PMD_RX_LOG(DEBUG, "[%d] rxq=%p pool=%s nb_desc=(%d/%d) phy=%" PRIx64,
+			nicvf_netdev_qidx(nic, qidx), rxq, mp->name, nb_desc,
+			rte_mempool_avail_count(mp), rxq->phys);
+	/* Publish the queue under its port-level index, initially stopped */
+	dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = rxq;
+	dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
+		RTE_ETH_QUEUE_STATE_STOPPED;
+	return 0;
+}
+/*
+ * Fill dev_info with the limits and capabilities reported by this driver:
+ * buffer/packet sizes, queue counts for the primary plus MAX_SQS_PER_VF
+ * secondary qsets, offload flags, RSS parameters and default Rx/Tx queue
+ * configuration.
+ */
+static void
+nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct nicvf *nic = nicvf_pmd_priv(dev);
+	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+
+	PMD_INIT_FUNC_TRACE();
+
+	dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
+
+	dev_info->min_rx_bufsize = ETHER_MIN_MTU;
+	dev_info->max_rx_pktlen = NIC_HW_MAX_FRS;
+	dev_info->max_rx_queues =
+			(uint16_t)MAX_RCV_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
+	dev_info->max_tx_queues =
+			(uint16_t)MAX_SND_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_vfs = pci_dev->max_vfs;
+
+	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
+	dev_info->tx_offload_capa =
+		DEV_TX_OFFLOAD_IPV4_CKSUM  |
+		DEV_TX_OFFLOAD_UDP_CKSUM   |
+		DEV_TX_OFFLOAD_TCP_CKSUM   |
+		DEV_TX_OFFLOAD_TCP_TSO     |
+		DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
+
+	dev_info->reta_size = nic->rss_info.rss_size;
+	dev_info->hash_key_size = RSS_HASH_KEY_BYTE_SIZE;
+	dev_info->flow_type_rss_offloads = NICVF_RSS_OFFLOAD_PASS1;
+	if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING)
+		dev_info->flow_type_rss_offloads |= NICVF_RSS_OFFLOAD_TUNNEL;
+
+	dev_info->default_rxconf = (struct rte_eth_rxconf) {
+		.rx_free_thresh = NICVF_DEFAULT_RX_FREE_THRESH,
+		.rx_drop_en = 0,
+	};
+
+	dev_info->default_txconf = (struct rte_eth_txconf) {
+		.tx_free_thresh = NICVF_DEFAULT_TX_FREE_THRESH,
+		.txq_flags =
+			ETH_TXQ_FLAGS_NOMULTSEGS  |
+			ETH_TXQ_FLAGS_NOREFCOUNT  |
+			ETH_TXQ_FLAGS_NOMULTMEMP  |
+			ETH_TXQ_FLAGS_NOVLANOFFL  |
+			ETH_TXQ_FLAGS_NOXSUMSCTP,
+	};
+}
+/*
+ * Buffer supplier passed to nicvf_qset_rbdr_precharge(): dev is the
+ * rte_eth_dev, opaque the struct nicvf of the VF being filled. Walks the
+ * VF's Rx queues and returns the physical address of one freshly
+ * allocated mbuf, or 0 when no queue yields a buffer.
+ */
+static nicvf_phys_addr_t
+rbdr_rte_mempool_get(void *dev, void *opaque)
+{
+	uint16_t qidx;
+	uintptr_t mbuf;
+	struct nicvf_rxq *rxq;
+	struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)dev;
+	struct nicvf *nic = (struct nicvf *)opaque;
+	uint16_t rx_start, rx_end;
+
+	/* Get queue ranges for this VF */
+	nicvf_rx_range(eth_dev, nic, &rx_start, &rx_end);
+
+	for (qidx = rx_start; qidx <= rx_end; qidx++) {
+		rxq = eth_dev->data->rx_queues[qidx];
+ /* Maintain equal buffer count across all pools */ + if (rxq->precharge_cnt >= rxq->qlen_mask) + continue; + rxq->precharge_cnt++; + mbuf = (uintptr_t)rte_pktmbuf_alloc(rxq->pool); + if (mbuf) + return nicvf_mbuff_virt2phy(mbuf, rxq->mbuf_phys_off); + } + return 0; +} + +static int +nicvf_vf_start(struct rte_eth_dev *dev, struct nicvf *nic, uint32_t rbdrsz) +{ + int ret; + uint16_t qidx, data_off; + uint32_t total_rxq_desc, nb_rbdr_desc, exp_buffs; + uint64_t mbuf_phys_off = 0; + struct nicvf_rxq *rxq; + struct rte_mbuf *mbuf; + uint16_t rx_start, rx_end; + uint16_t tx_start, tx_end; + + PMD_INIT_FUNC_TRACE(); + + /* Userspace process exited without proper shutdown in last run */ + if (nicvf_qset_rbdr_active(nic, 0)) + nicvf_vf_stop(dev, nic, false); + + /* Get queue ranges for this VF */ + nicvf_rx_range(dev, nic, &rx_start, &rx_end); + + /* + * Thunderx nicvf PMD can support more than one pool per port only when + * 1) Data payload size is same across all the pools in given port + * AND + * 2) All mbuffs in the pools are from the same hugepage + * AND + * 3) Mbuff metadata size is same across all the pools in given port + * + * This is to support existing application that uses multiple pool/port. + * But, the purpose of using multipool for QoS will not be addressed. 
+ * + */ + + /* Validate mempool attributes */ + for (qidx = rx_start; qidx <= rx_end; qidx++) { + rxq = dev->data->rx_queues[qidx]; + rxq->mbuf_phys_off = nicvf_mempool_phy_offset(rxq->pool); + mbuf = rte_pktmbuf_alloc(rxq->pool); + if (mbuf == NULL) { + PMD_INIT_LOG(ERR, "Failed allocate mbuf VF%d qid=%d " + "pool=%s", + nic->vf_id, qidx, rxq->pool->name); + return -ENOMEM; + } + data_off = nicvf_mbuff_meta_length(mbuf); + data_off += RTE_PKTMBUF_HEADROOM; + rte_pktmbuf_free(mbuf); + + if (data_off % RTE_CACHE_LINE_SIZE) { + PMD_INIT_LOG(ERR, "%s: unaligned data_off=%d delta=%d", + rxq->pool->name, data_off, + data_off % RTE_CACHE_LINE_SIZE); + return -EINVAL; + } + rxq->mbuf_phys_off -= data_off; + + if (mbuf_phys_off == 0) + mbuf_phys_off = rxq->mbuf_phys_off; + if (mbuf_phys_off != rxq->mbuf_phys_off) { + PMD_INIT_LOG(ERR, "pool params not same,%s VF%d %" + PRIx64, rxq->pool->name, nic->vf_id, + mbuf_phys_off); + return -EINVAL; + } + } + + /* Check the level of buffers in the pool */ + total_rxq_desc = 0; + for (qidx = rx_start; qidx <= rx_end; qidx++) { + rxq = dev->data->rx_queues[qidx]; + /* Count total numbers of rxq descs */ + total_rxq_desc += rxq->qlen_mask + 1; + exp_buffs = RTE_MEMPOOL_CACHE_MAX_SIZE + rxq->rx_free_thresh; + exp_buffs *= dev->data->nb_rx_queues; + if (rte_mempool_avail_count(rxq->pool) < exp_buffs) { + PMD_INIT_LOG(ERR, "Buff shortage in pool=%s (%d/%d)", + rxq->pool->name, + rte_mempool_avail_count(rxq->pool), + exp_buffs); + return -ENOENT; + } + } + + /* Check RBDR desc overflow */ + ret = nicvf_qsize_rbdr_roundup(total_rxq_desc); + if (ret == 0) { + PMD_INIT_LOG(ERR, "Reached RBDR desc limit, reduce nr desc " + "VF%d", nic->vf_id); + return -ENOMEM; + } + + /* Enable qset */ + ret = nicvf_qset_config(nic); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to enable qset %d VF%d", ret, + nic->vf_id); + return ret; + } + + /* Allocate RBDR and RBDR ring desc */ + nb_rbdr_desc = nicvf_qsize_rbdr_roundup(total_rxq_desc); + ret = 
nicvf_qset_rbdr_alloc(dev, nic, nb_rbdr_desc, rbdrsz); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to allocate memory for rbdr alloc " + "VF%d", nic->vf_id); + goto qset_reclaim; + } + + /* Enable and configure RBDR registers */ + ret = nicvf_qset_rbdr_config(nic, 0); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to configure rbdr %d VF%d", ret, + nic->vf_id); + goto qset_rbdr_free; + } + + /* Fill rte_mempool buffers in RBDR pool and precharge it */ + ret = nicvf_qset_rbdr_precharge(dev, nic, 0, rbdr_rte_mempool_get, + total_rxq_desc); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to fill rbdr %d VF%d", ret, + nic->vf_id); + goto qset_rbdr_reclaim; + } + + PMD_DRV_LOG(INFO, "Filled %d out of %d entries in RBDR VF%d", + nic->rbdr->tail, nb_rbdr_desc, nic->vf_id); + + /* Configure VLAN Strip */ + nicvf_vlan_hw_strip(nic, dev->data->dev_conf.rxmode.hw_vlan_strip); + + /* Based on the packet type(IPv4 or IPv6), the nicvf HW aligns L3 data + * to the 64bit memory address. + * The alignment creates a hole in mbuf(between the end of headroom and + * packet data start). The new revision of the HW provides an option to + * disable the L3 alignment feature and make mbuf layout looks + * more like other NICs. 
For better application compatibility, disabling + * l3 alignment feature on the hardware revisions it supports + */ + nicvf_apad_config(nic, false); + + /* Get queue ranges for this VF */ + nicvf_tx_range(dev, nic, &tx_start, &tx_end); + + /* Configure TX queues */ + for (qidx = tx_start; qidx <= tx_end; qidx++) { + ret = nicvf_vf_start_tx_queue(dev, nic, + qidx % MAX_SND_QUEUES_PER_QS); + if (ret) + goto start_txq_error; + } + + /* Configure RX queues */ + for (qidx = rx_start; qidx <= rx_end; qidx++) { + ret = nicvf_vf_start_rx_queue(dev, nic, + qidx % MAX_RCV_QUEUES_PER_QS); + if (ret) + goto start_rxq_error; + } + + if (!nic->sqs_mode) { + /* Configure CPI algorithm */ + ret = nicvf_configure_cpi(dev); + if (ret) + goto start_txq_error; + + ret = nicvf_mbox_get_rss_size(nic); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to get rss table size"); + goto qset_rss_error; + } + + /* Configure RSS */ + ret = nicvf_configure_rss(dev); + if (ret) + goto qset_rss_error; + } + + /* Done; Let PF make the BGX's RX and TX switches to ON position */ + nicvf_mbox_cfg_done(nic); + return 0; + +qset_rss_error: + nicvf_rss_term(nic); +start_rxq_error: + for (qidx = rx_start; qidx <= rx_end; qidx++) + nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS); +start_txq_error: + for (qidx = tx_start; qidx <= tx_end; qidx++) + nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS); +qset_rbdr_reclaim: + nicvf_qset_rbdr_reclaim(nic, 0); + nicvf_rbdr_release_mbufs(dev, nic); +qset_rbdr_free: + if (nic->rbdr) { + rte_free(nic->rbdr); + nic->rbdr = NULL; + } +qset_reclaim: + nicvf_qset_reclaim(nic); + return ret; +} + +static int +nicvf_dev_start(struct rte_eth_dev *dev) +{ + uint16_t qidx; + int ret; + size_t i; + struct nicvf *nic = nicvf_pmd_priv(dev); + struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode; + uint16_t mtu; + uint32_t buffsz = 0, rbdrsz = 0; + struct rte_pktmbuf_pool_private *mbp_priv; + struct nicvf_rxq *rxq; + + PMD_INIT_FUNC_TRACE(); + + /* This 
function must be called for a primary device */
+	assert_primary(nic);
+
+	/*
+	 * Validate RBDR buff size: usable room (data room minus headroom)
+	 * must be a multiple of 128 and identical for every Rx queue's pool.
+	 */
+	for (qidx = 0; qidx < dev->data->nb_rx_queues; qidx++) {
+		rxq = dev->data->rx_queues[qidx];
+		mbp_priv = rte_mempool_get_priv(rxq->pool);
+		buffsz = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;
+		if (buffsz % 128) {
+			PMD_INIT_LOG(ERR, "rxbuf size must be multiply of 128");
+			return -EINVAL;
+		}
+		if (rbdrsz == 0)
+			rbdrsz = buffsz;
+		if (rbdrsz != buffsz) {
+			PMD_INIT_LOG(ERR, "buffsz not same, qidx=%d (%d/%d)",
+				     qidx, rbdrsz, buffsz);
+			return -EINVAL;
+		}
+	}
+
+	/* Configure loopback */
+	ret = nicvf_loopback_config(nic, dev->data->dev_conf.lpbk_mode);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to configure loopback %d", ret);
+		return ret;
+	}
+
+	/* Reset all statistics counters attached to this port */
+	ret = nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, 0xFFFF, 0xFFFF);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to reset stat counters %d", ret);
+		return ret;
+	}
+
+	/*
+	 * Setup scatter mode if needed by jumbo: enabled when the maximum
+	 * frame plus two VLAN tags does not fit in one buffer, or when the
+	 * application requested it explicitly.
+	 */
+	if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
+					    2 * VLAN_TAG_SIZE > buffsz)
+		dev->data->scattered_rx = 1;
+	if (rx_conf->enable_scatter)
+		dev->data->scattered_rx = 1;
+
+	/* Setup MTU based on max_rx_pkt_len or default */
+	mtu = dev->data->dev_conf.rxmode.jumbo_frame ?
+		dev->data->dev_conf.rxmode.max_rx_pkt_len
+			-  ETHER_HDR_LEN - ETHER_CRC_LEN
+		: ETHER_MTU;
+
+	if (nicvf_dev_set_mtu(dev, mtu)) {
+		PMD_INIT_LOG(ERR, "Failed to set default mtu size");
+		return -EBUSY;
+	}
+	/* Start the primary VF first, then every secondary qset */
+	ret = nicvf_vf_start(dev, nic, rbdrsz);
+	if (ret != 0)
+		return ret;
+
+	for (i = 0; i < nic->sqs_count; i++) {
+		assert(nic->snicvf[i]);
+
+		ret = nicvf_vf_start(dev, nic->snicvf[i], rbdrsz);
+		if (ret != 0)
+			return ret;
+	}
+
+	/* Configure callbacks based on scatter mode */
+	nicvf_set_tx_function(dev);
+	nicvf_set_rx_function(dev);
+
+	return 0;
+}
+/*
+ * Common stop path for nicvf_dev_stop() (cleanup == false) and
+ * nicvf_dev_close() (cleanup == true): stops every secondary VF, then the
+ * primary VF, then disables loopback and reclaims the CPI configuration.
+ * Failures of the last two steps are only logged.
+ */
+static void
+nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup)
+{
+	size_t i;
+	int ret;
+	struct nicvf *nic = nicvf_pmd_priv(dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Teardown secondary vf first */
+	for (i = 0; i < nic->sqs_count; i++) {
+		if (!nic->snicvf[i])
+			continue;
+
+		nicvf_vf_stop(dev, nic->snicvf[i], cleanup);
+	}
+
+	/* Stop the primary VF now */
+	nicvf_vf_stop(dev, nic, cleanup);
+
+	/* Disable loopback */
+	ret = nicvf_loopback_config(nic, 0);
+	if (ret)
+		PMD_INIT_LOG(ERR, "Failed to disable loopback %d", ret);
+
+	/* Reclaim CPI configuration */
+	ret = nicvf_mbox_config_cpi(nic, 0);
+	if (ret)
+		PMD_INIT_LOG(ERR, "Failed to reclaim CPI config %d", ret);
+}
+/* Stop the port without sending the mailbox shutdown (cleanup == false) */
+static void
+nicvf_dev_stop(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	nicvf_dev_stop_cleanup(dev, false);
+}
+/*
+ * Stop one VF: optionally send the mailbox shutdown to the PF (cleanup),
+ * disable VLAN strip, stop the VF's Tx and Rx queues, reclaim the RBDR and
+ * release its buffers, disable the qset and all interrupts, and finally
+ * free the RBDR software structure. Failures are logged, not returned.
+ */
+static void
+nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic, bool cleanup)
+{
+	int ret;
+	uint16_t qidx;
+	uint16_t tx_start, tx_end;
+	uint16_t rx_start, rx_end;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (cleanup) {
+		/* Let PF make the BGX's RX and TX switches to OFF position */
+		nicvf_mbox_shutdown(nic);
+	}
+
+	/* Disable VLAN Strip */
+	nicvf_vlan_hw_strip(nic, 0);
+
+	/* Get queue ranges for this VF */
+	nicvf_tx_range(dev, nic, &tx_start, &tx_end);
+
+	for (qidx = tx_start; qidx <= tx_end; qidx++)
+		nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS);
+
+	/* Get queue ranges for this VF */
+	nicvf_rx_range(dev, nic, &rx_start, &rx_end);
+
+	/* Reclaim rq */
+	for (qidx = rx_start; qidx <= rx_end; qidx++)
+		nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS);
+
+	/* Reclaim RBDR */
+	ret = nicvf_qset_rbdr_reclaim(nic, 0);
+	if (ret)
+		PMD_INIT_LOG(ERR, "Failed to reclaim RBDR %d", ret);
+
+	/* Move all charged buffers in RBDR back to pool */
+	if (nic->rbdr != NULL)
+		nicvf_rbdr_release_mbufs(dev, nic);
+
+	/* Disable qset */
+	ret = nicvf_qset_reclaim(nic);
+	if (ret)
+		PMD_INIT_LOG(ERR, "Failed to disable qset %d", ret);
+
+	/* Disable all interrupts */
+	nicvf_disable_all_interrupts(nic);
+
+	/* Free RBDR SW structure */
+	if (nic->rbdr) {
+		rte_free(nic->rbdr);
+		nic->rbdr = NULL;
+	}
+}
+/*
+ * Close the port: full stop including the mailbox shutdown, then stop the
+ * periodic alarms of the primary VF and of every assigned secondary VF.
+ */
+static void
+nicvf_dev_close(struct rte_eth_dev *dev)
+{
+	size_t i;
+	struct nicvf *nic = nicvf_pmd_priv(dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	nicvf_dev_stop_cleanup(dev, true);
+	nicvf_periodic_alarm_stop(nicvf_interrupt, dev);
+
+	for (i = 0; i < nic->sqs_count; i++) {
+		if (!nic->snicvf[i])
+			continue;
+
+		nicvf_periodic_alarm_stop(nicvf_vf_interrupt, nic->snicvf[i]);
+	}
+}
+/*
+ * Take nic->sqs_count secondary VFs from the pool filled at probe time
+ * (nicvf_svf_pop()), record them in nic->snicvf[] and request them from the
+ * PF over the mailbox. Panics when the pool runs empty. Returns the result
+ * of nicvf_mbox_request_sqs().
+ */
+static int
+nicvf_request_sqs(struct nicvf *nic)
+{
+	size_t i;
+
+	assert_primary(nic);
+	assert(nic->sqs_count > 0);
+	assert(nic->sqs_count <= MAX_SQS_PER_VF);
+
+	/* Set no of Rx/Tx queues in each of the SQsets */
+	for (i = 0; i < nic->sqs_count; i++) {
+		if (nicvf_svf_empty())
+			rte_panic("Cannot assign sufficient number of "
+				  "secondary queues to primary VF%" PRIu8 "\n",
+				  nic->vf_id);
+
+		nic->snicvf[i] = nicvf_svf_pop();
+		nic->snicvf[i]->sqs_id = i;
+	}
+
+	return nicvf_mbox_request_sqs(nic);
+}
+/*
+ * Validate the requested port configuration and size the set of secondary
+ * qsets. Unsupported options are rejected with -EINVAL, except CRC strip
+ * and Rx IP checksum, which are silently forced to the supported value.
+ */
+static int
+nicvf_dev_configure(struct rte_eth_dev *dev)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	struct rte_eth_conf *conf = &data->dev_conf;
+	struct rte_eth_rxmode *rxmode = &conf->rxmode;
+	struct rte_eth_txmode *txmode = &conf->txmode;
+	struct nicvf *nic = nicvf_pmd_priv(dev);
+	uint8_t cqcount;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (!rte_eal_has_hugepages()) {
+		PMD_INIT_LOG(INFO, "Huge page is not configured");
+		return -EINVAL;
+	}
+
+	if (txmode->mq_mode) {
+		PMD_INIT_LOG(INFO, "Tx mq_mode DCB or VMDq not supported");
+		return -EINVAL;
+	}
+
+	if (rxmode->mq_mode != ETH_MQ_RX_NONE &&
+		rxmode->mq_mode != ETH_MQ_RX_RSS) {
+		PMD_INIT_LOG(INFO, "Unsupported rx qmode %d", rxmode->mq_mode);
+		return -EINVAL;
+	}
+
+	if (!rxmode->hw_strip_crc) {
+		PMD_INIT_LOG(NOTICE, "Can't disable hw crc strip");
+		rxmode->hw_strip_crc = 1;
+	}
+
+	if (rxmode->hw_ip_checksum) {
+		PMD_INIT_LOG(NOTICE, "Rxcksum not supported");
+		rxmode->hw_ip_checksum = 0;
+	}
+
+	if (rxmode->split_hdr_size) {
+		PMD_INIT_LOG(INFO, "Rxmode does not support split header");
+		return -EINVAL;
+	}
+
+	if (rxmode->hw_vlan_filter) {
+		PMD_INIT_LOG(INFO, "VLAN filter not supported");
+		return -EINVAL;
+	}
+
+	if (rxmode->hw_vlan_extend) {
+		PMD_INIT_LOG(INFO, "VLAN extended not supported");
+		return -EINVAL;
+	}
+
+	if (rxmode->enable_lro) {
+		PMD_INIT_LOG(INFO, "LRO not supported");
+		return -EINVAL;
+	}
+
+	if (conf->link_speeds & ETH_LINK_SPEED_FIXED) {
+		PMD_INIT_LOG(INFO, "Setting link speed/duplex not supported");
+		return -EINVAL;
+	}
+
+	if (conf->dcb_capability_en) {
+		PMD_INIT_LOG(INFO, "DCB enable not supported");
+		return -EINVAL;
+	}
+
+	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
+		PMD_INIT_LOG(INFO, "Flow director not supported");
+		return -EINVAL;
+	}
+	/*
+	 * One qset serves MAX_RCV_QUEUES_PER_QS queues; sqs_count is the
+	 * number of additional qsets needed for the larger of the Rx/Tx
+	 * queue counts.
+	 */
+	assert_primary(nic);
+	NICVF_STATIC_ASSERT(MAX_RCV_QUEUES_PER_QS == MAX_SND_QUEUES_PER_QS);
+	cqcount = RTE_MAX(data->nb_tx_queues, data->nb_rx_queues);
+	if (cqcount > MAX_RCV_QUEUES_PER_QS) {
+		nic->sqs_count = RTE_ALIGN_CEIL(cqcount, MAX_RCV_QUEUES_PER_QS);
+		nic->sqs_count = (nic->sqs_count / MAX_RCV_QUEUES_PER_QS) - 1;
+	} else {
+		nic->sqs_count = 0;
+	}
+
+	assert(nic->sqs_count <= MAX_SQS_PER_VF);
+
+	if (nic->sqs_count > 0) {
+		if (nicvf_request_sqs(nic)) {
+			rte_panic("Cannot assign sufficient number of "
+				  "secondary queues to PORT%d VF%" PRIu8 "\n",
dev->data->port_id, nic->vf_id);
+		}
+	}
+
+	PMD_INIT_LOG(DEBUG, "Configured ethdev port%d hwcap=0x%" PRIx64,
+		dev->data->port_id, nicvf_hw_cap(nic));
+
+	return 0;
+}
+
+/* Initialize and register driver with DPDK Application */
+static const struct eth_dev_ops nicvf_eth_dev_ops = {
+	.dev_configure            = nicvf_dev_configure,
+	.dev_start                = nicvf_dev_start,
+	.dev_stop                 = nicvf_dev_stop,
+	.link_update              = nicvf_dev_link_update,
+	.dev_close                = nicvf_dev_close,
+	.stats_get                = nicvf_dev_stats_get,
+	.stats_reset              = nicvf_dev_stats_reset,
+	.promiscuous_enable       = nicvf_dev_promisc_enable,
+	.dev_infos_get            = nicvf_dev_info_get,
+	.dev_supported_ptypes_get = nicvf_dev_supported_ptypes_get,
+	.mtu_set                  = nicvf_dev_set_mtu,
+	.reta_update              = nicvf_dev_reta_update,
+	.reta_query               = nicvf_dev_reta_query,
+	.rss_hash_update          = nicvf_dev_rss_hash_update,
+	.rss_hash_conf_get        = nicvf_dev_rss_hash_conf_get,
+	.rx_queue_start           = nicvf_dev_rx_queue_start,
+	.rx_queue_stop            = nicvf_dev_rx_queue_stop,
+	.tx_queue_start           = nicvf_dev_tx_queue_start,
+	.tx_queue_stop            = nicvf_dev_tx_queue_stop,
+	.rx_queue_setup           = nicvf_dev_rx_queue_setup,
+	.rx_queue_release         = nicvf_dev_rx_queue_release,
+	.rx_queue_count           = nicvf_dev_rx_queue_count,
+	.tx_queue_setup           = nicvf_dev_tx_queue_setup,
+	.tx_queue_release         = nicvf_dev_tx_queue_release,
+	.get_reg                  = nicvf_dev_get_regs,
+};
+
+/*
+ * Per-device init callback handed to rte_eth_dev_pci_generic_probe().
+ *
+ * Primary process: records the PCI ids, checks that BAR0 is mapped, starts
+ * the periodic alarm, waits for the PF ready message and runs
+ * nicvf_base_init(). A VF reported in sqs_mode is pushed onto the
+ * secondary-VF stack, its private data is detached from the ethdev and a
+ * positive ENOTSUP is returned so that no port is created for it. A primary
+ * VF gets its MAC address allocated, randomised when the PF supplied an
+ * all-zero address, and programmed through the mailbox.
+ *
+ * Secondary process: only installs the Rx/Tx burst functions; returns
+ * positive ENOTSUP when the port has no private data.
+ *
+ * Returns 0 on success, a negative value on failure, or positive ENOTSUP
+ * as described above.
+ */
+static int
+nicvf_eth_dev_init(struct rte_eth_dev *eth_dev)
+{
+	int ret;
+	struct rte_pci_device *pci_dev;
+	struct nicvf *nic = nicvf_pmd_priv(eth_dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	eth_dev->dev_ops = &nicvf_eth_dev_ops;
+
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		if (nic) {
+			/* Setup callbacks for secondary process */
+			nicvf_set_tx_function(eth_dev);
+			nicvf_set_rx_function(eth_dev);
+			return 0;
+		} else {
+			/* If nic == NULL than it is secondary function
+			 * so ethdev need to be released by caller */
+			return ENOTSUP;
+		}
+	}
+
+	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+	rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+	nic->device_id = pci_dev->id.device_id;
+	nic->vendor_id = pci_dev->id.vendor_id;
+	nic->subsystem_device_id = pci_dev->id.subsystem_device_id;
+	nic->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
+
+	PMD_INIT_LOG(DEBUG, "nicvf: device (%x:%x) %u:%u:%u:%u",
+			pci_dev->id.vendor_id, pci_dev->id.device_id,
+			pci_dev->addr.domain, pci_dev->addr.bus,
+			pci_dev->addr.devid, pci_dev->addr.function);
+
+	nic->reg_base = (uintptr_t)pci_dev->mem_resource[0].addr;
+	if (!nic->reg_base) {
+		PMD_INIT_LOG(ERR, "Failed to map BAR0");
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	nicvf_disable_all_interrupts(nic);
+
+	ret = nicvf_periodic_alarm_start(nicvf_interrupt, eth_dev);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to start period alarm");
+		goto fail;
+	}
+
+	ret = nicvf_mbox_check_pf_ready(nic);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to get ready message from PF");
+		goto alarm_fail;
+	} else {
+		PMD_INIT_LOG(INFO,
+			"node=%d vf=%d mode=%s sqs=%s loopback_supported=%s",
+			nic->node, nic->vf_id,
+			nic->tns_mode == NIC_TNS_MODE ? "tns" : "tns-bypass",
+			nic->sqs_mode ? "true" : "false",
+			nic->loopback_supported ? "true" : "false"
+			);
+	}
+
+	ret = nicvf_base_init(nic);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to execute nicvf_base_init");
+		goto malloc_fail;
+	}
+
+	if (nic->sqs_mode) {
+		/* Push nic to stack of secondary vfs */
+		nicvf_svf_push(nic);
+
+		/* Steal nic pointer from the device for further reuse */
+		eth_dev->data->dev_private = NULL;
+
+		/* The secondary VF is polled by its own alarm from now on */
+		nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
+		ret = nicvf_periodic_alarm_start(nicvf_vf_interrupt, nic);
+		if (ret) {
+			PMD_INIT_LOG(ERR, "Failed to start period alarm");
+			goto fail;
+		}
+
+		/* Detach port by returning positive error number */
+		return ENOTSUP;
+	}
+
+	eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for mac addr");
+		ret = -ENOMEM;
+		goto alarm_fail;
+	}
+	/* Fall back to a random address when the PF supplied none */
+	if (is_zero_ether_addr((struct ether_addr *)nic->mac_addr))
+		eth_random_addr(&nic->mac_addr[0]);
+
+	ether_addr_copy((struct ether_addr *)nic->mac_addr,
+			&eth_dev->data->mac_addrs[0]);
+
+	ret = nicvf_mbox_set_mac_addr(nic, nic->mac_addr);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to set mac addr");
+		goto malloc_fail;
+	}
+
+	PMD_INIT_LOG(INFO, "Port %d (%x:%x) mac=%02x:%02x:%02x:%02x:%02x:%02x",
+		eth_dev->data->port_id, nic->vendor_id, nic->device_id,
+		nic->mac_addr[0], nic->mac_addr[1], nic->mac_addr[2],
+		nic->mac_addr[3], nic->mac_addr[4], nic->mac_addr[5]);
+
+	return 0;
+
+malloc_fail:
+	rte_free(eth_dev->data->mac_addrs);
+	/* Do not leave a dangling pointer behind in the shared port data */
+	eth_dev->data->mac_addrs = NULL;
+alarm_fail:
+	nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
+fail:
+	return ret;
+}
+
+static const struct rte_pci_id pci_id_nicvf_map[] = {
+	{
+		.class_id = RTE_CLASS_ANY_ID,
+		.vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.device_id = PCI_DEVICE_ID_THUNDERX_CN88XX_PASS1_NICVF,
+		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS1_NICVF,
+	},
+	{
+		.class_id = RTE_CLASS_ANY_ID,
+		.vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS2_NICVF,
+	},
+	{
+		.class_id = RTE_CLASS_ANY_ID,
+		.vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
+		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN81XX_NICVF,
+	},
+	{
+		.class_id = RTE_CLASS_ANY_ID,
+		.vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
+		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
+		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN83XX_NICVF,
+	},
+	{
+		.vendor_id = 0, /* zero vendor id terminates the table */
+	},
+};
+/*
+ * PCI probe hook: defers to the generic ethdev PCI probe, asking it for
+ * sizeof(struct nicvf) bytes of private data and nicvf_eth_dev_init() as
+ * the per-device init routine.
+ */
+static int nicvf_eth_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+	struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct nicvf),
+		nicvf_eth_dev_init);
+}
+/* PCI remove hook: generic ethdev PCI remove, no driver uninit callback */
+static int nicvf_eth_pci_remove(struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
+}
+/* Driver descriptor registered below under the name net_thunderx */
+static struct rte_pci_driver rte_nicvf_pmd = {
+	.id_table = pci_id_nicvf_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+	.probe = nicvf_eth_pci_probe,
+	.remove = nicvf_eth_pci_remove,
+};
+
+RTE_PMD_REGISTER_PCI(net_thunderx, rte_nicvf_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_thunderx, pci_id_nicvf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_thunderx, "* igb_uio | uio_pci_generic | vfio");
diff --git a/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.h b/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.h
new file mode 100644
index 00000000..a74219fa
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_ethdev.h
@@ -0,0 +1,146 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __THUNDERX_NICVF_ETHDEV_H__
+#define __THUNDERX_NICVF_ETHDEV_H__
+
+#include <rte_ethdev.h>
+
+#define THUNDERX_NICVF_PMD_VERSION      "2.0"
+#define THUNDERX_REG_BYTES		8
+
+#define NICVF_INTR_POLL_INTERVAL_MS	50
+#define NICVF_HALF_DUPLEX		0x00
+#define NICVF_FULL_DUPLEX		0x01
+#define NICVF_UNKNOWN_DUPLEX		0xff
+/* RSS flow types always reported by the driver */
+#define NICVF_RSS_OFFLOAD_PASS1 ( \
+	ETH_RSS_PORT | \
+	ETH_RSS_IPV4 | \
+	ETH_RSS_NONFRAG_IPV4_TCP | \
+	ETH_RSS_NONFRAG_IPV4_UDP | \
+	ETH_RSS_IPV6 | \
+	ETH_RSS_NONFRAG_IPV6_TCP | \
+	ETH_RSS_NONFRAG_IPV6_UDP)
+/* Additional RSS flow types, for hardware with tunnel parsing */
+#define NICVF_RSS_OFFLOAD_TUNNEL ( \
+	ETH_RSS_VXLAN | \
+	ETH_RSS_GENEVE | \
+	ETH_RSS_NVGRE)
+
+#define NICVF_DEFAULT_RX_FREE_THRESH    224
+#define NICVF_DEFAULT_TX_FREE_THRESH    224
+#define NICVF_TX_FREE_MPOOL_THRESH      16
+#define NICVF_MAX_RX_FREE_THRESH        1024
+#define NICVF_MAX_TX_FREE_THRESH        1024
+
+#define VLAN_TAG_SIZE                   4	/* 802.3ac tag */
+/* Return the driver private data (struct nicvf) attached to an ethdev */
+static inline struct nicvf *
+nicvf_pmd_priv(struct rte_eth_dev *eth_dev)
+{
+	return eth_dev->data->dev_private;
+}
+/*
+ * Return the difference between the virtual and the physical address of
+ * the first memory chunk of a mempool (asserts that a chunk exists).
+ */
+static inline uint64_t
+nicvf_mempool_phy_offset(struct rte_mempool *mp)
+{
+	struct rte_mempool_memhdr *hdr;
+
+	hdr = STAILQ_FIRST(&mp->mem_list);
+	assert(hdr != NULL);
+	return (uint64_t)((uintptr_t)hdr->addr - hdr->phys_addr);
+}
+/* Return the byte distance from the mbuf structure to its buf_addr */
+static inline uint16_t
+nicvf_mbuff_meta_length(struct rte_mbuf *mbuf)
+{
+	return (uint16_t)((uintptr_t)mbuf->buf_addr - (uintptr_t)mbuf);
+}
+/*
+ * Translate a VF-local queue index into the port-level index: unchanged
+ * for a VF not in sqs_mode, shifted by (sqs_id + 1) qsets otherwise.
+ */
+static inline uint16_t
+nicvf_netdev_qidx(struct nicvf *nic, uint8_t local_qidx)
+{
+	uint16_t global_qidx = local_qidx;
+
+	if (nic->sqs_mode)
+		global_qidx += ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
+
+	return global_qidx;
+}
+
+/*
+ * Simple phy2virt functions assuming mbufs are in a single huge page
+ * V = P + offset
+ * P = V - offset
+ */
+static inline uintptr_t
+nicvf_mbuff_phy2virt(phys_addr_t phy, uint64_t mbuf_phys_off)
+{
+	return (uintptr_t)(phy + mbuf_phys_off);
+}
+
+static inline uintptr_t
+nicvf_mbuff_virt2phy(uintptr_t virt, uint64_t mbuf_phys_off)
+{
+	return (phys_addr_t)(virt - mbuf_phys_off);
+}
+/*
+ * Compute the inclusive range [*tx_start, *tx_end] of port-level Tx queue
+ * indexes served by this VF: the qset-aligned window containing the VF's
+ * first queue, clipped to the number of configured Tx queues (*tx_end is 0
+ * when none are configured).
+ */
+static inline void
+nicvf_tx_range(struct rte_eth_dev *dev, struct nicvf *nic, uint16_t *tx_start,
+	       uint16_t *tx_end)
+{
+	uint16_t tmp;
+
+	*tx_start = RTE_ALIGN_FLOOR(nicvf_netdev_qidx(nic, 0),
+				    MAX_SND_QUEUES_PER_QS);
+	tmp = RTE_ALIGN_CEIL(nicvf_netdev_qidx(nic, 0) + 1,
+			     MAX_SND_QUEUES_PER_QS) - 1;
+	*tx_end = dev->data->nb_tx_queues ?
+		RTE_MIN(tmp, dev->data->nb_tx_queues - 1) : 0;
+}
+/* Rx counterpart of nicvf_tx_range(), based on the Rx queue count */
+static inline void
+nicvf_rx_range(struct rte_eth_dev *dev, struct nicvf *nic, uint16_t *rx_start,
+	       uint16_t *rx_end)
+{
+	uint16_t tmp;
+
+	*rx_start = RTE_ALIGN_FLOOR(nicvf_netdev_qidx(nic, 0),
+				    MAX_RCV_QUEUES_PER_QS);
+	tmp = RTE_ALIGN_CEIL(nicvf_netdev_qidx(nic, 0) + 1,
+			     MAX_RCV_QUEUES_PER_QS) - 1;
+	*rx_end = dev->data->nb_rx_queues ?
+		RTE_MIN(tmp, dev->data->nb_rx_queues - 1) : 0;
+}
+
+#endif /* __THUNDERX_NICVF_ETHDEV_H__  */
diff --git a/src/seastar/dpdk/drivers/net/thunderx/nicvf_logs.h b/src/seastar/dpdk/drivers/net/thunderx/nicvf_logs.h
new file mode 100644
index 00000000..0667d468
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_logs.h
@@ -0,0 +1,83 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
#ifndef __THUNDERX_NICVF_LOGS__
#define __THUNDERX_NICVF_LOGS__

#include <assert.h>

/*
 * Logging helpers for the ThunderX NICVF PMD.
 *
 * Every *_LOG macro prefixes the message with the calling function name and
 * appends a newline. Apart from PMD_INIT_LOG, each family is compiled in only
 * when its RTE_LIBRTE_THUNDERX_NICVF_DEBUG_* option is defined; otherwise the
 * macros expand to an empty statement, so their arguments are not evaluated.
 */

/* Initialisation messages: always compiled in. */
#define PMD_INIT_LOG(level, fmt, args...) \
	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)

#ifdef RTE_LIBRTE_THUNDERX_NICVF_DEBUG_INIT
#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, ">>")
#else
#define PMD_INIT_FUNC_TRACE() do { } while (0)
#endif

/* Rx fast-path logging and assertions. */
#ifdef RTE_LIBRTE_THUNDERX_NICVF_DEBUG_RX
#define PMD_RX_LOG(level, fmt, args...) \
	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#define NICVF_RX_ASSERT(x) assert(x)
#else
#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
#define NICVF_RX_ASSERT(x) do { } while (0)
#endif

/* Tx fast-path logging and assertions. */
#ifdef RTE_LIBRTE_THUNDERX_NICVF_DEBUG_TX
#define PMD_TX_LOG(level, fmt, args...) \
	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#define NICVF_TX_ASSERT(x) assert(x)
#else
#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
#define NICVF_TX_ASSERT(x) do { } while (0)
#endif

/* Generic driver (control path) logging. */
#ifdef RTE_LIBRTE_THUNDERX_NICVF_DEBUG_DRIVER
#define PMD_DRV_LOG(level, fmt, args...) \
	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#define PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, ">>")
#else
#define PMD_DRV_LOG(level, fmt, args...) do { } while (0)
#define PMD_DRV_FUNC_TRACE() do { } while (0)
#endif

/*
 * Mailbox logging. The function trace goes through PMD_MBOX_LOG so that it
 * is governed by the MBOX debug option alone and does not depend on the
 * DRIVER debug option being enabled as well.
 */
#ifdef RTE_LIBRTE_THUNDERX_NICVF_DEBUG_MBOX
#define PMD_MBOX_LOG(level, fmt, args...) \
	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#define PMD_MBOX_FUNC_TRACE() PMD_MBOX_LOG(DEBUG, ">>")
#else
#define PMD_MBOX_LOG(level, fmt, args...) do { } while (0)
#define PMD_MBOX_FUNC_TRACE() do { } while (0)
#endif

#endif /* __THUNDERX_NICVF_LOGS__ */
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_prefetch.h> + +#include "base/nicvf_plat.h" + +#include "nicvf_ethdev.h" +#include "nicvf_rxtx.h" +#include "nicvf_logs.h" + +static inline void __hot +fill_sq_desc_header(union sq_entry_t *entry, struct rte_mbuf *pkt) +{ + /* Local variable sqe to avoid read from sq desc memory*/ + union sq_entry_t sqe; + uint64_t ol_flags; + + /* Fill SQ header descriptor */ + sqe.buff[0] = 0; + sqe.hdr.subdesc_type = SQ_DESC_TYPE_HEADER; + /* Number of sub-descriptors following this one */ + sqe.hdr.subdesc_cnt = pkt->nb_segs; + sqe.hdr.tot_len = pkt->pkt_len; + + ol_flags = pkt->ol_flags & NICVF_TX_OFFLOAD_MASK; + if (unlikely(ol_flags)) { + /* L4 cksum */ + uint64_t l4_flags = ol_flags & PKT_TX_L4_MASK; + if (l4_flags == PKT_TX_TCP_CKSUM) + sqe.hdr.csum_l4 = SEND_L4_CSUM_TCP; + else if (l4_flags == PKT_TX_UDP_CKSUM) + sqe.hdr.csum_l4 = SEND_L4_CSUM_UDP; + else + sqe.hdr.csum_l4 = SEND_L4_CSUM_DISABLE; + + sqe.hdr.l3_offset = pkt->l2_len; + sqe.hdr.l4_offset = pkt->l3_len + pkt->l2_len; + + /* L3 cksum */ + if (ol_flags & PKT_TX_IP_CKSUM) + sqe.hdr.csum_l3 = 1; + } + + entry->buff[0] = 
sqe.buff[0]; +} + +void __hot +nicvf_single_pool_free_xmited_buffers(struct nicvf_txq *sq) +{ + int j = 0; + uint32_t curr_head; + uint32_t head = sq->head; + struct rte_mbuf **txbuffs = sq->txbuffs; + void *obj_p[NICVF_MAX_TX_FREE_THRESH] __rte_cache_aligned; + + curr_head = nicvf_addr_read(sq->sq_head) >> 4; + while (head != curr_head) { + if (txbuffs[head]) + obj_p[j++] = txbuffs[head]; + + head = (head + 1) & sq->qlen_mask; + } + + rte_mempool_put_bulk(sq->pool, obj_p, j); + sq->head = curr_head; + sq->xmit_bufs -= j; + NICVF_TX_ASSERT(sq->xmit_bufs >= 0); +} + +void __hot +nicvf_multi_pool_free_xmited_buffers(struct nicvf_txq *sq) +{ + uint32_t n = 0; + uint32_t curr_head; + uint32_t head = sq->head; + struct rte_mbuf **txbuffs = sq->txbuffs; + + curr_head = nicvf_addr_read(sq->sq_head) >> 4; + while (head != curr_head) { + if (txbuffs[head]) { + rte_pktmbuf_free_seg(txbuffs[head]); + n++; + } + + head = (head + 1) & sq->qlen_mask; + } + + sq->head = curr_head; + sq->xmit_bufs -= n; + NICVF_TX_ASSERT(sq->xmit_bufs >= 0); +} + +static inline uint32_t __hot +nicvf_free_tx_desc(struct nicvf_txq *sq) +{ + return ((sq->head - sq->tail - 1) & sq->qlen_mask); +} + +/* Send Header + Packet */ +#define TX_DESC_PER_PKT 2 + +static inline uint32_t __hot +nicvf_free_xmitted_buffers(struct nicvf_txq *sq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + uint32_t free_desc = nicvf_free_tx_desc(sq); + + if (free_desc < nb_pkts * TX_DESC_PER_PKT || + sq->xmit_bufs > sq->tx_free_thresh) { + if (unlikely(sq->pool == NULL)) + sq->pool = tx_pkts[0]->pool; + + sq->pool_free(sq); + /* Freed now, let see the number of free descs again */ + free_desc = nicvf_free_tx_desc(sq); + } + return free_desc; +} + +uint16_t __hot +nicvf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i; + uint32_t free_desc; + uint32_t tail; + struct nicvf_txq *sq = tx_queue; + union sq_entry_t *desc_ptr = sq->desc; + struct rte_mbuf **txbuffs = sq->txbuffs; + struct rte_mbuf 
*pkt; + uint32_t qlen_mask = sq->qlen_mask; + + tail = sq->tail; + free_desc = nicvf_free_xmitted_buffers(sq, tx_pkts, nb_pkts); + + for (i = 0; i < nb_pkts && (int)free_desc >= TX_DESC_PER_PKT; i++) { + pkt = tx_pkts[i]; + + txbuffs[tail] = NULL; + fill_sq_desc_header(desc_ptr + tail, pkt); + tail = (tail + 1) & qlen_mask; + + txbuffs[tail] = pkt; + fill_sq_desc_gather(desc_ptr + tail, pkt); + tail = (tail + 1) & qlen_mask; + free_desc -= TX_DESC_PER_PKT; + } + + sq->tail = tail; + sq->xmit_bufs += i; + rte_wmb(); + + /* Inform HW to xmit the packets */ + nicvf_addr_write(sq->sq_door, i * TX_DESC_PER_PKT); + return i; +} + +uint16_t __hot +nicvf_xmit_pkts_multiseg(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, k; + uint32_t used_desc, next_used_desc, used_bufs, free_desc, tail; + struct nicvf_txq *sq = tx_queue; + union sq_entry_t *desc_ptr = sq->desc; + struct rte_mbuf **txbuffs = sq->txbuffs; + struct rte_mbuf *pkt, *seg; + uint32_t qlen_mask = sq->qlen_mask; + uint16_t nb_segs; + + tail = sq->tail; + used_desc = 0; + used_bufs = 0; + + free_desc = nicvf_free_xmitted_buffers(sq, tx_pkts, nb_pkts); + + for (i = 0; i < nb_pkts; i++) { + pkt = tx_pkts[i]; + + nb_segs = pkt->nb_segs; + + next_used_desc = used_desc + nb_segs + 1; + if (next_used_desc > free_desc) + break; + used_desc = next_used_desc; + used_bufs += nb_segs; + + txbuffs[tail] = NULL; + fill_sq_desc_header(desc_ptr + tail, pkt); + tail = (tail + 1) & qlen_mask; + + txbuffs[tail] = pkt; + fill_sq_desc_gather(desc_ptr + tail, pkt); + tail = (tail + 1) & qlen_mask; + + seg = pkt->next; + for (k = 1; k < nb_segs; k++) { + txbuffs[tail] = seg; + fill_sq_desc_gather(desc_ptr + tail, seg); + tail = (tail + 1) & qlen_mask; + seg = seg->next; + } + } + + sq->tail = tail; + sq->xmit_bufs += used_bufs; + rte_wmb(); + + /* Inform HW to xmit the packets */ + nicvf_addr_write(sq->sq_door, used_desc); + return nb_pkts; +} + +static const uint32_t ptype_table[16][16] __rte_cache_aligned = 
{ + [L3_NONE][L4_NONE] = RTE_PTYPE_UNKNOWN, + [L3_NONE][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN, + [L3_NONE][L4_IPFRAG] = RTE_PTYPE_L4_FRAG, + [L3_NONE][L4_IPCOMP] = RTE_PTYPE_UNKNOWN, + [L3_NONE][L4_TCP] = RTE_PTYPE_L4_TCP, + [L3_NONE][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP, + [L3_NONE][L4_GRE] = RTE_PTYPE_TUNNEL_GRE, + [L3_NONE][L4_UDP_PASS2] = RTE_PTYPE_L4_UDP, + [L3_NONE][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE, + [L3_NONE][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, + [L3_NONE][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE, + + [L3_IPV4][L4_NONE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN, + [L3_IPV4][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV4, + [L3_IPV4][L4_IPFRAG] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_FRAG, + [L3_IPV4][L4_IPCOMP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN, + [L3_IPV4][L4_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP, + [L3_IPV4][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP, + [L3_IPV4][L4_GRE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GRE, + [L3_IPV4][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP, + [L3_IPV4][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GENEVE, + [L3_IPV4][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_VXLAN, + [L3_IPV4][L4_NVGRE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_NVGRE, + + [L3_IPV4_OPT][L4_NONE] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN, + [L3_IPV4_OPT][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L3_IPV4, + [L3_IPV4_OPT][L4_IPFRAG] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_FRAG, + [L3_IPV4_OPT][L4_IPCOMP] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN, + [L3_IPV4_OPT][L4_TCP] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP, + [L3_IPV4_OPT][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP, + [L3_IPV4_OPT][L4_GRE] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_GRE, + [L3_IPV4_OPT][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP, + [L3_IPV4_OPT][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_TUNNEL_GENEVE, + [L3_IPV4_OPT][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_TUNNEL_VXLAN, + 
[L3_IPV4_OPT][L4_NVGRE] = RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_TUNNEL_NVGRE, + + [L3_IPV6][L4_NONE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN, + [L3_IPV6][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L3_IPV4, + [L3_IPV6][L4_IPFRAG] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_FRAG, + [L3_IPV6][L4_IPCOMP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN, + [L3_IPV6][L4_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP, + [L3_IPV6][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP, + [L3_IPV6][L4_GRE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GRE, + [L3_IPV6][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP, + [L3_IPV6][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GENEVE, + [L3_IPV6][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_VXLAN, + [L3_IPV6][L4_NVGRE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_NVGRE, + + [L3_IPV6_OPT][L4_NONE] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN, + [L3_IPV6_OPT][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L3_IPV4, + [L3_IPV6_OPT][L4_IPFRAG] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_FRAG, + [L3_IPV6_OPT][L4_IPCOMP] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN, + [L3_IPV6_OPT][L4_TCP] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP, + [L3_IPV6_OPT][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP, + [L3_IPV6_OPT][L4_GRE] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_GRE, + [L3_IPV6_OPT][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP, + [L3_IPV6_OPT][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_TUNNEL_GENEVE, + [L3_IPV6_OPT][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_TUNNEL_VXLAN, + [L3_IPV6_OPT][L4_NVGRE] = RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_TUNNEL_NVGRE, + + [L3_ET_STOP][L4_NONE] = RTE_PTYPE_UNKNOWN, + [L3_ET_STOP][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN, + [L3_ET_STOP][L4_IPFRAG] = RTE_PTYPE_L4_FRAG, + [L3_ET_STOP][L4_IPCOMP] = RTE_PTYPE_UNKNOWN, + [L3_ET_STOP][L4_TCP] = RTE_PTYPE_L4_TCP, + [L3_ET_STOP][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP, + [L3_ET_STOP][L4_GRE] = RTE_PTYPE_TUNNEL_GRE, + [L3_ET_STOP][L4_UDP_PASS2] = 
RTE_PTYPE_L4_UDP, + [L3_ET_STOP][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE, + [L3_ET_STOP][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, + [L3_ET_STOP][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE, + + [L3_OTHER][L4_NONE] = RTE_PTYPE_UNKNOWN, + [L3_OTHER][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN, + [L3_OTHER][L4_IPFRAG] = RTE_PTYPE_L4_FRAG, + [L3_OTHER][L4_IPCOMP] = RTE_PTYPE_UNKNOWN, + [L3_OTHER][L4_TCP] = RTE_PTYPE_L4_TCP, + [L3_OTHER][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP, + [L3_OTHER][L4_GRE] = RTE_PTYPE_TUNNEL_GRE, + [L3_OTHER][L4_UDP_PASS2] = RTE_PTYPE_L4_UDP, + [L3_OTHER][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE, + [L3_OTHER][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, + [L3_OTHER][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE, +}; + +static inline uint32_t __hot +nicvf_rx_classify_pkt(cqe_rx_word0_t cqe_rx_w0) +{ + return ptype_table[cqe_rx_w0.l3_type][cqe_rx_w0.l4_type]; +} + +static inline int __hot +nicvf_fill_rbdr(struct nicvf_rxq *rxq, int to_fill) +{ + int i; + uint32_t ltail, next_tail; + struct nicvf_rbdr *rbdr = rxq->shared_rbdr; + uint64_t mbuf_phys_off = rxq->mbuf_phys_off; + struct rbdr_entry_t *desc = rbdr->desc; + uint32_t qlen_mask = rbdr->qlen_mask; + uintptr_t door = rbdr->rbdr_door; + void *obj_p[NICVF_MAX_RX_FREE_THRESH] __rte_cache_aligned; + + if (unlikely(rte_mempool_get_bulk(rxq->pool, obj_p, to_fill) < 0)) { + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += + to_fill; + return 0; + } + + NICVF_RX_ASSERT((unsigned int)to_fill <= (qlen_mask - + (nicvf_addr_read(rbdr->rbdr_status) & NICVF_RBDR_COUNT_MASK))); + + next_tail = __atomic_fetch_add(&rbdr->next_tail, to_fill, + __ATOMIC_ACQUIRE); + ltail = next_tail; + for (i = 0; i < to_fill; i++) { + struct rbdr_entry_t *entry = desc + (ltail & qlen_mask); + + entry->full_addr = nicvf_mbuff_virt2phy((uintptr_t)obj_p[i], + mbuf_phys_off); + ltail++; + } + + while (__atomic_load_n(&rbdr->tail, __ATOMIC_RELAXED) != next_tail) + rte_pause(); + + __atomic_store_n(&rbdr->tail, ltail, __ATOMIC_RELEASE); + nicvf_addr_write(door, 
to_fill); + return to_fill; +} + +static inline int32_t __hot +nicvf_rx_pkts_to_process(struct nicvf_rxq *rxq, uint16_t nb_pkts, + int32_t available_space) +{ + if (unlikely(available_space < nb_pkts)) + rxq->available_space = nicvf_addr_read(rxq->cq_status) + & NICVF_CQ_CQE_COUNT_MASK; + + return RTE_MIN(nb_pkts, available_space); +} + +static inline void __hot +nicvf_rx_offload(cqe_rx_word0_t cqe_rx_w0, cqe_rx_word2_t cqe_rx_w2, + struct rte_mbuf *pkt) +{ + if (likely(cqe_rx_w0.rss_alg)) { + pkt->hash.rss = cqe_rx_w2.rss_tag; + pkt->ol_flags |= PKT_RX_RSS_HASH; + } +} + +uint16_t __hot +nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + uint32_t i, to_process; + struct cqe_rx_t *cqe_rx; + struct rte_mbuf *pkt; + cqe_rx_word0_t cqe_rx_w0; + cqe_rx_word1_t cqe_rx_w1; + cqe_rx_word2_t cqe_rx_w2; + cqe_rx_word3_t cqe_rx_w3; + struct nicvf_rxq *rxq = rx_queue; + union cq_entry_t *desc = rxq->desc; + const uint64_t cqe_mask = rxq->qlen_mask; + uint64_t rb0_ptr, mbuf_phys_off = rxq->mbuf_phys_off; + const uint64_t mbuf_init = rxq->mbuf_initializer.value; + uint32_t cqe_head = rxq->head & cqe_mask; + int32_t available_space = rxq->available_space; + const uint8_t rbptr_offset = rxq->rbptr_offset; + + to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space); + + for (i = 0; i < to_process; i++) { + rte_prefetch_non_temporal(&desc[cqe_head + 2]); + cqe_rx = (struct cqe_rx_t *)&desc[cqe_head]; + NICVF_RX_ASSERT(((struct cq_entry_type_t *)cqe_rx)->cqe_type + == CQE_TYPE_RX); + + NICVF_LOAD_PAIR(cqe_rx_w0.u64, cqe_rx_w1.u64, cqe_rx); + NICVF_LOAD_PAIR(cqe_rx_w2.u64, cqe_rx_w3.u64, &cqe_rx->word2); + rb0_ptr = *((uint64_t *)cqe_rx + rbptr_offset); + pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt + (rb0_ptr - cqe_rx_w1.align_pad, mbuf_phys_off); + pkt->ol_flags = 0; + pkt->data_len = cqe_rx_w3.rb0_sz; + pkt->pkt_len = cqe_rx_w3.rb0_sz; + pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0); + nicvf_mbuff_init_update(pkt, mbuf_init, 
cqe_rx_w1.align_pad); + nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt); + rx_pkts[i] = pkt; + cqe_head = (cqe_head + 1) & cqe_mask; + nicvf_prefetch_store_keep(pkt); + } + + if (likely(to_process)) { + rxq->available_space -= to_process; + rxq->head = cqe_head; + nicvf_addr_write(rxq->cq_door, to_process); + rxq->recv_buffers += to_process; + } + if (rxq->recv_buffers > rxq->rx_free_thresh) { + rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rxq->rx_free_thresh); + NICVF_RX_ASSERT(rxq->recv_buffers >= 0); + } + + return to_process; +} + +static inline uint16_t __hot +nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx, + uint64_t mbuf_phys_off, + struct rte_mbuf **rx_pkt, uint8_t rbptr_offset, + uint64_t mbuf_init) +{ + struct rte_mbuf *pkt, *seg, *prev; + cqe_rx_word0_t cqe_rx_w0; + cqe_rx_word1_t cqe_rx_w1; + cqe_rx_word2_t cqe_rx_w2; + uint16_t *rb_sz, nb_segs, seg_idx; + uint64_t *rb_ptr; + + NICVF_LOAD_PAIR(cqe_rx_w0.u64, cqe_rx_w1.u64, cqe_rx); + NICVF_RX_ASSERT(cqe_rx_w0.cqe_type == CQE_TYPE_RX); + cqe_rx_w2 = cqe_rx->word2; + rb_sz = &cqe_rx->word3.rb0_sz; + rb_ptr = (uint64_t *)cqe_rx + rbptr_offset; + nb_segs = cqe_rx_w0.rb_cnt; + pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt + (rb_ptr[0] - cqe_rx_w1.align_pad, mbuf_phys_off); + + pkt->ol_flags = 0; + pkt->pkt_len = cqe_rx_w1.pkt_len; + pkt->data_len = rb_sz[nicvf_frag_num(0)]; + nicvf_mbuff_init_mseg_update( + pkt, mbuf_init, cqe_rx_w1.align_pad, nb_segs); + pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0); + nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt); + + *rx_pkt = pkt; + prev = pkt; + for (seg_idx = 1; seg_idx < nb_segs; seg_idx++) { + seg = (struct rte_mbuf *)nicvf_mbuff_phy2virt + (rb_ptr[seg_idx], mbuf_phys_off); + + prev->next = seg; + seg->data_len = rb_sz[nicvf_frag_num(seg_idx)]; + nicvf_mbuff_init_update(seg, mbuf_init, 0); + + prev = seg; + } + prev->next = NULL; + return nb_segs; +} + +uint16_t __hot +nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + 
union cq_entry_t *cq_entry; + struct cqe_rx_t *cqe_rx; + struct nicvf_rxq *rxq = rx_queue; + union cq_entry_t *desc = rxq->desc; + const uint64_t cqe_mask = rxq->qlen_mask; + uint64_t mbuf_phys_off = rxq->mbuf_phys_off; + uint32_t i, to_process, cqe_head, buffers_consumed = 0; + int32_t available_space = rxq->available_space; + uint16_t nb_segs; + const uint64_t mbuf_init = rxq->mbuf_initializer.value; + const uint8_t rbptr_offset = rxq->rbptr_offset; + + cqe_head = rxq->head & cqe_mask; + to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space); + + for (i = 0; i < to_process; i++) { + rte_prefetch_non_temporal(&desc[cqe_head + 2]); + cq_entry = &desc[cqe_head]; + cqe_rx = (struct cqe_rx_t *)cq_entry; + nb_segs = nicvf_process_cq_mseg_entry(cqe_rx, mbuf_phys_off, + rx_pkts + i, rbptr_offset, mbuf_init); + buffers_consumed += nb_segs; + cqe_head = (cqe_head + 1) & cqe_mask; + nicvf_prefetch_store_keep(rx_pkts[i]); + } + + if (likely(to_process)) { + rxq->available_space -= to_process; + rxq->head = cqe_head; + nicvf_addr_write(rxq->cq_door, to_process); + rxq->recv_buffers += buffers_consumed; + } + if (rxq->recv_buffers > rxq->rx_free_thresh) { + rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rxq->rx_free_thresh); + NICVF_RX_ASSERT(rxq->recv_buffers >= 0); + } + + return to_process; +} + +uint32_t +nicvf_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx) +{ + struct nicvf_rxq *rxq; + + rxq = dev->data->rx_queues[queue_idx]; + return nicvf_addr_read(rxq->cq_status) & NICVF_CQ_CQE_COUNT_MASK; +} + +uint32_t +nicvf_dev_rbdr_refill(struct rte_eth_dev *dev, uint16_t queue_idx) +{ + struct nicvf_rxq *rxq; + uint32_t to_process; + uint32_t rx_free; + + rxq = dev->data->rx_queues[queue_idx]; + to_process = rxq->recv_buffers; + while (rxq->recv_buffers > 0) { + rx_free = RTE_MIN(rxq->recv_buffers, NICVF_MAX_RX_FREE_THRESH); + rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rx_free); + } + + assert(rxq->recv_buffers == 0); + return to_process; +} diff --git 
a/src/seastar/dpdk/drivers/net/thunderx/nicvf_rxtx.h b/src/seastar/dpdk/drivers/net/thunderx/nicvf_rxtx.h new file mode 100644 index 00000000..3631ff22 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_rxtx.h @@ -0,0 +1,128 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Big-endian variant: map a logical receive-buffer index to its slot in the
 * array of 16-bit buffer sizes. Within every group of four entries the order
 * is reversed (0<->3, 1<->2), which is the same as flipping the two low bits
 * of the index.
 */
static inline uint16_t __attribute__((const))
nicvf_frag_num(uint16_t i)
{
	return i ^ 3;
}
init.value; +} + +uint32_t nicvf_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx); +uint32_t nicvf_dev_rbdr_refill(struct rte_eth_dev *dev, uint16_t queue_idx); + +uint16_t nicvf_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t pkts); +uint16_t nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t nicvf_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t pkts); +uint16_t nicvf_xmit_pkts_multiseg(void *txq, struct rte_mbuf **tx_pkts, + uint16_t pkts); + +void nicvf_single_pool_free_xmited_buffers(struct nicvf_txq *sq); +void nicvf_multi_pool_free_xmited_buffers(struct nicvf_txq *sq); + +#endif /* __THUNDERX_NICVF_RXTX_H__ */ diff --git a/src/seastar/dpdk/drivers/net/thunderx/nicvf_struct.h b/src/seastar/dpdk/drivers/net/thunderx/nicvf_struct.h new file mode 100644 index 00000000..34c41b79 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_struct.h @@ -0,0 +1,139 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
/*
 * Software state of one send queue (SQ).
 *
 * The Tx burst routines fill descriptors at 'tail' and ring 'sq_door';
 * the free routines advance 'head' up to the position read from 'sq_head'
 * and release the mbufs recorded in 'txbuffs'.
 */
struct nicvf_txq {
	union sq_entry_t *desc;		/* descriptor ring filled by the xmit routines */
	nicvf_phys_addr_t phys;		/* NOTE(review): presumably the ring's physical address - confirm */
	struct rte_mbuf **txbuffs;	/* mbuf per descriptor slot; NULL for header descriptors */
	uintptr_t sq_head;		/* register address read to learn the completed head */
	uintptr_t sq_door;		/* doorbell address; written with the number of new descriptors */
	struct rte_mempool *pool;	/* pool for bulk free; set from the first packet while NULL */
	struct nicvf *nic;		/* NOTE(review): presumably the owning VF - confirm */
	void (*pool_free)(struct nicvf_txq *sq);	/* releases transmitted mbufs (single- or multi-pool) */
	uint32_t head;			/* first slot not yet reclaimed by software */
	uint32_t tail;			/* next slot to fill */
	int32_t xmit_bufs;		/* mbufs handed to the queue and not yet freed */
	uint32_t qlen_mask;		/* mask applied to ring indices on wrap-around */
	uint32_t txq_flags;
	uint16_t queue_id;
	uint16_t tx_free_thresh;	/* reclaim is triggered once xmit_bufs exceeds this */
} __rte_cache_aligned;
/*
 * Per-VF device context.
 *
 * NOTE(review): the field notes below are inferred from field names and
 * types only; this file section does not show how the fields are used -
 * confirm against the ethdev and mailbox code before relying on them.
 */
struct nicvf {
	uint8_t vf_id;
	uint8_t node;
	uintptr_t reg_base;	/* presumably base address of the VF register space - TODO confirm */
	bool tns_mode;
	bool sqs_mode;
	bool loopback_supported;
	bool pf_acked:1;	/* presumably set on a PF mailbox ACK - TODO confirm */
	bool pf_nacked:1;	/* presumably set on a PF mailbox NACK - TODO confirm */
	uint64_t hwcap;
	uint8_t link_up;
	uint8_t duplex;
	uint32_t speed;
	uint32_t msg_enable;
	uint16_t device_id;
	uint16_t vendor_id;
	uint16_t subsystem_device_id;
	uint16_t subsystem_vendor_id;
	struct nicvf_rbdr *rbdr;	/* receive buffer descriptor ring state */
	struct nicvf_rss_reta_info rss_info;
	struct rte_intr_handle intr_handle;
	uint8_t cpi_alg;
	uint16_t mtu;
	bool vlan_filter_en;
	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* secondary queue set support */
	uint8_t sqs_id;
	uint8_t sqs_count;
#define MAX_SQS_PER_VF 11
	struct nicvf *snicvf[MAX_SQS_PER_VF];	/* up to MAX_SQS_PER_VF pointers to other VF contexts */
} __rte_cache_aligned;
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <stddef.h> + +#include <rte_debug.h> +#include <rte_malloc.h> + +#include "base/nicvf_bsvf.h" + +#include "nicvf_svf.h" + +void +nicvf_svf_push(struct nicvf *vf) +{ + struct svf_entry *entry = NULL; + + assert(vf != NULL); + + entry = rte_zmalloc("nicvf", sizeof(*entry), RTE_CACHE_LINE_SIZE); + if (entry == NULL) + rte_panic("Cannoc allocate memory for svf_entry\n"); + + entry->vf = vf; + + nicvf_bsvf_push(entry); +} + +struct nicvf * +nicvf_svf_pop(void) +{ + struct nicvf *vf; + struct svf_entry *entry; + + entry = nicvf_bsvf_pop(); + + vf = entry->vf; + + rte_free(entry); + + return vf; +} + +int +nicvf_svf_empty(void) +{ + return nicvf_bsvf_empty(); +} diff --git a/src/seastar/dpdk/drivers/net/thunderx/nicvf_svf.h b/src/seastar/dpdk/drivers/net/thunderx/nicvf_svf.h new file mode 100644 index 00000000..6471aa57 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/thunderx/nicvf_svf.h @@ -0,0 +1,66 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
#ifndef __THUNDERX_NICVF_SVF_H__
#define __THUNDERX_NICVF_SVF_H__

struct nicvf;

/**
 * Enqueue new VF to secondary qsets.
 *
 * @param vf
 *  VF to be enqueued.
 */
void
nicvf_svf_push(struct nicvf *vf);

/**
 * Dequeue a VF from secondary qsets.
 *
 * @return
 *  Dequeued VF.
 */
struct nicvf *
nicvf_svf_pop(void);

/**
 * Check if the queue of secondary qsets is empty.
 *
 * @return
 *  0 on non-empty
 *  otherwise empty
 */
int
nicvf_svf_empty(void);

#endif /* __THUNDERX_NICVF_SVF_H__ */