diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/dpdk/drivers/net/enic | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/drivers/net/enic')
36 files changed, 15310 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/net/enic/Makefile b/src/spdk/dpdk/drivers/net/enic/Makefile new file mode 100644 index 000000000..d098a474a --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/Makefile @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. +# Copyright 2007 Nuova Systems, Inc. All rights reserved. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_enic.a + +EXPORT_MAP := rte_pmd_enic_version.map + +CFLAGS += -I$(SRCDIR)/base/ +CFLAGS += -I$(SRCDIR) +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -Wno-strict-aliasing +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash +LDLIBS += -lrte_bus_pci + +VPATH += $(SRCDIR)/src + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_main.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_clsf.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_res.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_flow.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_fm_flow.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_cq.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_wq.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_intr.c +SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rq.c + +# The current implementation assumes 64-bit pointers +CC_AVX2_SUPPORT=0 +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +# Figure out if the compiler supports avx2. The extra check using +# -march=core-avx2 is necessary to support users who build for the +# 'default' machine (corei7 which has no avx2) and run the binary on +# newer CPUs that have avx2. +# This part is verbatim from i40e makefile. +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) + CC_AVX2_SUPPORT=1 +else + CC_AVX2_SUPPORT=\ + $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \ + grep -q AVX2 && echo 1) + ifeq ($(CC_AVX2_SUPPORT), 1) + ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) + CFLAGS_enic_rxtx_vec_avx2.o += -march=core-avx2 + else + CFLAGS_enic_rxtx_vec_avx2.o += -mavx2 + endif + endif +endif +endif + +ifeq ($(CC_AVX2_SUPPORT), 1) + SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx_vec_avx2.c +endif + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/net/enic/base/cq_desc.h b/src/spdk/dpdk/drivers/net/enic/base/cq_desc.h new file mode 100644 index 000000000..7151353cb --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/cq_desc.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _CQ_DESC_H_ +#define _CQ_DESC_H_ +#include <rte_byteorder.h> + +/* + * Completion queue descriptor types + */ +enum cq_desc_types { + CQ_DESC_TYPE_WQ_ENET = 0, + CQ_DESC_TYPE_DESC_COPY = 1, + CQ_DESC_TYPE_WQ_EXCH = 2, + CQ_DESC_TYPE_RQ_ENET = 3, + CQ_DESC_TYPE_RQ_FCP = 4, + CQ_DESC_TYPE_IOMMU_MISS = 5, + CQ_DESC_TYPE_SGL = 6, + CQ_DESC_TYPE_CLASSIFIER = 7, + CQ_DESC_TYPE_TEST = 127, +}; + +/* Completion queue descriptor: 16B + * + * All completion queues have this basic layout. The + * type_specific area is unique for each completion + * queue type. + */ +struct cq_desc { + uint16_t completed_index; + uint16_t q_number; + uint8_t type_specific[11]; + uint8_t type_color; +}; + +#define CQ_DESC_TYPE_BITS 4 +#define CQ_DESC_TYPE_MASK ((1 << CQ_DESC_TYPE_BITS) - 1) +#define CQ_DESC_COLOR_MASK 1 +#define CQ_DESC_COLOR_SHIFT 7 +#define CQ_DESC_COLOR_MASK_NOSHIFT 0x80 +#define CQ_DESC_Q_NUM_BITS 10 +#define CQ_DESC_Q_NUM_MASK ((1 << CQ_DESC_Q_NUM_BITS) - 1) +#define CQ_DESC_COMP_NDX_BITS 12 +#define CQ_DESC_COMP_NDX_MASK ((1 << CQ_DESC_COMP_NDX_BITS) - 1) + +static inline void cq_color_enc(struct cq_desc *desc, const uint8_t color) +{ + if (color) + desc->type_color |= (1 << CQ_DESC_COLOR_SHIFT); + else + desc->type_color &= ~(1 << CQ_DESC_COLOR_SHIFT); +} + +static inline void cq_desc_enc(struct cq_desc *desc, + const uint8_t type, const uint8_t color, const uint16_t q_number, + const uint16_t completed_index) +{ + desc->type_color = (type & CQ_DESC_TYPE_MASK) | + ((color & CQ_DESC_COLOR_MASK) << CQ_DESC_COLOR_SHIFT); + desc->q_number = rte_cpu_to_le_16(q_number & CQ_DESC_Q_NUM_MASK); + desc->completed_index = rte_cpu_to_le_16(completed_index & + CQ_DESC_COMP_NDX_MASK); +} + +static inline void cq_desc_dec(const struct cq_desc *desc_arg, + uint8_t *type, uint8_t *color, uint16_t *q_number, + uint16_t *completed_index) +{ + const struct cq_desc *desc = desc_arg; + const uint8_t type_color = desc->type_color; + + *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + + rte_rmb(); + + *type = type_color & CQ_DESC_TYPE_MASK; + *q_number = rte_le_to_cpu_16(desc->q_number) & CQ_DESC_Q_NUM_MASK; + *completed_index = rte_le_to_cpu_16(desc->completed_index) & + CQ_DESC_COMP_NDX_MASK; +} + +static inline void cq_color_dec(const struct cq_desc *desc_arg, uint8_t *color) +{ + volatile const struct cq_desc *desc = desc_arg; + + *color = (desc->type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; +} + +#endif /* _CQ_DESC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/cq_enet_desc.h b/src/spdk/dpdk/drivers/net/enic/base/cq_enet_desc.h new file mode 100644 index 000000000..602ac22b6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/cq_enet_desc.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _CQ_ENET_DESC_H_ +#define _CQ_ENET_DESC_H_ + +#include <rte_byteorder.h> +#include "cq_desc.h" + +/* Ethernet completion queue descriptor: 16B */ +struct cq_enet_wq_desc { + uint16_t completed_index; + uint16_t q_number; + uint8_t reserved[11]; + uint8_t type_color; +}; + +static inline void cq_enet_wq_desc_enc(struct cq_enet_wq_desc *desc, + uint8_t type, uint8_t color, uint16_t q_number, + uint16_t completed_index) +{ + cq_desc_enc((struct cq_desc *)desc, type, + color, q_number, completed_index); +} + +static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc, + uint8_t *type, uint8_t *color, uint16_t *q_number, + uint16_t *completed_index) +{ + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); +} + +/* Completion queue descriptor: Ethernet receive queue, 16B */ +struct cq_enet_rq_desc { + uint16_t completed_index_flags; + uint16_t q_number_rss_type_flags; + uint32_t rss_hash; + uint16_t bytes_written_flags; + uint16_t vlan; + uint16_t checksum_fcoe; + uint8_t flags; + uint8_t type_color; +}; + +/* Completion queue descriptor: Ethernet receive queue, 16B */ +struct cq_enet_rq_clsf_desc { + uint16_t completed_index_flags; + uint16_t q_number_rss_type_flags; + uint16_t filter_id; + uint16_t lif; + uint16_t bytes_written_flags; + uint16_t vlan; + uint16_t checksum_fcoe; + uint8_t flags; + uint8_t type_color; +}; + +#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT (0x1 << 12) +#define CQ_ENET_RQ_DESC_FLAGS_FCOE (0x1 << 13) +#define CQ_ENET_RQ_DESC_FLAGS_EOP (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_SOP (0x1 << 15) + +#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \ + ((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1) +#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE 0 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4 1 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4 2 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6 3 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX 5 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX 6 + +#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC (0x1 << 14) + +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS 14 +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \ + ((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1) +#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED (0x1 << 15) + +#define CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_BITS 12 +#define CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_MASK \ + ((1 << CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_BITS) - 1) +#define CQ_ENET_RQ_DESC_VLAN_TCI_CFI_MASK (0x1 << 12) +#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_BITS 3 +#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_MASK \ + ((1 << CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_BITS) - 1) +#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_SHIFT 13 + +#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS 8 +#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS 8 +#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT 8 + +#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FLAGS_UDP (0x1 << 1) +#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR (0x1 << 1) +#define CQ_ENET_RQ_DESC_FLAGS_TCP (0x1 << 2) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK (0x1 << 3) +#define CQ_ENET_RQ_DESC_FLAGS_IPV6 (0x1 << 4) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4 (0x1 << 5) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT (0x1 << 6) +#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK (0x1 << 7) + +static inline void cq_enet_rq_desc_enc(struct cq_enet_rq_desc *desc, + uint8_t type, uint8_t color, uint16_t q_number, + uint16_t completed_index, uint8_t ingress_port, uint8_t fcoe, + uint8_t eop, uint8_t sop, uint8_t rss_type, uint8_t csum_not_calc, + uint32_t rss_hash, uint16_t bytes_written, uint8_t packet_error, + uint8_t vlan_stripped, uint16_t vlan, uint16_t checksum, + uint8_t fcoe_sof, uint8_t fcoe_fc_crc_ok, uint8_t fcoe_enc_error, + uint8_t fcoe_eof, uint8_t tcp_udp_csum_ok, uint8_t udp, uint8_t tcp, + uint8_t ipv4_csum_ok, uint8_t ipv6, uint8_t ipv4, uint8_t ipv4_fragment, + uint8_t fcs_ok) +{ + cq_desc_enc((struct cq_desc *)desc, type, + color, q_number, completed_index); + + desc->completed_index_flags |= rte_cpu_to_le_16 + ((ingress_port ? CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT : 0) | + (fcoe ? CQ_ENET_RQ_DESC_FLAGS_FCOE : 0) | + (eop ? CQ_ENET_RQ_DESC_FLAGS_EOP : 0) | + (sop ? CQ_ENET_RQ_DESC_FLAGS_SOP : 0)); + + desc->q_number_rss_type_flags |= rte_cpu_to_le_16 + (((rss_type & CQ_ENET_RQ_DESC_RSS_TYPE_MASK) << + CQ_DESC_Q_NUM_BITS) | + (csum_not_calc ? CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC : 0)); + + desc->rss_hash = rte_cpu_to_le_32(rss_hash); + + desc->bytes_written_flags = rte_cpu_to_le_16 + ((bytes_written & CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK) | + (packet_error ? CQ_ENET_RQ_DESC_FLAGS_TRUNCATED : 0) | + (vlan_stripped ? CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED : 0)); + + desc->vlan = rte_cpu_to_le_16(vlan); + + if (fcoe) { + desc->checksum_fcoe = rte_cpu_to_le_16 + ((fcoe_sof & CQ_ENET_RQ_DESC_FCOE_SOF_MASK) | + ((fcoe_eof & CQ_ENET_RQ_DESC_FCOE_EOF_MASK) << + CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT)); + } else { + desc->checksum_fcoe = rte_cpu_to_le_16(checksum); + } + + desc->flags = + (tcp_udp_csum_ok ? CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK : 0) | + (udp ? CQ_ENET_RQ_DESC_FLAGS_UDP : 0) | + (tcp ? CQ_ENET_RQ_DESC_FLAGS_TCP : 0) | + (ipv4_csum_ok ? CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK : 0) | + (ipv6 ? CQ_ENET_RQ_DESC_FLAGS_IPV6 : 0) | + (ipv4 ? CQ_ENET_RQ_DESC_FLAGS_IPV4 : 0) | + (ipv4_fragment ? CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT : 0) | + (fcs_ok ? CQ_ENET_RQ_DESC_FLAGS_FCS_OK : 0) | + (fcoe_fc_crc_ok ? CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK : 0) | + (fcoe_enc_error ? CQ_ENET_RQ_DESC_FCOE_ENC_ERROR : 0); +} + +static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, + uint8_t *type, uint8_t *color, uint16_t *q_number, + uint16_t *completed_index, uint8_t *ingress_port, uint8_t *fcoe, + uint8_t *eop, uint8_t *sop, uint8_t *rss_type, uint8_t *csum_not_calc, + uint32_t *rss_hash, uint16_t *bytes_written, uint8_t *packet_error, + uint8_t *vlan_stripped, uint16_t *vlan_tci, uint16_t *checksum, + uint8_t *fcoe_sof, uint8_t *fcoe_fc_crc_ok, uint8_t *fcoe_enc_error, + uint8_t *fcoe_eof, uint8_t *tcp_udp_csum_ok, uint8_t *udp, uint8_t *tcp, + uint8_t *ipv4_csum_ok, uint8_t *ipv6, uint8_t *ipv4, + uint8_t *ipv4_fragment, uint8_t *fcs_ok) +{ + uint16_t completed_index_flags; + uint16_t q_number_rss_type_flags; + uint16_t bytes_written_flags; + + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); + + completed_index_flags = rte_le_to_cpu_16(desc->completed_index_flags); + q_number_rss_type_flags = + rte_le_to_cpu_16(desc->q_number_rss_type_flags); + bytes_written_flags = rte_le_to_cpu_16(desc->bytes_written_flags); + + *ingress_port = (completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; + *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? + 1 : 0; + *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? + 1 : 0; + *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? + 1 : 0; + + *rss_type = (uint8_t)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & + CQ_ENET_RQ_DESC_RSS_TYPE_MASK); + *csum_not_calc = (q_number_rss_type_flags & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; + + *rss_hash = rte_le_to_cpu_32(desc->rss_hash); + + *bytes_written = bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_error = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; + *vlan_stripped = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0; + + /* + * Tag Control Information(16) = user_priority(3) + cfi(1) + vlan(12) + */ + *vlan_tci = rte_le_to_cpu_16(desc->vlan); + + if (*fcoe) { + *fcoe_sof = (uint8_t)(rte_le_to_cpu_16(desc->checksum_fcoe) & + CQ_ENET_RQ_DESC_FCOE_SOF_MASK); + *fcoe_fc_crc_ok = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; + *fcoe_enc_error = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; + *fcoe_eof = (uint8_t)((rte_le_to_cpu_16(desc->checksum_fcoe) >> + CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & + CQ_ENET_RQ_DESC_FCOE_EOF_MASK); + *checksum = 0; + } else { + *fcoe_sof = 0; + *fcoe_fc_crc_ok = 0; + *fcoe_enc_error = 0; + *fcoe_eof = 0; + *checksum = rte_le_to_cpu_16(desc->checksum_fcoe); + } + + *tcp_udp_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; + *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; + *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; + *ipv4_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; + *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; + *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; + *ipv4_fragment = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; + *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0; +} + +#endif /* _CQ_ENET_DESC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/rq_enet_desc.h b/src/spdk/dpdk/drivers/net/enic/base/rq_enet_desc.h new file mode 100644 index 000000000..c79c0287b --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/rq_enet_desc.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _RQ_ENET_DESC_H_ +#define _RQ_ENET_DESC_H_ + +#include <rte_byteorder.h> + +/* Ethernet receive queue descriptor: 16B */ +struct rq_enet_desc { + uint64_t address; + uint16_t length_type; + uint8_t reserved[6]; +}; + +enum rq_enet_type_types { + RQ_ENET_TYPE_ONLY_SOP = 0, + RQ_ENET_TYPE_NOT_SOP = 1, + RQ_ENET_TYPE_RESV2 = 2, + RQ_ENET_TYPE_RESV3 = 3, +}; + +#define RQ_ENET_ADDR_BITS 64 +#define RQ_ENET_LEN_BITS 14 +#define RQ_ENET_LEN_MASK ((1 << RQ_ENET_LEN_BITS) - 1) +#define RQ_ENET_TYPE_BITS 2 +#define RQ_ENET_TYPE_MASK ((1 << RQ_ENET_TYPE_BITS) - 1) + +static inline void rq_enet_desc_enc(volatile struct rq_enet_desc *desc, + uint64_t address, uint8_t type, uint16_t length) +{ + desc->address = rte_cpu_to_le_64(address); + desc->length_type = rte_cpu_to_le_16((length & RQ_ENET_LEN_MASK) | + ((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS)); +} + +static inline void rq_enet_desc_dec(struct rq_enet_desc *desc, + uint64_t *address, uint8_t *type, uint16_t *length) +{ + *address = rte_le_to_cpu_64(desc->address); + *length = rte_le_to_cpu_16(desc->length_type) & RQ_ENET_LEN_MASK; + *type = (uint8_t)((rte_le_to_cpu_16(desc->length_type) >> + RQ_ENET_LEN_BITS) & RQ_ENET_TYPE_MASK); +} + +#endif /* _RQ_ENET_DESC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.c b/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.c new file mode 100644 index 000000000..51d6fd387 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include "vnic_dev.h" +#include "vnic_cq.h" +#include <rte_memzone.h> + +void vnic_cq_free(struct vnic_cq *cq) +{ + vnic_dev_free_desc_ring(cq->vdev, &cq->ring); + + cq->ctrl = NULL; +} + +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int socket_id, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + char res_name[RTE_MEMZONE_NAMESIZE]; + static int instance; + + cq->index = index; + cq->vdev = vdev; + + cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index); + if (!cq->ctrl) { + pr_err("Failed to hook CQ[%u] resource\n", index); + return -EINVAL; + } + + snprintf(res_name, sizeof(res_name), "%d-cq-%u", instance++, index); + err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size, + socket_id, res_name); + if (err) + return err; + + return 0; +} + +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int cq_message_enable, + unsigned int interrupt_offset, uint64_t cq_message_addr) +{ + uint64_t paddr; + + paddr = (uint64_t)cq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &cq->ctrl->ring_base); + iowrite32(cq->ring.desc_count, &cq->ctrl->ring_size); + iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable); + iowrite32(color_enable, &cq->ctrl->color_enable); + iowrite32(cq_head, &cq->ctrl->cq_head); + iowrite32(cq_tail, &cq->ctrl->cq_tail); + iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color); + iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable); + iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable); + iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable); + iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset); + writeq(cq_message_addr, &cq->ctrl->cq_message_addr); + + cq->interrupt_offset = interrupt_offset; +} + +void vnic_cq_clean(struct vnic_cq *cq) +{ + cq->to_clean = 0; + cq->last_color = 0; + + iowrite32(0, &cq->ctrl->cq_head); + iowrite32(0, &cq->ctrl->cq_tail); + iowrite32(1, &cq->ctrl->cq_tail_color); + + vnic_dev_clear_desc_ring(&cq->ring); +} diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.h new file mode 100644 index 000000000..2e48759c4 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_cq.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_CQ_H_ +#define _VNIC_CQ_H_ + +#include <rte_mbuf.h> + +#include "cq_desc.h" +#include "vnic_dev.h" + +/* Completion queue control */ +struct vnic_cq_ctrl { + uint64_t ring_base; /* 0x00 */ + uint32_t ring_size; /* 0x08 */ + uint32_t pad0; + uint32_t flow_control_enable; /* 0x10 */ + uint32_t pad1; + uint32_t color_enable; /* 0x18 */ + uint32_t pad2; + uint32_t cq_head; /* 0x20 */ + uint32_t pad3; + uint32_t cq_tail; /* 0x28 */ + uint32_t pad4; + uint32_t cq_tail_color; /* 0x30 */ + uint32_t pad5; + uint32_t interrupt_enable; /* 0x38 */ + uint32_t pad6; + uint32_t cq_entry_enable; /* 0x40 */ + uint32_t pad7; + uint32_t cq_message_enable; /* 0x48 */ + uint32_t pad8; + uint32_t interrupt_offset; /* 0x50 */ + uint32_t pad9; + uint64_t cq_message_addr; /* 0x58 */ + uint32_t pad10; +}; + +#ifdef ENIC_AIC +struct vnic_rx_bytes_counter { + unsigned int small_pkt_bytes_cnt; + unsigned int large_pkt_bytes_cnt; +}; +#endif + +struct vnic_cq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_cq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + unsigned int to_clean; + unsigned int last_color; + unsigned int interrupt_offset; +#ifdef ENIC_AIC + struct vnic_rx_bytes_counter pkt_size_counter; + unsigned int cur_rx_coal_timeval; + unsigned int tobe_rx_coal_timeval; + ktime_t prev_ts; +#endif +}; + +void vnic_cq_free(struct vnic_cq *cq); +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int socket_id, + unsigned int desc_count, unsigned int desc_size); +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int message_enable, + unsigned int interrupt_offset, uint64_t message_addr); +void vnic_cq_clean(struct vnic_cq *cq); +int vnic_cq_mem_size(struct vnic_cq *cq, unsigned int desc_count, + unsigned int desc_size); + +#endif /* _VNIC_CQ_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.c b/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.c new file mode 100644 index 000000000..ac03817f4 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.c @@ -0,0 +1,1216 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <rte_memzone.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_ether.h> + +#include "vnic_dev.h" +#include "vnic_resource.h" +#include "vnic_devcmd.h" +#include "vnic_nic.h" +#include "vnic_stats.h" +#include "vnic_flowman.h" + + +enum vnic_proxy_type { + PROXY_NONE, + PROXY_BY_BDF, + PROXY_BY_INDEX, +}; + +struct vnic_res { + void __iomem *vaddr; + dma_addr_t bus_addr; + unsigned int count; +}; + +struct vnic_intr_coal_timer_info { + uint32_t mul; + uint32_t div; + uint32_t max_usec; +}; + +struct vnic_dev { + void *priv; + struct rte_pci_device *pdev; + struct vnic_res res[RES_TYPE_MAX]; + enum vnic_dev_intr_mode intr_mode; + struct vnic_devcmd __iomem *devcmd; + struct vnic_devcmd_notify *notify; + struct vnic_devcmd_notify notify_copy; + dma_addr_t notify_pa; + uint32_t notify_sz; + dma_addr_t linkstatus_pa; + struct vnic_stats *stats; + dma_addr_t stats_pa; + struct vnic_devcmd_fw_info *fw_info; + dma_addr_t fw_info_pa; + struct fm_info *flowman_info; + dma_addr_t flowman_info_pa; + enum vnic_proxy_type proxy; + uint32_t proxy_index; + uint64_t args[VNIC_DEVCMD_NARGS]; + int in_reset; + struct vnic_intr_coal_timer_info intr_coal_timer_info; + void *(*alloc_consistent)(void *priv, size_t size, + dma_addr_t *dma_handle, uint8_t *name); + void (*free_consistent)(void *priv, + size_t size, void *vaddr, + dma_addr_t dma_handle); +}; + +#define VNIC_MAX_RES_HDR_SIZE \ + (sizeof(struct vnic_resource_header) + \ + sizeof(struct vnic_resource) * RES_TYPE_MAX) +#define VNIC_RES_STRIDE 128 + +void *vnic_dev_priv(struct vnic_dev *vdev) +{ + return vdev->priv; +} + +void vnic_register_cbacks(struct vnic_dev *vdev, + void *(*alloc_consistent)(void *priv, size_t size, + dma_addr_t *dma_handle, uint8_t *name), + void (*free_consistent)(void *priv, + size_t size, void *vaddr, + dma_addr_t dma_handle)) +{ + vdev->alloc_consistent = alloc_consistent; + vdev->free_consistent = free_consistent; +} + +static int vnic_dev_discover_res(struct vnic_dev *vdev, + struct vnic_dev_bar *bar, unsigned int num_bars) +{ + struct vnic_resource_header __iomem *rh; + struct mgmt_barmap_hdr __iomem *mrh; + struct vnic_resource __iomem *r; + uint8_t type; + + if (num_bars == 0) + return -EINVAL; + + if (bar->len < VNIC_MAX_RES_HDR_SIZE) { + pr_err("vNIC BAR0 res hdr length error\n"); + return -EINVAL; + } + + rh = bar->vaddr; + mrh = bar->vaddr; + if (!rh) { + pr_err("vNIC BAR0 res hdr not mem-mapped\n"); + return -EINVAL; + } + + /* Check for mgmt vnic in addition to normal vnic */ + if ((ioread32(&rh->magic) != VNIC_RES_MAGIC) || + (ioread32(&rh->version) != VNIC_RES_VERSION)) { + if ((ioread32(&mrh->magic) != MGMTVNIC_MAGIC) || + (ioread32(&mrh->version) != MGMTVNIC_VERSION)) { + pr_err("vNIC BAR0 res magic/version error " \ + "exp (%lx/%lx) or (%lx/%lx), curr (%x/%x)\n", + VNIC_RES_MAGIC, VNIC_RES_VERSION, + MGMTVNIC_MAGIC, MGMTVNIC_VERSION, + ioread32(&rh->magic), ioread32(&rh->version)); + return -EINVAL; + } + } + + if (ioread32(&mrh->magic) == MGMTVNIC_MAGIC) + r = (struct vnic_resource __iomem *)(mrh + 1); + else + r = (struct vnic_resource __iomem *)(rh + 1); + + + while ((type = ioread8(&r->type)) != RES_TYPE_EOL) { + uint8_t bar_num = ioread8(&r->bar); + uint32_t bar_offset = ioread32(&r->bar_offset); + uint32_t count = ioread32(&r->count); + uint32_t len; + + r++; + + if (bar_num >= num_bars) + continue; + + if (!bar[bar_num].len || !bar[bar_num].vaddr) + continue; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + /* each count is stride bytes long */ + len = count * VNIC_RES_STRIDE; + if (len + bar_offset > bar[bar_num].len) { + pr_err("vNIC BAR0 resource %d " \ + "out-of-bounds, offset 0x%x + " \ + "size 0x%x > bar len 0x%lx\n", + type, bar_offset, + len, + bar[bar_num].len); + return -EINVAL; + } + break; + case RES_TYPE_INTR_PBA_LEGACY: + case RES_TYPE_DEVCMD: + len = count; + break; + default: + continue; + } + + vdev->res[type].count = count; + vdev->res[type].vaddr = (char __iomem *)bar[bar_num].vaddr + + bar_offset; + vdev->res[type].bus_addr = bar[bar_num].bus_addr + bar_offset; + } + + return 0; +} + +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type) +{ + return vdev->res[type].count; +} + +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index) +{ + if (!vdev->res[type].vaddr) + return NULL; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + return (char __iomem *)vdev->res[type].vaddr + + index * VNIC_RES_STRIDE; + default: + return (char __iomem *)vdev->res[type].vaddr; + } +} + +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size) +{ + /* The base address of the desc rings must be 512 byte aligned. + * Descriptor count is aligned to groups of 32 descriptors. A + * count of 0 means the maximum 4096 descriptors. Descriptor + * size is aligned to 16 bytes. + */ + + unsigned int count_align = 32; + unsigned int desc_align = 16; + + ring->base_align = 512; + + if (desc_count == 0) + desc_count = 4096; + + ring->desc_count = VNIC_ALIGN(desc_count, count_align); + + ring->desc_size = VNIC_ALIGN(desc_size, desc_align); + + ring->size = ring->desc_count * ring->desc_size; + ring->size_unaligned = ring->size + ring->base_align; + + return ring->size_unaligned; +} + +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) +{ + memset(ring->descs, 0, ring->size); +} + +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, + struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size, + __rte_unused unsigned int socket_id, + char *z_name) +{ + void *alloc_addr; + dma_addr_t alloc_pa = 0; + + vnic_dev_desc_ring_size(ring, desc_count, desc_size); + alloc_addr = vdev->alloc_consistent(vdev->priv, + ring->size_unaligned, + &alloc_pa, (uint8_t *)z_name); + if (!alloc_addr) { + pr_err("Failed to allocate ring (size=%d), aborting\n", + (int)ring->size); + return -ENOMEM; + } + ring->descs_unaligned = alloc_addr; + if (!alloc_pa) { + pr_err("Failed to map allocated ring (size=%d), aborting\n", + (int)ring->size); + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + alloc_addr, + alloc_pa); + return -ENOMEM; + } + ring->base_addr_unaligned = alloc_pa; + + ring->base_addr = VNIC_ALIGN(ring->base_addr_unaligned, + ring->base_align); + ring->descs = (uint8_t *)ring->descs_unaligned + + (ring->base_addr - ring->base_addr_unaligned); + + vnic_dev_clear_desc_ring(ring); + + ring->desc_avail = ring->desc_count - 1; + + return 0; +} + +void vnic_dev_free_desc_ring(__rte_unused struct vnic_dev *vdev, + struct vnic_dev_ring *ring) +{ + if (ring->descs) { + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); + ring->descs = NULL; + } +} + +static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + int wait) +{ + struct vnic_devcmd __iomem *devcmd = vdev->devcmd; + unsigned int i; + int delay; + uint32_t status; + int err; + + status = ioread32(&devcmd->status); + if (status == 0xFFFFFFFF) { + /* PCI-e target device is gone */ + return -ENODEV; + } + if (status & STAT_BUSY) { + + pr_err("Busy devcmd %d\n", _CMD_N(cmd)); + return -EBUSY; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) { + for (i = 0; i < VNIC_DEVCMD_NARGS; i++) + writeq(vdev->args[i], &devcmd->args[i]); + rte_wmb(); /* complete all writes initiated till now */ + } + + iowrite32(cmd, &devcmd->cmd); + + if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT)) + return 0; + + for (delay = 0; delay < wait; delay++) { + + usleep(100); + + status = ioread32(&devcmd->status); + if (status == 0xFFFFFFFF) { + /* PCI-e target device is gone */ + return -ENODEV; + } + + if (!(status & STAT_BUSY)) { + if (status & STAT_ERROR) { + err = -(int)readq(&devcmd->args[0]); + if (cmd != CMD_CAPABILITY && + cmd != CMD_OVERLAY_OFFLOAD_CTRL && + cmd != CMD_GET_SUPP_FEATURE_VER) + pr_err("Devcmd %d failed " \ + "with error code %d\n", + _CMD_N(cmd), err); + return err; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_READ) { + rte_rmb();/* finish all reads */ + for (i = 0; i < VNIC_DEVCMD_NARGS; i++) + vdev->args[i] = readq(&devcmd->args[i]); + } + + return 0; + } + } + + pr_err("Timedout devcmd %d\n", _CMD_N(cmd)); + return -ETIMEDOUT; +} + +static int vnic_dev_cmd_proxy(struct vnic_dev *vdev, + enum vnic_devcmd_cmd proxy_cmd, enum vnic_devcmd_cmd cmd, + uint64_t *args, int nargs, int wait) +{ + uint32_t status; + int err; + + /* + * Proxy command consumes 2 arguments. One for proxy index, + * the other is for command to be proxied + */ + if (nargs > VNIC_DEVCMD_NARGS - 2) { + pr_err("number of args %d exceeds the maximum\n", nargs); + return -EINVAL; + } + memset(vdev->args, 0, sizeof(vdev->args)); + + vdev->args[0] = vdev->proxy_index; + vdev->args[1] = cmd; + memcpy(&vdev->args[2], args, nargs * sizeof(args[0])); + + err = _vnic_dev_cmd(vdev, proxy_cmd, wait); + if (err) + return err; + + status = (uint32_t)vdev->args[0]; + if (status & STAT_ERROR) { + err = (int)vdev->args[1]; + if (err != ERR_ECMDUNKNOWN || + cmd != CMD_CAPABILITY) + pr_err("Error %d proxy devcmd %d\n", err, _CMD_N(cmd)); + return err; + } + + memcpy(args, &vdev->args[1], nargs * sizeof(args[0])); + + return 0; +} + +static int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev, + enum vnic_devcmd_cmd cmd, uint64_t *args, int nargs, int wait) +{ + int err; + + if (nargs > VNIC_DEVCMD_NARGS) { + pr_err("number of args %d exceeds the maximum\n", nargs); + return -EINVAL; + } + memset(vdev->args, 0, sizeof(vdev->args)); + memcpy(vdev->args, args, nargs * sizeof(args[0])); + + err = _vnic_dev_cmd(vdev, cmd, wait); + + memcpy(args, vdev->args, nargs * sizeof(args[0])); + + return err; +} + +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + uint64_t *a0, uint64_t *a1, int wait) +{ + uint64_t args[2]; + int err; + + args[0] = *a0; + args[1] = *a1; + memset(vdev->args, 0, sizeof(vdev->args)); + + switch (vdev->proxy) { + case PROXY_BY_INDEX: + err = vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_INDEX, cmd, + args, ARRAY_SIZE(args), wait); + break; + case PROXY_BY_BDF: + err = vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_BDF, cmd, + args, ARRAY_SIZE(args), wait); + break; + case PROXY_NONE: + default: + err = vnic_dev_cmd_no_proxy(vdev, cmd, args, 2, wait); + break; + } + + if (err == 0) { + *a0 = args[0]; + *a1 = args[1]; + } + + return err; +} + +int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + uint64_t *args, int nargs, int wait) +{ + switch (vdev->proxy) { + case PROXY_BY_INDEX: + return vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_INDEX, cmd, + args, nargs, wait); + case PROXY_BY_BDF: + return vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_BDF, cmd, + args, nargs, wait); + case PROXY_NONE: + default: + return vnic_dev_cmd_no_proxy(vdev, cmd, args, nargs, wait); + } +} + +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info) +{ + char name[RTE_MEMZONE_NAMESIZE]; + uint64_t a0, a1 = 0; + int wait = 1000; + int err = 0; + static uint32_t instance; + + if (!vdev->fw_info) { + snprintf((char *)name, sizeof(name), "vnic_fw_info-%u", + instance++); + vdev->fw_info = vdev->alloc_consistent(vdev->priv, + sizeof(struct vnic_devcmd_fw_info), + &vdev->fw_info_pa, (uint8_t *)name); + if (!vdev->fw_info) + return -ENOMEM; + a0 = vdev->fw_info_pa; + a1 = sizeof(struct vnic_devcmd_fw_info); + err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, + &a0, &a1, wait); + } + *fw_info = vdev->fw_info; + return err; +} + +static int vnic_dev_advanced_filters_cap(struct vnic_dev *vdev, uint64_t *args, + int nargs) +{ + memset(args, 0, nargs * sizeof(*args)); + args[0] = CMD_ADD_ADV_FILTER; + args[1] = FILTER_CAP_MODE_V1_FLAG; + return vnic_dev_cmd_args(vdev, CMD_CAPABILITY, args, nargs, 1000); +} + +int vnic_dev_capable_adv_filters(struct vnic_dev *vdev) +{ + uint64_t a0 = CMD_ADD_ADV_FILTER, a1 = 0; + int wait = 1000; + int err; + + err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait); + if (err) + return 0; + return (a1 >= (uint32_t)FILTER_DPDK_1); +} + +int vnic_dev_flowman_cmd(struct vnic_dev *vdev, uint64_t *args, int nargs) +{ + int wait = 1000; + + return vnic_dev_cmd_args(vdev, CMD_FLOW_MANAGER_OP, args, nargs, wait); +} + +static int vnic_dev_flowman_enable(struct vnic_dev *vdev, uint32_t *mode, + uint8_t *filter_actions) +{ + char name[RTE_MEMZONE_NAMESIZE]; + uint64_t args[3]; + uint64_t ops; + static uint32_t instance; + + /* flowman devcmd available? */ + if (!vnic_dev_capable(vdev, CMD_FLOW_MANAGER_OP)) + return 0; + /* Have the version we are using? */ + args[0] = FM_API_VERSION_QUERY; + if (vnic_dev_flowman_cmd(vdev, args, 1)) + return 0; + if ((args[0] & (1ULL << FM_VERSION)) == 0) + return 0; + /* Select the version */ + args[0] = FM_API_VERSION_SELECT; + args[1] = FM_VERSION; + if (vnic_dev_flowman_cmd(vdev, args, 2)) + return 0; + /* Can we get fm_info? */ + if (!vdev->flowman_info) { + snprintf((char *)name, sizeof(name), "vnic_fm_info-%u", + instance++); + vdev->flowman_info = vdev->alloc_consistent(vdev->priv, + sizeof(struct fm_info), + &vdev->flowman_info_pa, (uint8_t *)name); + if (!vdev->flowman_info) + return 0; + } + args[0] = FM_INFO_QUERY; + args[1] = vdev->flowman_info_pa; + args[2] = sizeof(struct fm_info); + if (vnic_dev_flowman_cmd(vdev, args, 3)) + return 0; + /* Have required operations? */ + ops = (1ULL << FMOP_END) | + (1ULL << FMOP_DROP) | + (1ULL << FMOP_RQ_STEER) | + (1ULL << FMOP_EXACT_MATCH) | + (1ULL << FMOP_MARK) | + (1ULL << FMOP_TAG) | + (1ULL << FMOP_EG_HAIRPIN) | + (1ULL << FMOP_ENCAP) | + (1ULL << FMOP_DECAP_NOSTRIP); + if ((vdev->flowman_info->fm_op_mask & ops) != ops) + return 0; + /* Good to use flowman now */ + *mode = FILTER_FLOWMAN; + *filter_actions = FILTER_ACTION_RQ_STEERING_FLAG | + FILTER_ACTION_FILTER_ID_FLAG | + FILTER_ACTION_COUNTER_FLAG | + FILTER_ACTION_DROP_FLAG; + return 1; +} + +/* Determine the "best" filtering mode VIC is capaible of. Returns one of 4 + * value or 0 on error: + * FILTER_FLOWMAN- flowman api capable + * FILTER_DPDK_1- advanced filters availabile + * FILTER_USNIC_IP_FLAG - advanced filters but with the restriction that + * the IP layer must explicitly specified. I.e. cannot have a UDP + * filter that matches both IPv4 and IPv6. + * FILTER_IPV4_5TUPLE - fallback if either of the 2 above aren't available. + * all other filter types are not available. + * Retrun true in filter_tags if supported + */ +int vnic_dev_capable_filter_mode(struct vnic_dev *vdev, uint32_t *mode, + uint8_t *filter_actions) +{ + uint64_t args[4]; + int err; + uint32_t max_level = 0; + + /* If flowman is available, use it as it is the most capable API */ + if (vnic_dev_flowman_enable(vdev, mode, filter_actions)) + return 0; + + err = vnic_dev_advanced_filters_cap(vdev, args, 4); + + /* determine supported filter actions */ + *filter_actions = FILTER_ACTION_RQ_STEERING_FLAG; /* always available */ + if (args[2] == FILTER_CAP_MODE_V1) + *filter_actions = args[3]; + + if (err || ((args[0] == 1) && (args[1] == 0))) { + /* Adv filter Command not supported or adv filters available but + * not enabled. Try the normal filter capability command. + */ + args[0] = CMD_ADD_FILTER; + args[1] = 0; + err = vnic_dev_cmd_args(vdev, CMD_CAPABILITY, args, 2, 1000); + if (err) + return err; + max_level = args[1]; + goto parse_max_level; + } else if (args[2] == FILTER_CAP_MODE_V1) { + /* parse filter capability mask in args[1] */ + if (args[1] & FILTER_DPDK_1_FLAG) + *mode = FILTER_DPDK_1; + else if (args[1] & FILTER_USNIC_IP_FLAG) + *mode = FILTER_USNIC_IP; + else if (args[1] & FILTER_IPV4_5TUPLE_FLAG) + *mode = FILTER_IPV4_5TUPLE; + return 0; + } + max_level = args[1]; +parse_max_level: + if (max_level >= (uint32_t)FILTER_USNIC_IP) + *mode = FILTER_USNIC_IP; + else + *mode = FILTER_IPV4_5TUPLE; + return 0; +} + +void vnic_dev_capable_udp_rss_weak(struct vnic_dev *vdev, bool *cfg_chk, + bool *weak) +{ + uint64_t a0 = CMD_NIC_CFG, a1 = 0; + int wait = 1000; + int err; + + *cfg_chk = false; + *weak = false; + err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait); + if (err == 0 && a0 != 0 && a1 != 0) { + *cfg_chk = true; + *weak = !!((a1 >> 32) & CMD_NIC_CFG_CAPF_UDP_WEAK); + } +} + +int vnic_dev_capable(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd) +{ + uint64_t a0 = (uint32_t)cmd, a1 = 0; + int wait = 1000; + int err; + + err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait); + + return !(err || a0); +} + +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size, + void *value) +{ + uint64_t a0, a1; + int wait = 1000; + int err; + + a0 = offset; + a1 = size; + + err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait); + + switch (size) { + case 1: + *(uint8_t *)value = (uint8_t)a0; + break; + case 2: + *(uint16_t *)value = (uint16_t)a0; + break; + case 4: + *(uint32_t *)value = (uint32_t)a0; + break; + case 8: + *(uint64_t *)value = a0; + break; + default: + BUG(); + break; + } + + return err; +} + +int vnic_dev_stats_clear(struct vnic_dev *vdev) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait); +} + +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) +{ + uint64_t a0, a1; + int wait = 1000; + + if (!vdev->stats) + return -ENOMEM; + + *stats = vdev->stats; + a0 = vdev->stats_pa; + a1 = sizeof(struct vnic_stats); + + return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait); +} + +int vnic_dev_close(struct vnic_dev *vdev) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait); +} + +int vnic_dev_enable_wait(struct vnic_dev *vdev) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + + if (vnic_dev_capable(vdev, CMD_ENABLE_WAIT)) + return vnic_dev_cmd(vdev, CMD_ENABLE_WAIT, &a0, &a1, wait); + else + return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait); +} + +int vnic_dev_disable(struct vnic_dev *vdev) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait); +} + +int vnic_dev_open(struct vnic_dev *vdev, int arg) +{ + uint64_t a0 = (uint32_t)arg, a1 = 0; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait); +} + +int vnic_dev_open_done(struct vnic_dev *vdev, int *done) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + int err; + + *done = 0; + + err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait); + if (err) + return err; + + *done = (a0 == 0); + + return 0; +} + +int vnic_dev_get_mac_addr(struct vnic_dev *vdev, uint8_t *mac_addr) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + int err, i; + + for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) + mac_addr[i] = 0; + + err = vnic_dev_cmd(vdev, CMD_GET_MAC_ADDR, &a0, &a1, wait); + if (err) + return err; + + for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) + mac_addr[i] = ((uint8_t *)&a0)[i]; + + return 0; +} + +int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti) +{ + uint64_t a0, a1 = 0; + int wait = 1000; + int err; + + a0 = (directed ? CMD_PFILTER_DIRECTED : 0) | + (multicast ? CMD_PFILTER_MULTICAST : 0) | + (broadcast ? CMD_PFILTER_BROADCAST : 0) | + (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | + (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0); + + err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); + if (err) + pr_err("Can't set packet filter\n"); + + return err; +} + +int vnic_dev_add_addr(struct vnic_dev *vdev, uint8_t *addr) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) + ((uint8_t *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + if (err) + pr_err("Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); + + return err; +} + +int vnic_dev_del_addr(struct vnic_dev *vdev, uint8_t *addr) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) + ((uint8_t *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); + if (err) + pr_err("Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); + + return err; +} + +int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev, + uint8_t ig_vlan_rewrite_mode) +{ + uint64_t a0 = ig_vlan_rewrite_mode, a1 = 0; + int wait = 1000; + + if (vnic_dev_capable(vdev, CMD_IG_VLAN_REWRITE_MODE)) + return vnic_dev_cmd(vdev, CMD_IG_VLAN_REWRITE_MODE, + &a0, &a1, wait); + else + return 0; +} + +void vnic_dev_set_reset_flag(struct vnic_dev *vdev, int state) +{ + vdev->in_reset = state; +} + +static inline int vnic_dev_in_reset(struct vnic_dev *vdev) +{ + return vdev->in_reset; +} + +int vnic_dev_notify_setcmd(struct vnic_dev *vdev, + void *notify_addr, dma_addr_t notify_pa, uint16_t intr) +{ + uint64_t a0, a1; + int wait = 1000; + int r; + + memset(notify_addr, 0, sizeof(struct vnic_devcmd_notify)); + if (!vnic_dev_in_reset(vdev)) { + vdev->notify = notify_addr; + vdev->notify_pa = notify_pa; + } + + a0 = (uint64_t)notify_pa; + a1 = ((uint64_t)intr << 32) & 0x0000ffff00000000ULL; + a1 += sizeof(struct vnic_devcmd_notify); + + r = vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); + if (!vnic_dev_in_reset(vdev)) + vdev->notify_sz = (r == 0) ? (uint32_t)a1 : 0; + + return r; +} + +int vnic_dev_notify_set(struct vnic_dev *vdev, uint16_t intr) +{ + void *notify_addr = NULL; + dma_addr_t notify_pa = 0; + char name[RTE_MEMZONE_NAMESIZE]; + static uint32_t instance; + + if (vdev->notify || vdev->notify_pa) { + return vnic_dev_notify_setcmd(vdev, vdev->notify, + vdev->notify_pa, intr); + } + if (!vnic_dev_in_reset(vdev)) { + snprintf((char *)name, sizeof(name), + "vnic_notify-%u", instance++); + notify_addr = vdev->alloc_consistent(vdev->priv, + sizeof(struct vnic_devcmd_notify), + ¬ify_pa, (uint8_t *)name); + if (!notify_addr) + return -ENOMEM; + } + + return vnic_dev_notify_setcmd(vdev, notify_addr, notify_pa, intr); +} + +int vnic_dev_notify_unsetcmd(struct vnic_dev *vdev) +{ + uint64_t a0, a1; + int wait = 1000; + int err; + + a0 = 0; /* paddr = 0 to unset notify buffer */ + a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */ + a1 += sizeof(struct vnic_devcmd_notify); + + err = vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); + if (!vnic_dev_in_reset(vdev)) { + vdev->notify = NULL; + vdev->notify_pa = 0; + vdev->notify_sz = 0; + } + + return err; +} + +int vnic_dev_notify_unset(struct vnic_dev *vdev) +{ + if (vdev->notify && !vnic_dev_in_reset(vdev)) { + vdev->free_consistent(vdev->priv, + sizeof(struct vnic_devcmd_notify), + vdev->notify, + vdev->notify_pa); + } + + return vnic_dev_notify_unsetcmd(vdev); +} + +static int vnic_dev_notify_ready(struct vnic_dev *vdev) +{ + uint32_t *words; + unsigned int nwords = vdev->notify_sz / 4; + unsigned int i; + uint32_t csum; + + if (!vdev->notify || !vdev->notify_sz) + return 0; + + do { + csum = 0; + rte_memcpy(&vdev->notify_copy, vdev->notify, vdev->notify_sz); + words = (uint32_t *)&vdev->notify_copy; + for (i = 1; i < nwords; i++) + csum += words[i]; + } while (csum != words[0]); + + return 1; +} + +int vnic_dev_init(struct vnic_dev *vdev, int arg) +{ + uint64_t a0 = (uint32_t)arg, a1 = 0; + int wait = 1000; + int r = 0; + + if (vnic_dev_capable(vdev, CMD_INIT)) + r = vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait); + else { + vnic_dev_cmd(vdev, CMD_INIT_v1, &a0, &a1, wait); + if (a0 & CMD_INITF_DEFAULT_MAC) { + /* Emulate these for old CMD_INIT_v1 which + * didn't pass a0 so no CMD_INITF_*. + */ + vnic_dev_cmd(vdev, CMD_GET_MAC_ADDR, &a0, &a1, wait); + vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + } + } + return r; +} + +void vnic_dev_intr_coal_timer_info_default(struct vnic_dev *vdev) +{ + /* Default: hardware intr coal timer is in units of 1.5 usecs */ + vdev->intr_coal_timer_info.mul = 2; + vdev->intr_coal_timer_info.div = 3; + vdev->intr_coal_timer_info.max_usec = + vnic_dev_intr_coal_timer_hw_to_usec(vdev, 0xffff); +} + +int vnic_dev_link_status(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.link_state; +} + +uint32_t vnic_dev_port_speed(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.port_speed; +} + +uint32_t vnic_dev_intr_coal_timer_usec_to_hw(struct vnic_dev *vdev, + uint32_t usec) +{ + return (usec * vdev->intr_coal_timer_info.mul) / + vdev->intr_coal_timer_info.div; +} + +uint32_t vnic_dev_intr_coal_timer_hw_to_usec(struct vnic_dev *vdev, + uint32_t hw_cycles) +{ + return (hw_cycles * vdev->intr_coal_timer_info.div) / + vdev->intr_coal_timer_info.mul; +} + +uint32_t vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev) +{ + return vdev->intr_coal_timer_info.max_usec; +} + +int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev) +{ + char name[RTE_MEMZONE_NAMESIZE]; + static uint32_t instance; + + snprintf((char *)name, sizeof(name), "vnic_stats-%u", instance++); + vdev->stats = vdev->alloc_consistent(vdev->priv, + sizeof(struct vnic_stats), + &vdev->stats_pa, (uint8_t *)name); + return vdev->stats == NULL ? -ENOMEM : 0; +} + +void vnic_dev_unregister(struct vnic_dev *vdev) +{ + if (vdev) { + if (vdev->notify) + vdev->free_consistent(vdev->priv, + sizeof(struct vnic_devcmd_notify), + vdev->notify, + vdev->notify_pa); + if (vdev->stats) + vdev->free_consistent(vdev->priv, + sizeof(struct vnic_stats), + vdev->stats, vdev->stats_pa); + if (vdev->flowman_info) + vdev->free_consistent(vdev->priv, + sizeof(struct fm_info), + vdev->flowman_info, vdev->flowman_info_pa); + if (vdev->fw_info) + vdev->free_consistent(vdev->priv, + sizeof(struct vnic_devcmd_fw_info), + vdev->fw_info, vdev->fw_info_pa); + rte_free(vdev); + } +} + +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct rte_pci_device *pdev, struct vnic_dev_bar *bar, + unsigned int num_bars) +{ + if (!vdev) { + char name[RTE_MEMZONE_NAMESIZE]; + snprintf((char *)name, sizeof(name), "%s-vnic", + pdev->device.name); + vdev = (struct vnic_dev *)rte_zmalloc_socket(name, + sizeof(struct vnic_dev), + RTE_CACHE_LINE_SIZE, + pdev->device.numa_node); + if (!vdev) + return NULL; + } + + vdev->priv = priv; + vdev->pdev = pdev; + + if (vnic_dev_discover_res(vdev, bar, num_bars)) + goto err_out; + + vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0); + if (!vdev->devcmd) + goto err_out; + + return vdev; + +err_out: + vnic_dev_unregister(vdev); + return NULL; +} + +/* + * vnic_dev_classifier: Add/Delete classifier entries + * @vdev: vdev of the device + * @cmd: CLSF_ADD for Add filter + * CLSF_DEL for Delete filter + * @entry: In case of ADD filter, the caller passes the RQ number in this + * variable. + * This function stores the filter_id returned by the + * firmware in the same variable before return; + * + * In case of DEL filter, the caller passes the RQ number. Return + * value is irrelevant. + * @data: filter data + * @action: action data + */ +int vnic_dev_classifier(struct vnic_dev *vdev, uint8_t cmd, uint16_t *entry, + struct filter_v2 *data, struct filter_action_v2 *action_v2) +{ + uint64_t a0 = 0, a1 = 0; + int wait = 1000; + dma_addr_t tlv_pa; + int ret = -EINVAL; + struct filter_tlv *tlv, *tlv_va; + uint64_t tlv_size; + uint32_t filter_size, action_size; + static unsigned int unique_id; + char z_name[RTE_MEMZONE_NAMESIZE]; + enum vnic_devcmd_cmd dev_cmd; + + if (cmd == CLSF_ADD) { + dev_cmd = (data->type >= FILTER_DPDK_1) ? + CMD_ADD_ADV_FILTER : CMD_ADD_FILTER; + + filter_size = vnic_filter_size(data); + action_size = vnic_action_size(action_v2); + + tlv_size = filter_size + action_size + + 2*sizeof(struct filter_tlv); + snprintf((char *)z_name, sizeof(z_name), + "vnic_clsf_%u", unique_id++); + tlv_va = vdev->alloc_consistent(vdev->priv, + tlv_size, &tlv_pa, (uint8_t *)z_name); + if (!tlv_va) + return -ENOMEM; + tlv = tlv_va; + a0 = tlv_pa; + a1 = tlv_size; + memset(tlv, 0, tlv_size); + tlv->type = CLSF_TLV_FILTER; + tlv->length = filter_size; + memcpy(&tlv->val, (void *)data, filter_size); + + tlv = (struct filter_tlv *)((char *)tlv + + sizeof(struct filter_tlv) + + filter_size); + + tlv->type = CLSF_TLV_ACTION; + tlv->length = action_size; + memcpy(&tlv->val, (void *)action_v2, action_size); + ret = vnic_dev_cmd(vdev, dev_cmd, &a0, &a1, wait); + *entry = (uint16_t)a0; + vdev->free_consistent(vdev->priv, tlv_size, tlv_va, tlv_pa); + } else if (cmd == CLSF_DEL) { + a0 = *entry; + ret = vnic_dev_cmd(vdev, CMD_DEL_FILTER, &a0, &a1, wait); + } + + return ret; +} + +int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, uint8_t overlay, + uint8_t config) +{ + uint64_t a0 = overlay; + uint64_t a1 = config; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CTRL, &a0, &a1, wait); +} + +int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, uint8_t overlay, + uint16_t vxlan_udp_port_number) +{ + uint64_t a1 = vxlan_udp_port_number; + uint64_t a0 = overlay; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CFG, &a0, &a1, wait); +} + +int vnic_dev_capable_vxlan(struct vnic_dev *vdev) +{ + uint64_t a0 = VIC_FEATURE_VXLAN; + uint64_t a1 = 0; + int wait = 1000; + int ret; + + ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait); + /* 1 if the NIC can do VXLAN for both IPv4 and IPv6 with multiple WQs */ + return ret == 0 && + (a1 & (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ)) == + (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ); +} + +int vnic_dev_capable_geneve(struct vnic_dev *vdev) +{ + uint64_t a0 = VIC_FEATURE_GENEVE; + uint64_t a1 = 0; + int wait = 1000; + int ret; + + ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait); + return ret == 0 && (a1 & FEATURE_GENEVE_OPTIONS); +} diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.h new file mode 100644 index 000000000..02e19c0b8 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_dev.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_DEV_H_ +#define _VNIC_DEV_H_ + +#include <stdbool.h> + +#include <rte_pci.h> +#include <rte_bus_pci.h> + +#include "enic_compat.h" +#include "vnic_resource.h" +#include "vnic_devcmd.h" + +#ifndef VNIC_PADDR_TARGET +#define VNIC_PADDR_TARGET 0x0000000000000000ULL +#endif + +#ifndef readq +static inline uint64_t readq(void __iomem *reg) +{ + return ((uint64_t)readl((char *)reg + 0x4UL) << 32) | + (uint64_t)readl(reg); +} + +static inline void writeq(uint64_t val, void __iomem *reg) +{ + writel(val & 0xffffffff, reg); + writel((uint32_t)(val >> 32), (char *)reg + 0x4UL); +} +#endif + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +enum vnic_dev_intr_mode { + VNIC_DEV_INTR_MODE_UNKNOWN, + VNIC_DEV_INTR_MODE_INTX, + VNIC_DEV_INTR_MODE_MSI, + VNIC_DEV_INTR_MODE_MSIX, +}; + +struct vnic_dev_bar { + void __iomem *vaddr; + dma_addr_t bus_addr; + unsigned long len; +}; + +struct vnic_dev_ring { + void *descs; + size_t size; + dma_addr_t base_addr; + size_t base_align; + void *descs_unaligned; + size_t size_unaligned; + dma_addr_t base_addr_unaligned; + unsigned int desc_size; + unsigned int desc_count; + unsigned int desc_avail; +}; + +struct vnic_dev_iomap_info { + dma_addr_t bus_addr; + unsigned long len; + void __iomem *vaddr; +}; + +struct vnic_dev; +struct vnic_stats; + +void *vnic_dev_priv(struct vnic_dev *vdev); +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type); +void vnic_register_cbacks(struct vnic_dev *vdev, + void *(*alloc_consistent)(void *priv, size_t size, + dma_addr_t *dma_handle, uint8_t *name), + void (*free_consistent)(void *priv, + size_t size, void *vaddr, + dma_addr_t dma_handle)); +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index); +dma_addr_t vnic_dev_get_res_bus_addr(struct vnic_dev *vdev, + enum vnic_res_type type, unsigned int index); +uint8_t vnic_dev_get_res_bar(struct vnic_dev *vdev, + enum vnic_res_type type); +uint32_t vnic_dev_get_res_offset(struct vnic_dev *vdev, + enum vnic_res_type type, unsigned int index); +unsigned long vnic_dev_get_res_type_len(struct vnic_dev *vdev, + enum vnic_res_type type); +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size); +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring); +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size, unsigned int socket_id, + char *z_name); +void vnic_dev_free_desc_ring(struct vnic_dev *vdev, + struct vnic_dev_ring *ring); +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + uint64_t *a0, uint64_t *a1, int wait); +int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + uint64_t *args, int nargs, int wait); +void vnic_dev_cmd_proxy_by_index_start(struct vnic_dev *vdev, uint16_t index); +void vnic_dev_cmd_proxy_by_bdf_start(struct vnic_dev *vdev, uint16_t bdf); +void vnic_dev_cmd_proxy_end(struct vnic_dev *vdev); +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info); +int vnic_dev_capable_adv_filters(struct vnic_dev *vdev); +int vnic_dev_capable(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd); +int vnic_dev_capable_filter_mode(struct vnic_dev *vdev, uint32_t *mode, + uint8_t *filter_actions); +void vnic_dev_capable_udp_rss_weak(struct vnic_dev *vdev, bool *cfg_chk, + bool *weak); +int vnic_dev_asic_info(struct vnic_dev *vdev, uint16_t *asic_type, + uint16_t *asic_rev); +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size, + void *value); +int vnic_dev_stats_clear(struct vnic_dev *vdev); +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats); +int vnic_dev_hang_notify(struct vnic_dev *vdev); +int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti); +int vnic_dev_packet_filter_all(struct vnic_dev *vdev, int directed, + int multicast, int broadcast, int promisc, int allmulti); +int vnic_dev_add_addr(struct vnic_dev *vdev, uint8_t *addr); +int vnic_dev_del_addr(struct vnic_dev *vdev, uint8_t *addr); +int vnic_dev_get_mac_addr(struct vnic_dev *vdev, uint8_t *mac_addr); +int vnic_dev_raise_intr(struct vnic_dev *vdev, uint16_t intr); +int vnic_dev_notify_set(struct vnic_dev *vdev, uint16_t intr); +void vnic_dev_set_reset_flag(struct vnic_dev *vdev, int state); +int vnic_dev_notify_unset(struct vnic_dev *vdev); +int vnic_dev_notify_setcmd(struct vnic_dev *vdev, + void *notify_addr, dma_addr_t notify_pa, uint16_t intr); +int vnic_dev_notify_unsetcmd(struct vnic_dev *vdev); +int vnic_dev_link_status(struct vnic_dev *vdev); +uint32_t vnic_dev_port_speed(struct vnic_dev *vdev); +uint32_t vnic_dev_msg_lvl(struct vnic_dev *vdev); +uint32_t vnic_dev_mtu(struct vnic_dev *vdev); +uint32_t vnic_dev_link_down_cnt(struct vnic_dev *vdev); +uint32_t vnic_dev_notify_status(struct vnic_dev *vdev); +uint32_t vnic_dev_uif(struct vnic_dev *vdev); +int vnic_dev_close(struct vnic_dev *vdev); +int vnic_dev_enable(struct vnic_dev *vdev); +int vnic_dev_enable_wait(struct vnic_dev *vdev); +int vnic_dev_disable(struct vnic_dev *vdev); +int vnic_dev_open(struct vnic_dev *vdev, int arg); +int vnic_dev_open_done(struct vnic_dev *vdev, int *done); +int vnic_dev_init(struct vnic_dev *vdev, int arg); +int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err); +int vnic_dev_init_prov(struct vnic_dev *vdev, uint8_t *buf, uint32_t len); +int vnic_dev_deinit(struct vnic_dev *vdev); +void vnic_dev_intr_coal_timer_info_default(struct vnic_dev *vdev); +int vnic_dev_intr_coal_timer_info(struct vnic_dev *vdev); +int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg); +int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done); +int vnic_dev_hang_reset(struct vnic_dev *vdev, int arg); +int vnic_dev_hang_reset_done(struct vnic_dev *vdev, int *done); +void vnic_dev_set_intr_mode(struct vnic_dev *vdev, + enum vnic_dev_intr_mode intr_mode); +enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev); +uint32_t vnic_dev_intr_coal_timer_usec_to_hw(struct vnic_dev *vdev, + uint32_t usec); +uint32_t vnic_dev_intr_coal_timer_hw_to_usec(struct vnic_dev *vdev, + uint32_t hw_cycles); +uint32_t vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev); +void vnic_dev_unregister(struct vnic_dev *vdev); +int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev, + uint8_t ig_vlan_rewrite_mode); +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct rte_pci_device *pdev, struct vnic_dev_bar *bar, + unsigned int num_bars); +struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev); +int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev); +int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback); +int vnic_dev_get_size(void); +int vnic_dev_int13(struct vnic_dev *vdev, uint64_t arg, uint32_t op); +int vnic_dev_perbi(struct vnic_dev *vdev, uint64_t arg, uint32_t op); +uint32_t vnic_dev_perbi_rebuild_cnt(struct vnic_dev *vdev); +int vnic_dev_init_prov2(struct vnic_dev *vdev, uint8_t *buf, uint32_t len); +int vnic_dev_enable2(struct vnic_dev *vdev, int active); +int vnic_dev_enable2_done(struct vnic_dev *vdev, int *status); +int vnic_dev_deinit_done(struct vnic_dev *vdev, int *status); +int vnic_dev_set_mac_addr(struct vnic_dev *vdev, uint8_t *mac_addr); +int vnic_dev_classifier(struct vnic_dev *vdev, uint8_t cmd, uint16_t *entry, + struct filter_v2 *data, struct filter_action_v2 *action_v2); +int vnic_dev_flowman_cmd(struct vnic_dev *vdev, uint64_t *args, int nargs); +int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, + uint8_t overlay, uint8_t config); +int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, uint8_t overlay, + uint16_t vxlan_udp_port_number); +int vnic_dev_capable_vxlan(struct vnic_dev *vdev); +int vnic_dev_capable_geneve(struct vnic_dev *vdev); +#endif /* _VNIC_DEV_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_devcmd.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_devcmd.h new file mode 100644 index 000000000..a2f577f1e --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_devcmd.h @@ -0,0 +1,1166 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_DEVCMD_H_ +#define _VNIC_DEVCMD_H_ + +#define _CMD_NBITS 14 +#define _CMD_VTYPEBITS 10 +#define _CMD_FLAGSBITS 6 +#define _CMD_DIRBITS 2 + +#define _CMD_NMASK ((1 << _CMD_NBITS)-1) +#define _CMD_VTYPEMASK ((1 << _CMD_VTYPEBITS)-1) +#define _CMD_FLAGSMASK ((1 << _CMD_FLAGSBITS)-1) +#define _CMD_DIRMASK ((1 << _CMD_DIRBITS)-1) + +#define _CMD_NSHIFT 0 +#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS) +#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS) +#define _CMD_DIRSHIFT (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS) + +/* + * Direction bits (from host perspective). + */ +#define _CMD_DIR_NONE 0U +#define _CMD_DIR_WRITE 1U +#define _CMD_DIR_READ 2U +#define _CMD_DIR_RW (_CMD_DIR_WRITE | _CMD_DIR_READ) + +/* + * Flag bits. + */ +#define _CMD_FLAGS_NONE 0U +#define _CMD_FLAGS_NOWAIT 1U + +/* + * vNIC type bits. + */ +#define _CMD_VTYPE_NONE 0U +#define _CMD_VTYPE_ENET 1U +#define _CMD_VTYPE_FC 2U +#define _CMD_VTYPE_SCSI 4U +#define _CMD_VTYPE_ALL (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI) + +/* + * Used to create cmds.. + */ +#define _CMDCF(dir, flags, vtype, nr) \ + (((dir) << _CMD_DIRSHIFT) | \ + ((flags) << _CMD_FLAGSSHIFT) | \ + ((vtype) << _CMD_VTYPESHIFT) | \ + ((nr) << _CMD_NSHIFT)) +#define _CMDC(dir, vtype, nr) _CMDCF(dir, 0, vtype, nr) +#define _CMDCNW(dir, vtype, nr) _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr) + +/* + * Used to decode cmds.. + */ +#define _CMD_DIR(cmd) (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK) +#define _CMD_FLAGS(cmd) (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK) +#define _CMD_VTYPE(cmd) (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK) +#define _CMD_N(cmd) (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +enum vnic_devcmd_cmd { + CMD_NONE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0), + + /* + * mcpu fw info in mem: + * in: + * (uint64_t)a0=paddr to struct vnic_devcmd_fw_info + * action: + * Fills in struct vnic_devcmd_fw_info (128 bytes) + * note: + * An old definition of CMD_MCPU_FW_INFO + */ + CMD_MCPU_FW_INFO_OLD = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1), + + /* + * mcpu fw info in mem: + * in: + * (uint64_t)a0=paddr to struct vnic_devcmd_fw_info + * (uint16_t)a1=size of the structure + * out: + * (uint16_t)a1=0 for in:a1 = 0, + * data size actually written for other values. + * action: + * Fills in first 128 bytes of vnic_devcmd_fw_info for in:a1 = 0, + * first in:a1 bytes for 0 < in:a1 <= 132, + * 132 bytes for other values of in:a1. + * note: + * CMD_MCPU_FW_INFO and CMD_MCPU_FW_INFO_OLD have the same enum 1 + * for source compatibility. + */ + CMD_MCPU_FW_INFO = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 1), + + /* dev-specific block member: + * in: (uint16_t)a0=offset,(uint8_t)a1=size + * out: a0=value + */ + CMD_DEV_SPEC = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2), + + /* stats clear */ + CMD_STATS_CLEAR = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3), + + /* stats dump in mem: (uint64_t)a0=paddr to stats area, + * (uint16_t)a1=sizeof stats area + */ + CMD_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4), + + /* set Rx packet filter: (uint32_t)a0=filters (see CMD_PFILTER_*) */ + CMD_PACKET_FILTER = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7), + + /* set Rx packet filter for all: (uint32_t)a0=filters + * (see CMD_PFILTER_*) + */ + CMD_PACKET_FILTER_ALL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 7), + + /* hang detection notification */ + CMD_HANG_NOTIFY = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8), + + /* MAC address in (u48)a0 */ + CMD_MAC_ADDR = _CMDC(_CMD_DIR_READ, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9), +#define CMD_GET_MAC_ADDR CMD_MAC_ADDR /* some uses are aliased */ + + /* add addr from (u48)a0 */ + CMD_ADDR_ADD = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12), + + /* del addr from (u48)a0 */ + CMD_ADDR_DEL = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13), + + /* add VLAN id in (uint16_t)a0 */ + CMD_VLAN_ADD = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14), + + /* del VLAN id in (uint16_t)a0 */ + CMD_VLAN_DEL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15), + + /* + * nic_cfg in (uint32_t)a0 + * + * Capability query: + * out: (uint64_t) a0= 1 if a1 is valid + * (uint64_t) a1= (NIC_CFG bits supported) | (flags << 32) + * (flags are CMD_NIC_CFG_CAPF_xxx) + */ + CMD_NIC_CFG = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16), + + /* + * nic_cfg_chk (same as nic_cfg, but may return error) + * in (uint32_t)a0 + * + * Capability query: + * out: (uint64_t) a0= 1 if a1 is valid + * (uint64_t) a1= (NIC_CFG bits supported) | (flags << 32) + * (flags are CMD_NIC_CFG_CAPF_xxx) + */ + CMD_NIC_CFG_CHK = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16), + + /* union vnic_rss_key in mem: (uint64_t)a0=paddr, (uint16_t)a1=len */ + CMD_RSS_KEY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17), + + /* union vnic_rss_cpu in mem: (uint64_t)a0=paddr, (uint16_t)a1=len */ + CMD_RSS_CPU = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18), + + /* initiate softreset */ + CMD_SOFT_RESET = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19), + + /* softreset status: + * out: a0=0 reset complete, a0=1 reset in progress */ + CMD_SOFT_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 20), + + /* set struct vnic_devcmd_notify buffer in mem: + * in: + * (uint64_t)a0=paddr to notify (set paddr=0 to unset) + * (uint32_t)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) + * (uint16_t)a1 & 0x0000ffff00000000=intr num (-1 for no intr) + * out: + * (uint32_t)a1 = effective size + */ + CMD_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21), + + /* UNDI API: (uint64_t)a0=paddr to s_PXENV_UNDI_ struct, + * (uint8_t)a1=PXENV_UNDI_xxx + */ + CMD_UNDI = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22), + + /* initiate open sequence (uint32_t)a0=flags (see CMD_OPENF_*) */ + CMD_OPEN = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23), + + /* open status: + * out: a0=0 open complete, a0=1 open in progress */ + CMD_OPEN_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24), + + /* close vnic */ + CMD_CLOSE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25), + + /* initialize virtual link: (uint32_t)a0=flags (see CMD_INITF_*) */ +/***** Replaced by CMD_INIT *****/ + CMD_INIT_v1 = _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26), + + /* variant of CMD_INIT, with provisioning info + * (uint64_t)a0=paddr of vnic_devcmd_provinfo + * (uint32_t)a1=sizeof provision info + */ + CMD_INIT_PROV_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27), + + /* enable virtual link */ + CMD_ENABLE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), + + /* enable virtual link, waiting variant. */ + CMD_ENABLE_WAIT = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), + + /* disable virtual link */ + CMD_DISABLE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29), + + /* stats dump sum of all vnic stats on same uplink in mem: + * (uint64_t)a0=paddr + * (uint16_t)a1=sizeof stats area + */ + CMD_STATS_DUMP_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30), + + /* init status: + * out: a0=0 init complete, a0=1 init in progress + * if a0=0, a1=errno + */ + CMD_INIT_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31), + + /* INT13 API: (uint64_t)a0=paddr to vnic_int13_params struct + * (uint32_t)a1=INT13_CMD_xxx + */ + CMD_INT13 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32), + + /* logical uplink enable/disable: (uint64_t)a0: 0/1=disable/enable */ + CMD_LOGICAL_UPLINK = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33), + + /* undo initialize of virtual link */ + CMD_DEINIT = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34), + + /* initialize virtual link: (uint32_t)a0=flags (see CMD_INITF_*) */ + CMD_INIT = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 35), + + /* check fw capability of a cmd: + * in: (uint32_t)a0=cmd + * out: (uint32_t)a0=errno, 0:valid cmd, a1=supported VNIC_STF_* bits + */ + CMD_CAPABILITY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 36), + + /* persistent binding info + * in: (uint64_t)a0=paddr of arg + * (uint32_t)a1=CMD_PERBI_XXX + */ + CMD_PERBI = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_FC, 37), + + /* Interrupt Assert Register functionality + * in: (uint16_t)a0=interrupt number to assert + */ + CMD_IAR = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 38), + + /* initiate hangreset, like softreset after hang detected */ + CMD_HANG_RESET = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 39), + + /* hangreset status: + * out: a0=0 reset complete, a0=1 reset in progress + */ + CMD_HANG_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 40), + + /* + * Set hw ingress packet vlan rewrite mode: + * in: (uint32_t)a0=new vlan rewrite mode + * out: (uint32_t)a0=old vlan rewrite mode + */ + CMD_IG_VLAN_REWRITE_MODE = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 41), + + /* + * in: (uint16_t)a0=bdf of target vnic + * (uint32_t)a1=cmd to proxy + * a2-a15=args to cmd in a1 + * out: (uint32_t)a0=status of proxied cmd + * a1-a15=out args of proxied cmd + */ + CMD_PROXY_BY_BDF = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 42), + + /* + * As for BY_BDF except a0 is index of hvnlink subordinate vnic + * or SR-IOV virtual vnic + */ + CMD_PROXY_BY_INDEX = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 43), + + /* + * For HPP toggle: + * adapter-info-get + * in: (uint64_t)a0=phsical address of buffer passed in from caller. + * (uint16_t)a1=size of buffer specified in a0. + * out: (uint64_t)a0=phsical address of buffer passed in from caller. + * (uint16_t)a1=actual bytes from VIF-CONFIG-INFO TLV, or + * 0 if no VIF-CONFIG-INFO TLV was ever received. + */ + CMD_CONFIG_INFO_GET = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 44), + + /* + * INT13 API: (uint64_t)a0=paddr to vnic_int13_params struct + * (uint32_t)a1=INT13_CMD_xxx + */ + CMD_INT13_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 45), + + /* + * Set default vlan: + * in: (uint16_t)a0=new default vlan + * (uint16_t)a1=zero for overriding vlan with param a0, + * non-zero for resetting vlan to the default + * out: (uint16_t)a0=old default vlan + */ + CMD_SET_DEFAULT_VLAN = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 46), + + /* init_prov_info2: + * Variant of CMD_INIT_PROV_INFO, where it will not try to enable + * the vnic until CMD_ENABLE2 is issued. + * (uint64_t)a0=paddr of vnic_devcmd_provinfo + * (uint32_t)a1=sizeof provision info + */ + CMD_INIT_PROV_INFO2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 47), + + /* enable2: + * (uint32_t)a0=0 ==> standby + * =CMD_ENABLE2_ACTIVE ==> active + */ + CMD_ENABLE2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 48), + + /* + * cmd_status: + * Returns the status of the specified command + * Input: + * a0 = command for which status is being queried. + * Possible values are: + * CMD_SOFT_RESET + * CMD_HANG_RESET + * CMD_OPEN + * CMD_INIT + * CMD_INIT_PROV_INFO + * CMD_DEINIT + * CMD_INIT_PROV_INFO2 + * CMD_ENABLE2 + * Output: + * if status == STAT_ERROR + * a0 = ERR_ENOTSUPPORTED - status for command in a0 is + * not supported + * if status == STAT_NONE + * a0 = status of the devcmd specified in a0 as follows. + * ERR_SUCCESS - command in a0 completed successfully + * ERR_EINPROGRESS - command in a0 is still in progress + */ + CMD_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 49), + + /* + * Returns interrupt coalescing timer conversion factors. + * After calling this devcmd, ENIC driver can convert + * interrupt coalescing timer in usec into CPU cycles as follows: + * + * intr_timer_cycles = intr_timer_usec * multiplier / divisor + * + * Interrupt coalescing timer in usecs can be be converted/obtained + * from CPU cycles as follows: + * + * intr_timer_usec = intr_timer_cycles * divisor / multiplier + * + * in: none + * out: (uint32_t)a0 = multiplier + * (uint32_t)a1 = divisor + * (uint32_t)a2 = maximum timer value in usec + */ + CMD_INTR_COAL_CONVERT = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 50), + + /* + * ISCSI DUMP API: + * in: (uint64_t)a0=paddr of the param or param itself + * (uint32_t)a1=ISCSI_CMD_xxx + */ + CMD_ISCSI_DUMP_REQ = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 51), + + /* + * ISCSI DUMP STATUS API: + * in: (uint32_t)a0=cmd tag + * in: (uint32_t)a1=ISCSI_CMD_xxx + * out: (uint32_t)a0=cmd status + */ + CMD_ISCSI_DUMP_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 52), + + /* + * Subvnic migration from MQ <--> VF. + * Enable the LIF migration from MQ to VF and vice versa. MQ and VF + * indexes are statically bound at the time of initialization. + * Based on the direction of migration, the resources of either MQ or + * the VF shall be attached to the LIF. + * in: (uint32_t)a0=Direction of Migration + * 0=> Migrate to VF + * 1=> Migrate to MQ + * (uint32_t)a1=VF index (MQ index) + */ + CMD_MIGRATE_SUBVNIC = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 53), + + /* + * Register / Deregister the notification block for MQ subvnics + * in: + * (uint64_t)a0=paddr to notify (set paddr=0 to unset) + * (uint32_t)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) + * (uint16_t)a1 & 0x0000ffff00000000=intr num (-1 for no intr) + * out: + * (uint32_t)a1 = effective size + */ + CMD_SUBVNIC_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 54), + + /* + * Set the predefined mac address as default + * in: + * (u48)a0=mac addr + */ + CMD_SET_MAC_ADDR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 55), + + /* Update the provisioning info of the given VIF + * (uint64_t)a0=paddr of vnic_devcmd_provinfo + * (uint32_t)a1=sizeof provision info + */ + CMD_PROV_INFO_UPDATE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 56), + + /* + * Initialization for the devcmd2 interface. + * in: (uint64_t) a0=host result buffer physical address + * in: (uint16_t) a1=number of entries in result buffer + */ + CMD_INITIALIZE_DEVCMD2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 57), + + /* + * Add a filter. + * in: (uint64_t) a0= filter address + * (uint32_t) a1= size of filter + * out: (uint32_t) a0=filter identifier + * + * Capability query: + * out: (uint64_t) a0= 1 if capability query supported + * (uint64_t) a1= MAX filter type supported + */ + CMD_ADD_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 58), + + /* + * Delete a filter. + * in: (uint32_t) a0=filter identifier + */ + CMD_DEL_FILTER = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 59), + + /* + * Enable a Queue Pair in User space NIC + * in: (uint32_t) a0=Queue Pair number + * (uint32_t) a1= command + */ + CMD_QP_ENABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 60), + + /* + * Disable a Queue Pair in User space NIC + * in: (uint32_t) a0=Queue Pair number + * (uint32_t) a1= command + */ + CMD_QP_DISABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 61), + + /* + * Stats dump Queue Pair in User space NIC + * in: (uint32_t) a0=Queue Pair number + * (uint64_t) a1=host buffer addr for status dump + * (uint32_t) a2=length of the buffer + */ + CMD_QP_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 62), + + /* + * Clear stats for Queue Pair in User space NIC + * in: (uint32_t) a0=Queue Pair number + */ + CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63), + + /* + * UEFI BOOT API: (uint64_t)a0= UEFI FLS_CMD_xxx + * (ui64)a1= paddr for the info buffer + */ + CMD_FC_REQ = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_FC, 64), + + /* + * Return the iSCSI config details required by the EFI Option ROM + * in: (uint32_t) a0=0 Get Boot Info for PXE eNIC as per + * pxe_boot_config_t + * a0=1 Get Boot info for iSCSI enic as per + * iscsi_boot_efi_cfg_t + * in: (uint64_t) a1=Host address where iSCSI config info is returned + */ + CMD_VNIC_BOOT_CONFIG_INFO = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 65), + + /* + * Create a Queue Pair (RoCE) + * in: (uint32_t) a0 = Queue Pair number + * (uint32_t) a1 = Remote QP + * (uint32_t) a2 = RDMA-RQ + * (uint16_t) a3 = RQ Res Group + * (uint16_t) a4 = SQ Res Group + * (uint32_t) a5 = Protection Domain + * (uint64_t) a6 = Remote MAC + * (uint32_t) a7 = start PSN + * (uint16_t) a8 = MSS + * (uint32_t) a9 = protocol version + */ + CMD_RDMA_QP_CREATE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 66), + + /* + * Delete a Queue Pair (RoCE) + * in: (uint32_t) a0 = Queue Pair number + */ + CMD_RDMA_QP_DELETE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 67), + + /* + * Retrieve a Queue Pair's status information (RoCE) + * in: (uint32_t) a0 = Queue Pair number + * (uint64_t) a1 = host buffer addr for QP status struct + * (uint32_t) a2 = length of the buffer + */ + CMD_RDMA_QP_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 68), + + /* + * Use this devcmd for agreeing on the highest common version supported + * by both driver and fw for by features who need such a facility. + * in: (uint64_t) a0 = feature (driver requests for the supported + * versions on this feature) + * out: (uint64_t) a0 = bitmap of all supported versions for that + * feature + */ + CMD_GET_SUPP_FEATURE_VER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 69), + + /* + * Initialize the RDMA notification work queue + * in: (uint64_t) a0 = host buffer address + * in: (uint16_t) a1 = number of entries in buffer + * in: (uint16_t) a2 = resource group number + * in: (uint16_t) a3 = CQ number to post completion + */ + CMD_RDMA_INIT_INFO_BUF = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 70), + + /* + * De-init the RDMA notification work queue + * in: (uint64_t) a0=resource group number + */ + CMD_RDMA_DEINIT_INFO_BUF = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 71), + + /* + * Control (Enable/Disable) overlay offloads on the given vnic + * in: (uint8_t) a0 = OVERLAY_FEATURE_NVGRE : NVGRE + * a0 = OVERLAY_FEATURE_VXLAN : VxLAN + * a0 = OVERLAY_FEATURE_GENEVE : Geneve + * in: (uint8_t) a1 = OVERLAY_OFFLOAD_ENABLE : Enable or + * a1 = OVERLAY_OFFLOAD_DISABLE : Disable or + * a1 = OVERLAY_OFFLOAD_ENABLE_V2 : Enable with version 2 + */ + CMD_OVERLAY_OFFLOAD_CTRL = + _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 72), + + /* + * Configuration of overlay offloads feature on a given vNIC + * in: (uint8_t) a0 = OVERLAY_CFG_VXLAN_PORT_UPDATE : VxLAN + * OVERLAY_CFG_GENEVE_PORT_UPDATE : Geneve + * in: (uint16_t) a1 = unsigned short int port information + */ + CMD_OVERLAY_OFFLOAD_CFG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 73), + + /* + * Return the configured name for the device + * in: (uint64_t) a0=Host address where the name is copied + * (uint32_t) a1=Size of the buffer + */ + CMD_GET_CONFIG_NAME = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 74), + + /* + * Enable group interrupt for the VF + * in: (uint32_t) a0 = GRPINTR_ENABLE : enable + * a0 = GRPINTR_DISABLE : disable + * a0 = GRPINTR_UPD_VECT: update group vector addr + * in: (uint32_t) a1 = interrupt group count + * in: (uint64_t) a2 = Start of host buffer address for DMAing group + * vector bitmap + * in: (uint64_t) a3 = Stride between group vectors + */ + CMD_CONFIG_GRPINTR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 75), + + /* + * Set cq arrary base and size in a list of consective wqs and + * rqs for a device + * in: (uint16_t) a0 = the wq relative index in the device. + * -1 indicates skipping wq configuration + * in: (uint16_t) a1 = the wcq relative index in the device + * in: (uint16_t) a2 = the rq relative index in the device + * -1 indicates skipping rq configuration + * in: (uint16_t) a3 = the rcq relative index in the device + */ + CMD_CONFIG_CQ_ARRAY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 76), + + /* + * Add an advanced filter. + * in: (uint64_t) a0= filter address + * (uint32_t) a1= size of filter + * out: (uint32_t) a0=filter identifier + * + * Capability query: + * in: (uint64_t) a1= supported filter capability exchange modes + * out: (uint64_t) a0= 1 if capability query supported + * if (uint64_t) a1 = 0: a1 = MAX filter type supported + * if (uint64_t) a1 & FILTER_CAP_MODE_V1_FLAG: + * a1 = bitmask of supported filters + * a2 = FILTER_CAP_MODE_V1 + * a3 = bitmask of supported actions + */ + CMD_ADD_ADV_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 77), + + /* + * Perform a Flow Manager Operation (see flowman_api.h) + * in: (uint32_t) a0 = sub-command + * (uint64_t) a1..15 = (sub-command specific) + * + * All arguments that have not been assigned a meaning should be + * initialized to 0 to allow for better driver forward compatibility. + */ + CMD_FLOW_MANAGER_OP = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 88), +}; + +/* Modes for exchanging advanced filter capabilities. The modes supported by + * the driver are passed in the CMD_ADD_ADV_FILTER capability command and the + * mode selected is returned. + * V0: the maximum filter type supported is returned + * V1: bitmasks of supported filters and actions are returned + */ +enum filter_cap_mode { + FILTER_CAP_MODE_V0 = 0, /* Must always be 0 for legacy drivers */ + FILTER_CAP_MODE_V1 = 1, +}; +#define FILTER_CAP_MODE_V1_FLAG (1 << FILTER_CAP_MODE_V1) + +/* CMD_ENABLE2 flags */ +#define CMD_ENABLE2_STANDBY 0x0 +#define CMD_ENABLE2_ACTIVE 0x1 + +/* flags for CMD_OPEN */ +#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */ +#define CMD_OPENF_IG_DESCCACHE 0x2 /* Do not flush IG DESC cache */ + +/* flags for CMD_INIT */ +#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */ + +/* flags for CMD_NIC_CFG */ +#define CMD_NIC_CFG_CAPF_UDP_WEAK (1ULL << 0) /* Bodega-style UDP RSS */ + +/* flags for CMD_PACKET_FILTER */ +#define CMD_PFILTER_DIRECTED 0x01 +#define CMD_PFILTER_MULTICAST 0x02 +#define CMD_PFILTER_BROADCAST 0x04 +#define CMD_PFILTER_PROMISCUOUS 0x08 +#define CMD_PFILTER_ALL_MULTICAST 0x10 + +/* Commands for CMD_QP_ENABLE/CM_QP_DISABLE */ +#define CMD_QP_RQWQ 0x0 + +/* rewrite modes for CMD_IG_VLAN_REWRITE_MODE */ +#define IG_VLAN_REWRITE_MODE_DEFAULT_TRUNK 0 +#define IG_VLAN_REWRITE_MODE_UNTAG_DEFAULT_VLAN 1 +#define IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN 2 +#define IG_VLAN_REWRITE_MODE_PASS_THRU 3 + +enum vnic_devcmd_status { + STAT_NONE = 0, + STAT_BUSY = 1 << 0, /* cmd in progress */ + STAT_ERROR = 1 << 1, /* last cmd caused error (code in a0) */ + STAT_FAILOVER = 1 << 2, /* always set on vnics in pci standby state + * if seen a failover to the standby happened + */ +}; + +enum vnic_devcmd_error { + ERR_SUCCESS = 0, + ERR_EINVAL = 1, + ERR_EFAULT = 2, + ERR_EPERM = 3, + ERR_EBUSY = 4, + ERR_ECMDUNKNOWN = 5, + ERR_EBADSTATE = 6, + ERR_ENOMEM = 7, + ERR_ETIMEDOUT = 8, + ERR_ELINKDOWN = 9, + ERR_EMAXRES = 10, + ERR_ENOTSUPPORTED = 11, + ERR_EINPROGRESS = 12, + ERR_MAX +}; + +/* + * note: hw_version and asic_rev refer to the same thing, + * but have different formats. hw_version is + * a 32-byte string (e.g. "A2") and asic_rev is + * a 16-bit integer (e.g. 0xA2). + */ +struct vnic_devcmd_fw_info { + char fw_version[32]; + char fw_build[32]; + char hw_version[32]; + char hw_serial_number[32]; + uint16_t asic_type; + uint16_t asic_rev; +}; + +enum fwinfo_asic_type { + FWINFO_ASIC_TYPE_UNKNOWN, + FWINFO_ASIC_TYPE_PALO, + FWINFO_ASIC_TYPE_SERENO, + FWINFO_ASIC_TYPE_CRUZ, +}; + +struct vnic_devcmd_notify { + uint32_t csum; /* checksum over following words */ + + uint32_t link_state; /* link up == 1 */ + uint32_t port_speed; /* effective port speed (rate limit) */ + uint32_t mtu; /* MTU */ + uint32_t msglvl; /* requested driver msg lvl */ + uint32_t uif; /* uplink interface */ + uint32_t status; /* status bits (see VNIC_STF_*) */ + uint32_t error; /* error code (see ERR_*) for 1st ERR */ + uint32_t link_down_cnt; /* running count of link down + * transitions + */ + uint32_t perbi_rebuild_cnt; /* running count of perbi rebuilds */ +}; +#define VNIC_STF_FATAL_ERR 0x0001 /* fatal fw error */ +#define VNIC_STF_STD_PAUSE 0x0002 /* standard link-level pause on */ +#define VNIC_STF_PFC_PAUSE 0x0004 /* priority flow control pause on */ +/* all supported status flags */ +#define VNIC_STF_ALL (VNIC_STF_FATAL_ERR |\ + VNIC_STF_STD_PAUSE |\ + VNIC_STF_PFC_PAUSE |\ + 0) + +struct vnic_devcmd_provinfo { + uint8_t oui[3]; + uint8_t type; + uint8_t data[0]; +}; + +/* + * These are used in flags field of different filters to denote + * valid fields used. + */ +#define FILTER_FIELD_VALID(fld) (1 << (fld - 1)) + +#define FILTER_FIELD_USNIC_VLAN FILTER_FIELD_VALID(1) +#define FILTER_FIELD_USNIC_ETHTYPE FILTER_FIELD_VALID(2) +#define FILTER_FIELD_USNIC_PROTO FILTER_FIELD_VALID(3) +#define FILTER_FIELD_USNIC_ID FILTER_FIELD_VALID(4) + +#define FILTER_FIELDS_USNIC (FILTER_FIELD_USNIC_VLAN | \ + FILTER_FIELD_USNIC_ETHTYPE | \ + FILTER_FIELD_USNIC_PROTO | \ + FILTER_FIELD_USNIC_ID) + +struct filter_usnic_id { + uint32_t flags; + uint16_t vlan; + uint16_t ethtype; + uint8_t proto_version; + uint32_t usnic_id; +} __rte_packed; + +#define FILTER_FIELD_5TUP_PROTO FILTER_FIELD_VALID(1) +#define FILTER_FIELD_5TUP_SRC_AD FILTER_FIELD_VALID(2) +#define FILTER_FIELD_5TUP_DST_AD FILTER_FIELD_VALID(3) +#define FILTER_FIELD_5TUP_SRC_PT FILTER_FIELD_VALID(4) +#define FILTER_FIELD_5TUP_DST_PT FILTER_FIELD_VALID(5) + +#define FILTER_FIELDS_IPV4_5TUPLE (FILTER_FIELD_5TUP_PROTO | \ + FILTER_FIELD_5TUP_SRC_AD | \ + FILTER_FIELD_5TUP_DST_AD | \ + FILTER_FIELD_5TUP_SRC_PT | \ + FILTER_FIELD_5TUP_DST_PT) + +/* Enums for the protocol field. */ +enum protocol_e { + PROTO_UDP = 0, + PROTO_TCP = 1, + PROTO_IPV4 = 2, + PROTO_IPV6 = 3 +}; + +struct filter_ipv4_5tuple { + uint32_t flags; + uint32_t protocol; + uint32_t src_addr; + uint32_t dst_addr; + uint16_t src_port; + uint16_t dst_port; +} __rte_packed; + +#define FILTER_FIELD_VMQ_VLAN FILTER_FIELD_VALID(1) +#define FILTER_FIELD_VMQ_MAC FILTER_FIELD_VALID(2) + +#define FILTER_FIELDS_MAC_VLAN (FILTER_FIELD_VMQ_VLAN | \ + FILTER_FIELD_VMQ_MAC) + +#define FILTER_FIELDS_NVGRE FILTER_FIELD_VMQ_MAC + +struct filter_mac_vlan { + uint32_t flags; + uint16_t vlan; + uint8_t mac_addr[6]; +} __rte_packed; + +#define FILTER_FIELD_VLAN_IP_3TUP_VLAN FILTER_FIELD_VALID(1) +#define FILTER_FIELD_VLAN_IP_3TUP_L3_PROTO FILTER_FIELD_VALID(2) +#define FILTER_FIELD_VLAN_IP_3TUP_DST_AD FILTER_FIELD_VALID(3) +#define FILTER_FIELD_VLAN_IP_3TUP_L4_PROTO FILTER_FIELD_VALID(4) +#define FILTER_FIELD_VLAN_IP_3TUP_DST_PT FILTER_FIELD_VALID(5) + +#define FILTER_FIELDS_VLAN_IP_3TUP (FILTER_FIELD_VLAN_IP_3TUP_VLAN | \ + FILTER_FIELD_VLAN_IP_3TUP_L3_PROTO | \ + FILTER_FIELD_VLAN_IP_3TUP_DST_AD | \ + FILTER_FIELD_VLAN_IP_3TUP_L4_PROTO | \ + FILTER_FIELD_VLAN_IP_3TUP_DST_PT) + +struct filter_vlan_ip_3tuple { + uint32_t flags; + uint16_t vlan; + uint16_t l3_protocol; + union { + uint32_t dst_addr_v4; + uint8_t dst_addr_v6[16]; + } u; + uint32_t l4_protocol; + uint16_t dst_port; +} __rte_packed; + +#define FILTER_GENERIC_1_BYTES 64 + +enum filter_generic_1_layer { + FILTER_GENERIC_1_L2, + FILTER_GENERIC_1_L3, + FILTER_GENERIC_1_L4, + FILTER_GENERIC_1_L5, + FILTER_GENERIC_1_NUM_LAYERS +}; + +#define FILTER_GENERIC_1_IPV4 (1 << 0) +#define FILTER_GENERIC_1_IPV6 (1 << 1) +#define FILTER_GENERIC_1_UDP (1 << 2) +#define FILTER_GENERIC_1_TCP (1 << 3) +#define FILTER_GENERIC_1_TCP_OR_UDP (1 << 4) +#define FILTER_GENERIC_1_IP4SUM_OK (1 << 5) +#define FILTER_GENERIC_1_L4SUM_OK (1 << 6) +#define FILTER_GENERIC_1_IPFRAG (1 << 7) + +#define FILTER_GENERIC_1_KEY_LEN 64 + +/* + * Version 1 of generic filter specification + * position is only 16 bits, reserving positions > 64k to be used by firmware + */ +struct filter_generic_1 { + uint16_t position; /* lower position comes first */ + uint32_t mask_flags; + uint32_t val_flags; + uint16_t mask_vlan; + uint16_t val_vlan; + struct { + uint8_t mask[FILTER_GENERIC_1_KEY_LEN]; /* 0 bit means + * " don't care" + */ + uint8_t val[FILTER_GENERIC_1_KEY_LEN]; + } __rte_packed layer[FILTER_GENERIC_1_NUM_LAYERS]; +} __rte_packed; + +/* Specifies the filter_action type. */ +enum { + FILTER_ACTION_RQ_STEERING = 0, + FILTER_ACTION_V2 = 1, + FILTER_ACTION_MAX +}; + +struct filter_action { + uint32_t type; + union { + uint32_t rq_idx; + } u; +} __rte_packed; + +#define FILTER_ACTION_RQ_STEERING_FLAG (1 << 0) +#define FILTER_ACTION_FILTER_ID_FLAG (1 << 1) +#define FILTER_ACTION_DROP_FLAG (1 << 2) +#define FILTER_ACTION_COUNTER_FLAG (1 << 3) +#define FILTER_ACTION_V2_ALL (FILTER_ACTION_RQ_STEERING_FLAG \ + | FILTER_ACTION_DROP_FLAG \ + | FILTER_ACTION_FILTER_ID_FLAG) + +/* Version 2 of filter action must be a strict extension of struct + * filter_action where the first fields exactly match in size and meaning. + */ +struct filter_action_v2 { + uint32_t type; + uint32_t rq_idx; + uint32_t flags; /* use FILTER_ACTION_XXX_FLAG defines */ + uint16_t filter_id; + uint8_t reserved[32]; /* for future expansion */ +} __rte_packed; + +/* Specifies the filter type. */ +enum filter_type { + FILTER_USNIC_ID = 0, + FILTER_IPV4_5TUPLE = 1, + FILTER_MAC_VLAN = 2, + FILTER_VLAN_IP_3TUPLE = 3, + FILTER_NVGRE_VMQ = 4, + FILTER_USNIC_IP = 5, + FILTER_DPDK_1 = 6, + FILTER_FLOWMAN = 7, + FILTER_MAX +}; + +#define FILTER_USNIC_ID_FLAG (1 << FILTER_USNIC_ID) +#define FILTER_IPV4_5TUPLE_FLAG (1 << FILTER_IPV4_5TUPLE) +#define FILTER_MAC_VLAN_FLAG (1 << FILTER_MAC_VLAN) +#define FILTER_VLAN_IP_3TUPLE_FLAG (1 << FILTER_VLAN_IP_3TUPLE) +#define FILTER_NVGRE_VMQ_FLAG (1 << FILTER_NVGRE_VMQ) +#define FILTER_USNIC_IP_FLAG (1 << FILTER_USNIC_IP) +#define FILTER_DPDK_1_FLAG (1 << FILTER_DPDK_1) +#define FILTER_V1_ALL (FILTER_USNIC_ID_FLAG | \ + FILTER_IPV4_5TUPLE_FLAG | \ + FILTER_MAC_VLAN_FLAG | \ + FILTER_VLAN_IP_3TUPLE_FLAG | \ + FILTER_NVGRE_VMQ_FLAG | \ + FILTER_USNIC_IP_FLAG | \ + FILTER_DPDK_1_FLAG) + +struct filter { + uint32_t type; + union { + struct filter_usnic_id usnic; + struct filter_ipv4_5tuple ipv4; + struct filter_mac_vlan mac_vlan; + struct filter_vlan_ip_3tuple vlan_3tuple; + } u; +} __rte_packed; + +/* + * This is a strict superset of "struct filter" and exists only + * because many drivers use "sizeof (struct filter)" in deciding TLV size. + * This new, larger struct filter would cause any code that uses that method + * to not work with older firmware, so we add filter_v2 to hold the + * new filter types. Drivers should use vnic_filter_size() to determine + * the TLV size instead of sizeof (struct fiter_v2) to guard against future + * growth. + */ +struct filter_v2 { + uint32_t type; + union { + struct filter_usnic_id usnic; + struct filter_ipv4_5tuple ipv4; + struct filter_mac_vlan mac_vlan; + struct filter_vlan_ip_3tuple vlan_3tuple; + struct filter_generic_1 generic_1; + } u; +} __rte_packed; + +enum { + CLSF_TLV_FILTER = 0, + CLSF_TLV_ACTION = 1, +}; + +struct filter_tlv { + uint32_t type; + uint32_t length; + uint32_t val[0]; +}; + +/* Data for CMD_ADD_FILTER is 2 TLV and filter + action structs */ +#define FILTER_MAX_BUF_SIZE 100 +#define FILTER_V2_MAX_BUF_SIZE (sizeof(struct filter_v2) + \ + sizeof(struct filter_action_v2) + \ + (2 * sizeof(struct filter_tlv))) + +/* + * Compute actual structure size given filter type. To be "future-proof," + * drivers should use this instead of "sizeof (struct filter_v2)" when + * computing length for TLV. + */ +static inline uint32_t +vnic_filter_size(struct filter_v2 *fp) +{ + uint32_t size; + + switch (fp->type) { + case FILTER_USNIC_ID: + size = sizeof(fp->u.usnic); + break; + case FILTER_IPV4_5TUPLE: + size = sizeof(fp->u.ipv4); + break; + case FILTER_MAC_VLAN: + case FILTER_NVGRE_VMQ: + size = sizeof(fp->u.mac_vlan); + break; + case FILTER_VLAN_IP_3TUPLE: + size = sizeof(fp->u.vlan_3tuple); + break; + case FILTER_USNIC_IP: + case FILTER_DPDK_1: + size = sizeof(fp->u.generic_1); + break; + default: + size = sizeof(fp->u); + break; + } + size += sizeof(fp->type); + return size; +} + + +enum { + CLSF_ADD = 0, + CLSF_DEL = 1, +}; + +/* + * Get the action structure size given action type. To be "future-proof," + * drivers should use this instead of "sizeof (struct filter_action_v2)" + * when computing length for TLV. + */ +static inline uint32_t +vnic_action_size(struct filter_action_v2 *fap) +{ + uint32_t size; + + switch (fap->type) { + case FILTER_ACTION_RQ_STEERING: + size = sizeof(struct filter_action); + break; + case FILTER_ACTION_V2: + size = sizeof(struct filter_action_v2); + break; + default: + size = sizeof(struct filter_action); + break; + } + return size; +} + +/* + * Writing cmd register causes STAT_BUSY to get set in status register. + * When cmd completes, STAT_BUSY will be cleared. + * + * If cmd completed successfully STAT_ERROR will be clear + * and args registers contain cmd-specific results. + * + * If cmd error, STAT_ERROR will be set and args[0] contains error code. + * + * status register is read-only. While STAT_BUSY is set, + * all other register contents are read-only. + */ + +/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. */ +#define VNIC_DEVCMD_NARGS 15 +struct vnic_devcmd { + uint32_t status; /* RO */ + uint32_t cmd; /* RW */ + uint64_t args[VNIC_DEVCMD_NARGS]; /* RW cmd args (little-endian)*/ +}; + +/* + * Version 2 of the interface. + * + * Some things are carried over, notably the vnic_devcmd_cmd enum. + */ + +/* + * Flags for vnic_devcmd2.flags + */ + +#define DEVCMD2_FNORESULT 0x1 /* Don't copy result to host */ + +#define VNIC_DEVCMD2_NARGS VNIC_DEVCMD_NARGS +struct vnic_devcmd2 { + uint16_t pad; + uint16_t flags; + uint32_t cmd; /* same command #defines as original */ + uint64_t args[VNIC_DEVCMD2_NARGS]; +}; + +#define VNIC_DEVCMD2_NRESULTS VNIC_DEVCMD_NARGS +struct devcmd2_result { + uint64_t results[VNIC_DEVCMD2_NRESULTS]; + uint32_t pad; + uint16_t completed_index; /* into copy WQ */ + uint8_t error; /* same error codes as original */ + uint8_t color; /* 0 or 1 as with completion queues */ +}; + +#define DEVCMD2_RING_SIZE 32 +#define DEVCMD2_DESC_SIZE 128 + +#define DEVCMD2_RESULTS_SIZE_MAX ((1 << 16) - 1) + +/* Overlay related definitions */ + +/* + * This enum lists the flag associated with each of the overlay features + */ +typedef enum { + OVERLAY_FEATURE_NVGRE = 1, + OVERLAY_FEATURE_VXLAN, + OVERLAY_FEATURE_GENEVE, + OVERLAY_FEATURE_MAX, +} overlay_feature_t; + +#define OVERLAY_OFFLOAD_ENABLE 0 +#define OVERLAY_OFFLOAD_DISABLE 1 +#define OVERLAY_OFFLOAD_ENABLE_V2 2 + +#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0 +#define OVERLAY_CFG_GENEVE_PORT_UPDATE 1 + +/* + * Use this enum to get the supported versions for each of these features + * If you need to use the devcmd_get_supported_feature_version(), add + * the new feature into this enum and install function handler in devcmd.c + */ +typedef enum { + VIC_FEATURE_VXLAN, + VIC_FEATURE_RDMA, + VIC_FEATURE_GENEVE, + VIC_FEATURE_MAX, +} vic_feature_t; + +/* + * These flags are used in args[1] of devcmd CMD_GET_SUPP_FEATURE_VER + * to indicate the host driver about the VxLAN and Multi WQ features + * supported + */ +#define FEATURE_VXLAN_IPV6_INNER (1 << 0) +#define FEATURE_VXLAN_IPV6_OUTER (1 << 1) +#define FEATURE_VXLAN_MULTI_WQ (1 << 2) + +#define FEATURE_VXLAN_IPV6 (FEATURE_VXLAN_IPV6_INNER | \ + FEATURE_VXLAN_IPV6_OUTER) +/* Support Geneve option bytes */ +#define FEATURE_GENEVE_OPTIONS (1 << 0) + +/* + * CMD_CONFIG_GRPINTR subcommands + */ +typedef enum { + GRPINTR_ENABLE = 1, + GRPINTR_DISABLE, + GRPINTR_UPD_VECT, +} grpintr_subcmd_t; + +#endif /* _VNIC_DEVCMD_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_enet.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_enet.h new file mode 100644 index 000000000..7687951c9 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_enet.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_ENIC_H_ +#define _VNIC_ENIC_H_ + +/* Hardware intr coalesce timer is in units of 1.5us */ +#define INTR_COALESCE_USEC_TO_HW(usec) ((usec) * 2 / 3) +#define INTR_COALESCE_HW_TO_USEC(usec) ((usec) * 3 / 2) + +/* Device-specific region: enet configuration */ +struct vnic_enet_config { + uint32_t flags; + uint32_t wq_desc_count; + uint32_t rq_desc_count; + uint16_t mtu; + uint16_t intr_timer_deprecated; + uint8_t intr_timer_type; + uint8_t intr_mode; + char devname[16]; + uint32_t intr_timer_usec; + uint16_t loop_tag; + uint16_t vf_rq_count; + uint16_t num_arfs; + uint64_t mem_paddr; + uint16_t rdma_qp_id; + uint16_t rdma_qp_count; + uint16_t rdma_resgrp; + uint32_t rdma_mr_id; + uint32_t rdma_mr_count; + uint32_t max_pkt_size; +}; + +#define VENETF_TSO 0x1 /* TSO enabled */ +#define VENETF_LRO 0x2 /* LRO enabled */ +#define VENETF_RXCSUM 0x4 /* RX csum enabled */ +#define VENETF_TXCSUM 0x8 /* TX csum enabled */ +#define VENETF_RSS 0x10 /* RSS enabled */ +#define VENETF_RSSHASH_IPV4 0x20 /* Hash on IPv4 fields */ +#define VENETF_RSSHASH_TCPIPV4 0x40 /* Hash on TCP + IPv4 fields */ +#define VENETF_RSSHASH_IPV6 0x80 /* Hash on IPv6 fields */ +#define VENETF_RSSHASH_TCPIPV6 0x100 /* Hash on TCP + IPv6 fields */ +#define VENETF_RSSHASH_IPV6_EX 0x200 /* Hash on IPv6 extended fields */ +#define VENETF_RSSHASH_TCPIPV6_EX 0x400 /* Hash on TCP + IPv6 ext. fields */ +#define VENETF_LOOP 0x800 /* Loopback enabled */ +#define VENETF_FAILOVER 0x1000 /* Fabric failover enabled */ +#define VENETF_USPACE_NIC 0x2000 /* vHPC enabled */ +#define VENETF_VMQ 0x4000 /* VMQ enabled */ +#define VENETF_ARFS 0x8000 /* ARFS enabled */ +#define VENETF_VXLAN 0x10000 /* VxLAN offload */ +#define VENETF_NVGRE 0x20000 /* NVGRE offload */ +#define VENETF_GRPINTR 0x40000 /* group interrupt */ +#define VENETF_NICSWITCH 0x80000 /* NICSWITCH enabled */ +#define VENETF_RSSHASH_UDPIPV4 0x100000 /* Hash on UDP + IPv4 fields */ +#define VENETF_RSSHASH_UDPIPV6 0x200000 /* Hash on UDP + IPv6 fields */ + +#define VENET_INTR_TYPE_MIN 0 /* Timer specs min interrupt spacing */ +#define VENET_INTR_TYPE_IDLE 1 /* Timer specs idle time before irq */ + +#define VENET_INTR_MODE_ANY 0 /* Try MSI-X, then MSI, then INTx */ +#define VENET_INTR_MODE_MSI 1 /* Try MSI then INTx */ +#define VENET_INTR_MODE_INTX 2 /* Try INTx only */ + +#endif /* _VNIC_ENIC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_flowman.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_flowman.h new file mode 100644 index 000000000..81e2cff1b --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_flowman.h @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ +#ifndef _VNIC_FLOWMAN_H_ +#define _VNIC_FLOWMAN_H_ + +/* This file contains Flow Manager (FM) API of the firmware */ + +/* Flow manager sub-ops */ +enum { + FM_EXACT_TABLE_ALLOC, + FM_TCAM_TABLE_ALLOC, + FM_MATCH_TABLE_FREE, + FM_COUNTER_BRK, + FM_COUNTER_QUERY, + FM_COUNTER_CLEAR_ALL, + FM_COUNTER_DMA, + FM_ACTION_ALLOC, + FM_ACTION_FREE, + FM_EXACT_ENTRY_INSTALL, + FM_TCAM_ENTRY_INSTALL, + FM_MATCH_ENTRY_REMOVE, + FM_VNIC_FIND, + FM_API_VERSION_QUERY, + FM_API_VERSION_SELECT, + FM_INFO_QUERY +}; + +/* + * FKM (flow key metadata) flags used to match packet metadata + * (e.g. packet is tcp) + */ +#define FKM_BITS \ + FBIT(FKM_QTAG) \ + FBIT(FKM_CMD) \ + FBIT(FKM_IPV4) \ + FBIT(FKM_IPV6) \ + FBIT(FKM_ROCE) \ + FBIT(FKM_UDP) \ + FBIT(FKM_TCP) \ + FBIT(FKM_TCPORUDP) \ + FBIT(FKM_IPFRAG) \ + FBIT(FKM_NVGRE) \ + FBIT(FKM_VXLAN) \ + FBIT(FKM_GENEVE) \ + FBIT(FKM_NSH) \ + FBIT(FKM_ROCEV2) \ + FBIT(FKM_VLAN_PRES) \ + FBIT(FKM_IPOK) \ + FBIT(FKM_L4OK) \ + FBIT(FKM_ROCEOK) \ + FBIT(FKM_FCSOK) \ + FBIT(FKM_EG_SPAN) \ + FBIT(FKM_IG_SPAN) \ + FBIT(FKM_EG_HAIRPINNED) + +/* + * FKH (flow key header) flags. + * This selects which headers are valid in the struct. + * This is distinct from metadata in that metadata is requesting actual + * selection criteria. If, for example, a TCAM match with metadata "FKM_UDP" + * is feeding into an exact match table, there may be no need for the + * exact match table to also specify FKM_UDP, so FKH_UDP is used to + * specify that the UDP header fields should be used in the match. + */ +#define FKH_BITS \ + FBIT(FKH_ETHER) \ + FBIT(FKH_QTAG) \ + FBIT(FKH_L2RAW) \ + FBIT(FKH_IPV4) \ + FBIT(FKH_IPV6) \ + FBIT(FKH_L3RAW) \ + FBIT(FKH_UDP) \ + FBIT(FKH_TCP) \ + FBIT(FKH_ICMP) \ + FBIT(FKH_VXLAN) \ + FBIT(FKH_L4RAW) + +#define FBIT(X) X##_BIT, +enum { + FKM_BITS + FKM_BIT_COUNT +}; + +enum { + FKH_BITS + FKH_BIT_COUNT +}; +#undef FBIT +#define FBIT(X) X = (1 << X##_BIT), +enum { + FKM_BITS +}; +enum { + FKH_BITS +}; +#undef FBIT + +#define FM_ETH_ALEN 6 +#define FM_LAYER_SIZE 64 + +/* Header match pattern */ +struct fm_header_set { + uint32_t fk_metadata; /* FKM flags */ + uint32_t fk_header_select; /* FKH flags */ + uint16_t fk_vlan; + /* L2: Ethernet Header (valid if FKH_ETHER) */ + union { + struct { + uint8_t fk_dstmac[FM_ETH_ALEN]; + uint8_t fk_srcmac[FM_ETH_ALEN]; + uint16_t fk_ethtype; + } __rte_packed eth; + uint8_t rawdata[FM_LAYER_SIZE]; + } __rte_packed l2; + /* L3: IPv4 or IPv6 (valid if FKH_IPV4,6) */ + union { + /* Valid if FKH_IPV4 */ + struct { + uint8_t fk_ihl_vers; + uint8_t fk_tos; + uint16_t fk_tot_len; + uint16_t fk_id; + uint16_t fk_frag_off; + uint8_t fk_ttl; + uint8_t fk_proto; + uint16_t fk_check; + uint32_t fk_saddr; + uint32_t fk_daddr; + } __rte_packed ip4; + /* Valid if FKH_IPV6 */ + struct { + union { + struct { + uint32_t fk_un1_flow; + uint16_t fk_un1_plen; + uint8_t fk_un1_nxt; + uint8_t fk_un1_hlim; + } unl; + uint8_t fk_un2_vfc; + } ctl; + uint8_t fk_srcip[16]; + uint8_t fk_dstip[16]; + } __rte_packed ip6; + uint8_t rawdata[FM_LAYER_SIZE]; + } __rte_packed l3; + /* L4: UDP, TCP, or ICMP (valid if FKH_UDP,TCP,ICMP) */ + union { + struct { + uint16_t fk_source; + uint16_t fk_dest; + uint16_t fk_len; + uint16_t fk_check; + } __rte_packed udp; + struct { + uint16_t fk_source; + uint16_t fk_dest; + uint32_t fk_seq; + uint32_t fk_ack_seq; + uint16_t fk_flags; + uint16_t fk_window; + uint16_t fk_check; + uint16_t fk_urg_ptr; + } __rte_packed tcp; + struct { + uint8_t fk_code; + uint8_t fk_type; + } __rte_packed icmp; + uint8_t rawdata[FM_LAYER_SIZE]; + } __rte_packed l4; + /* VXLAN (valid if FKH_VXLAN) */ + struct { + uint8_t fkvx_flags; + uint8_t fkvx_res0[3]; + uint8_t fkvx_vni[3]; + uint8_t fkvx_res1; + } __rte_packed vxlan; + /* Payload or unknown inner-most protocol */ + uint8_t fk_l5_data[64]; +} __rte_packed; + +/* + * FK (flow key) template. + * fk_hdrset specifies a set of headers per layer of encapsulation. + * Currently FM supports two header sets: outer (0) and inner(1) + */ +#define FM_HDRSET_MAX 2 + +struct fm_key_template { + struct fm_header_set fk_hdrset[FM_HDRSET_MAX]; + uint32_t fk_flags; + uint16_t fk_packet_tag; + uint16_t fk_packet_size; + uint16_t fk_port_id; + uint32_t fk_wq_id; /* WQ index */ + uint64_t fk_wq_vnic; /* VNIC handle for WQ index */ +} __rte_packed; + +/* Action operation types */ +enum { + FMOP_NOP = 0, + /* End the action chain. */ + FMOP_END, + /* Drop packet and end the action chain. */ + FMOP_DROP, + /* Steer packet to an RQ. */ + FMOP_RQ_STEER, + /* + * Jump to an exact match table. + * arg1: exact match table handle + */ + FMOP_EXACT_MATCH, + /* Apply CQ-visible mark on packet. Mark is written to RSS HASH. */ + FMOP_MARK, + /* + * Apply CQ-visible mark on packet. Mark is written to a field in + * extended CQ. RSS HASH is preserved. + */ + FMOP_EXT_MARK, + /* + * Apply internal tag which can be matched in subsequent + * stages or hairpin. + */ + FMOP_TAG, + /* Hairpin packet from EG -> IG */ + FMOP_EG_HAIRPIN, + /* Hairpin packet from IG -> EG */ + FMOP_IG_HAIRPIN, + /* Encap with VXLAN and inner VLAN from metadata. */ + FMOP_ENCAP_IVLAN, + /* Encap, no inner VLAN. */ + FMOP_ENCAP_NOIVLAN, + /* Encap, add inner VLAN if present. */ + FMOP_ENCAP, + /* Set outer VLAN. */ + FMOP_SET_OVLAN, + /* Decap when vlan_strip is off */ + FMOP_DECAP_NOSTRIP, + /* Decap and strip VLAN */ + FMOP_DECAP_STRIP, + /* Remove outer VLAN */ + FMOP_POP_VLAN, + /* Set Egress port */ + FMOP_SET_EGPORT, + /* Steer to an RQ without entering EMIT state */ + FMOP_RQ_STEER_ONLY, + /* Set VLAN when replicating encapped packets */ + FMOP_SET_ENCAP_VLAN, + /* Enter EMIT state */ + FMOP_EMIT, + /* Enter MODIFY state */ + FMOP_MODIFY, + FMOP_OP_MAX, +}; + +/* + * Action operation. + * Complex actions are achieved by a series of "transform operations" + * We can have complex transform operations like "decap" or "vxlan + * encap" and also simple ops like insert this data, add PACKET_LEN to + * this address, etc. + */ +struct fm_action_op { + uint32_t fa_op; /* FMOP flags */ + + union { + struct { + uint8_t len1_offset; + uint8_t len1_delta; + uint8_t len2_offset; + uint8_t len2_delta; + uint16_t outer_vlan; + uint8_t template_offset; + uint8_t template_len; + } __rte_packed encap; + struct { + uint16_t rq_index; + uint16_t rq_count; + uint64_t vnic_handle; + } __rte_packed rq_steer; + struct { + uint16_t vlan; + } __rte_packed ovlan; + struct { + uint16_t vlan; + } __rte_packed set_encap_vlan; + struct { + uint16_t mark; + } __rte_packed mark; + struct { + uint32_t ext_mark; + } __rte_packed ext_mark; + struct { + uint8_t tag; + } __rte_packed tag; + struct { + uint64_t handle; + } __rte_packed exact; + struct { + uint32_t egport; + } __rte_packed set_egport; + } __rte_packed; +} __rte_packed; + +#define FM_ACTION_OP_MAX 64 +#define FM_ACTION_DATA_MAX 96 + +/* + * Action is a series of action operations applied to matched + * packet. FMA (flowman action). + */ +struct fm_action { + struct fm_action_op fma_action_ops[FM_ACTION_OP_MAX]; + uint8_t fma_data[FM_ACTION_DATA_MAX]; +} __rte_packed; + +/* Match entry flags. FMEF (flow match entry flag) */ +#define FMEF_COUNTER 0x0001 /* counter index is valid */ + +/* FEM (flow exact match) entry */ +struct fm_exact_match_entry { + struct fm_key_template fem_data; /* Match data. Mask is per table */ + uint32_t fem_flags; /* FMEF_xxx */ + uint64_t fem_action; /* Action handle */ + uint32_t fem_counter; /* Counter index */ +} __rte_packed; + +/* FTM (flow TCAM match) entry */ +struct fm_tcam_match_entry { + struct fm_key_template ftm_mask; /* Key mask */ + struct fm_key_template ftm_data; /* Match data */ + uint32_t ftm_flags; /* FMEF_xxx */ + uint32_t ftm_position; /* Entry position */ + uint64_t ftm_action; /* Action handle */ + uint32_t ftm_counter; /* Counter index */ +} __rte_packed; + +/* Match directions */ +enum { + FM_INGRESS, + FM_EGRESS, + FM_DIR_CNT +}; + +/* Last stage ID, independent of the number of stages in hardware */ +#define FM_STAGE_LAST 0xff + +/* Hash based exact match table. FET (flow exact match table) */ +struct fm_exact_match_table { + uint8_t fet_direction; /* FM_INGRESS or EGRESS*/ + uint8_t fet_stage; + uint8_t pad[2]; + uint32_t fet_max_entries; + uint64_t fet_dflt_action; + struct fm_key_template fet_key; +} __rte_packed; + +/* TCAM based match table. FTT (flow TCAM match table) */ +struct fm_tcam_match_table { + uint8_t ftt_direction; + uint8_t ftt_stage; + uint8_t pad[2]; + uint32_t ftt_max_entries; +} __rte_packed; + +struct fm_counter_counts { + uint64_t fcc_packets; + uint64_t fcc_bytes; +} __rte_packed; + +/* + * Return structure for FM_INFO_QUERY devcmd + */ +#define FM_VERSION 1 /* This header file is for version 1 */ + +struct fm_info { + uint64_t fm_op_mask; /* Bitmask of action supported ops */ + uint64_t fm_current_ts; /* Current VIC timestamp */ + uint64_t fm_clock_freq; /* Timestamp clock frequency */ + uint16_t fm_max_ops; /* Max ops in an action */ + uint8_t fm_stages; /* Number of match-action stages */ + uint8_t pad[5]; + uint32_t fm_counter_count; /* Number of allocated counters */ +} __rte_packed; + +#endif /* _VNIC_FLOWMAN_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.c b/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.c new file mode 100644 index 000000000..e3ef70954 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include "vnic_dev.h" +#include "vnic_intr.h" + +void vnic_intr_free(struct vnic_intr *intr) +{ + intr->ctrl = NULL; +} + +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index) +{ + intr->index = index; + intr->vdev = vdev; + + intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index); + if (!intr->ctrl) { + pr_err("Failed to hook INTR[%d].ctrl resource\n", index); + return -EINVAL; + } + + return 0; +} + +void vnic_intr_init(struct vnic_intr *intr, uint32_t coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion) +{ + vnic_intr_coalescing_timer_set(intr, coalescing_timer); + iowrite32(coalescing_type, &intr->ctrl->coalescing_type); + iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion); + iowrite32(0, &intr->ctrl->int_credits); +} + +void vnic_intr_coalescing_timer_set(struct vnic_intr *intr, + uint32_t coalescing_timer) +{ + iowrite32(vnic_dev_intr_coal_timer_usec_to_hw(intr->vdev, + coalescing_timer), &intr->ctrl->coalescing_timer); +} + +void vnic_intr_clean(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->int_credits); +} diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.h new file mode 100644 index 000000000..6282ae520 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_intr.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_INTR_H_ +#define _VNIC_INTR_H_ + + +#include "vnic_dev.h" + +#define VNIC_INTR_TIMER_TYPE_ABS 0 +#define VNIC_INTR_TIMER_TYPE_QUIET 1 + +/* Interrupt control */ +struct vnic_intr_ctrl { + uint32_t coalescing_timer; /* 0x00 */ + uint32_t pad0; + uint32_t coalescing_value; /* 0x08 */ + uint32_t pad1; + uint32_t coalescing_type; /* 0x10 */ + uint32_t pad2; + uint32_t mask_on_assertion; /* 0x18 */ + uint32_t pad3; + uint32_t mask; /* 0x20 */ + uint32_t pad4; + uint32_t int_credits; /* 0x28 */ + uint32_t pad5; + uint32_t int_credit_return; /* 0x30 */ + uint32_t pad6; +}; + +struct vnic_intr { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_intr_ctrl __iomem *ctrl; /* memory-mapped */ +}; + +static inline void vnic_intr_unmask(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->mask); +} + +static inline void vnic_intr_mask(struct vnic_intr *intr) +{ + iowrite32(1, &intr->ctrl->mask); +} + +static inline int vnic_intr_masked(struct vnic_intr *intr) +{ + return ioread32(&intr->ctrl->mask); +} + +static inline void vnic_intr_return_credits(struct vnic_intr *intr, + unsigned int credits, int unmask, int reset_timer) +{ +#define VNIC_INTR_UNMASK_SHIFT 16 +#define VNIC_INTR_RESET_TIMER_SHIFT 17 + + uint32_t int_credit_return = (credits & 0xffff) | + (unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) | + (reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0); + + iowrite32(int_credit_return, &intr->ctrl->int_credit_return); +} + +static inline unsigned int vnic_intr_credits(struct vnic_intr *intr) +{ + return ioread32(&intr->ctrl->int_credits); +} + +static inline void vnic_intr_return_all_credits(struct vnic_intr *intr) +{ + unsigned int credits = vnic_intr_credits(intr); + int unmask = 1; + int reset_timer = 1; + + vnic_intr_return_credits(intr, credits, unmask, reset_timer); +} + +static inline uint32_t vnic_intr_legacy_pba(uint32_t __iomem *legacy_pba) +{ + /* read PBA without clearing */ + return ioread32(legacy_pba); +} + +void vnic_intr_free(struct vnic_intr *intr); +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index); +void vnic_intr_init(struct vnic_intr *intr, uint32_t coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion); +void vnic_intr_coalescing_timer_set(struct vnic_intr *intr, + uint32_t coalescing_timer); +void vnic_intr_clean(struct vnic_intr *intr); + +#endif /* _VNIC_INTR_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_nic.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_nic.h new file mode 100644 index 000000000..6e4a83d24 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_nic.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_NIC_H_ +#define _VNIC_NIC_H_ + +#define NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_DEFAULT_CPU_SHIFT 0 +#define NIC_CFG_RSS_HASH_TYPE (0xffUL << 8) +#define NIC_CFG_RSS_HASH_TYPE_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_HASH_TYPE_SHIFT 8 +#define NIC_CFG_RSS_HASH_BITS (7UL << 16) +#define NIC_CFG_RSS_HASH_BITS_MASK_FIELD 7UL +#define NIC_CFG_RSS_HASH_BITS_SHIFT 16 +#define NIC_CFG_RSS_BASE_CPU (7UL << 19) +#define NIC_CFG_RSS_BASE_CPU_MASK_FIELD 7UL +#define NIC_CFG_RSS_BASE_CPU_SHIFT 19 +#define NIC_CFG_RSS_ENABLE (1UL << 22) +#define NIC_CFG_RSS_ENABLE_MASK_FIELD 1UL +#define NIC_CFG_RSS_ENABLE_SHIFT 22 +#define NIC_CFG_TSO_IPID_SPLIT_EN (1UL << 23) +#define NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD 1UL +#define NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT 23 +#define NIC_CFG_IG_VLAN_STRIP_EN (1UL << 24) +#define NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD 1UL +#define NIC_CFG_IG_VLAN_STRIP_EN_SHIFT 24 + +#define NIC_CFG_RSS_HASH_TYPE_UDP_IPV4 (1 << 0) +#define NIC_CFG_RSS_HASH_TYPE_IPV4 (1 << 1) +#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 (1 << 2) +#define NIC_CFG_RSS_HASH_TYPE_IPV6 (1 << 3) +#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 (1 << 4) +#define NIC_CFG_RSS_HASH_TYPE_RSVD1 (1 << 5) +#define NIC_CFG_RSS_HASH_TYPE_RSVD2 (1 << 6) +#define NIC_CFG_RSS_HASH_TYPE_UDP_IPV6 (1 << 7) + +static inline void vnic_set_nic_cfg(uint32_t *nic_cfg, + uint8_t rss_default_cpu, uint8_t rss_hash_type, + uint8_t rss_hash_bits, uint8_t rss_base_cpu, + uint8_t rss_enable, uint8_t tso_ipid_split_en, + uint8_t ig_vlan_strip_en) +{ + *nic_cfg = (rss_default_cpu & NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD) | + ((rss_hash_type & NIC_CFG_RSS_HASH_TYPE_MASK_FIELD) + << NIC_CFG_RSS_HASH_TYPE_SHIFT) | + ((rss_hash_bits & NIC_CFG_RSS_HASH_BITS_MASK_FIELD) + << NIC_CFG_RSS_HASH_BITS_SHIFT) | + ((rss_base_cpu & NIC_CFG_RSS_BASE_CPU_MASK_FIELD) + << NIC_CFG_RSS_BASE_CPU_SHIFT) | + ((rss_enable & NIC_CFG_RSS_ENABLE_MASK_FIELD) + << NIC_CFG_RSS_ENABLE_SHIFT) | + ((tso_ipid_split_en & NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD) + << NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT) | + ((ig_vlan_strip_en & NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD) + << NIC_CFG_IG_VLAN_STRIP_EN_SHIFT); +} + +#endif /* _VNIC_NIC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_resource.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_resource.h new file mode 100644 index 000000000..870a4de6e --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_resource.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_RESOURCE_H_ +#define _VNIC_RESOURCE_H_ + +#define VNIC_RES_MAGIC 0x766E6963L /* 'vnic' */ +#define VNIC_RES_VERSION 0x00000000L +#define MGMTVNIC_MAGIC 0x544d474dL /* 'MGMT' */ +#define MGMTVNIC_VERSION 0x00000000L + +/* The MAC address assigned to the CFG vNIC is fixed. */ +#define MGMTVNIC_MAC { 0x02, 0x00, 0x54, 0x4d, 0x47, 0x4d } + +/* vNIC resource types */ +enum vnic_res_type { + RES_TYPE_EOL, /* End-of-list */ + RES_TYPE_WQ, /* Work queues */ + RES_TYPE_RQ, /* Receive queues */ + RES_TYPE_CQ, /* Completion queues */ + RES_TYPE_MEM, /* Window to dev memory */ + RES_TYPE_NIC_CFG, /* Enet NIC config registers */ + RES_TYPE_RSS_KEY, /* Enet RSS secret key */ + RES_TYPE_RSS_CPU, /* Enet RSS indirection table */ + RES_TYPE_TX_STATS, /* Netblock Tx statistic regs */ + RES_TYPE_RX_STATS, /* Netblock Rx statistic regs */ + RES_TYPE_INTR_CTRL, /* Interrupt ctrl table */ + RES_TYPE_INTR_TABLE, /* MSI/MSI-X Interrupt table */ + RES_TYPE_INTR_PBA, /* MSI/MSI-X PBA table */ + RES_TYPE_INTR_PBA_LEGACY, /* Legacy intr status */ + RES_TYPE_DEBUG, /* Debug-only info */ + RES_TYPE_DEV, /* Device-specific region */ + RES_TYPE_DEVCMD, /* Device command region */ + RES_TYPE_PASS_THRU_PAGE, /* Pass-thru page */ + RES_TYPE_SUBVNIC, /* subvnic resource type */ + RES_TYPE_MQ_WQ, /* MQ Work queues */ + RES_TYPE_MQ_RQ, /* MQ Receive queues */ + RES_TYPE_MQ_CQ, /* MQ Completion queues */ + RES_TYPE_DEPRECATED1, /* Old version of devcmd 2 */ + RES_TYPE_DEVCMD2, /* Device control region */ + RES_TYPE_MAX, /* Count of resource types */ +}; + +struct vnic_resource_header { + uint32_t magic; + uint32_t version; +}; + +struct mgmt_barmap_hdr { + uint32_t magic; /* magic number */ + uint32_t version; /* header format version */ + uint16_t lif; /* loopback lif for mgmt frames */ + uint16_t pci_slot; /* installed pci slot */ + char serial[16]; /* card serial number */ +}; + +struct vnic_resource { + uint8_t type; + uint8_t bar; + uint8_t pad[2]; + uint32_t bar_offset; + uint32_t count; +}; + +#endif /* _VNIC_RESOURCE_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.c b/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.c new file mode 100644 index 000000000..1af96a941 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <rte_memzone.h> +#include "vnic_dev.h" +#include "vnic_rq.h" + +void vnic_rq_free(struct vnic_rq *rq) +{ + struct vnic_dev *vdev; + + vdev = rq->vdev; + + vnic_dev_free_desc_ring(vdev, &rq->ring); + + rq->ctrl = NULL; +} + +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int rc; + char res_name[RTE_MEMZONE_NAMESIZE]; + static int instance; + + rq->index = index; + rq->vdev = vdev; + + rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index); + if (!rq->ctrl) { + pr_err("Failed to hook RQ[%u] resource\n", index); + return -EINVAL; + } + + vnic_rq_disable(rq); + + snprintf(res_name, sizeof(res_name), "%d-rq-%u", instance++, index); + rc = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size, + rq->socket_id, res_name); + return rc; +} + +void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, + unsigned int fetch_index, unsigned int posted_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + uint64_t paddr; + unsigned int count = rq->ring.desc_count; + + paddr = (uint64_t)rq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &rq->ctrl->ring_base); + iowrite32(count, &rq->ctrl->ring_size); + iowrite32(cq_index, &rq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset); + iowrite32(0, &rq->ctrl->error_status); + iowrite32(fetch_index, &rq->ctrl->fetch_index); + iowrite32(posted_index, &rq->ctrl->posted_index); + if (rq->data_queue_enable) + iowrite32(((1 << 10) | rq->data_queue_idx), + &rq->ctrl->data_ring); + else + iowrite32(0, &rq->ctrl->data_ring); +} + +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + uint32_t fetch_index = 0; + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + + if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ + /* Hardware surprise removal: reset fetch_index */ + fetch_index = 0; + } + + vnic_rq_init_start(rq, cq_index, + fetch_index, fetch_index, + error_interrupt_enable, + error_interrupt_offset); + rq->rxst_idx = 0; + rq->tot_pkts = 0; + rq->pkt_first_seg = NULL; + rq->pkt_last_seg = NULL; +} + +unsigned int vnic_rq_error_status(struct vnic_rq *rq) +{ + return ioread32(&rq->ctrl->error_status); +} + +void vnic_rq_enable(struct vnic_rq *rq) +{ + iowrite32(1, &rq->ctrl->enable); +} + +int vnic_rq_disable(struct vnic_rq *rq) +{ + unsigned int wait; + + iowrite32(0, &rq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 1000; wait++) { + if (!(ioread32(&rq->ctrl->running))) + return 0; + usleep(10); + } + + pr_err("Failed to disable RQ[%d]\n", rq->index); + + return -ETIMEDOUT; +} + +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct rte_mbuf **buf)) +{ + struct rte_mbuf **buf; + uint32_t fetch_index, i; + unsigned int count = rq->ring.desc_count; + + buf = &rq->mbuf_ring[0]; + + for (i = 0; i < count; i++) { + (*buf_clean)(buf); + buf++; + } + rq->ring.desc_avail = count - 1; + rq->rx_nb_hold = 0; + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + + if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ + /* Hardware surprise removal: reset fetch_index */ + fetch_index = 0; + } + + iowrite32(fetch_index, &rq->ctrl->posted_index); + + vnic_dev_clear_desc_ring(&rq->ring); +} diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.h new file mode 100644 index 000000000..cfe65015d --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_rq.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_RQ_H_ +#define _VNIC_RQ_H_ + +#include <stdbool.h> + +#include "vnic_dev.h" +#include "vnic_cq.h" + +/* Receive queue control */ +struct vnic_rq_ctrl { + uint64_t ring_base; /* 0x00 */ + uint32_t ring_size; /* 0x08 */ + uint32_t pad0; + uint32_t posted_index; /* 0x10 */ + uint32_t pad1; + uint32_t cq_index; /* 0x18 */ + uint32_t pad2; + uint32_t enable; /* 0x20 */ + uint32_t pad3; + uint32_t running; /* 0x28 */ + uint32_t pad4; + uint32_t fetch_index; /* 0x30 */ + uint32_t pad5; + uint32_t error_interrupt_enable; /* 0x38 */ + uint32_t pad6; + uint32_t error_interrupt_offset; /* 0x40 */ + uint32_t pad7; + uint32_t error_status; /* 0x48 */ + uint32_t pad8; + uint32_t tcp_sn; /* 0x50 */ + uint32_t pad9; + uint32_t unused; /* 0x58 */ + uint32_t pad10; + uint32_t dca_select; /* 0x60 */ + uint32_t pad11; + uint32_t dca_value; /* 0x68 */ + uint32_t pad12; + uint32_t data_ring; /* 0x70 */ + uint32_t pad13; + uint32_t header_split; /* 0x78 */ + uint32_t pad14; +}; + +struct vnic_rq { + unsigned int index; + unsigned int posted_index; + struct vnic_dev *vdev; + struct vnic_rq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct rte_mbuf **free_mbufs; /* reserve of free mbufs */ + int num_free_mbufs; + struct rte_mbuf **mbuf_ring; /* array of allocated mbufs */ + unsigned int mbuf_next_idx; /* next mb to consume */ + void *os_buf_head; + unsigned int pkts_outstanding; + uint16_t rx_nb_hold; + uint16_t rx_free_thresh; + unsigned int socket_id; + struct rte_mempool *mp; + uint16_t rxst_idx; + uint32_t tot_pkts; + uint16_t data_queue_idx; + uint8_t data_queue_enable; + uint8_t is_sop; + uint8_t in_use; + struct rte_mbuf *pkt_first_seg; + struct rte_mbuf *pkt_last_seg; + unsigned int max_mbufs_per_pkt; + uint16_t tot_nb_desc; + bool need_initial_post; +}; + +static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) +{ + /* how many does SW own? */ + return rq->ring.desc_avail; +} + +static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq) +{ + /* how many does HW own? */ + return rq->ring.desc_count - rq->ring.desc_avail - 1; +} + + + +enum desc_return_options { + VNIC_RQ_RETURN_DESC, + VNIC_RQ_DEFER_RETURN_DESC, +}; + +static inline int vnic_rq_fill(struct vnic_rq *rq, + int (*buf_fill)(struct vnic_rq *rq)) +{ + int err; + + while (vnic_rq_desc_avail(rq) > 0) { + + err = (*buf_fill)(rq); + if (err) + return err; + } + + return 0; +} + +static inline int vnic_rq_fill_count(struct vnic_rq *rq, + int (*buf_fill)(struct vnic_rq *rq), unsigned int count) +{ + int err; + + while ((vnic_rq_desc_avail(rq) > 0) && (count--)) { + + err = (*buf_fill)(rq); + if (err) + return err; + } + + return 0; +} + +void vnic_rq_free(struct vnic_rq *rq); +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, + unsigned int fetch_index, unsigned int posted_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_rq_error_out(struct vnic_rq *rq, unsigned int error); +unsigned int vnic_rq_error_status(struct vnic_rq *rq); +void vnic_rq_enable(struct vnic_rq *rq); +int vnic_rq_disable(struct vnic_rq *rq); +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct rte_mbuf **buf)); +#endif /* _VNIC_RQ_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_rss.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_rss.h new file mode 100644 index 000000000..2b3c571f8 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_rss.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_RSS_H_ +#define _VNIC_RSS_H_ + +/* RSS key array */ +union vnic_rss_key { + struct { + uint8_t b[10]; + uint8_t b_pad[6]; + } key[4]; + uint64_t raw[8]; +}; + +/* RSS cpu array */ +union vnic_rss_cpu { + struct { + uint8_t b[4]; + uint8_t b_pad[4]; + } cpu[32]; + uint64_t raw[32]; +}; + +#endif /* _VNIC_RSS_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_stats.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_stats.h new file mode 100644 index 000000000..a2fb40f07 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_stats.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_STATS_H_ +#define _VNIC_STATS_H_ + +/* Tx statistics */ +struct vnic_tx_stats { + uint64_t tx_frames_ok; + uint64_t tx_unicast_frames_ok; + uint64_t tx_multicast_frames_ok; + uint64_t tx_broadcast_frames_ok; + uint64_t tx_bytes_ok; + uint64_t tx_unicast_bytes_ok; + uint64_t tx_multicast_bytes_ok; + uint64_t tx_broadcast_bytes_ok; + uint64_t tx_drops; + uint64_t tx_errors; + uint64_t tx_tso; + uint64_t rsvd[16]; +}; + +/* Rx statistics */ +struct vnic_rx_stats { + uint64_t rx_frames_ok; + uint64_t rx_frames_total; + uint64_t rx_unicast_frames_ok; + uint64_t rx_multicast_frames_ok; + uint64_t rx_broadcast_frames_ok; + uint64_t rx_bytes_ok; + uint64_t rx_unicast_bytes_ok; + uint64_t rx_multicast_bytes_ok; + uint64_t rx_broadcast_bytes_ok; + uint64_t rx_drop; + uint64_t rx_no_bufs; + uint64_t rx_errors; + uint64_t rx_rss; + uint64_t rx_crc_errors; + uint64_t rx_frames_64; + uint64_t rx_frames_127; + uint64_t rx_frames_255; + uint64_t rx_frames_511; + uint64_t rx_frames_1023; + uint64_t rx_frames_1518; + uint64_t rx_frames_to_max; + uint64_t rsvd[16]; +}; + +struct vnic_stats { + struct vnic_tx_stats tx; + struct vnic_rx_stats rx; +}; + +#endif /* _VNIC_STATS_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.c b/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.c new file mode 100644 index 000000000..fc6dde5ae --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.c @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include "vnic_dev.h" +#include "vnic_wq.h" + +static inline +int vnic_wq_get_ctrl(struct vnic_dev *vdev, struct vnic_wq *wq, + unsigned int index, enum vnic_res_type res_type) +{ + wq->ctrl = vnic_dev_get_res(vdev, res_type, index); + if (!wq->ctrl) + return -EINVAL; + return 0; +} + +static inline +int vnic_wq_alloc_ring(struct vnic_dev *vdev, struct vnic_wq *wq, + unsigned int desc_count, unsigned int desc_size) +{ + char res_name[RTE_MEMZONE_NAMESIZE]; + static int instance; + + snprintf(res_name, sizeof(res_name), "%d-wq-%u", instance++, wq->index); + return vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size, + wq->socket_id, res_name); +} + +static int vnic_wq_alloc_bufs(struct vnic_wq *wq) +{ + unsigned int count = wq->ring.desc_count; + /* Allocate the mbuf ring */ + wq->bufs = (struct rte_mbuf **)rte_zmalloc_socket("wq->bufs", + sizeof(struct rte_mbuf *) * count, + RTE_CACHE_LINE_SIZE, wq->socket_id); + wq->head_idx = 0; + wq->tail_idx = 0; + if (wq->bufs == NULL) + return -ENOMEM; + return 0; +} + +void vnic_wq_free(struct vnic_wq *wq) +{ + struct vnic_dev *vdev; + + vdev = wq->vdev; + + vnic_dev_free_desc_ring(vdev, &wq->ring); + + rte_free(wq->bufs); + wq->ctrl = NULL; +} + + +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + wq->index = index; + wq->vdev = vdev; + + err = vnic_wq_get_ctrl(vdev, wq, index, RES_TYPE_WQ); + if (err) { + pr_err("Failed to hook WQ[%d] resource, err %d\n", index, err); + return err; + } + + vnic_wq_disable(wq); + + err = vnic_wq_alloc_ring(vdev, wq, desc_count, desc_size); + if (err) + return err; + + err = vnic_wq_alloc_bufs(wq); + if (err) { + vnic_wq_free(wq); + return err; + } + + return 0; +} + +void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, + unsigned int fetch_index, unsigned int posted_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + uint64_t paddr; + unsigned int count = wq->ring.desc_count; + + paddr = (uint64_t)wq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &wq->ctrl->ring_base); + iowrite32(count, &wq->ctrl->ring_size); + iowrite32(fetch_index, &wq->ctrl->fetch_index); + iowrite32(posted_index, &wq->ctrl->posted_index); + iowrite32(cq_index, &wq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); + iowrite32(0, &wq->ctrl->error_status); + + wq->head_idx = fetch_index; + wq->tail_idx = wq->head_idx; +} + +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + vnic_wq_init_start(wq, cq_index, 0, 0, + error_interrupt_enable, + error_interrupt_offset); + wq->cq_pend = 0; + wq->last_completed_index = 0; +} + +unsigned int vnic_wq_error_status(struct vnic_wq *wq) +{ + return ioread32(&wq->ctrl->error_status); +} + +void vnic_wq_enable(struct vnic_wq *wq) +{ + iowrite32(1, &wq->ctrl->enable); +} + +int vnic_wq_disable(struct vnic_wq *wq) +{ + unsigned int wait; + + iowrite32(0, &wq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 1000; wait++) { + if (!(ioread32(&wq->ctrl->running))) + return 0; + usleep(10); + } + + pr_err("Failed to disable WQ[%d]\n", wq->index); + + return -ETIMEDOUT; +} + +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct rte_mbuf **buf)) +{ + struct rte_mbuf **buf; + unsigned int to_clean = wq->tail_idx; + + buf = &wq->bufs[to_clean]; + + while (vnic_wq_desc_used(wq) > 0) { + + (*buf_clean)(buf); + to_clean = buf_idx_incr(wq->ring.desc_count, to_clean); + + buf = &wq->bufs[to_clean]; + wq->ring.desc_avail++; + } + + wq->head_idx = 0; + wq->tail_idx = 0; + wq->last_completed_index = 0; + *((uint32_t *)wq->cqmsg_rz->addr) = 0; + + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(0, &wq->ctrl->error_status); + + vnic_dev_clear_desc_ring(&wq->ring); +} diff --git a/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.h b/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.h new file mode 100644 index 000000000..789a50a64 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/vnic_wq.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _VNIC_WQ_H_ +#define _VNIC_WQ_H_ + + +#include "vnic_dev.h" +#include "vnic_cq.h" +#include <rte_memzone.h> + +/* Work queue control */ +struct vnic_wq_ctrl { + uint64_t ring_base; /* 0x00 */ + uint32_t ring_size; /* 0x08 */ + uint32_t pad0; + uint32_t posted_index; /* 0x10 */ + uint32_t pad1; + uint32_t cq_index; /* 0x18 */ + uint32_t pad2; + uint32_t enable; /* 0x20 */ + uint32_t pad3; + uint32_t running; /* 0x28 */ + uint32_t pad4; + uint32_t fetch_index; /* 0x30 */ + uint32_t pad5; + uint32_t dca_value; /* 0x38 */ + uint32_t pad6; + uint32_t error_interrupt_enable; /* 0x40 */ + uint32_t pad7; + uint32_t error_interrupt_offset; /* 0x48 */ + uint32_t pad8; + uint32_t error_status; /* 0x50 */ + uint32_t pad9; +}; + +struct vnic_wq { + unsigned int index; + uint64_t tx_offload_notsup_mask; + struct vnic_dev *vdev; + struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct rte_mbuf **bufs; + unsigned int head_idx; + unsigned int cq_pend; + unsigned int tail_idx; + unsigned int socket_id; + const struct rte_memzone *cqmsg_rz; + uint16_t last_completed_index; + uint64_t offloads; +}; + +static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) +{ + /* how many does SW own? */ + return wq->ring.desc_avail; +} + +static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) +{ + /* how many does HW own? */ + return wq->ring.desc_count - wq->ring.desc_avail - 1; +} + +#define PI_LOG2_CACHE_LINE_SIZE 5 +#define PI_INDEX_BITS 12 +#define PI_INDEX_MASK ((1U << PI_INDEX_BITS) - 1) +#define PI_PREFETCH_LEN_MASK ((1U << PI_LOG2_CACHE_LINE_SIZE) - 1) +#define PI_PREFETCH_LEN_OFF 16 +#define PI_PREFETCH_ADDR_BITS 43 +#define PI_PREFETCH_ADDR_MASK ((1ULL << PI_PREFETCH_ADDR_BITS) - 1) +#define PI_PREFETCH_ADDR_OFF 21 + +/** How many cache lines are touched by buffer (addr, len). */ +static inline unsigned int num_cache_lines_touched(dma_addr_t addr, + unsigned int len) +{ + const unsigned long mask = PI_PREFETCH_LEN_MASK; + const unsigned long laddr = (unsigned long)addr; + unsigned long lines, equiv_len; + /* A. If addr is aligned, our solution is just to round up len to the + next boundary. + + e.g. addr = 0, len = 48 + +--------------------+ + |XXXXXXXXXXXXXXXXXXXX| 32-byte cacheline a + +--------------------+ + |XXXXXXXXXX | cacheline b + +--------------------+ + + B. If addr is not aligned, however, we may use an extra + cacheline. e.g. addr = 12, len = 22 + + +--------------------+ + | XXXXXXXXXXXXX| + +--------------------+ + |XX | + +--------------------+ + + Our solution is to make the problem equivalent to case A + above by adding the empty space in the first cacheline to the length: + unsigned long len; + + +--------------------+ + |eeeeeeeXXXXXXXXXXXXX| "e" is empty space, which we add to len + +--------------------+ + |XX | + +--------------------+ + + */ + equiv_len = len + (laddr & mask); + + /* Now we can just round up this len to the next 32-byte boundary. */ + lines = (equiv_len + mask) & (~mask); + + /* Scale bytes -> cachelines. */ + return lines >> PI_LOG2_CACHE_LINE_SIZE; +} + +static inline uint64_t vnic_cached_posted_index(dma_addr_t addr, + unsigned int len, + unsigned int index) +{ + unsigned int num_cache_lines = num_cache_lines_touched(addr, len); + /* Wish we could avoid a branch here. We could have separate + * vnic_wq_post() and vinc_wq_post_inline(), the latter + * only supporting < 1k (2^5 * 2^5) sends, I suppose. This would + * eliminate the if (eop) branch as well. + */ + if (num_cache_lines > PI_PREFETCH_LEN_MASK) + num_cache_lines = 0; + return (index & PI_INDEX_MASK) | + ((num_cache_lines & PI_PREFETCH_LEN_MASK) << PI_PREFETCH_LEN_OFF) | + (((addr >> PI_LOG2_CACHE_LINE_SIZE) & + PI_PREFETCH_ADDR_MASK) << PI_PREFETCH_ADDR_OFF); +} + +static inline uint32_t +buf_idx_incr(uint32_t n_descriptors, uint32_t idx) +{ + idx++; + if (unlikely(idx == n_descriptors)) + idx = 0; + return idx; +} + +void vnic_wq_free(struct vnic_wq *wq); +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, + unsigned int fetch_index, unsigned int posted_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error); +unsigned int vnic_wq_error_status(struct vnic_wq *wq); +void vnic_wq_enable(struct vnic_wq *wq); +int vnic_wq_disable(struct vnic_wq *wq); +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct rte_mbuf **buf)); +#endif /* _VNIC_WQ_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/base/wq_enet_desc.h b/src/spdk/dpdk/drivers/net/enic/base/wq_enet_desc.h new file mode 100644 index 000000000..e1ad18798 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/base/wq_enet_desc.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _WQ_ENET_DESC_H_ +#define _WQ_ENET_DESC_H_ + +#include <rte_byteorder.h> + +/* Ethernet work queue descriptor: 16B */ +struct wq_enet_desc { + uint64_t address; + uint16_t length; + uint16_t mss_loopback; + uint16_t header_length_flags; + uint16_t vlan_tag; +}; + +#define WQ_ENET_ADDR_BITS 64 +#define WQ_ENET_LEN_BITS 14 +#define WQ_ENET_LEN_MASK ((1 << WQ_ENET_LEN_BITS) - 1) +#define WQ_ENET_MSS_BITS 14 +#define WQ_ENET_MSS_MASK ((1 << WQ_ENET_MSS_BITS) - 1) +#define WQ_ENET_MSS_SHIFT 2 +#define WQ_ENET_LOOPBACK_SHIFT 1 +#define WQ_ENET_HDRLEN_BITS 10 +#define WQ_ENET_HDRLEN_MASK ((1 << WQ_ENET_HDRLEN_BITS) - 1) +#define WQ_ENET_FLAGS_OM_BITS 2 +#define WQ_ENET_FLAGS_OM_MASK ((1 << WQ_ENET_FLAGS_OM_BITS) - 1) +#define WQ_ENET_FLAGS_EOP_SHIFT 12 +#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT 13 +#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT 14 +#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT 15 + +#define WQ_ENET_OFFLOAD_MODE_CSUM 0 +#define WQ_ENET_OFFLOAD_MODE_RESERVED 1 +#define WQ_ENET_OFFLOAD_MODE_CSUM_L4 2 +#define WQ_ENET_OFFLOAD_MODE_TSO 3 + +static inline void wq_enet_desc_enc(struct wq_enet_desc *desc, + uint64_t address, uint16_t length, uint16_t mss, uint16_t header_length, + uint8_t offload_mode, uint8_t eop, uint8_t cq_entry, uint8_t fcoe_encap, + uint8_t vlan_tag_insert, uint16_t vlan_tag, uint8_t loopback) +{ + desc->address = rte_cpu_to_le_64(address); + desc->length = rte_cpu_to_le_16(length & WQ_ENET_LEN_MASK); + desc->mss_loopback = rte_cpu_to_le_16((mss & WQ_ENET_MSS_MASK) << + WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT); + desc->header_length_flags = rte_cpu_to_le_16 + ((header_length & WQ_ENET_HDRLEN_MASK) | + (offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS | + (eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT | + (cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT | + (fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT | + (vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT); + desc->vlan_tag = rte_cpu_to_le_16(vlan_tag); +} + +static inline void wq_enet_desc_dec(struct wq_enet_desc *desc, + uint64_t *address, uint16_t *length, uint16_t *mss, + uint16_t *header_length, uint8_t *offload_mode, uint8_t *eop, + uint8_t *cq_entry, uint8_t *fcoe_encap, uint8_t *vlan_tag_insert, + uint16_t *vlan_tag, uint8_t *loopback) +{ + *address = rte_le_to_cpu_64(desc->address); + *length = rte_le_to_cpu_16(desc->length) & WQ_ENET_LEN_MASK; + *mss = (rte_le_to_cpu_16(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) & + WQ_ENET_MSS_MASK; + *loopback = (uint8_t)((rte_le_to_cpu_16(desc->mss_loopback) >> + WQ_ENET_LOOPBACK_SHIFT) & 1); + *header_length = rte_le_to_cpu_16(desc->header_length_flags) & + WQ_ENET_HDRLEN_MASK; + *offload_mode = + (uint8_t)((rte_le_to_cpu_16(desc->header_length_flags) >> + WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK); + *eop = (uint8_t)((rte_le_to_cpu_16(desc->header_length_flags) >> + WQ_ENET_FLAGS_EOP_SHIFT) & 1); + *cq_entry = (uint8_t)((rte_le_to_cpu_16(desc->header_length_flags) >> + WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1); + *fcoe_encap = (uint8_t)((rte_le_to_cpu_16(desc->header_length_flags) >> + WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1); + *vlan_tag_insert = + (uint8_t)((rte_le_to_cpu_16(desc->header_length_flags) >> + WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1); + *vlan_tag = rte_le_to_cpu_16(desc->vlan_tag); +} + +#endif /* _WQ_ENET_DESC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/enic.h b/src/spdk/dpdk/drivers/net/enic/enic.h new file mode 100644 index 000000000..a95e51eea --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _ENIC_H_ +#define _ENIC_H_ + +#include <rte_vxlan.h> +#include <rte_ether.h> +#include "vnic_enet.h" +#include "vnic_dev.h" +#include "vnic_flowman.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_nic.h" +#include "vnic_rss.h" +#include "enic_res.h" +#include "cq_enet_desc.h" +#include <stdbool.h> +#include <sys/queue.h> +#include <rte_spinlock.h> + +#define DRV_NAME "enic_pmd" +#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Poll-mode Driver" +#define DRV_COPYRIGHT "Copyright 2008-2015 Cisco Systems, Inc" + +#define VLAN_ETH_HLEN 18 + +#define ENICPMD_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0) + +#define ENICPMD_BDF_LENGTH 13 /* 0000:00:00.0'\0' */ +#define ENIC_CALC_IP_CKSUM 1 +#define ENIC_CALC_TCP_UDP_CKSUM 2 +#define ENIC_MAX_MTU 9000 +#define ENIC_PAGE_SIZE 4096 +#define PAGE_ROUND_UP(x) \ + ((((unsigned long)(x)) + ENIC_PAGE_SIZE-1) & (~(ENIC_PAGE_SIZE-1))) + +#define ENICPMD_VFIO_PATH "/dev/vfio/vfio" +/*#define ENIC_DESC_COUNT_MAKE_ODD (x) do{if ((~(x)) & 1) { (x)--; } }while(0)*/ + +#define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ +/* enet SRIOV Standalone vNic VF */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_SN 0x02B7 + +/* Special Filter id for non-specific packet flagging. Don't change value */ +#define ENIC_MAGIC_FILTER_ID 0xffff + +#define ENICPMD_FDIR_MAX 64 + +/* + * Interrupt 0: LSC and errors + * Interrupt 1: rx queue 0 + * Interrupt 2: rx queue 1 + * ... + */ +#define ENICPMD_LSC_INTR_OFFSET 0 +#define ENICPMD_RXQ_INTR_OFFSET 1 + +struct enic_fdir_node { + struct rte_eth_fdir_filter filter; + uint16_t fltr_id; + uint16_t rq_index; +}; + +struct enic_fdir { + struct rte_eth_fdir_stats stats; + struct rte_hash *hash; + struct enic_fdir_node *nodes[ENICPMD_FDIR_MAX]; + uint32_t modes; + uint32_t types_mask; + void (*copy_fltr_fn)(struct filter_v2 *filt, + const struct rte_eth_fdir_input *input, + const struct rte_eth_fdir_masks *masks); +}; + +struct enic_soft_stats { + rte_atomic64_t rx_nombuf; + rte_atomic64_t rx_packet_errors; + rte_atomic64_t tx_oversized; +}; + +struct enic_memzone_entry { + const struct rte_memzone *rz; + LIST_ENTRY(enic_memzone_entry) entries; +}; + +/* Defined in enic_fm_flow.c */ +struct enic_flowman; +struct enic_fm_flow; + +struct rte_flow { + LIST_ENTRY(rte_flow) next; + /* Data for filter API based flow (enic_flow.c) */ + uint16_t enic_filter_id; + struct filter_v2 enic_filter; + /* Data for flow manager based flow (enic_fm_flow.c) */ + struct enic_fm_flow *fm; +}; + +/* Per-instance private data structure */ +struct enic { + struct enic *next; + struct rte_pci_device *pdev; + struct vnic_enet_config config; + struct vnic_dev_bar bar0; + struct vnic_dev *vdev; + + /* + * mbuf_initializer contains 64 bits of mbuf rearm_data, used by + * the avx2 handler at this time. + */ + uint64_t mbuf_initializer; + unsigned int port_id; + bool overlay_offload; + struct rte_eth_dev *rte_dev; + struct rte_eth_dev_data *dev_data; + struct enic_fdir fdir; + char bdf_name[ENICPMD_BDF_LENGTH]; + int dev_fd; + int iommu_group_fd; + int iommu_groupid; + int eventfd; + uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; + pthread_t err_intr_thread; + int promisc; + int allmulti; + uint8_t ig_vlan_strip_en; + int link_status; + uint8_t hw_ip_checksum; + uint16_t max_mtu; + uint8_t adv_filters; + uint32_t flow_filter_mode; + uint8_t filter_actions; /* HW supported actions */ + bool vxlan; + bool disable_overlay; /* devargs disable_overlay=1 */ + uint8_t enable_avx2_rx; /* devargs enable-avx2-rx=1 */ + uint8_t geneve_opt_avail; /* Geneve with options offload available */ + uint8_t geneve_opt_enabled; /* Geneve with options offload enabled */ + uint8_t geneve_opt_request; /* devargs geneve-opt=1 */ + bool nic_cfg_chk; /* NIC_CFG_CHK available */ + bool udp_rss_weak; /* Bodega style UDP RSS */ + uint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */ + uint16_t vxlan_port; /* current vxlan port pushed to NIC */ + int use_simple_tx_handler; + + unsigned int flags; + unsigned int priv_flags; + + /* work queue (len = conf_wq_count) */ + struct vnic_wq *wq; + unsigned int wq_count; /* equals eth_dev nb_tx_queues */ + + /* receive queue (len = conf_rq_count) */ + struct vnic_rq *rq; + unsigned int rq_count; /* equals eth_dev nb_rx_queues */ + + /* completion queue (len = conf_cq_count) */ + struct vnic_cq *cq; + unsigned int cq_count; /* equals rq_count + wq_count */ + + /* interrupt vectors (len = conf_intr_count) */ + struct vnic_intr *intr; + unsigned int intr_count; /* equals enabled interrupts (lsc + rxqs) */ + + /* software counters */ + struct enic_soft_stats soft_stats; + + /* configured resources on vic */ + unsigned int conf_rq_count; + unsigned int conf_wq_count; + unsigned int conf_cq_count; + unsigned int conf_intr_count; + + /* linked list storing memory allocations */ + LIST_HEAD(enic_memzone_list, enic_memzone_entry) memzone_list; + rte_spinlock_t memzone_list_lock; + rte_spinlock_t mtu_lock; + + LIST_HEAD(enic_flows, rte_flow) flows; + + /* RSS */ + uint16_t reta_size; + uint8_t hash_key_size; + uint64_t flow_type_rss_offloads; /* 0 indicates RSS not supported */ + /* + * Keep a copy of current RSS config for queries, as we cannot retrieve + * it from the NIC. + */ + uint8_t rss_hash_type; /* NIC_CFG_RSS_HASH_TYPE flags */ + uint8_t rss_enable; + uint64_t rss_hf; /* ETH_RSS flags */ + union vnic_rss_key rss_key; + union vnic_rss_cpu rss_cpu; + + uint64_t rx_offload_capa; /* DEV_RX_OFFLOAD flags */ + uint64_t tx_offload_capa; /* DEV_TX_OFFLOAD flags */ + uint64_t tx_queue_offload_capa; /* DEV_TX_OFFLOAD flags */ + uint64_t tx_offload_mask; /* PKT_TX flags accepted */ + + /* Multicast MAC addresses added to the NIC */ + uint32_t mc_count; + struct rte_ether_addr mc_addrs[ENIC_MULTICAST_PERFECT_FILTERS]; + + /* Flow manager API */ + struct enic_flowman *fm; +}; + +/* Compute ethdev's max packet size from MTU */ +static inline uint32_t enic_mtu_to_max_rx_pktlen(uint32_t mtu) +{ + /* ethdev max size includes eth whereas NIC MTU does not */ + return mtu + RTE_ETHER_HDR_LEN; +} + +/* Get the CQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx) +{ + return sop_idx; +} + +/* Get the RTE RQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_rte_idx(unsigned int sop_idx) +{ + return sop_idx; +} + +/* Get the Start of Packet(SOP) RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_sop_idx(unsigned int rte_idx) +{ + return rte_idx; +} + +/* Get the Data RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_data_idx(unsigned int rte_idx, + struct enic *enic) +{ + return enic->rq_count + rte_idx; +} + +static inline unsigned int enic_vnic_rq_count(struct enic *enic) +{ + return enic->rq_count * 2; +} + +static inline unsigned int enic_cq_rq(__rte_unused struct enic *enic, unsigned int rq) +{ + /* Scatter rx uses two receive queues together with one + * completion queue, so the completion queue number is no + * longer the same as the rq number. + */ + return rq; +} + +static inline unsigned int enic_cq_wq(struct enic *enic, unsigned int wq) +{ + return enic->rq_count + wq; +} + +static inline struct enic *pmd_priv(struct rte_eth_dev *eth_dev) +{ + return eth_dev->data->dev_private; +} + +static inline uint32_t +enic_ring_add(uint32_t n_descriptors, uint32_t i0, uint32_t i1) +{ + uint32_t d = i0 + i1; + d -= (d >= n_descriptors) ? n_descriptors : 0; + return d; +} + +static inline uint32_t +enic_ring_sub(uint32_t n_descriptors, uint32_t i0, uint32_t i1) +{ + int32_t d = i1 - i0; + return (uint32_t)((d < 0) ? ((int32_t)n_descriptors + d) : d); +} + +static inline uint32_t +enic_ring_incr(uint32_t n_descriptors, uint32_t idx) +{ + idx++; + if (unlikely(idx == n_descriptors)) + idx = 0; + return idx; +} + +int dev_is_enic(struct rte_eth_dev *dev); +void enic_fdir_stats_get(struct enic *enic, + struct rte_eth_fdir_stats *stats); +int enic_fdir_add_fltr(struct enic *enic, + struct rte_eth_fdir_filter *params); +int enic_fdir_del_fltr(struct enic *enic, + struct rte_eth_fdir_filter *params); +void enic_free_wq(void *txq); +int enic_alloc_intr_resources(struct enic *enic); +int enic_setup_finish(struct enic *enic); +int enic_alloc_wq(struct enic *enic, uint16_t queue_idx, + unsigned int socket_id, uint16_t nb_desc); +void enic_start_wq(struct enic *enic, uint16_t queue_idx); +int enic_stop_wq(struct enic *enic, uint16_t queue_idx); +void enic_start_rq(struct enic *enic, uint16_t queue_idx); +int enic_stop_rq(struct enic *enic, uint16_t queue_idx); +void enic_free_rq(void *rxq); +int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, + unsigned int socket_id, struct rte_mempool *mp, + uint16_t nb_desc, uint16_t free_thresh); +int enic_set_vnic_res(struct enic *enic); +int enic_init_rss_nic_cfg(struct enic *enic); +int enic_set_rss_conf(struct enic *enic, + struct rte_eth_rss_conf *rss_conf); +int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu); +int enic_set_vlan_strip(struct enic *enic); +int enic_enable(struct enic *enic); +int enic_disable(struct enic *enic); +void enic_remove(struct enic *enic); +int enic_get_link_status(struct enic *enic); +int enic_dev_stats_get(struct enic *enic, + struct rte_eth_stats *r_stats); +int enic_dev_stats_clear(struct enic *enic); +int enic_add_packet_filter(struct enic *enic); +int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr); +int enic_del_mac_address(struct enic *enic, int mac_index); +unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq); +void enic_send_pkt(struct enic *enic, struct vnic_wq *wq, + struct rte_mbuf *tx_pkt, unsigned short len, + uint8_t sop, uint8_t eop, uint8_t cq_entry, + uint16_t ol_flags, uint16_t vlan_tag); + +void enic_post_wq_index(struct vnic_wq *wq); +int enic_probe(struct enic *enic); +int enic_clsf_init(struct enic *enic); +void enic_clsf_destroy(struct enic *enic); +int enic_fm_init(struct enic *enic); +void enic_fm_destroy(struct enic *enic); +void *enic_alloc_consistent(void *priv, size_t size, dma_addr_t *dma_handle, + uint8_t *name); +void enic_free_consistent(void *priv, size_t size, void *vaddr, + dma_addr_t dma_handle); +uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +uint16_t enic_noscatter_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +uint16_t enic_dummy_recv_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t enic_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +int enic_set_mtu(struct enic *enic, uint16_t new_mtu); +int enic_link_update(struct rte_eth_dev *eth_dev); +bool enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev); +void enic_pick_rx_handler(struct rte_eth_dev *eth_dev); +void enic_pick_tx_handler(struct rte_eth_dev *eth_dev); +void enic_fdir_info(struct enic *enic); +void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats); +extern const struct rte_flow_ops enic_flow_ops; +extern const struct rte_flow_ops enic_fm_flow_ops; +#endif /* _ENIC_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/enic_clsf.c b/src/spdk/dpdk/drivers/net/enic/enic_clsf.c new file mode 100644 index 000000000..e206123ba --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_clsf.c @@ -0,0 +1,502 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <rte_ethdev_driver.h> +#include <rte_malloc.h> +#include <rte_hash.h> +#include <rte_byteorder.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_sctp.h> + +#include "enic_compat.h" +#include "enic.h" +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "cq_enet_desc.h" +#include "vnic_enet.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_nic.h" + +#ifdef RTE_ARCH_X86 +#include <rte_hash_crc.h> +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define DEFAULT_HASH_FUNC rte_jhash +#endif + +#define ENICPMD_CLSF_HASH_ENTRIES ENICPMD_FDIR_MAX + +static void copy_fltr_v1(struct filter_v2 *fltr, + const struct rte_eth_fdir_input *input, + const struct rte_eth_fdir_masks *masks); +static void copy_fltr_v2(struct filter_v2 *fltr, + const struct rte_eth_fdir_input *input, + const struct rte_eth_fdir_masks *masks); + +void enic_fdir_stats_get(struct enic *enic, struct rte_eth_fdir_stats *stats) +{ + *stats = enic->fdir.stats; +} + +void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *info) +{ + info->mode = (enum rte_fdir_mode)enic->fdir.modes; + info->flow_types_mask[0] = enic->fdir.types_mask; +} + +void enic_fdir_info(struct enic *enic) +{ + enic->fdir.modes = (uint32_t)RTE_FDIR_MODE_PERFECT; + enic->fdir.types_mask = 1 << RTE_ETH_FLOW_NONFRAG_IPV4_UDP | + 1 << RTE_ETH_FLOW_NONFRAG_IPV4_TCP; + if (enic->adv_filters) { + enic->fdir.types_mask |= 1 << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER | + 1 << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP | + 1 << RTE_ETH_FLOW_NONFRAG_IPV6_UDP | + 1 << RTE_ETH_FLOW_NONFRAG_IPV6_TCP | + 1 << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP | + 1 << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER; + enic->fdir.copy_fltr_fn = copy_fltr_v2; + } else { + enic->fdir.copy_fltr_fn = copy_fltr_v1; + } +} + +static void +enic_set_layer(struct filter_generic_1 *gp, unsigned int flag, + enum filter_generic_1_layer layer, void *mask, void *val, + unsigned int len) +{ + gp->mask_flags |= flag; + gp->val_flags |= gp->mask_flags; + memcpy(gp->layer[layer].mask, mask, len); + memcpy(gp->layer[layer].val, val, len); +} + +/* Copy Flow Director filter to a VIC ipv4 filter (for Cisco VICs + * without advanced filter support. + */ +static void +copy_fltr_v1(struct filter_v2 *fltr, const struct rte_eth_fdir_input *input, + __rte_unused const struct rte_eth_fdir_masks *masks) +{ + fltr->type = FILTER_IPV4_5TUPLE; + fltr->u.ipv4.src_addr = rte_be_to_cpu_32( + input->flow.ip4_flow.src_ip); + fltr->u.ipv4.dst_addr = rte_be_to_cpu_32( + input->flow.ip4_flow.dst_ip); + fltr->u.ipv4.src_port = rte_be_to_cpu_16( + input->flow.udp4_flow.src_port); + fltr->u.ipv4.dst_port = rte_be_to_cpu_16( + input->flow.udp4_flow.dst_port); + + if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_TCP) + fltr->u.ipv4.protocol = PROTO_TCP; + else + fltr->u.ipv4.protocol = PROTO_UDP; + + fltr->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; +} + +/* Copy Flow Director filter to a VIC generic filter (requires advanced + * filter support. + */ +static void +copy_fltr_v2(struct filter_v2 *fltr, const struct rte_eth_fdir_input *input, + const struct rte_eth_fdir_masks *masks) +{ + struct filter_generic_1 *gp = &fltr->u.generic_1; + + fltr->type = FILTER_DPDK_1; + memset(gp, 0, sizeof(*gp)); + + if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) { + struct rte_udp_hdr udp_mask, udp_val; + memset(&udp_mask, 0, sizeof(udp_mask)); + memset(&udp_val, 0, sizeof(udp_val)); + + if (input->flow.udp4_flow.src_port) { + udp_mask.src_port = masks->src_port_mask; + udp_val.src_port = input->flow.udp4_flow.src_port; + } + if (input->flow.udp4_flow.dst_port) { + udp_mask.dst_port = masks->dst_port_mask; + udp_val.dst_port = input->flow.udp4_flow.dst_port; + } + + enic_set_layer(gp, FILTER_GENERIC_1_UDP, FILTER_GENERIC_1_L4, + &udp_mask, &udp_val, sizeof(struct rte_udp_hdr)); + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_TCP) { + struct rte_tcp_hdr tcp_mask, tcp_val; + memset(&tcp_mask, 0, sizeof(tcp_mask)); + memset(&tcp_val, 0, sizeof(tcp_val)); + + if (input->flow.tcp4_flow.src_port) { + tcp_mask.src_port = masks->src_port_mask; + tcp_val.src_port = input->flow.tcp4_flow.src_port; + } + if (input->flow.tcp4_flow.dst_port) { + tcp_mask.dst_port = masks->dst_port_mask; + tcp_val.dst_port = input->flow.tcp4_flow.dst_port; + } + + enic_set_layer(gp, FILTER_GENERIC_1_TCP, FILTER_GENERIC_1_L4, + &tcp_mask, &tcp_val, sizeof(struct rte_tcp_hdr)); + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) { + struct rte_sctp_hdr sctp_mask, sctp_val; + memset(&sctp_mask, 0, sizeof(sctp_mask)); + memset(&sctp_val, 0, sizeof(sctp_val)); + + if (input->flow.sctp4_flow.src_port) { + sctp_mask.src_port = masks->src_port_mask; + sctp_val.src_port = input->flow.sctp4_flow.src_port; + } + if (input->flow.sctp4_flow.dst_port) { + sctp_mask.dst_port = masks->dst_port_mask; + sctp_val.dst_port = input->flow.sctp4_flow.dst_port; + } + if (input->flow.sctp4_flow.verify_tag) { + sctp_mask.tag = 0xffffffff; + sctp_val.tag = input->flow.sctp4_flow.verify_tag; + } + + /* + * Unlike UDP/TCP (FILTER_GENERIC_1_{UDP,TCP}), the firmware + * has no "packet is SCTP" flag. Use flag=0 (generic L4) and + * manually set proto_id=sctp below. + */ + enic_set_layer(gp, 0, FILTER_GENERIC_1_L4, &sctp_mask, + &sctp_val, sizeof(struct rte_sctp_hdr)); + } + + if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_TCP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_SCTP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) { + struct rte_ipv4_hdr ip4_mask, ip4_val; + memset(&ip4_mask, 0, sizeof(struct rte_ipv4_hdr)); + memset(&ip4_val, 0, sizeof(struct rte_ipv4_hdr)); + + if (input->flow.ip4_flow.tos) { + ip4_mask.type_of_service = masks->ipv4_mask.tos; + ip4_val.type_of_service = input->flow.ip4_flow.tos; + } + if (input->flow.ip4_flow.ttl) { + ip4_mask.time_to_live = masks->ipv4_mask.ttl; + ip4_val.time_to_live = input->flow.ip4_flow.ttl; + } + if (input->flow.ip4_flow.proto) { + ip4_mask.next_proto_id = masks->ipv4_mask.proto; + ip4_val.next_proto_id = input->flow.ip4_flow.proto; + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) { + /* Explicitly match the SCTP protocol number */ + ip4_mask.next_proto_id = 0xff; + ip4_val.next_proto_id = IPPROTO_SCTP; + } + if (input->flow.ip4_flow.src_ip) { + ip4_mask.src_addr = masks->ipv4_mask.src_ip; + ip4_val.src_addr = input->flow.ip4_flow.src_ip; + } + if (input->flow.ip4_flow.dst_ip) { + ip4_mask.dst_addr = masks->ipv4_mask.dst_ip; + ip4_val.dst_addr = input->flow.ip4_flow.dst_ip; + } + + enic_set_layer(gp, FILTER_GENERIC_1_IPV4, FILTER_GENERIC_1_L3, + &ip4_mask, &ip4_val, sizeof(struct rte_ipv4_hdr)); + } + + if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_UDP) { + struct rte_udp_hdr udp_mask, udp_val; + memset(&udp_mask, 0, sizeof(udp_mask)); + memset(&udp_val, 0, sizeof(udp_val)); + + if (input->flow.udp6_flow.src_port) { + udp_mask.src_port = masks->src_port_mask; + udp_val.src_port = input->flow.udp6_flow.src_port; + } + if (input->flow.udp6_flow.dst_port) { + udp_mask.dst_port = masks->dst_port_mask; + udp_val.dst_port = input->flow.udp6_flow.dst_port; + } + enic_set_layer(gp, FILTER_GENERIC_1_UDP, FILTER_GENERIC_1_L4, + &udp_mask, &udp_val, sizeof(struct rte_udp_hdr)); + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_TCP) { + struct rte_tcp_hdr tcp_mask, tcp_val; + memset(&tcp_mask, 0, sizeof(tcp_mask)); + memset(&tcp_val, 0, sizeof(tcp_val)); + + if (input->flow.tcp6_flow.src_port) { + tcp_mask.src_port = masks->src_port_mask; + tcp_val.src_port = input->flow.tcp6_flow.src_port; + } + if (input->flow.tcp6_flow.dst_port) { + tcp_mask.dst_port = masks->dst_port_mask; + tcp_val.dst_port = input->flow.tcp6_flow.dst_port; + } + enic_set_layer(gp, FILTER_GENERIC_1_TCP, FILTER_GENERIC_1_L4, + &tcp_mask, &tcp_val, sizeof(struct rte_tcp_hdr)); + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) { + struct rte_sctp_hdr sctp_mask, sctp_val; + memset(&sctp_mask, 0, sizeof(sctp_mask)); + memset(&sctp_val, 0, sizeof(sctp_val)); + + if (input->flow.sctp6_flow.src_port) { + sctp_mask.src_port = masks->src_port_mask; + sctp_val.src_port = input->flow.sctp6_flow.src_port; + } + if (input->flow.sctp6_flow.dst_port) { + sctp_mask.dst_port = masks->dst_port_mask; + sctp_val.dst_port = input->flow.sctp6_flow.dst_port; + } + if (input->flow.sctp6_flow.verify_tag) { + sctp_mask.tag = 0xffffffff; + sctp_val.tag = input->flow.sctp6_flow.verify_tag; + } + + enic_set_layer(gp, 0, FILTER_GENERIC_1_L4, &sctp_mask, + &sctp_val, sizeof(struct rte_sctp_hdr)); + } + + if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_UDP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_TCP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_SCTP || + input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) { + struct rte_ipv6_hdr ipv6_mask, ipv6_val; + memset(&ipv6_mask, 0, sizeof(struct rte_ipv6_hdr)); + memset(&ipv6_val, 0, sizeof(struct rte_ipv6_hdr)); + + if (input->flow.ipv6_flow.proto) { + ipv6_mask.proto = masks->ipv6_mask.proto; + ipv6_val.proto = input->flow.ipv6_flow.proto; + } else if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) { + /* See comments for IPv4 SCTP above. */ + ipv6_mask.proto = 0xff; + ipv6_val.proto = IPPROTO_SCTP; + } + memcpy(ipv6_mask.src_addr, masks->ipv6_mask.src_ip, + sizeof(ipv6_mask.src_addr)); + memcpy(ipv6_val.src_addr, input->flow.ipv6_flow.src_ip, + sizeof(ipv6_val.src_addr)); + memcpy(ipv6_mask.dst_addr, masks->ipv6_mask.dst_ip, + sizeof(ipv6_mask.dst_addr)); + memcpy(ipv6_val.dst_addr, input->flow.ipv6_flow.dst_ip, + sizeof(ipv6_val.dst_addr)); + if (input->flow.ipv6_flow.tc) { + ipv6_mask.vtc_flow = masks->ipv6_mask.tc << 12; + ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 12; + } + if (input->flow.ipv6_flow.hop_limits) { + ipv6_mask.hop_limits = masks->ipv6_mask.hop_limits; + ipv6_val.hop_limits = input->flow.ipv6_flow.hop_limits; + } + + enic_set_layer(gp, FILTER_GENERIC_1_IPV6, FILTER_GENERIC_1_L3, + &ipv6_mask, &ipv6_val, sizeof(struct rte_ipv6_hdr)); + } +} + +int enic_fdir_del_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) +{ + int32_t pos; + struct enic_fdir_node *key; + /* See if the key is in the table */ + pos = rte_hash_del_key(enic->fdir.hash, params); + switch (pos) { + case -EINVAL: + case -ENOENT: + enic->fdir.stats.f_remove++; + return -EINVAL; + default: + /* The entry is present in the table */ + key = enic->fdir.nodes[pos]; + + /* Delete the filter */ + vnic_dev_classifier(enic->vdev, CLSF_DEL, + &key->fltr_id, NULL, NULL); + rte_free(key); + enic->fdir.nodes[pos] = NULL; + enic->fdir.stats.free++; + enic->fdir.stats.remove++; + break; + } + return 0; +} + +int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) +{ + struct enic_fdir_node *key; + struct filter_v2 fltr; + int32_t pos; + uint8_t do_free = 0; + uint16_t old_fltr_id = 0; + uint32_t flowtype_supported; + uint16_t flex_bytes; + uint16_t queue; + struct filter_action_v2 action; + + memset(&fltr, 0, sizeof(fltr)); + memset(&action, 0, sizeof(action)); + flowtype_supported = enic->fdir.types_mask + & (1 << params->input.flow_type); + + flex_bytes = ((params->input.flow_ext.flexbytes[1] << 8 & 0xFF00) | + (params->input.flow_ext.flexbytes[0] & 0xFF)); + + if (!enic->fdir.hash || + (params->input.flow_ext.vlan_tci & 0xFFF) || + !flowtype_supported || flex_bytes || + params->action.behavior /* drop */) { + enic->fdir.stats.f_add++; + return -ENOTSUP; + } + + /* Get the enicpmd RQ from the DPDK Rx queue */ + queue = enic_rte_rq_idx_to_sop_idx(params->action.rx_queue); + + if (!enic->rq[queue].in_use) + return -EINVAL; + + /* See if the key is already there in the table */ + pos = rte_hash_del_key(enic->fdir.hash, params); + switch (pos) { + case -EINVAL: + enic->fdir.stats.f_add++; + return -EINVAL; + case -ENOENT: + /* Add a new classifier entry */ + if (!enic->fdir.stats.free) { + enic->fdir.stats.f_add++; + return -ENOSPC; + } + key = rte_zmalloc("enic_fdir_node", + sizeof(struct enic_fdir_node), 0); + if (!key) { + enic->fdir.stats.f_add++; + return -ENOMEM; + } + break; + default: + /* The entry is already present in the table. + * Check if there is a change in queue + */ + key = enic->fdir.nodes[pos]; + enic->fdir.nodes[pos] = NULL; + if (unlikely(key->rq_index == queue)) { + /* Nothing to be done */ + enic->fdir.stats.f_add++; + pos = rte_hash_add_key(enic->fdir.hash, params); + if (pos < 0) { + dev_err(enic, "Add hash key failed\n"); + return pos; + } + enic->fdir.nodes[pos] = key; + dev_warning(enic, + "FDIR rule is already present\n"); + return 0; + } + + if (likely(enic->fdir.stats.free)) { + /* Add the filter and then delete the old one. + * This is to avoid packets from going into the + * default queue during the window between + * delete and add + */ + do_free = 1; + old_fltr_id = key->fltr_id; + } else { + /* No free slots in the classifier. + * Delete the filter and add the modified one later + */ + vnic_dev_classifier(enic->vdev, CLSF_DEL, + &key->fltr_id, NULL, NULL); + enic->fdir.stats.free++; + } + + break; + } + + key->filter = *params; + key->rq_index = queue; + + enic->fdir.copy_fltr_fn(&fltr, ¶ms->input, + &enic->rte_dev->data->dev_conf.fdir_conf.mask); + action.type = FILTER_ACTION_RQ_STEERING; + action.rq_idx = queue; + + if (!vnic_dev_classifier(enic->vdev, CLSF_ADD, &queue, &fltr, + &action)) { + key->fltr_id = queue; + } else { + dev_err(enic, "Add classifier entry failed\n"); + enic->fdir.stats.f_add++; + rte_free(key); + return -1; + } + + if (do_free) + vnic_dev_classifier(enic->vdev, CLSF_DEL, &old_fltr_id, NULL, + NULL); + else{ + enic->fdir.stats.free--; + enic->fdir.stats.add++; + } + + pos = rte_hash_add_key(enic->fdir.hash, params); + if (pos < 0) { + enic->fdir.stats.f_add++; + dev_err(enic, "Add hash key failed\n"); + return pos; + } + + enic->fdir.nodes[pos] = key; + return 0; +} + +void enic_clsf_destroy(struct enic *enic) +{ + uint32_t index; + struct enic_fdir_node *key; + /* delete classifier entries */ + for (index = 0; index < ENICPMD_FDIR_MAX; index++) { + key = enic->fdir.nodes[index]; + if (key) { + vnic_dev_classifier(enic->vdev, CLSF_DEL, + &key->fltr_id, NULL, NULL); + rte_free(key); + enic->fdir.nodes[index] = NULL; + } + } + + if (enic->fdir.hash) { + rte_hash_free(enic->fdir.hash); + enic->fdir.hash = NULL; + } +} + +int enic_clsf_init(struct enic *enic) +{ + char clsf_name[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters hash_params = { + .name = clsf_name, + .entries = ENICPMD_CLSF_HASH_ENTRIES, + .key_len = sizeof(struct rte_eth_fdir_filter), + .hash_func = DEFAULT_HASH_FUNC, + .hash_func_init_val = 0, + .socket_id = SOCKET_ID_ANY, + }; + snprintf(clsf_name, RTE_HASH_NAMESIZE, "enic_clsf_%s", enic->bdf_name); + enic->fdir.hash = rte_hash_create(&hash_params); + memset(&enic->fdir.stats, 0, sizeof(enic->fdir.stats)); + enic->fdir.stats.free = ENICPMD_FDIR_MAX; + return NULL == enic->fdir.hash; +} diff --git a/src/spdk/dpdk/drivers/net/enic/enic_compat.h b/src/spdk/dpdk/drivers/net/enic/enic_compat.h new file mode 100644 index 000000000..774127303 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_compat.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _ENIC_COMPAT_H_ +#define _ENIC_COMPAT_H_ + +#include <stdio.h> +#include <unistd.h> + +#include <rte_atomic.h> +#include <rte_malloc.h> +#include <rte_log.h> +#include <rte_io.h> + +#define ETH_ALEN 6 + +#define __iomem + +#define pr_err(y, args...) dev_err(0, y, ##args) +#define pr_warn(y, args...) dev_warning(0, y, ##args) +#define BUG() pr_err("BUG at %s:%d", __func__, __LINE__) + +#define VNIC_ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) + +extern int enic_pmd_logtype; + +#define dev_printk(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, enic_pmd_logtype, \ + "PMD: rte_enic_pmd: " fmt, ##args) + +#define dev_err(x, args...) dev_printk(ERR, args) +#define dev_info(x, args...) dev_printk(INFO, args) +#define dev_warning(x, args...) dev_printk(WARNING, args) +#define dev_debug(x, args...) dev_printk(DEBUG, args) + +#define ENICPMD_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, enic_pmd_logtype, \ + "%s " fmt "\n", __func__, ##args) +#define ENICPMD_FUNC_TRACE() ENICPMD_LOG(DEBUG, ">>") + +typedef unsigned long long dma_addr_t; + +static inline uint32_t ioread32(volatile void *addr) +{ + return rte_read32(addr); +} + +static inline uint8_t ioread8(volatile void *addr) +{ + return rte_read8(addr); +} + +static inline void iowrite32(uint32_t val, volatile void *addr) +{ + rte_write32(val, addr); +} + +static inline void iowrite32_relaxed(uint32_t val, volatile void *addr) +{ + rte_write32_relaxed(val, addr); +} + +static inline unsigned int readl(volatile void __iomem *addr) +{ + return rte_read32(addr); +} + +static inline void writel(unsigned int val, volatile void __iomem *addr) +{ + rte_write32(val, addr); +} + +#endif /* _ENIC_COMPAT_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/enic_ethdev.c b/src/spdk/dpdk/drivers/net/enic/enic_ethdev.c new file mode 100644 index 000000000..32d5397f8 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_ethdev.c @@ -0,0 +1,1316 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <stdio.h> +#include <stdint.h> + +#include <rte_dev.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_ethdev_driver.h> +#include <rte_ethdev_pci.h> +#include <rte_kvargs.h> +#include <rte_string_fns.h> + +#include "vnic_intr.h" +#include "vnic_cq.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_enet.h" +#include "enic.h" + +int enic_pmd_logtype; + +/* + * The set of PCI devices this driver supports + */ +#define CISCO_PCI_VENDOR_ID 0x1137 +static const struct rte_pci_id pci_id_enic_map[] = { + {RTE_PCI_DEVICE(CISCO_PCI_VENDOR_ID, PCI_DEVICE_ID_CISCO_VIC_ENET)}, + {RTE_PCI_DEVICE(CISCO_PCI_VENDOR_ID, PCI_DEVICE_ID_CISCO_VIC_ENET_VF)}, + {RTE_PCI_DEVICE(CISCO_PCI_VENDOR_ID, PCI_DEVICE_ID_CISCO_VIC_ENET_SN)}, + {.vendor_id = 0, /* sentinel */}, +}; + +/* Supported link speeds of production VIC models */ +static const struct vic_speed_capa { + uint16_t sub_devid; + uint32_t capa; +} vic_speed_capa_map[] = { + { 0x0043, ETH_LINK_SPEED_10G }, /* VIC */ + { 0x0047, ETH_LINK_SPEED_10G }, /* P81E PCIe */ + { 0x0048, ETH_LINK_SPEED_10G }, /* M81KR Mezz */ + { 0x004f, ETH_LINK_SPEED_10G }, /* 1280 Mezz */ + { 0x0084, ETH_LINK_SPEED_10G }, /* 1240 MLOM */ + { 0x0085, ETH_LINK_SPEED_10G }, /* 1225 PCIe */ + { 0x00cd, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G }, /* 1285 PCIe */ + { 0x00ce, ETH_LINK_SPEED_10G }, /* 1225T PCIe */ + { 0x012a, ETH_LINK_SPEED_40G }, /* M4308 */ + { 0x012c, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G }, /* 1340 MLOM */ + { 0x012e, ETH_LINK_SPEED_10G }, /* 1227 PCIe */ + { 0x0137, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G }, /* 1380 Mezz */ + { 0x014d, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G }, /* 1385 PCIe */ + { 0x015d, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G }, /* 1387 MLOM */ + { 0x0215, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G | + ETH_LINK_SPEED_40G }, /* 1440 Mezz */ + { 0x0216, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G | + ETH_LINK_SPEED_40G }, /* 1480 MLOM */ + { 0x0217, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G }, /* 1455 PCIe */ + { 0x0218, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G }, /* 1457 MLOM */ + { 0x0219, ETH_LINK_SPEED_40G }, /* 1485 PCIe */ + { 0x021a, ETH_LINK_SPEED_40G }, /* 1487 MLOM */ + { 0x024a, ETH_LINK_SPEED_40G | ETH_LINK_SPEED_100G }, /* 1495 PCIe */ + { 0x024b, ETH_LINK_SPEED_40G | ETH_LINK_SPEED_100G }, /* 1497 MLOM */ + { 0, 0 }, /* End marker */ +}; + +#define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay" +#define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx" +#define ENIC_DEVARG_GENEVE_OPT "geneve-opt" +#define ENIC_DEVARG_IG_VLAN_REWRITE "ig-vlan-rewrite" + +RTE_INIT(enicpmd_init_log) +{ + enic_pmd_logtype = rte_log_register("pmd.net.enic"); + if (enic_pmd_logtype >= 0) + rte_log_set_level(enic_pmd_logtype, RTE_LOG_INFO); +} + +static int +enicpmd_fdir_ctrl_func(struct rte_eth_dev *eth_dev, + enum rte_filter_op filter_op, void *arg) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret = 0; + + ENICPMD_FUNC_TRACE(); + if (filter_op == RTE_ETH_FILTER_NOP) + return 0; + + if (arg == NULL && filter_op != RTE_ETH_FILTER_FLUSH) + return -EINVAL; + + switch (filter_op) { + case RTE_ETH_FILTER_ADD: + case RTE_ETH_FILTER_UPDATE: + ret = enic_fdir_add_fltr(enic, + (struct rte_eth_fdir_filter *)arg); + break; + + case RTE_ETH_FILTER_DELETE: + ret = enic_fdir_del_fltr(enic, + (struct rte_eth_fdir_filter *)arg); + break; + + case RTE_ETH_FILTER_STATS: + enic_fdir_stats_get(enic, (struct rte_eth_fdir_stats *)arg); + break; + + case RTE_ETH_FILTER_FLUSH: + dev_warning(enic, "unsupported operation %u", filter_op); + ret = -ENOTSUP; + break; + case RTE_ETH_FILTER_INFO: + enic_fdir_info_get(enic, (struct rte_eth_fdir_info *)arg); + break; + default: + dev_err(enic, "unknown operation %u", filter_op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +enicpmd_dev_filter_ctrl(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg) +{ + struct enic *enic = pmd_priv(dev); + int ret = 0; + + ENICPMD_FUNC_TRACE(); + + /* + * Currently, when Geneve with options offload is enabled, host + * cannot insert match-action rules. + */ + if (enic->geneve_opt_enabled) + return -ENOTSUP; + switch (filter_type) { + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + if (enic->flow_filter_mode == FILTER_FLOWMAN) + *(const void **)arg = &enic_fm_flow_ops; + else + *(const void **)arg = &enic_flow_ops; + break; + case RTE_ETH_FILTER_FDIR: + ret = enicpmd_fdir_ctrl_func(dev, filter_op, arg); + break; + default: + dev_warning(enic, "Filter type (%d) not supported", + filter_type); + ret = -EINVAL; + break; + } + + return ret; +} + +static void enicpmd_dev_tx_queue_release(void *txq) +{ + ENICPMD_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + enic_free_wq(txq); +} + +static int enicpmd_dev_setup_intr(struct enic *enic) +{ + int ret; + unsigned int index; + + ENICPMD_FUNC_TRACE(); + + /* Are we done with the init of all the queues? */ + for (index = 0; index < enic->cq_count; index++) { + if (!enic->cq[index].ctrl) + break; + } + if (enic->cq_count != index) + return 0; + for (index = 0; index < enic->wq_count; index++) { + if (!enic->wq[index].ctrl) + break; + } + if (enic->wq_count != index) + return 0; + /* check start of packet (SOP) RQs only in case scatter is disabled. */ + for (index = 0; index < enic->rq_count; index++) { + if (!enic->rq[enic_rte_rq_idx_to_sop_idx(index)].ctrl) + break; + } + if (enic->rq_count != index) + return 0; + + ret = enic_alloc_intr_resources(enic); + if (ret) { + dev_err(enic, "alloc intr failed\n"); + return ret; + } + enic_init_vnic_resources(enic); + + ret = enic_setup_finish(enic); + if (ret) + dev_err(enic, "setup could not be finished\n"); + + return ret; +} + +static int enicpmd_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + int ret; + struct enic *enic = pmd_priv(eth_dev); + struct vnic_wq *wq; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + RTE_ASSERT(queue_idx < enic->conf_wq_count); + wq = &enic->wq[queue_idx]; + wq->offloads = tx_conf->offloads | + eth_dev->data->dev_conf.txmode.offloads; + eth_dev->data->tx_queues[queue_idx] = (void *)wq; + + ret = enic_alloc_wq(enic, queue_idx, socket_id, nb_desc); + if (ret) { + dev_err(enic, "error in allocating wq\n"); + return ret; + } + + return enicpmd_dev_setup_intr(enic); +} + +static int enicpmd_dev_tx_queue_start(struct rte_eth_dev *eth_dev, + uint16_t queue_idx) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + + enic_start_wq(enic, queue_idx); + + return 0; +} + +static int enicpmd_dev_tx_queue_stop(struct rte_eth_dev *eth_dev, + uint16_t queue_idx) +{ + int ret; + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + + ret = enic_stop_wq(enic, queue_idx); + if (ret) + dev_err(enic, "error in stopping wq %d\n", queue_idx); + + return ret; +} + +static int enicpmd_dev_rx_queue_start(struct rte_eth_dev *eth_dev, + uint16_t queue_idx) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + + enic_start_rq(enic, queue_idx); + + return 0; +} + +static int enicpmd_dev_rx_queue_stop(struct rte_eth_dev *eth_dev, + uint16_t queue_idx) +{ + int ret; + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + + ret = enic_stop_rq(enic, queue_idx); + if (ret) + dev_err(enic, "error in stopping rq %d\n", queue_idx); + + return ret; +} + +static void enicpmd_dev_rx_queue_release(void *rxq) +{ + ENICPMD_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + enic_free_rq(rxq); +} + +static uint32_t enicpmd_dev_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id) +{ + struct enic *enic = pmd_priv(dev); + uint32_t queue_count = 0; + struct vnic_cq *cq; + uint32_t cq_tail; + uint16_t cq_idx; + int rq_num; + + rq_num = enic_rte_rq_idx_to_sop_idx(rx_queue_id); + cq = &enic->cq[enic_cq_rq(enic, rq_num)]; + cq_idx = cq->to_clean; + + cq_tail = ioread32(&cq->ctrl->cq_tail); + + if (cq_tail < cq_idx) + cq_tail += cq->ring.desc_count; + + queue_count = cq_tail - cq_idx; + + return queue_count; +} + +static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + int ret; + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + RTE_ASSERT(enic_rte_rq_idx_to_sop_idx(queue_idx) < enic->conf_rq_count); + eth_dev->data->rx_queues[queue_idx] = + (void *)&enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + + ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc, + rx_conf->rx_free_thresh); + if (ret) { + dev_err(enic, "error in allocating rq\n"); + return ret; + } + + return enicpmd_dev_setup_intr(enic); +} + +static int enicpmd_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) +{ + struct enic *enic = pmd_priv(eth_dev); + uint64_t offloads; + + ENICPMD_FUNC_TRACE(); + + offloads = eth_dev->data->dev_conf.rxmode.offloads; + if (mask & ETH_VLAN_STRIP_MASK) { + if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + enic->ig_vlan_strip_en = 1; + else + enic->ig_vlan_strip_en = 0; + } + + if ((mask & ETH_VLAN_FILTER_MASK) && + (offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) { + dev_warning(enic, + "Configuration of VLAN filter is not supported\n"); + } + + if ((mask & ETH_VLAN_EXTEND_MASK) && + (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)) { + dev_warning(enic, + "Configuration of extended VLAN is not supported\n"); + } + + return enic_set_vlan_strip(enic); +} + +static int enicpmd_dev_configure(struct rte_eth_dev *eth_dev) +{ + int ret; + int mask; + struct enic *enic = pmd_priv(eth_dev); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + ret = enic_set_vnic_res(enic); + if (ret) { + dev_err(enic, "Set vNIC resource num failed, aborting\n"); + return ret; + } + + if (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) + eth_dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_RSS_HASH; + + enic->mc_count = 0; + enic->hw_ip_checksum = !!(eth_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_CHECKSUM); + /* All vlan offload masks to apply the current settings */ + mask = ETH_VLAN_STRIP_MASK | + ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK; + ret = enicpmd_vlan_offload_set(eth_dev, mask); + if (ret) { + dev_err(enic, "Failed to configure VLAN offloads\n"); + return ret; + } + /* + * Initialize RSS with the default reta and key. If the user key is + * given (rx_adv_conf.rss_conf.rss_key), will use that instead of the + * default key. + */ + return enic_init_rss_nic_cfg(enic); +} + +/* Start the device. + * It returns 0 on success. + */ +static int enicpmd_dev_start(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + return enic_enable(enic); +} + +/* + * Stop device: disable rx and tx functions to allow for reconfiguring. + */ +static void enicpmd_dev_stop(struct rte_eth_dev *eth_dev) +{ + struct rte_eth_link link; + struct enic *enic = pmd_priv(eth_dev); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + ENICPMD_FUNC_TRACE(); + enic_disable(enic); + + memset(&link, 0, sizeof(link)); + rte_eth_linkstatus_set(eth_dev, &link); +} + +/* + * Stop device. + */ +static void enicpmd_dev_close(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + enic_remove(enic); +} + +static int enicpmd_dev_link_update(struct rte_eth_dev *eth_dev, + __rte_unused int wait_to_complete) +{ + ENICPMD_FUNC_TRACE(); + return enic_link_update(eth_dev); +} + +static int enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev, + struct rte_eth_stats *stats) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + return enic_dev_stats_get(enic, stats); +} + +static int enicpmd_dev_stats_reset(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + return enic_dev_stats_clear(enic); +} + +static uint32_t speed_capa_from_pci_id(struct rte_eth_dev *eth_dev) +{ + const struct vic_speed_capa *m; + struct rte_pci_device *pdev; + uint16_t id; + + pdev = RTE_ETH_DEV_TO_PCI(eth_dev); + id = pdev->id.subsystem_device_id; + for (m = vic_speed_capa_map; m->sub_devid != 0; m++) { + if (m->sub_devid == id) + return m->capa; + } + /* 1300 and later models are at least 40G */ + if (id >= 0x0100) + return ETH_LINK_SPEED_40G; + /* VFs have subsystem id 0, check device id */ + if (id == 0) { + /* Newer VF implies at least 40G model */ + if (pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_SN) + return ETH_LINK_SPEED_40G; + } + return ETH_LINK_SPEED_10G; +} + +static int enicpmd_dev_info_get(struct rte_eth_dev *eth_dev, + struct rte_eth_dev_info *device_info) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + /* Scattered Rx uses two receive queues per rx queue exposed to dpdk */ + device_info->max_rx_queues = enic->conf_rq_count / 2; + device_info->max_tx_queues = enic->conf_wq_count; + device_info->min_rx_bufsize = ENIC_MIN_MTU; + /* "Max" mtu is not a typo. HW receives packet sizes up to the + * max mtu regardless of the current mtu (vNIC's mtu). vNIC mtu is + * a hint to the driver to size receive buffers accordingly so that + * larger-than-vnic-mtu packets get truncated.. For DPDK, we let + * the user decide the buffer size via rxmode.max_rx_pkt_len, basically + * ignoring vNIC mtu. + */ + device_info->max_rx_pktlen = enic_mtu_to_max_rx_pktlen(enic->max_mtu); + device_info->max_mac_addrs = ENIC_UNICAST_PERFECT_FILTERS; + device_info->min_mtu = ENIC_MIN_MTU; + device_info->max_mtu = enic->max_mtu; + device_info->rx_offload_capa = enic->rx_offload_capa; + device_info->tx_offload_capa = enic->tx_offload_capa; + device_info->tx_queue_offload_capa = enic->tx_queue_offload_capa; + device_info->default_rxconf = (struct rte_eth_rxconf) { + .rx_free_thresh = ENIC_DEFAULT_RX_FREE_THRESH + }; + device_info->reta_size = enic->reta_size; + device_info->hash_key_size = enic->hash_key_size; + device_info->flow_type_rss_offloads = enic->flow_type_rss_offloads; + device_info->rx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = enic->config.rq_desc_count, + .nb_min = ENIC_MIN_RQ_DESCS, + .nb_align = ENIC_ALIGN_DESCS, + }; + device_info->tx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = enic->config.wq_desc_count, + .nb_min = ENIC_MIN_WQ_DESCS, + .nb_align = ENIC_ALIGN_DESCS, + .nb_seg_max = ENIC_TX_XMIT_MAX, + .nb_mtu_seg_max = ENIC_NON_TSO_MAX_DESC, + }; + device_info->default_rxportconf = (struct rte_eth_dev_portconf) { + .burst_size = ENIC_DEFAULT_RX_BURST, + .ring_size = RTE_MIN(device_info->rx_desc_lim.nb_max, + ENIC_DEFAULT_RX_RING_SIZE), + .nb_queues = ENIC_DEFAULT_RX_RINGS, + }; + device_info->default_txportconf = (struct rte_eth_dev_portconf) { + .burst_size = ENIC_DEFAULT_TX_BURST, + .ring_size = RTE_MIN(device_info->tx_desc_lim.nb_max, + ENIC_DEFAULT_TX_RING_SIZE), + .nb_queues = ENIC_DEFAULT_TX_RINGS, + }; + device_info->speed_capa = speed_capa_from_pci_id(eth_dev); + + return 0; +} + +static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev) +{ + static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_FRAG, + RTE_PTYPE_L4_NONFRAG, + RTE_PTYPE_UNKNOWN + }; + static const uint32_t ptypes_overlay[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_FRAG, + RTE_PTYPE_L4_NONFRAG, + RTE_PTYPE_TUNNEL_GRENAT, + RTE_PTYPE_INNER_L2_ETHER, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, + RTE_PTYPE_INNER_L4_TCP, + RTE_PTYPE_INNER_L4_UDP, + RTE_PTYPE_INNER_L4_FRAG, + RTE_PTYPE_INNER_L4_NONFRAG, + RTE_PTYPE_UNKNOWN + }; + + if (dev->rx_pkt_burst != enic_dummy_recv_pkts && + dev->rx_pkt_burst != NULL) { + struct enic *enic = pmd_priv(dev); + if (enic->overlay_offload) + return ptypes_overlay; + else + return ptypes; + } + return NULL; +} + +static int enicpmd_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + + enic->promisc = 1; + ret = enic_add_packet_filter(enic); + if (ret != 0) + enic->promisc = 0; + + return ret; +} + +static int enicpmd_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + enic->promisc = 0; + ret = enic_add_packet_filter(enic); + if (ret != 0) + enic->promisc = 1; + + return ret; +} + +static int enicpmd_dev_allmulticast_enable(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + enic->allmulti = 1; + ret = enic_add_packet_filter(enic); + if (ret != 0) + enic->allmulti = 0; + + return ret; +} + +static int enicpmd_dev_allmulticast_disable(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + enic->allmulti = 0; + ret = enic_add_packet_filter(enic); + if (ret != 0) + enic->allmulti = 1; + + return ret; +} + +static int enicpmd_add_mac_addr(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *mac_addr, + __rte_unused uint32_t index, __rte_unused uint32_t pool) +{ + struct enic *enic = pmd_priv(eth_dev); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + return enic_set_mac_address(enic, mac_addr->addr_bytes); +} + +static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, uint32_t index) +{ + struct enic *enic = pmd_priv(eth_dev); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + ENICPMD_FUNC_TRACE(); + if (enic_del_mac_address(enic, index)) + dev_err(enic, "del mac addr failed\n"); +} + +static int enicpmd_set_mac_addr(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *addr) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); + ret = enic_del_mac_address(enic, 0); + if (ret) + return ret; + return enic_set_mac_address(enic, addr->addr_bytes); +} + +static void debug_log_add_del_addr(struct rte_ether_addr *addr, bool add) +{ + char mac_str[RTE_ETHER_ADDR_FMT_SIZE]; + + rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, addr); + ENICPMD_LOG(DEBUG, " %s address %s\n", + add ? "add" : "remove", mac_str); +} + +static int enicpmd_set_mc_addr_list(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct enic *enic = pmd_priv(eth_dev); + char mac_str[RTE_ETHER_ADDR_FMT_SIZE]; + struct rte_ether_addr *addr; + uint32_t i, j; + int ret; + + ENICPMD_FUNC_TRACE(); + + /* Validate the given addresses first */ + for (i = 0; i < nb_mc_addr && mc_addr_set != NULL; i++) { + addr = &mc_addr_set[i]; + if (!rte_is_multicast_ether_addr(addr) || + rte_is_broadcast_ether_addr(addr)) { + rte_ether_format_addr(mac_str, + RTE_ETHER_ADDR_FMT_SIZE, addr); + ENICPMD_LOG(ERR, " invalid multicast address %s\n", + mac_str); + return -EINVAL; + } + } + + /* Flush all if requested */ + if (nb_mc_addr == 0 || mc_addr_set == NULL) { + ENICPMD_LOG(DEBUG, " flush multicast addresses\n"); + for (i = 0; i < enic->mc_count; i++) { + addr = &enic->mc_addrs[i]; + debug_log_add_del_addr(addr, false); + ret = vnic_dev_del_addr(enic->vdev, addr->addr_bytes); + if (ret) + return ret; + } + enic->mc_count = 0; + return 0; + } + + if (nb_mc_addr > ENIC_MULTICAST_PERFECT_FILTERS) { + ENICPMD_LOG(ERR, " too many multicast addresses: max=%d\n", + ENIC_MULTICAST_PERFECT_FILTERS); + return -ENOSPC; + } + /* + * devcmd is slow, so apply the difference instead of flushing and + * adding everything. + * 1. Delete addresses on the NIC but not on the host + */ + for (i = 0; i < enic->mc_count; i++) { + addr = &enic->mc_addrs[i]; + for (j = 0; j < nb_mc_addr; j++) { + if (rte_is_same_ether_addr(addr, &mc_addr_set[j])) + break; + } + if (j < nb_mc_addr) + continue; + debug_log_add_del_addr(addr, false); + ret = vnic_dev_del_addr(enic->vdev, addr->addr_bytes); + if (ret) + return ret; + } + /* 2. Add addresses on the host but not on the NIC */ + for (i = 0; i < nb_mc_addr; i++) { + addr = &mc_addr_set[i]; + for (j = 0; j < enic->mc_count; j++) { + if (rte_is_same_ether_addr(addr, &enic->mc_addrs[j])) + break; + } + if (j < enic->mc_count) + continue; + debug_log_add_del_addr(addr, true); + ret = vnic_dev_add_addr(enic->vdev, addr->addr_bytes); + if (ret) + return ret; + } + /* Keep a copy so we can flush/apply later on.. */ + memcpy(enic->mc_addrs, mc_addr_set, + nb_mc_addr * sizeof(struct rte_ether_addr)); + enic->mc_count = nb_mc_addr; + return 0; +} + +static int enicpmd_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + return enic_set_mtu(enic, mtu); +} + +static int enicpmd_dev_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 + *reta_conf, + uint16_t reta_size) +{ + struct enic *enic = pmd_priv(dev); + uint16_t i, idx, shift; + + ENICPMD_FUNC_TRACE(); + if (reta_size != ENIC_RSS_RETA_SIZE) { + dev_err(enic, "reta_query: wrong reta_size. given=%u expected=%u\n", + reta_size, ENIC_RSS_RETA_SIZE); + return -EINVAL; + } + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if (reta_conf[idx].mask & (1ULL << shift)) + reta_conf[idx].reta[shift] = enic_sop_rq_idx_to_rte_idx( + enic->rss_cpu.cpu[i / 4].b[i % 4]); + } + + return 0; +} + +static int enicpmd_dev_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 + *reta_conf, + uint16_t reta_size) +{ + struct enic *enic = pmd_priv(dev); + union vnic_rss_cpu rss_cpu; + uint16_t i, idx, shift; + + ENICPMD_FUNC_TRACE(); + if (reta_size != ENIC_RSS_RETA_SIZE) { + dev_err(enic, "reta_update: wrong reta_size. given=%u" + " expected=%u\n", + reta_size, ENIC_RSS_RETA_SIZE); + return -EINVAL; + } + /* + * Start with the current reta and modify it per reta_conf, as we + * need to push the entire reta even if we only modify one entry. + */ + rss_cpu = enic->rss_cpu; + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if (reta_conf[idx].mask & (1ULL << shift)) + rss_cpu.cpu[i / 4].b[i % 4] = + enic_rte_rq_idx_to_sop_idx( + reta_conf[idx].reta[shift]); + } + return enic_set_rss_reta(enic, &rss_cpu); +} + +static int enicpmd_dev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + return enic_set_rss_conf(enic, rss_conf); +} + +static int enicpmd_dev_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + if (rss_conf == NULL) + return -EINVAL; + if (rss_conf->rss_key != NULL && + rss_conf->rss_key_len < ENIC_RSS_HASH_KEY_SIZE) { + dev_err(enic, "rss_hash_conf_get: wrong rss_key_len. given=%u" + " expected=%u+\n", + rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE); + return -EINVAL; + } + rss_conf->rss_hf = enic->rss_hf; + if (rss_conf->rss_key != NULL) { + int i; + for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++) { + rss_conf->rss_key[i] = + enic->rss_key.key[i / 10].b[i % 10]; + } + rss_conf->rss_key_len = ENIC_RSS_HASH_KEY_SIZE; + } + return 0; +} + +static void enicpmd_dev_rxq_info_get(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct enic *enic = pmd_priv(dev); + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + struct rte_eth_rxconf *conf; + uint16_t sop_queue_idx; + uint16_t data_queue_idx; + + ENICPMD_FUNC_TRACE(); + sop_queue_idx = enic_rte_rq_idx_to_sop_idx(rx_queue_id); + data_queue_idx = enic_rte_rq_idx_to_data_idx(rx_queue_id, enic); + rq_sop = &enic->rq[sop_queue_idx]; + rq_data = &enic->rq[data_queue_idx]; /* valid if data_queue_enable */ + qinfo->mp = rq_sop->mp; + qinfo->scattered_rx = rq_sop->data_queue_enable; + qinfo->nb_desc = rq_sop->ring.desc_count; + if (qinfo->scattered_rx) + qinfo->nb_desc += rq_data->ring.desc_count; + conf = &qinfo->conf; + memset(conf, 0, sizeof(*conf)); + conf->rx_free_thresh = rq_sop->rx_free_thresh; + conf->rx_drop_en = 1; + /* + * Except VLAN stripping (port setting), all the checksum offloads + * are always enabled. + */ + conf->offloads = enic->rx_offload_capa; + if (!enic->ig_vlan_strip_en) + conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP; + /* rx_thresh and other fields are not applicable for enic */ +} + +static void enicpmd_dev_txq_info_get(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct enic *enic = pmd_priv(dev); + struct vnic_wq *wq = &enic->wq[tx_queue_id]; + + ENICPMD_FUNC_TRACE(); + qinfo->nb_desc = wq->ring.desc_count; + memset(&qinfo->conf, 0, sizeof(qinfo->conf)); + qinfo->conf.offloads = wq->offloads; + /* tx_thresh, and all the other fields are not applicable for enic */ +} + +static int enicpmd_dev_rx_queue_intr_enable(struct rte_eth_dev *eth_dev, + uint16_t rx_queue_id) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + vnic_intr_unmask(&enic->intr[rx_queue_id + ENICPMD_RXQ_INTR_OFFSET]); + return 0; +} + +static int enicpmd_dev_rx_queue_intr_disable(struct rte_eth_dev *eth_dev, + uint16_t rx_queue_id) +{ + struct enic *enic = pmd_priv(eth_dev); + + ENICPMD_FUNC_TRACE(); + vnic_intr_mask(&enic->intr[rx_queue_id + ENICPMD_RXQ_INTR_OFFSET]); + return 0; +} + +static int udp_tunnel_common_check(struct enic *enic, + struct rte_eth_udp_tunnel *tnl) +{ + if (tnl->prot_type != RTE_TUNNEL_TYPE_VXLAN) + return -ENOTSUP; + if (!enic->overlay_offload) { + ENICPMD_LOG(DEBUG, " vxlan (overlay offload) is not " + "supported\n"); + return -ENOTSUP; + } + return 0; +} + +static int update_vxlan_port(struct enic *enic, uint16_t port) +{ + if (vnic_dev_overlay_offload_cfg(enic->vdev, + OVERLAY_CFG_VXLAN_PORT_UPDATE, + port)) { + ENICPMD_LOG(DEBUG, " failed to update vxlan port\n"); + return -EINVAL; + } + ENICPMD_LOG(DEBUG, " updated vxlan port to %u\n", port); + enic->vxlan_port = port; + return 0; +} + +static int enicpmd_dev_udp_tunnel_port_add(struct rte_eth_dev *eth_dev, + struct rte_eth_udp_tunnel *tnl) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + ENICPMD_FUNC_TRACE(); + ret = udp_tunnel_common_check(enic, tnl); + if (ret) + return ret; + /* + * The NIC has 1 configurable VXLAN port number. "Adding" a new port + * number replaces it. + */ + if (tnl->udp_port == enic->vxlan_port || tnl->udp_port == 0) { + ENICPMD_LOG(DEBUG, " %u is already configured or invalid\n", + tnl->udp_port); + return -EINVAL; + } + return update_vxlan_port(enic, tnl->udp_port); +} + +static int enicpmd_dev_udp_tunnel_port_del(struct rte_eth_dev *eth_dev, + struct rte_eth_udp_tunnel *tnl) +{ + struct enic *enic = pmd_priv(eth_dev); + int ret; + + ENICPMD_FUNC_TRACE(); + ret = udp_tunnel_common_check(enic, tnl); + if (ret) + return ret; + /* + * Clear the previously set port number and restore the + * hardware default port number. Some drivers disable VXLAN + * offloads when there are no configured port numbers. But + * enic does not do that as VXLAN is part of overlay offload, + * which is tied to inner RSS and TSO. + */ + if (tnl->udp_port != enic->vxlan_port) { + ENICPMD_LOG(DEBUG, " %u is not a configured vxlan port\n", + tnl->udp_port); + return -EINVAL; + } + return update_vxlan_port(enic, RTE_VXLAN_DEFAULT_PORT); +} + +static int enicpmd_dev_fw_version_get(struct rte_eth_dev *eth_dev, + char *fw_version, size_t fw_size) +{ + struct vnic_devcmd_fw_info *info; + struct enic *enic; + int ret; + + ENICPMD_FUNC_TRACE(); + if (fw_version == NULL || fw_size <= 0) + return -EINVAL; + enic = pmd_priv(eth_dev); + ret = vnic_dev_fw_info(enic->vdev, &info); + if (ret) + return ret; + snprintf(fw_version, fw_size, "%s %s", + info->fw_version, info->fw_build); + fw_version[fw_size - 1] = '\0'; + return 0; +} + +static const struct eth_dev_ops enicpmd_eth_dev_ops = { + .dev_configure = enicpmd_dev_configure, + .dev_start = enicpmd_dev_start, + .dev_stop = enicpmd_dev_stop, + .dev_set_link_up = NULL, + .dev_set_link_down = NULL, + .dev_close = enicpmd_dev_close, + .promiscuous_enable = enicpmd_dev_promiscuous_enable, + .promiscuous_disable = enicpmd_dev_promiscuous_disable, + .allmulticast_enable = enicpmd_dev_allmulticast_enable, + .allmulticast_disable = enicpmd_dev_allmulticast_disable, + .link_update = enicpmd_dev_link_update, + .stats_get = enicpmd_dev_stats_get, + .stats_reset = enicpmd_dev_stats_reset, + .queue_stats_mapping_set = NULL, + .dev_infos_get = enicpmd_dev_info_get, + .dev_supported_ptypes_get = enicpmd_dev_supported_ptypes_get, + .mtu_set = enicpmd_mtu_set, + .vlan_filter_set = NULL, + .vlan_tpid_set = NULL, + .vlan_offload_set = enicpmd_vlan_offload_set, + .vlan_strip_queue_set = NULL, + .rx_queue_start = enicpmd_dev_rx_queue_start, + .rx_queue_stop = enicpmd_dev_rx_queue_stop, + .tx_queue_start = enicpmd_dev_tx_queue_start, + .tx_queue_stop = enicpmd_dev_tx_queue_stop, + .rx_queue_setup = enicpmd_dev_rx_queue_setup, + .rx_queue_release = enicpmd_dev_rx_queue_release, + .rx_queue_count = enicpmd_dev_rx_queue_count, + .rx_descriptor_done = NULL, + .tx_queue_setup = enicpmd_dev_tx_queue_setup, + .tx_queue_release = enicpmd_dev_tx_queue_release, + .rx_queue_intr_enable = enicpmd_dev_rx_queue_intr_enable, + .rx_queue_intr_disable = enicpmd_dev_rx_queue_intr_disable, + .rxq_info_get = enicpmd_dev_rxq_info_get, + .txq_info_get = enicpmd_dev_txq_info_get, + .dev_led_on = NULL, + .dev_led_off = NULL, + .flow_ctrl_get = NULL, + .flow_ctrl_set = NULL, + .priority_flow_ctrl_set = NULL, + .mac_addr_add = enicpmd_add_mac_addr, + .mac_addr_remove = enicpmd_remove_mac_addr, + .mac_addr_set = enicpmd_set_mac_addr, + .set_mc_addr_list = enicpmd_set_mc_addr_list, + .filter_ctrl = enicpmd_dev_filter_ctrl, + .reta_query = enicpmd_dev_rss_reta_query, + .reta_update = enicpmd_dev_rss_reta_update, + .rss_hash_conf_get = enicpmd_dev_rss_hash_conf_get, + .rss_hash_update = enicpmd_dev_rss_hash_update, + .udp_tunnel_port_add = enicpmd_dev_udp_tunnel_port_add, + .udp_tunnel_port_del = enicpmd_dev_udp_tunnel_port_del, + .fw_version_get = enicpmd_dev_fw_version_get, +}; + +static int enic_parse_zero_one(const char *key, + const char *value, + void *opaque) +{ + struct enic *enic; + bool b; + + enic = (struct enic *)opaque; + if (strcmp(value, "0") == 0) { + b = false; + } else if (strcmp(value, "1") == 0) { + b = true; + } else { + dev_err(enic, "Invalid value for %s" + ": expected=0|1 given=%s\n", key, value); + return -EINVAL; + } + if (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0) + enic->disable_overlay = b; + if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0) + enic->enable_avx2_rx = b; + if (strcmp(key, ENIC_DEVARG_GENEVE_OPT) == 0) + enic->geneve_opt_request = b; + return 0; +} + +static int enic_parse_ig_vlan_rewrite(__rte_unused const char *key, + const char *value, + void *opaque) +{ + struct enic *enic; + + enic = (struct enic *)opaque; + if (strcmp(value, "trunk") == 0) { + /* Trunk mode: always tag */ + enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_DEFAULT_TRUNK; + } else if (strcmp(value, "untag") == 0) { + /* Untag default VLAN mode: untag if VLAN = default VLAN */ + enic->ig_vlan_rewrite_mode = + IG_VLAN_REWRITE_MODE_UNTAG_DEFAULT_VLAN; + } else if (strcmp(value, "priority") == 0) { + /* + * Priority-tag default VLAN mode: priority tag (VLAN header + * with ID=0) if VLAN = default + */ + enic->ig_vlan_rewrite_mode = + IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN; + } else if (strcmp(value, "pass") == 0) { + /* Pass through mode: do not touch tags */ + enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU; + } else { + dev_err(enic, "Invalid value for " ENIC_DEVARG_IG_VLAN_REWRITE + ": expected=trunk|untag|priority|pass given=%s\n", + value); + return -EINVAL; + } + return 0; +} + +static int enic_check_devargs(struct rte_eth_dev *dev) +{ + static const char *const valid_keys[] = { + ENIC_DEVARG_DISABLE_OVERLAY, + ENIC_DEVARG_ENABLE_AVX2_RX, + ENIC_DEVARG_GENEVE_OPT, + ENIC_DEVARG_IG_VLAN_REWRITE, + NULL}; + struct enic *enic = pmd_priv(dev); + struct rte_kvargs *kvlist; + + ENICPMD_FUNC_TRACE(); + + enic->disable_overlay = false; + enic->enable_avx2_rx = false; + enic->geneve_opt_request = false; + enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU; + if (!dev->device->devargs) + return 0; + kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys); + if (!kvlist) + return -EINVAL; + if (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY, + enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX, + enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_GENEVE_OPT, + enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE, + enic_parse_ig_vlan_rewrite, enic) < 0) { + rte_kvargs_free(kvlist); + return -EINVAL; + } + rte_kvargs_free(kvlist); + return 0; +} + +/* Initialize the driver + * It returns 0 on success. + */ +static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pdev; + struct rte_pci_addr *addr; + struct enic *enic = pmd_priv(eth_dev); + int err; + + ENICPMD_FUNC_TRACE(); + + eth_dev->dev_ops = &enicpmd_eth_dev_ops; + eth_dev->rx_pkt_burst = &enic_recv_pkts; + eth_dev->tx_pkt_burst = &enic_xmit_pkts; + eth_dev->tx_pkt_prepare = &enic_prep_pkts; + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + enic_pick_tx_handler(eth_dev); + enic_pick_rx_handler(eth_dev); + return 0; + } + /* Only the primary sets up adapter and other data in shared memory */ + enic->port_id = eth_dev->data->port_id; + enic->rte_dev = eth_dev; + enic->dev_data = eth_dev->data; + /* Let rte_eth_dev_close() release the port resources */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + pdev = RTE_ETH_DEV_TO_PCI(eth_dev); + rte_eth_copy_pci_info(eth_dev, pdev); + enic->pdev = pdev; + addr = &pdev->addr; + + snprintf(enic->bdf_name, ENICPMD_BDF_LENGTH, "%04x:%02x:%02x.%x", + addr->domain, addr->bus, addr->devid, addr->function); + + err = enic_check_devargs(eth_dev); + if (err) + return err; + return enic_probe(enic); +} + +static int eth_enic_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct enic), + eth_enicpmd_dev_init); +} + +static int eth_enic_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, NULL); +} + +static struct rte_pci_driver rte_enic_pmd = { + .id_table = pci_id_enic_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_enic_pci_probe, + .remove = eth_enic_pci_remove, +}; + +int dev_is_enic(struct rte_eth_dev *dev) +{ + return dev->device->driver == &rte_enic_pmd.driver; +} + +RTE_PMD_REGISTER_PCI(net_enic, rte_enic_pmd); +RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map); +RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci"); +RTE_PMD_REGISTER_PARAM_STRING(net_enic, + ENIC_DEVARG_DISABLE_OVERLAY "=0|1 " + ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 " + ENIC_DEVARG_GENEVE_OPT "=0|1 " + ENIC_DEVARG_IG_VLAN_REWRITE "=trunk|untag|priority|pass"); diff --git a/src/spdk/dpdk/drivers/net/enic/enic_flow.c b/src/spdk/dpdk/drivers/net/enic/enic_flow.c new file mode 100644 index 000000000..cebca7d55 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_flow.c @@ -0,0 +1,1795 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + */ + +#include <errno.h> +#include <stdint.h> +#include <rte_log.h> +#include <rte_ethdev_driver.h> +#include <rte_flow_driver.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_udp.h> + +#include "enic_compat.h" +#include "enic.h" +#include "vnic_dev.h" +#include "vnic_nic.h" + +/* + * Common arguments passed to copy_item functions. Use this structure + * so we can easily add new arguments. + * item: Item specification. + * filter: Partially filled in NIC filter structure. + * inner_ofst: If zero, this is an outer header. If non-zero, this is + * the offset into L5 where the header begins. + * l2_proto_off: offset to EtherType eth or vlan header. + * l3_proto_off: offset to next protocol field in IPv4 or 6 header. + */ +struct copy_item_args { + const struct rte_flow_item *item; + struct filter_v2 *filter; + uint8_t *inner_ofst; + uint8_t l2_proto_off; + uint8_t l3_proto_off; + struct enic *enic; +}; + +/* functions for copying items into enic filters */ +typedef int (enic_copy_item_fn)(struct copy_item_args *arg); + +/** Info about how to copy items into enic filters. */ +struct enic_items { + /** Function for copying and validating an item. */ + enic_copy_item_fn *copy_item; + /** List of valid previous items. */ + const enum rte_flow_item_type * const prev_items; + /** True if it's OK for this item to be the first item. For some NIC + * versions, it's invalid to start the stack above layer 3. + */ + const uint8_t valid_start_item; + /* Inner packet version of copy_item. */ + enic_copy_item_fn *inner_copy_item; +}; + +/** Filtering capabilities for various NIC and firmware versions. */ +struct enic_filter_cap { + /** list of valid items and their handlers and attributes. */ + const struct enic_items *item_info; + /* Max type in the above list, used to detect unsupported types */ + enum rte_flow_item_type max_item_type; +}; + +/* functions for copying flow actions into enic actions */ +typedef int (copy_action_fn)(struct enic *enic, + const struct rte_flow_action actions[], + struct filter_action_v2 *enic_action); + +/** Action capabilities for various NICs. */ +struct enic_action_cap { + /** list of valid actions */ + const enum rte_flow_action_type *actions; + /** copy function for a particular NIC */ + copy_action_fn *copy_fn; +}; + +/* Forward declarations */ +static enic_copy_item_fn enic_copy_item_ipv4_v1; +static enic_copy_item_fn enic_copy_item_udp_v1; +static enic_copy_item_fn enic_copy_item_tcp_v1; +static enic_copy_item_fn enic_copy_item_raw_v2; +static enic_copy_item_fn enic_copy_item_eth_v2; +static enic_copy_item_fn enic_copy_item_vlan_v2; +static enic_copy_item_fn enic_copy_item_ipv4_v2; +static enic_copy_item_fn enic_copy_item_ipv6_v2; +static enic_copy_item_fn enic_copy_item_udp_v2; +static enic_copy_item_fn enic_copy_item_tcp_v2; +static enic_copy_item_fn enic_copy_item_sctp_v2; +static enic_copy_item_fn enic_copy_item_vxlan_v2; +static enic_copy_item_fn enic_copy_item_inner_eth_v2; +static enic_copy_item_fn enic_copy_item_inner_vlan_v2; +static enic_copy_item_fn enic_copy_item_inner_ipv4_v2; +static enic_copy_item_fn enic_copy_item_inner_ipv6_v2; +static enic_copy_item_fn enic_copy_item_inner_udp_v2; +static enic_copy_item_fn enic_copy_item_inner_tcp_v2; +static copy_action_fn enic_copy_action_v1; +static copy_action_fn enic_copy_action_v2; + +/** + * Legacy NICs or NICs with outdated firmware. Only 5-tuple perfect match + * is supported. + */ +static const struct enic_items enic_items_v1[] = { + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .copy_item = enic_copy_item_ipv4_v1, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .copy_item = enic_copy_item_udp_v1, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .copy_item = enic_copy_item_tcp_v1, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, +}; + +/** + * NICs have Advanced Filters capability but they are disabled. This means + * that layer 3 must be specified. + */ +static const struct enic_items enic_items_v2[] = { + [RTE_FLOW_ITEM_TYPE_RAW] = { + .copy_item = enic_copy_item_raw_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_ETH] = { + .copy_item = enic_copy_item_eth_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_eth_v2, + }, + [RTE_FLOW_ITEM_TYPE_VLAN] = { + .copy_item = enic_copy_item_vlan_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_vlan_v2, + }, + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .copy_item = enic_copy_item_ipv4_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_ipv4_v2, + }, + [RTE_FLOW_ITEM_TYPE_IPV6] = { + .copy_item = enic_copy_item_ipv6_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_ipv6_v2, + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .copy_item = enic_copy_item_udp_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_udp_v2, + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .copy_item = enic_copy_item_tcp_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_tcp_v2, + }, + [RTE_FLOW_ITEM_TYPE_SCTP] = { + .copy_item = enic_copy_item_sctp_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_VXLAN] = { + .copy_item = enic_copy_item_vxlan_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, +}; + +/** NICs with Advanced filters enabled */ +static const struct enic_items enic_items_v3[] = { + [RTE_FLOW_ITEM_TYPE_RAW] = { + .copy_item = enic_copy_item_raw_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_ETH] = { + .copy_item = enic_copy_item_eth_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_eth_v2, + }, + [RTE_FLOW_ITEM_TYPE_VLAN] = { + .copy_item = enic_copy_item_vlan_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_vlan_v2, + }, + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .copy_item = enic_copy_item_ipv4_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_ipv4_v2, + }, + [RTE_FLOW_ITEM_TYPE_IPV6] = { + .copy_item = enic_copy_item_ipv6_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_ipv6_v2, + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .copy_item = enic_copy_item_udp_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_udp_v2, + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .copy_item = enic_copy_item_tcp_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = enic_copy_item_inner_tcp_v2, + }, + [RTE_FLOW_ITEM_TYPE_SCTP] = { + .copy_item = enic_copy_item_sctp_v2, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, + [RTE_FLOW_ITEM_TYPE_VXLAN] = { + .copy_item = enic_copy_item_vxlan_v2, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + .inner_copy_item = NULL, + }, +}; + +/** Filtering capabilities indexed this NICs supported filter type. */ +static const struct enic_filter_cap enic_filter_cap[] = { + [FILTER_IPV4_5TUPLE] = { + .item_info = enic_items_v1, + .max_item_type = RTE_FLOW_ITEM_TYPE_TCP, + }, + [FILTER_USNIC_IP] = { + .item_info = enic_items_v2, + .max_item_type = RTE_FLOW_ITEM_TYPE_VXLAN, + }, + [FILTER_DPDK_1] = { + .item_info = enic_items_v3, + .max_item_type = RTE_FLOW_ITEM_TYPE_VXLAN, + }, +}; + +/** Supported actions for older NICs */ +static const enum rte_flow_action_type enic_supported_actions_v1[] = { + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_END, +}; + +/** Supported actions for newer NICs */ +static const enum rte_flow_action_type enic_supported_actions_v2_id[] = { + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_FLAG, + RTE_FLOW_ACTION_TYPE_RSS, + RTE_FLOW_ACTION_TYPE_PASSTHRU, + RTE_FLOW_ACTION_TYPE_END, +}; + +static const enum rte_flow_action_type enic_supported_actions_v2_drop[] = { + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_FLAG, + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_RSS, + RTE_FLOW_ACTION_TYPE_PASSTHRU, + RTE_FLOW_ACTION_TYPE_END, +}; + +/** Action capabilities indexed by NIC version information */ +static const struct enic_action_cap enic_action_cap[] = { + [FILTER_ACTION_RQ_STEERING_FLAG] = { + .actions = enic_supported_actions_v1, + .copy_fn = enic_copy_action_v1, + }, + [FILTER_ACTION_FILTER_ID_FLAG] = { + .actions = enic_supported_actions_v2_id, + .copy_fn = enic_copy_action_v2, + }, + [FILTER_ACTION_DROP_FLAG] = { + .actions = enic_supported_actions_v2_drop, + .copy_fn = enic_copy_action_v2, + }, +}; + +static int +mask_exact_match(const uint8_t *supported, const uint8_t *supplied, + unsigned int size) +{ + unsigned int i; + for (i = 0; i < size; i++) { + if (supported[i] != supplied[i]) + return 0; + } + return 1; +} + +static int +enic_copy_item_ipv4_v1(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + struct filter_ipv4_5tuple *enic_5tup = &enic_filter->u.ipv4; + struct rte_ipv4_hdr supported_mask = { + .src_addr = 0xffffffff, + .dst_addr = 0xffffffff, + }; + + ENICPMD_FUNC_TRACE(); + + if (!mask) + mask = &rte_flow_item_ipv4_mask; + + /* This is an exact match filter, both fields must be set */ + if (!spec || !spec->hdr.src_addr || !spec->hdr.dst_addr) { + ENICPMD_LOG(ERR, "IPv4 exact match src/dst addr"); + return ENOTSUP; + } + + /* check that the suppied mask exactly matches capabilty */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, sizeof(*mask))) { + ENICPMD_LOG(ERR, "IPv4 exact match mask"); + return ENOTSUP; + } + + enic_filter->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; + enic_5tup->src_addr = spec->hdr.src_addr; + enic_5tup->dst_addr = spec->hdr.dst_addr; + + return 0; +} + +static int +enic_copy_item_udp_v1(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + struct filter_ipv4_5tuple *enic_5tup = &enic_filter->u.ipv4; + struct rte_udp_hdr supported_mask = { + .src_port = 0xffff, + .dst_port = 0xffff, + }; + + ENICPMD_FUNC_TRACE(); + + if (!mask) + mask = &rte_flow_item_udp_mask; + + /* This is an exact match filter, both ports must be set */ + if (!spec || !spec->hdr.src_port || !spec->hdr.dst_port) { + ENICPMD_LOG(ERR, "UDP exact match src/dst addr"); + return ENOTSUP; + } + + /* check that the suppied mask exactly matches capabilty */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, sizeof(*mask))) { + ENICPMD_LOG(ERR, "UDP exact match mask"); + return ENOTSUP; + } + + enic_filter->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; + enic_5tup->src_port = spec->hdr.src_port; + enic_5tup->dst_port = spec->hdr.dst_port; + enic_5tup->protocol = PROTO_UDP; + + return 0; +} + +static int +enic_copy_item_tcp_v1(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; + struct filter_ipv4_5tuple *enic_5tup = &enic_filter->u.ipv4; + struct rte_tcp_hdr supported_mask = { + .src_port = 0xffff, + .dst_port = 0xffff, + }; + + ENICPMD_FUNC_TRACE(); + + if (!mask) + mask = &rte_flow_item_tcp_mask; + + /* This is an exact match filter, both ports must be set */ + if (!spec || !spec->hdr.src_port || !spec->hdr.dst_port) { + ENICPMD_LOG(ERR, "TCPIPv4 exact match src/dst addr"); + return ENOTSUP; + } + + /* check that the suppied mask exactly matches capabilty */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, sizeof(*mask))) { + ENICPMD_LOG(ERR, "TCP exact match mask"); + return ENOTSUP; + } + + enic_filter->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; + enic_5tup->src_port = spec->hdr.src_port; + enic_5tup->dst_port = spec->hdr.dst_port; + enic_5tup->protocol = PROTO_TCP; + + return 0; +} + +/* + * The common 'copy' function for all inner packet patterns. Patterns are + * first appended to the L5 pattern buffer. Then, since the NIC filter + * API has no special support for inner packet matching at the moment, + * we set EtherType and IP proto as necessary. + */ +static int +copy_inner_common(struct filter_generic_1 *gp, uint8_t *inner_ofst, + const void *val, const void *mask, uint8_t val_size, + uint8_t proto_off, uint16_t proto_val, uint8_t proto_size) +{ + uint8_t *l5_mask, *l5_val; + uint8_t start_off; + + /* No space left in the L5 pattern buffer. */ + start_off = *inner_ofst; + if ((start_off + val_size) > FILTER_GENERIC_1_KEY_LEN) + return ENOTSUP; + l5_mask = gp->layer[FILTER_GENERIC_1_L5].mask; + l5_val = gp->layer[FILTER_GENERIC_1_L5].val; + /* Copy the pattern into the L5 buffer. */ + if (val) { + memcpy(l5_mask + start_off, mask, val_size); + memcpy(l5_val + start_off, val, val_size); + } + /* Set the protocol field in the previous header. */ + if (proto_off) { + void *m, *v; + + m = l5_mask + proto_off; + v = l5_val + proto_off; + if (proto_size == 1) { + *(uint8_t *)m = 0xff; + *(uint8_t *)v = (uint8_t)proto_val; + } else if (proto_size == 2) { + *(uint16_t *)m = 0xffff; + *(uint16_t *)v = proto_val; + } + } + /* All inner headers land in L5 buffer even if their spec is null. */ + *inner_ofst += val_size; + return 0; +} + +static int +enic_copy_item_inner_eth_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_eth_mask; + arg->l2_proto_off = *off + offsetof(struct rte_ether_hdr, ether_type); + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_ether_hdr), + 0 /* no previous protocol */, 0, 0); +} + +static int +enic_copy_item_inner_vlan_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + uint8_t eth_type_off; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_vlan_mask; + /* Append vlan header to L5 and set ether type = TPID */ + eth_type_off = arg->l2_proto_off; + arg->l2_proto_off = *off + offsetof(struct rte_vlan_hdr, eth_proto); + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_vlan_hdr), + eth_type_off, rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN), 2); +} + +static int +enic_copy_item_inner_ipv4_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_ipv4_mask; + /* Append ipv4 header to L5 and set ether type = ipv4 */ + arg->l3_proto_off = *off + offsetof(struct rte_ipv4_hdr, next_proto_id); + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_ipv4_hdr), + arg->l2_proto_off, rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4), 2); +} + +static int +enic_copy_item_inner_ipv6_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_ipv6_mask; + /* Append ipv6 header to L5 and set ether type = ipv6 */ + arg->l3_proto_off = *off + offsetof(struct rte_ipv6_hdr, proto); + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_ipv6_hdr), + arg->l2_proto_off, rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6), 2); +} + +static int +enic_copy_item_inner_udp_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_udp_mask; + /* Append udp header to L5 and set ip proto = udp */ + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_udp_hdr), + arg->l3_proto_off, IPPROTO_UDP, 1); +} + +static int +enic_copy_item_inner_tcp_v2(struct copy_item_args *arg) +{ + const void *mask = arg->item->mask; + uint8_t *off = arg->inner_ofst; + + ENICPMD_FUNC_TRACE(); + if (!mask) + mask = &rte_flow_item_tcp_mask; + /* Append tcp header to L5 and set ip proto = tcp */ + return copy_inner_common(&arg->filter->u.generic_1, off, + arg->item->spec, mask, sizeof(struct rte_tcp_hdr), + arg->l3_proto_off, IPPROTO_TCP, 1); +} + +static int +enic_copy_item_eth_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + struct rte_ether_hdr enic_spec; + struct rte_ether_hdr enic_mask; + const struct rte_flow_item_eth *spec = item->spec; + const struct rte_flow_item_eth *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_eth_mask; + + memcpy(enic_spec.d_addr.addr_bytes, spec->dst.addr_bytes, + RTE_ETHER_ADDR_LEN); + memcpy(enic_spec.s_addr.addr_bytes, spec->src.addr_bytes, + RTE_ETHER_ADDR_LEN); + + memcpy(enic_mask.d_addr.addr_bytes, mask->dst.addr_bytes, + RTE_ETHER_ADDR_LEN); + memcpy(enic_mask.s_addr.addr_bytes, mask->src.addr_bytes, + RTE_ETHER_ADDR_LEN); + enic_spec.ether_type = spec->type; + enic_mask.ether_type = mask->type; + + /* outer header */ + memcpy(gp->layer[FILTER_GENERIC_1_L2].mask, &enic_mask, + sizeof(struct rte_ether_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L2].val, &enic_spec, + sizeof(struct rte_ether_hdr)); + return 0; +} + +static int +enic_copy_item_vlan_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_vlan *spec = item->spec; + const struct rte_flow_item_vlan *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + struct rte_ether_hdr *eth_mask; + struct rte_ether_hdr *eth_val; + + ENICPMD_FUNC_TRACE(); + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_vlan_mask; + + eth_mask = (void *)gp->layer[FILTER_GENERIC_1_L2].mask; + eth_val = (void *)gp->layer[FILTER_GENERIC_1_L2].val; + /* Outer TPID cannot be matched */ + if (eth_mask->ether_type) + return ENOTSUP; + /* + * For recent models: + * When packet matching, the VIC always compares vlan-stripped + * L2, regardless of vlan stripping settings. So, the inner type + * from vlan becomes the ether type of the eth header. + * + * Older models w/o hardware vxlan parser have a different + * behavior when vlan stripping is disabled. In this case, + * vlan tag remains in the L2 buffer. + */ + if (!arg->enic->vxlan && !arg->enic->ig_vlan_strip_en) { + struct rte_vlan_hdr *vlan; + + vlan = (struct rte_vlan_hdr *)(eth_mask + 1); + vlan->eth_proto = mask->inner_type; + vlan = (struct rte_vlan_hdr *)(eth_val + 1); + vlan->eth_proto = spec->inner_type; + } else { + eth_mask->ether_type = mask->inner_type; + eth_val->ether_type = spec->inner_type; + } + /* For TCI, use the vlan mask/val fields (little endian). */ + gp->mask_vlan = rte_be_to_cpu_16(mask->tci); + gp->val_vlan = rte_be_to_cpu_16(spec->tci); + return 0; +} + +static int +enic_copy_item_ipv4_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Match IPv4 */ + gp->mask_flags |= FILTER_GENERIC_1_IPV4; + gp->val_flags |= FILTER_GENERIC_1_IPV4; + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_ipv4_mask; + + memcpy(gp->layer[FILTER_GENERIC_1_L3].mask, &mask->hdr, + sizeof(struct rte_ipv4_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L3].val, &spec->hdr, + sizeof(struct rte_ipv4_hdr)); + return 0; +} + +static int +enic_copy_item_ipv6_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Match IPv6 */ + gp->mask_flags |= FILTER_GENERIC_1_IPV6; + gp->val_flags |= FILTER_GENERIC_1_IPV6; + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_ipv6_mask; + + memcpy(gp->layer[FILTER_GENERIC_1_L3].mask, &mask->hdr, + sizeof(struct rte_ipv6_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L3].val, &spec->hdr, + sizeof(struct rte_ipv6_hdr)); + return 0; +} + +static int +enic_copy_item_udp_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Match UDP */ + gp->mask_flags |= FILTER_GENERIC_1_UDP; + gp->val_flags |= FILTER_GENERIC_1_UDP; + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_udp_mask; + + memcpy(gp->layer[FILTER_GENERIC_1_L4].mask, &mask->hdr, + sizeof(struct rte_udp_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L4].val, &spec->hdr, + sizeof(struct rte_udp_hdr)); + return 0; +} + +static int +enic_copy_item_tcp_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Match TCP */ + gp->mask_flags |= FILTER_GENERIC_1_TCP; + gp->val_flags |= FILTER_GENERIC_1_TCP; + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + return ENOTSUP; + + memcpy(gp->layer[FILTER_GENERIC_1_L4].mask, &mask->hdr, + sizeof(struct rte_tcp_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L4].val, &spec->hdr, + sizeof(struct rte_tcp_hdr)); + return 0; +} + +static int +enic_copy_item_sctp_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + const struct rte_flow_item_sctp *spec = item->spec; + const struct rte_flow_item_sctp *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + uint8_t *ip_proto_mask = NULL; + uint8_t *ip_proto = NULL; + + ENICPMD_FUNC_TRACE(); + + /* + * The NIC filter API has no flags for "match sctp", so explicitly set + * the protocol number in the IP pattern. + */ + if (gp->val_flags & FILTER_GENERIC_1_IPV4) { + struct rte_ipv4_hdr *ip; + ip = (struct rte_ipv4_hdr *)gp->layer[FILTER_GENERIC_1_L3].mask; + ip_proto_mask = &ip->next_proto_id; + ip = (struct rte_ipv4_hdr *)gp->layer[FILTER_GENERIC_1_L3].val; + ip_proto = &ip->next_proto_id; + } else if (gp->val_flags & FILTER_GENERIC_1_IPV6) { + struct rte_ipv6_hdr *ip; + ip = (struct rte_ipv6_hdr *)gp->layer[FILTER_GENERIC_1_L3].mask; + ip_proto_mask = &ip->proto; + ip = (struct rte_ipv6_hdr *)gp->layer[FILTER_GENERIC_1_L3].val; + ip_proto = &ip->proto; + } else { + /* Need IPv4/IPv6 pattern first */ + return EINVAL; + } + *ip_proto = IPPROTO_SCTP; + *ip_proto_mask = 0xff; + + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_sctp_mask; + + memcpy(gp->layer[FILTER_GENERIC_1_L4].mask, &mask->hdr, + sizeof(struct rte_sctp_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L4].val, &spec->hdr, + sizeof(struct rte_sctp_hdr)); + return 0; +} + +static int +enic_copy_item_vxlan_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + uint8_t *inner_ofst = arg->inner_ofst; + const struct rte_flow_item_vxlan *spec = item->spec; + const struct rte_flow_item_vxlan *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + struct rte_udp_hdr *udp; + + ENICPMD_FUNC_TRACE(); + + /* + * The NIC filter API has no flags for "match vxlan". Set UDP port to + * avoid false positives. + */ + gp->mask_flags |= FILTER_GENERIC_1_UDP; + gp->val_flags |= FILTER_GENERIC_1_UDP; + udp = (struct rte_udp_hdr *)gp->layer[FILTER_GENERIC_1_L4].mask; + udp->dst_port = 0xffff; + udp = (struct rte_udp_hdr *)gp->layer[FILTER_GENERIC_1_L4].val; + udp->dst_port = RTE_BE16(4789); + /* Match all if no spec */ + if (!spec) + return 0; + + if (!mask) + mask = &rte_flow_item_vxlan_mask; + + memcpy(gp->layer[FILTER_GENERIC_1_L5].mask, mask, + sizeof(struct rte_vxlan_hdr)); + memcpy(gp->layer[FILTER_GENERIC_1_L5].val, spec, + sizeof(struct rte_vxlan_hdr)); + + *inner_ofst = sizeof(struct rte_vxlan_hdr); + return 0; +} + +/* + * Copy raw item into version 2 NIC filter. Currently, raw pattern match is + * very limited. It is intended for matching UDP tunnel header (e.g. vxlan + * or geneve). + */ +static int +enic_copy_item_raw_v2(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + struct filter_v2 *enic_filter = arg->filter; + uint8_t *inner_ofst = arg->inner_ofst; + const struct rte_flow_item_raw *spec = item->spec; + const struct rte_flow_item_raw *mask = item->mask; + struct filter_generic_1 *gp = &enic_filter->u.generic_1; + + ENICPMD_FUNC_TRACE(); + + /* Cannot be used for inner packet */ + if (*inner_ofst) + return EINVAL; + /* Need both spec and mask */ + if (!spec || !mask) + return EINVAL; + /* Only supports relative with offset 0 */ + if (!spec->relative || spec->offset != 0 || spec->search || spec->limit) + return EINVAL; + /* Need non-null pattern that fits within the NIC's filter pattern */ + if (spec->length == 0 || + spec->length + sizeof(struct rte_udp_hdr) > FILTER_GENERIC_1_KEY_LEN || + !spec->pattern || !mask->pattern) + return EINVAL; + /* + * Mask fields, including length, are often set to zero. Assume that + * means "same as spec" to avoid breaking existing apps. If length + * is not zero, then it should be >= spec length. + * + * No more pattern follows this, so append to the L4 layer instead of + * L5 to work with both recent and older VICs. + */ + if (mask->length != 0 && mask->length < spec->length) + return EINVAL; + memcpy(gp->layer[FILTER_GENERIC_1_L4].mask + sizeof(struct rte_udp_hdr), + mask->pattern, spec->length); + memcpy(gp->layer[FILTER_GENERIC_1_L4].val + sizeof(struct rte_udp_hdr), + spec->pattern, spec->length); + + return 0; +} + +/** + * Return 1 if current item is valid on top of the previous one. + * + * @param prev_item[in] + * The item before this one in the pattern or RTE_FLOW_ITEM_TYPE_END if this + * is the first item. + * @param item_info[in] + * Info about this item, like valid previous items. + * @param is_first[in] + * True if this the first item in the pattern. + */ +static int +item_stacking_valid(enum rte_flow_item_type prev_item, + const struct enic_items *item_info, uint8_t is_first_item) +{ + enum rte_flow_item_type const *allowed_items = item_info->prev_items; + + ENICPMD_FUNC_TRACE(); + + for (; *allowed_items != RTE_FLOW_ITEM_TYPE_END; allowed_items++) { + if (prev_item == *allowed_items) + return 1; + } + + /* This is the first item in the stack. Check if that's cool */ + if (is_first_item && item_info->valid_start_item) + return 1; + + return 0; +} + +/* + * Fix up the L5 layer.. HW vxlan parsing removes vxlan header from L5. + * Instead it is in L4 following the UDP header. Append the vxlan + * pattern to L4 (udp) and shift any inner packet pattern in L5. + */ +static void +fixup_l5_layer(struct enic *enic, struct filter_generic_1 *gp, + uint8_t inner_ofst) +{ + uint8_t layer[FILTER_GENERIC_1_KEY_LEN]; + uint8_t inner; + uint8_t vxlan; + + if (!(inner_ofst > 0 && enic->vxlan)) + return; + ENICPMD_FUNC_TRACE(); + vxlan = sizeof(struct rte_vxlan_hdr); + memcpy(gp->layer[FILTER_GENERIC_1_L4].mask + sizeof(struct rte_udp_hdr), + gp->layer[FILTER_GENERIC_1_L5].mask, vxlan); + memcpy(gp->layer[FILTER_GENERIC_1_L4].val + sizeof(struct rte_udp_hdr), + gp->layer[FILTER_GENERIC_1_L5].val, vxlan); + inner = inner_ofst - vxlan; + memset(layer, 0, sizeof(layer)); + memcpy(layer, gp->layer[FILTER_GENERIC_1_L5].mask + vxlan, inner); + memcpy(gp->layer[FILTER_GENERIC_1_L5].mask, layer, sizeof(layer)); + memset(layer, 0, sizeof(layer)); + memcpy(layer, gp->layer[FILTER_GENERIC_1_L5].val + vxlan, inner); + memcpy(gp->layer[FILTER_GENERIC_1_L5].val, layer, sizeof(layer)); +} + +/** + * Build the intenal enic filter structure from the provided pattern. The + * pattern is validated as the items are copied. + * + * @param pattern[in] + * @param items_info[in] + * Info about this NICs item support, like valid previous items. + * @param enic_filter[out] + * NIC specfilc filters derived from the pattern. + * @param error[out] + */ +static int +enic_copy_filter(const struct rte_flow_item pattern[], + const struct enic_filter_cap *cap, + struct enic *enic, + struct filter_v2 *enic_filter, + struct rte_flow_error *error) +{ + int ret; + const struct rte_flow_item *item = pattern; + uint8_t inner_ofst = 0; /* If encapsulated, ofst into L5 */ + enum rte_flow_item_type prev_item; + const struct enic_items *item_info; + struct copy_item_args args; + enic_copy_item_fn *copy_fn; + uint8_t is_first_item = 1; + + ENICPMD_FUNC_TRACE(); + + prev_item = 0; + + args.filter = enic_filter; + args.inner_ofst = &inner_ofst; + args.enic = enic; + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + /* Get info about how to validate and copy the item. If NULL + * is returned the nic does not support the item. + */ + if (item->type == RTE_FLOW_ITEM_TYPE_VOID) + continue; + + item_info = &cap->item_info[item->type]; + if (item->type > cap->max_item_type || + item_info->copy_item == NULL || + (inner_ofst > 0 && item_info->inner_copy_item == NULL)) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Unsupported item."); + return -rte_errno; + } + + /* check to see if item stacking is valid */ + if (!item_stacking_valid(prev_item, item_info, is_first_item)) + goto stacking_error; + + args.item = item; + copy_fn = inner_ofst > 0 ? item_info->inner_copy_item : + item_info->copy_item; + ret = copy_fn(&args); + if (ret) + goto item_not_supported; + prev_item = item->type; + is_first_item = 0; + } + fixup_l5_layer(enic, &enic_filter->u.generic_1, inner_ofst); + + return 0; + +item_not_supported: + rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "enic type error"); + return -rte_errno; + +stacking_error: + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "stacking error"); + return -rte_errno; +} + +/** + * Build the intenal version 1 NIC action structure from the provided pattern. + * The pattern is validated as the items are copied. + * + * @param actions[in] + * @param enic_action[out] + * NIC specfilc actions derived from the actions. + * @param error[out] + */ +static int +enic_copy_action_v1(__rte_unused struct enic *enic, + const struct rte_flow_action actions[], + struct filter_action_v2 *enic_action) +{ + enum { FATE = 1, }; + uint32_t overlap = 0; + + ENICPMD_FUNC_TRACE(); + + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) + continue; + + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_QUEUE: { + const struct rte_flow_action_queue *queue = + (const struct rte_flow_action_queue *) + actions->conf; + + if (overlap & FATE) + return ENOTSUP; + overlap |= FATE; + enic_action->rq_idx = + enic_rte_rq_idx_to_sop_idx(queue->index); + break; + } + default: + RTE_ASSERT(0); + break; + } + } + if (!(overlap & FATE)) + return ENOTSUP; + enic_action->type = FILTER_ACTION_RQ_STEERING; + return 0; +} + +/** + * Build the intenal version 2 NIC action structure from the provided pattern. + * The pattern is validated as the items are copied. + * + * @param actions[in] + * @param enic_action[out] + * NIC specfilc actions derived from the actions. + * @param error[out] + */ +static int +enic_copy_action_v2(struct enic *enic, + const struct rte_flow_action actions[], + struct filter_action_v2 *enic_action) +{ + enum { FATE = 1, MARK = 2, }; + uint32_t overlap = 0; + bool passthru = false; + + ENICPMD_FUNC_TRACE(); + + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_QUEUE: { + const struct rte_flow_action_queue *queue = + (const struct rte_flow_action_queue *) + actions->conf; + + if (overlap & FATE) + return ENOTSUP; + overlap |= FATE; + enic_action->rq_idx = + enic_rte_rq_idx_to_sop_idx(queue->index); + enic_action->flags |= FILTER_ACTION_RQ_STEERING_FLAG; + break; + } + case RTE_FLOW_ACTION_TYPE_MARK: { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) + actions->conf; + + if (overlap & MARK) + return ENOTSUP; + overlap |= MARK; + /* + * Map mark ID (32-bit) to filter ID (16-bit): + * - Reject values > 16 bits + * - Filter ID 0 is reserved for filters that steer + * but not mark. So add 1 to the mark ID to avoid + * using 0. + * - Filter ID (ENIC_MAGIC_FILTER_ID = 0xffff) is + * reserved for the "flag" action below. + */ + if (mark->id >= ENIC_MAGIC_FILTER_ID - 1) + return EINVAL; + enic_action->filter_id = mark->id + 1; + enic_action->flags |= FILTER_ACTION_FILTER_ID_FLAG; + break; + } + case RTE_FLOW_ACTION_TYPE_FLAG: { + if (overlap & MARK) + return ENOTSUP; + overlap |= MARK; + /* ENIC_MAGIC_FILTER_ID is reserved for flagging */ + enic_action->filter_id = ENIC_MAGIC_FILTER_ID; + enic_action->flags |= FILTER_ACTION_FILTER_ID_FLAG; + break; + } + case RTE_FLOW_ACTION_TYPE_DROP: { + if (overlap & FATE) + return ENOTSUP; + overlap |= FATE; + enic_action->flags |= FILTER_ACTION_DROP_FLAG; + break; + } + case RTE_FLOW_ACTION_TYPE_RSS: { + const struct rte_flow_action_rss *rss = + (const struct rte_flow_action_rss *) + actions->conf; + bool allow; + uint16_t i; + + /* + * Hardware does not support general RSS actions, but + * we can still support the dummy one that is used to + * "receive normally". + */ + allow = rss->func == RTE_ETH_HASH_FUNCTION_DEFAULT && + rss->level == 0 && + (rss->types == 0 || + rss->types == enic->rss_hf) && + rss->queue_num == enic->rq_count && + rss->key_len == 0; + /* Identity queue map is ok */ + for (i = 0; i < rss->queue_num; i++) + allow = allow && (i == rss->queue[i]); + if (!allow) + return ENOTSUP; + if (overlap & FATE) + return ENOTSUP; + /* Need MARK or FLAG */ + if (!(overlap & MARK)) + return ENOTSUP; + overlap |= FATE; + break; + } + case RTE_FLOW_ACTION_TYPE_PASSTHRU: { + /* + * Like RSS above, PASSTHRU + MARK may be used to + * "mark and then receive normally". MARK usually comes + * after PASSTHRU, so remember we have seen passthru + * and check for mark later. + */ + if (overlap & FATE) + return ENOTSUP; + overlap |= FATE; + passthru = true; + break; + } + case RTE_FLOW_ACTION_TYPE_VOID: + continue; + default: + RTE_ASSERT(0); + break; + } + } + /* Only PASSTHRU + MARK is allowed */ + if (passthru && !(overlap & MARK)) + return ENOTSUP; + if (!(overlap & FATE)) + return ENOTSUP; + enic_action->type = FILTER_ACTION_V2; + return 0; +} + +/** Check if the action is supported */ +static int +enic_match_action(const struct rte_flow_action *action, + const enum rte_flow_action_type *supported_actions) +{ + for (; *supported_actions != RTE_FLOW_ACTION_TYPE_END; + supported_actions++) { + if (action->type == *supported_actions) + return 1; + } + return 0; +} + +/** Get the NIC filter capabilties structure */ +static const struct enic_filter_cap * +enic_get_filter_cap(struct enic *enic) +{ + if (enic->flow_filter_mode) + return &enic_filter_cap[enic->flow_filter_mode]; + + return NULL; +} + +/** Get the actions for this NIC version. */ +static const struct enic_action_cap * +enic_get_action_cap(struct enic *enic) +{ + const struct enic_action_cap *ea; + uint8_t actions; + + actions = enic->filter_actions; + if (actions & FILTER_ACTION_DROP_FLAG) + ea = &enic_action_cap[FILTER_ACTION_DROP_FLAG]; + else if (actions & FILTER_ACTION_FILTER_ID_FLAG) + ea = &enic_action_cap[FILTER_ACTION_FILTER_ID_FLAG]; + else + ea = &enic_action_cap[FILTER_ACTION_RQ_STEERING_FLAG]; + return ea; +} + +/* Debug function to dump internal NIC action structure. */ +static void +enic_dump_actions(const struct filter_action_v2 *ea) +{ + if (ea->type == FILTER_ACTION_RQ_STEERING) { + ENICPMD_LOG(INFO, "Action(V1), queue: %u\n", ea->rq_idx); + } else if (ea->type == FILTER_ACTION_V2) { + ENICPMD_LOG(INFO, "Actions(V2)\n"); + if (ea->flags & FILTER_ACTION_RQ_STEERING_FLAG) + ENICPMD_LOG(INFO, "\tqueue: %u\n", + enic_sop_rq_idx_to_rte_idx(ea->rq_idx)); + if (ea->flags & FILTER_ACTION_FILTER_ID_FLAG) + ENICPMD_LOG(INFO, "\tfilter_id: %u\n", ea->filter_id); + } +} + +/* Debug function to dump internal NIC filter structure. */ +static void +enic_dump_filter(const struct filter_v2 *filt) +{ + const struct filter_generic_1 *gp; + int i, j, mbyte; + char buf[128], *bp; + char ip4[16], ip6[16], udp[16], tcp[16], tcpudp[16], ip4csum[16]; + char l4csum[16], ipfrag[16]; + + switch (filt->type) { + case FILTER_IPV4_5TUPLE: + ENICPMD_LOG(INFO, "FILTER_IPV4_5TUPLE\n"); + break; + case FILTER_USNIC_IP: + case FILTER_DPDK_1: + /* FIXME: this should be a loop */ + gp = &filt->u.generic_1; + ENICPMD_LOG(INFO, "Filter: vlan: 0x%04x, mask: 0x%04x\n", + gp->val_vlan, gp->mask_vlan); + + if (gp->mask_flags & FILTER_GENERIC_1_IPV4) + sprintf(ip4, "%s ", + (gp->val_flags & FILTER_GENERIC_1_IPV4) + ? "ip4(y)" : "ip4(n)"); + else + sprintf(ip4, "%s ", "ip4(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_IPV6) + sprintf(ip6, "%s ", + (gp->val_flags & FILTER_GENERIC_1_IPV4) + ? "ip6(y)" : "ip6(n)"); + else + sprintf(ip6, "%s ", "ip6(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_UDP) + sprintf(udp, "%s ", + (gp->val_flags & FILTER_GENERIC_1_UDP) + ? "udp(y)" : "udp(n)"); + else + sprintf(udp, "%s ", "udp(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_TCP) + sprintf(tcp, "%s ", + (gp->val_flags & FILTER_GENERIC_1_TCP) + ? "tcp(y)" : "tcp(n)"); + else + sprintf(tcp, "%s ", "tcp(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_TCP_OR_UDP) + sprintf(tcpudp, "%s ", + (gp->val_flags & FILTER_GENERIC_1_TCP_OR_UDP) + ? "tcpudp(y)" : "tcpudp(n)"); + else + sprintf(tcpudp, "%s ", "tcpudp(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_IP4SUM_OK) + sprintf(ip4csum, "%s ", + (gp->val_flags & FILTER_GENERIC_1_IP4SUM_OK) + ? "ip4csum(y)" : "ip4csum(n)"); + else + sprintf(ip4csum, "%s ", "ip4csum(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_L4SUM_OK) + sprintf(l4csum, "%s ", + (gp->val_flags & FILTER_GENERIC_1_L4SUM_OK) + ? "l4csum(y)" : "l4csum(n)"); + else + sprintf(l4csum, "%s ", "l4csum(x)"); + + if (gp->mask_flags & FILTER_GENERIC_1_IPFRAG) + sprintf(ipfrag, "%s ", + (gp->val_flags & FILTER_GENERIC_1_IPFRAG) + ? "ipfrag(y)" : "ipfrag(n)"); + else + sprintf(ipfrag, "%s ", "ipfrag(x)"); + ENICPMD_LOG(INFO, "\tFlags: %s%s%s%s%s%s%s%s\n", ip4, ip6, udp, + tcp, tcpudp, ip4csum, l4csum, ipfrag); + + for (i = 0; i < FILTER_GENERIC_1_NUM_LAYERS; i++) { + mbyte = FILTER_GENERIC_1_KEY_LEN - 1; + while (mbyte && !gp->layer[i].mask[mbyte]) + mbyte--; + if (mbyte == 0) + continue; + + bp = buf; + for (j = 0; j <= mbyte; j++) { + sprintf(bp, "%02x", + gp->layer[i].mask[j]); + bp += 2; + } + *bp = '\0'; + ENICPMD_LOG(INFO, "\tL%u mask: %s\n", i + 2, buf); + bp = buf; + for (j = 0; j <= mbyte; j++) { + sprintf(bp, "%02x", + gp->layer[i].val[j]); + bp += 2; + } + *bp = '\0'; + ENICPMD_LOG(INFO, "\tL%u val: %s\n", i + 2, buf); + } + break; + default: + ENICPMD_LOG(INFO, "FILTER UNKNOWN\n"); + break; + } +} + +/* Debug function to dump internal NIC flow structures. */ +static void +enic_dump_flow(const struct filter_action_v2 *ea, const struct filter_v2 *filt) +{ + enic_dump_filter(filt); + enic_dump_actions(ea); +} + + +/** + * Internal flow parse/validate function. + * + * @param dev[in] + * This device pointer. + * @param pattern[in] + * @param actions[in] + * @param error[out] + * @param enic_filter[out] + * Internal NIC filter structure pointer. + * @param enic_action[out] + * Internal NIC action structure pointer. + */ +static int +enic_flow_parse(struct rte_eth_dev *dev, + const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + struct filter_v2 *enic_filter, + struct filter_action_v2 *enic_action) +{ + unsigned int ret = 0; + struct enic *enic = pmd_priv(dev); + const struct enic_filter_cap *enic_filter_cap; + const struct enic_action_cap *enic_action_cap; + const struct rte_flow_action *action; + + ENICPMD_FUNC_TRACE(); + + memset(enic_filter, 0, sizeof(*enic_filter)); + memset(enic_action, 0, sizeof(*enic_action)); + + if (!pattern) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "No pattern specified"); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "No action specified"); + return -rte_errno; + } + + if (attrs) { + if (attrs->group) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "priority groups are not supported"); + return -rte_errno; + } else if (attrs->priority) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + NULL, + "priorities are not supported"); + return -rte_errno; + } else if (attrs->egress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, + "egress is not supported"); + return -rte_errno; + } else if (attrs->transfer) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, + NULL, + "transfer is not supported"); + return -rte_errno; + } else if (!attrs->ingress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "only ingress is supported"); + return -rte_errno; + } + + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "No attribute specified"); + return -rte_errno; + } + + /* Verify Actions. */ + enic_action_cap = enic_get_action_cap(enic); + for (action = &actions[0]; action->type != RTE_FLOW_ACTION_TYPE_END; + action++) { + if (action->type == RTE_FLOW_ACTION_TYPE_VOID) + continue; + else if (!enic_match_action(action, enic_action_cap->actions)) + break; + } + if (action->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EPERM, RTE_FLOW_ERROR_TYPE_ACTION, + action, "Invalid action."); + return -rte_errno; + } + ret = enic_action_cap->copy_fn(enic, actions, enic_action); + if (ret) { + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Unsupported action."); + return -rte_errno; + } + + /* Verify Flow items. If copying the filter from flow format to enic + * format fails, the flow is not supported + */ + enic_filter_cap = enic_get_filter_cap(enic); + if (enic_filter_cap == NULL) { + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Flow API not available"); + return -rte_errno; + } + enic_filter->type = enic->flow_filter_mode; + ret = enic_copy_filter(pattern, enic_filter_cap, enic, + enic_filter, error); + return ret; +} + +/** + * Push filter/action to the NIC. + * + * @param enic[in] + * Device structure pointer. + * @param enic_filter[in] + * Internal NIC filter structure pointer. + * @param enic_action[in] + * Internal NIC action structure pointer. + * @param error[out] + */ +static struct rte_flow * +enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter, + struct filter_action_v2 *enic_action, + struct rte_flow_error *error) +{ + struct rte_flow *flow; + int err; + uint16_t entry; + + ENICPMD_FUNC_TRACE(); + + flow = rte_calloc(__func__, 1, sizeof(*flow), 0); + if (!flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate flow memory"); + return NULL; + } + + /* entry[in] is the queue id, entry[out] is the filter Id for delete */ + entry = enic_action->rq_idx; + err = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter, + enic_action); + if (err) { + rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "vnic_dev_classifier error"); + rte_free(flow); + return NULL; + } + + flow->enic_filter_id = entry; + flow->enic_filter = *enic_filter; + return flow; +} + +/** + * Remove filter/action from the NIC. + * + * @param enic[in] + * Device structure pointer. + * @param filter_id[in] + * Id of NIC filter. + * @param enic_action[in] + * Internal NIC action structure pointer. + * @param error[out] + */ +static int +enic_flow_del_filter(struct enic *enic, struct rte_flow *flow, + struct rte_flow_error *error) +{ + uint16_t filter_id; + int err; + + ENICPMD_FUNC_TRACE(); + + filter_id = flow->enic_filter_id; + err = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL); + if (err) { + rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "vnic_dev_classifier failed"); + return -err; + } + return 0; +} + +/* + * The following functions are callbacks for Generic flow API. + */ + +/** + * Validate a flow supported by the NIC. + * + * @see rte_flow_validate() + * @see rte_flow_ops + */ +static int +enic_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct filter_v2 enic_filter; + struct filter_action_v2 enic_action; + int ret; + + ENICPMD_FUNC_TRACE(); + + ret = enic_flow_parse(dev, attrs, pattern, actions, error, + &enic_filter, &enic_action); + if (!ret) + enic_dump_flow(&enic_action, &enic_filter); + return ret; +} + +/** + * Create a flow supported by the NIC. + * + * @see rte_flow_create() + * @see rte_flow_ops + */ +static struct rte_flow * +enic_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + int ret; + struct filter_v2 enic_filter; + struct filter_action_v2 enic_action; + struct rte_flow *flow; + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + + ret = enic_flow_parse(dev, attrs, pattern, actions, error, &enic_filter, + &enic_action); + if (ret < 0) + return NULL; + + flow = enic_flow_add_filter(enic, &enic_filter, &enic_action, + error); + if (flow) + LIST_INSERT_HEAD(&enic->flows, flow, next); + + return flow; +} + +/** + * Destroy a flow supported by the NIC. + * + * @see rte_flow_destroy() + * @see rte_flow_ops + */ +static int +enic_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + __rte_unused struct rte_flow_error *error) +{ + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + + enic_flow_del_filter(enic, flow, error); + LIST_REMOVE(flow, next); + rte_free(flow); + return 0; +} + +/** + * Flush all flows on the device. + * + * @see rte_flow_flush() + * @see rte_flow_ops + */ +static int +enic_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) +{ + struct rte_flow *flow; + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + + + while (!LIST_EMPTY(&enic->flows)) { + flow = LIST_FIRST(&enic->flows); + enic_flow_del_filter(enic, flow, error); + LIST_REMOVE(flow, next); + rte_free(flow); + } + return 0; +} + +/** + * Flow callback registration. + * + * @see rte_flow_ops + */ +const struct rte_flow_ops enic_flow_ops = { + .validate = enic_flow_validate, + .create = enic_flow_create, + .destroy = enic_flow_destroy, + .flush = enic_flow_flush, +}; diff --git a/src/spdk/dpdk/drivers/net/enic/enic_fm_flow.c b/src/spdk/dpdk/drivers/net/enic/enic_fm_flow.c new file mode 100644 index 000000000..6ee022437 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_fm_flow.c @@ -0,0 +1,2463 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2019 Cisco Systems, Inc. All rights reserved. + */ + +#include <errno.h> +#include <stdint.h> +#include <rte_log.h> +#include <rte_ethdev_driver.h> +#include <rte_flow_driver.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_memzone.h> + +#include "enic_compat.h" +#include "enic.h" +#include "vnic_dev.h" +#include "vnic_nic.h" + +#define IP_DEFTTL 64 /* from RFC 1340. */ +#define IP6_VTC_FLOW 0x60000000 + +/* Highest Item type supported by Flowman */ +#define FM_MAX_ITEM_TYPE RTE_FLOW_ITEM_TYPE_VXLAN + +/* Up to 1024 TCAM entries */ +#define FM_MAX_TCAM_TABLE_SIZE 1024 + +/* Up to 4096 entries per exact match table */ +#define FM_MAX_EXACT_TABLE_SIZE 4096 + +/* Number of counters to increase on for each increment */ +#define FM_COUNTERS_EXPAND 100 + +#define FM_INVALID_HANDLE 0 + +/* + * Flow exact match tables (FET) in the VIC and rte_flow groups. + * Use a simple scheme to map groups to tables. + * Group 0 uses the single TCAM tables, one for each direction. + * Group 1, 2, ... uses its own exact match table. + * + * The TCAM tables are allocated upfront during init. + * + * Exact match tables are allocated on demand. 3 paths that lead allocations. + * + * 1. Add a flow that jumps from group 0 to group N. + * + * If N does not exist, we allocate an exact match table for it, using + * a dummy key. A key is required for the table. + * + * 2. Add a flow that uses group N. + * + * If N does not exist, we allocate an exact match table for it, using + * the flow's key. Subsequent flows to the same group all should have + * the same key. + * + * Without a jump flow to N, N is not reachable in hardware. No packets + * reach N and match. + * + * 3. Add a flow to an empty group N. + * + * N has been created via (1) and the dummy key. We free that table, allocate + * a new table using the new flow's key. Also re-do the existing jump flow to + * point to the new table. + */ +#define FM_TCAM_RTE_GROUP 0 + +struct enic_fm_fet { + TAILQ_ENTRY(enic_fm_fet) list; + uint32_t group; /* rte_flow group ID */ + uint64_t handle; /* Exact match table handle from flowman */ + uint8_t ingress; + uint8_t default_key; + int ref; /* Reference count via get/put */ + struct fm_key_template key; /* Key associated with the table */ +}; + +struct enic_fm_counter { + SLIST_ENTRY(enic_fm_counter) next; + uint32_t handle; +}; + +/* rte_flow.fm */ +struct enic_fm_flow { + bool counter_valid; + uint64_t entry_handle; + uint64_t action_handle; + struct enic_fm_counter *counter; + struct enic_fm_fet *fet; +}; + +struct enic_fm_jump_flow { + TAILQ_ENTRY(enic_fm_jump_flow) list; + struct rte_flow *flow; + uint32_t group; + struct fm_tcam_match_entry match; + struct fm_action action; +}; + +/* + * Flowman uses host memory for commands. This structure is allocated + * in DMA-able memory. + */ +union enic_flowman_cmd_mem { + struct fm_tcam_match_table fm_tcam_match_table; + struct fm_exact_match_table fm_exact_match_table; + struct fm_tcam_match_entry fm_tcam_match_entry; + struct fm_exact_match_entry fm_exact_match_entry; + struct fm_action fm_action; +}; + +struct enic_flowman { + struct enic *enic; + /* Command buffer */ + struct { + union enic_flowman_cmd_mem *va; + dma_addr_t pa; + } cmd; + /* TCAM tables allocated upfront, used for group 0 */ + uint64_t ig_tcam_hndl; + uint64_t eg_tcam_hndl; + /* Counters */ + SLIST_HEAD(enic_free_counters, enic_fm_counter) counters; + void *counter_stack; + uint32_t counters_alloced; + /* Exact match tables for groups != 0, dynamically allocated */ + TAILQ_HEAD(fet_list, enic_fm_fet) fet_list; + /* + * Default exact match tables used for jump actions to + * non-existent groups. + */ + struct enic_fm_fet *default_eg_fet; + struct enic_fm_fet *default_ig_fet; + /* Flows that jump to the default table above */ + TAILQ_HEAD(jump_flow_list, enic_fm_jump_flow) jump_list; + /* + * Scratch data used during each invocation of flow_create + * and flow_validate. + */ + struct enic_fm_fet *fet; + struct fm_tcam_match_entry tcam_entry; + struct fm_action action; + struct fm_action action_tmp; /* enic_fm_reorder_action_op */ + int action_op_count; +}; + +static int enic_fm_tbl_free(struct enic_flowman *fm, uint64_t handle); + +/* + * Common arguments passed to copy_item functions. Use this structure + * so we can easily add new arguments. + * item: Item specification. + * fm_tcam_entry: Flowman TCAM match entry. + * header_level: 0 for outer header, 1 for inner header. + */ +struct copy_item_args { + const struct rte_flow_item *item; + struct fm_tcam_match_entry *fm_tcam_entry; + uint8_t header_level; +}; + +/* functions for copying items into flowman match */ +typedef int (enic_copy_item_fn)(struct copy_item_args *arg); + +/* Info about how to copy items into flowman match */ +struct enic_fm_items { + /* Function for copying and validating an item. */ + enic_copy_item_fn * const copy_item; + /* List of valid previous items. */ + const enum rte_flow_item_type * const prev_items; + /* + * True if it's OK for this item to be the first item. For some NIC + * versions, it's invalid to start the stack above layer 3. + */ + const uint8_t valid_start_item; +}; + +static enic_copy_item_fn enic_fm_copy_item_eth; +static enic_copy_item_fn enic_fm_copy_item_ipv4; +static enic_copy_item_fn enic_fm_copy_item_ipv6; +static enic_copy_item_fn enic_fm_copy_item_raw; +static enic_copy_item_fn enic_fm_copy_item_sctp; +static enic_copy_item_fn enic_fm_copy_item_tcp; +static enic_copy_item_fn enic_fm_copy_item_udp; +static enic_copy_item_fn enic_fm_copy_item_vlan; +static enic_copy_item_fn enic_fm_copy_item_vxlan; + +/* Ingress actions */ +static const enum rte_flow_action_type enic_fm_supported_ig_actions[] = { + RTE_FLOW_ACTION_TYPE_COUNT, + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_FLAG, + RTE_FLOW_ACTION_TYPE_JUMP, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_PORT_ID, + RTE_FLOW_ACTION_TYPE_PASSTHRU, + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_RSS, + RTE_FLOW_ACTION_TYPE_VOID, + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, + RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, + RTE_FLOW_ACTION_TYPE_END, /* END must be the last entry */ +}; + +/* Egress actions */ +static const enum rte_flow_action_type enic_fm_supported_eg_actions[] = { + RTE_FLOW_ACTION_TYPE_COUNT, + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_JUMP, + RTE_FLOW_ACTION_TYPE_PASSTHRU, + RTE_FLOW_ACTION_TYPE_VOID, + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, + RTE_FLOW_ACTION_TYPE_END, +}; + +static const struct enic_fm_items enic_fm_items[] = { + [RTE_FLOW_ITEM_TYPE_RAW] = { + .copy_item = enic_fm_copy_item_raw, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_ETH] = { + .copy_item = enic_fm_copy_item_eth, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_VLAN] = { + .copy_item = enic_fm_copy_item_vlan, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .copy_item = enic_fm_copy_item_ipv4, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_IPV6] = { + .copy_item = enic_fm_copy_item_ipv6, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .copy_item = enic_fm_copy_item_udp, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .copy_item = enic_fm_copy_item_tcp, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_SCTP] = { + .copy_item = enic_fm_copy_item_sctp, + .valid_start_item = 0, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, + }, + }, + [RTE_FLOW_ITEM_TYPE_VXLAN] = { + .copy_item = enic_fm_copy_item_vxlan, + .valid_start_item = 1, + .prev_items = (const enum rte_flow_item_type[]) { + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, + }, + }, +}; + +static int +enic_fm_copy_item_eth(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_eth *spec = item->spec; + const struct rte_flow_item_eth *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + /* Match all if no spec */ + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_eth_mask; + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_header_select |= FKH_ETHER; + fm_mask->fk_header_select |= FKH_ETHER; + memcpy(&fm_data->l2.eth, spec, sizeof(*spec)); + memcpy(&fm_mask->l2.eth, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_vlan(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_vlan *spec = item->spec; + const struct rte_flow_item_vlan *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + struct rte_ether_hdr *eth_mask; + struct rte_ether_hdr *eth_val; + uint32_t meta; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + /* Outer and inner packet vlans need different flags */ + meta = FKM_VLAN_PRES; + if (lvl > 0) + meta = FKM_QTAG; + fm_data->fk_metadata |= meta; + fm_mask->fk_metadata |= meta; + + /* Match all if no spec */ + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_vlan_mask; + + eth_mask = (void *)&fm_mask->l2.eth; + eth_val = (void *)&fm_data->l2.eth; + + /* Outer TPID cannot be matched */ + if (eth_mask->ether_type) + return -ENOTSUP; + + /* + * When packet matching, the VIC always compares vlan-stripped + * L2, regardless of vlan stripping settings. So, the inner type + * from vlan becomes the ether type of the eth header. + */ + eth_mask->ether_type = mask->inner_type; + eth_val->ether_type = spec->inner_type; + fm_data->fk_header_select |= FKH_ETHER | FKH_QTAG; + fm_mask->fk_header_select |= FKH_ETHER | FKH_QTAG; + fm_data->fk_vlan = rte_be_to_cpu_16(spec->tci); + fm_mask->fk_vlan = rte_be_to_cpu_16(mask->tci); + return 0; +} + +static int +enic_fm_copy_item_ipv4(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_metadata |= FKM_IPV4; + fm_mask->fk_metadata |= FKM_IPV4; + + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_ipv4_mask; + + fm_data->fk_header_select |= FKH_IPV4; + fm_mask->fk_header_select |= FKH_IPV4; + memcpy(&fm_data->l3.ip4, spec, sizeof(*spec)); + memcpy(&fm_mask->l3.ip4, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_ipv6(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_metadata |= FKM_IPV6; + fm_mask->fk_metadata |= FKM_IPV6; + + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_ipv6_mask; + + fm_data->fk_header_select |= FKH_IPV6; + fm_mask->fk_header_select |= FKH_IPV6; + memcpy(&fm_data->l3.ip6, spec, sizeof(*spec)); + memcpy(&fm_mask->l3.ip6, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_udp(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_metadata |= FKM_UDP; + fm_mask->fk_metadata |= FKM_UDP; + + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_udp_mask; + + fm_data->fk_header_select |= FKH_UDP; + fm_mask->fk_header_select |= FKH_UDP; + memcpy(&fm_data->l4.udp, spec, sizeof(*spec)); + memcpy(&fm_mask->l4.udp, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_tcp(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_metadata |= FKM_TCP; + fm_mask->fk_metadata |= FKM_TCP; + + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_tcp_mask; + + fm_data->fk_header_select |= FKH_TCP; + fm_mask->fk_header_select |= FKH_TCP; + memcpy(&fm_data->l4.tcp, spec, sizeof(*spec)); + memcpy(&fm_mask->l4.tcp, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_sctp(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_sctp *spec = item->spec; + const struct rte_flow_item_sctp *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + uint8_t *ip_proto_mask = NULL; + uint8_t *ip_proto = NULL; + uint32_t l3_fkh; + + ENICPMD_FUNC_TRACE(); + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + /* + * The NIC filter API has no flags for "match sctp", so explicitly + * set the protocol number in the IP pattern. + */ + if (fm_data->fk_metadata & FKM_IPV4) { + struct rte_ipv4_hdr *ip; + ip = (struct rte_ipv4_hdr *)&fm_mask->l3.ip4; + ip_proto_mask = &ip->next_proto_id; + ip = (struct rte_ipv4_hdr *)&fm_data->l3.ip4; + ip_proto = &ip->next_proto_id; + l3_fkh = FKH_IPV4; + } else if (fm_data->fk_metadata & FKM_IPV6) { + struct rte_ipv6_hdr *ip; + ip = (struct rte_ipv6_hdr *)&fm_mask->l3.ip6; + ip_proto_mask = &ip->proto; + ip = (struct rte_ipv6_hdr *)&fm_data->l3.ip6; + ip_proto = &ip->proto; + l3_fkh = FKH_IPV6; + } else { + /* Need IPv4/IPv6 pattern first */ + return -EINVAL; + } + *ip_proto = IPPROTO_SCTP; + *ip_proto_mask = 0xff; + fm_data->fk_header_select |= l3_fkh; + fm_mask->fk_header_select |= l3_fkh; + + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_sctp_mask; + + fm_data->fk_header_select |= FKH_L4RAW; + fm_mask->fk_header_select |= FKH_L4RAW; + memcpy(fm_data->l4.rawdata, spec, sizeof(*spec)); + memcpy(fm_mask->l4.rawdata, mask, sizeof(*mask)); + return 0; +} + +static int +enic_fm_copy_item_vxlan(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_vxlan *spec = item->spec; + const struct rte_flow_item_vxlan *mask = item->mask; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + /* Only 2 header levels (outer and inner) allowed */ + if (arg->header_level > 0) + return -EINVAL; + + fm_data = &entry->ftm_data.fk_hdrset[0]; + fm_mask = &entry->ftm_mask.fk_hdrset[0]; + fm_data->fk_metadata |= FKM_VXLAN; + fm_mask->fk_metadata |= FKM_VXLAN; + /* items from here on out are inner header items */ + arg->header_level = 1; + + /* Match all if no spec */ + if (!spec) + return 0; + if (!mask) + mask = &rte_flow_item_vxlan_mask; + + fm_data->fk_header_select |= FKH_VXLAN; + fm_mask->fk_header_select |= FKH_VXLAN; + memcpy(&fm_data->vxlan, spec, sizeof(*spec)); + memcpy(&fm_mask->vxlan, mask, sizeof(*mask)); + return 0; +} + +/* + * Currently, raw pattern match is very limited. It is intended for matching + * UDP tunnel header (e.g. vxlan or geneve). + */ +static int +enic_fm_copy_item_raw(struct copy_item_args *arg) +{ + const struct rte_flow_item *item = arg->item; + const struct rte_flow_item_raw *spec = item->spec; + const struct rte_flow_item_raw *mask = item->mask; + const uint8_t lvl = arg->header_level; + struct fm_tcam_match_entry *entry = arg->fm_tcam_entry; + struct fm_header_set *fm_data, *fm_mask; + + ENICPMD_FUNC_TRACE(); + /* Cannot be used for inner packet */ + if (lvl > 0) + return -EINVAL; + /* Need both spec and mask */ + if (!spec || !mask) + return -EINVAL; + /* Only supports relative with offset 0 */ + if (!spec->relative || spec->offset != 0 || spec->search || + spec->limit) + return -EINVAL; + /* Need non-null pattern that fits within the NIC's filter pattern */ + if (spec->length == 0 || + spec->length + sizeof(struct rte_udp_hdr) > FM_LAYER_SIZE || + !spec->pattern || !mask->pattern) + return -EINVAL; + /* + * Mask fields, including length, are often set to zero. Assume that + * means "same as spec" to avoid breaking existing apps. If length + * is not zero, then it should be >= spec length. + * + * No more pattern follows this, so append to the L4 layer instead of + * L5 to work with both recent and older VICs. + */ + if (mask->length != 0 && mask->length < spec->length) + return -EINVAL; + + fm_data = &entry->ftm_data.fk_hdrset[lvl]; + fm_mask = &entry->ftm_mask.fk_hdrset[lvl]; + fm_data->fk_header_select |= FKH_L4RAW; + fm_mask->fk_header_select |= FKH_L4RAW; + fm_data->fk_header_select &= ~FKH_UDP; + fm_mask->fk_header_select &= ~FKH_UDP; + memcpy(fm_data->l4.rawdata + sizeof(struct rte_udp_hdr), + spec->pattern, spec->length); + memcpy(fm_mask->l4.rawdata + sizeof(struct rte_udp_hdr), + mask->pattern, spec->length); + return 0; +} + +static int +enic_fet_alloc(struct enic_flowman *fm, uint8_t ingress, + struct fm_key_template *key, int entries, + struct enic_fm_fet **fet_out) +{ + struct fm_exact_match_table *cmd; + struct fm_header_set *hdr; + struct enic_fm_fet *fet; + uint64_t args[3]; + int ret; + + ENICPMD_FUNC_TRACE(); + fet = calloc(1, sizeof(struct enic_fm_fet)); + if (fet == NULL) + return -ENOMEM; + cmd = &fm->cmd.va->fm_exact_match_table; + memset(cmd, 0, sizeof(*cmd)); + cmd->fet_direction = ingress ? FM_INGRESS : FM_EGRESS; + cmd->fet_stage = FM_STAGE_LAST; + cmd->fet_max_entries = entries ? entries : FM_MAX_EXACT_TABLE_SIZE; + if (key == NULL) { + hdr = &cmd->fet_key.fk_hdrset[0]; + memset(hdr, 0, sizeof(*hdr)); + hdr->fk_header_select = FKH_IPV4 | FKH_UDP; + hdr->l3.ip4.fk_saddr = 0xFFFFFFFF; + hdr->l3.ip4.fk_daddr = 0xFFFFFFFF; + hdr->l4.udp.fk_source = 0xFFFF; + hdr->l4.udp.fk_dest = 0xFFFF; + fet->default_key = 1; + } else { + memcpy(&cmd->fet_key, key, sizeof(*key)); + memcpy(&fet->key, key, sizeof(*key)); + fet->default_key = 0; + } + cmd->fet_key.fk_packet_tag = 1; + + args[0] = FM_EXACT_TABLE_ALLOC; + args[1] = fm->cmd.pa; + ret = vnic_dev_flowman_cmd(fm->enic->vdev, args, 2); + if (ret) { + ENICPMD_LOG(ERR, "cannot alloc exact match table: rc=%d", ret); + free(fet); + return ret; + } + fet->handle = args[0]; + fet->ingress = ingress; + ENICPMD_LOG(DEBUG, "allocated exact match table: handle=0x%" PRIx64, + fet->handle); + *fet_out = fet; + return 0; +} + +static void +enic_fet_free(struct enic_flowman *fm, struct enic_fm_fet *fet) +{ + ENICPMD_FUNC_TRACE(); + enic_fm_tbl_free(fm, fet->handle); + if (!fet->default_key) + TAILQ_REMOVE(&fm->fet_list, fet, list); + free(fet); +} + +/* + * Get the exact match table for the given combination of + * <group, ingress, key>. Allocate one on the fly as necessary. + */ +static int +enic_fet_get(struct enic_flowman *fm, + uint32_t group, + uint8_t ingress, + struct fm_key_template *key, + struct enic_fm_fet **fet_out, + struct rte_flow_error *error) +{ + struct enic_fm_fet *fet; + + ENICPMD_FUNC_TRACE(); + /* See if we already have this table open */ + TAILQ_FOREACH(fet, &fm->fet_list, list) { + if (fet->group == group && fet->ingress == ingress) + break; + } + if (fet == NULL) { + /* Jumping to a non-existing group? Use the default table */ + if (key == NULL) { + fet = ingress ? fm->default_ig_fet : fm->default_eg_fet; + } else if (enic_fet_alloc(fm, ingress, key, 0, &fet)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "enic: cannot get exact match table"); + } + fet->group = group; + /* Default table is never on the open table list */ + if (!fet->default_key) + TAILQ_INSERT_HEAD(&fm->fet_list, fet, list); + } + fet->ref++; + *fet_out = fet; + ENICPMD_LOG(DEBUG, "fet_get: %s %s group=%u ref=%u", + fet->default_key ? "default" : "", + fet->ingress ? "ingress" : "egress", + fet->group, fet->ref); + return 0; +} + +static void +enic_fet_put(struct enic_flowman *fm, struct enic_fm_fet *fet) +{ + ENICPMD_FUNC_TRACE(); + RTE_ASSERT(fet->ref > 0); + fet->ref--; + ENICPMD_LOG(DEBUG, "fet_put: %s %s group=%u ref=%u", + fet->default_key ? "default" : "", + fet->ingress ? "ingress" : "egress", + fet->group, fet->ref); + if (fet->ref == 0) + enic_fet_free(fm, fet); +} + +/* Return 1 if current item is valid on top of the previous one. */ +static int +fm_item_stacking_valid(enum rte_flow_item_type prev_item, + const struct enic_fm_items *item_info, + uint8_t is_first_item) +{ + enum rte_flow_item_type const *allowed_items = item_info->prev_items; + + ENICPMD_FUNC_TRACE(); + for (; *allowed_items != RTE_FLOW_ITEM_TYPE_END; allowed_items++) { + if (prev_item == *allowed_items) + return 1; + } + + /* This is the first item in the stack. Check if that's cool */ + if (is_first_item && item_info->valid_start_item) + return 1; + return 0; +} + +/* + * Build the flow manager match entry structure from the provided pattern. + * The pattern is validated as the items are copied. + */ +static int +enic_fm_copy_entry(struct enic_flowman *fm, + const struct rte_flow_item pattern[], + struct rte_flow_error *error) +{ + const struct enic_fm_items *item_info; + enum rte_flow_item_type prev_item; + const struct rte_flow_item *item; + struct copy_item_args args; + uint8_t prev_header_level; + uint8_t is_first_item; + int ret; + + ENICPMD_FUNC_TRACE(); + item = pattern; + is_first_item = 1; + prev_item = RTE_FLOW_ITEM_TYPE_END; + + args.fm_tcam_entry = &fm->tcam_entry; + args.header_level = 0; + prev_header_level = 0; + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + /* + * Get info about how to validate and copy the item. If NULL + * is returned the nic does not support the item. + */ + if (item->type == RTE_FLOW_ITEM_TYPE_VOID) + continue; + + item_info = &enic_fm_items[item->type]; + + if (item->type > FM_MAX_ITEM_TYPE || + item_info->copy_item == NULL) { + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "enic: unsupported item"); + } + + /* check to see if item stacking is valid */ + if (!fm_item_stacking_valid(prev_item, item_info, + is_first_item)) + goto stacking_error; + + args.item = item; + ret = item_info->copy_item(&args); + if (ret) + goto item_not_supported; + /* Going from outer to inner? Treat it as a new packet start */ + if (prev_header_level != args.header_level) { + prev_item = RTE_FLOW_ITEM_TYPE_END; + is_first_item = 1; + } else { + prev_item = item->type; + is_first_item = 0; + } + prev_header_level = args.header_level; + } + return 0; + +item_not_supported: + return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "enic: unsupported item type"); + +stacking_error: + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "enic: unsupported item stack"); +} + +static void +flow_item_skip_void(const struct rte_flow_item **item) +{ + for ( ; ; (*item)++) + if ((*item)->type != RTE_FLOW_ITEM_TYPE_VOID) + return; +} + +static void +append_template(void **template, uint8_t *off, const void *data, int len) +{ + memcpy(*template, data, len); + *template = (char *)*template + len; + *off = *off + len; +} + +static int +enic_fm_append_action_op(struct enic_flowman *fm, + struct fm_action_op *fm_op, + struct rte_flow_error *error) +{ + int count; + + count = fm->action_op_count; + ENICPMD_LOG(DEBUG, "append action op: idx=%d op=%u", + count, fm_op->fa_op); + if (count == FM_ACTION_OP_MAX) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "too many action operations"); + } + fm->action.fma_action_ops[count] = *fm_op; + fm->action_op_count = count + 1; + return 0; +} + +/* NIC requires that 1st steer appear before decap. + * Correct example: steer, decap, steer, steer, ... + */ +static void +enic_fm_reorder_action_op(struct enic_flowman *fm) +{ + struct fm_action_op *op, *steer, *decap; + struct fm_action_op tmp_op; + + ENICPMD_FUNC_TRACE(); + /* Find 1st steer and decap */ + op = fm->action.fma_action_ops; + steer = NULL; + decap = NULL; + while (op->fa_op != FMOP_END) { + if (!decap && op->fa_op == FMOP_DECAP_NOSTRIP) + decap = op; + else if (!steer && op->fa_op == FMOP_RQ_STEER) + steer = op; + op++; + } + /* If decap is before steer, swap */ + if (steer && decap && decap < steer) { + op = fm->action.fma_action_ops; + ENICPMD_LOG(DEBUG, "swap decap %ld <-> steer %ld", + (long)(decap - op), (long)(steer - op)); + tmp_op = *decap; + *decap = *steer; + *steer = tmp_op; + } +} + +/* VXLAN decap is done via flowman compound action */ +static int +enic_fm_copy_vxlan_decap(struct enic_flowman *fm, + struct fm_tcam_match_entry *fmt, + const struct rte_flow_action *action, + struct rte_flow_error *error) +{ + struct fm_header_set *fm_data; + struct fm_action_op fm_op; + + ENICPMD_FUNC_TRACE(); + fm_data = &fmt->ftm_data.fk_hdrset[0]; + if (!(fm_data->fk_metadata & FKM_VXLAN)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "vxlan-decap: vxlan must be in pattern"); + } + + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_DECAP_NOSTRIP; + return enic_fm_append_action_op(fm, &fm_op, error); +} + +/* VXLAN encap is done via flowman compound action */ +static int +enic_fm_copy_vxlan_encap(struct enic_flowman *fm, + const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + struct fm_action_op fm_op; + struct rte_ether_hdr *eth; + uint16_t *ethertype; + void *template; + uint8_t off; + + ENICPMD_FUNC_TRACE(); + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_ENCAP; + template = fm->action.fma_data; + off = 0; + /* + * Copy flow items to the flowman template starting L2. + * L2 must be ethernet. + */ + flow_item_skip_void(&item); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "vxlan-encap: first item should be ethernet"); + eth = (struct rte_ether_hdr *)template; + ethertype = ð->ether_type; + append_template(&template, &off, item->spec, + sizeof(struct rte_flow_item_eth)); + item++; + flow_item_skip_void(&item); + /* Optional VLAN */ + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + const struct rte_flow_item_vlan *spec; + + ENICPMD_LOG(DEBUG, "vxlan-encap: vlan"); + spec = item->spec; + fm_op.encap.outer_vlan = rte_be_to_cpu_16(spec->tci); + item++; + flow_item_skip_void(&item); + } + /* L3 must be IPv4, IPv6 */ + switch (item->type) { + case RTE_FLOW_ITEM_TYPE_IPV4: + { + struct rte_ipv4_hdr *ip4; + + ENICPMD_LOG(DEBUG, "vxlan-encap: ipv4"); + *ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); + ip4 = (struct rte_ipv4_hdr *)template; + /* + * Offset of IPv4 length field and its initial value + * (IP + UDP + VXLAN) are specified in the action. The NIC + * will add inner packet length. + */ + fm_op.encap.len1_offset = off + + offsetof(struct rte_ipv4_hdr, total_length); + fm_op.encap.len1_delta = sizeof(struct rte_ipv4_hdr) + + sizeof(struct rte_udp_hdr) + + sizeof(struct rte_vxlan_hdr); + append_template(&template, &off, item->spec, + sizeof(struct rte_ipv4_hdr)); + ip4->version_ihl = RTE_IPV4_VHL_DEF; + if (ip4->time_to_live == 0) + ip4->time_to_live = IP_DEFTTL; + ip4->next_proto_id = IPPROTO_UDP; + break; + } + case RTE_FLOW_ITEM_TYPE_IPV6: + { + struct rte_ipv6_hdr *ip6; + + ENICPMD_LOG(DEBUG, "vxlan-encap: ipv6"); + *ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6); + ip6 = (struct rte_ipv6_hdr *)template; + fm_op.encap.len1_offset = off + + offsetof(struct rte_ipv6_hdr, payload_len); + fm_op.encap.len1_delta = sizeof(struct rte_udp_hdr) + + sizeof(struct rte_vxlan_hdr); + append_template(&template, &off, item->spec, + sizeof(struct rte_ipv6_hdr)); + ip6->vtc_flow |= rte_cpu_to_be_32(IP6_VTC_FLOW); + if (ip6->hop_limits == 0) + ip6->hop_limits = IP_DEFTTL; + ip6->proto = IPPROTO_UDP; + break; + } + default: + return rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, + "vxlan-encap: L3 must be IPv4/IPv6"); + } + item++; + flow_item_skip_void(&item); + + /* L4 is UDP */ + if (item->type != RTE_FLOW_ITEM_TYPE_UDP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "vxlan-encap: UDP must follow IPv4/IPv6"); + /* UDP length = UDP + VXLAN. NIC will add inner packet length. */ + fm_op.encap.len2_offset = + off + offsetof(struct rte_udp_hdr, dgram_len); + fm_op.encap.len2_delta = + sizeof(struct rte_udp_hdr) + sizeof(struct rte_vxlan_hdr); + append_template(&template, &off, item->spec, + sizeof(struct rte_udp_hdr)); + item++; + flow_item_skip_void(&item); + + /* Finally VXLAN */ + if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN) + return rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, + "vxlan-encap: VXLAN must follow UDP"); + append_template(&template, &off, item->spec, + sizeof(struct rte_flow_item_vxlan)); + + /* + * Fill in the rest of the action structure. + * Indicate that we want to encap with vxlan at packet start. + */ + fm_op.encap.template_offset = 0; + fm_op.encap.template_len = off; + return enic_fm_append_action_op(fm, &fm_op, error); +} + +static int +enic_fm_find_vnic(struct enic *enic, const struct rte_pci_addr *addr, + uint64_t *handle) +{ + uint32_t bdf; + uint64_t args[2]; + int rc; + + ENICPMD_FUNC_TRACE(); + ENICPMD_LOG(DEBUG, "bdf=%x:%x:%x", addr->bus, addr->devid, + addr->function); + bdf = addr->bus << 8 | addr->devid << 3 | addr->function; + args[0] = FM_VNIC_FIND; + args[1] = bdf; + rc = vnic_dev_flowman_cmd(enic->vdev, args, 2); + if (rc != 0) { + ENICPMD_LOG(ERR, "allocating counters rc=%d", rc); + return rc; + } + *handle = args[0]; + ENICPMD_LOG(DEBUG, "found vnic: handle=0x%" PRIx64, *handle); + return 0; +} + +/* Translate flow actions to flowman TCAM entry actions */ +static int +enic_fm_copy_action(struct enic_flowman *fm, + const struct rte_flow_action actions[], + uint8_t ingress, + struct rte_flow_error *error) +{ + enum { + FATE = 1 << 0, + DECAP = 1 << 1, + PASSTHRU = 1 << 2, + COUNT = 1 << 3, + ENCAP = 1 << 4, + }; + struct fm_tcam_match_entry *fmt; + struct fm_action_op fm_op; + struct enic *enic; + uint32_t overlap; + uint64_t vnic_h; + bool first_rq; + int ret; + + ENICPMD_FUNC_TRACE(); + fmt = &fm->tcam_entry; + first_rq = true; + enic = fm->enic; + overlap = 0; + vnic_h = 0; /* 0 = current vNIC */ + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_VOID: + continue; + case RTE_FLOW_ACTION_TYPE_PASSTHRU: { + if (overlap & PASSTHRU) + goto unsupported; + overlap |= PASSTHRU; + break; + } + case RTE_FLOW_ACTION_TYPE_JUMP: { + const struct rte_flow_action_jump *jump = + actions->conf; + struct enic_fm_fet *fet; + + if (overlap & FATE) + goto unsupported; + ret = enic_fet_get(fm, jump->group, ingress, NULL, + &fet, error); + if (ret) + return ret; + overlap |= FATE; + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_EXACT_MATCH; + fm_op.exact.handle = fet->handle; + fm->fet = fet; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + break; + } + case RTE_FLOW_ACTION_TYPE_MARK: { + const struct rte_flow_action_mark *mark = + actions->conf; + + if (mark->id >= ENIC_MAGIC_FILTER_ID - 1) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "invalid mark id"); + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_MARK; + fm_op.mark.mark = mark->id + 1; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + break; + } + case RTE_FLOW_ACTION_TYPE_FLAG: { + /* ENIC_MAGIC_FILTER_ID is reserved for flagging */ + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_MARK; + fm_op.mark.mark = ENIC_MAGIC_FILTER_ID; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + break; + } + case RTE_FLOW_ACTION_TYPE_QUEUE: { + const struct rte_flow_action_queue *queue = + actions->conf; + + /* + * If fate other than QUEUE or RSS, fail. Multiple + * rss and queue actions are ok. + */ + if ((overlap & FATE) && first_rq) + goto unsupported; + first_rq = false; + overlap |= FATE; + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_RQ_STEER; + fm_op.rq_steer.rq_index = + enic_rte_rq_idx_to_sop_idx(queue->index); + fm_op.rq_steer.rq_count = 1; + fm_op.rq_steer.vnic_handle = vnic_h; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + ENICPMD_LOG(DEBUG, "create QUEUE action rq: %u", + fm_op.rq_steer.rq_index); + break; + } + case RTE_FLOW_ACTION_TYPE_DROP: { + if (overlap & FATE) + goto unsupported; + overlap |= FATE; + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_DROP; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + ENICPMD_LOG(DEBUG, "create DROP action"); + break; + } + case RTE_FLOW_ACTION_TYPE_COUNT: { + if (overlap & COUNT) + goto unsupported; + overlap |= COUNT; + /* Count is associated with entry not action on VIC. */ + fmt->ftm_flags |= FMEF_COUNTER; + break; + } + case RTE_FLOW_ACTION_TYPE_RSS: { + const struct rte_flow_action_rss *rss = actions->conf; + bool allow; + uint16_t i; + + /* + * If fate other than QUEUE or RSS, fail. Multiple + * rss and queue actions are ok. + */ + if ((overlap & FATE) && first_rq) + goto unsupported; + first_rq = false; + overlap |= FATE; + + /* + * Hardware only supports RSS actions on outer level + * with default type and function. Queues must be + * sequential. + */ + allow = rss->func == RTE_ETH_HASH_FUNCTION_DEFAULT && + rss->level == 0 && (rss->types == 0 || + rss->types == enic->rss_hf) && + rss->queue_num <= enic->rq_count && + rss->queue[rss->queue_num - 1] < enic->rq_count; + + + /* Identity queue map needs to be sequential */ + for (i = 1; i < rss->queue_num; i++) + allow = allow && (rss->queue[i] == + rss->queue[i - 1] + 1); + if (!allow) + goto unsupported; + + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_RQ_STEER; + fm_op.rq_steer.rq_index = + enic_rte_rq_idx_to_sop_idx(rss->queue[0]); + fm_op.rq_steer.rq_count = rss->queue_num; + fm_op.rq_steer.vnic_handle = vnic_h; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + ENICPMD_LOG(DEBUG, "create QUEUE action rq: %u", + fm_op.rq_steer.rq_index); + break; + } + case RTE_FLOW_ACTION_TYPE_PORT_ID: { + const struct rte_flow_action_port_id *port; + struct rte_pci_device *pdev; + struct rte_eth_dev *dev; + + port = actions->conf; + if (port->original) { + vnic_h = 0; /* This port */ + break; + } + ENICPMD_LOG(DEBUG, "port id %u", port->id); + if (!rte_eth_dev_is_valid_port(port->id)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "invalid port_id"); + } + dev = &rte_eth_devices[port->id]; + if (!dev_is_enic(dev)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "port_id is not enic"); + } + pdev = RTE_ETH_DEV_TO_PCI(dev); + if (enic_fm_find_vnic(enic, &pdev->addr, &vnic_h)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "port_id is not vnic"); + } + break; + } + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: { + if (overlap & DECAP) + goto unsupported; + overlap |= DECAP; + + ret = enic_fm_copy_vxlan_decap(fm, fmt, actions, + error); + if (ret != 0) + return ret; + break; + } + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: { + const struct rte_flow_action_vxlan_encap *encap; + + encap = actions->conf; + if (overlap & ENCAP) + goto unsupported; + overlap |= ENCAP; + ret = enic_fm_copy_vxlan_encap(fm, encap->definition, + error); + if (ret != 0) + return ret; + break; + } + default: + goto unsupported; + } + } + + if (!(overlap & (FATE | PASSTHRU | COUNT))) + goto unsupported; + memset(&fm_op, 0, sizeof(fm_op)); + fm_op.fa_op = FMOP_END; + ret = enic_fm_append_action_op(fm, &fm_op, error); + if (ret) + return ret; + enic_fm_reorder_action_op(fm); + return 0; + +unsupported: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "enic: unsupported action"); +} + +/** Check if the action is supported */ +static int +enic_fm_match_action(const struct rte_flow_action *action, + const enum rte_flow_action_type *supported_actions) +{ + for (; *supported_actions != RTE_FLOW_ACTION_TYPE_END; + supported_actions++) { + if (action->type == *supported_actions) + return 1; + } + return 0; +} + +/* Debug function to dump internal NIC action structure. */ +static void +enic_fm_dump_tcam_actions(const struct fm_action *fm_action) +{ + /* Manually keep in sync with FMOP commands */ + const char *fmop_str[FMOP_OP_MAX] = { + [FMOP_END] = "end", + [FMOP_DROP] = "drop", + [FMOP_RQ_STEER] = "steer", + [FMOP_EXACT_MATCH] = "exmatch", + [FMOP_MARK] = "mark", + [FMOP_EXT_MARK] = "ext_mark", + [FMOP_TAG] = "tag", + [FMOP_EG_HAIRPIN] = "eg_hairpin", + [FMOP_IG_HAIRPIN] = "ig_hairpin", + [FMOP_ENCAP_IVLAN] = "encap_ivlan", + [FMOP_ENCAP_NOIVLAN] = "encap_noivlan", + [FMOP_ENCAP] = "encap", + [FMOP_SET_OVLAN] = "set_ovlan", + [FMOP_DECAP_NOSTRIP] = "decap_nostrip", + }; + const struct fm_action_op *op = &fm_action->fma_action_ops[0]; + char buf[128], *bp = buf; + const char *op_str; + int i, n, buf_len; + + buf[0] = '\0'; + buf_len = sizeof(buf); + for (i = 0; i < FM_ACTION_OP_MAX; i++) { + if (op->fa_op == FMOP_END) + break; + if (op->fa_op >= FMOP_OP_MAX) + op_str = "unknown"; + else + op_str = fmop_str[op->fa_op]; + n = snprintf(bp, buf_len, "%s,", op_str); + if (n > 0 && n < buf_len) { + bp += n; + buf_len -= n; + } + op++; + } + /* Remove trailing comma */ + if (buf[0]) + *(bp - 1) = '\0'; + ENICPMD_LOG(DEBUG, " Acions: %s", buf); +} + +static int +bits_to_str(uint32_t bits, const char *strings[], int max, + char *buf, int buf_len) +{ + int i, n = 0, len = 0; + + for (i = 0; i < max; i++) { + if (bits & (1 << i)) { + n = snprintf(buf, buf_len, "%s,", strings[i]); + if (n > 0 && n < buf_len) { + buf += n; + buf_len -= n; + len += n; + } + } + } + /* Remove trailing comma */ + if (len) { + *(buf - 1) = '\0'; + len--; + } + return len; +} + +/* Debug function to dump internal NIC filter structure. */ +static void +__enic_fm_dump_tcam_match(const struct fm_header_set *fk_hdrset, char *buf, + int buf_len) +{ + /* Manually keep in sync with FKM_BITS */ + const char *fm_fkm_str[FKM_BIT_COUNT] = { + [FKM_QTAG_BIT] = "qtag", + [FKM_CMD_BIT] = "cmd", + [FKM_IPV4_BIT] = "ip4", + [FKM_IPV6_BIT] = "ip6", + [FKM_ROCE_BIT] = "roce", + [FKM_UDP_BIT] = "udp", + [FKM_TCP_BIT] = "tcp", + [FKM_TCPORUDP_BIT] = "tcpportudp", + [FKM_IPFRAG_BIT] = "ipfrag", + [FKM_NVGRE_BIT] = "nvgre", + [FKM_VXLAN_BIT] = "vxlan", + [FKM_GENEVE_BIT] = "geneve", + [FKM_NSH_BIT] = "nsh", + [FKM_ROCEV2_BIT] = "rocev2", + [FKM_VLAN_PRES_BIT] = "vlan_pres", + [FKM_IPOK_BIT] = "ipok", + [FKM_L4OK_BIT] = "l4ok", + [FKM_ROCEOK_BIT] = "roceok", + [FKM_FCSOK_BIT] = "fcsok", + [FKM_EG_SPAN_BIT] = "eg_span", + [FKM_IG_SPAN_BIT] = "ig_span", + [FKM_EG_HAIRPINNED_BIT] = "eg_hairpinned", + }; + /* Manually keep in sync with FKH_BITS */ + const char *fm_fkh_str[FKH_BIT_COUNT] = { + [FKH_ETHER_BIT] = "eth", + [FKH_QTAG_BIT] = "qtag", + [FKH_L2RAW_BIT] = "l2raw", + [FKH_IPV4_BIT] = "ip4", + [FKH_IPV6_BIT] = "ip6", + [FKH_L3RAW_BIT] = "l3raw", + [FKH_UDP_BIT] = "udp", + [FKH_TCP_BIT] = "tcp", + [FKH_ICMP_BIT] = "icmp", + [FKH_VXLAN_BIT] = "vxlan", + [FKH_L4RAW_BIT] = "l4raw", + }; + uint32_t fkh_bits = fk_hdrset->fk_header_select; + uint32_t fkm_bits = fk_hdrset->fk_metadata; + int n; + + if (!fkm_bits && !fkh_bits) + return; + n = snprintf(buf, buf_len, "metadata("); + if (n > 0 && n < buf_len) { + buf += n; + buf_len -= n; + } + n = bits_to_str(fkm_bits, fm_fkm_str, FKM_BIT_COUNT, buf, buf_len); + if (n > 0 && n < buf_len) { + buf += n; + buf_len -= n; + } + n = snprintf(buf, buf_len, ") valid hdr fields("); + if (n > 0 && n < buf_len) { + buf += n; + buf_len -= n; + } + n = bits_to_str(fkh_bits, fm_fkh_str, FKH_BIT_COUNT, buf, buf_len); + if (n > 0 && n < buf_len) { + buf += n; + buf_len -= n; + } + snprintf(buf, buf_len, ")"); +} + +static void +enic_fm_dump_tcam_match(const struct fm_tcam_match_entry *match, + uint8_t ingress) +{ + char buf[256]; + + memset(buf, 0, sizeof(buf)); + __enic_fm_dump_tcam_match(&match->ftm_mask.fk_hdrset[0], + buf, sizeof(buf)); + ENICPMD_LOG(DEBUG, " TCAM %s Outer: %s %scounter", + (ingress) ? "IG" : "EG", buf, + (match->ftm_flags & FMEF_COUNTER) ? "" : "no "); + memset(buf, 0, sizeof(buf)); + __enic_fm_dump_tcam_match(&match->ftm_mask.fk_hdrset[1], + buf, sizeof(buf)); + if (buf[0]) + ENICPMD_LOG(DEBUG, " Inner: %s", buf); +} + +/* Debug function to dump internal NIC flow structures. */ +static void +enic_fm_dump_tcam_entry(const struct fm_tcam_match_entry *fm_match, + const struct fm_action *fm_action, + uint8_t ingress) +{ + if (!rte_log_can_log(enic_pmd_logtype, RTE_LOG_DEBUG)) + return; + enic_fm_dump_tcam_match(fm_match, ingress); + enic_fm_dump_tcam_actions(fm_action); +} + +static int +enic_fm_flow_parse(struct enic_flowman *fm, + const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_action *action; + unsigned int ret; + static const enum rte_flow_action_type *sa; + + ENICPMD_FUNC_TRACE(); + ret = 0; + if (!pattern) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "no pattern specified"); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "no action specified"); + return -rte_errno; + } + + if (attrs) { + if (attrs->priority) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + NULL, + "priorities are not supported"); + return -rte_errno; + } else if (attrs->transfer) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, + NULL, + "transfer is not supported"); + return -rte_errno; + } else if (attrs->ingress && attrs->egress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "bidirectional rules not supported"); + return -rte_errno; + } + + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "no attribute specified"); + return -rte_errno; + } + + /* Verify Actions. */ + sa = (attrs->ingress) ? enic_fm_supported_ig_actions : + enic_fm_supported_eg_actions; + for (action = &actions[0]; action->type != RTE_FLOW_ACTION_TYPE_END; + action++) { + if (action->type == RTE_FLOW_ACTION_TYPE_VOID) + continue; + else if (!enic_fm_match_action(action, sa)) + break; + } + if (action->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EPERM, RTE_FLOW_ERROR_TYPE_ACTION, + action, "invalid action"); + return -rte_errno; + } + ret = enic_fm_copy_entry(fm, pattern, error); + if (ret) + return ret; + ret = enic_fm_copy_action(fm, actions, attrs->ingress, error); + return ret; +} + +static void +enic_fm_counter_free(struct enic_flowman *fm, struct enic_fm_flow *fm_flow) +{ + if (!fm_flow->counter_valid) + return; + SLIST_INSERT_HEAD(&fm->counters, fm_flow->counter, next); + fm_flow->counter_valid = false; +} + +static int +enic_fm_more_counters(struct enic_flowman *fm) +{ + struct enic_fm_counter *new_stack; + struct enic_fm_counter *ctrs; + struct enic *enic; + int i, rc; + uint64_t args[2]; + + ENICPMD_FUNC_TRACE(); + enic = fm->enic; + new_stack = rte_realloc(fm->counter_stack, (fm->counters_alloced + + FM_COUNTERS_EXPAND) * + sizeof(struct enic_fm_counter), 0); + if (new_stack == NULL) { + ENICPMD_LOG(ERR, "cannot alloc counter memory"); + return -ENOMEM; + } + fm->counter_stack = new_stack; + + args[0] = FM_COUNTER_BRK; + args[1] = fm->counters_alloced + FM_COUNTERS_EXPAND; + rc = vnic_dev_flowman_cmd(enic->vdev, args, 2); + if (rc != 0) { + ENICPMD_LOG(ERR, "cannot alloc counters rc=%d", rc); + return rc; + } + ctrs = (struct enic_fm_counter *)fm->counter_stack + + fm->counters_alloced; + for (i = 0; i < FM_COUNTERS_EXPAND; i++, ctrs++) { + ctrs->handle = fm->counters_alloced + i; + SLIST_INSERT_HEAD(&fm->counters, ctrs, next); + } + fm->counters_alloced += FM_COUNTERS_EXPAND; + ENICPMD_LOG(DEBUG, "%u counters allocated, total: %u", + FM_COUNTERS_EXPAND, fm->counters_alloced); + return 0; +} + +static int +enic_fm_counter_zero(struct enic_flowman *fm, struct enic_fm_counter *c) +{ + struct enic *enic; + uint64_t args[3]; + int ret; + + ENICPMD_FUNC_TRACE(); + enic = fm->enic; + args[0] = FM_COUNTER_QUERY; + args[1] = c->handle; + args[2] = 1; /* clear */ + ret = vnic_dev_flowman_cmd(enic->vdev, args, 3); + if (ret) { + ENICPMD_LOG(ERR, "counter init: rc=%d handle=0x%x", + ret, c->handle); + return ret; + } + return 0; +} + +static int +enic_fm_counter_alloc(struct enic_flowman *fm, struct rte_flow_error *error, + struct enic_fm_counter **ctr) +{ + struct enic_fm_counter *c; + int ret; + + ENICPMD_FUNC_TRACE(); + *ctr = NULL; + if (SLIST_EMPTY(&fm->counters)) { + ret = enic_fm_more_counters(fm); + if (ret) + return rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "enic: out of counters"); + } + c = SLIST_FIRST(&fm->counters); + SLIST_REMOVE_HEAD(&fm->counters, next); + *ctr = c; + return 0; +} + +static int +enic_fm_action_free(struct enic_flowman *fm, uint64_t handle) +{ + uint64_t args[2]; + int rc; + + ENICPMD_FUNC_TRACE(); + args[0] = FM_ACTION_FREE; + args[1] = handle; + rc = vnic_dev_flowman_cmd(fm->enic->vdev, args, 2); + if (rc) + ENICPMD_LOG(ERR, "cannot free action: rc=%d handle=0x%" PRIx64, + rc, handle); + return rc; +} + +static int +enic_fm_entry_free(struct enic_flowman *fm, uint64_t handle) +{ + uint64_t args[2]; + int rc; + + ENICPMD_FUNC_TRACE(); + args[0] = FM_MATCH_ENTRY_REMOVE; + args[1] = handle; + rc = vnic_dev_flowman_cmd(fm->enic->vdev, args, 2); + if (rc) + ENICPMD_LOG(ERR, "cannot free match entry: rc=%d" + " handle=0x%" PRIx64, rc, handle); + return rc; +} + +static struct enic_fm_jump_flow * +find_jump_flow(struct enic_flowman *fm, uint32_t group) +{ + struct enic_fm_jump_flow *j; + + ENICPMD_FUNC_TRACE(); + TAILQ_FOREACH(j, &fm->jump_list, list) { + if (j->group == group) + return j; + } + return NULL; +} + +static void +remove_jump_flow(struct enic_flowman *fm, struct rte_flow *flow) +{ + struct enic_fm_jump_flow *j; + + ENICPMD_FUNC_TRACE(); + TAILQ_FOREACH(j, &fm->jump_list, list) { + if (j->flow == flow) { + TAILQ_REMOVE(&fm->jump_list, j, list); + free(j); + return; + } + } +} + +static int +save_jump_flow(struct enic_flowman *fm, + struct rte_flow *flow, + uint32_t group, + struct fm_tcam_match_entry *match, + struct fm_action *action) +{ + struct enic_fm_jump_flow *j; + + ENICPMD_FUNC_TRACE(); + j = calloc(1, sizeof(struct enic_fm_jump_flow)); + if (j == NULL) + return -ENOMEM; + j->flow = flow; + j->group = group; + j->match = *match; + j->action = *action; + TAILQ_INSERT_HEAD(&fm->jump_list, j, list); + ENICPMD_LOG(DEBUG, "saved jump flow: flow=%p group=%u", flow, group); + return 0; +} + +static void +__enic_fm_flow_free(struct enic_flowman *fm, struct enic_fm_flow *fm_flow) +{ + if (fm_flow->entry_handle != FM_INVALID_HANDLE) { + enic_fm_entry_free(fm, fm_flow->entry_handle); + fm_flow->entry_handle = FM_INVALID_HANDLE; + } + if (fm_flow->action_handle != FM_INVALID_HANDLE) { + enic_fm_action_free(fm, fm_flow->action_handle); + fm_flow->action_handle = FM_INVALID_HANDLE; + } + enic_fm_counter_free(fm, fm_flow); + if (fm_flow->fet) { + enic_fet_put(fm, fm_flow->fet); + fm_flow->fet = NULL; + } +} + +static void +enic_fm_flow_free(struct enic_flowman *fm, struct rte_flow *flow) +{ + if (flow->fm->fet && flow->fm->fet->default_key) + remove_jump_flow(fm, flow); + __enic_fm_flow_free(fm, flow->fm); + free(flow->fm); + free(flow); +} + +static int +enic_fm_add_tcam_entry(struct enic_flowman *fm, + struct fm_tcam_match_entry *match_in, + uint64_t *entry_handle, + uint8_t ingress, + struct rte_flow_error *error) +{ + struct fm_tcam_match_entry *ftm; + uint64_t args[3]; + int ret; + + ENICPMD_FUNC_TRACE(); + /* Copy entry to the command buffer */ + ftm = &fm->cmd.va->fm_tcam_match_entry; + memcpy(ftm, match_in, sizeof(*ftm)); + /* Add TCAM entry */ + args[0] = FM_TCAM_ENTRY_INSTALL; + args[1] = ingress ? fm->ig_tcam_hndl : fm->eg_tcam_hndl; + args[2] = fm->cmd.pa; + ret = vnic_dev_flowman_cmd(fm->enic->vdev, args, 3); + if (ret != 0) { + ENICPMD_LOG(ERR, "cannot add %s TCAM entry: rc=%d", + ingress ? "ingress" : "egress", ret); + rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "enic: devcmd(tcam-entry-install)"); + return ret; + } + ENICPMD_LOG(DEBUG, "installed %s TCAM entry: handle=0x%" PRIx64, + ingress ? "ingress" : "egress", (uint64_t)args[0]); + *entry_handle = args[0]; + return 0; +} + +static int +enic_fm_add_exact_entry(struct enic_flowman *fm, + struct fm_tcam_match_entry *match_in, + uint64_t *entry_handle, + struct enic_fm_fet *fet, + struct rte_flow_error *error) +{ + struct fm_exact_match_entry *fem; + uint64_t args[3]; + int ret; + + ENICPMD_FUNC_TRACE(); + /* The new entry must have the table's key */ + if (memcmp(fet->key.fk_hdrset, match_in->ftm_mask.fk_hdrset, + sizeof(struct fm_header_set) * FM_HDRSET_MAX)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "enic: key does not match group's key"); + } + + /* Copy entry to the command buffer */ + fem = &fm->cmd.va->fm_exact_match_entry; + /* + * Translate TCAM entry to exact entry. As is only need to drop + * position and mask. The mask is part of the exact match table. + * Position (aka priority) is not supported in the exact match table. + */ + fem->fem_data = match_in->ftm_data; + fem->fem_flags = match_in->ftm_flags; + fem->fem_action = match_in->ftm_action; + fem->fem_counter = match_in->ftm_counter; + + /* Add exact entry */ + args[0] = FM_EXACT_ENTRY_INSTALL; + args[1] = fet->handle; + args[2] = fm->cmd.pa; + ret = vnic_dev_flowman_cmd(fm->enic->vdev, args, 3); + if (ret != 0) { + ENICPMD_LOG(ERR, "cannot add %s exact entry: group=%u", + fet->ingress ? "ingress" : "egress", fet->group); + rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "enic: devcmd(exact-entry-install)"); + return ret; + } + ENICPMD_LOG(DEBUG, "installed %s exact entry: group=%u" + " handle=0x%" PRIx64, + fet->ingress ? "ingress" : "egress", fet->group, + (uint64_t)args[0]); + *entry_handle = args[0]; + return 0; +} + +/* Push match-action to the NIC. */ +static int +__enic_fm_flow_add_entry(struct enic_flowman *fm, + struct enic_fm_flow *fm_flow, + struct fm_tcam_match_entry *match_in, + struct fm_action *action_in, + uint32_t group, + uint8_t ingress, + struct rte_flow_error *error) +{ + struct enic_fm_counter *ctr; + struct fm_action *fma; + uint64_t action_h; + uint64_t entry_h; + uint64_t args[3]; + int ret; + + ENICPMD_FUNC_TRACE(); + /* Allocate action. */ + fma = &fm->cmd.va->fm_action; + memcpy(fma, action_in, sizeof(*fma)); + args[0] = FM_ACTION_ALLOC; + args[1] = fm->cmd.pa; + ret = vnic_dev_flowman_cmd(fm->enic->vdev, args, 2); + if (ret != 0) { + ENICPMD_LOG(ERR, "allocating TCAM table action rc=%d", ret); + rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "enic: devcmd(action-alloc)"); + return ret; + } + action_h = args[0]; + fm_flow->action_handle = action_h; + match_in->ftm_action = action_h; + ENICPMD_LOG(DEBUG, "action allocated: handle=0x%" PRIx64, action_h); + + /* Allocate counter if requested. */ + if (match_in->ftm_flags & FMEF_COUNTER) { + ret = enic_fm_counter_alloc(fm, error, &ctr); + if (ret) /* error has been filled in */ + return ret; + fm_flow->counter_valid = true; + fm_flow->counter = ctr; + match_in->ftm_counter = ctr->handle; + } + + /* + * Get the group's table (either TCAM or exact match table) and + * add entry to it. If we use the exact match table, the handler + * will translate the TCAM entry (match_in) to the appropriate + * exact match entry and use that instead. + */ + entry_h = FM_INVALID_HANDLE; + if (group == FM_TCAM_RTE_GROUP) { + ret = enic_fm_add_tcam_entry(fm, match_in, &entry_h, ingress, + error); + if (ret) + return ret; + /* Jump action might have a ref to fet */ + fm_flow->fet = fm->fet; + fm->fet = NULL; + } else { + struct enic_fm_fet *fet = NULL; + + ret = enic_fet_get(fm, group, ingress, + &match_in->ftm_mask, &fet, error); + if (ret) + return ret; + fm_flow->fet = fet; + ret = enic_fm_add_exact_entry(fm, match_in, &entry_h, fet, + error); + if (ret) + return ret; + } + /* Clear counter after adding entry, as it requires in-use counter */ + if (fm_flow->counter_valid) { + ret = enic_fm_counter_zero(fm, fm_flow->counter); + if (ret) + return ret; + } + fm_flow->entry_handle = entry_h; + return 0; +} + +/* Push match-action to the NIC. */ +static struct rte_flow * +enic_fm_flow_add_entry(struct enic_flowman *fm, + struct fm_tcam_match_entry *match_in, + struct fm_action *action_in, + const struct rte_flow_attr *attrs, + struct rte_flow_error *error) +{ + struct enic_fm_flow *fm_flow; + struct rte_flow *flow; + + ENICPMD_FUNC_TRACE(); + enic_fm_dump_tcam_entry(match_in, action_in, attrs->ingress); + flow = calloc(1, sizeof(*flow)); + fm_flow = calloc(1, sizeof(*fm_flow)); + if (flow == NULL || fm_flow == NULL) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "enic: cannot allocate rte_flow"); + free(flow); + free(fm_flow); + return NULL; + } + flow->fm = fm_flow; + fm_flow->action_handle = FM_INVALID_HANDLE; + fm_flow->entry_handle = FM_INVALID_HANDLE; + if (__enic_fm_flow_add_entry(fm, fm_flow, match_in, action_in, + attrs->group, attrs->ingress, error)) { + enic_fm_flow_free(fm, flow); + return NULL; + } + return flow; +} + +static void +convert_jump_flows(struct enic_flowman *fm, struct enic_fm_fet *fet, + struct rte_flow_error *error) +{ + struct enic_fm_flow *fm_flow; + struct enic_fm_jump_flow *j; + struct fm_action *fma; + uint32_t group; + + ENICPMD_FUNC_TRACE(); + /* + * Find the saved flows that should jump to the new table (fet). + * Then delete the old TCAM entry that jumps to the default table, + * and add a new one that jumps to the new table. + */ + group = fet->group; + j = find_jump_flow(fm, group); + while (j) { + ENICPMD_LOG(DEBUG, "convert jump flow: flow=%p group=%u", + j->flow, group); + /* Delete old entry */ + fm_flow = j->flow->fm; + __enic_fm_flow_free(fm, fm_flow); + + /* Add new entry */ + fma = &j->action; + fma->fma_action_ops[0].exact.handle = fet->handle; + if (__enic_fm_flow_add_entry(fm, fm_flow, &j->match, fma, + FM_TCAM_RTE_GROUP, fet->ingress, error)) { + /* Cannot roll back changes at the moment */ + ENICPMD_LOG(ERR, "cannot convert jump flow: flow=%p", + j->flow); + } else { + fm_flow->fet = fet; + fet->ref++; + ENICPMD_LOG(DEBUG, "convert ok: group=%u ref=%u", + fet->group, fet->ref); + } + + TAILQ_REMOVE(&fm->jump_list, j, list); + free(j); + j = find_jump_flow(fm, group); + } +} + +static void +enic_fm_open_scratch(struct enic_flowman *fm) +{ + fm->action_op_count = 0; + fm->fet = NULL; + memset(&fm->tcam_entry, 0, sizeof(fm->tcam_entry)); + memset(&fm->action, 0, sizeof(fm->action)); +} + +static void +enic_fm_close_scratch(struct enic_flowman *fm) +{ + if (fm->fet) { + enic_fet_put(fm, fm->fet); + fm->fet = NULL; + } + fm->action_op_count = 0; +} + +static int +enic_fm_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct fm_tcam_match_entry *fm_tcam_entry; + struct fm_action *fm_action; + struct enic_flowman *fm; + int ret; + + ENICPMD_FUNC_TRACE(); + fm = pmd_priv(dev)->fm; + if (fm == NULL) + return -ENOTSUP; + enic_fm_open_scratch(fm); + ret = enic_fm_flow_parse(fm, attrs, pattern, actions, error); + if (!ret) { + fm_tcam_entry = &fm->tcam_entry; + fm_action = &fm->action; + enic_fm_dump_tcam_entry(fm_tcam_entry, fm_action, + attrs->ingress); + } + enic_fm_close_scratch(fm); + return ret; +} + +static int +enic_fm_flow_query_count(struct rte_eth_dev *dev, + struct rte_flow *flow, void *data, + struct rte_flow_error *error) +{ + struct rte_flow_query_count *query; + struct enic_fm_flow *fm_flow; + struct enic *enic; + uint64_t args[3]; + int rc; + + ENICPMD_FUNC_TRACE(); + enic = pmd_priv(dev); + query = data; + fm_flow = flow->fm; + if (!fm_flow->counter_valid) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "enic: flow does not have counter"); + + args[0] = FM_COUNTER_QUERY; + args[1] = fm_flow->counter->handle; + args[2] = query->reset; + rc = vnic_dev_flowman_cmd(enic->vdev, args, 3); + if (rc) { + ENICPMD_LOG(ERR, "cannot query counter: rc=%d handle=0x%x", + rc, fm_flow->counter->handle); + return rc; + } + query->hits_set = 1; + query->hits = args[0]; + query->bytes_set = 1; + query->bytes = args[1]; + return 0; +} + +static int +enic_fm_flow_query(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_action *actions, + void *data, + struct rte_flow_error *error) +{ + int ret = 0; + + ENICPMD_FUNC_TRACE(); + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_VOID: + break; + case RTE_FLOW_ACTION_TYPE_COUNT: + ret = enic_fm_flow_query_count(dev, flow, data, error); + break; + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "action not supported"); + } + if (ret < 0) + return ret; + } + return 0; +} + +static struct rte_flow * +enic_fm_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attrs, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct fm_tcam_match_entry *fm_tcam_entry; + struct fm_action *fm_action; + struct enic_flowman *fm; + struct enic_fm_fet *fet; + struct rte_flow *flow; + struct enic *enic; + int ret; + + ENICPMD_FUNC_TRACE(); + enic = pmd_priv(dev); + fm = enic->fm; + if (fm == NULL) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "flowman is not initialized"); + return NULL; + } + enic_fm_open_scratch(fm); + flow = NULL; + ret = enic_fm_flow_parse(fm, attrs, pattern, actions, error); + if (ret < 0) + goto error_with_scratch; + fm_tcam_entry = &fm->tcam_entry; + fm_action = &fm->action; + flow = enic_fm_flow_add_entry(fm, fm_tcam_entry, fm_action, + attrs, error); + if (flow) { + LIST_INSERT_HEAD(&enic->flows, flow, next); + fet = flow->fm->fet; + if (fet && fet->default_key) { + /* + * Jump to non-existent group? Save the relevant info + * so we can convert this flow when that group + * materializes. + */ + save_jump_flow(fm, flow, fet->group, + fm_tcam_entry, fm_action); + } else if (fet && fet->ref == 1) { + /* + * A new table is created. Convert the saved flows + * that should jump to this group. + */ + convert_jump_flows(fm, fet, error); + } + } + +error_with_scratch: + enic_fm_close_scratch(fm); + return flow; +} + +static int +enic_fm_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + __rte_unused struct rte_flow_error *error) +{ + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + if (enic->fm == NULL) + return 0; + LIST_REMOVE(flow, next); + enic_fm_flow_free(enic->fm, flow); + return 0; +} + +static int +enic_fm_flow_flush(struct rte_eth_dev *dev, + __rte_unused struct rte_flow_error *error) +{ + struct enic_fm_flow *fm_flow; + struct enic_flowman *fm; + struct rte_flow *flow; + struct enic *enic = pmd_priv(dev); + + ENICPMD_FUNC_TRACE(); + if (enic->fm == NULL) + return 0; + fm = enic->fm; + while (!LIST_EMPTY(&enic->flows)) { + flow = LIST_FIRST(&enic->flows); + fm_flow = flow->fm; + LIST_REMOVE(flow, next); + /* + * If tables are null, then vNIC is closing, and the firmware + * has already cleaned up flowman state. So do not try to free + * resources, as it only causes errors. + */ + if (fm->ig_tcam_hndl == FM_INVALID_HANDLE) { + fm_flow->entry_handle = FM_INVALID_HANDLE; + fm_flow->action_handle = FM_INVALID_HANDLE; + fm_flow->fet = NULL; + } + enic_fm_flow_free(fm, flow); + } + return 0; +} + +static int +enic_fm_tbl_free(struct enic_flowman *fm, uint64_t handle) +{ + uint64_t args[2]; + int rc; + + args[0] = FM_MATCH_TABLE_FREE; + args[1] = handle; + rc = vnic_dev_flowman_cmd(fm->enic->vdev, args, 2); + if (rc) + ENICPMD_LOG(ERR, "cannot free table: rc=%d handle=0x%" PRIx64, + rc, handle); + return rc; +} + +static int +enic_fm_tcam_tbl_alloc(struct enic_flowman *fm, uint32_t direction, + uint32_t max_entries, uint64_t *handle) +{ + struct fm_tcam_match_table *tcam_tbl; + struct enic *enic; + uint64_t args[2]; + int rc; + + ENICPMD_FUNC_TRACE(); + enic = fm->enic; + tcam_tbl = &fm->cmd.va->fm_tcam_match_table; + tcam_tbl->ftt_direction = direction; + tcam_tbl->ftt_stage = FM_STAGE_LAST; + tcam_tbl->ftt_max_entries = max_entries; + args[0] = FM_TCAM_TABLE_ALLOC; + args[1] = fm->cmd.pa; + rc = vnic_dev_flowman_cmd(enic->vdev, args, 2); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc %s TCAM table: rc=%d", + (direction == FM_INGRESS) ? "IG" : "EG", rc); + return rc; + } + *handle = args[0]; + ENICPMD_LOG(DEBUG, "%s TCAM table allocated, handle=0x%" PRIx64, + (direction == FM_INGRESS) ? "IG" : "EG", *handle); + return 0; +} + +static int +enic_fm_init_counters(struct enic_flowman *fm) +{ + ENICPMD_FUNC_TRACE(); + SLIST_INIT(&fm->counters); + return enic_fm_more_counters(fm); +} + +static void +enic_fm_free_all_counters(struct enic_flowman *fm) +{ + struct enic *enic; + uint64_t args[2]; + int rc; + + enic = fm->enic; + args[0] = FM_COUNTER_BRK; + args[1] = 0; + rc = vnic_dev_flowman_cmd(enic->vdev, args, 2); + if (rc != 0) + ENICPMD_LOG(ERR, "cannot free counters: rc=%d", rc); + rte_free(fm->counter_stack); +} + +static int +enic_fm_alloc_tcam_tables(struct enic_flowman *fm) +{ + int rc; + + ENICPMD_FUNC_TRACE(); + rc = enic_fm_tcam_tbl_alloc(fm, FM_INGRESS, FM_MAX_TCAM_TABLE_SIZE, + &fm->ig_tcam_hndl); + if (rc) + return rc; + rc = enic_fm_tcam_tbl_alloc(fm, FM_EGRESS, FM_MAX_TCAM_TABLE_SIZE, + &fm->eg_tcam_hndl); + return rc; +} + +static void +enic_fm_free_tcam_tables(struct enic_flowman *fm) +{ + ENICPMD_FUNC_TRACE(); + if (fm->ig_tcam_hndl) { + ENICPMD_LOG(DEBUG, "free IG TCAM table handle=0x%" PRIx64, + fm->ig_tcam_hndl); + enic_fm_tbl_free(fm, fm->ig_tcam_hndl); + fm->ig_tcam_hndl = FM_INVALID_HANDLE; + } + if (fm->eg_tcam_hndl) { + ENICPMD_LOG(DEBUG, "free EG TCAM table handle=0x%" PRIx64, + fm->eg_tcam_hndl); + enic_fm_tbl_free(fm, fm->eg_tcam_hndl); + fm->eg_tcam_hndl = FM_INVALID_HANDLE; + } +} + +int +enic_fm_init(struct enic *enic) +{ + struct enic_flowman *fm; + uint8_t name[RTE_MEMZONE_NAMESIZE]; + int rc; + + if (enic->flow_filter_mode != FILTER_FLOWMAN) + return 0; + ENICPMD_FUNC_TRACE(); + fm = calloc(1, sizeof(*fm)); + if (fm == NULL) { + ENICPMD_LOG(ERR, "cannot alloc flowman struct"); + return -ENOMEM; + } + fm->enic = enic; + TAILQ_INIT(&fm->fet_list); + TAILQ_INIT(&fm->jump_list); + /* Allocate host memory for flowman commands */ + snprintf((char *)name, sizeof(name), "fm-cmd-%s", enic->bdf_name); + fm->cmd.va = enic_alloc_consistent(enic, + sizeof(union enic_flowman_cmd_mem), &fm->cmd.pa, name); + if (!fm->cmd.va) { + ENICPMD_LOG(ERR, "cannot allocate flowman command memory"); + rc = -ENOMEM; + goto error_fm; + } + /* Allocate TCAM tables upfront as they are the main tables */ + rc = enic_fm_alloc_tcam_tables(fm); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc TCAM tables"); + goto error_cmd; + } + /* Then a number of counters */ + rc = enic_fm_init_counters(fm); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc counters"); + goto error_tables; + } + /* + * One default exact match table for each direction. We hold onto + * it until close. + */ + rc = enic_fet_alloc(fm, 1, NULL, 128, &fm->default_ig_fet); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc default IG exact match table"); + goto error_counters; + } + fm->default_ig_fet->ref = 1; + rc = enic_fet_alloc(fm, 0, NULL, 128, &fm->default_eg_fet); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc default EG exact match table"); + goto error_ig_fet; + } + fm->default_eg_fet->ref = 1; + enic->fm = fm; + return 0; + +error_ig_fet: + enic_fet_free(fm, fm->default_ig_fet); +error_counters: + enic_fm_free_all_counters(fm); +error_tables: + enic_fm_free_tcam_tables(fm); +error_cmd: + enic_free_consistent(enic, sizeof(union enic_flowman_cmd_mem), + fm->cmd.va, fm->cmd.pa); +error_fm: + free(fm); + return rc; +} + +void +enic_fm_destroy(struct enic *enic) +{ + struct enic_flowman *fm; + struct enic_fm_fet *fet; + + if (enic->fm == NULL) + return; + ENICPMD_FUNC_TRACE(); + fm = enic->fm; + enic_fet_free(fm, fm->default_eg_fet); + enic_fet_free(fm, fm->default_ig_fet); + /* Free all exact match tables still open */ + while (!TAILQ_EMPTY(&fm->fet_list)) { + fet = TAILQ_FIRST(&fm->fet_list); + enic_fet_free(fm, fet); + } + enic_fm_free_tcam_tables(fm); + enic_fm_free_all_counters(fm); + enic_free_consistent(enic, sizeof(union enic_flowman_cmd_mem), + fm->cmd.va, fm->cmd.pa); + fm->cmd.va = NULL; + free(fm); + enic->fm = NULL; +} + +const struct rte_flow_ops enic_fm_flow_ops = { + .validate = enic_fm_flow_validate, + .create = enic_fm_flow_create, + .destroy = enic_fm_flow_destroy, + .flush = enic_fm_flow_flush, + .query = enic_fm_flow_query, +}; diff --git a/src/spdk/dpdk/drivers/net/enic/enic_main.c b/src/spdk/dpdk/drivers/net/enic/enic_main.c new file mode 100644 index 000000000..7942b0df6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_main.c @@ -0,0 +1,1882 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <stdio.h> + +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> + +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_ethdev_driver.h> + +#include "enic_compat.h" +#include "enic.h" +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "cq_enet_desc.h" +#include "vnic_enet.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_nic.h" + +static inline int enic_is_sriov_vf(struct enic *enic) +{ + return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF; +} + +static int is_zero_addr(uint8_t *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} + +static int is_mcast_addr(uint8_t *addr) +{ + return addr[0] & 1; +} + +static int is_eth_addr_valid(uint8_t *addr) +{ + return !is_mcast_addr(addr) && !is_zero_addr(addr); +} + +static void +enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq) +{ + uint16_t i; + + if (!rq || !rq->mbuf_ring) { + dev_debug(enic, "Pointer to rq or mbuf_ring is NULL"); + return; + } + + for (i = 0; i < rq->ring.desc_count; i++) { + if (rq->mbuf_ring[i]) { + rte_pktmbuf_free_seg(rq->mbuf_ring[i]); + rq->mbuf_ring[i] = NULL; + } + } +} + +static void enic_free_wq_buf(struct rte_mbuf **buf) +{ + struct rte_mbuf *mbuf = *buf; + + rte_pktmbuf_free_seg(mbuf); + *buf = NULL; +} + +static void enic_log_q_error(struct enic *enic) +{ + unsigned int i; + uint32_t error_status; + + for (i = 0; i < enic->wq_count; i++) { + error_status = vnic_wq_error_status(&enic->wq[i]); + if (error_status) + dev_err(enic, "WQ[%d] error_status %d\n", i, + error_status); + } + + for (i = 0; i < enic_vnic_rq_count(enic); i++) { + if (!enic->rq[i].in_use) + continue; + error_status = vnic_rq_error_status(&enic->rq[i]); + if (error_status) + dev_err(enic, "RQ[%d] error_status %d\n", i, + error_status); + } +} + +static void enic_clear_soft_stats(struct enic *enic) +{ + struct enic_soft_stats *soft_stats = &enic->soft_stats; + rte_atomic64_clear(&soft_stats->rx_nombuf); + rte_atomic64_clear(&soft_stats->rx_packet_errors); + rte_atomic64_clear(&soft_stats->tx_oversized); +} + +static void enic_init_soft_stats(struct enic *enic) +{ + struct enic_soft_stats *soft_stats = &enic->soft_stats; + rte_atomic64_init(&soft_stats->rx_nombuf); + rte_atomic64_init(&soft_stats->rx_packet_errors); + rte_atomic64_init(&soft_stats->tx_oversized); + enic_clear_soft_stats(enic); +} + +int enic_dev_stats_clear(struct enic *enic) +{ + int ret; + + ret = vnic_dev_stats_clear(enic->vdev); + if (ret != 0) { + dev_err(enic, "Error in clearing stats\n"); + return ret; + } + enic_clear_soft_stats(enic); + + return 0; +} + +int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats) +{ + struct vnic_stats *stats; + struct enic_soft_stats *soft_stats = &enic->soft_stats; + int64_t rx_truncated; + uint64_t rx_packet_errors; + int ret = vnic_dev_stats_dump(enic->vdev, &stats); + + if (ret) { + dev_err(enic, "Error in getting stats\n"); + return ret; + } + + /* The number of truncated packets can only be calculated by + * subtracting a hardware counter from error packets received by + * the driver. Note: this causes transient inaccuracies in the + * ipackets count. Also, the length of truncated packets are + * counted in ibytes even though truncated packets are dropped + * which can make ibytes be slightly higher than it should be. + */ + rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors); + rx_truncated = rx_packet_errors - stats->rx.rx_errors; + + r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated; + r_stats->opackets = stats->tx.tx_frames_ok; + + r_stats->ibytes = stats->rx.rx_bytes_ok; + r_stats->obytes = stats->tx.tx_bytes_ok; + + r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop; + r_stats->oerrors = stats->tx.tx_errors + + rte_atomic64_read(&soft_stats->tx_oversized); + + r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated; + + r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf); + return 0; +} + +int enic_del_mac_address(struct enic *enic, int mac_index) +{ + struct rte_eth_dev *eth_dev = enic->rte_dev; + uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes; + + return vnic_dev_del_addr(enic->vdev, mac_addr); +} + +int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr) +{ + int err; + + if (!is_eth_addr_valid(mac_addr)) { + dev_err(enic, "invalid mac address\n"); + return -EINVAL; + } + + err = vnic_dev_add_addr(enic->vdev, mac_addr); + if (err) + dev_err(enic, "add mac addr failed\n"); + return err; +} + +static void +enic_free_rq_buf(struct rte_mbuf **mbuf) +{ + if (*mbuf == NULL) + return; + + rte_pktmbuf_free(*mbuf); + *mbuf = NULL; +} + +void enic_init_vnic_resources(struct enic *enic) +{ + unsigned int error_interrupt_enable = 1; + unsigned int error_interrupt_offset = 0; + unsigned int rxq_interrupt_enable = 0; + unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET; + unsigned int index = 0; + unsigned int cq_idx; + struct vnic_rq *data_rq; + + if (enic->rte_dev->data->dev_conf.intr_conf.rxq) + rxq_interrupt_enable = 1; + + for (index = 0; index < enic->rq_count; index++) { + cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index)); + + vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)], + cq_idx, + error_interrupt_enable, + error_interrupt_offset); + + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]; + if (data_rq->in_use) + vnic_rq_init(data_rq, + cq_idx, + error_interrupt_enable, + error_interrupt_offset); + vnic_cq_init(&enic->cq[cq_idx], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + rxq_interrupt_enable, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + rxq_interrupt_offset, + 0 /* cq_message_addr */); + if (rxq_interrupt_enable) + rxq_interrupt_offset++; + } + + for (index = 0; index < enic->wq_count; index++) { + vnic_wq_init(&enic->wq[index], + enic_cq_wq(enic, index), + error_interrupt_enable, + error_interrupt_offset); + /* Compute unsupported ol flags for enic_prep_pkts() */ + enic->wq[index].tx_offload_notsup_mask = + PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask; + + cq_idx = enic_cq_wq(enic, index); + vnic_cq_init(&enic->cq[cq_idx], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 0 /* interrupt_enable */, + 0 /* cq_entry_enable */, + 1 /* cq_message_enable */, + 0 /* interrupt offset */, + (uint64_t)enic->wq[index].cqmsg_rz->iova); + } + + for (index = 0; index < enic->intr_count; index++) { + vnic_intr_init(&enic->intr[index], + enic->config.intr_timer_usec, + enic->config.intr_timer_type, + /*mask_on_assertion*/1); + } +} + + +static int +enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq) +{ + struct rte_mbuf *mb; + struct rq_enet_desc *rqd = rq->ring.descs; + unsigned i; + dma_addr_t dma_addr; + uint32_t max_rx_pkt_len; + uint16_t rq_buf_len; + + if (!rq->in_use) + return 0; + + dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index, + rq->ring.desc_count); + + /* + * If *not* using scatter and the mbuf size is greater than the + * requested max packet size (max_rx_pkt_len), then reduce the + * posted buffer size to max_rx_pkt_len. HW still receives packets + * larger than max_rx_pkt_len, but they will be truncated, which we + * drop in the rx handler. Not ideal, but better than returning + * large packets when the user is not expecting them. + */ + max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len; + rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM; + if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable) + rq_buf_len = max_rx_pkt_len; + for (i = 0; i < rq->ring.desc_count; i++, rqd++) { + mb = rte_mbuf_raw_alloc(rq->mp); + if (mb == NULL) { + dev_err(enic, "RX mbuf alloc failed queue_id=%u\n", + (unsigned)rq->index); + return -ENOMEM; + } + + mb->data_off = RTE_PKTMBUF_HEADROOM; + dma_addr = (dma_addr_t)(mb->buf_iova + + RTE_PKTMBUF_HEADROOM); + rq_enet_desc_enc(rqd, dma_addr, + (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP + : RQ_ENET_TYPE_NOT_SOP), + rq_buf_len); + rq->mbuf_ring[i] = mb; + } + /* + * Do not post the buffers to the NIC until we enable the RQ via + * enic_start_rq(). + */ + rq->need_initial_post = true; + /* Initialize fetch index while RQ is disabled */ + iowrite32(0, &rq->ctrl->fetch_index); + return 0; +} + +/* + * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has + * allocated the buffers and filled the RQ descriptor ring. Just need to push + * the post index to the NIC. + */ +static void +enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq) +{ + if (!rq->in_use || !rq->need_initial_post) + return; + + /* make sure all prior writes are complete before doing the PIO write */ + rte_rmb(); + + /* Post all but the last buffer to VIC. */ + rq->posted_index = rq->ring.desc_count - 1; + + rq->rx_nb_hold = 0; + + dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n", + enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold); + iowrite32(rq->posted_index, &rq->ctrl->posted_index); + rte_rmb(); + rq->need_initial_post = false; +} + +void * +enic_alloc_consistent(void *priv, size_t size, + dma_addr_t *dma_handle, uint8_t *name) +{ + void *vaddr; + const struct rte_memzone *rz; + *dma_handle = 0; + struct enic *enic = (struct enic *)priv; + struct enic_memzone_entry *mze; + + rz = rte_memzone_reserve_aligned((const char *)name, size, + SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE); + if (!rz) { + pr_err("%s : Failed to allocate memory requested for %s\n", + __func__, name); + return NULL; + } + + vaddr = rz->addr; + *dma_handle = (dma_addr_t)rz->iova; + + mze = rte_malloc("enic memzone entry", + sizeof(struct enic_memzone_entry), 0); + + if (!mze) { + pr_err("%s : Failed to allocate memory for memzone list\n", + __func__); + rte_memzone_free(rz); + return NULL; + } + + mze->rz = rz; + + rte_spinlock_lock(&enic->memzone_list_lock); + LIST_INSERT_HEAD(&enic->memzone_list, mze, entries); + rte_spinlock_unlock(&enic->memzone_list_lock); + + return vaddr; +} + +void +enic_free_consistent(void *priv, + __rte_unused size_t size, + void *vaddr, + dma_addr_t dma_handle) +{ + struct enic_memzone_entry *mze; + struct enic *enic = (struct enic *)priv; + + rte_spinlock_lock(&enic->memzone_list_lock); + LIST_FOREACH(mze, &enic->memzone_list, entries) { + if (mze->rz->addr == vaddr && + mze->rz->iova == dma_handle) + break; + } + if (mze == NULL) { + rte_spinlock_unlock(&enic->memzone_list_lock); + dev_warning(enic, + "Tried to free memory, but couldn't find it in the memzone list\n"); + return; + } + LIST_REMOVE(mze, entries); + rte_spinlock_unlock(&enic->memzone_list_lock); + rte_memzone_free(mze->rz); + rte_free(mze); +} + +int enic_link_update(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + struct rte_eth_link link; + + memset(&link, 0, sizeof(link)); + link.link_status = enic_get_link_status(enic); + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = vnic_dev_port_speed(enic->vdev); + + return rte_eth_linkstatus_set(eth_dev, &link); +} + +static void +enic_intr_handler(void *arg) +{ + struct rte_eth_dev *dev = (struct rte_eth_dev *)arg; + struct enic *enic = pmd_priv(dev); + + vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]); + + enic_link_update(dev); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + enic_log_q_error(enic); + /* Re-enable irq in case of INTx */ + rte_intr_ack(&enic->pdev->intr_handle); +} + +static int enic_rxq_intr_init(struct enic *enic) +{ + struct rte_intr_handle *intr_handle; + uint32_t rxq_intr_count, i; + int err; + + intr_handle = enic->rte_dev->intr_handle; + if (!enic->rte_dev->data->dev_conf.intr_conf.rxq) + return 0; + /* + * Rx queue interrupts only work when we have MSI-X interrupts, + * one per queue. Sharing one interrupt is technically + * possible with VIC, but it is not worth the complications it brings. + */ + if (!rte_intr_cap_multiple(intr_handle)) { + dev_err(enic, "Rx queue interrupts require MSI-X interrupts" + " (vfio-pci driver)\n"); + return -ENOTSUP; + } + rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET; + err = rte_intr_efd_enable(intr_handle, rxq_intr_count); + if (err) { + dev_err(enic, "Failed to enable event fds for Rx queue" + " interrupts\n"); + return err; + } + intr_handle->intr_vec = rte_zmalloc("enic_intr_vec", + rxq_intr_count * sizeof(int), 0); + if (intr_handle->intr_vec == NULL) { + dev_err(enic, "Failed to allocate intr_vec\n"); + return -ENOMEM; + } + for (i = 0; i < rxq_intr_count; i++) + intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET; + return 0; +} + +static void enic_rxq_intr_deinit(struct enic *enic) +{ + struct rte_intr_handle *intr_handle; + + intr_handle = enic->rte_dev->intr_handle; + rte_intr_efd_disable(intr_handle); + if (intr_handle->intr_vec != NULL) { + rte_free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; + } +} + +static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx) +{ + struct wq_enet_desc *desc; + struct vnic_wq *wq; + unsigned int i; + + /* + * Fill WQ descriptor fields that never change. Every descriptor is + * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH + * descriptors (i.e. request one completion update every 32 packets). + */ + wq = &enic->wq[queue_idx]; + desc = (struct wq_enet_desc *)wq->ring.descs; + for (i = 0; i < wq->ring.desc_count; i++, desc++) { + desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT; + if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1) + desc->header_length_flags |= + (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT); + } +} + +/* + * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used + * used when that file is not compiled. + */ +__rte_weak bool +enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev) +{ + return false; +} + +void enic_pick_rx_handler(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + + /* + * Preference order: + * 1. The vectorized handler if possible and requested. + * 2. The non-scatter, simplified handler if scatter Rx is not used. + * 3. The default handler as a fallback. + */ + if (enic_use_vector_rx_handler(eth_dev)) + return; + if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) { + ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler"); + eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts; + } else { + ENICPMD_LOG(DEBUG, " use the normal Rx handler"); + eth_dev->rx_pkt_burst = &enic_recv_pkts; + } +} + +/* Secondary process uses this to set the Tx handler */ +void enic_pick_tx_handler(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + + if (enic->use_simple_tx_handler) { + ENICPMD_LOG(DEBUG, " use the simple tx handler"); + eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts; + } else { + ENICPMD_LOG(DEBUG, " use the default tx handler"); + eth_dev->tx_pkt_burst = &enic_xmit_pkts; + } +} + +int enic_enable(struct enic *enic) +{ + unsigned int index; + int err; + struct rte_eth_dev *eth_dev = enic->rte_dev; + uint64_t simple_tx_offloads; + uintptr_t p; + + if (enic->enable_avx2_rx) { + struct rte_mbuf mb_def = { .buf_addr = 0 }; + + /* + * mbuf_initializer contains const-after-init fields of + * receive mbufs (i.e. 64 bits of fields from rearm_data). + * It is currently used by the vectorized handler. + */ + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = enic->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + enic->mbuf_initializer = *(uint64_t *)p; + } + + eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); + eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; + + /* vnic notification of link status has already been turned on in + * enic_dev_init() which is called during probe time. Here we are + * just turning on interrupt vector 0 if needed. + */ + if (eth_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, 0); + + err = enic_rxq_intr_init(enic); + if (err) + return err; + if (enic_clsf_init(enic)) + dev_warning(enic, "Init of hash table for clsf failed."\ + "Flow director feature will not work\n"); + + if (enic_fm_init(enic)) + dev_warning(enic, "Init of flowman failed.\n"); + + for (index = 0; index < enic->rq_count; index++) { + err = enic_alloc_rx_queue_mbufs(enic, + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); + if (err) { + dev_err(enic, "Failed to alloc sop RX queue mbufs\n"); + return err; + } + err = enic_alloc_rx_queue_mbufs(enic, + &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]); + if (err) { + /* release the allocated mbufs for the sop rq*/ + enic_rxmbuf_queue_release(enic, + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); + + dev_err(enic, "Failed to alloc data RX queue mbufs\n"); + return err; + } + } + + /* + * Use the simple TX handler if possible. Only checksum offloads + * and vlan insertion are supported. + */ + simple_tx_offloads = enic->tx_offload_capa & + (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_VLAN_INSERT | + DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM); + if ((eth_dev->data->dev_conf.txmode.offloads & + ~simple_tx_offloads) == 0) { + ENICPMD_LOG(DEBUG, " use the simple tx handler"); + eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts; + for (index = 0; index < enic->wq_count; index++) + enic_prep_wq_for_simple_tx(enic, index); + enic->use_simple_tx_handler = 1; + } else { + ENICPMD_LOG(DEBUG, " use the default tx handler"); + eth_dev->tx_pkt_burst = &enic_xmit_pkts; + } + + enic_pick_rx_handler(eth_dev); + + for (index = 0; index < enic->wq_count; index++) + enic_start_wq(enic, index); + for (index = 0; index < enic->rq_count; index++) + enic_start_rq(enic, index); + + vnic_dev_add_addr(enic->vdev, enic->mac_addr); + + vnic_dev_enable_wait(enic->vdev); + + /* Register and enable error interrupt */ + rte_intr_callback_register(&(enic->pdev->intr_handle), + enic_intr_handler, (void *)enic->rte_dev); + + rte_intr_enable(&(enic->pdev->intr_handle)); + /* Unmask LSC interrupt */ + vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]); + + return 0; +} + +int enic_alloc_intr_resources(struct enic *enic) +{ + int err; + unsigned int i; + + dev_info(enic, "vNIC resources used: "\ + "wq %d rq %d cq %d intr %d\n", + enic->wq_count, enic_vnic_rq_count(enic), + enic->cq_count, enic->intr_count); + + for (i = 0; i < enic->intr_count; i++) { + err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i); + if (err) { + enic_free_vnic_resources(enic); + return err; + } + } + return 0; +} + +void enic_free_rq(void *rxq) +{ + struct vnic_rq *rq_sop, *rq_data; + struct enic *enic; + + if (rxq == NULL) + return; + + rq_sop = (struct vnic_rq *)rxq; + enic = vnic_dev_priv(rq_sop->vdev); + rq_data = &enic->rq[rq_sop->data_queue_idx]; + + if (rq_sop->free_mbufs) { + struct rte_mbuf **mb; + int i; + + mb = rq_sop->free_mbufs; + for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs; + i < ENIC_RX_BURST_MAX; i++) + rte_pktmbuf_free(mb[i]); + rte_free(rq_sop->free_mbufs); + rq_sop->free_mbufs = NULL; + rq_sop->num_free_mbufs = 0; + } + + enic_rxmbuf_queue_release(enic, rq_sop); + if (rq_data->in_use) + enic_rxmbuf_queue_release(enic, rq_data); + + rte_free(rq_sop->mbuf_ring); + if (rq_data->in_use) + rte_free(rq_data->mbuf_ring); + + rq_sop->mbuf_ring = NULL; + rq_data->mbuf_ring = NULL; + + vnic_rq_free(rq_sop); + if (rq_data->in_use) + vnic_rq_free(rq_data); + + vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]); + + rq_sop->in_use = 0; + rq_data->in_use = 0; +} + +void enic_start_wq(struct enic *enic, uint16_t queue_idx) +{ + struct rte_eth_dev_data *data = enic->dev_data; + vnic_wq_enable(&enic->wq[queue_idx]); + data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED; +} + +int enic_stop_wq(struct enic *enic, uint16_t queue_idx) +{ + struct rte_eth_dev_data *data = enic->dev_data; + int ret; + + ret = vnic_wq_disable(&enic->wq[queue_idx]); + if (ret) + return ret; + + data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED; + return 0; +} + +void enic_start_rq(struct enic *enic, uint16_t queue_idx) +{ + struct rte_eth_dev_data *data = enic->dev_data; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; + + if (rq_data->in_use) { + vnic_rq_enable(rq_data); + enic_initial_post_rx(enic, rq_data); + } + rte_mb(); + vnic_rq_enable(rq_sop); + enic_initial_post_rx(enic, rq_sop); + data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED; +} + +int enic_stop_rq(struct enic *enic, uint16_t queue_idx) +{ + struct rte_eth_dev_data *data = enic->dev_data; + int ret1 = 0, ret2 = 0; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; + + ret2 = vnic_rq_disable(rq_sop); + rte_mb(); + if (rq_data->in_use) + ret1 = vnic_rq_disable(rq_data); + + if (ret2) + return ret2; + else if (ret1) + return ret1; + + data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED; + return 0; +} + +int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, + unsigned int socket_id, struct rte_mempool *mp, + uint16_t nb_desc, uint16_t free_thresh) +{ + int rc; + uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx); + uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic); + struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx]; + struct vnic_rq *rq_data = &enic->rq[data_queue_idx]; + unsigned int mbuf_size, mbufs_per_pkt; + unsigned int nb_sop_desc, nb_data_desc; + uint16_t min_sop, max_sop, min_data, max_data; + uint32_t max_rx_pkt_len; + + rq_sop->is_sop = 1; + rq_sop->data_queue_idx = data_queue_idx; + rq_data->is_sop = 0; + rq_data->data_queue_idx = 0; + rq_sop->socket_id = socket_id; + rq_sop->mp = mp; + rq_data->socket_id = socket_id; + rq_data->mp = mp; + rq_sop->in_use = 1; + rq_sop->rx_free_thresh = free_thresh; + rq_data->rx_free_thresh = free_thresh; + dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx, + free_thresh); + + mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) - + RTE_PKTMBUF_HEADROOM); + /* max_rx_pkt_len includes the ethernet header and CRC. */ + max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len; + + if (enic->rte_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_SCATTER) { + dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx); + /* ceil((max pkt len)/mbuf_size) */ + mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size; + } else { + dev_info(enic, "Scatter rx mode disabled\n"); + mbufs_per_pkt = 1; + if (max_rx_pkt_len > mbuf_size) { + dev_warning(enic, "The maximum Rx packet size (%u) is" + " larger than the mbuf size (%u), and" + " scatter is disabled. Larger packets will" + " be truncated.\n", + max_rx_pkt_len, mbuf_size); + } + } + + if (mbufs_per_pkt > 1) { + dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx); + rq_sop->data_queue_enable = 1; + rq_data->in_use = 1; + /* + * HW does not directly support rxmode.max_rx_pkt_len. HW always + * receives packet sizes up to the "max" MTU. + * If not using scatter, we can achieve the effect of dropping + * larger packets by reducing the size of posted buffers. + * See enic_alloc_rx_queue_mbufs(). + */ + if (max_rx_pkt_len < + enic_mtu_to_max_rx_pktlen(enic->max_mtu)) { + dev_warning(enic, "rxmode.max_rx_pkt_len is ignored" + " when scatter rx mode is in use.\n"); + } + } else { + dev_info(enic, "Rq %u Scatter rx mode not being used\n", + queue_idx); + rq_sop->data_queue_enable = 0; + rq_data->in_use = 0; + } + + /* number of descriptors have to be a multiple of 32 */ + nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK; + nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK; + + rq_sop->max_mbufs_per_pkt = mbufs_per_pkt; + rq_data->max_mbufs_per_pkt = mbufs_per_pkt; + + if (mbufs_per_pkt > 1) { + min_sop = ENIC_RX_BURST_MAX; + max_sop = ((enic->config.rq_desc_count / + (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK); + min_data = min_sop * (mbufs_per_pkt - 1); + max_data = enic->config.rq_desc_count; + } else { + min_sop = ENIC_RX_BURST_MAX; + max_sop = enic->config.rq_desc_count; + min_data = 0; + max_data = 0; + } + + if (nb_desc < (min_sop + min_data)) { + dev_warning(enic, + "Number of rx descs too low, adjusting to minimum\n"); + nb_sop_desc = min_sop; + nb_data_desc = min_data; + } else if (nb_desc > (max_sop + max_data)) { + dev_warning(enic, + "Number of rx_descs too high, adjusting to maximum\n"); + nb_sop_desc = max_sop; + nb_data_desc = max_data; + } + if (mbufs_per_pkt > 1) { + dev_info(enic, "For max packet size %u and mbuf size %u valid" + " rx descriptor range is %u to %u\n", + max_rx_pkt_len, mbuf_size, min_sop + min_data, + max_sop + max_data); + } + dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n", + nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc); + + /* Allocate sop queue resources */ + rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx, + nb_sop_desc, sizeof(struct rq_enet_desc)); + if (rc) { + dev_err(enic, "error in allocation of sop rq\n"); + goto err_exit; + } + nb_sop_desc = rq_sop->ring.desc_count; + + if (rq_data->in_use) { + /* Allocate data queue resources */ + rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx, + nb_data_desc, + sizeof(struct rq_enet_desc)); + if (rc) { + dev_err(enic, "error in allocation of data rq\n"); + goto err_free_rq_sop; + } + nb_data_desc = rq_data->ring.desc_count; + } + rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx, + socket_id, nb_sop_desc + nb_data_desc, + sizeof(struct cq_enet_rq_desc)); + if (rc) { + dev_err(enic, "error in allocation of cq for rq\n"); + goto err_free_rq_data; + } + + /* Allocate the mbuf rings */ + rq_sop->mbuf_ring = (struct rte_mbuf **) + rte_zmalloc_socket("rq->mbuf_ring", + sizeof(struct rte_mbuf *) * nb_sop_desc, + RTE_CACHE_LINE_SIZE, rq_sop->socket_id); + if (rq_sop->mbuf_ring == NULL) + goto err_free_cq; + + if (rq_data->in_use) { + rq_data->mbuf_ring = (struct rte_mbuf **) + rte_zmalloc_socket("rq->mbuf_ring", + sizeof(struct rte_mbuf *) * nb_data_desc, + RTE_CACHE_LINE_SIZE, rq_sop->socket_id); + if (rq_data->mbuf_ring == NULL) + goto err_free_sop_mbuf; + } + + rq_sop->free_mbufs = (struct rte_mbuf **) + rte_zmalloc_socket("rq->free_mbufs", + sizeof(struct rte_mbuf *) * + ENIC_RX_BURST_MAX, + RTE_CACHE_LINE_SIZE, rq_sop->socket_id); + if (rq_sop->free_mbufs == NULL) + goto err_free_data_mbuf; + rq_sop->num_free_mbufs = 0; + + rq_sop->tot_nb_desc = nb_desc; /* squirl away for MTU update function */ + + return 0; + +err_free_data_mbuf: + rte_free(rq_data->mbuf_ring); +err_free_sop_mbuf: + rte_free(rq_sop->mbuf_ring); +err_free_cq: + /* cleanup on error */ + vnic_cq_free(&enic->cq[queue_idx]); +err_free_rq_data: + if (rq_data->in_use) + vnic_rq_free(rq_data); +err_free_rq_sop: + vnic_rq_free(rq_sop); +err_exit: + return -ENOMEM; +} + +void enic_free_wq(void *txq) +{ + struct vnic_wq *wq; + struct enic *enic; + + if (txq == NULL) + return; + + wq = (struct vnic_wq *)txq; + enic = vnic_dev_priv(wq->vdev); + rte_memzone_free(wq->cqmsg_rz); + vnic_wq_free(wq); + vnic_cq_free(&enic->cq[enic->rq_count + wq->index]); +} + +int enic_alloc_wq(struct enic *enic, uint16_t queue_idx, + unsigned int socket_id, uint16_t nb_desc) +{ + int err; + struct vnic_wq *wq = &enic->wq[queue_idx]; + unsigned int cq_index = enic_cq_wq(enic, queue_idx); + char name[RTE_MEMZONE_NAMESIZE]; + static int instance; + + wq->socket_id = socket_id; + /* + * rte_eth_tx_queue_setup() checks min, max, and alignment. So just + * print an info message for diagnostics. + */ + dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc); + + /* Allocate queue resources */ + err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx, + nb_desc, + sizeof(struct wq_enet_desc)); + if (err) { + dev_err(enic, "error in allocation of wq\n"); + return err; + } + + err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index, + socket_id, nb_desc, + sizeof(struct cq_enet_wq_desc)); + if (err) { + vnic_wq_free(wq); + dev_err(enic, "error in allocation of cq for wq\n"); + } + + /* setup up CQ message */ + snprintf((char *)name, sizeof(name), + "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx, + instance++); + + wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name, + sizeof(uint32_t), SOCKET_ID_ANY, + RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE); + if (!wq->cqmsg_rz) + return -ENOMEM; + + return err; +} + +int enic_disable(struct enic *enic) +{ + unsigned int i; + int err; + + for (i = 0; i < enic->intr_count; i++) { + vnic_intr_mask(&enic->intr[i]); + (void)vnic_intr_masked(&enic->intr[i]); /* flush write */ + } + enic_rxq_intr_deinit(enic); + rte_intr_disable(&enic->pdev->intr_handle); + rte_intr_callback_unregister(&enic->pdev->intr_handle, + enic_intr_handler, + (void *)enic->rte_dev); + + vnic_dev_disable(enic->vdev); + + enic_clsf_destroy(enic); + enic_fm_destroy(enic); + + if (!enic_is_sriov_vf(enic)) + vnic_dev_del_addr(enic->vdev, enic->mac_addr); + + for (i = 0; i < enic->wq_count; i++) { + err = vnic_wq_disable(&enic->wq[i]); + if (err) + return err; + } + for (i = 0; i < enic_vnic_rq_count(enic); i++) { + if (enic->rq[i].in_use) { + err = vnic_rq_disable(&enic->rq[i]); + if (err) + return err; + } + } + + /* If we were using interrupts, set the interrupt vector to -1 + * to disable interrupts. We are not disabling link notifcations, + * though, as we want the polling of link status to continue working. + */ + if (enic->rte_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, -1); + + vnic_dev_set_reset_flag(enic->vdev, 1); + + for (i = 0; i < enic->wq_count; i++) + vnic_wq_clean(&enic->wq[i], enic_free_wq_buf); + + for (i = 0; i < enic_vnic_rq_count(enic); i++) + if (enic->rq[i].in_use) + vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + for (i = 0; i < enic->cq_count; i++) + vnic_cq_clean(&enic->cq[i]); + for (i = 0; i < enic->intr_count; i++) + vnic_intr_clean(&enic->intr[i]); + + return 0; +} + +static int enic_dev_wait(struct vnic_dev *vdev, + int (*start)(struct vnic_dev *, int), + int (*finished)(struct vnic_dev *, int *), + int arg) +{ + int done; + int err; + int i; + + err = start(vdev, arg); + if (err) + return err; + + /* Wait for func to complete...2 seconds max */ + for (i = 0; i < 2000; i++) { + err = finished(vdev, &done); + if (err) + return err; + if (done) + return 0; + usleep(1000); + } + return -ETIMEDOUT; +} + +static int enic_dev_open(struct enic *enic) +{ + int err; + int flags = CMD_OPENF_IG_DESCCACHE; + + err = enic_dev_wait(enic->vdev, vnic_dev_open, + vnic_dev_open_done, flags); + if (err) + dev_err(enic_get_dev(enic), + "vNIC device open failed, err %d\n", err); + + return err; +} + +static int enic_set_rsskey(struct enic *enic, uint8_t *user_key) +{ + dma_addr_t rss_key_buf_pa; + union vnic_rss_key *rss_key_buf_va = NULL; + int err, i; + uint8_t name[RTE_MEMZONE_NAMESIZE]; + + RTE_ASSERT(user_key != NULL); + snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name); + rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key), + &rss_key_buf_pa, name); + if (!rss_key_buf_va) + return -ENOMEM; + + for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++) + rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i]; + + err = enic_set_rss_key(enic, + rss_key_buf_pa, + sizeof(union vnic_rss_key)); + + /* Save for later queries */ + if (!err) { + rte_memcpy(&enic->rss_key, rss_key_buf_va, + sizeof(union vnic_rss_key)); + } + enic_free_consistent(enic, sizeof(union vnic_rss_key), + rss_key_buf_va, rss_key_buf_pa); + + return err; +} + +int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu) +{ + dma_addr_t rss_cpu_buf_pa; + union vnic_rss_cpu *rss_cpu_buf_va = NULL; + int err; + uint8_t name[RTE_MEMZONE_NAMESIZE]; + + snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name); + rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu), + &rss_cpu_buf_pa, name); + if (!rss_cpu_buf_va) + return -ENOMEM; + + rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu)); + + err = enic_set_rss_cpu(enic, + rss_cpu_buf_pa, + sizeof(union vnic_rss_cpu)); + + enic_free_consistent(enic, sizeof(union vnic_rss_cpu), + rss_cpu_buf_va, rss_cpu_buf_pa); + + /* Save for later queries */ + if (!err) + rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu)); + return err; +} + +static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu, + uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu, + uint8_t rss_enable) +{ + const uint8_t tso_ipid_split_en = 0; + int err; + + err = enic_set_nic_cfg(enic, + rss_default_cpu, rss_hash_type, + rss_hash_bits, rss_base_cpu, + rss_enable, tso_ipid_split_en, + enic->ig_vlan_strip_en); + + return err; +} + +/* Initialize RSS with defaults, called from dev_configure */ +int enic_init_rss_nic_cfg(struct enic *enic) +{ + static uint8_t default_rss_key[] = { + 85, 67, 83, 97, 119, 101, 115, 111, 109, 101, + 80, 65, 76, 79, 117, 110, 105, 113, 117, 101, + 76, 73, 78, 85, 88, 114, 111, 99, 107, 115, + 69, 78, 73, 67, 105, 115, 99, 111, 111, 108, + }; + struct rte_eth_rss_conf rss_conf; + union vnic_rss_cpu rss_cpu; + int ret, i; + + rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf; + /* + * If setting key for the first time, and the user gives us none, then + * push the default key to NIC. + */ + if (rss_conf.rss_key == NULL) { + rss_conf.rss_key = default_rss_key; + rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE; + } + ret = enic_set_rss_conf(enic, &rss_conf); + if (ret) { + dev_err(enic, "Failed to configure RSS\n"); + return ret; + } + if (enic->rss_enable) { + /* If enabling RSS, use the default reta */ + for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) { + rss_cpu.cpu[i / 4].b[i % 4] = + enic_rte_rq_idx_to_sop_idx(i % enic->rq_count); + } + ret = enic_set_rss_reta(enic, &rss_cpu); + if (ret) + dev_err(enic, "Failed to set RSS indirection table\n"); + } + return ret; +} + +int enic_setup_finish(struct enic *enic) +{ + enic_init_soft_stats(enic); + + /* Default conf */ + vnic_dev_packet_filter(enic->vdev, + 1 /* directed */, + 1 /* multicast */, + 1 /* broadcast */, + 0 /* promisc */, + 1 /* allmulti */); + + enic->promisc = 0; + enic->allmulti = 1; + + return 0; +} + +static int enic_rss_conf_valid(struct enic *enic, + struct rte_eth_rss_conf *rss_conf) +{ + /* RSS is disabled per VIC settings. Ignore rss_conf. */ + if (enic->flow_type_rss_offloads == 0) + return 0; + if (rss_conf->rss_key != NULL && + rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) { + dev_err(enic, "Given rss_key is %d bytes, it must be %d\n", + rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE); + return -EINVAL; + } + if (rss_conf->rss_hf != 0 && + (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) { + dev_err(enic, "Given rss_hf contains none of the supported" + " types\n"); + return -EINVAL; + } + return 0; +} + +/* Set hash type and key according to rss_conf */ +int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *eth_dev; + uint64_t rss_hf; + uint8_t rss_hash_type; + uint8_t rss_enable; + int ret; + + RTE_ASSERT(rss_conf != NULL); + ret = enic_rss_conf_valid(enic, rss_conf); + if (ret) { + dev_err(enic, "RSS configuration (rss_conf) is invalid\n"); + return ret; + } + + eth_dev = enic->rte_dev; + rss_hash_type = 0; + rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads; + if (enic->rq_count > 1 && + (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) && + rss_hf != 0) { + rss_enable = 1; + if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | + ETH_RSS_NONFRAG_IPV4_OTHER)) + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) { + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4; + if (enic->udp_rss_weak) { + /* + * 'TCP' is not a typo. The "weak" version of + * UDP RSS requires both the TCP and UDP bits + * be set. It does enable TCP RSS as well. + */ + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4; + } + } + if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX | + ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER)) + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6; + if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX)) + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; + if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) { + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6; + if (enic->udp_rss_weak) + rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; + } + } else { + rss_enable = 0; + rss_hf = 0; + } + + /* Set the hash key if provided */ + if (rss_enable && rss_conf->rss_key) { + ret = enic_set_rsskey(enic, rss_conf->rss_key); + if (ret) { + dev_err(enic, "Failed to set RSS key\n"); + return ret; + } + } + + ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type, + ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU, + rss_enable); + if (!ret) { + enic->rss_hf = rss_hf; + enic->rss_hash_type = rss_hash_type; + enic->rss_enable = rss_enable; + } else { + dev_err(enic, "Failed to update RSS configurations." + " hash=0x%x\n", rss_hash_type); + } + return ret; +} + +int enic_set_vlan_strip(struct enic *enic) +{ + /* + * Unfortunately, VLAN strip on/off and RSS on/off are configured + * together. So, re-do niccfg, preserving the current RSS settings. + */ + return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type, + ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU, + enic->rss_enable); +} + +int enic_add_packet_filter(struct enic *enic) +{ + /* Args -> directed, multicast, broadcast, promisc, allmulti */ + return vnic_dev_packet_filter(enic->vdev, 1, 1, 1, + enic->promisc, enic->allmulti); +} + +int enic_get_link_status(struct enic *enic) +{ + return vnic_dev_link_status(enic->vdev); +} + +static void enic_dev_deinit(struct enic *enic) +{ + /* stop link status checking */ + vnic_dev_notify_unset(enic->vdev); + + /* mac_addrs is freed by rte_eth_dev_release_port() */ + rte_free(enic->cq); + rte_free(enic->intr); + rte_free(enic->rq); + rte_free(enic->wq); +} + + +int enic_set_vnic_res(struct enic *enic) +{ + struct rte_eth_dev *eth_dev = enic->rte_dev; + int rc = 0; + unsigned int required_rq, required_wq, required_cq, required_intr; + + /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */ + required_rq = eth_dev->data->nb_rx_queues * 2; + required_wq = eth_dev->data->nb_tx_queues; + required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues; + required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */ + if (eth_dev->data->dev_conf.intr_conf.rxq) { + required_intr += eth_dev->data->nb_rx_queues; + } + + if (enic->conf_rq_count < required_rq) { + dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n", + eth_dev->data->nb_rx_queues, + required_rq, enic->conf_rq_count); + rc = -EINVAL; + } + if (enic->conf_wq_count < required_wq) { + dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n", + eth_dev->data->nb_tx_queues, enic->conf_wq_count); + rc = -EINVAL; + } + + if (enic->conf_cq_count < required_cq) { + dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n", + required_cq, enic->conf_cq_count); + rc = -EINVAL; + } + if (enic->conf_intr_count < required_intr) { + dev_err(dev, "Not enough Interrupts to support Rx queue" + " interrupts. Required:%u, Configured:%u\n", + required_intr, enic->conf_intr_count); + rc = -EINVAL; + } + + if (rc == 0) { + enic->rq_count = eth_dev->data->nb_rx_queues; + enic->wq_count = eth_dev->data->nb_tx_queues; + enic->cq_count = enic->rq_count + enic->wq_count; + enic->intr_count = required_intr; + } + + return rc; +} + +/* Initialize the completion queue for an RQ */ +static int +enic_reinit_rq(struct enic *enic, unsigned int rq_idx) +{ + struct vnic_rq *sop_rq, *data_rq; + unsigned int cq_idx; + int rc = 0; + + sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)]; + cq_idx = rq_idx; + + vnic_cq_clean(&enic->cq[cq_idx]); + vnic_cq_init(&enic->cq[cq_idx], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 0 /* interrupt_enable */, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + 0 /* interrupt offset */, + 0 /* cq_message_addr */); + + + vnic_rq_init_start(sop_rq, enic_cq_rq(enic, + enic_rte_rq_idx_to_sop_idx(rq_idx)), 0, + sop_rq->ring.desc_count - 1, 1, 0); + if (data_rq->in_use) { + vnic_rq_init_start(data_rq, + enic_cq_rq(enic, + enic_rte_rq_idx_to_data_idx(rq_idx, enic)), + 0, data_rq->ring.desc_count - 1, 1, 0); + } + + rc = enic_alloc_rx_queue_mbufs(enic, sop_rq); + if (rc) + return rc; + + if (data_rq->in_use) { + rc = enic_alloc_rx_queue_mbufs(enic, data_rq); + if (rc) { + enic_rxmbuf_queue_release(enic, sop_rq); + return rc; + } + } + + return 0; +} + +/* The Cisco NIC can send and receive packets up to a max packet size + * determined by the NIC type and firmware. There is also an MTU + * configured into the NIC via the CIMC/UCSM management interface + * which can be overridden by this function (up to the max packet size). + * Depending on the network setup, doing so may cause packet drops + * and unexpected behavior. + */ +int enic_set_mtu(struct enic *enic, uint16_t new_mtu) +{ + unsigned int rq_idx; + struct vnic_rq *rq; + int rc = 0; + uint16_t old_mtu; /* previous setting */ + uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */ + struct rte_eth_dev *eth_dev = enic->rte_dev; + + old_mtu = eth_dev->data->mtu; + config_mtu = enic->config.mtu; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; + + if (new_mtu > enic->max_mtu) { + dev_err(enic, + "MTU not updated: requested (%u) greater than max (%u)\n", + new_mtu, enic->max_mtu); + return -EINVAL; + } + if (new_mtu < ENIC_MIN_MTU) { + dev_info(enic, + "MTU not updated: requested (%u) less than min (%u)\n", + new_mtu, ENIC_MIN_MTU); + return -EINVAL; + } + if (new_mtu > config_mtu) + dev_warning(enic, + "MTU (%u) is greater than value configured in NIC (%u)\n", + new_mtu, config_mtu); + + /* Update the MTU and maximum packet length */ + eth_dev->data->mtu = new_mtu; + eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = + enic_mtu_to_max_rx_pktlen(new_mtu); + + /* + * If the device has not started (enic_enable), nothing to do. + * Later, enic_enable() will set up RQs reflecting the new maximum + * packet length. + */ + if (!eth_dev->data->dev_started) + goto set_mtu_done; + + /* + * The device has started, re-do RQs on the fly. In the process, we + * pick up the new maximum packet length. + * + * Some applications rely on the ability to change MTU without stopping + * the device. So keep this behavior for now. + */ + rte_spinlock_lock(&enic->mtu_lock); + + /* Stop traffic on all RQs */ + for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) { + rq = &enic->rq[rq_idx]; + if (rq->is_sop && rq->in_use) { + rc = enic_stop_rq(enic, + enic_sop_rq_idx_to_rte_idx(rq_idx)); + if (rc) { + dev_err(enic, "Failed to stop Rq %u\n", rq_idx); + goto set_mtu_done; + } + } + } + + /* replace Rx function with a no-op to avoid getting stale pkts */ + eth_dev->rx_pkt_burst = enic_dummy_recv_pkts; + rte_mb(); + + /* Allow time for threads to exit the real Rx function. */ + usleep(100000); + + /* now it is safe to reconfigure the RQs */ + + + /* free and reallocate RQs with the new MTU */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + if (!rq->in_use) + continue; + + enic_free_rq(rq); + rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp, + rq->tot_nb_desc, rq->rx_free_thresh); + if (rc) { + dev_err(enic, + "Fatal MTU alloc error- No traffic will pass\n"); + goto set_mtu_done; + } + + rc = enic_reinit_rq(enic, rq_idx); + if (rc) { + dev_err(enic, + "Fatal MTU RQ reinit- No traffic will pass\n"); + goto set_mtu_done; + } + } + + /* put back the real receive function */ + rte_mb(); + enic_pick_rx_handler(eth_dev); + rte_mb(); + + /* restart Rx traffic */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + if (rq->is_sop && rq->in_use) + enic_start_rq(enic, rq_idx); + } + +set_mtu_done: + dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu); + rte_spinlock_unlock(&enic->mtu_lock); + return rc; +} + +static int enic_dev_init(struct enic *enic) +{ + int err; + struct rte_eth_dev *eth_dev = enic->rte_dev; + + vnic_dev_intr_coal_timer_info_default(enic->vdev); + + /* Get vNIC configuration + */ + err = enic_get_vnic_config(enic); + if (err) { + dev_err(dev, "Get vNIC configuration failed, aborting\n"); + return err; + } + + /* Get available resource counts */ + enic_get_res_counts(enic); + if (enic->conf_rq_count == 1) { + dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n"); + dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n"); + dev_err(enic, "See the ENIC PMD guide for more information.\n"); + return -EINVAL; + } + /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */ + enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) * + enic->conf_cq_count, 8); + enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) * + enic->conf_intr_count, 8); + enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) * + enic->conf_rq_count, 8); + enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) * + enic->conf_wq_count, 8); + if (enic->conf_cq_count > 0 && enic->cq == NULL) { + dev_err(enic, "failed to allocate vnic_cq, aborting.\n"); + return -1; + } + if (enic->conf_intr_count > 0 && enic->intr == NULL) { + dev_err(enic, "failed to allocate vnic_intr, aborting.\n"); + return -1; + } + if (enic->conf_rq_count > 0 && enic->rq == NULL) { + dev_err(enic, "failed to allocate vnic_rq, aborting.\n"); + return -1; + } + if (enic->conf_wq_count > 0 && enic->wq == NULL) { + dev_err(enic, "failed to allocate vnic_wq, aborting.\n"); + return -1; + } + + /* Get the supported filters */ + enic_fdir_info(enic); + + eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", + sizeof(struct rte_ether_addr) * + ENIC_UNICAST_PERFECT_FILTERS, 0); + if (!eth_dev->data->mac_addrs) { + dev_err(enic, "mac addr storage alloc failed, aborting.\n"); + return -1; + } + rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr, + eth_dev->data->mac_addrs); + + vnic_dev_set_reset_flag(enic->vdev, 0); + + LIST_INIT(&enic->flows); + + /* set up link status checking */ + vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + + /* + * When Geneve with options offload is available, always disable it + * first as it can interfere with user flow rules. + */ + if (enic->geneve_opt_avail) { + /* + * Disabling fails if the feature is provisioned but + * not enabled. So ignore result and do not log error. + */ + vnic_dev_overlay_offload_ctrl(enic->vdev, + OVERLAY_FEATURE_GENEVE, + OVERLAY_OFFLOAD_DISABLE); + } + enic->overlay_offload = false; + if (enic->disable_overlay && enic->vxlan) { + /* + * Explicitly disable overlay offload as the setting is + * sticky, and resetting vNIC does not disable it. + */ + if (vnic_dev_overlay_offload_ctrl(enic->vdev, + OVERLAY_FEATURE_VXLAN, + OVERLAY_OFFLOAD_DISABLE)) { + dev_err(enic, "failed to disable overlay offload\n"); + } else { + dev_info(enic, "Overlay offload is disabled\n"); + } + } + if (!enic->disable_overlay && enic->vxlan && + /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */ + vnic_dev_overlay_offload_ctrl(enic->vdev, + OVERLAY_FEATURE_VXLAN, + OVERLAY_OFFLOAD_ENABLE) == 0) { + enic->tx_offload_capa |= + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO; + enic->tx_offload_mask |= + PKT_TX_OUTER_IPV6 | + PKT_TX_OUTER_IPV4 | + PKT_TX_OUTER_IP_CKSUM | + PKT_TX_TUNNEL_MASK; + enic->overlay_offload = true; + dev_info(enic, "Overlay offload is enabled\n"); + } + /* Geneve with options offload requires overlay offload */ + if (enic->overlay_offload && enic->geneve_opt_avail && + enic->geneve_opt_request) { + if (vnic_dev_overlay_offload_ctrl(enic->vdev, + OVERLAY_FEATURE_GENEVE, + OVERLAY_OFFLOAD_ENABLE)) { + dev_err(enic, "failed to enable geneve+option\n"); + } else { + enic->geneve_opt_enabled = 1; + dev_info(enic, "Geneve with options is enabled\n"); + } + } + /* + * Reset the vxlan port if HW vxlan parsing is available. It + * is always enabled regardless of overlay offload + * enable/disable. + */ + if (enic->vxlan) { + enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT; + /* + * Reset the vxlan port to the default, as the NIC firmware + * does not reset it automatically and keeps the old setting. + */ + if (vnic_dev_overlay_offload_cfg(enic->vdev, + OVERLAY_CFG_VXLAN_PORT_UPDATE, + RTE_VXLAN_DEFAULT_PORT)) { + dev_err(enic, "failed to update vxlan port\n"); + return -EINVAL; + } + } + + return 0; + +} + +int enic_probe(struct enic *enic) +{ + struct rte_pci_device *pdev = enic->pdev; + int err = -1; + + dev_debug(enic, "Initializing ENIC PMD\n"); + + /* if this is a secondary process the hardware is already initialized */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr; + enic->bar0.len = pdev->mem_resource[0].len; + + /* Register vNIC device */ + enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1); + if (!enic->vdev) { + dev_err(enic, "vNIC registration failed, aborting\n"); + goto err_out; + } + + LIST_INIT(&enic->memzone_list); + rte_spinlock_init(&enic->memzone_list_lock); + + vnic_register_cbacks(enic->vdev, + enic_alloc_consistent, + enic_free_consistent); + + /* + * Allocate the consistent memory for stats upfront so both primary and + * secondary processes can dump stats. + */ + err = vnic_dev_alloc_stats_mem(enic->vdev); + if (err) { + dev_err(enic, "Failed to allocate cmd memory, aborting\n"); + goto err_out_unregister; + } + /* Issue device open to get device in known state */ + err = enic_dev_open(enic); + if (err) { + dev_err(enic, "vNIC dev open failed, aborting\n"); + goto err_out_unregister; + } + + /* Set ingress vlan rewrite mode before vnic initialization */ + dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n", + enic->ig_vlan_rewrite_mode); + err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev, + enic->ig_vlan_rewrite_mode); + if (err) { + dev_err(enic, + "Failed to set ingress vlan rewrite mode, aborting.\n"); + goto err_out_dev_close; + } + + /* Issue device init to initialize the vnic-to-switch link. + * We'll start with carrier off and wait for link UP + * notification later to turn on carrier. We don't need + * to wait here for the vnic-to-switch link initialization + * to complete; link UP notification is the indication that + * the process is complete. + */ + + err = vnic_dev_init(enic->vdev, 0); + if (err) { + dev_err(enic, "vNIC dev init failed, aborting\n"); + goto err_out_dev_close; + } + + err = enic_dev_init(enic); + if (err) { + dev_err(enic, "Device initialization failed, aborting\n"); + goto err_out_dev_close; + } + + return 0; + +err_out_dev_close: + vnic_dev_close(enic->vdev); +err_out_unregister: + vnic_dev_unregister(enic->vdev); +err_out: + return err; +} + +void enic_remove(struct enic *enic) +{ + enic_dev_deinit(enic); + vnic_dev_close(enic->vdev); + vnic_dev_unregister(enic->vdev); +} diff --git a/src/spdk/dpdk/drivers/net/enic/enic_res.c b/src/spdk/dpdk/drivers/net/enic/enic_res.c new file mode 100644 index 000000000..20888eb25 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_res.c @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include "enic_compat.h" +#include "rte_ethdev_driver.h" +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "cq_enet_desc.h" +#include "vnic_resource.h" +#include "vnic_enet.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_nic.h" +#include "vnic_rss.h" +#include "enic_res.h" +#include "enic.h" + +int enic_get_vnic_config(struct enic *enic) +{ + struct vnic_enet_config *c = &enic->config; + int err; + + err = vnic_dev_get_mac_addr(enic->vdev, enic->mac_addr); + if (err) { + dev_err(enic_get_dev(enic), + "Error getting MAC addr, %d\n", err); + return err; + } + + +#define GET_CONFIG(m) \ + do { \ + err = vnic_dev_spec(enic->vdev, \ + offsetof(struct vnic_enet_config, m), \ + sizeof(c->m), &c->m); \ + if (err) { \ + dev_err(enic_get_dev(enic), \ + "Error getting %s, %d\n", #m, err); \ + return err; \ + } \ + } while (0) + + GET_CONFIG(flags); + GET_CONFIG(wq_desc_count); + GET_CONFIG(rq_desc_count); + GET_CONFIG(mtu); + GET_CONFIG(intr_timer_type); + GET_CONFIG(intr_mode); + GET_CONFIG(intr_timer_usec); + GET_CONFIG(loop_tag); + GET_CONFIG(num_arfs); + GET_CONFIG(max_pkt_size); + + /* max packet size is only defined in newer VIC firmware + * and will be 0 for legacy firmware and VICs + */ + if (c->max_pkt_size > ENIC_DEFAULT_RX_MAX_PKT_SIZE) + enic->max_mtu = c->max_pkt_size - RTE_ETHER_HDR_LEN; + else + enic->max_mtu = ENIC_DEFAULT_RX_MAX_PKT_SIZE - + RTE_ETHER_HDR_LEN; + if (c->mtu == 0) + c->mtu = 1500; + + enic->rte_dev->data->mtu = RTE_MIN(enic->max_mtu, + RTE_MAX((uint16_t)ENIC_MIN_MTU, c->mtu)); + + enic->adv_filters = vnic_dev_capable_adv_filters(enic->vdev); + dev_info(enic, "Advanced Filters %savailable\n", ((enic->adv_filters) + ? "" : "not ")); + + err = vnic_dev_capable_filter_mode(enic->vdev, &enic->flow_filter_mode, + &enic->filter_actions); + if (err) { + dev_err(enic_get_dev(enic), + "Error getting filter modes, %d\n", err); + return err; + } + vnic_dev_capable_udp_rss_weak(enic->vdev, &enic->nic_cfg_chk, + &enic->udp_rss_weak); + + dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s%s\n", + ((enic->flow_filter_mode == FILTER_FLOWMAN) ? "FLOWMAN" : + ((enic->flow_filter_mode == FILTER_DPDK_1) ? "DPDK" : + ((enic->flow_filter_mode == FILTER_USNIC_IP) ? "USNIC" : + ((enic->flow_filter_mode == FILTER_IPV4_5TUPLE) ? "5TUPLE" : + "NONE")))), + ((enic->filter_actions & FILTER_ACTION_RQ_STEERING_FLAG) ? + "steer " : ""), + ((enic->filter_actions & FILTER_ACTION_FILTER_ID_FLAG) ? + "tag " : ""), + ((enic->filter_actions & FILTER_ACTION_DROP_FLAG) ? + "drop " : ""), + ((enic->filter_actions & FILTER_ACTION_COUNTER_FLAG) ? + "count " : "")); + + c->wq_desc_count = RTE_MIN((uint32_t)ENIC_MAX_WQ_DESCS, + RTE_MAX((uint32_t)ENIC_MIN_WQ_DESCS, c->wq_desc_count)); + c->wq_desc_count &= 0xffffffe0; /* must be aligned to groups of 32 */ + + c->rq_desc_count = RTE_MIN((uint32_t)ENIC_MAX_RQ_DESCS, + RTE_MAX((uint32_t)ENIC_MIN_RQ_DESCS, c->rq_desc_count)); + c->rq_desc_count &= 0xffffffe0; /* must be aligned to groups of 32 */ + + c->intr_timer_usec = RTE_MIN(c->intr_timer_usec, + vnic_dev_get_intr_coal_timer_max(enic->vdev)); + + dev_info(enic_get_dev(enic), + "vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x " + "wq/rq %d/%d mtu %d, max mtu:%d\n", + enic->mac_addr[0], enic->mac_addr[1], enic->mac_addr[2], + enic->mac_addr[3], enic->mac_addr[4], enic->mac_addr[5], + c->wq_desc_count, c->rq_desc_count, + enic->rte_dev->data->mtu, enic->max_mtu); + dev_info(enic_get_dev(enic), "vNIC csum tx/rx %s/%s " + "rss %s intr mode %s type %s timer %d usec " + "loopback tag 0x%04x\n", + ENIC_SETTING(enic, TXCSUM) ? "yes" : "no", + ENIC_SETTING(enic, RXCSUM) ? "yes" : "no", + ENIC_SETTING(enic, RSS) ? + (ENIC_SETTING(enic, RSSHASH_UDPIPV4) ? "+UDP" : + ((enic->udp_rss_weak ? "+udp" : + "yes"))) : "no", + c->intr_mode == VENET_INTR_MODE_INTX ? "INTx" : + c->intr_mode == VENET_INTR_MODE_MSI ? "MSI" : + c->intr_mode == VENET_INTR_MODE_ANY ? "any" : + "unknown", + c->intr_timer_type == VENET_INTR_TYPE_MIN ? "min" : + c->intr_timer_type == VENET_INTR_TYPE_IDLE ? "idle" : + "unknown", + c->intr_timer_usec, + c->loop_tag); + + /* RSS settings from vNIC */ + enic->reta_size = ENIC_RSS_RETA_SIZE; + enic->hash_key_size = ENIC_RSS_HASH_KEY_SIZE; + enic->flow_type_rss_offloads = 0; + if (ENIC_SETTING(enic, RSSHASH_IPV4)) + /* + * IPV4 hash type handles both non-frag and frag packet types. + * TCP/UDP is controlled via a separate flag below. + */ + enic->flow_type_rss_offloads |= ETH_RSS_IPV4 | + ETH_RSS_FRAG_IPV4 | ETH_RSS_NONFRAG_IPV4_OTHER; + if (ENIC_SETTING(enic, RSSHASH_TCPIPV4)) + enic->flow_type_rss_offloads |= ETH_RSS_NONFRAG_IPV4_TCP; + if (ENIC_SETTING(enic, RSSHASH_IPV6)) + /* + * The VIC adapter can perform RSS on IPv6 packets with and + * without extension headers. An IPv6 "fragment" is an IPv6 + * packet with the fragment extension header. + */ + enic->flow_type_rss_offloads |= ETH_RSS_IPV6 | + ETH_RSS_IPV6_EX | ETH_RSS_FRAG_IPV6 | + ETH_RSS_NONFRAG_IPV6_OTHER; + if (ENIC_SETTING(enic, RSSHASH_TCPIPV6)) + enic->flow_type_rss_offloads |= ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_IPV6_TCP_EX; + if (enic->udp_rss_weak) + enic->flow_type_rss_offloads |= + ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_IPV6_UDP_EX; + if (ENIC_SETTING(enic, RSSHASH_UDPIPV4)) + enic->flow_type_rss_offloads |= ETH_RSS_NONFRAG_IPV4_UDP; + if (ENIC_SETTING(enic, RSSHASH_UDPIPV6)) + enic->flow_type_rss_offloads |= ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_IPV6_UDP_EX; + + /* Zero offloads if RSS is not enabled */ + if (!ENIC_SETTING(enic, RSS)) + enic->flow_type_rss_offloads = 0; + + enic->vxlan = ENIC_SETTING(enic, VXLAN) && + vnic_dev_capable_vxlan(enic->vdev); + if (vnic_dev_capable_geneve(enic->vdev)) { + dev_info(NULL, "Geneve with options offload available\n"); + enic->geneve_opt_avail = 1; + } + /* + * Default hardware capabilities. enic_dev_init() may add additional + * flags if it enables overlay offloads. + */ + enic->tx_queue_offload_capa = 0; + enic->tx_offload_capa = + enic->tx_queue_offload_capa | + DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT | + DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO; + enic->rx_offload_capa = + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_VLAN_STRIP | + DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_RSS_HASH; + enic->tx_offload_mask = + PKT_TX_IPV6 | + PKT_TX_IPV4 | + PKT_TX_VLAN | + PKT_TX_IP_CKSUM | + PKT_TX_L4_MASK | + PKT_TX_TCP_SEG; + + return 0; +} + +int enic_set_nic_cfg(struct enic *enic, uint8_t rss_default_cpu, + uint8_t rss_hash_type, uint8_t rss_hash_bits, + uint8_t rss_base_cpu, uint8_t rss_enable, + uint8_t tso_ipid_split_en, uint8_t ig_vlan_strip_en) +{ + enum vnic_devcmd_cmd cmd; + uint64_t a0, a1; + uint32_t nic_cfg; + int wait = 1000; + + vnic_set_nic_cfg(&nic_cfg, rss_default_cpu, + rss_hash_type, rss_hash_bits, rss_base_cpu, + rss_enable, tso_ipid_split_en, ig_vlan_strip_en); + + a0 = nic_cfg; + a1 = 0; + cmd = enic->nic_cfg_chk ? CMD_NIC_CFG_CHK : CMD_NIC_CFG; + return vnic_dev_cmd(enic->vdev, cmd, &a0, &a1, wait); +} + +int enic_set_rss_key(struct enic *enic, dma_addr_t key_pa, uint64_t len) +{ + uint64_t a0 = (uint64_t)key_pa, a1 = len; + int wait = 1000; + + return vnic_dev_cmd(enic->vdev, CMD_RSS_KEY, &a0, &a1, wait); +} + +int enic_set_rss_cpu(struct enic *enic, dma_addr_t cpu_pa, uint64_t len) +{ + uint64_t a0 = (uint64_t)cpu_pa, a1 = len; + int wait = 1000; + + return vnic_dev_cmd(enic->vdev, CMD_RSS_CPU, &a0, &a1, wait); +} + +void enic_free_vnic_resources(struct enic *enic) +{ + unsigned int i; + + for (i = 0; i < enic->wq_count; i++) + vnic_wq_free(&enic->wq[i]); + for (i = 0; i < enic_vnic_rq_count(enic); i++) + if (enic->rq[i].in_use) + vnic_rq_free(&enic->rq[i]); + for (i = 0; i < enic->cq_count; i++) + vnic_cq_free(&enic->cq[i]); + for (i = 0; i < enic->intr_count; i++) + vnic_intr_free(&enic->intr[i]); +} + +void enic_get_res_counts(struct enic *enic) +{ + enic->conf_wq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ); + enic->conf_rq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_RQ); + enic->conf_cq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_CQ); + enic->conf_intr_count = vnic_dev_get_res_count(enic->vdev, + RES_TYPE_INTR_CTRL); + + dev_info(enic_get_dev(enic), + "vNIC resources avail: wq %d rq %d cq %d intr %d\n", + enic->conf_wq_count, enic->conf_rq_count, + enic->conf_cq_count, enic->conf_intr_count); +} diff --git a/src/spdk/dpdk/drivers/net/enic/enic_res.h b/src/spdk/dpdk/drivers/net/enic/enic_res.h new file mode 100644 index 000000000..34f15d5a4 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_res.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _ENIC_RES_H_ +#define _ENIC_RES_H_ + +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "vnic_wq.h" +#include "vnic_rq.h" + +#define ENIC_MIN_WQ_DESCS 64 +#define ENIC_MAX_WQ_DESCS 4096 +#define ENIC_MIN_RQ_DESCS 64 +#define ENIC_MAX_RQ_DESCS 4096 + +/* A descriptor ring has a multiple of 32 descriptors */ +#define ENIC_ALIGN_DESCS 32 +#define ENIC_ALIGN_DESCS_MASK ~(ENIC_ALIGN_DESCS - 1) + +/* Request a completion index every 32 buffers (roughly packets) */ +#define ENIC_WQ_CQ_THRESH 32 + +#define ENIC_MIN_MTU 68 + +/* Does not include (possible) inserted VLAN tag and FCS */ +#define ENIC_DEFAULT_RX_MAX_PKT_SIZE 9022 + +/* Does not include (possible) inserted VLAN tag and FCS */ +#define ENIC_TX_MAX_PKT_SIZE 9208 + +#define ENIC_MULTICAST_PERFECT_FILTERS 32 +#define ENIC_UNICAST_PERFECT_FILTERS 32 + +#define ENIC_NON_TSO_MAX_DESC 16 +#define ENIC_DEFAULT_RX_FREE_THRESH 32 +#define ENIC_TX_XMIT_MAX 64 +#define ENIC_RX_BURST_MAX 64 + +/* Defaults for dev_info.default_{rx,tx}portconf */ +#define ENIC_DEFAULT_RX_BURST 32 +#define ENIC_DEFAULT_RX_RINGS 1 +#define ENIC_DEFAULT_RX_RING_SIZE 512 +#define ENIC_DEFAULT_TX_BURST 32 +#define ENIC_DEFAULT_TX_RINGS 1 +#define ENIC_DEFAULT_TX_RING_SIZE 512 + +#define ENIC_RSS_DEFAULT_CPU 0 +#define ENIC_RSS_BASE_CPU 0 +#define ENIC_RSS_HASH_BITS 7 +#define ENIC_RSS_RETA_SIZE (1 << ENIC_RSS_HASH_BITS) +#define ENIC_RSS_HASH_KEY_SIZE 40 + +#define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0) + +struct enic; + +int enic_get_vnic_config(struct enic *); +int enic_set_nic_cfg(struct enic *enic, uint8_t rss_default_cpu, + uint8_t rss_hash_type, uint8_t rss_hash_bits, + uint8_t rss_base_cpu, uint8_t rss_enable, + uint8_t tso_ipid_split_en, uint8_t ig_vlan_strip_en); +int enic_set_rss_key(struct enic *enic, dma_addr_t key_pa, uint64_t len); +int enic_set_rss_cpu(struct enic *enic, dma_addr_t cpu_pa, uint64_t len); +void enic_get_res_counts(struct enic *enic); +void enic_init_vnic_resources(struct enic *enic); +int enic_alloc_vnic_resources(struct enic *); +void enic_free_vnic_resources(struct enic *); + +#endif /* _ENIC_RES_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/enic_rxtx.c b/src/spdk/dpdk/drivers/net/enic/enic_rxtx.c new file mode 100644 index 000000000..6a8718c08 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_rxtx.c @@ -0,0 +1,688 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <rte_mbuf.h> +#include <rte_ethdev_driver.h> +#include <rte_net.h> +#include <rte_prefetch.h> + +#include "enic_compat.h" +#include "rq_enet_desc.h" +#include "enic.h" +#include "enic_rxtx_common.h" +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_tcp.h> + +#define RTE_PMD_USE_PREFETCH + +#ifdef RTE_PMD_USE_PREFETCH +/*Prefetch a cache line into all cache levels. */ +#define rte_enic_prefetch(p) rte_prefetch0(p) +#else +#define rte_enic_prefetch(p) do {} while (0) +#endif + +#ifdef RTE_PMD_PACKET_PREFETCH +#define rte_packet_prefetch(p) rte_prefetch1(p) +#else +#define rte_packet_prefetch(p) do {} while (0) +#endif + +/* dummy receive function to replace actual function in + * order to do safe reconfiguration operations. + */ +uint16_t +enic_dummy_recv_pkts(__rte_unused void *rx_queue, + __rte_unused struct rte_mbuf **rx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + +uint16_t +enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct vnic_rq *sop_rq = rx_queue; + struct vnic_rq *data_rq; + struct vnic_rq *rq; + struct enic *enic = vnic_dev_priv(sop_rq->vdev); + uint16_t cq_idx; + uint16_t rq_idx, max_rx; + uint16_t rq_num; + struct rte_mbuf *nmb, *rxmb; + uint16_t nb_rx = 0; + struct vnic_cq *cq; + volatile struct cq_desc *cqd_ptr; + uint8_t color; + uint8_t tnl; + uint16_t seg_length; + struct rte_mbuf *first_seg = sop_rq->pkt_first_seg; + struct rte_mbuf *last_seg = sop_rq->pkt_last_seg; + + cq = &enic->cq[enic_cq_rq(enic, sop_rq->index)]; + cq_idx = cq->to_clean; /* index of cqd, rqd, mbuf_table */ + cqd_ptr = (struct cq_desc *)(cq->ring.descs) + cq_idx; + color = cq->last_color; + + data_rq = &enic->rq[sop_rq->data_queue_idx]; + + /* Receive until the end of the ring, at most. */ + max_rx = RTE_MIN(nb_pkts, cq->ring.desc_count - cq_idx); + + while (max_rx) { + volatile struct rq_enet_desc *rqd_ptr; + struct cq_desc cqd; + uint8_t packet_error; + uint16_t ciflags; + + max_rx--; + + /* Check for pkts available */ + if ((cqd_ptr->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color) + break; + + /* Get the cq descriptor and extract rq info from it */ + cqd = *cqd_ptr; + rq_num = cqd.q_number & CQ_DESC_Q_NUM_MASK; + rq_idx = cqd.completed_index & CQ_DESC_COMP_NDX_MASK; + + rq = &enic->rq[rq_num]; + rqd_ptr = ((struct rq_enet_desc *)rq->ring.descs) + rq_idx; + + /* allocate a new mbuf */ + nmb = rte_mbuf_raw_alloc(rq->mp); + if (nmb == NULL) { + rte_atomic64_inc(&enic->soft_stats.rx_nombuf); + break; + } + + /* A packet error means descriptor and data are untrusted */ + packet_error = enic_cq_rx_check_err(&cqd); + + /* Get the mbuf to return and replace with one just allocated */ + rxmb = rq->mbuf_ring[rq_idx]; + rq->mbuf_ring[rq_idx] = nmb; + cq_idx++; + + /* Prefetch next mbuf & desc while processing current one */ + cqd_ptr = (struct cq_desc *)(cq->ring.descs) + cq_idx; + rte_enic_prefetch(cqd_ptr); + + ciflags = enic_cq_rx_desc_ciflags( + (struct cq_enet_rq_desc *)&cqd); + + /* Push descriptor for newly allocated mbuf */ + nmb->data_off = RTE_PKTMBUF_HEADROOM; + /* + * Only the address needs to be refilled. length_type of the + * descriptor it set during initialization + * (enic_alloc_rx_queue_mbufs) and does not change. + */ + rqd_ptr->address = rte_cpu_to_le_64(nmb->buf_iova + + RTE_PKTMBUF_HEADROOM); + + /* Fill in the rest of the mbuf */ + seg_length = enic_cq_rx_desc_n_bytes(&cqd); + + if (rq->is_sop) { + first_seg = rxmb; + first_seg->pkt_len = seg_length; + } else { + first_seg->pkt_len = (uint16_t)(first_seg->pkt_len + + seg_length); + first_seg->nb_segs++; + last_seg->next = rxmb; + } + + rxmb->port = enic->port_id; + rxmb->data_len = seg_length; + + rq->rx_nb_hold++; + + if (!(enic_cq_rx_desc_eop(ciflags))) { + last_seg = rxmb; + continue; + } + + /* + * When overlay offload is enabled, CQ.fcoe indicates the + * packet is tunnelled. + */ + tnl = enic->overlay_offload && + (ciflags & CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0; + /* cq rx flags are only valid if eop bit is set */ + first_seg->packet_type = + enic_cq_rx_flags_to_pkt_type(&cqd, tnl); + enic_cq_rx_to_pkt_flags(&cqd, first_seg); + + /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */ + if (tnl) { + first_seg->packet_type &= ~(RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + if (unlikely(packet_error)) { + rte_pktmbuf_free(first_seg); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + continue; + } + + + /* prefetch mbuf data for caller */ + rte_packet_prefetch(RTE_PTR_ADD(first_seg->buf_addr, + RTE_PKTMBUF_HEADROOM)); + + /* store the mbuf address into the next entry of the array */ + rx_pkts[nb_rx++] = first_seg; + } + if (unlikely(cq_idx == cq->ring.desc_count)) { + cq_idx = 0; + cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT; + } + + sop_rq->pkt_first_seg = first_seg; + sop_rq->pkt_last_seg = last_seg; + + cq->to_clean = cq_idx; + + if ((sop_rq->rx_nb_hold + data_rq->rx_nb_hold) > + sop_rq->rx_free_thresh) { + if (data_rq->in_use) { + data_rq->posted_index = + enic_ring_add(data_rq->ring.desc_count, + data_rq->posted_index, + data_rq->rx_nb_hold); + data_rq->rx_nb_hold = 0; + } + sop_rq->posted_index = enic_ring_add(sop_rq->ring.desc_count, + sop_rq->posted_index, + sop_rq->rx_nb_hold); + sop_rq->rx_nb_hold = 0; + + rte_mb(); + if (data_rq->in_use) + iowrite32_relaxed(data_rq->posted_index, + &data_rq->ctrl->posted_index); + rte_compiler_barrier(); + iowrite32_relaxed(sop_rq->posted_index, + &sop_rq->ctrl->posted_index); + } + + + return nb_rx; +} + +uint16_t +enic_noscatter_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct rte_mbuf *mb, **rx, **rxmb; + uint16_t cq_idx, nb_rx, max_rx; + struct cq_enet_rq_desc *cqd; + struct rq_enet_desc *rqd; + unsigned int port_id; + struct vnic_cq *cq; + struct vnic_rq *rq; + struct enic *enic; + uint8_t color; + bool overlay; + bool tnl; + + rq = rx_queue; + enic = vnic_dev_priv(rq->vdev); + cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + cq_idx = cq->to_clean; + + /* + * Fill up the reserve of free mbufs. Below, we restock the receive + * ring with these mbufs to avoid allocation failures. + */ + if (rq->num_free_mbufs == 0) { + if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs, + ENIC_RX_BURST_MAX)) + return 0; + rq->num_free_mbufs = ENIC_RX_BURST_MAX; + } + + /* Receive until the end of the ring, at most. */ + max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs); + max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx); + + cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx; + color = cq->last_color; + rxmb = rq->mbuf_ring + cq_idx; + port_id = enic->port_id; + overlay = enic->overlay_offload; + + rx = rx_pkts; + while (max_rx) { + max_rx--; + if ((cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color) + break; + if (unlikely(cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) { + rte_pktmbuf_free(*rxmb++); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + cqd++; + continue; + } + + mb = *rxmb++; + /* prefetch mbuf data for caller */ + rte_packet_prefetch(RTE_PTR_ADD(mb->buf_addr, + RTE_PKTMBUF_HEADROOM)); + mb->data_len = cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + mb->pkt_len = mb->data_len; + mb->port = port_id; + tnl = overlay && (cqd->completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0; + mb->packet_type = + enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, + tnl); + enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb); + /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */ + if (tnl) { + mb->packet_type &= ~(RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + cqd++; + *rx++ = mb; + } + /* Number of descriptors visited */ + nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx; + if (nb_rx == 0) + return 0; + rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx; + rxmb = rq->mbuf_ring + cq_idx; + cq_idx += nb_rx; + rq->rx_nb_hold += nb_rx; + if (unlikely(cq_idx == cq->ring.desc_count)) { + cq_idx = 0; + cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT; + } + cq->to_clean = cq_idx; + + memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs, + sizeof(struct rte_mbuf *) * nb_rx); + rq->num_free_mbufs -= nb_rx; + while (nb_rx) { + nb_rx--; + mb = *rxmb++; + mb->data_off = RTE_PKTMBUF_HEADROOM; + rqd->address = mb->buf_iova + RTE_PKTMBUF_HEADROOM; + rqd++; + } + if (rq->rx_nb_hold > rq->rx_free_thresh) { + rq->posted_index = enic_ring_add(rq->ring.desc_count, + rq->posted_index, + rq->rx_nb_hold); + rq->rx_nb_hold = 0; + rte_wmb(); + iowrite32_relaxed(rq->posted_index, + &rq->ctrl->posted_index); + } + + return rx - rx_pkts; +} + +static inline void enic_free_wq_bufs(struct vnic_wq *wq, + uint16_t completed_index) +{ + struct rte_mbuf *buf; + struct rte_mbuf *m, *free[ENIC_MAX_WQ_DESCS]; + unsigned int nb_to_free, nb_free = 0, i; + struct rte_mempool *pool; + unsigned int tail_idx; + unsigned int desc_count = wq->ring.desc_count; + + nb_to_free = enic_ring_sub(desc_count, wq->tail_idx, completed_index) + + 1; + tail_idx = wq->tail_idx; + pool = wq->bufs[tail_idx]->pool; + for (i = 0; i < nb_to_free; i++) { + buf = wq->bufs[tail_idx]; + m = rte_pktmbuf_prefree_seg(buf); + if (unlikely(m == NULL)) { + tail_idx = enic_ring_incr(desc_count, tail_idx); + continue; + } + + if (likely(m->pool == pool)) { + RTE_ASSERT(nb_free < ENIC_MAX_WQ_DESCS); + free[nb_free++] = m; + } else { + rte_mempool_put_bulk(pool, (void *)free, nb_free); + free[0] = m; + nb_free = 1; + pool = m->pool; + } + tail_idx = enic_ring_incr(desc_count, tail_idx); + } + + if (nb_free > 0) + rte_mempool_put_bulk(pool, (void **)free, nb_free); + + wq->tail_idx = tail_idx; + wq->ring.desc_avail += nb_to_free; +} + +unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq) +{ + uint16_t completed_index; + + completed_index = *((uint32_t *)wq->cqmsg_rz->addr) & 0xffff; + + if (wq->last_completed_index != completed_index) { + enic_free_wq_bufs(wq, completed_index); + wq->last_completed_index = completed_index; + } + return 0; +} + +uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct vnic_wq *wq = (struct vnic_wq *)tx_queue; + int32_t ret; + uint16_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) { + rte_errno = EINVAL; + return i; + } + } else { + uint16_t header_len; + + header_len = m->l2_len + m->l3_len + m->l4_len; + if (m->tso_segsz + header_len > ENIC_TX_MAX_PKT_SIZE) { + rte_errno = EINVAL; + return i; + } + } + + if (ol_flags & wq->tx_offload_notsup_mask) { + rte_errno = ENOTSUP; + return i; + } +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } + } + + return i; +} + +uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + uint16_t index; + unsigned int pkt_len, data_len; + unsigned int nb_segs; + struct rte_mbuf *tx_pkt; + struct vnic_wq *wq = (struct vnic_wq *)tx_queue; + struct enic *enic = vnic_dev_priv(wq->vdev); + unsigned short vlan_id; + uint64_t ol_flags; + uint64_t ol_flags_mask; + unsigned int wq_desc_avail; + int head_idx; + unsigned int desc_count; + struct wq_enet_desc *descs, *desc_p, desc_tmp; + uint16_t mss; + uint8_t vlan_tag_insert; + uint8_t eop, cq; + uint64_t bus_addr; + uint8_t offload_mode; + uint16_t header_len; + uint64_t tso; + rte_atomic64_t *tx_oversized; + + enic_cleanup_wq(enic, wq); + wq_desc_avail = vnic_wq_desc_avail(wq); + head_idx = wq->head_idx; + desc_count = wq->ring.desc_count; + ol_flags_mask = PKT_TX_VLAN | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK; + tx_oversized = &enic->soft_stats.tx_oversized; + + nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX); + + for (index = 0; index < nb_pkts; index++) { + tx_pkt = *tx_pkts++; + pkt_len = tx_pkt->pkt_len; + data_len = tx_pkt->data_len; + ol_flags = tx_pkt->ol_flags; + nb_segs = tx_pkt->nb_segs; + tso = ol_flags & PKT_TX_TCP_SEG; + + /* drop packet if it's too big to send */ + if (unlikely(!tso && pkt_len > ENIC_TX_MAX_PKT_SIZE)) { + rte_pktmbuf_free(tx_pkt); + rte_atomic64_inc(tx_oversized); + continue; + } + + if (nb_segs > wq_desc_avail) { + if (index > 0) + goto post; + goto done; + } + + mss = 0; + vlan_id = tx_pkt->vlan_tci; + vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN); + bus_addr = (dma_addr_t) + (tx_pkt->buf_iova + tx_pkt->data_off); + + descs = (struct wq_enet_desc *)wq->ring.descs; + desc_p = descs + head_idx; + + eop = (data_len == pkt_len); + offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM; + header_len = 0; + + if (tso) { + header_len = tx_pkt->l2_len + tx_pkt->l3_len + + tx_pkt->l4_len; + + /* Drop if non-TCP packet or TSO seg size is too big */ + if (unlikely(header_len == 0 || ((tx_pkt->tso_segsz + + header_len) > ENIC_TX_MAX_PKT_SIZE))) { + rte_pktmbuf_free(tx_pkt); + rte_atomic64_inc(tx_oversized); + continue; + } + + offload_mode = WQ_ENET_OFFLOAD_MODE_TSO; + mss = tx_pkt->tso_segsz; + /* For tunnel, need the size of outer+inner headers */ + if (ol_flags & PKT_TX_TUNNEL_MASK) { + header_len += tx_pkt->outer_l2_len + + tx_pkt->outer_l3_len; + } + } + + if ((ol_flags & ol_flags_mask) && (header_len == 0)) { + if (ol_flags & PKT_TX_IP_CKSUM) + mss |= ENIC_CALC_IP_CKSUM; + + /* Nic uses just 1 bit for UDP and TCP */ + switch (ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + case PKT_TX_UDP_CKSUM: + mss |= ENIC_CALC_TCP_UDP_CKSUM; + break; + } + } + wq->cq_pend++; + cq = 0; + if (eop && wq->cq_pend >= ENIC_WQ_CQ_THRESH) { + cq = 1; + wq->cq_pend = 0; + } + wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, header_len, + offload_mode, eop, cq, 0, vlan_tag_insert, + vlan_id, 0); + + *desc_p = desc_tmp; + wq->bufs[head_idx] = tx_pkt; + head_idx = enic_ring_incr(desc_count, head_idx); + wq_desc_avail--; + + if (!eop) { + for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt = + tx_pkt->next) { + data_len = tx_pkt->data_len; + + wq->cq_pend++; + cq = 0; + if (tx_pkt->next == NULL) { + eop = 1; + if (wq->cq_pend >= ENIC_WQ_CQ_THRESH) { + cq = 1; + wq->cq_pend = 0; + } + } + desc_p = descs + head_idx; + bus_addr = (dma_addr_t)(tx_pkt->buf_iova + + tx_pkt->data_off); + wq_enet_desc_enc((struct wq_enet_desc *) + &desc_tmp, bus_addr, data_len, + mss, 0, offload_mode, eop, cq, + 0, vlan_tag_insert, vlan_id, + 0); + + *desc_p = desc_tmp; + wq->bufs[head_idx] = tx_pkt; + head_idx = enic_ring_incr(desc_count, head_idx); + wq_desc_avail--; + } + } + } + post: + rte_wmb(); + iowrite32_relaxed(head_idx, &wq->ctrl->posted_index); + done: + wq->ring.desc_avail = wq_desc_avail; + wq->head_idx = head_idx; + + return index; +} + +static void enqueue_simple_pkts(struct rte_mbuf **pkts, + struct wq_enet_desc *desc, + uint16_t n, + struct enic *enic) +{ + struct rte_mbuf *p; + uint16_t mss; + + while (n) { + n--; + p = *pkts++; + desc->address = p->buf_iova + p->data_off; + desc->length = p->pkt_len; + /* VLAN insert */ + desc->vlan_tag = p->vlan_tci; + desc->header_length_flags &= + ((1 << WQ_ENET_FLAGS_EOP_SHIFT) | + (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT)); + if (p->ol_flags & PKT_TX_VLAN) { + desc->header_length_flags |= + 1 << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT; + } + /* + * Checksum offload. We use WQ_ENET_OFFLOAD_MODE_CSUM, which + * is 0, so no need to set offload_mode. + */ + mss = 0; + if (p->ol_flags & PKT_TX_IP_CKSUM) + mss |= ENIC_CALC_IP_CKSUM << WQ_ENET_MSS_SHIFT; + if (p->ol_flags & PKT_TX_L4_MASK) + mss |= ENIC_CALC_TCP_UDP_CKSUM << WQ_ENET_MSS_SHIFT; + desc->mss_loopback = mss; + + /* + * The app should not send oversized + * packets. tx_pkt_prepare includes a check as + * well. But some apps ignore the device max size and + * tx_pkt_prepare. Oversized packets cause WQ errrors + * and the NIC ends up disabling the whole WQ. So + * truncate packets.. + */ + if (unlikely(p->pkt_len > ENIC_TX_MAX_PKT_SIZE)) { + desc->length = ENIC_TX_MAX_PKT_SIZE; + rte_atomic64_inc(&enic->soft_stats.tx_oversized); + } + desc++; + } +} + +uint16_t enic_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + unsigned int head_idx, desc_count; + struct wq_enet_desc *desc; + struct vnic_wq *wq; + struct enic *enic; + uint16_t rem, n; + + wq = (struct vnic_wq *)tx_queue; + enic = vnic_dev_priv(wq->vdev); + enic_cleanup_wq(enic, wq); + /* Will enqueue this many packets in this call */ + nb_pkts = RTE_MIN(nb_pkts, wq->ring.desc_avail); + if (nb_pkts == 0) + return 0; + + head_idx = wq->head_idx; + desc_count = wq->ring.desc_count; + + /* Descriptors until the end of the ring */ + n = desc_count - head_idx; + n = RTE_MIN(nb_pkts, n); + + /* Save mbuf pointers to free later */ + memcpy(wq->bufs + head_idx, tx_pkts, sizeof(struct rte_mbuf *) * n); + + /* Enqueue until the ring end */ + rem = nb_pkts - n; + desc = ((struct wq_enet_desc *)wq->ring.descs) + head_idx; + enqueue_simple_pkts(tx_pkts, desc, n, enic); + + /* Wrap to the start of the ring */ + if (rem) { + tx_pkts += n; + memcpy(wq->bufs, tx_pkts, sizeof(struct rte_mbuf *) * rem); + desc = (struct wq_enet_desc *)wq->ring.descs; + enqueue_simple_pkts(tx_pkts, desc, rem, enic); + } + rte_wmb(); + + /* Update head_idx and desc_avail */ + wq->ring.desc_avail -= nb_pkts; + head_idx += nb_pkts; + if (head_idx >= desc_count) + head_idx -= desc_count; + wq->head_idx = head_idx; + iowrite32_relaxed(head_idx, &wq->ctrl->posted_index); + return nb_pkts; +} diff --git a/src/spdk/dpdk/drivers/net/enic/enic_rxtx_common.h b/src/spdk/dpdk/drivers/net/enic/enic_rxtx_common.h new file mode 100644 index 000000000..d8668d189 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_rxtx_common.h @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#ifndef _ENIC_RXTX_COMMON_H_ +#define _ENIC_RXTX_COMMON_H_ + +#include <rte_byteorder.h> + +static inline uint16_t +enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd) +{ + return rte_le_to_cpu_16(crd->completed_index_flags) & + ~CQ_DESC_COMP_NDX_MASK; +} + +static inline uint16_t +enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd) +{ + return rte_le_to_cpu_16(crd->bytes_written_flags) & + ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; +} + +static inline uint8_t +enic_cq_rx_desc_packet_error(uint16_t bwflags) +{ + return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) == + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED; +} + +static inline uint8_t +enic_cq_rx_desc_eop(uint16_t ciflags) +{ + return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP) + == CQ_ENET_RQ_DESC_FLAGS_EOP; +} + +static inline uint8_t +enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd) +{ + return (rte_le_to_cpu_16(cqrd->q_number_rss_type_flags) & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) == + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC; +} + +static inline uint8_t +enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd) +{ + return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) == + CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK; +} + +static inline uint8_t +enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd) +{ + return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) == + CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK; +} + +static inline uint8_t +enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd) +{ + return (uint8_t)((rte_le_to_cpu_16(cqrd->q_number_rss_type_flags) >> + CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK); +} + +static inline uint32_t +enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd) +{ + return rte_le_to_cpu_32(cqrd->rss_hash); +} + +static inline uint16_t +enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd) +{ + return rte_le_to_cpu_16(cqrd->vlan); +} + +static inline uint16_t +enic_cq_rx_desc_n_bytes(struct cq_desc *cqd) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + return rte_le_to_cpu_16(cqrd->bytes_written_flags) & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; +} + + +static inline uint8_t +enic_cq_rx_check_err(struct cq_desc *cqd) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + uint16_t bwflags; + + bwflags = enic_cq_rx_desc_bwflags(cqrd); + if (unlikely(enic_cq_rx_desc_packet_error(bwflags))) + return 1; + return 0; +} + +/* Lookup table to translate RX CQ flags to mbuf flags. */ +static uint32_t +enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + uint8_t cqrd_flags = cqrd->flags; + /* + * Odd-numbered entries are for tunnel packets. All packet type info + * applies to the inner packet, and there is no info on the outer + * packet. The outer flags in these entries exist only to avoid + * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf + * afterwards. + * + * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set + * RTE_PTYPE_TUNNEL_GRENAT.. + */ + static const uint32_t cq_type_table[128] __rte_cache_aligned = { + [0x00] = RTE_PTYPE_UNKNOWN, + [0x01] = RTE_PTYPE_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER, + [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + /* All others reserved */ + }; + cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT + | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6 + | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP; + return cq_type_table[cqrd_flags + tnl]; +} + +static void +enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + uint16_t bwflags, pkt_flags = 0, vlan_tci; + bwflags = enic_cq_rx_desc_bwflags(cqrd); + vlan_tci = enic_cq_rx_desc_vlan(cqrd); + + /* VLAN STRIPPED flag. The L2 packet type updated here also */ + if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { + pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; + } else { + if (vlan_tci != 0) { + pkt_flags |= PKT_RX_VLAN; + mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN; + } else { + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; + } + } + mbuf->vlan_tci = vlan_tci; + + if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) { + struct cq_enet_rq_clsf_desc *clsf_cqd; + uint16_t filter_id; + clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd; + filter_id = clsf_cqd->filter_id; + if (filter_id) { + pkt_flags |= PKT_RX_FDIR; + if (filter_id != ENIC_MAGIC_FILTER_ID) { + /* filter_id = mark id + 1, so subtract 1 */ + mbuf->hash.fdir.hi = filter_id - 1; + pkt_flags |= PKT_RX_FDIR_ID; + } + } + } else if (enic_cq_rx_desc_rss_type(cqrd)) { + /* RSS flag */ + pkt_flags |= PKT_RX_RSS_HASH; + mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd); + } + + /* checksum flags */ + if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) { + if (!enic_cq_rx_desc_csum_not_calc(cqrd)) { + uint32_t l4_flags; + l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; + + /* + * When overlay offload is enabled, the NIC may + * set ipv4_csum_ok=1 if the inner packet is IPv6.. + * So, explicitly check for IPv4 before checking + * ipv4_csum_ok. + */ + if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) { + if (enic_cq_rx_desc_ipv4_csum_ok(cqrd)) + pkt_flags |= PKT_RX_IP_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_IP_CKSUM_BAD; + } + + if (l4_flags == RTE_PTYPE_L4_UDP || + l4_flags == RTE_PTYPE_L4_TCP) { + if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) + pkt_flags |= PKT_RX_L4_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_L4_CKSUM_BAD; + } + } + } + + mbuf->ol_flags = pkt_flags; +} + +#endif /* _ENIC_RXTX_COMMON_H_ */ diff --git a/src/spdk/dpdk/drivers/net/enic/enic_rxtx_vec_avx2.c b/src/spdk/dpdk/drivers/net/enic/enic_rxtx_vec_avx2.c new file mode 100644 index 000000000..36d4d0dea --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/enic_rxtx_vec_avx2.c @@ -0,0 +1,830 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + */ + +#include <rte_mbuf.h> +#include <rte_ethdev_driver.h> + +#include "enic_compat.h" +#include "rq_enet_desc.h" +#include "enic.h" +#include "enic_rxtx_common.h" + +#include <x86intrin.h> + +static struct rte_mbuf * +rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic) +{ + bool tnl; + + *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer; + mb->data_len = cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + mb->pkt_len = mb->data_len; + tnl = enic->overlay_offload && (cqd->completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0; + mb->packet_type = + enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl); + enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb); + /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */ + if (tnl) { + mb->packet_type &= ~(RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + return mb; +} + +static uint16_t +enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct rte_mbuf **rx, **rxmb; + uint16_t cq_idx, nb_rx, max_rx; + struct cq_enet_rq_desc *cqd; + struct rq_enet_desc *rqd; + struct vnic_cq *cq; + struct vnic_rq *rq; + struct enic *enic; + uint8_t color; + + rq = rx_queue; + enic = vnic_dev_priv(rq->vdev); + cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + cq_idx = cq->to_clean; + + /* + * Fill up the reserve of free mbufs. Below, we restock the receive + * ring with these mbufs to avoid allocation failures. + */ + if (rq->num_free_mbufs == 0) { + if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs, + ENIC_RX_BURST_MAX)) + return 0; + rq->num_free_mbufs = ENIC_RX_BURST_MAX; + } + /* Receive until the end of the ring, at most. */ + max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs); + max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx); + + rxmb = rq->mbuf_ring + cq_idx; + color = cq->last_color; + cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx; + rx = rx_pkts; + if (max_rx == 0 || + (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color) + return 0; + + /* Step 1: Process one packet to do aligned 256-bit load below */ + if (cq_idx & 0x1) { + if (unlikely(cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) { + rte_pktmbuf_free(*rxmb++); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + } else { + *rx++ = rx_one(cqd, *rxmb++, enic); + } + cqd++; + max_rx--; + } + + const __m256i mask = + _mm256_set_epi8(/* Second descriptor */ + 0xff, /* type_color */ + (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT | + CQ_ENET_RQ_DESC_FLAGS_IPV4 | + CQ_ENET_RQ_DESC_FLAGS_IPV6 | + CQ_ENET_RQ_DESC_FLAGS_TCP | + CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */ + 0, 0, /* checksum_fcoe */ + 0xff, 0xff, /* vlan */ + 0x3f, 0xff, /* bytes_written_flags */ + 0xff, 0xff, 0xff, 0xff, /* rss_hash */ + 0xff, 0xff, /* q_number_rss_type_flags */ + 0, 0, /* completed_index_flags */ + /* First descriptor */ + 0xff, /* type_color */ + (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT | + CQ_ENET_RQ_DESC_FLAGS_IPV4 | + CQ_ENET_RQ_DESC_FLAGS_IPV6 | + CQ_ENET_RQ_DESC_FLAGS_TCP | + CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */ + 0, 0, /* checksum_fcoe */ + 0xff, 0xff, /* vlan */ + 0x3f, 0xff, /* bytes_written_flags */ + 0xff, 0xff, 0xff, 0xff, /* rss_hash */ + 0xff, 0xff, /* q_number_rss_type_flags */ + 0, 0 /* completed_index_flags */ + ); + const __m256i shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 7, 6, 5, 4, /* rss = rss_hash */ + 11, 10, /* vlan_tci = vlan */ + 9, 8, /* data_len = bytes_written */ + 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */ + 0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */ + /* First descriptor */ + 7, 6, 5, 4, /* rss = rss_hash */ + 11, 10, /* vlan_tci = vlan */ + 9, 8, /* data_len = bytes_written */ + 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */ + 0x80, 0x80, 0x80, 0x80 /* packet_type = 0 */ + ); + /* Used to collect 8 flags from 8 desc into one register */ + const __m256i flags_shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + /* First descriptor */ + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + /* + * Byte 3: upper byte of completed_index_flags + * bit 5 = fcoe (tunnel) + * Byte 2: upper byte of q_number_rss_type_flags + * bits 2,3,4,5 = rss type + * bit 6 = csum_not_calc + * Byte 1: upper byte of bytes_written_flags + * bit 6 = truncated + * bit 7 = vlan stripped + * Byte 0: flags + */ + 1, 3, 9, 14 + ); + /* Used to collect 8 VLAN IDs from 8 desc into one register */ + const __m256i vlan_shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + /* First descriptor */ + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10); + /* PKT_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */ + const __m256i rss_shuffle = + _mm256_set_epi8(/* second 128 bits */ + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + 0, /* rss_types = 0 */ + /* first 128 bits */ + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + 0 /* rss_types = 0 */); + /* + * VLAN offload flags. + * shuffle index: + * vlan_stripped => bit 0 + * vlan_id == 0 => bit 1 + */ + const __m256i vlan_shuffle = + _mm256_set_epi32(0, 0, 0, 0, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN); + /* Use the same shuffle index as vlan_shuffle */ + const __m256i vlan_ptype_shuffle = + _mm256_set_epi32(0, 0, 0, 0, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN); + /* + * CKSUM flags. Shift right so they fit int 8-bit integers. + * shuffle index: + * ipv4_csum_ok => bit 3 + * ip4 => bit 2 + * tcp_or_udp => bit 1 + * tcp_udp_csum_ok => bit 0 + */ + const __m256i csum_shuffle = + _mm256_set_epi8(/* second 128 bits */ + /* 1111 ip4+ip4_ok+l4+l4_ok */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1), + /* 1110 ip4_ok+ip4+l4+!l4_ok */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */ + (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */ + (PKT_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */ + (PKT_RX_L4_CKSUM_BAD >> 1), /* 1010 l4+!l4_ok */ + 0, /* 1001 */ + 0, /* 1000 */ + /* 0111 !ip4_ok+ip4+l4+l4_ok */ + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1), + /* 0110 !ip4_ok+ip4+l4+!l4_ok */ + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), /* 0101 !ip4_ok+ip4 */ + (PKT_RX_IP_CKSUM_BAD >> 1), /* 0100 !ip4_ok+ip4 */ + (PKT_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */ + (PKT_RX_L4_CKSUM_BAD >> 1), /* 0010 l4+!l4_ok */ + 0, /* 0001 */ + 0, /* 0000 */ + /* first 128 bits */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1), + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_BAD >> 1), + 0, 0, + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1), + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), + (PKT_RX_L4_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_BAD >> 1), + 0, 0); + /* + * Non-fragment PTYPEs. + * Shuffle 4-bit index: + * ip6 => bit 0 + * ip4 => bit 1 + * udp => bit 2 + * tcp => bit 3 + * bit + * 3 2 1 0 + * ------- + * 0 0 0 0 unknown + * 0 0 0 1 ip6 | nonfrag + * 0 0 1 0 ip4 | nonfrag + * 0 0 1 1 unknown + * 0 1 0 0 unknown + * 0 1 0 1 ip6 | udp + * 0 1 1 0 ip4 | udp + * 0 1 1 1 unknown + * 1 0 0 0 unknown + * 1 0 0 1 ip6 | tcp + * 1 0 1 0 ip4 | tcp + * 1 0 1 1 unknown + * 1 1 0 0 unknown + * 1 1 0 1 unknown + * 1 1 1 0 unknown + * 1 1 1 1 unknown + * + * PTYPEs do not fit in 8 bits, so shift right 4.. + */ + const __m256i nonfrag_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + RTE_PTYPE_UNKNOWN); + /* Fragment PTYPEs. Use the same shuffle index as above. */ + const __m256i frag_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN); + /* + * Tunnel PTYPEs. Use the same shuffle index as above. + * L4 types are not part of this table. They come from non-tunnel + * types above. + */ + const __m256i tnl_l3_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN); + + const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer, + 0, enic->mbuf_initializer); + + /* + * --- cq desc fields --- offset + * completed_index_flags - 0 use: fcoe + * q_number_rss_type_flags - 2 use: rss types, csum_not_calc + * rss_hash - 4 ==> mbuf.hash.rss + * bytes_written_flags - 8 ==> mbuf.pkt_len,data_len + * use: truncated, vlan_stripped + * vlan - 10 ==> mbuf.vlan_tci + * checksum_fcoe - 12 (unused) + * flags - 14 use: all bits + * type_color - 15 (unused) + * + * --- mbuf fields --- offset + * rearm_data ---- 16 + * data_off - 0 (mbuf_init) -+ + * refcnt - 2 (mbuf_init) | + * nb_segs - 4 (mbuf_init) | 16B 128b + * port - 6 (mbuf_init) | + * ol_flag - 8 (from cqd) -+ + * rx_descriptor_fields1 ---- 32 + * packet_type - 0 (from cqd) -+ + * pkt_len - 4 (from cqd) | + * data_len - 8 (from cqd) | 16B 128b + * vlan_tci - 10 (from cqd) | + * rss - 12 (from cqd) -+ + */ + + __m256i overlay_enabled = + _mm256_set1_epi32((uint32_t)enic->overlay_offload); + + /* Step 2: Process 8 packets per loop using SIMD */ + while (max_rx > 7 && (((cqd + 7)->type_color & + CQ_DESC_COLOR_MASK_NOSHIFT) != color)) { + /* Load 8 16B CQ descriptors */ + __m256i cqd01 = _mm256_load_si256((void *)cqd); + __m256i cqd23 = _mm256_load_si256((void *)(cqd + 2)); + __m256i cqd45 = _mm256_load_si256((void *)(cqd + 4)); + __m256i cqd67 = _mm256_load_si256((void *)(cqd + 6)); + /* Copy 8 mbuf pointers to rx_pkts */ + _mm256_storeu_si256((void *)rx, + _mm256_loadu_si256((void *)rxmb)); + _mm256_storeu_si256((void *)(rx + 4), + _mm256_loadu_si256((void *)(rxmb + 4))); + + /* + * Collect 8 flags (each 32 bits) into one register. + * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc + */ + __m256i flags01 = + _mm256_shuffle_epi8(cqd01, flags_shuffle_mask); + /* + * Shuffle above produces 8 x 32-bit flags for 8 descriptors + * in this order: 0, 0, 0, 0, 1, 1, 1, 1 + * The duplicates in each 128-bit lane simplifies blending + * below. + */ + __m256i flags23 = + _mm256_shuffle_epi8(cqd23, flags_shuffle_mask); + __m256i flags45 = + _mm256_shuffle_epi8(cqd45, flags_shuffle_mask); + __m256i flags67 = + _mm256_shuffle_epi8(cqd67, flags_shuffle_mask); + /* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */ + __m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22); + /* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */ + __m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88); + /* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */ + __m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc); + /* + * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6 + * This order simplifies blend operations way below that + * produce 'rearm' data for each mbuf. + */ + flags0_7 = _mm256_permute4x64_epi64(flags0_7, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Check truncated bits and bail out early on. + * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc + */ + __m256i trunc = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31); + trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc, + (1 << 6) + (0 << 4) + (3 << 2) + 2)); + /* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */ + if (_mm256_extract_epi64(trunc, 0) || + _mm256_extract_epi64(trunc, 1)) + break; + + /* + * Compute PKT_RX_RSS_HASH. + * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc + * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i rss_types = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28); + /* + * RSS flags (PKT_RX_RSS_HASH) are in + * byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types); + + /* + * Compute CKSUM flags. First build the index and then + * use it to shuffle csum_shuffle. + * 20 instructions including const loads: 2.5 inst/desc + */ + /* + * csum_not_calc (bit 22) + * csum_not_calc (0) => 0xffffffff + * csum_not_calc (1) => 0x0 + */ + const __m256i zero4 = _mm256_setzero_si256(); + const __m256i mask22 = _mm256_set1_epi32(0x400000); + __m256i csum_not_calc = _mm256_cmpeq_epi32(zero4, + _mm256_and_si256(flags0_7, mask22)); + /* + * (tcp|udp) && !fragment => bit 1 + * tcp = bit 2, udp = bit 1, frag = bit 6 + */ + const __m256i mask1 = _mm256_set1_epi32(0x2); + __m256i tcp_udp = + _mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5), + _mm256_or_si256(flags0_7, + _mm256_srli_epi32(flags0_7, 1))); + tcp_udp = _mm256_and_si256(tcp_udp, mask1); + /* ipv4 (bit 5) => bit 2 */ + const __m256i mask2 = _mm256_set1_epi32(0x4); + __m256i ipv4 = _mm256_and_si256(mask2, + _mm256_srli_epi32(flags0_7, 3)); + /* + * ipv4_csum_ok (bit 3) => bit 3 + * tcp_udp_csum_ok (bit 0) => bit 0 + * 0x9 + */ + const __m256i mask0_3 = _mm256_set1_epi32(0x9); + __m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3); + csum_idx = _mm256_and_si256(csum_not_calc, + _mm256_or_si256(_mm256_or_si256(csum_idx, ipv4), + tcp_udp)); + __m256i csum_flags = + _mm256_shuffle_epi8(csum_shuffle, csum_idx); + /* Shift left to restore CKSUM flags. See csum_shuffle. */ + csum_flags = _mm256_slli_epi32(csum_flags, 1); + /* Combine csum flags and offload flags: 0.125 inst/desc */ + rss_flags = _mm256_or_si256(rss_flags, csum_flags); + + /* + * Collect 8 VLAN IDs and compute vlan_id != 0 on each. + * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc: + * 1.25 inst/desc + */ + __m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask); + __m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask); + __m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask); + __m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask); + __m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22); + __m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88); + /* desc: 0, 2, 4, 6, 1, 3, 5, 7 */ + __m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc); + /* desc: 1, 3, 5, 7, 0, 2, 4, 6 */ + vlan0_7 = _mm256_permute4x64_epi64(vlan0_7, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + /* + * Compare 0 == vlan_id produces 0xffffffff (-1) if + * vlan 0 and 0 if vlan non-0. Then subtracting the + * result from 0 produces 0 - (-1) = 1 for vlan 0, and + * 0 - 0 = 0 for vlan non-0. + */ + vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7); + /* vlan_id != 0 => 0, vlan_id == 0 => 1 */ + vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7); + + /* + * Compute PKT_RX_VLAN and PKT_RX_VLAN_STRIPPED. + * Use 3 shifts, 1 or, 1 shuffle for 8 desc: 0.625 inst/desc + * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i vlan_idx = + _mm256_or_si256(/* vlan_stripped => bit 0 */ + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, + 16), 31), + /* (vlan_id == 0) => bit 1 */ + _mm256_slli_epi32(vlan0_7, 1)); + /* + * The index captures 4 cases. + * stripped, id = 0 ==> 11b = 3 + * stripped, id != 0 ==> 01b = 1 + * not strip, id == 0 ==> 10b = 2 + * not strip, id != 0 ==> 00b = 0 + */ + __m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle, + vlan_idx); + /* Combine vlan and offload flags: 0.125 inst/desc */ + rss_flags = _mm256_or_si256(rss_flags, vlan_flags); + + /* + * Compute non-tunnel PTYPEs. + * 17 inst / 8 desc = 2.125 inst/desc + */ + /* ETHER and ETHER_VLAN */ + __m256i vlan_ptype = + _mm256_permutevar8x32_epi32(vlan_ptype_shuffle, + vlan_idx); + /* Build the ptype index from flags */ + tcp_udp = _mm256_slli_epi32(flags0_7, 29); + tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2); + __m256i ip4_ip6 = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30); + __m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6); + __m256i frag_bit = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31); + __m256i nonfrag_ptype = + _mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx); + __m256i frag_ptype = + _mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx); + /* + * Zero out the unwanted types and combine the remaining bits. + * The effect is same as selecting non-frag or frag types + * depending on the frag bit. + */ + nonfrag_ptype = _mm256_and_si256(nonfrag_ptype, + _mm256_cmpeq_epi32(zero4, frag_bit)); + frag_ptype = _mm256_and_si256(frag_ptype, + _mm256_cmpgt_epi32(frag_bit, zero4)); + __m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype); + ptype = _mm256_slli_epi32(ptype, 4); + /* + * Compute tunnel PTYPEs. + * 15 inst / 8 desc = 1.875 inst/desc + */ + __m256i tnl_l3_ptype = + _mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx); + tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16); + /* + * Shift non-tunnel L4 types to make them tunnel types. + * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP + */ + __m256i tnl_l4_ptype = + _mm256_slli_epi32(_mm256_and_si256(ptype, + _mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16); + __m256i tnl_ptype = + _mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype); + tnl_ptype = _mm256_or_si256(tnl_ptype, + _mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER)); + /* + * Select non-tunnel or tunnel types by zeroing out the + * unwanted ones. + */ + __m256i tnl_flags = _mm256_and_si256(overlay_enabled, + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31)); + tnl_ptype = _mm256_and_si256(tnl_ptype, + _mm256_sub_epi32(zero4, tnl_flags)); + ptype = _mm256_and_si256(ptype, + _mm256_cmpeq_epi32(zero4, tnl_flags)); + /* + * Combine types and swap to have ptypes in the same order + * as desc. + * desc: 0 2 4 6 1 3 5 7 + * 3 inst / 8 desc = 0.375 inst/desc + */ + ptype = _mm256_or_si256(ptype, tnl_ptype); + ptype = _mm256_or_si256(ptype, vlan_ptype); + ptype = _mm256_permute4x64_epi64(ptype, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Mask packet length. + * Use 4 ands: 0.5 instructions/desc + */ + cqd01 = _mm256_and_si256(cqd01, mask); + cqd23 = _mm256_and_si256(cqd23, mask); + cqd45 = _mm256_and_si256(cqd45, mask); + cqd67 = _mm256_and_si256(cqd67, mask); + /* + * Shuffle. Two 16B sets of the mbuf fields. + * packet_type, pkt_len, data_len, vlan_tci, rss + */ + __m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask); + __m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask); + __m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask); + __m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask); + + /* + * Blend in ptypes + * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc + */ + rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11); + rearm23 = _mm256_blend_epi32(rearm23, + _mm256_shuffle_epi32(ptype, 1), 0x11); + rearm45 = _mm256_blend_epi32(rearm45, + _mm256_shuffle_epi32(ptype, 2), 0x11); + rearm67 = _mm256_blend_epi32(rearm67, + _mm256_shuffle_epi32(ptype, 3), 0x11); + + /* + * Move rss_flags into ol_flags in mbuf_init. + * Use 1 shift and 1 blend for each desc: 2 inst/desc + */ + __m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init, + rss_flags, 0x44); + __m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(rss_flags, 4), 0x44); + __m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(rss_flags, 8), 0x44); + __m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init, + _mm256_srli_si256(rss_flags, 4), 0x44); + + /* + * Build rearm, one per desc. + * 8 blends and 4 permutes: 1.5 inst/desc + */ + __m256i rearm0 = _mm256_blend_epi32(rearm01, + mbuf_init0_1, 0xf0); + __m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1, + rearm01, 0xf0); + __m256i rearm2 = _mm256_blend_epi32(rearm23, + mbuf_init2_3, 0xf0); + __m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3, + rearm23, 0xf0); + /* Swap upper and lower 64 bits */ + rearm0 = _mm256_permute4x64_epi64(rearm0, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + rearm2 = _mm256_permute4x64_epi64(rearm2, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + /* Second set of 4 descriptors */ + __m256i rearm4 = _mm256_blend_epi32(rearm45, + mbuf_init4_5, 0xf0); + __m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5, + rearm45, 0xf0); + __m256i rearm6 = _mm256_blend_epi32(rearm67, + mbuf_init6_7, 0xf0); + __m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7, + rearm67, 0xf0); + rearm4 = _mm256_permute4x64_epi64(rearm4, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + rearm6 = _mm256_permute4x64_epi64(rearm6, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Write out 32B of mbuf fields. + * data_off - off 0 (mbuf_init) + * refcnt - 2 (mbuf_init) + * nb_segs - 4 (mbuf_init) + * port - 6 (mbuf_init) + * ol_flag - 8 (from cqd) + * packet_type - 16 (from cqd) + * pkt_len - 20 (from cqd) + * data_len - 24 (from cqd) + * vlan_tci - 26 (from cqd) + * rss - 28 (from cqd) + */ + _mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0); + _mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1); + _mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2); + _mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3); + _mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4); + _mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5); + _mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6); + _mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7); + + max_rx -= 8; + cqd += 8; + rx += 8; + rxmb += 8; + } + + /* + * Step 3: Slow path to handle a small (<8) number of packets and + * occasional truncated packets. + */ + while (max_rx && ((cqd->type_color & + CQ_DESC_COLOR_MASK_NOSHIFT) != color)) { + if (unlikely(cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) { + rte_pktmbuf_free(*rxmb++); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + } else { + *rx++ = rx_one(cqd, *rxmb++, enic); + } + cqd++; + max_rx--; + } + + /* Number of descriptors visited */ + nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx; + if (nb_rx == 0) + return 0; + rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx; + rxmb = rq->mbuf_ring + cq_idx; + cq_idx += nb_rx; + rq->rx_nb_hold += nb_rx; + if (unlikely(cq_idx == cq->ring.desc_count)) { + cq_idx = 0; + cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT; + } + cq->to_clean = cq_idx; + + /* Step 4: Restock RQ with new mbufs */ + memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs, + sizeof(struct rte_mbuf *) * nb_rx); + rq->num_free_mbufs -= nb_rx; + while (nb_rx) { + rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM; + nb_rx--; + rqd++; + rxmb++; + } + if (rq->rx_nb_hold > rq->rx_free_thresh) { + rq->posted_index = enic_ring_add(rq->ring.desc_count, + rq->posted_index, + rq->rx_nb_hold); + rq->rx_nb_hold = 0; + rte_wmb(); + iowrite32_relaxed(rq->posted_index, + &rq->ctrl->posted_index); + } + + return rx - rx_pkts; +} + +bool +enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev) +{ + struct enic *enic = pmd_priv(eth_dev); + struct rte_fdir_conf *fconf; + + /* User needs to request for the avx2 handler */ + if (!enic->enable_avx2_rx) + return false; + /* Do not support scatter Rx */ + if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0)) + return false; + /* Do not support fdir/flow */ + fconf = ð_dev->data->dev_conf.fdir_conf; + if (fconf->mode != RTE_FDIR_MODE_NONE) + return false; + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { + ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler"); + eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts; + return true; + } + return false; +} diff --git a/src/spdk/dpdk/drivers/net/enic/meson.build b/src/spdk/dpdk/drivers/net/enic/meson.build new file mode 100644 index 000000000..1bd7cc7e1 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/meson.build @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Cisco Systems, Inc. + +sources = files( + 'base/vnic_cq.c', + 'base/vnic_dev.c', + 'base/vnic_intr.c', + 'base/vnic_rq.c', + 'base/vnic_wq.c', + 'enic_clsf.c', + 'enic_ethdev.c', + 'enic_flow.c', + 'enic_fm_flow.c', + 'enic_main.c', + 'enic_res.c', + 'enic_rxtx.c', + ) +deps += ['hash'] +includes += include_directories('base') + +# The current implementation assumes 64-bit pointers +if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and dpdk_conf.get('RTE_ARCH_64') + sources += files('enic_rxtx_vec_avx2.c') +# Build the avx2 handler if the compiler supports it, even though 'machine' +# does not. This is to support users who build for the min supported machine +# and need to run the binary on newer CPUs too. +# This part is from i40e meson.build +elif cc.has_argument('-mavx2') and dpdk_conf.get('RTE_ARCH_64') + enic_avx2_lib = static_library('enic_avx2_lib', + 'enic_rxtx_vec_avx2.c', + dependencies: [static_rte_ethdev, static_rte_bus_pci], + include_directories: includes, + c_args: [cflags, '-mavx2']) + objs += enic_avx2_lib.extract_objects('enic_rxtx_vec_avx2.c') +endif diff --git a/src/spdk/dpdk/drivers/net/enic/rte_pmd_enic_version.map b/src/spdk/dpdk/drivers/net/enic/rte_pmd_enic_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/enic/rte_pmd_enic_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; |