diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/seastar/dpdk/drivers/net/vmxnet3 | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/drivers/net/vmxnet3')
14 files changed, 4297 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/Makefile b/src/seastar/dpdk/drivers/net/vmxnet3/Makefile new file mode 100644 index 000000000..f1141da67 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/Makefile @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2015 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_vmxnet3_uio.a + +CFLAGS += -DALLOW_EXPERIMENTAL_API +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) +# +# CFLAGS for icc +# +CFLAGS_BASE_DRIVER = -diag-disable 174 -diag-disable 593 -diag-disable 869 +CFLAGS_BASE_DRIVER += -diag-disable 981 -diag-disable 2259 + +else ifeq ($(CONFIG_RTE_TOOLCHAIN_CLANG),y) +# +# CFLAGS for clang +# +CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value +CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args + +else +# +# CFLAGS for gcc +# +ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1) +CFLAGS += -Wno-deprecated +endif +CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value +CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args + +endif +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_bus_pci + +VPATH += $(SRCDIR)/base + +EXPORT_MAP := rte_pmd_vmxnet3_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3_ethdev.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/README b/src/seastar/dpdk/drivers/net/vmxnet3/base/README new file mode 100644 index 000000000..599a3661e --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/README @@ -0,0 +1,47 @@ +.. + BSD LICENSE + + Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Intel VMXNET3 driver +=================== + +This directory contains source code of FreeBSD VMXNET3 driver released by VMware. +In which, upt1_defs.h and vmxnet3_defs.h is introduced without any change. +The other 4 files: includeCheck.h, vmware_pack_begin.h, vmware_pack_end.h and vmxnet3_osdep.h +are crated to adapt to the needs from above 2 files. + +Updating the driver +=================== + +NOTE: The source code in this directory should not be modified apart from +the following file(s): + + vmxnet3_osdep.h diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/upt1_defs.h b/src/seastar/dpdk/drivers/net/vmxnet3/base/upt1_defs.h new file mode 100644 index 000000000..5fd7a3974 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/upt1_defs.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2007 VMware, Inc. All rights reserved. + */ + +/* upt1_defs.h + * + * Definitions for UPTv1 + * + * Some of the defs are duplicated in vmkapi_net_upt.h, because + * vmkapi_net_upt.h cannot distribute with OSS yet and vmkapi headers can + * only include vmkapi headers. Make sure they are kept in sync! + */ + +#ifndef _UPT1_DEFS_H +#define _UPT1_DEFS_H + +#define UPT1_MAX_TX_QUEUES 64 +#define UPT1_MAX_RX_QUEUES 64 + +#define UPT1_MAX_INTRS (UPT1_MAX_TX_QUEUES + UPT1_MAX_RX_QUEUES) + +typedef +#include "vmware_pack_begin.h" +struct UPT1_TxStats { + uint64 TSOPktsTxOK; /* TSO pkts post-segmentation */ + uint64 TSOBytesTxOK; + uint64 ucastPktsTxOK; + uint64 ucastBytesTxOK; + uint64 mcastPktsTxOK; + uint64 mcastBytesTxOK; + uint64 bcastPktsTxOK; + uint64 bcastBytesTxOK; + uint64 pktsTxError; + uint64 pktsTxDiscard; +} +#include "vmware_pack_end.h" +UPT1_TxStats; + +typedef +#include "vmware_pack_begin.h" +struct UPT1_RxStats { + uint64 LROPktsRxOK; /* LRO pkts */ + uint64 LROBytesRxOK; /* bytes from LRO pkts */ + /* the following counters are for pkts from the wire, i.e., pre-LRO */ + uint64 ucastPktsRxOK; + uint64 ucastBytesRxOK; + uint64 mcastPktsRxOK; + uint64 mcastBytesRxOK; + uint64 bcastPktsRxOK; + uint64 bcastBytesRxOK; + uint64 pktsRxOutOfBuf; + uint64 pktsRxError; +} +#include "vmware_pack_end.h" +UPT1_RxStats; + +/* interrupt moderation level */ +#define UPT1_IML_NONE 0 /* no interrupt moderation */ +#define UPT1_IML_HIGHEST 7 /* least intr generated */ +#define UPT1_IML_ADAPTIVE 8 /* adpative intr moderation */ + +/* values for UPT1_RSSConf.hashFunc */ +#define UPT1_RSS_HASH_TYPE_NONE 0x0 +#define UPT1_RSS_HASH_TYPE_IPV4 0x01 +#define UPT1_RSS_HASH_TYPE_TCP_IPV4 0x02 +#define UPT1_RSS_HASH_TYPE_IPV6 0x04 +#define UPT1_RSS_HASH_TYPE_TCP_IPV6 0x08 + +#define UPT1_RSS_HASH_FUNC_NONE 0x0 +#define UPT1_RSS_HASH_FUNC_TOEPLITZ 0x01 + +#define UPT1_RSS_MAX_KEY_SIZE 40 +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128 + +typedef +#include "vmware_pack_begin.h" +struct UPT1_RSSConf { + uint16 hashType; + uint16 hashFunc; + uint16 hashKeySize; + uint16 indTableSize; + uint8 hashKey[UPT1_RSS_MAX_KEY_SIZE]; + uint8 indTable[UPT1_RSS_MAX_IND_TABLE_SIZE]; +} +#include "vmware_pack_end.h" +UPT1_RSSConf; + +/* features */ +#define UPT1_F_RXCSUM 0x0001 /* rx csum verification */ +#define UPT1_F_RSS 0x0002 +#define UPT1_F_RXVLAN 0x0004 /* VLAN tag stripping */ +#define UPT1_F_LRO 0x0008 + +#endif diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_begin.h b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_begin.h new file mode 100644 index 000000000..df22590f8 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_begin.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_end.h b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_end.h new file mode 100644 index 000000000..df22590f8 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmware_pack_end.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_defs.h b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_defs.h new file mode 100644 index 000000000..296d7e54c --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_defs.h @@ -0,0 +1,833 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2007 VMware, Inc. All rights reserved. + */ + +/* + * vmxnet3_defs.h -- + * + * Definitions shared by device emulation and guest drivers for + * VMXNET3 NIC + */ + +#ifndef _VMXNET3_DEFS_H_ +#define _VMXNET3_DEFS_H_ + +#include "vmxnet3_osdep.h" +#include "upt1_defs.h" + +/* all registers are 32 bit wide */ +/* BAR 1 */ +#define VMXNET3_REG_VRRS 0x0 /* Vmxnet3 Revision Report Selection */ +#define VMXNET3_REG_UVRS 0x8 /* UPT Version Report Selection */ +#define VMXNET3_REG_DSAL 0x10 /* Driver Shared Address Low */ +#define VMXNET3_REG_DSAH 0x18 /* Driver Shared Address High */ +#define VMXNET3_REG_CMD 0x20 /* Command */ +#define VMXNET3_REG_MACL 0x28 /* MAC Address Low */ +#define VMXNET3_REG_MACH 0x30 /* MAC Address High */ +#define VMXNET3_REG_ICR 0x38 /* Interrupt Cause Register */ +#define VMXNET3_REG_ECR 0x40 /* Event Cause Register */ + +#define VMXNET3_REG_WSAL 0xF00 /* Wireless Shared Address Lo */ +#define VMXNET3_REG_WSAH 0xF08 /* Wireless Shared Address Hi */ +#define VMXNET3_REG_WCMD 0xF18 /* Wireless Command */ + +/* BAR 0 */ +#define VMXNET3_REG_IMR 0x0 /* Interrupt Mask Register */ +#define VMXNET3_REG_TXPROD 0x600 /* Tx Producer Index */ +#define VMXNET3_REG_RXPROD 0x800 /* Rx Producer Index for ring 1 */ +#define VMXNET3_REG_RXPROD2 0xA00 /* Rx Producer Index for ring 2 */ + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ + +/* + * The two Vmxnet3 MMIO Register PCI BARs (BAR 0 at offset 10h and BAR 1 at + * offset 14h) as well as the MSI-X BAR are combined into one PhysMem region: + * <-VMXNET3_PT_REG_SIZE-><-VMXNET3_VD_REG_SIZE-><-VMXNET3_MSIX_BAR_SIZE--> + * ------------------------------------------------------------------------- + * |Pass Thru Registers | Virtual Dev Registers | MSI-X Vector/PBA Table | + * ------------------------------------------------------------------------- + * VMXNET3_MSIX_BAR_SIZE is defined in "vmxnet3Int.h" + */ +#define VMXNET3_PHYSMEM_PAGES 4 + +#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */ +#define VMXNET3_REG_ALIGN_MASK 0x7 + +/* I/O Mapped access to registers */ +#define VMXNET3_IO_TYPE_PT 0 +#define VMXNET3_IO_TYPE_VD 1 +#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) & 0xFFFFFF)) +#define VMXNET3_IO_TYPE(addr) ((addr) >> 24) +#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF) + +#ifndef __le16 +#define __le16 uint16 +#endif +#ifndef __le32 +#define __le32 uint32 +#endif +#ifndef __le64 +#define __le64 uint64 +#endif + +typedef enum { + VMXNET3_CMD_FIRST_SET = 0xCAFE0000, + VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, + VMXNET3_CMD_QUIESCE_DEV, + VMXNET3_CMD_RESET_DEV, + VMXNET3_CMD_UPDATE_RX_MODE, + VMXNET3_CMD_UPDATE_MAC_FILTERS, + VMXNET3_CMD_UPDATE_VLAN_FILTERS, + VMXNET3_CMD_UPDATE_RSSIDT, + VMXNET3_CMD_UPDATE_IML, + VMXNET3_CMD_UPDATE_PMCFG, + VMXNET3_CMD_UPDATE_FEATURE, + VMXNET3_CMD_STOP_EMULATION, + VMXNET3_CMD_LOAD_PLUGIN, + VMXNET3_CMD_ACTIVATE_VF, + VMXNET3_CMD_RESERVED3, + VMXNET3_CMD_RESERVED4, + VMXNET3_CMD_REGISTER_MEMREGS, + VMXNET3_CMD_SET_RSS_FIELDS, + + VMXNET3_CMD_FIRST_GET = 0xF00D0000, + VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, + VMXNET3_CMD_GET_STATS, + VMXNET3_CMD_GET_LINK, + VMXNET3_CMD_GET_PERM_MAC_LO, + VMXNET3_CMD_GET_PERM_MAC_HI, + VMXNET3_CMD_GET_DID_LO, + VMXNET3_CMD_GET_DID_HI, + VMXNET3_CMD_GET_DEV_EXTRA_INFO, + VMXNET3_CMD_GET_CONF_INTR, + VMXNET3_CMD_GET_ADAPTIVE_RING_INFO, + VMXNET3_CMD_GET_TXDATA_DESC_SIZE, + VMXNET3_CMD_RESERVED5, +} Vmxnet3_Cmd; + +/* Adaptive Ring Info Flags */ +#define VMXNET3_DISABLE_ADAPTIVE_RING 1 + +/* + * Little Endian layout of bitfields - + * Byte 0 : 7.....len.....0 + * Byte 1 : rsvd gen 13.len.8 + * Byte 2 : 5.msscof.0 ext1 dtype + * Byte 3 : 13...msscof...6 + * + * Big Endian layout of bitfields - + * Byte 0: 13...msscof...6 + * Byte 1 : 5.msscof.0 ext1 dtype + * Byte 2 : rsvd gen 13.len.8 + * Byte 3 : 7.....len.....0 + * + * Thus, le32_to_cpu on the dword will allow the big endian driver to read + * the bit fields correctly. And cpu_to_le32 will convert bitfields + * bit fields written by big endian driver to format required by device. + */ + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxDesc { + __le64 addr; + +#ifdef __BIG_ENDIAN_BITFIELD + uint32 msscof:14; /* MSS, checksum offset, flags */ + uint32 ext1:1; + uint32 dtype:1; /* descriptor type */ + uint32 rsvd:1; + uint32 gen:1; /* generation bit */ + uint32 len:14; +#else + uint32 len:14; + uint32 gen:1; /* generation bit */ + uint32 rsvd:1; + uint32 dtype:1; /* descriptor type */ + uint32 ext1:1; + uint32 msscof:14; /* MSS, checksum offset, flags */ +#endif /* __BIG_ENDIAN_BITFIELD */ + +#ifdef __BIG_ENDIAN_BITFIELD + uint32 tci:16; /* Tag to Insert */ + uint32 ti:1; /* VLAN Tag Insertion */ + uint32 ext2:1; + uint32 cq:1; /* completion request */ + uint32 eop:1; /* End Of Packet */ + uint32 om:2; /* offload mode */ + uint32 hlen:10; /* header len */ +#else + uint32 hlen:10; /* header len */ + uint32 om:2; /* offload mode */ + uint32 eop:1; /* End Of Packet */ + uint32 cq:1; /* completion request */ + uint32 ext2:1; + uint32 ti:1; /* VLAN Tag Insertion */ + uint32 tci:16; /* Tag to Insert */ +#endif /* __BIG_ENDIAN_BITFIELD */ +} +#include "vmware_pack_end.h" +Vmxnet3_TxDesc; + +/* TxDesc.OM values */ +#define VMXNET3_OM_NONE 0 +#define VMXNET3_OM_CSUM 2 +#define VMXNET3_OM_TSO 3 + +/* fields in TxDesc we access w/o using bit fields */ +#define VMXNET3_TXD_EOP_SHIFT 12 +#define VMXNET3_TXD_CQ_SHIFT 13 +#define VMXNET3_TXD_GEN_SHIFT 14 +#define VMXNET3_TXD_EOP_DWORD_SHIFT 3 +#define VMXNET3_TXD_GEN_DWORD_SHIFT 2 + +#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT) +#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT) +#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT) + +#define VMXNET3_TXD_GEN_SIZE 1 +#define VMXNET3_TXD_EOP_SIZE 1 + +#define VMXNET3_HDR_COPY_SIZE 128 + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxDataDesc { + uint8 data[VMXNET3_HDR_COPY_SIZE]; +} +#include "vmware_pack_end.h" +Vmxnet3_TxDataDesc; + +#define VMXNET3_TCD_GEN_SHIFT 31 +#define VMXNET3_TCD_GEN_SIZE 1 +#define VMXNET3_TCD_TXIDX_SHIFT 0 +#define VMXNET3_TCD_TXIDX_SIZE 12 +#define VMXNET3_TCD_GEN_DWORD_SHIFT 3 + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxCompDesc { + uint32 txdIdx:12; /* Index of the EOP TxDesc */ + uint32 ext1:20; + + __le32 ext2; + __le32 ext3; + + uint32 rsvd:24; + uint32 type:7; /* completion type */ + uint32 gen:1; /* generation bit */ +} +#include "vmware_pack_end.h" +Vmxnet3_TxCompDesc; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxDesc { + __le64 addr; + +#ifdef __BIG_ENDIAN_BITFIELD + uint32 gen:1; /* Generation bit */ + uint32 rsvd:15; + uint32 dtype:1; /* Descriptor type */ + uint32 btype:1; /* Buffer Type */ + uint32 len:14; +#else + uint32 len:14; + uint32 btype:1; /* Buffer Type */ + uint32 dtype:1; /* Descriptor type */ + uint32 rsvd:15; + uint32 gen:1; /* Generation bit */ +#endif + __le32 ext1; +} +#include "vmware_pack_end.h" +Vmxnet3_RxDesc; + +/* values of RXD.BTYPE */ +#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */ +#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */ + +/* fields in RxDesc we access w/o using bit fields */ +#define VMXNET3_RXD_BTYPE_SHIFT 14 +#define VMXNET3_RXD_GEN_SHIFT 31 + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxCompDesc { +#ifdef __BIG_ENDIAN_BITFIELD + uint32 ext2:1; + uint32 cnc:1; /* Checksum Not Calculated */ + uint32 rssType:4; /* RSS hash type used */ + uint32 rqID:10; /* rx queue/ring ID */ + uint32 sop:1; /* Start of Packet */ + uint32 eop:1; /* End of Packet */ + uint32 ext1:2; + uint32 rxdIdx:12; /* Index of the RxDesc */ +#else + uint32 rxdIdx:12; /* Index of the RxDesc */ + uint32 ext1:2; + uint32 eop:1; /* End of Packet */ + uint32 sop:1; /* Start of Packet */ + uint32 rqID:10; /* rx queue/ring ID */ + uint32 rssType:4; /* RSS hash type used */ + uint32 cnc:1; /* Checksum Not Calculated */ + uint32 ext2:1; +#endif /* __BIG_ENDIAN_BITFIELD */ + + __le32 rssHash; /* RSS hash value */ + +#ifdef __BIG_ENDIAN_BITFIELD + uint32 tci:16; /* Tag stripped */ + uint32 ts:1; /* Tag is stripped */ + uint32 err:1; /* Error */ + uint32 len:14; /* data length */ +#else + uint32 len:14; /* data length */ + uint32 err:1; /* Error */ + uint32 ts:1; /* Tag is stripped */ + uint32 tci:16; /* Tag stripped */ +#endif /* __BIG_ENDIAN_BITFIELD */ + + +#ifdef __BIG_ENDIAN_BITFIELD + uint32 gen:1; /* generation bit */ + uint32 type:7; /* completion type */ + uint32 fcs:1; /* Frame CRC correct */ + uint32 frg:1; /* IP Fragment */ + uint32 v4:1; /* IPv4 */ + uint32 v6:1; /* IPv6 */ + uint32 ipc:1; /* IP Checksum Correct */ + uint32 tcp:1; /* TCP packet */ + uint32 udp:1; /* UDP packet */ + uint32 tuc:1; /* TCP/UDP Checksum Correct */ + uint32 csum:16; +#else + uint32 csum:16; + uint32 tuc:1; /* TCP/UDP Checksum Correct */ + uint32 udp:1; /* UDP packet */ + uint32 tcp:1; /* TCP packet */ + uint32 ipc:1; /* IP Checksum Correct */ + uint32 v6:1; /* IPv6 */ + uint32 v4:1; /* IPv4 */ + uint32 frg:1; /* IP Fragment */ + uint32 fcs:1; /* Frame CRC correct */ + uint32 type:7; /* completion type */ + uint32 gen:1; /* generation bit */ +#endif /* __BIG_ENDIAN_BITFIELD */ +} +#include "vmware_pack_end.h" +Vmxnet3_RxCompDesc; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxCompDescExt { + __le32 dword1; + uint8 segCnt; /* Number of aggregated packets */ + uint8 dupAckCnt; /* Number of duplicate Acks */ + __le16 tsDelta; /* TCP timestamp difference */ + __le32 dword2; +#ifdef __BIG_ENDIAN_BITFIELD + uint32 gen : 1; /* generation bit */ + uint32 type : 7; /* completion type */ + uint32 fcs : 1; /* Frame CRC correct */ + uint32 frg : 1; /* IP Fragment */ + uint32 v4 : 1; /* IPv4 */ + uint32 v6 : 1; /* IPv6 */ + uint32 ipc : 1; /* IP Checksum Correct */ + uint32 tcp : 1; /* TCP packet */ + uint32 udp : 1; /* UDP packet */ + uint32 tuc : 1; /* TCP/UDP Checksum Correct */ + uint32 mss : 16; +#else + uint32 mss : 16; + uint32 tuc : 1; /* TCP/UDP Checksum Correct */ + uint32 udp : 1; /* UDP packet */ + uint32 tcp : 1; /* TCP packet */ + uint32 ipc : 1; /* IP Checksum Correct */ + uint32 v6 : 1; /* IPv6 */ + uint32 v4 : 1; /* IPv4 */ + uint32 frg : 1; /* IP Fragment */ + uint32 fcs : 1; /* Frame CRC correct */ + uint32 type : 7; /* completion type */ + uint32 gen : 1; /* generation bit */ +#endif /* __BIG_ENDIAN_BITFIELD */ +} +#include "vmware_pack_end.h" +Vmxnet3_RxCompDescExt; + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */ +#define VMXNET3_RCD_TUC_SHIFT 16 +#define VMXNET3_RCD_IPC_SHIFT 19 + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */ +#define VMXNET3_RCD_TYPE_SHIFT 56 +#define VMXNET3_RCD_GEN_SHIFT 63 + +/* csum OK for TCP/UDP pkts over IP */ +#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | 1 << VMXNET3_RCD_IPC_SHIFT) + +/* value of RxCompDesc.rssType */ +#define VMXNET3_RCD_RSS_TYPE_NONE 0 +#define VMXNET3_RCD_RSS_TYPE_IPV4 1 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV4 2 +#define VMXNET3_RCD_RSS_TYPE_IPV6 3 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV6 4 + +/* a union for accessing all cmd/completion descriptors */ +typedef union Vmxnet3_GenericDesc { + __le64 qword[2]; + __le32 dword[4]; + __le16 word[8]; + Vmxnet3_TxDesc txd; + Vmxnet3_RxDesc rxd; + Vmxnet3_TxCompDesc tcd; + Vmxnet3_RxCompDesc rcd; + Vmxnet3_RxCompDescExt rcdExt; +} Vmxnet3_GenericDesc; + +#define VMXNET3_INIT_GEN 1 + +/* Max size of a single tx buffer */ +#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14) + +/* # of tx desc needed for a tx buffer size */ +#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1) / VMXNET3_MAX_TX_BUF_SIZE) + +/* max # of tx descs for a non-tso pkt */ +#define VMXNET3_MAX_TXD_PER_PKT 16 + +/* Max size of a single rx buffer */ +#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) +/* Minimum size of a type 0 buffer */ +#define VMXNET3_MIN_T0_BUF_SIZE 128 +#define VMXNET3_MAX_CSUM_OFFSET 1024 + +/* Ring base address alignment */ +#define VMXNET3_RING_BA_ALIGN 512 +#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1) + +/* Ring size must be a multiple of 32 */ +#define VMXNET3_RING_SIZE_ALIGN 32 +#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1) + +/* Tx Data Ring buffer size must be a multiple of 64 */ +#define VMXNET3_TXDATA_DESC_SIZE_ALIGN 64 +#define VMXNET3_TXDATA_DESC_SIZE_MASK (VMXNET3_TXDATA_DESC_SIZE_ALIGN - 1) + +/* Rx Data Ring buffer size must be a multiple of 64 */ +#define VMXNET3_RXDATA_DESC_SIZE_ALIGN 64 +#define VMXNET3_RXDATA_DESC_SIZE_MASK (VMXNET3_RXDATA_DESC_SIZE_ALIGN - 1) + +/* Max ring size */ +#define VMXNET3_TX_RING_MAX_SIZE 4096 +#define VMXNET3_TC_RING_MAX_SIZE 4096 +#define VMXNET3_RX_RING_MAX_SIZE 4096 +#define VMXNET3_RC_RING_MAX_SIZE 8192 + +#define VMXNET3_TXDATA_DESC_MIN_SIZE 128 +#define VMXNET3_TXDATA_DESC_MAX_SIZE 2048 + +#define VMXNET3_RXDATA_DESC_MAX_SIZE 2048 + +/* a list of reasons for queue stop */ + +#define VMXNET3_ERR_NOEOP 0x80000000 /* cannot find the EOP desc of a pkt */ +#define VMXNET3_ERR_TXD_REUSE 0x80000001 /* reuse a TxDesc before tx completion */ +#define VMXNET3_ERR_BIG_PKT 0x80000002 /* too many TxDesc for a pkt */ +#define VMXNET3_ERR_DESC_NOT_SPT 0x80000003 /* descriptor type not supported */ +#define VMXNET3_ERR_SMALL_BUF 0x80000004 /* type 0 buffer too small */ +#define VMXNET3_ERR_STRESS 0x80000005 /* stress option firing in vmkernel */ +#define VMXNET3_ERR_SWITCH 0x80000006 /* mode switch failure */ +#define VMXNET3_ERR_TXD_INVALID 0x80000007 /* invalid TxDesc */ + +/* completion descriptor types */ +#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */ +#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */ +#define VMXNET3_CDTYPE_RXCOMP_LRO 4 /* Rx Completion Descriptor for LRO */ + +#define VMXNET3_GOS_BITS_UNK 0 /* unknown */ +#define VMXNET3_GOS_BITS_32 1 +#define VMXNET3_GOS_BITS_64 2 + +#define VMXNET3_GOS_TYPE_UNK 0 /* unknown */ +#define VMXNET3_GOS_TYPE_LINUX 1 +#define VMXNET3_GOS_TYPE_WIN 2 +#define VMXNET3_GOS_TYPE_SOLARIS 3 +#define VMXNET3_GOS_TYPE_FREEBSD 4 +#define VMXNET3_GOS_TYPE_PXE 5 + +/* All structures in DriverShared are padded to multiples of 8 bytes */ + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_GOSInfo { +#ifdef __BIG_ENDIAN_BITFIELD + uint32 gosMisc: 10; /* other info about gos */ + uint32 gosVer: 16; /* gos version */ + uint32 gosType: 4; /* which guest */ + uint32 gosBits: 2; /* 32-bit or 64-bit? */ +#else + uint32 gosBits: 2; /* 32-bit or 64-bit? */ + uint32 gosType: 4; /* which guest */ + uint32 gosVer: 16; /* gos version */ + uint32 gosMisc: 10; /* other info about gos */ +#endif /* __BIG_ENDIAN_BITFIELD */ +} +#include "vmware_pack_end.h" +Vmxnet3_GOSInfo; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_DriverInfo { + __le32 version; /* driver version */ + Vmxnet3_GOSInfo gos; + __le32 vmxnet3RevSpt; /* vmxnet3 revision supported */ + __le32 uptVerSpt; /* upt version supported */ +} +#include "vmware_pack_end.h" +Vmxnet3_DriverInfo; + +#define VMXNET3_REV1_MAGIC 0xbabefee1 + +/* + * QueueDescPA must be 128 bytes aligned. It points to an array of + * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc. + * The number of Vmxnet3_TxQueueDesc/Vmxnet3_RxQueueDesc are specified by + * Vmxnet3_MiscConf.numTxQueues/numRxQueues, respectively. + */ +#define VMXNET3_QUEUE_DESC_ALIGN 128 + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_MiscConf { + Vmxnet3_DriverInfo driverInfo; + __le64 uptFeatures; + __le64 ddPA; /* driver data PA */ + __le64 queueDescPA; /* queue descriptor table PA */ + __le32 ddLen; /* driver data len */ + __le32 queueDescLen; /* queue descriptor table len, in bytes */ + __le32 mtu; + __le16 maxNumRxSG; + uint8 numTxQueues; + uint8 numRxQueues; + __le32 reserved[4]; +} +#include "vmware_pack_end.h" +Vmxnet3_MiscConf; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxQueueConf { + __le64 txRingBasePA; + __le64 dataRingBasePA; + __le64 compRingBasePA; + __le64 ddPA; /* driver data */ + __le64 reserved; + __le32 txRingSize; /* # of tx desc */ + __le32 dataRingSize; /* # of data desc */ + __le32 compRingSize; /* # of comp desc */ + __le32 ddLen; /* size of driver data */ + uint8 intrIdx; + uint8 _pad[1]; + __le16 txDataRingDescSize; + uint8 _pad2[4]; +} +#include "vmware_pack_end.h" +Vmxnet3_TxQueueConf; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxQueueConf { + __le64 rxRingBasePA[2]; + __le64 compRingBasePA; + __le64 ddPA; /* driver data */ + __le64 rxDataRingBasePA; + __le32 rxRingSize[2]; /* # of rx desc */ + __le32 compRingSize; /* # of rx comp desc */ + __le32 ddLen; /* size of driver data */ + uint8 intrIdx; + uint8 _pad1[1]; + __le16 rxDataRingDescSize; /* size of rx data ring buffer */ + uint8 _pad2[4]; +} +#include "vmware_pack_end.h" +Vmxnet3_RxQueueConf; + +enum vmxnet3_intr_mask_mode { + VMXNET3_IMM_AUTO = 0, + VMXNET3_IMM_ACTIVE = 1, + VMXNET3_IMM_LAZY = 2 +}; + +enum vmxnet3_intr_type { + VMXNET3_IT_AUTO = 0, + VMXNET3_IT_INTX = 1, + VMXNET3_IT_MSI = 2, + VMXNET3_IT_MSIX = 3 +}; + +#define VMXNET3_MAX_TX_QUEUES 8 +#define VMXNET3_MAX_RX_QUEUES 16 +/* addition 1 for events */ +#define VMXNET3_MAX_INTRS 25 + +/* value of intrCtrl */ +#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */ + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_IntrConf { + Bool autoMask; + uint8 numIntrs; /* # of interrupts */ + uint8 eventIntrIdx; + uint8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for each intr */ + __le32 intrCtrl; + __le32 reserved[2]; +} +#include "vmware_pack_end.h" +Vmxnet3_IntrConf; + +/* one bit per VLAN ID, the size is in the units of uint32 */ +#define VMXNET3_VFT_SIZE (4096 / (sizeof(uint32) * 8)) + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_QueueStatus { + Bool stopped; + uint8 _pad[3]; + __le32 error; +} +#include "vmware_pack_end.h" +Vmxnet3_QueueStatus; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxQueueCtrl { + __le32 txNumDeferred; + __le32 txThreshold; + __le64 reserved; +} +#include "vmware_pack_end.h" +Vmxnet3_TxQueueCtrl; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxQueueCtrl { + Bool updateRxProd; + uint8 _pad[7]; + __le64 reserved; +} +#include "vmware_pack_end.h" +Vmxnet3_RxQueueCtrl; + +#define VMXNET3_RXM_UCAST 0x01 /* unicast only */ +#define VMXNET3_RXM_MCAST 0x02 /* multicast passing the filters */ +#define VMXNET3_RXM_BCAST 0x04 /* broadcast only */ +#define VMXNET3_RXM_ALL_MULTI 0x08 /* all multicast */ +#define VMXNET3_RXM_PROMISC 0x10 /* promiscuous */ + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxFilterConf { + __le32 rxMode; /* VMXNET3_RXM_xxx */ + __le16 mfTableLen; /* size of the multicast filter table */ + __le16 _pad1; + __le64 mfTablePA; /* PA of the multicast filters table */ + __le32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */ +} +#include "vmware_pack_end.h" +Vmxnet3_RxFilterConf; + +#define VMXNET3_PM_MAX_FILTERS 6 +#define VMXNET3_PM_MAX_PATTERN_SIZE 128 +#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8) + +#define VMXNET3_PM_WAKEUP_MAGIC 0x01 /* wake up on magic pkts */ +#define VMXNET3_PM_WAKEUP_FILTER 0x02 /* wake up on pkts matching filters */ + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_PM_PktFilter { + uint8 maskSize; + uint8 patternSize; + uint8 mask[VMXNET3_PM_MAX_MASK_SIZE]; + uint8 pattern[VMXNET3_PM_MAX_PATTERN_SIZE]; + uint8 pad[6]; +} +#include "vmware_pack_end.h" +Vmxnet3_PM_PktFilter; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_PMConf { + __le16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */ + uint8 numFilters; + uint8 pad[5]; + Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_FILTERS]; +} +#include "vmware_pack_end.h" +Vmxnet3_PMConf; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_VariableLenConfDesc { + __le32 confVer; + __le32 confLen; + __le64 confPA; +} +#include "vmware_pack_end.h" +Vmxnet3_VariableLenConfDesc; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_DSDevRead { + /* read-only region for device, read by dev in response to a SET cmd */ + Vmxnet3_MiscConf misc; + Vmxnet3_IntrConf intrConf; + Vmxnet3_RxFilterConf rxFilterConf; + Vmxnet3_VariableLenConfDesc rssConfDesc; + Vmxnet3_VariableLenConfDesc pmConfDesc; + Vmxnet3_VariableLenConfDesc pluginConfDesc; +} +#include "vmware_pack_end.h" +Vmxnet3_DSDevRead; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_TxQueueDesc { + Vmxnet3_TxQueueCtrl ctrl; + Vmxnet3_TxQueueConf conf; + /* Driver read after a GET command */ + Vmxnet3_QueueStatus status; + UPT1_TxStats stats; + uint8 _pad[88]; /* 128 aligned */ +} +#include "vmware_pack_end.h" +Vmxnet3_TxQueueDesc; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_RxQueueDesc { + Vmxnet3_RxQueueCtrl ctrl; + Vmxnet3_RxQueueConf conf; + /* Driver read after a GET command */ + Vmxnet3_QueueStatus status; + UPT1_RxStats stats; + uint8 _pad[88]; /* 128 aligned */ +} +#include "vmware_pack_end.h" +Vmxnet3_RxQueueDesc; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_SetPolling { + uint8 enablePolling; +} +#include "vmware_pack_end.h" +Vmxnet3_SetPolling; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_MemoryRegion { + __le64 startPA; + __le32 length; + __le16 txQueueBits; /* bit n corresponding to tx queue n */ + __le16 rxQueueBits; /* bit n corresponding to rx queue n */ +} +#include "vmware_pack_end.h" +Vmxnet3_MemoryRegion; + +#define MAX_MEMORY_REGION_PER_QUEUE 16 +#define MAX_MEMORY_REGION_PER_DEVICE 256 + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_MemRegs { + __le16 numRegs; + __le16 pad[3]; + Vmxnet3_MemoryRegion memRegs[1]; +} +#include "vmware_pack_end.h" +Vmxnet3_MemRegs; + +typedef enum Vmxnet3_RSSField { + VMXNET3_RSS_FIELDS_TCPIP4 = 0x0001, + VMXNET3_RSS_FIELDS_TCPIP6 = 0x0002, + VMXNET3_RSS_FIELDS_UDPIP4 = 0x0004, + VMXNET3_RSS_FIELDS_UDPIP6 = 0x0008, + VMXNET3_RSS_FIELDS_ESPIP4 = 0x0010, + VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020, +} Vmxnet3_RSSField; + +/* + * If the command data <= 16 bytes, use the shared memory direcly. + * Otherwise, use the variable length configuration descriptor. + */ +typedef +#include "vmware_pack_begin.h" +union Vmxnet3_CmdInfo { + Vmxnet3_VariableLenConfDesc varConf; + Vmxnet3_SetPolling setPolling; + Vmxnet3_RSSField setRSSFields; + __le16 reserved[2]; + __le64 data[2]; +} +#include "vmware_pack_end.h" +Vmxnet3_CmdInfo; + +typedef +#include "vmware_pack_begin.h" +struct Vmxnet3_DriverShared { + __le32 magic; + __le32 pad; /* make devRead start at 64-bit boundaries */ + Vmxnet3_DSDevRead devRead; + __le32 ecr; + __le32 reserved; + + union { + __le32 reserved1[4]; + Vmxnet3_CmdInfo cmdInfo; /* only valid in the context of executing the + * relevant command + */ + } cu; +} +#include "vmware_pack_end.h" +Vmxnet3_DriverShared; + +#define VMXNET3_ECR_RQERR (1 << 0) +#define VMXNET3_ECR_TQERR (1 << 1) +#define VMXNET3_ECR_LINK (1 << 2) +#define VMXNET3_ECR_DIC (1 << 3) +#define VMXNET3_ECR_DEBUG (1 << 4) + +/* flip the gen bit of a ring */ +#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1) + +/* only use this if moving the idx won't affect the gen bit */ +#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \ +do {\ + (idx)++;\ + if (UNLIKELY((idx) == (ring_size))) {\ + (idx) = 0;\ + }\ +} while (0) + +#define VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid) \ + vfTable[vid >> 5] |= (1 << (vid & 31)) +#define VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid) \ + vfTable[vid >> 5] &= ~(1 << (vid & 31)) + +#define VMXNET3_VFTABLE_ENTRY_IS_SET(vfTable, vid) \ + ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0) + +#define VMXNET3_MAX_MTU 9000 +#define VMXNET3_MIN_MTU 60 + +#define VMXNET3_LINK_UP (10000 << 16 | 1) // 10 Gbps, up +#define VMXNET3_LINK_DOWN 0 + +#define VMXWIFI_DRIVER_SHARED_LEN 8192 + +#define VMXNET3_DID_PASSTHRU 0xFFFF + +#endif /* _VMXNET3_DEFS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_osdep.h b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_osdep.h new file mode 100644 index 000000000..c9b92b049 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/base/vmxnet3_osdep.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _VMXNET3_OSDEP_H +#define _VMXNET3_OSDEP_H + +typedef uint64_t uint64; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; +typedef int bool; +typedef char Bool; + +#ifndef UNLIKELY +#define UNLIKELY(x) __builtin_expect((x),0) +#endif /* unlikely */ + +#endif /* _VMXNET3_OSDEP_H */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/meson.build b/src/seastar/dpdk/drivers/net/vmxnet3/meson.build new file mode 100644 index 000000000..a92bd2868 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/meson.build @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +allow_experimental_apis = true +sources += files( + 'vmxnet3_ethdev.c', + 'vmxnet3_rxtx.c', +) + +error_cflags = [ + '-Wno-unused-parameter', '-Wno-unused-value', + '-Wno-strict-aliasing', '-Wno-format-extra-args', +] +foreach flag: error_cflags + if cc.has_argument(flag) + cflags += flag + endif +endforeach diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/rte_pmd_vmxnet3_version.map b/src/seastar/dpdk/drivers/net/vmxnet3/rte_pmd_vmxnet3_version.map new file mode 100644 index 000000000..ef3539840 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/rte_pmd_vmxnet3_version.map @@ -0,0 +1,4 @@ +DPDK_2.0 { + + local: *; +}; diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c new file mode 100644 index 000000000..f54536b49 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -0,0 +1,1445 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#include <sys/queue.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <fcntl.h> +#include <inttypes.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_cycles.h> + +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_branch_prediction.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_alarm.h> +#include <rte_ether.h> +#include <rte_ethdev_driver.h> +#include <rte_ethdev_pci.h> +#include <rte_string_fns.h> +#include <rte_malloc.h> +#include <rte_dev.h> + +#include "base/vmxnet3_defs.h" + +#include "vmxnet3_ring.h" +#include "vmxnet3_logs.h" +#include "vmxnet3_ethdev.h" + +#define PROCESS_SYS_EVENTS 0 + +#define VMXNET3_TX_MAX_SEG UINT8_MAX + +#define VMXNET3_TX_OFFLOAD_CAP \ + (DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_MULTI_SEGS) + +#define VMXNET3_RX_OFFLOAD_CAP \ + (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_VLAN_FILTER | \ + DEV_RX_OFFLOAD_SCATTER | \ + DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_LRO | \ + DEV_RX_OFFLOAD_JUMBO_FRAME) + +static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev); +static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev); +static int vmxnet3_dev_configure(struct rte_eth_dev *dev); +static int vmxnet3_dev_start(struct rte_eth_dev *dev); +static void vmxnet3_dev_stop(struct rte_eth_dev *dev); +static void vmxnet3_dev_close(struct rte_eth_dev *dev); +static void vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set); +static void vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev); +static void vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev); +static void vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev); +static void vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev); +static int __vmxnet3_dev_link_update(struct rte_eth_dev *dev, + int wait_to_complete); +static int vmxnet3_dev_link_update(struct rte_eth_dev *dev, + int wait_to_complete); +static void vmxnet3_hw_stats_save(struct vmxnet3_hw *hw); +static int vmxnet3_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats); +static void vmxnet3_dev_stats_reset(struct rte_eth_dev *dev); +static int vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats, + unsigned int n); +static int vmxnet3_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, unsigned int n); +static void vmxnet3_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static const uint32_t * +vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev); +static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, + uint16_t vid, int on); +static int vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask); +static int vmxnet3_mac_addr_set(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); +static void vmxnet3_interrupt_handler(void *param); + +int vmxnet3_logtype_init; +int vmxnet3_logtype_driver; + +/* + * The set of PCI devices this driver supports + */ +#define VMWARE_PCI_VENDOR_ID 0x15AD +#define VMWARE_DEV_ID_VMXNET3 0x07B0 +static const struct rte_pci_id pci_id_vmxnet3_map[] = { + { RTE_PCI_DEVICE(VMWARE_PCI_VENDOR_ID, VMWARE_DEV_ID_VMXNET3) }, + { .vendor_id = 0, /* sentinel */ }, +}; + +static const struct eth_dev_ops vmxnet3_eth_dev_ops = { + .dev_configure = vmxnet3_dev_configure, + .dev_start = vmxnet3_dev_start, + .dev_stop = vmxnet3_dev_stop, + .dev_close = vmxnet3_dev_close, + .promiscuous_enable = vmxnet3_dev_promiscuous_enable, + .promiscuous_disable = vmxnet3_dev_promiscuous_disable, + .allmulticast_enable = vmxnet3_dev_allmulticast_enable, + .allmulticast_disable = vmxnet3_dev_allmulticast_disable, + .link_update = vmxnet3_dev_link_update, + .stats_get = vmxnet3_dev_stats_get, + .xstats_get_names = vmxnet3_dev_xstats_get_names, + .xstats_get = vmxnet3_dev_xstats_get, + .stats_reset = vmxnet3_dev_stats_reset, + .mac_addr_set = vmxnet3_mac_addr_set, + .dev_infos_get = vmxnet3_dev_info_get, + .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get, + .vlan_filter_set = vmxnet3_dev_vlan_filter_set, + .vlan_offload_set = vmxnet3_dev_vlan_offload_set, + .rx_queue_setup = vmxnet3_dev_rx_queue_setup, + .rx_queue_release = vmxnet3_dev_rx_queue_release, + .tx_queue_setup = vmxnet3_dev_tx_queue_setup, + .tx_queue_release = vmxnet3_dev_tx_queue_release, +}; + +struct vmxnet3_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +/* tx_qX_ is prepended to the name string here */ +static const struct vmxnet3_xstats_name_off vmxnet3_txq_stat_strings[] = { + {"drop_total", offsetof(struct vmxnet3_txq_stats, drop_total)}, + {"drop_too_many_segs", offsetof(struct vmxnet3_txq_stats, drop_too_many_segs)}, + {"drop_tso", offsetof(struct vmxnet3_txq_stats, drop_tso)}, + {"tx_ring_full", offsetof(struct vmxnet3_txq_stats, tx_ring_full)}, +}; + +/* rx_qX_ is prepended to the name string here */ +static const struct vmxnet3_xstats_name_off vmxnet3_rxq_stat_strings[] = { + {"drop_total", offsetof(struct vmxnet3_rxq_stats, drop_total)}, + {"drop_err", offsetof(struct vmxnet3_rxq_stats, drop_err)}, + {"drop_fcs", offsetof(struct vmxnet3_rxq_stats, drop_fcs)}, + {"rx_buf_alloc_failure", offsetof(struct vmxnet3_rxq_stats, rx_buf_alloc_failure)}, +}; + +static const struct rte_memzone * +gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size, + const char *post_string, int socket_id, + uint16_t align, bool reuse) +{ + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + snprintf(z_name, sizeof(z_name), "eth_p%d_%s", + dev->data->port_id, post_string); + + mz = rte_memzone_lookup(z_name); + if (!reuse) { + if (mz) + rte_memzone_free(mz); + return rte_memzone_reserve_aligned(z_name, size, socket_id, + RTE_MEMZONE_IOVA_CONTIG, align); + } + + if (mz) + return mz; + + return rte_memzone_reserve_aligned(z_name, size, socket_id, + RTE_MEMZONE_IOVA_CONTIG, align); +} + +/* + * This function is based on vmxnet3_disable_intr() + */ +static void +vmxnet3_disable_intr(struct vmxnet3_hw *hw) +{ + int i; + + PMD_INIT_FUNC_TRACE(); + + hw->shared->devRead.intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL; + for (i = 0; i < hw->num_intrs; i++) + VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1); +} + +static void +vmxnet3_enable_intr(struct vmxnet3_hw *hw) +{ + int i; + + PMD_INIT_FUNC_TRACE(); + + hw->shared->devRead.intrConf.intrCtrl &= ~VMXNET3_IC_DISABLE_ALL; + for (i = 0; i < hw->num_intrs; i++) + VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 0); +} + +/* + * Gets tx data ring descriptor size. + */ +static uint16_t +eth_vmxnet3_txdata_get(struct vmxnet3_hw *hw) +{ + uint16 txdata_desc_size; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_TXDATA_DESC_SIZE); + txdata_desc_size = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + + return (txdata_desc_size < VMXNET3_TXDATA_DESC_MIN_SIZE || + txdata_desc_size > VMXNET3_TXDATA_DESC_MAX_SIZE || + txdata_desc_size & VMXNET3_TXDATA_DESC_SIZE_MASK) ? + sizeof(struct Vmxnet3_TxDataDesc) : txdata_desc_size; +} + +/* + * It returns 0 on success. + */ +static int +eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev; + struct vmxnet3_hw *hw = eth_dev->data->dev_private; + uint32_t mac_hi, mac_lo, ver; + struct rte_eth_link link; + + PMD_INIT_FUNC_TRACE(); + + eth_dev->dev_ops = &vmxnet3_eth_dev_ops; + eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; + eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; + pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + + /* + * for secondary processes, we don't initialize any further as primary + * has already done this work. + */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + /* Vendor and Device ID need to be set before init of shared code */ + hw->device_id = pci_dev->id.device_id; + hw->vendor_id = pci_dev->id.vendor_id; + hw->hw_addr0 = (void *)pci_dev->mem_resource[0].addr; + hw->hw_addr1 = (void *)pci_dev->mem_resource[1].addr; + + hw->num_rx_queues = 1; + hw->num_tx_queues = 1; + hw->bufs_per_pkt = 1; + + /* Check h/w version compatibility with driver. */ + ver = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_VRRS); + PMD_INIT_LOG(DEBUG, "Hardware version : %d", ver); + + if (ver & (1 << VMXNET3_REV_4)) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_4); + hw->version = VMXNET3_REV_4 + 1; + } else if (ver & (1 << VMXNET3_REV_3)) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_3); + hw->version = VMXNET3_REV_3 + 1; + } else if (ver & (1 << VMXNET3_REV_2)) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_2); + hw->version = VMXNET3_REV_2 + 1; + } else if (ver & (1 << VMXNET3_REV_1)) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_1); + hw->version = VMXNET3_REV_1 + 1; + } else { + PMD_INIT_LOG(ERR, "Incompatible hardware version: %d", ver); + return -EIO; + } + + PMD_INIT_LOG(DEBUG, "Using device version %d\n", hw->version); + + /* Check UPT version compatibility with driver. */ + ver = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_UVRS); + PMD_INIT_LOG(DEBUG, "UPT hardware version : %d", ver); + if (ver & 0x1) + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_UVRS, 1); + else { + PMD_INIT_LOG(ERR, "Incompatible UPT version."); + return -EIO; + } + + /* Getting MAC Address */ + mac_lo = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACL); + mac_hi = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACH); + memcpy(hw->perm_addr, &mac_lo, 4); + memcpy(hw->perm_addr + 4, &mac_hi, 2); + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", ETHER_ADDR_LEN * + VMXNET3_MAX_MAC_ADDRS, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + ETHER_ADDR_LEN * VMXNET3_MAX_MAC_ADDRS); + return -ENOMEM; + } + /* Copy the permanent MAC address */ + ether_addr_copy((struct ether_addr *) hw->perm_addr, + ð_dev->data->mac_addrs[0]); + + PMD_INIT_LOG(DEBUG, "MAC Address : %02x:%02x:%02x:%02x:%02x:%02x", + hw->perm_addr[0], hw->perm_addr[1], hw->perm_addr[2], + hw->perm_addr[3], hw->perm_addr[4], hw->perm_addr[5]); + + /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + /* Put device in Quiesce Mode */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); + + /* allow untagged pkts */ + VMXNET3_SET_VFTABLE_ENTRY(hw->shadow_vfta, 0); + + hw->txdata_desc_size = VMXNET3_VERSION_GE_3(hw) ? + eth_vmxnet3_txdata_get(hw) : sizeof(struct Vmxnet3_TxDataDesc); + + hw->rxdata_desc_size = VMXNET3_VERSION_GE_3(hw) ? + VMXNET3_DEF_RXDATA_DESC_SIZE : 0; + RTE_ASSERT((hw->rxdata_desc_size & ~VMXNET3_RXDATA_DESC_SIZE_MASK) == + hw->rxdata_desc_size); + + /* clear shadow stats */ + memset(hw->saved_tx_stats, 0, sizeof(hw->saved_tx_stats)); + memset(hw->saved_rx_stats, 0, sizeof(hw->saved_rx_stats)); + + /* clear snapshot stats */ + memset(hw->snapshot_tx_stats, 0, sizeof(hw->snapshot_tx_stats)); + memset(hw->snapshot_rx_stats, 0, sizeof(hw->snapshot_rx_stats)); + + /* set the initial link status */ + memset(&link, 0, sizeof(link)); + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = ETH_SPEED_NUM_10G; + link.link_autoneg = ETH_LINK_FIXED; + rte_eth_linkstatus_set(eth_dev, &link); + + return 0; +} + +static int +eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct vmxnet3_hw *hw = eth_dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + if (hw->adapter_stopped == 0) { + PMD_INIT_LOG(DEBUG, "Device has not been closed."); + return -EBUSY; + } + + eth_dev->dev_ops = NULL; + eth_dev->rx_pkt_burst = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; + + return 0; +} + +static int eth_vmxnet3_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_probe(pci_dev, + sizeof(struct vmxnet3_hw), eth_vmxnet3_dev_init); +} + +static int eth_vmxnet3_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, eth_vmxnet3_dev_uninit); +} + +static struct rte_pci_driver rte_vmxnet3_pmd = { + .id_table = pci_id_vmxnet3_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_vmxnet3_pci_probe, + .remove = eth_vmxnet3_pci_remove, +}; + +static int +vmxnet3_dev_configure(struct rte_eth_dev *dev) +{ + const struct rte_memzone *mz; + struct vmxnet3_hw *hw = dev->data->dev_private; + size_t size; + + PMD_INIT_FUNC_TRACE(); + + if (dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES || + dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES) { + PMD_INIT_LOG(ERR, "ERROR: Number of queues not supported"); + return -EINVAL; + } + + if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "ERROR: Number of rx queues not power of 2"); + return -EINVAL; + } + + size = dev->data->nb_rx_queues * sizeof(struct Vmxnet3_TxQueueDesc) + + dev->data->nb_tx_queues * sizeof(struct Vmxnet3_RxQueueDesc); + + if (size > UINT16_MAX) + return -EINVAL; + + hw->num_rx_queues = (uint8_t)dev->data->nb_rx_queues; + hw->num_tx_queues = (uint8_t)dev->data->nb_tx_queues; + + /* + * Allocate a memzone for Vmxnet3_DriverShared - Vmxnet3_DSDevRead + * on current socket + */ + mz = gpa_zone_reserve(dev, sizeof(struct Vmxnet3_DriverShared), + "shared", rte_socket_id(), 8, 1); + + if (mz == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating shared zone"); + return -ENOMEM; + } + memset(mz->addr, 0, mz->len); + + hw->shared = mz->addr; + hw->sharedPA = mz->iova; + + /* + * Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc + * on current socket. + * + * We cannot reuse this memzone from previous allocation as its size + * depends on the number of tx and rx queues, which could be different + * from one config to another. + */ + mz = gpa_zone_reserve(dev, size, "queuedesc", rte_socket_id(), + VMXNET3_QUEUE_DESC_ALIGN, 0); + if (mz == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); + return -ENOMEM; + } + memset(mz->addr, 0, mz->len); + + hw->tqd_start = (Vmxnet3_TxQueueDesc *)mz->addr; + hw->rqd_start = (Vmxnet3_RxQueueDesc *)(hw->tqd_start + hw->num_tx_queues); + + hw->queueDescPA = mz->iova; + hw->queue_desc_len = (uint16_t)size; + + if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { + /* Allocate memory structure for UPT1_RSSConf and configure */ + mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), + "rss_conf", rte_socket_id(), + RTE_CACHE_LINE_SIZE, 1); + if (mz == NULL) { + PMD_INIT_LOG(ERR, + "ERROR: Creating rss_conf structure zone"); + return -ENOMEM; + } + memset(mz->addr, 0, mz->len); + + hw->rss_conf = mz->addr; + hw->rss_confPA = mz->iova; + } + + return 0; +} + +static void +vmxnet3_write_mac(struct vmxnet3_hw *hw, const uint8_t *addr) +{ + uint32_t val; + + PMD_INIT_LOG(DEBUG, + "Writing MAC Address : %02x:%02x:%02x:%02x:%02x:%02x", + addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); + + memcpy(&val, addr, 4); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACL, val); + + memcpy(&val, addr + 4, 2); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACH, val); +} + +static int +vmxnet3_dev_setup_memreg(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + Vmxnet3_DriverShared *shared = hw->shared; + Vmxnet3_CmdInfo *cmdInfo; + struct rte_mempool *mp[VMXNET3_MAX_RX_QUEUES]; + uint8_t index[VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES]; + uint32_t num, i, j, size; + + if (hw->memRegsPA == 0) { + const struct rte_memzone *mz; + + size = sizeof(Vmxnet3_MemRegs) + + (VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES) * + sizeof(Vmxnet3_MemoryRegion); + + mz = gpa_zone_reserve(dev, size, "memRegs", rte_socket_id(), 8, + 1); + if (mz == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating memRegs zone"); + return -ENOMEM; + } + memset(mz->addr, 0, mz->len); + hw->memRegs = mz->addr; + hw->memRegsPA = mz->iova; + } + + num = hw->num_rx_queues; + + for (i = 0; i < num; i++) { + vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; + + mp[i] = rxq->mp; + index[i] = 1 << i; + } + + /* + * The same mempool could be used by multiple queues. In such a case, + * remove duplicate mempool entries. Only one entry is kept with + * bitmask indicating queues that are using this mempool. + */ + for (i = 1; i < num; i++) { + for (j = 0; j < i; j++) { + if (mp[i] == mp[j]) { + mp[i] = NULL; + index[j] |= 1 << i; + break; + } + } + } + + j = 0; + for (i = 0; i < num; i++) { + if (mp[i] == NULL) + continue; + + Vmxnet3_MemoryRegion *mr = &hw->memRegs->memRegs[j]; + + mr->startPA = + (uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->iova; + mr->length = STAILQ_FIRST(&mp[i]->mem_list)->len <= INT32_MAX ? + STAILQ_FIRST(&mp[i]->mem_list)->len : INT32_MAX; + mr->txQueueBits = index[i]; + mr->rxQueueBits = index[i]; + + PMD_INIT_LOG(INFO, + "index: %u startPA: %" PRIu64 " length: %u, " + "rxBits: %x", + j, mr->startPA, mr->length, mr->rxQueueBits); + j++; + } + hw->memRegs->numRegs = j; + PMD_INIT_LOG(INFO, "numRegs: %u", j); + + size = sizeof(Vmxnet3_MemRegs) + + (j - 1) * sizeof(Vmxnet3_MemoryRegion); + + cmdInfo = &shared->cu.cmdInfo; + cmdInfo->varConf.confVer = 1; + cmdInfo->varConf.confLen = size; + cmdInfo->varConf.confPA = hw->memRegsPA; + + return 0; +} + +static int +vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) +{ + struct rte_eth_conf port_conf = dev->data->dev_conf; + struct vmxnet3_hw *hw = dev->data->dev_private; + uint32_t mtu = dev->data->mtu; + Vmxnet3_DriverShared *shared = hw->shared; + Vmxnet3_DSDevRead *devRead = &shared->devRead; + uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; + uint32_t i; + int ret; + + hw->mtu = mtu; + + shared->magic = VMXNET3_REV1_MAGIC; + devRead->misc.driverInfo.version = VMXNET3_DRIVER_VERSION_NUM; + + /* Setting up Guest OS information */ + devRead->misc.driverInfo.gos.gosBits = sizeof(void *) == 4 ? + VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64; + devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX; + devRead->misc.driverInfo.vmxnet3RevSpt = 1; + devRead->misc.driverInfo.uptVerSpt = 1; + + devRead->misc.mtu = rte_le_to_cpu_32(mtu); + devRead->misc.queueDescPA = hw->queueDescPA; + devRead->misc.queueDescLen = hw->queue_desc_len; + devRead->misc.numTxQueues = hw->num_tx_queues; + devRead->misc.numRxQueues = hw->num_rx_queues; + + /* + * Set number of interrupts to 1 + * PMD by default disables all the interrupts but this is MUST + * to activate device. It needs at least one interrupt for + * link events to handle + */ + hw->num_intrs = devRead->intrConf.numIntrs = 1; + devRead->intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL; + + for (i = 0; i < hw->num_tx_queues; i++) { + Vmxnet3_TxQueueDesc *tqd = &hw->tqd_start[i]; + vmxnet3_tx_queue_t *txq = dev->data->tx_queues[i]; + + txq->shared = &hw->tqd_start[i]; + + tqd->ctrl.txNumDeferred = 0; + tqd->ctrl.txThreshold = 1; + tqd->conf.txRingBasePA = txq->cmd_ring.basePA; + tqd->conf.compRingBasePA = txq->comp_ring.basePA; + tqd->conf.dataRingBasePA = txq->data_ring.basePA; + + tqd->conf.txRingSize = txq->cmd_ring.size; + tqd->conf.compRingSize = txq->comp_ring.size; + tqd->conf.dataRingSize = txq->data_ring.size; + tqd->conf.txDataRingDescSize = txq->txdata_desc_size; + tqd->conf.intrIdx = txq->comp_ring.intr_idx; + tqd->status.stopped = TRUE; + tqd->status.error = 0; + memset(&tqd->stats, 0, sizeof(tqd->stats)); + } + + for (i = 0; i < hw->num_rx_queues; i++) { + Vmxnet3_RxQueueDesc *rqd = &hw->rqd_start[i]; + vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; + + rxq->shared = &hw->rqd_start[i]; + + rqd->conf.rxRingBasePA[0] = rxq->cmd_ring[0].basePA; + rqd->conf.rxRingBasePA[1] = rxq->cmd_ring[1].basePA; + rqd->conf.compRingBasePA = rxq->comp_ring.basePA; + + rqd->conf.rxRingSize[0] = rxq->cmd_ring[0].size; + rqd->conf.rxRingSize[1] = rxq->cmd_ring[1].size; + rqd->conf.compRingSize = rxq->comp_ring.size; + rqd->conf.intrIdx = rxq->comp_ring.intr_idx; + if (VMXNET3_VERSION_GE_3(hw)) { + rqd->conf.rxDataRingBasePA = rxq->data_ring.basePA; + rqd->conf.rxDataRingDescSize = rxq->data_desc_size; + } + rqd->status.stopped = TRUE; + rqd->status.error = 0; + memset(&rqd->stats, 0, sizeof(rqd->stats)); + } + + /* RxMode set to 0 of VMXNET3_RXM_xxx */ + devRead->rxFilterConf.rxMode = 0; + + /* Setting up feature flags */ + if (rx_offloads & DEV_RX_OFFLOAD_CHECKSUM) + devRead->misc.uptFeatures |= VMXNET3_F_RXCSUM; + + if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) { + devRead->misc.uptFeatures |= VMXNET3_F_LRO; + devRead->misc.maxNumRxSG = 0; + } + + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { + ret = vmxnet3_rss_configure(dev); + if (ret != VMXNET3_SUCCESS) + return ret; + + devRead->misc.uptFeatures |= VMXNET3_F_RSS; + devRead->rssConfDesc.confVer = 1; + devRead->rssConfDesc.confLen = sizeof(struct VMXNET3_RSSConf); + devRead->rssConfDesc.confPA = hw->rss_confPA; + } + + ret = vmxnet3_dev_vlan_offload_set(dev, + ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); + if (ret) + return ret; + + vmxnet3_write_mac(hw, dev->data->mac_addrs->addr_bytes); + + return VMXNET3_SUCCESS; +} + +/* + * Configure device link speed and setup link. + * Must be called after eth_vmxnet3_dev_init. Other wise it might fail + * It returns 0 on success. + */ +static int +vmxnet3_dev_start(struct rte_eth_dev *dev) +{ + int ret; + struct vmxnet3_hw *hw = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + /* Save stats before it is reset by CMD_ACTIVATE */ + vmxnet3_hw_stats_save(hw); + + ret = vmxnet3_setup_driver_shared(dev); + if (ret != VMXNET3_SUCCESS) + return ret; + + /* check if lsc interrupt feature is enabled */ + if (dev->data->dev_conf.intr_conf.lsc) { + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + + /* Setup interrupt callback */ + rte_intr_callback_register(&pci_dev->intr_handle, + vmxnet3_interrupt_handler, dev); + + if (rte_intr_enable(&pci_dev->intr_handle) < 0) { + PMD_INIT_LOG(ERR, "interrupt enable failed"); + return -EIO; + } + } + + /* Exchange shared data with device */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAL, + VMXNET3_GET_ADDR_LO(hw->sharedPA)); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAH, + VMXNET3_GET_ADDR_HI(hw->sharedPA)); + + /* Activate device by register write */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + + if (ret != 0) { + PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL"); + return -EINVAL; + } + + /* Setup memory region for rx buffers */ + ret = vmxnet3_dev_setup_memreg(dev); + if (ret == 0) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_REGISTER_MEMREGS); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + if (ret != 0) + PMD_INIT_LOG(DEBUG, + "Failed in setup memory region cmd\n"); + ret = 0; + } else { + PMD_INIT_LOG(DEBUG, "Failed to setup memory region\n"); + } + + if (VMXNET3_VERSION_GE_4(hw)) { + /* Check for additional RSS */ + ret = vmxnet3_v4_rss_configure(dev); + if (ret != VMXNET3_SUCCESS) { + PMD_INIT_LOG(ERR, "Failed to configure v4 RSS"); + return ret; + } + } + + /* Disable interrupts */ + vmxnet3_disable_intr(hw); + + /* + * Load RX queues with blank mbufs and update next2fill index for device + * Update RxMode of the device + */ + ret = vmxnet3_dev_rxtx_init(dev); + if (ret != VMXNET3_SUCCESS) { + PMD_INIT_LOG(ERR, "Device queue init: UNSUCCESSFUL"); + return ret; + } + + hw->adapter_stopped = FALSE; + + /* Setting proper Rx Mode and issue Rx Mode Update command */ + vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST, 1); + + if (dev->data->dev_conf.intr_conf.lsc) { + vmxnet3_enable_intr(hw); + + /* + * Update link state from device since this won't be + * done upon starting with lsc in use. This is done + * only after enabling interrupts to avoid any race + * where the link state could change without an + * interrupt being fired. + */ + __vmxnet3_dev_link_update(dev, 0); + } + + return VMXNET3_SUCCESS; +} + +/* + * Stop device: disable rx and tx functions to allow for reconfiguring. + */ +static void +vmxnet3_dev_stop(struct rte_eth_dev *dev) +{ + struct rte_eth_link link; + struct vmxnet3_hw *hw = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (hw->adapter_stopped == 1) { + PMD_INIT_LOG(DEBUG, "Device already stopped."); + return; + } + + /* disable interrupts */ + vmxnet3_disable_intr(hw); + + if (dev->data->dev_conf.intr_conf.lsc) { + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + + rte_intr_disable(&pci_dev->intr_handle); + + rte_intr_callback_unregister(&pci_dev->intr_handle, + vmxnet3_interrupt_handler, dev); + } + + /* quiesce the device first */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAL, 0); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAH, 0); + + /* reset the device */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV); + PMD_INIT_LOG(DEBUG, "Device reset."); + + vmxnet3_dev_clear_queues(dev); + + /* Clear recorded link status */ + memset(&link, 0, sizeof(link)); + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = ETH_SPEED_NUM_10G; + link.link_autoneg = ETH_LINK_FIXED; + rte_eth_linkstatus_set(dev, &link); + + hw->adapter_stopped = 1; +} + +static void +vmxnet3_free_queues(struct rte_eth_dev *dev) +{ + int i; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + void *rxq = dev->data->rx_queues[i]; + + vmxnet3_dev_rx_queue_release(rxq); + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + void *txq = dev->data->tx_queues[i]; + + vmxnet3_dev_tx_queue_release(txq); + } + dev->data->nb_tx_queues = 0; +} + +/* + * Reset and stop device. + */ +static void +vmxnet3_dev_close(struct rte_eth_dev *dev) +{ + PMD_INIT_FUNC_TRACE(); + + vmxnet3_dev_stop(dev); + vmxnet3_free_queues(dev); +} + +static void +vmxnet3_hw_tx_stats_get(struct vmxnet3_hw *hw, unsigned int q, + struct UPT1_TxStats *res) +{ +#define VMXNET3_UPDATE_TX_STAT(h, i, f, r) \ + ((r)->f = (h)->tqd_start[(i)].stats.f + \ + (h)->saved_tx_stats[(i)].f) + + VMXNET3_UPDATE_TX_STAT(hw, q, ucastPktsTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, mcastPktsTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, bcastPktsTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, ucastBytesTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, mcastBytesTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, bcastBytesTxOK, res); + VMXNET3_UPDATE_TX_STAT(hw, q, pktsTxError, res); + VMXNET3_UPDATE_TX_STAT(hw, q, pktsTxDiscard, res); + +#undef VMXNET3_UPDATE_TX_STAT +} + +static void +vmxnet3_hw_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q, + struct UPT1_RxStats *res) +{ +#define VMXNET3_UPDATE_RX_STAT(h, i, f, r) \ + ((r)->f = (h)->rqd_start[(i)].stats.f + \ + (h)->saved_rx_stats[(i)].f) + + VMXNET3_UPDATE_RX_STAT(hw, q, ucastPktsRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, mcastPktsRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, bcastPktsRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, ucastBytesRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, mcastBytesRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, bcastBytesRxOK, res); + VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxError, res); + VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxOutOfBuf, res); + +#undef VMXNET3_UPDATE_RX_STAT +} + +static void +vmxnet3_tx_stats_get(struct vmxnet3_hw *hw, unsigned int q, + struct UPT1_TxStats *res) +{ + vmxnet3_hw_tx_stats_get(hw, q, res); + +#define VMXNET3_REDUCE_SNAPSHOT_TX_STAT(h, i, f, r) \ + ((r)->f -= (h)->snapshot_tx_stats[(i)].f) + + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastPktsTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastPktsTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastPktsTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastBytesTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastBytesTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastBytesTxOK, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxError, res); + VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxDiscard, res); + +#undef VMXNET3_REDUCE_SNAPSHOT_TX_STAT +} + +static void +vmxnet3_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q, + struct UPT1_RxStats *res) +{ + vmxnet3_hw_rx_stats_get(hw, q, res); + +#define VMXNET3_REDUCE_SNAPSHOT_RX_STAT(h, i, f, r) \ + ((r)->f -= (h)->snapshot_rx_stats[(i)].f) + + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastPktsRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastPktsRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastPktsRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastBytesRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastBytesRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastBytesRxOK, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxError, res); + VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxOutOfBuf, res); + +#undef VMXNET3_REDUCE_SNAPSHOT_RX_STAT +} + +static void +vmxnet3_hw_stats_save(struct vmxnet3_hw *hw) +{ + unsigned int i; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES); + + for (i = 0; i < hw->num_tx_queues; i++) + vmxnet3_hw_tx_stats_get(hw, i, &hw->saved_tx_stats[i]); + for (i = 0; i < hw->num_rx_queues; i++) + vmxnet3_hw_rx_stats_get(hw, i, &hw->saved_rx_stats[i]); +} + +static int +vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int n) +{ + unsigned int i, t, count = 0; + unsigned int nstats = + dev->data->nb_tx_queues * RTE_DIM(vmxnet3_txq_stat_strings) + + dev->data->nb_rx_queues * RTE_DIM(vmxnet3_rxq_stat_strings); + + if (!xstats_names || n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (!dev->data->rx_queues[i]) + continue; + + for (t = 0; t < RTE_DIM(vmxnet3_rxq_stat_strings); t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "rx_q%u_%s", i, + vmxnet3_rxq_stat_strings[t].name); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (!dev->data->tx_queues[i]) + continue; + + for (t = 0; t < RTE_DIM(vmxnet3_txq_stat_strings); t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "tx_q%u_%s", i, + vmxnet3_txq_stat_strings[t].name); + count++; + } + } + + return count; +} + +static int +vmxnet3_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + unsigned int n) +{ + unsigned int i, t, count = 0; + unsigned int nstats = + dev->data->nb_tx_queues * RTE_DIM(vmxnet3_txq_stat_strings) + + dev->data->nb_rx_queues * RTE_DIM(vmxnet3_rxq_stat_strings); + + if (n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i]; + + if (rxq == NULL) + continue; + + for (t = 0; t < RTE_DIM(vmxnet3_rxq_stat_strings); t++) { + xstats[count].value = *(uint64_t *)(((char *)&rxq->stats) + + vmxnet3_rxq_stat_strings[t].offset); + xstats[count].id = count; + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i]; + + if (txq == NULL) + continue; + + for (t = 0; t < RTE_DIM(vmxnet3_txq_stat_strings); t++) { + xstats[count].value = *(uint64_t *)(((char *)&txq->stats) + + vmxnet3_txq_stat_strings[t].offset); + xstats[count].id = count; + count++; + } + } + + return count; +} + +static int +vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + unsigned int i; + struct vmxnet3_hw *hw = dev->data->dev_private; + struct UPT1_TxStats txStats; + struct UPT1_RxStats rxStats; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES); + for (i = 0; i < hw->num_tx_queues; i++) { + vmxnet3_tx_stats_get(hw, i, &txStats); + + stats->q_opackets[i] = txStats.ucastPktsTxOK + + txStats.mcastPktsTxOK + + txStats.bcastPktsTxOK; + + stats->q_obytes[i] = txStats.ucastBytesTxOK + + txStats.mcastBytesTxOK + + txStats.bcastBytesTxOK; + + stats->opackets += stats->q_opackets[i]; + stats->obytes += stats->q_obytes[i]; + stats->oerrors += txStats.pktsTxError + txStats.pktsTxDiscard; + } + + RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_RX_QUEUES); + for (i = 0; i < hw->num_rx_queues; i++) { + vmxnet3_rx_stats_get(hw, i, &rxStats); + + stats->q_ipackets[i] = rxStats.ucastPktsRxOK + + rxStats.mcastPktsRxOK + + rxStats.bcastPktsRxOK; + + stats->q_ibytes[i] = rxStats.ucastBytesRxOK + + rxStats.mcastBytesRxOK + + rxStats.bcastBytesRxOK; + + stats->ipackets += stats->q_ipackets[i]; + stats->ibytes += stats->q_ibytes[i]; + + stats->q_errors[i] = rxStats.pktsRxError; + stats->ierrors += rxStats.pktsRxError; + stats->imissed += rxStats.pktsRxOutOfBuf; + } + + return 0; +} + +static void +vmxnet3_dev_stats_reset(struct rte_eth_dev *dev) +{ + unsigned int i; + struct vmxnet3_hw *hw = dev->data->dev_private; + struct UPT1_TxStats txStats; + struct UPT1_RxStats rxStats; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES); + + for (i = 0; i < hw->num_tx_queues; i++) { + vmxnet3_hw_tx_stats_get(hw, i, &txStats); + memcpy(&hw->snapshot_tx_stats[i], &txStats, + sizeof(hw->snapshot_tx_stats[0])); + } + for (i = 0; i < hw->num_rx_queues; i++) { + vmxnet3_hw_rx_stats_get(hw, i, &rxStats); + memcpy(&hw->snapshot_rx_stats[i], &rxStats, + sizeof(hw->snapshot_rx_stats[0])); + } +} + +static void +vmxnet3_dev_info_get(struct rte_eth_dev *dev __rte_unused, + struct rte_eth_dev_info *dev_info) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + + dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES; + dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES; + dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM; + dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */ + dev_info->speed_capa = ETH_LINK_SPEED_10G; + dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS; + + dev_info->flow_type_rss_offloads = VMXNET3_RSS_OFFLOAD_ALL; + + if (VMXNET3_VERSION_GE_4(hw)) { + dev_info->flow_type_rss_offloads |= VMXNET3_V4_RSS_MASK; + } + + dev_info->rx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = VMXNET3_RX_RING_MAX_SIZE, + .nb_min = VMXNET3_DEF_RX_RING_SIZE, + .nb_align = 1, + }; + + dev_info->tx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = VMXNET3_TX_RING_MAX_SIZE, + .nb_min = VMXNET3_DEF_TX_RING_SIZE, + .nb_align = 1, + .nb_seg_max = VMXNET3_TX_MAX_SEG, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, + }; + + dev_info->rx_offload_capa = VMXNET3_RX_OFFLOAD_CAP; + dev_info->rx_queue_offload_capa = 0; + dev_info->tx_offload_capa = VMXNET3_TX_OFFLOAD_CAP; + dev_info->tx_queue_offload_capa = 0; +} + +static const uint32_t * +vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev) +{ + static const uint32_t ptypes[] = { + RTE_PTYPE_L3_IPV4_EXT, + RTE_PTYPE_L3_IPV4, + RTE_PTYPE_UNKNOWN + }; + + if (dev->rx_pkt_burst == vmxnet3_recv_pkts) + return ptypes; + return NULL; +} + +static int +vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + + ether_addr_copy(mac_addr, (struct ether_addr *)(hw->perm_addr)); + vmxnet3_write_mac(hw, mac_addr->addr_bytes); + return 0; +} + +/* return 0 means link status changed, -1 means not changed */ +static int +__vmxnet3_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + struct rte_eth_link link; + uint32_t ret; + + memset(&link, 0, sizeof(link)); + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + + if (ret & 0x1) + link.link_status = ETH_LINK_UP; + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = ETH_SPEED_NUM_10G; + link.link_autoneg = ETH_LINK_FIXED; + + return rte_eth_linkstatus_set(dev, &link); +} + +static int +vmxnet3_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete) +{ + /* Link status doesn't change for stopped dev */ + if (dev->data->dev_started == 0) + return -1; + + return __vmxnet3_dev_link_update(dev, wait_to_complete); +} + +/* Updating rxmode through Vmxnet3_DriverShared structure in adapter */ +static void +vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) +{ + struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf; + + if (set) + rxConf->rxMode = rxConf->rxMode | feature; + else + rxConf->rxMode = rxConf->rxMode & (~feature); + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_RX_MODE); +} + +/* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */ +static void +vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable; + + memset(vf_table, 0, VMXNET3_VFT_TABLE_SIZE); + vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_PROMISC, 1); + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); +} + +/* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */ +static void +vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable; + uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; + + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + memcpy(vf_table, hw->shadow_vfta, VMXNET3_VFT_TABLE_SIZE); + else + memset(vf_table, 0xff, VMXNET3_VFT_TABLE_SIZE); + vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_PROMISC, 0); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); +} + +/* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */ +static void +vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + + vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 1); +} + +/* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */ +static void +vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + + vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 0); +} + +/* Enable/disable filter on vlan */ +static int +vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vid, int on) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf; + uint32_t *vf_table = rxConf->vfTable; + + /* save state for restore */ + if (on) + VMXNET3_SET_VFTABLE_ENTRY(hw->shadow_vfta, vid); + else + VMXNET3_CLEAR_VFTABLE_ENTRY(hw->shadow_vfta, vid); + + /* don't change active filter if in promiscuous mode */ + if (rxConf->rxMode & VMXNET3_RXM_PROMISC) + return 0; + + /* set in hardware */ + if (on) + VMXNET3_SET_VFTABLE_ENTRY(vf_table, vid); + else + VMXNET3_CLEAR_VFTABLE_ENTRY(vf_table, vid); + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + return 0; +} + +static int +vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + Vmxnet3_DSDevRead *devRead = &hw->shared->devRead; + uint32_t *vf_table = devRead->rxFilterConf.vfTable; + uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; + + if (mask & ETH_VLAN_STRIP_MASK) { + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + devRead->misc.uptFeatures |= UPT1_F_RXVLAN; + else + devRead->misc.uptFeatures &= ~UPT1_F_RXVLAN; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_FEATURE); + } + + if (mask & ETH_VLAN_FILTER_MASK) { + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + memcpy(vf_table, hw->shadow_vfta, VMXNET3_VFT_TABLE_SIZE); + else + memset(vf_table, 0xff, VMXNET3_VFT_TABLE_SIZE); + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + } + + return 0; +} + +static void +vmxnet3_process_events(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + uint32_t events = hw->shared->ecr; + + if (!events) + return; + + /* + * ECR bits when written with 1b are cleared. Hence write + * events back to ECR so that the bits which were set will be reset. + */ + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_ECR, events); + + /* Check if link state has changed */ + if (events & VMXNET3_ECR_LINK) { + PMD_DRV_LOG(DEBUG, "Process events: VMXNET3_ECR_LINK event"); + if (vmxnet3_dev_link_update(dev, 0) == 0) + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); + } + + /* Check if there is an error on xmit/recv queues */ + if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_QUEUE_STATUS); + + if (hw->tqd_start->status.stopped) + PMD_DRV_LOG(ERR, "tq error 0x%x", + hw->tqd_start->status.error); + + if (hw->rqd_start->status.stopped) + PMD_DRV_LOG(ERR, "rq error 0x%x", + hw->rqd_start->status.error); + + /* Reset the device */ + /* Have to reset the device */ + } + + if (events & VMXNET3_ECR_DIC) + PMD_DRV_LOG(DEBUG, "Device implementation change event."); + + if (events & VMXNET3_ECR_DEBUG) + PMD_DRV_LOG(DEBUG, "Debug event generated by device."); +} + +static void +vmxnet3_interrupt_handler(void *param) +{ + struct rte_eth_dev *dev = param; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + + vmxnet3_process_events(dev); + + if (rte_intr_enable(&pci_dev->intr_handle) < 0) + PMD_DRV_LOG(ERR, "interrupt enable failed"); +} + +RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd); +RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map); +RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio-pci"); + +RTE_INIT(vmxnet3_init_log) +{ + vmxnet3_logtype_init = rte_log_register("pmd.net.vmxnet3.init"); + if (vmxnet3_logtype_init >= 0) + rte_log_set_level(vmxnet3_logtype_init, RTE_LOG_NOTICE); + vmxnet3_logtype_driver = rte_log_register("pmd.net.vmxnet3.driver"); + if (vmxnet3_logtype_driver >= 0) + rte_log_set_level(vmxnet3_logtype_driver, RTE_LOG_NOTICE); +} diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h new file mode 100644 index 000000000..319d73926 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _VMXNET3_ETHDEV_H_ +#define _VMXNET3_ETHDEV_H_ + +#include <rte_io.h> + +#define VMXNET3_MAX_MAC_ADDRS 1 + +/* UPT feature to negotiate */ +#define VMXNET3_F_RXCSUM 0x0001 +#define VMXNET3_F_RSS 0x0002 +#define VMXNET3_F_RXVLAN 0x0004 +#define VMXNET3_F_LRO 0x0008 + +/* Hash Types supported by device */ +#define VMXNET3_RSS_HASH_TYPE_NONE 0x0 +#define VMXNET3_RSS_HASH_TYPE_IPV4 0x01 +#define VMXNET3_RSS_HASH_TYPE_TCP_IPV4 0x02 +#define VMXNET3_RSS_HASH_TYPE_IPV6 0x04 +#define VMXNET3_RSS_HASH_TYPE_TCP_IPV6 0x08 + +#define VMXNET3_RSS_HASH_FUNC_NONE 0x0 +#define VMXNET3_RSS_HASH_FUNC_TOEPLITZ 0x01 + +#define VMXNET3_RSS_MAX_KEY_SIZE 40 +#define VMXNET3_RSS_MAX_IND_TABLE_SIZE 128 + +#define VMXNET3_RSS_OFFLOAD_ALL ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP) + +#define VMXNET3_V4_RSS_MASK ( \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP) + +/* RSS configuration structure - shared with device through GPA */ +typedef struct VMXNET3_RSSConf { + uint16_t hashType; + uint16_t hashFunc; + uint16_t hashKeySize; + uint16_t indTableSize; + uint8_t hashKey[VMXNET3_RSS_MAX_KEY_SIZE]; + /* + * indTable is only element that can be changed without + * device quiesce-reset-update-activation cycle + */ + uint8_t indTable[VMXNET3_RSS_MAX_IND_TABLE_SIZE]; +} VMXNET3_RSSConf; + +typedef struct vmxnet3_mf_table { + void *mfTableBase; /* Multicast addresses list */ + uint64_t mfTablePA; /* Physical address of the list */ + uint16_t num_addrs; /* number of multicast addrs */ +} vmxnet3_mf_table_t; + +struct vmxnet3_hw { + uint8_t *hw_addr0; /* BAR0: PT-Passthrough Regs */ + uint8_t *hw_addr1; /* BAR1: VD-Virtual Device Regs */ + /* BAR2: MSI-X Regs */ + /* BAR3: Port IO */ + void *back; + + uint16_t device_id; + uint16_t vendor_id; + uint16_t subsystem_device_id; + uint16_t subsystem_vendor_id; + bool adapter_stopped; + + uint8_t perm_addr[ETHER_ADDR_LEN]; + uint8_t num_tx_queues; + uint8_t num_rx_queues; + uint8_t bufs_per_pkt; + + uint8_t version; + + uint16_t txdata_desc_size; /* tx data ring buffer size */ + uint16_t rxdata_desc_size; /* rx data ring buffer size */ + + uint8_t num_intrs; + + Vmxnet3_TxQueueDesc *tqd_start; /* start address of all tx queue desc */ + Vmxnet3_RxQueueDesc *rqd_start; /* start address of all rx queue desc */ + + Vmxnet3_DriverShared *shared; + uint64_t sharedPA; + + uint64_t queueDescPA; + uint16_t queue_desc_len; + uint16_t mtu; + + VMXNET3_RSSConf *rss_conf; + uint64_t rss_confPA; + vmxnet3_mf_table_t *mf_table; + uint32_t shadow_vfta[VMXNET3_VFT_SIZE]; + Vmxnet3_MemRegs *memRegs; + uint64_t memRegsPA; +#define VMXNET3_VFT_TABLE_SIZE (VMXNET3_VFT_SIZE * sizeof(uint32_t)) + UPT1_TxStats saved_tx_stats[VMXNET3_MAX_TX_QUEUES]; + UPT1_RxStats saved_rx_stats[VMXNET3_MAX_RX_QUEUES]; + + UPT1_TxStats snapshot_tx_stats[VMXNET3_MAX_TX_QUEUES]; + UPT1_RxStats snapshot_rx_stats[VMXNET3_MAX_RX_QUEUES]; +}; + +#define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ +#define VMXNET3_REV_3 2 /* Vmxnet3 Rev. 3 */ +#define VMXNET3_REV_2 1 /* Vmxnet3 Rev. 2 */ +#define VMXNET3_REV_1 0 /* Vmxnet3 Rev. 1 */ + +#define VMXNET3_VERSION_GE_4(hw) ((hw)->version >= VMXNET3_REV_4 + 1) +#define VMXNET3_VERSION_GE_3(hw) ((hw)->version >= VMXNET3_REV_3 + 1) +#define VMXNET3_VERSION_GE_2(hw) ((hw)->version >= VMXNET3_REV_2 + 1) + +#define VMXNET3_GET_ADDR_LO(reg) ((uint32_t)(reg)) +#define VMXNET3_GET_ADDR_HI(reg) ((uint32_t)(((uint64_t)(reg)) >> 32)) + +/* Config space read/writes */ + +#define VMXNET3_PCI_REG(reg) rte_read32(reg) + +static inline uint32_t +vmxnet3_read_addr(volatile void *addr) +{ + return VMXNET3_PCI_REG(addr); +} + +#define VMXNET3_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) + +#define VMXNET3_PCI_BAR0_REG_ADDR(hw, reg) \ + ((volatile uint32_t *)((char *)(hw)->hw_addr0 + (reg))) +#define VMXNET3_READ_BAR0_REG(hw, reg) \ + vmxnet3_read_addr(VMXNET3_PCI_BAR0_REG_ADDR((hw), (reg))) +#define VMXNET3_WRITE_BAR0_REG(hw, reg, value) \ + VMXNET3_PCI_REG_WRITE(VMXNET3_PCI_BAR0_REG_ADDR((hw), (reg)), (value)) + +#define VMXNET3_PCI_BAR1_REG_ADDR(hw, reg) \ + ((volatile uint32_t *)((char *)(hw)->hw_addr1 + (reg))) +#define VMXNET3_READ_BAR1_REG(hw, reg) \ + vmxnet3_read_addr(VMXNET3_PCI_BAR1_REG_ADDR((hw), (reg))) +#define VMXNET3_WRITE_BAR1_REG(hw, reg, value) \ + VMXNET3_PCI_REG_WRITE(VMXNET3_PCI_BAR1_REG_ADDR((hw), (reg)), (value)) + +static inline uint8_t +vmxnet3_get_ring_idx(struct vmxnet3_hw *hw, uint32 rqID) +{ + return (rqID >= hw->num_rx_queues && + rqID < 2 * hw->num_rx_queues) ? 1 : 0; +} + +static inline bool +vmxnet3_rx_data_ring(struct vmxnet3_hw *hw, uint32 rqID) +{ + return (rqID >= 2 * hw->num_rx_queues && + rqID < 3 * hw->num_rx_queues); +} + +/* + * RX/TX function prototypes + */ + +void vmxnet3_dev_clear_queues(struct rte_eth_dev *dev); + +void vmxnet3_dev_rx_queue_release(void *rxq); +void vmxnet3_dev_tx_queue_release(void *txq); + +int vmxnet3_v4_rss_configure(struct rte_eth_dev *dev); + +int vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); +int vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +int vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev); + +int vmxnet3_rss_configure(struct rte_eth_dev *dev); + +uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +#endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_logs.h b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_logs.h new file mode 100644 index 000000000..74154e3a1 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_logs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _VMXNET3_LOGS_H_ +#define _VMXNET3_LOGS_H_ + +extern int vmxnet3_logtype_init; +#define PMD_INIT_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, vmxnet3_logtype_driver, \ + "%s(): " fmt "\n", __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") + +#ifdef RTE_LIBRTE_VMXNET3_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VMXNET3_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE +#define PMD_TX_FREE_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while(0) +#endif + +extern int vmxnet3_logtype_driver; +#define PMD_DRV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, vmxnet3_logtype_driver, \ + "%s(): " fmt "\n", __func__, ## args) + +#endif /* _VMXNET3_LOGS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h new file mode 100644 index 000000000..50992349d --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _VMXNET3_RING_H_ +#define _VMXNET3_RING_H_ + +#define VMXNET3_RX_CMDRING_SIZE 2 + +#define VMXNET3_DRIVER_VERSION_NUM 0x01012000 + +/* Default ring size */ +#define VMXNET3_DEF_TX_RING_SIZE 512 +#define VMXNET3_DEF_RX_RING_SIZE 128 + +/* Default rx data ring desc size */ +#define VMXNET3_DEF_RXDATA_DESC_SIZE 256 + +#define VMXNET3_SUCCESS 0 +#define VMXNET3_FAIL -1 + +#define TRUE 1 +#define FALSE 0 + + +typedef struct vmxnet3_buf_info { + uint16_t len; + struct rte_mbuf *m; + uint64_t bufPA; +} vmxnet3_buf_info_t; + +typedef struct vmxnet3_cmd_ring { + vmxnet3_buf_info_t *buf_info; + uint32_t size; + uint32_t next2fill; + uint32_t next2comp; + uint8_t gen; + uint8_t rid; + Vmxnet3_GenericDesc *base; + uint64_t basePA; +} vmxnet3_cmd_ring_t; + +static inline void +vmxnet3_cmd_ring_adv_next2fill(struct vmxnet3_cmd_ring *ring) +{ + ring->next2fill++; + if (unlikely(ring->next2fill == ring->size)) { + ring->next2fill = 0; + ring->gen = (uint8_t)(ring->gen ^ 1); + } +} + +static inline void +vmxnet3_cmd_ring_adv_next2comp(struct vmxnet3_cmd_ring *ring) +{ + VMXNET3_INC_RING_IDX_ONLY(ring->next2comp, ring->size); +} + +static inline uint32_t +vmxnet3_cmd_ring_desc_avail(struct vmxnet3_cmd_ring *ring) +{ + return (ring->next2comp > ring->next2fill ? 0 : ring->size) + + ring->next2comp - ring->next2fill - 1; +} + +static inline bool +vmxnet3_cmd_ring_desc_empty(struct vmxnet3_cmd_ring *ring) +{ + return ring->next2comp == ring->next2fill; +} + +typedef struct vmxnet3_comp_ring { + uint32_t size; + uint32_t next2proc; + uint8_t gen; + uint8_t intr_idx; + Vmxnet3_GenericDesc *base; + uint64_t basePA; +} vmxnet3_comp_ring_t; + +struct vmxnet3_data_ring { + struct Vmxnet3_TxDataDesc *base; + uint32_t size; + uint64_t basePA; +}; + +static inline void +vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring) +{ + ring->next2proc++; + if (unlikely(ring->next2proc == ring->size)) { + ring->next2proc = 0; + ring->gen = (uint8_t)(ring->gen ^ 1); + } +} + +struct vmxnet3_txq_stats { + uint64_t drop_total; /* # of pkts dropped by the driver, + * the counters below track droppings due to + * different reasons + */ + uint64_t drop_too_many_segs; + uint64_t drop_tso; + uint64_t tx_ring_full; +}; + +typedef struct vmxnet3_tx_queue { + struct vmxnet3_hw *hw; + struct vmxnet3_cmd_ring cmd_ring; + struct vmxnet3_comp_ring comp_ring; + struct vmxnet3_data_ring data_ring; + uint32_t qid; + struct Vmxnet3_TxQueueDesc *shared; + struct vmxnet3_txq_stats stats; + const struct rte_memzone *mz; + bool stopped; + uint16_t queue_id; /**< Device TX queue index. */ + uint16_t port_id; /**< Device port identifier. */ + uint16_t txdata_desc_size; +} vmxnet3_tx_queue_t; + +struct vmxnet3_rxq_stats { + uint64_t drop_total; + uint64_t drop_err; + uint64_t drop_fcs; + uint64_t rx_buf_alloc_failure; +}; + +struct vmxnet3_rx_data_ring { + uint8_t *base; + uint64_t basePA; + uint32_t size; +}; + +typedef struct vmxnet3_rx_queue { + struct rte_mempool *mp; + struct vmxnet3_hw *hw; + struct vmxnet3_cmd_ring cmd_ring[VMXNET3_RX_CMDRING_SIZE]; + struct vmxnet3_comp_ring comp_ring; + struct vmxnet3_rx_data_ring data_ring; + uint16_t data_desc_size; + uint32_t qid1; + uint32_t qid2; + /* rqID in RCD for buffer from data ring */ + uint32_t data_ring_qid; + Vmxnet3_RxQueueDesc *shared; + struct rte_mbuf *start_seg; + struct rte_mbuf *last_seg; + struct vmxnet3_rxq_stats stats; + const struct rte_memzone *mz; + bool stopped; + uint16_t queue_id; /**< Device RX queue index. */ + uint16_t port_id; /**< Device port identifier. */ +} vmxnet3_rx_queue_t; + +#endif /* _VMXNET3_RING_H_ */ diff --git a/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c new file mode 100644 index 000000000..4867a64f3 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -0,0 +1,1387 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#include <sys/queue.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <stdarg.h> +#include <unistd.h> +#include <inttypes.h> + +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev_driver.h> +#include <rte_prefetch.h> +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_tcp.h> +#include <rte_sctp.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_net.h> + +#include "base/vmxnet3_defs.h" +#include "vmxnet3_ring.h" + +#include "vmxnet3_logs.h" +#include "vmxnet3_ethdev.h" + +#define VMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + +static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; + +static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); +static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *); +#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED +static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *); +static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *); +#endif + +#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED +static void +vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq) +{ + uint32_t avail = 0; + + if (rxq == NULL) + return; + + PMD_RX_LOG(DEBUG, + "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.", + rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base); + PMD_RX_LOG(DEBUG, + "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.", + (unsigned long)rxq->cmd_ring[0].basePA, + (unsigned long)rxq->cmd_ring[1].basePA, + (unsigned long)rxq->comp_ring.basePA); + + avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]); + PMD_RX_LOG(DEBUG, + "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u", + (uint32_t)rxq->cmd_ring[0].size, avail, + rxq->comp_ring.next2proc, + rxq->cmd_ring[0].size - avail); + + avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]); + PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u", + (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc, + rxq->cmd_ring[1].size - avail); + +} + +static void +vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq) +{ + uint32_t avail = 0; + + if (txq == NULL) + return; + + PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.", + txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base); + PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.", + (unsigned long)txq->cmd_ring.basePA, + (unsigned long)txq->comp_ring.basePA, + (unsigned long)txq->data_ring.basePA); + + avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring); + PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u", + (uint32_t)txq->cmd_ring.size, avail, + txq->comp_ring.next2proc, txq->cmd_ring.size - avail); +} +#endif + +static void +vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +{ + while (ring->next2comp != ring->next2fill) { + /* No need to worry about desc ownership, device is quiesced by now. */ + vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp; + + if (buf_info->m) { + rte_pktmbuf_free(buf_info->m); + buf_info->m = NULL; + buf_info->bufPA = 0; + buf_info->len = 0; + } + vmxnet3_cmd_ring_adv_next2comp(ring); + } +} + +static void +vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +{ + uint32_t i; + + for (i = 0; i < ring->size; i++) { + /* No need to worry about desc ownership, device is quiesced by now. */ + vmxnet3_buf_info_t *buf_info = &ring->buf_info[i]; + + if (buf_info->m) { + rte_pktmbuf_free_seg(buf_info->m); + buf_info->m = NULL; + buf_info->bufPA = 0; + buf_info->len = 0; + } + vmxnet3_cmd_ring_adv_next2comp(ring); + } +} + +static void +vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring) +{ + rte_free(ring->buf_info); + ring->buf_info = NULL; +} + +void +vmxnet3_dev_tx_queue_release(void *txq) +{ + vmxnet3_tx_queue_t *tq = txq; + + if (tq != NULL) { + /* Release mbufs */ + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); + /* Release the cmd_ring */ + vmxnet3_cmd_ring_release(&tq->cmd_ring); + /* Release the memzone */ + rte_memzone_free(tq->mz); + /* Release the queue */ + rte_free(tq); + } +} + +void +vmxnet3_dev_rx_queue_release(void *rxq) +{ + int i; + vmxnet3_rx_queue_t *rq = rxq; + + if (rq != NULL) { + /* Release mbufs */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + + /* Release both the cmd_rings */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) + vmxnet3_cmd_ring_release(&rq->cmd_ring[i]); + + /* Release the memzone */ + rte_memzone_free(rq->mz); + + /* Release the queue */ + rte_free(rq); + } +} + +static void +vmxnet3_dev_tx_queue_reset(void *txq) +{ + vmxnet3_tx_queue_t *tq = txq; + struct vmxnet3_cmd_ring *ring = &tq->cmd_ring; + struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring; + struct vmxnet3_data_ring *data_ring = &tq->data_ring; + int size; + + if (tq != NULL) { + /* Release the cmd_ring mbufs */ + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); + } + + /* Tx vmxnet rings structure initialization*/ + ring->next2fill = 0; + ring->next2comp = 0; + ring->gen = VMXNET3_INIT_GEN; + comp_ring->next2proc = 0; + comp_ring->gen = VMXNET3_INIT_GEN; + + size = sizeof(struct Vmxnet3_TxDesc) * ring->size; + size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size; + size += tq->txdata_desc_size * data_ring->size; + + memset(ring->base, 0, size); +} + +static void +vmxnet3_dev_rx_queue_reset(void *rxq) +{ + int i; + vmxnet3_rx_queue_t *rq = rxq; + struct vmxnet3_hw *hw = rq->hw; + struct vmxnet3_cmd_ring *ring0, *ring1; + struct vmxnet3_comp_ring *comp_ring; + struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring; + int size; + + /* Release both the cmd_rings mbufs */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + + ring0 = &rq->cmd_ring[0]; + ring1 = &rq->cmd_ring[1]; + comp_ring = &rq->comp_ring; + + /* Rx vmxnet rings structure initialization */ + ring0->next2fill = 0; + ring1->next2fill = 0; + ring0->next2comp = 0; + ring1->next2comp = 0; + ring0->gen = VMXNET3_INIT_GEN; + ring1->gen = VMXNET3_INIT_GEN; + comp_ring->next2proc = 0; + comp_ring->gen = VMXNET3_INIT_GEN; + + size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size); + size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size; + if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size) + size += rq->data_desc_size * data_ring->size; + + memset(ring0->base, 0, size); +} + +void +vmxnet3_dev_clear_queues(struct rte_eth_dev *dev) +{ + unsigned i; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i]; + + if (txq != NULL) { + txq->stopped = TRUE; + vmxnet3_dev_tx_queue_reset(txq); + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i]; + + if (rxq != NULL) { + rxq->stopped = TRUE; + vmxnet3_dev_rx_queue_reset(rxq); + } + } +} + +static int +vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq) +{ + int completed = 0; + struct rte_mbuf *mbuf; + + /* Release cmd_ring descriptor and free mbuf */ + RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1); + + mbuf = txq->cmd_ring.buf_info[eop_idx].m; + if (mbuf == NULL) + rte_panic("EOP desc does not point to a valid mbuf"); + rte_pktmbuf_free(mbuf); + + txq->cmd_ring.buf_info[eop_idx].m = NULL; + + while (txq->cmd_ring.next2comp != eop_idx) { + /* no out-of-order completion */ + RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0); + vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring); + completed++; + } + + /* Mark the txd for which tcd was generated as completed */ + vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring); + + return completed + 1; +} + +static void +vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq) +{ + int completed = 0; + vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring; + struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *) + (comp_ring->base + comp_ring->next2proc); + + while (tcd->gen == comp_ring->gen) { + completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq); + + vmxnet3_comp_ring_adv_next2proc(comp_ring); + tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base + + comp_ring->next2proc); + } + + PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed); +} + +uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Non-TSO packet cannot occupy more than + * VMXNET3_MAX_TXD_PER_PKT TX descriptors. + */ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. */ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +uint16_t +vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + uint16_t nb_tx; + vmxnet3_tx_queue_t *txq = tx_queue; + struct vmxnet3_hw *hw = txq->hw; + Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl; + uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred); + + if (unlikely(txq->stopped)) { + PMD_TX_LOG(DEBUG, "Tx queue is stopped."); + return 0; + } + + /* Free up the comp_descriptors aggressively */ + vmxnet3_tq_tx_complete(txq); + + nb_tx = 0; + while (nb_tx < nb_pkts) { + Vmxnet3_GenericDesc *gdesc; + vmxnet3_buf_info_t *tbi; + uint32_t first2fill, avail, dw2; + struct rte_mbuf *txm = tx_pkts[nb_tx]; + struct rte_mbuf *m_seg = txm; + int copy_size = 0; + bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0; + /* # of descriptors needed for a packet. */ + unsigned count = txm->nb_segs; + + avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring); + if (count > avail) { + /* Is command ring full? */ + if (unlikely(avail == 0)) { + PMD_TX_LOG(DEBUG, "No free ring descriptors"); + txq->stats.tx_ring_full++; + txq->stats.drop_total += (nb_pkts - nb_tx); + break; + } + + /* Command ring is not full but cannot handle the + * multi-segmented packet. Let's try the next packet + * in this case. + */ + PMD_TX_LOG(DEBUG, "Running out of ring descriptors " + "(avail %d needed %d)", avail, count); + txq->stats.drop_total++; + if (tso) + txq->stats.drop_tso++; + rte_pktmbuf_free(txm); + nb_tx++; + continue; + } + + /* Drop non-TSO packet that is excessively fragmented */ + if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) { + PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx " + "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT); + txq->stats.drop_too_many_segs++; + txq->stats.drop_total++; + rte_pktmbuf_free(txm); + nb_tx++; + continue; + } + + if (txm->nb_segs == 1 && + rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) { + struct Vmxnet3_TxDataDesc *tdd; + + /* Skip empty packets */ + if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) { + txq->stats.drop_total++; + rte_pktmbuf_free(txm); + nb_tx++; + continue; + } + + tdd = (struct Vmxnet3_TxDataDesc *) + ((uint8 *)txq->data_ring.base + + txq->cmd_ring.next2fill * + txq->txdata_desc_size); + copy_size = rte_pktmbuf_pkt_len(txm); + rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size); + } + + /* use the previous gen bit for the SOP desc */ + dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; + first2fill = txq->cmd_ring.next2fill; + do { + /* Remember the transmit buffer for cleanup */ + tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill; + + /* NB: the following assumes that VMXNET3 maximum + * transmit buffer size (16K) is greater than + * maximum size of mbuf segment size. + */ + gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; + + /* Skip empty segments */ + if (unlikely(m_seg->data_len == 0)) + continue; + + if (copy_size) { + uint64 offset = + (uint64)txq->cmd_ring.next2fill * + txq->txdata_desc_size; + gdesc->txd.addr = + rte_cpu_to_le_64(txq->data_ring.basePA + + offset); + } else { + gdesc->txd.addr = rte_mbuf_data_iova(m_seg); + } + + gdesc->dword[2] = dw2 | m_seg->data_len; + gdesc->dword[3] = 0; + + /* move to the next2fill descriptor */ + vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring); + + /* use the right gen for non-SOP desc */ + dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT; + } while ((m_seg = m_seg->next) != NULL); + + /* set the last buf_info for the pkt */ + tbi->m = txm; + /* Update the EOP descriptor */ + gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ; + + /* Add VLAN tag if present */ + gdesc = txq->cmd_ring.base + first2fill; + if (txm->ol_flags & PKT_TX_VLAN_PKT) { + gdesc->txd.ti = 1; + gdesc->txd.tci = txm->vlan_tci; + } + + if (tso) { + uint16_t mss = txm->tso_segsz; + + RTE_ASSERT(mss > 0); + + gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len; + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.msscof = mss; + + deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss; + } else if (txm->ol_flags & PKT_TX_L4_MASK) { + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.hlen = txm->l2_len + txm->l3_len; + + switch (txm->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum); + break; + case PKT_TX_UDP_CKSUM: + gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum); + break; + default: + PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx", + txm->ol_flags & PKT_TX_L4_MASK); + abort(); + } + deferred++; + } else { + gdesc->txd.hlen = 0; + gdesc->txd.om = VMXNET3_OM_NONE; + gdesc->txd.msscof = 0; + deferred++; + } + + /* flip the GEN bit on the SOP */ + rte_compiler_barrier(); + gdesc->dword[2] ^= VMXNET3_TXD_GEN; + + txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred); + nb_tx++; + } + + PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold)); + + if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) { + txq_ctrl->txNumDeferred = 0; + /* Notify vSwitch that packets are available. */ + VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN), + txq->cmd_ring.next2fill); + } + + return nb_tx; +} + +static inline void +vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id, + struct rte_mbuf *mbuf) +{ + uint32_t val; + struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; + struct Vmxnet3_RxDesc *rxd = + (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill); + vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill]; + + if (ring_id == 0) { + /* Usually: One HEAD type buf per packet + * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ? + * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD; + */ + + /* We use single packet buffer so all heads here */ + val = VMXNET3_RXD_BTYPE_HEAD; + } else { + /* All BODY type buffers for 2nd ring */ + val = VMXNET3_RXD_BTYPE_BODY; + } + + /* + * Load mbuf pointer into buf_info[ring_size] + * buf_info structure is equivalent to cookie for virtio-virtqueue + */ + buf_info->m = mbuf; + buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM); + buf_info->bufPA = rte_mbuf_data_iova_default(mbuf); + + /* Load Rx Descriptor with the buffer's GPA */ + rxd->addr = buf_info->bufPA; + + /* After this point rxd->addr MUST not be NULL */ + rxd->btype = val; + rxd->len = buf_info->len; + /* Flip gen bit at the end to change ownership */ + rxd->gen = ring->gen; + + vmxnet3_cmd_ring_adv_next2fill(ring); +} +/* + * Allocates mbufs and clusters. Post rx descriptors with buffer details + * so that device can receive packets in those buffers. + * Ring layout: + * Among the two rings, 1st ring contains buffers of type 0 and type 1. + * bufs_per_pkt is set such that for non-LRO cases all the buffers required + * by a frame will fit in 1st ring (1st buf of type0 and rest of type1). + * 2nd ring contains buffers of type 1 alone. Second ring mostly be used + * only for LRO. + */ +static int +vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) +{ + int err = 0; + uint32_t i = 0; + struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; + + while (vmxnet3_cmd_ring_desc_avail(ring) > 0) { + struct rte_mbuf *mbuf; + + /* Allocate blank mbuf for the current Rx Descriptor */ + mbuf = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(mbuf == NULL)) { + PMD_RX_LOG(ERR, "Error allocating mbuf"); + rxq->stats.rx_buf_alloc_failure++; + err = ENOMEM; + break; + } + + vmxnet3_renew_desc(rxq, ring_id, mbuf); + i++; + } + + /* Return error only if no buffers are posted at present */ + if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1)) + return -err; + else + return i; +} + +/* MSS not provided by vmxnet3, guess one with available information */ +static uint16_t +vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd, + struct rte_mbuf *rxm) +{ + uint32_t hlen, slen; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + struct tcp_hdr *tcp_hdr; + char *ptr; + + RTE_ASSERT(rcd->tcp); + + ptr = rte_pktmbuf_mtod(rxm, char *); + slen = rte_pktmbuf_data_len(rxm); + hlen = sizeof(struct ether_hdr); + + if (rcd->v4) { + if (unlikely(slen < hlen + sizeof(struct ipv4_hdr))) + return hw->mtu - sizeof(struct ipv4_hdr) + - sizeof(struct tcp_hdr); + + ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen); + hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + IPV4_IHL_MULTIPLIER; + } else if (rcd->v6) { + if (unlikely(slen < hlen + sizeof(struct ipv6_hdr))) + return hw->mtu - sizeof(struct ipv6_hdr) - + sizeof(struct tcp_hdr); + + ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen); + hlen += sizeof(struct ipv6_hdr); + if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) { + int frag; + + rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm, + &hlen, &frag); + } + } + + if (unlikely(slen < hlen + sizeof(struct tcp_hdr))) + return hw->mtu - hlen - sizeof(struct tcp_hdr) + + sizeof(struct ether_hdr); + + tcp_hdr = (struct tcp_hdr *)(ptr + hlen); + hlen += (tcp_hdr->data_off & 0xf0) >> 2; + + if (rxm->udata64 > 1) + return (rte_pktmbuf_pkt_len(rxm) - hlen + + rxm->udata64 - 1) / rxm->udata64; + else + return hw->mtu - hlen + sizeof(struct ether_hdr); +} + +/* Receive side checksum and other offloads */ +static inline void +vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd, + struct rte_mbuf *rxm, const uint8_t sop) +{ + uint64_t ol_flags = rxm->ol_flags; + uint32_t packet_type = rxm->packet_type; + + /* Offloads set in sop */ + if (sop) { + /* Set packet type */ + packet_type |= RTE_PTYPE_L2_ETHER; + + /* Check large packet receive */ + if (VMXNET3_VERSION_GE_2(hw) && + rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { + const Vmxnet3_RxCompDescExt *rcde = + (const Vmxnet3_RxCompDescExt *)rcd; + + rxm->tso_segsz = rcde->mss; + rxm->udata64 = rcde->segCnt; + ol_flags |= PKT_RX_LRO; + } + } else { /* Offloads set in eop */ + /* Check for RSS */ + if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) { + ol_flags |= PKT_RX_RSS_HASH; + rxm->hash.rss = rcd->rssHash; + } + + /* Check for hardware stripped VLAN tag */ + if (rcd->ts) { + ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED); + rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci); + } + + /* Check packet type, checksum errors, etc. */ + if (rcd->cnc) { + ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN; + } else { + if (rcd->v4) { + packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; + + if (rcd->ipc) + ol_flags |= PKT_RX_IP_CKSUM_GOOD; + else + ol_flags |= PKT_RX_IP_CKSUM_BAD; + + if (rcd->tuc) { + ol_flags |= PKT_RX_L4_CKSUM_GOOD; + if (rcd->tcp) + packet_type |= RTE_PTYPE_L4_TCP; + else + packet_type |= RTE_PTYPE_L4_UDP; + } else { + if (rcd->tcp) { + packet_type |= RTE_PTYPE_L4_TCP; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } else if (rcd->udp) { + packet_type |= RTE_PTYPE_L4_UDP; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } + } + } else if (rcd->v6) { + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + + if (rcd->tuc) { + ol_flags |= PKT_RX_L4_CKSUM_GOOD; + if (rcd->tcp) + packet_type |= RTE_PTYPE_L4_TCP; + else + packet_type |= RTE_PTYPE_L4_UDP; + } else { + if (rcd->tcp) { + packet_type |= RTE_PTYPE_L4_TCP; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } else if (rcd->udp) { + packet_type |= RTE_PTYPE_L4_UDP; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } + } + } else { + packet_type |= RTE_PTYPE_UNKNOWN; + } + + /* Old variants of vmxnet3 do not provide MSS */ + if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0) + rxm->tso_segsz = vmxnet3_guess_mss(hw, + rcd, rxm); + } + } + + rxm->ol_flags = ol_flags; + rxm->packet_type = packet_type; +} + +/* + * Process the Rx Completion Ring of given vmxnet3_rx_queue + * for nb_pkts burst and return the number of packets received + */ +uint16_t +vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + uint16_t nb_rx; + uint32_t nb_rxd, idx; + uint8_t ring_idx; + vmxnet3_rx_queue_t *rxq; + Vmxnet3_RxCompDesc *rcd; + vmxnet3_buf_info_t *rbi; + Vmxnet3_RxDesc *rxd; + struct rte_mbuf *rxm = NULL; + struct vmxnet3_hw *hw; + + nb_rx = 0; + ring_idx = 0; + nb_rxd = 0; + idx = 0; + + rxq = rx_queue; + hw = rxq->hw; + + rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; + + if (unlikely(rxq->stopped)) { + PMD_RX_LOG(DEBUG, "Rx queue is stopped."); + return 0; + } + + while (rcd->gen == rxq->comp_ring.gen) { + struct rte_mbuf *newm; + + if (nb_rx >= nb_pkts) + break; + + newm = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(newm == NULL)) { + PMD_RX_LOG(ERR, "Error allocating mbuf"); + rxq->stats.rx_buf_alloc_failure++; + break; + } + + idx = rcd->rxdIdx; + ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID); + rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx; + RTE_SET_USED(rxd); /* used only for assert when enabled */ + rbi = rxq->cmd_ring[ring_idx].buf_info + idx; + + PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx); + + RTE_ASSERT(rcd->len <= rxd->len); + RTE_ASSERT(rbi->m); + + /* Get the packet buffer pointer from buf_info */ + rxm = rbi->m; + + /* Clear descriptor associated buf_info to be reused */ + rbi->m = NULL; + rbi->bufPA = 0; + + /* Update the index that we received a packet */ + rxq->cmd_ring[ring_idx].next2comp = idx; + + /* For RCD with EOP set, check if there is frame error */ + if (unlikely(rcd->eop && rcd->err)) { + rxq->stats.drop_total++; + rxq->stats.drop_err++; + + if (!rcd->fcs) { + rxq->stats.drop_fcs++; + PMD_RX_LOG(ERR, "Recv packet dropped due to frame err."); + } + PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d", + (int)(rcd - (struct Vmxnet3_RxCompDesc *) + rxq->comp_ring.base), rcd->rxdIdx); + rte_pktmbuf_free_seg(rxm); + if (rxq->start_seg) { + struct rte_mbuf *start = rxq->start_seg; + + rxq->start_seg = NULL; + rte_pktmbuf_free(start); + } + goto rcd_done; + } + + /* Initialize newly received packet buffer */ + rxm->port = rxq->port_id; + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = (uint16_t)rcd->len; + rxm->data_len = (uint16_t)rcd->len; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + rxm->packet_type = 0; + + /* + * If this is the first buffer of the received packet, + * set the pointer to the first mbuf of the packet + * Otherwise, update the total length and the number of segments + * of the current scattered packet, and update the pointer to + * the last mbuf of the current packet. + */ + if (rcd->sop) { + RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD); + + if (unlikely(rcd->len == 0)) { + RTE_ASSERT(rcd->eop); + + PMD_RX_LOG(DEBUG, + "Rx buf was skipped. rxring[%d][%d])", + ring_idx, idx); + rte_pktmbuf_free_seg(rxm); + goto rcd_done; + } + + if (vmxnet3_rx_data_ring(hw, rcd->rqID)) { + uint8_t *rdd = rxq->data_ring.base + + idx * rxq->data_desc_size; + + RTE_ASSERT(VMXNET3_VERSION_GE_3(hw)); + rte_memcpy(rte_pktmbuf_mtod(rxm, char *), + rdd, rcd->len); + } + + rxq->start_seg = rxm; + rxq->last_seg = rxm; + vmxnet3_rx_offload(hw, rcd, rxm, 1); + } else { + struct rte_mbuf *start = rxq->start_seg; + + RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY); + + if (rxm->data_len) { + start->pkt_len += rxm->data_len; + start->nb_segs++; + + rxq->last_seg->next = rxm; + rxq->last_seg = rxm; + } else { + rte_pktmbuf_free_seg(rxm); + } + } + + if (rcd->eop) { + struct rte_mbuf *start = rxq->start_seg; + + vmxnet3_rx_offload(hw, rcd, start, 0); + rx_pkts[nb_rx++] = start; + rxq->start_seg = NULL; + } + +rcd_done: + rxq->cmd_ring[ring_idx].next2comp = idx; + VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, + rxq->cmd_ring[ring_idx].size); + + /* It's time to renew descriptors */ + vmxnet3_renew_desc(rxq, ring_idx, newm); + if (unlikely(rxq->shared->ctrl.updateRxProd)) { + VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), + rxq->cmd_ring[ring_idx].next2fill); + } + + /* Advance to the next descriptor in comp_ring */ + vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring); + + rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; + nb_rxd++; + if (nb_rxd > rxq->cmd_ring[0].size) { + PMD_RX_LOG(ERR, "Used up quota of receiving packets," + " relinquish control."); + break; + } + } + + if (unlikely(nb_rxd == 0)) { + uint32_t avail; + for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) { + avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]); + if (unlikely(avail > 0)) { + /* try to alloc new buf and renew descriptors */ + vmxnet3_post_rx_bufs(rxq, ring_idx); + } + } + if (unlikely(rxq->shared->ctrl.updateRxProd)) { + for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) { + VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), + rxq->cmd_ring[ring_idx].next2fill); + } + } + } + + return nb_rx; +} + +int +vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf __rte_unused) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + const struct rte_memzone *mz; + struct vmxnet3_tx_queue *txq; + struct vmxnet3_cmd_ring *ring; + struct vmxnet3_comp_ring *comp_ring; + struct vmxnet3_data_ring *data_ring; + int size; + + PMD_INIT_FUNC_TRACE(); + + txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), + RTE_CACHE_LINE_SIZE); + if (txq == NULL) { + PMD_INIT_LOG(ERR, "Can not allocate tx queue structure"); + return -ENOMEM; + } + + txq->queue_id = queue_idx; + txq->port_id = dev->data->port_id; + txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */ + txq->hw = hw; + txq->qid = queue_idx; + txq->stopped = TRUE; + txq->txdata_desc_size = hw->txdata_desc_size; + + ring = &txq->cmd_ring; + comp_ring = &txq->comp_ring; + data_ring = &txq->data_ring; + + /* Tx vmxnet ring length should be between 512-4096 */ + if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) { + PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u", + VMXNET3_DEF_TX_RING_SIZE); + return -EINVAL; + } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) { + PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u", + VMXNET3_TX_RING_MAX_SIZE); + return -EINVAL; + } else { + ring->size = nb_desc; + ring->size &= ~VMXNET3_RING_SIZE_MASK; + } + comp_ring->size = data_ring->size = ring->size; + + /* Tx vmxnet rings structure initialization*/ + ring->next2fill = 0; + ring->next2comp = 0; + ring->gen = VMXNET3_INIT_GEN; + comp_ring->next2proc = 0; + comp_ring->gen = VMXNET3_INIT_GEN; + + size = sizeof(struct Vmxnet3_TxDesc) * ring->size; + size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size; + size += txq->txdata_desc_size * data_ring->size; + + mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size, + VMXNET3_RING_BA_ALIGN, socket_id); + if (mz == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); + return -ENOMEM; + } + txq->mz = mz; + memset(mz->addr, 0, mz->len); + + /* cmd_ring initialization */ + ring->base = mz->addr; + ring->basePA = mz->iova; + + /* comp_ring initialization */ + comp_ring->base = ring->base + ring->size; + comp_ring->basePA = ring->basePA + + (sizeof(struct Vmxnet3_TxDesc) * ring->size); + + /* data_ring initialization */ + data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size); + data_ring->basePA = comp_ring->basePA + + (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size); + + /* cmd_ring0 buf_info allocation */ + ring->buf_info = rte_zmalloc("tx_ring_buf_info", + ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE); + if (ring->buf_info == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure"); + return -ENOMEM; + } + + /* Update the data portion with txq */ + dev->data->tx_queues[queue_idx] = txq; + + return 0; +} + +int +vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + __rte_unused const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + const struct rte_memzone *mz; + struct vmxnet3_rx_queue *rxq; + struct vmxnet3_hw *hw = dev->data->dev_private; + struct vmxnet3_cmd_ring *ring0, *ring1, *ring; + struct vmxnet3_comp_ring *comp_ring; + struct vmxnet3_rx_data_ring *data_ring; + int size; + uint8_t i; + char mem_name[32]; + + PMD_INIT_FUNC_TRACE(); + + rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) { + PMD_INIT_LOG(ERR, "Can not allocate rx queue structure"); + return -ENOMEM; + } + + rxq->mp = mp; + rxq->queue_id = queue_idx; + rxq->port_id = dev->data->port_id; + rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */ + rxq->hw = hw; + rxq->qid1 = queue_idx; + rxq->qid2 = queue_idx + hw->num_rx_queues; + rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues; + rxq->data_desc_size = hw->rxdata_desc_size; + rxq->stopped = TRUE; + + ring0 = &rxq->cmd_ring[0]; + ring1 = &rxq->cmd_ring[1]; + comp_ring = &rxq->comp_ring; + data_ring = &rxq->data_ring; + + /* Rx vmxnet rings length should be between 256-4096 */ + if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) { + PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256"); + return -EINVAL; + } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) { + PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096"); + return -EINVAL; + } else { + ring0->size = nb_desc; + ring0->size &= ~VMXNET3_RING_SIZE_MASK; + ring1->size = ring0->size; + } + + comp_ring->size = ring0->size + ring1->size; + data_ring->size = ring0->size; + + /* Rx vmxnet rings structure initialization */ + ring0->next2fill = 0; + ring1->next2fill = 0; + ring0->next2comp = 0; + ring1->next2comp = 0; + ring0->gen = VMXNET3_INIT_GEN; + ring1->gen = VMXNET3_INIT_GEN; + comp_ring->next2proc = 0; + comp_ring->gen = VMXNET3_INIT_GEN; + + size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size); + size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size; + if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) + size += rxq->data_desc_size * data_ring->size; + + mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size, + VMXNET3_RING_BA_ALIGN, socket_id); + if (mz == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); + return -ENOMEM; + } + rxq->mz = mz; + memset(mz->addr, 0, mz->len); + + /* cmd_ring0 initialization */ + ring0->base = mz->addr; + ring0->basePA = mz->iova; + + /* cmd_ring1 initialization */ + ring1->base = ring0->base + ring0->size; + ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size; + + /* comp_ring initialization */ + comp_ring->base = ring1->base + ring1->size; + comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) * + ring1->size; + + /* data_ring initialization */ + if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) { + data_ring->base = + (uint8_t *)(comp_ring->base + comp_ring->size); + data_ring->basePA = comp_ring->basePA + + sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size; + } + + /* cmd_ring0-cmd_ring1 buf_info allocation */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) { + + ring = &rxq->cmd_ring[i]; + ring->rid = i; + snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i); + + ring->buf_info = rte_zmalloc(mem_name, + ring->size * sizeof(vmxnet3_buf_info_t), + RTE_CACHE_LINE_SIZE); + if (ring->buf_info == NULL) { + PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure"); + return -ENOMEM; + } + } + + /* Update the data portion with rxq */ + dev->data->rx_queues[queue_idx] = rxq; + + return 0; +} + +/* + * Initializes Receive Unit + * Load mbufs in rx queue in advance + */ +int +vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + + int i, ret; + uint8_t j; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < hw->num_rx_queues; i++) { + vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; + + for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) { + /* Passing 0 as alloc_num will allocate full ring */ + ret = vmxnet3_post_rx_bufs(rxq, j); + if (ret <= 0) { + PMD_INIT_LOG(ERR, + "ERROR: Posting Rxq: %d buffers ring: %d", + i, j); + return -ret; + } + /* + * Updating device with the index:next2fill to fill the + * mbufs for coming packets. + */ + if (unlikely(rxq->shared->ctrl.updateRxProd)) { + VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN), + rxq->cmd_ring[j].next2fill); + } + } + rxq->stopped = FALSE; + rxq->start_seg = NULL; + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i]; + + txq->stopped = FALSE; + } + + return 0; +} + +static uint8_t rss_intel_key[40] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, + 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, + 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA, +}; + +/* + * Additional RSS configurations based on vmxnet v4+ APIs + */ +int +vmxnet3_v4_rss_configure(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + Vmxnet3_DriverShared *shared = hw->shared; + Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + struct rte_eth_rss_conf *port_rss_conf; + uint64_t rss_hf; + uint32_t ret; + + PMD_INIT_FUNC_TRACE(); + + cmdInfo->setRSSFields = 0; + port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf; + rss_hf = port_rss_conf->rss_hf & + (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL); + + if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) + cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) + cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6; + + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RSS_FIELDS); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + + if (ret != VMXNET3_SUCCESS) { + PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret); + } + + return ret; +} + +/* + * Configure RSS feature + */ +int +vmxnet3_rss_configure(struct rte_eth_dev *dev) +{ + struct vmxnet3_hw *hw = dev->data->dev_private; + struct VMXNET3_RSSConf *dev_rss_conf; + struct rte_eth_rss_conf *port_rss_conf; + uint64_t rss_hf; + uint8_t i, j; + + PMD_INIT_FUNC_TRACE(); + + dev_rss_conf = hw->rss_conf; + port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf; + + /* loading hashFunc */ + dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ; + /* loading hashKeySize */ + dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE; + /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ + dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4); + + if (port_rss_conf->rss_key == NULL) { + /* Default hash key */ + port_rss_conf->rss_key = rss_intel_key; + } + + /* loading hashKey */ + memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, + dev_rss_conf->hashKeySize); + + /* loading indTable */ + for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) { + if (j == dev->data->nb_rx_queues) + j = 0; + dev_rss_conf->indTable[i] = j; + } + + /* loading hashType */ + dev_rss_conf->hashType = 0; + rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL; + if (rss_hf & ETH_RSS_IPV4) + dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4; + if (rss_hf & ETH_RSS_IPV6) + dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6; + + return VMXNET3_SUCCESS; +} |