diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/seastar/dpdk/drivers/net/avp | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/drivers/net/avp')
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/Makefile | 57 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/avp_ethdev.c | 2312 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/avp_logs.h | 59 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/rte_avp_common.h | 432 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/rte_avp_fifo.h | 169 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/avp/rte_pmd_avp_version.map | 4 |
6 files changed, 3033 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/avp/Makefile b/src/seastar/dpdk/drivers/net/avp/Makefile new file mode 100644 index 00000000..cd465aac --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2013-2017, Wind River Systems, Inc. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Wind River Systems nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_avp.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_pmd_avp_version.map + +LIBABIVER := 1 + +# install public header files to enable compilation of the hypervisor level +# dpdk application +SYMLINK-$(CONFIG_RTE_LIBRTE_AVP_PMD)-include += rte_avp_common.h +SYMLINK-$(CONFIG_RTE_LIBRTE_AVP_PMD)-include += rte_avp_fifo.h + +# +# all source files are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp_ethdev.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/seastar/dpdk/drivers/net/avp/avp_ethdev.c b/src/seastar/dpdk/drivers/net/avp/avp_ethdev.c new file mode 100644 index 00000000..fe6849f5 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/avp_ethdev.c @@ -0,0 +1,2312 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2013-2017, Wind River Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3) Neither the name of Wind River Systems nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_ethdev_pci.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_spinlock.h> +#include <rte_byteorder.h> +#include <rte_dev.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_io.h> + +#include "rte_avp_common.h" +#include "rte_avp_fifo.h" + +#include "avp_logs.h" + + +static int avp_dev_create(struct rte_pci_device *pci_dev, + struct rte_eth_dev *eth_dev); + +static int avp_dev_configure(struct rte_eth_dev *dev); +static int avp_dev_start(struct rte_eth_dev *dev); +static void avp_dev_stop(struct rte_eth_dev *dev); +static void avp_dev_close(struct rte_eth_dev *dev); +static void avp_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask); +static int avp_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete); +static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev); +static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev); + +static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *pool); + +static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +static uint16_t avp_recv_scattered_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_recv_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_xmit_scattered_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_xmit_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +static void avp_dev_rx_queue_release(void *rxq); +static void avp_dev_tx_queue_release(void *txq); + +static void avp_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats); +static void avp_dev_stats_reset(struct rte_eth_dev *dev); + + +#define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) + + +#define AVP_MAX_RX_BURST 64 +#define AVP_MAX_TX_BURST 64 +#define AVP_MAX_MAC_ADDRS 1 +#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN + + +/* + * Defines the number of microseconds to wait before checking the response + * queue for completion. + */ +#define AVP_REQUEST_DELAY_USECS (5000) + +/* + * Defines the number times to check the response queue for completion before + * declaring a timeout. + */ +#define AVP_MAX_REQUEST_RETRY (100) + +/* Defines the current PCI driver version number */ +#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION + +/* + * The set of PCI devices this driver supports + */ +static const struct rte_pci_id pci_id_avp_map[] = { + { .vendor_id = RTE_AVP_PCI_VENDOR_ID, + .device_id = RTE_AVP_PCI_DEVICE_ID, + .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID, + .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID, + .class_id = RTE_CLASS_ANY_ID, + }, + + { .vendor_id = 0, /* sentinel */ + }, +}; + +/* + * dev_ops for avp, bare necessities for basic operation + */ +static const struct eth_dev_ops avp_eth_dev_ops = { + .dev_configure = avp_dev_configure, + .dev_start = avp_dev_start, + .dev_stop = avp_dev_stop, + .dev_close = avp_dev_close, + .dev_infos_get = avp_dev_info_get, + .vlan_offload_set = avp_vlan_offload_set, + .stats_get = avp_dev_stats_get, + .stats_reset = avp_dev_stats_reset, + .link_update = avp_dev_link_update, + .promiscuous_enable = avp_dev_promiscuous_enable, + .promiscuous_disable = avp_dev_promiscuous_disable, + .rx_queue_setup = avp_dev_rx_queue_setup, + .rx_queue_release = avp_dev_rx_queue_release, + .tx_queue_setup = avp_dev_tx_queue_setup, + .tx_queue_release = avp_dev_tx_queue_release, +}; + +/**@{ AVP device flags */ +#define AVP_F_PROMISC (1 << 1) +#define AVP_F_CONFIGURED (1 << 2) +#define AVP_F_LINKUP (1 << 3) +#define AVP_F_DETACHED (1 << 4) +/**@} */ + +/* Ethernet device validation marker */ +#define AVP_ETHDEV_MAGIC 0x92972862 + +/* + * Defines the AVP device attributes which are attached to an RTE ethernet + * device + */ +struct avp_dev { + uint32_t magic; /**< Memory validation marker */ + uint64_t device_id; /**< Unique system identifier */ + struct ether_addr ethaddr; /**< Host specified MAC address */ + struct rte_eth_dev_data *dev_data; + /**< Back pointer to ethernet device data */ + volatile uint32_t flags; /**< Device operational flags */ + uint8_t port_id; /**< Ethernet port identifier */ + struct rte_mempool *pool; /**< pkt mbuf mempool */ + unsigned int guest_mbuf_size; /**< local pool mbuf size */ + unsigned int host_mbuf_size; /**< host mbuf size */ + unsigned int max_rx_pkt_len; /**< maximum receive unit */ + uint32_t host_features; /**< Supported feature bitmap */ + uint32_t features; /**< Enabled feature bitmap */ + unsigned int num_tx_queues; /**< Negotiated number of transmit queues */ + unsigned int max_tx_queues; /**< Maximum number of transmit queues */ + unsigned int num_rx_queues; /**< Negotiated number of receive queues */ + unsigned int max_rx_queues; /**< Maximum number of receive queues */ + + struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */ + struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */ + struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES]; + /**< Allocated mbufs queue */ + struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES]; + /**< To be freed mbufs queue */ + + /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */ + rte_spinlock_t lock; + + /* For request & response */ + struct rte_avp_fifo *req_q; /**< Request queue */ + struct rte_avp_fifo *resp_q; /**< Response queue */ + void *host_sync_addr; /**< (host) Req/Resp Mem address */ + void *sync_addr; /**< Req/Resp Mem address */ + void *host_mbuf_addr; /**< (host) MBUF pool start address */ + void *mbuf_addr; /**< MBUF pool start address */ +} __rte_cache_aligned; + +/* RTE ethernet private data */ +struct avp_adapter { + struct avp_dev avp; +} __rte_cache_aligned; + + +/* 32-bit MMIO register write */ +#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr)) + +/* 32-bit MMIO register read */ +#define AVP_READ32(_addr) rte_read32_relaxed((_addr)) + +/* Macro to cast the ethernet device private data to a AVP object */ +#define AVP_DEV_PRIVATE_TO_HW(adapter) \ + (&((struct avp_adapter *)adapter)->avp) + +/* + * Defines the structure of a AVP device queue for the purpose of handling the + * receive and transmit burst callback functions + */ +struct avp_queue { + struct rte_eth_dev_data *dev_data; + /**< Backpointer to ethernet device data */ + struct avp_dev *avp; /**< Backpointer to AVP device */ + uint16_t queue_id; + /**< Queue identifier used for indexing current queue */ + uint16_t queue_base; + /**< Base queue identifier for queue servicing */ + uint16_t queue_limit; + /**< Maximum queue identifier for queue servicing */ + + uint64_t packets; + uint64_t bytes; + uint64_t errors; +}; + +/* send a request and wait for a response + * + * @warning must be called while holding the avp->lock spinlock. + */ +static int +avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) +{ + unsigned int retry = AVP_MAX_REQUEST_RETRY; + void *resp_addr = NULL; + unsigned int count; + int ret; + + PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id); + + request->result = -ENOTSUP; + + /* Discard any stale responses before starting a new request */ + while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1)) + PMD_DRV_LOG(DEBUG, "Discarding stale response\n"); + + rte_memcpy(avp->sync_addr, request, sizeof(*request)); + count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1); + if (count < 1) { + PMD_DRV_LOG(ERR, "Cannot send request %u to host\n", + request->req_id); + ret = -EBUSY; + goto done; + } + + while (retry--) { + /* wait for a response */ + usleep(AVP_REQUEST_DELAY_USECS); + + count = avp_fifo_count(avp->resp_q); + if (count >= 1) { + /* response received */ + break; + } + + if ((count < 1) && (retry == 0)) { + PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n", + request->req_id); + ret = -ETIME; + goto done; + } + } + + /* retrieve the response */ + count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1); + if ((count != 1) || (resp_addr != avp->host_sync_addr)) { + PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n", + count, resp_addr, avp->host_sync_addr); + ret = -ENODATA; + goto done; + } + + /* copy to user buffer */ + rte_memcpy(request, avp->sync_addr, sizeof(*request)); + ret = 0; + + PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n", + request->result, request->req_id); + +done: + return ret; +} + +static int +avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_request request; + int ret; + + /* setup a link state change request */ + memset(&request, 0, sizeof(request)); + request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; + request.if_up = state; + + ret = avp_dev_process_request(avp, &request); + + return ret == 0 ? request.result : ret; +} + +static int +avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, + struct rte_avp_device_config *config) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_request request; + int ret; + + /* setup a configure request */ + memset(&request, 0, sizeof(request)); + request.req_id = RTE_AVP_REQ_CFG_DEVICE; + memcpy(&request.config, config, sizeof(request.config)); + + ret = avp_dev_process_request(avp, &request); + + return ret == 0 ? request.result : ret; +} + +static int +avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_request request; + int ret; + + /* setup a shutdown request */ + memset(&request, 0, sizeof(request)); + request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; + + ret = avp_dev_process_request(avp, &request); + + return ret == 0 ? request.result : ret; +} + +/* translate from host mbuf virtual address to guest virtual address */ +static inline void * +avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) +{ + return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, + (uintptr_t)avp->host_mbuf_addr), + (uintptr_t)avp->mbuf_addr); +} + +/* translate from host physical address to guest virtual address */ +static void * +avp_dev_translate_address(struct rte_eth_dev *eth_dev, + phys_addr_t host_phys_addr) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct rte_mem_resource *resource; + struct rte_avp_memmap_info *info; + struct rte_avp_memmap *map; + off_t offset; + void *addr; + unsigned int i; + + addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr; + resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR]; + info = (struct rte_avp_memmap_info *)resource->addr; + + offset = 0; + for (i = 0; i < info->nb_maps; i++) { + /* search all segments looking for a matching address */ + map = &info->maps[i]; + + if ((host_phys_addr >= map->phys_addr) && + (host_phys_addr < (map->phys_addr + map->length))) { + /* address is within this segment */ + offset += (host_phys_addr - map->phys_addr); + addr = RTE_PTR_ADD(addr, offset); + + PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", + host_phys_addr, addr); + + return addr; + } + offset += map->length; + } + + return NULL; +} + +/* verify that the incoming device version is compatible with our version */ +static int +avp_dev_version_check(uint32_t version) +{ + uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION); + uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version); + + if (device <= driver) { + /* the host driver version is less than or equal to ours */ + return 0; + } + + return 1; +} + +/* verify that memory regions have expected version and validation markers */ +static int +avp_dev_check_regions(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct rte_avp_memmap_info *memmap; + struct rte_avp_device_info *info; + struct rte_mem_resource *resource; + unsigned int i; + + /* Dump resource info for debug */ + for (i = 0; i < PCI_MAX_RESOURCE; i++) { + resource = &pci_dev->mem_resource[i]; + if ((resource->phys_addr == 0) || (resource->len == 0)) + continue; + + PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n", + i, resource->phys_addr, + resource->len, resource->addr); + + switch (i) { + case RTE_AVP_PCI_MEMMAP_BAR: + memmap = (struct rte_avp_memmap_info *)resource->addr; + if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) || + (memmap->version != RTE_AVP_MEMMAP_VERSION)) { + PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n", + memmap->magic, memmap->version); + return -EINVAL; + } + break; + + case RTE_AVP_PCI_DEVICE_BAR: + info = (struct rte_avp_device_info *)resource->addr; + if ((info->magic != RTE_AVP_DEVICE_MAGIC) || + avp_dev_version_check(info->version)) { + PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n", + info->magic, info->version, + AVP_DPDK_DRIVER_VERSION); + return -EINVAL; + } + break; + + case RTE_AVP_PCI_MEMORY_BAR: + case RTE_AVP_PCI_MMIO_BAR: + if (resource->addr == NULL) { + PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n", + i); + return -EINVAL; + } + break; + + case RTE_AVP_PCI_MSIX_BAR: + default: + /* no validation required */ + break; + } + } + + return 0; +} + +static int +avp_dev_detach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(NOTICE, "port %u already detached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* shutdown the device first so the host stops sending us packets. */ + ret = avp_dev_ctrl_shutdown(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", + ret); + avp->flags &= ~AVP_F_DETACHED; + goto unlock; + } + + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* wait for queues to acknowledge the presence of the detach flag */ + rte_delay_ms(1); + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +_avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) +{ + struct avp_dev *avp = + AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct avp_queue *rxq; + uint16_t queue_count; + uint16_t remainder; + + rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id]; + + /* + * Must map all AVP fifos as evenly as possible between the configured + * device queues. Each device queue will service a subset of the AVP + * fifos. If there is an odd number of device queues the first set of + * device queues will get the extra AVP fifos. + */ + queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues; + remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues; + if (rx_queue_id < remainder) { + /* these queues must service one extra FIFO */ + rxq->queue_base = rx_queue_id * (queue_count + 1); + rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1; + } else { + /* these queues service the regular number of FIFO */ + rxq->queue_base = ((remainder * (queue_count + 1)) + + ((rx_queue_id - remainder) * queue_count)); + rxq->queue_limit = rxq->queue_base + queue_count - 1; + } + + PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n", + rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit); + + rxq->queue_id = rxq->queue_base; +} + +static void +_avp_set_queue_counts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_info *host_info; + void *addr; + + addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; + host_info = (struct rte_avp_device_info *)addr; + + /* + * the transmit direction is not negotiated beyond respecting the max + * number of queues because the host can handle arbitrary guest tx + * queues (host rx queues). + */ + avp->num_tx_queues = eth_dev->data->nb_tx_queues; + + /* + * the receive direction is more restrictive. The host requires a + * minimum number of guest rx queues (host tx queues) therefore + * negotiate a value that is at least as large as the host minimum + * requirement. If the host and guest values are not identical then a + * mapping will be established in the receive_queue_setup function. + */ + avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues, + eth_dev->data->nb_rx_queues); + + PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n", + avp->num_tx_queues, avp->num_rx_queues); +} + +static int +avp_dev_attach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_config config; + unsigned int i; + int ret; + + PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (!(avp->flags & AVP_F_DETACHED)) { + PMD_DRV_LOG(NOTICE, "port %u already attached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* + * make sure that the detached flag is set prior to reconfiguring the + * queues. + */ + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* + * re-run the device create utility which will parse the new host info + * and setup the AVP device queue pointers. + */ + ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", + ret); + goto unlock; + } + + if (avp->flags & AVP_F_CONFIGURED) { + /* + * Update the receive queue mapping to handle cases where the + * source and destination hosts have different queue + * requirements. As long as the DETACHED flag is asserted the + * queue table should not be referenced so it should be safe to + * update it. + */ + _avp_set_queue_counts(eth_dev); + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + _avp_set_rx_queue_mappings(eth_dev, i); + + /* + * Update the host with our config details so that it knows the + * device is active. + */ + memset(&config, 0, sizeof(config)); + config.device_id = avp->device_id; + config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; + config.driver_version = AVP_DPDK_DRIVER_VERSION; + config.features = avp->features; + config.num_tx_queues = avp->num_tx_queues; + config.num_rx_queues = avp->num_rx_queues; + config.if_up = !!(avp->flags & AVP_F_LINKUP); + + ret = avp_dev_ctrl_set_config(eth_dev, &config); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", + ret); + goto unlock; + } + } + + rte_wmb(); + avp->flags &= ~AVP_F_DETACHED; + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +avp_dev_interrupt_handler(void *data) +{ + struct rte_eth_dev *eth_dev = data; + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t status, value; + int ret; + + if (registers == NULL) + rte_panic("no mapped MMIO register space\n"); + + /* read the interrupt status register + * note: this register clears on read so all raised interrupts must be + * handled or remembered for later processing + */ + status = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_INTERRUPT_STATUS_OFFSET)); + + if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) { + /* handle interrupt based on current status */ + value = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + switch (value) { + case RTE_AVP_MIGRATION_DETACHED: + ret = avp_dev_detach(eth_dev); + break; + case RTE_AVP_MIGRATION_ATTACHED: + ret = avp_dev_attach(eth_dev); + break; + default: + PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", + value); + ret = -EINVAL; + } + + /* acknowledge the request by writing out our current status */ + value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR); + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + + PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); + } + + if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) + PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", + status); + + /* re-enable UIO interrupt handling */ + ret = rte_intr_enable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", + ret); + /* continue */ + } +} + +static int +avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return -EINVAL; + + /* enable UIO interrupt handling */ + ret = rte_intr_enable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + /* inform the device that all interrupts are enabled */ + AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + return 0; +} + +static int +avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return 0; + + /* inform the device that all interrupts are disabled */ + AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + /* enable UIO interrupt handling */ + ret = rte_intr_disable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + return 0; +} + +static int +avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + int ret; + + /* register a callback handler with UIO for interrupt notifications */ + ret = rte_intr_callback_register(&pci_dev->intr_handle, + avp_dev_interrupt_handler, + (void *)eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", + ret); + return ret; + } + + /* enable interrupt processing */ + return avp_dev_enable_interrupts(eth_dev); +} + +static int +avp_dev_migration_pending(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t value; + + if (registers == NULL) + return 0; + + value = AVP_READ32(RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + if (value == RTE_AVP_MIGRATION_DETACHED) { + /* migration is in progress; ack it if we have not already */ + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + return 1; + } + return 0; +} + +/* + * create a AVP device using the supplied device info by first translating it + * to guest address space(s). + */ +static int +avp_dev_create(struct rte_pci_device *pci_dev, + struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_info *host_info; + struct rte_mem_resource *resource; + unsigned int i; + + resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR]; + if (resource->addr == NULL) { + PMD_DRV_LOG(ERR, "BAR%u is not mapped\n", + RTE_AVP_PCI_DEVICE_BAR); + return -EFAULT; + } + host_info = (struct rte_avp_device_info *)resource->addr; + + if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) || + avp_dev_version_check(host_info->version)) { + PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n", + host_info->magic, host_info->version, + AVP_DPDK_DRIVER_VERSION); + return -EINVAL; + } + + PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n", + RTE_AVP_GET_RELEASE_VERSION(host_info->version), + RTE_AVP_GET_MAJOR_VERSION(host_info->version), + RTE_AVP_GET_MINOR_VERSION(host_info->version)); + + PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n", + host_info->min_tx_queues, host_info->max_tx_queues); + PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n", + host_info->min_rx_queues, host_info->max_rx_queues); + PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n", + host_info->features); + + if (avp->magic != AVP_ETHDEV_MAGIC) { + /* + * First time initialization (i.e., not during a VM + * migration) + */ + memset(avp, 0, sizeof(*avp)); + avp->magic = AVP_ETHDEV_MAGIC; + avp->dev_data = eth_dev->data; + avp->port_id = eth_dev->data->port_id; + avp->host_mbuf_size = host_info->mbuf_size; + avp->host_features = host_info->features; + rte_spinlock_init(&avp->lock); + memcpy(&avp->ethaddr.addr_bytes[0], + host_info->ethaddr, ETHER_ADDR_LEN); + /* adjust max values to not exceed our max */ + avp->max_tx_queues = + RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); + avp->max_rx_queues = + RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES); + } else { + /* Re-attaching during migration */ + + /* TODO... requires validation of host values */ + if ((host_info->features & avp->features) != avp->features) { + PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n", + avp->features, host_info->features); + /* this should not be possible; continue for now */ + } + } + + /* the device id is allowed to change over migrations */ + avp->device_id = host_info->device_id; + + /* translate incoming host addresses to guest address space */ + PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n", + host_info->tx_phys); + PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n", + host_info->alloc_phys); + for (i = 0; i < avp->max_tx_queues; i++) { + avp->tx_q[i] = avp_dev_translate_address(eth_dev, + host_info->tx_phys + (i * host_info->tx_size)); + + avp->alloc_q[i] = avp_dev_translate_address(eth_dev, + host_info->alloc_phys + (i * host_info->alloc_size)); + } + + PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n", + host_info->rx_phys); + PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n", + host_info->free_phys); + for (i = 0; i < avp->max_rx_queues; i++) { + avp->rx_q[i] = avp_dev_translate_address(eth_dev, + host_info->rx_phys + (i * host_info->rx_size)); + avp->free_q[i] = avp_dev_translate_address(eth_dev, + host_info->free_phys + (i * host_info->free_size)); + } + + PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n", + host_info->req_phys); + PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n", + host_info->resp_phys); + PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n", + host_info->sync_phys); + PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n", + host_info->mbuf_phys); + avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys); + avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys); + avp->sync_addr = + avp_dev_translate_address(eth_dev, host_info->sync_phys); + avp->mbuf_addr = + avp_dev_translate_address(eth_dev, host_info->mbuf_phys); + + /* + * store the host mbuf virtual address so that we can calculate + * relative offsets for each mbuf as they are processed + */ + avp->host_mbuf_addr = host_info->mbuf_va; + avp->host_sync_addr = host_info->sync_va; + + /* + * store the maximum packet length that is supported by the host. + */ + avp->max_rx_pkt_len = host_info->max_rx_pkt_len; + PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n", + host_info->max_rx_pkt_len); + + return 0; +} + +/* + * This function is based on probe() function in avp_pci.c + * It returns 0 on success. + */ +static int +eth_avp_dev_init(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = + AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_pci_device *pci_dev; + int ret; + + pci_dev = AVP_DEV_TO_PCI(eth_dev); + eth_dev->dev_ops = &avp_eth_dev_ops; + eth_dev->rx_pkt_burst = &avp_recv_pkts; + eth_dev->tx_pkt_burst = &avp_xmit_pkts; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + /* + * no setup required on secondary processes. All data is saved + * in dev_private by the primary process. All resource should + * be mapped to the same virtual address so all pointers should + * be valid. + */ + if (eth_dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); + eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; + eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; + } + return 0; + } + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; + + /* Check current migration status */ + if (avp_dev_migration_pending(eth_dev)) { + PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); + return -EBUSY; + } + + /* Check BAR resources */ + ret = avp_dev_check_regions(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n", + ret); + return ret; + } + + /* Enable interrupts */ + ret = avp_dev_setup_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); + return ret; + } + + /* Handle each subtype */ + ret = avp_dev_create(pci_dev, eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret); + return ret; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", + ETHER_ADDR_LEN); + return -ENOMEM; + } + + /* Get a mac from device config */ + ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); + + return 0; +} + +static int +eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) +{ + int ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -EPERM; + + if (eth_dev->data == NULL) + return 0; + + ret = avp_dev_disable_interrupts(eth_dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); + return ret; + } + + if (eth_dev->data->mac_addrs != NULL) { + rte_free(eth_dev->data->mac_addrs); + eth_dev->data->mac_addrs = NULL; + } + + return 0; +} + +static int +eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + eth_dev = rte_eth_dev_pci_allocate(pci_dev, + sizeof(struct avp_adapter)); + if (eth_dev == NULL) + return -ENOMEM; + + ret = eth_avp_dev_init(eth_dev); + if (ret) + rte_eth_dev_pci_release(eth_dev); + + return ret; +} + +static int +eth_avp_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, + eth_avp_dev_uninit); +} + +static struct rte_pci_driver rte_avp_pmd = { + .id_table = pci_id_avp_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_avp_pci_probe, + .remove = eth_avp_pci_remove, +}; + +static int +avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, + struct avp_dev *avp) +{ + unsigned int max_rx_pkt_len; + + max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; + + if ((max_rx_pkt_len > avp->guest_mbuf_size) || + (max_rx_pkt_len > avp->host_mbuf_size)) { + /* + * If the guest MTU is greater than either the host or guest + * buffers then chained mbufs have to be enabled in the TX + * direction. It is assumed that the application will not need + * to send packets larger than their max_rx_pkt_len (MRU). + */ + return 1; + } + + if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || + (avp->max_rx_pkt_len > avp->host_mbuf_size)) { + /* + * If the host MRU is greater than its own mbuf size or the + * guest mbuf size then chained mbufs have to be enabled in the + * RX direction. + */ + return 1; + } + + return 0; +} + +static int +avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *pool) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_pktmbuf_pool_private *mbp_priv; + struct avp_queue *rxq; + + if (rx_queue_id >= eth_dev->data->nb_rx_queues) { + PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n", + rx_queue_id, eth_dev->data->nb_rx_queues); + return -EINVAL; + } + + /* Save mbuf pool pointer */ + avp->pool = pool; + + /* Save the local mbuf size */ + mbp_priv = rte_mempool_get_priv(pool); + avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); + avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; + + if (avp_dev_enable_scattered(eth_dev, avp)) { + if (!eth_dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); + eth_dev->data->scattered_rx = 1; + eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; + eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; + } + } + + PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", + avp->max_rx_pkt_len, + eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, + avp->host_mbuf_size, + avp->guest_mbuf_size); + + /* allocate a queue object */ + rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue), + RTE_CACHE_LINE_SIZE, socket_id); + if (rxq == NULL) { + PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n"); + return -ENOMEM; + } + + /* save back pointers to AVP and Ethernet devices */ + rxq->avp = avp; + rxq->dev_data = eth_dev->data; + eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq; + + /* setup the queue receive mapping for the current queue. */ + _avp_set_rx_queue_mappings(eth_dev, rx_queue_id); + + PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq); + + (void)nb_rx_desc; + (void)rx_conf; + return 0; +} + +static int +avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct avp_queue *txq; + + if (tx_queue_id >= eth_dev->data->nb_tx_queues) { + PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n", + tx_queue_id, eth_dev->data->nb_tx_queues); + return -EINVAL; + } + + /* allocate a queue object */ + txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue), + RTE_CACHE_LINE_SIZE, socket_id); + if (txq == NULL) { + PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n"); + return -ENOMEM; + } + + /* only the configured set of transmit queues are used */ + txq->queue_id = tx_queue_id; + txq->queue_base = tx_queue_id; + txq->queue_limit = tx_queue_id; + + /* save back pointers to AVP and Ethernet devices */ + txq->avp = avp; + txq->dev_data = eth_dev->data; + eth_dev->data->tx_queues[tx_queue_id] = (void *)txq; + + PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq); + + (void)nb_tx_desc; + (void)tx_conf; + return 0; +} + +static inline int +_avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b) +{ + uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; + uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; + return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); +} + +static inline int +_avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) +{ + struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { + /* allow all packets destined to our address */ + return 0; + } + + if (likely(is_broadcast_ether_addr(ð->d_addr))) { + /* allow all broadcast packets */ + return 0; + } + + if (likely(is_multicast_ether_addr(ð->d_addr))) { + /* allow all multicast packets */ + return 0; + } + + if (avp->flags & AVP_F_PROMISC) { + /* allow all packets when in promiscuous mode */ + return 0; + } + + return -1; +} + +#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS +static inline void +__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) +{ + struct rte_avp_desc *first_buf; + struct rte_avp_desc *pkt_buf; + unsigned int pkt_len; + unsigned int nb_segs; + void *pkt_data; + unsigned int i; + + first_buf = avp_dev_translate_buffer(avp, buf); + + i = 0; + pkt_len = 0; + nb_segs = first_buf->nb_segs; + do { + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + if (pkt_buf == NULL) + rte_panic("bad buffer: segment %u has an invalid address %p\n", + i, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + if (pkt_data == NULL) + rte_panic("bad buffer: segment %u has a NULL data pointer\n", + i); + if (pkt_buf->data_len == 0) + rte_panic("bad buffer: segment %u has 0 data length\n", + i); + pkt_len += pkt_buf->data_len; + nb_segs--; + i++; + + } while (nb_segs && (buf = pkt_buf->next) != NULL); + + if (nb_segs != 0) + rte_panic("bad buffer: expected %u segments found %u\n", + first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); + if (pkt_len != first_buf->pkt_len) + rte_panic("bad buffer: expected length %u found %u\n", + first_buf->pkt_len, pkt_len); +} + +#define avp_dev_buffer_sanity_check(a, b) \ + __avp_dev_buffer_sanity_check((a), (b)) + +#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */ + +#define avp_dev_buffer_sanity_check(a, b) do {} while (0) + +#endif + +/* + * Copy a host buffer chain to a set of mbufs. This function assumes that + * there exactly the required number of mbufs to copy all source bytes. + */ +static inline struct rte_mbuf * +avp_dev_copy_from_buffers(struct avp_dev *avp, + struct rte_avp_desc *buf, + struct rte_mbuf **mbufs, + unsigned int count) +{ + struct rte_mbuf *m_previous = NULL; + struct rte_avp_desc *pkt_buf; + unsigned int total_length = 0; + unsigned int copy_length; + unsigned int src_offset; + struct rte_mbuf *m; + uint16_t ol_flags; + uint16_t vlan_tci; + void *pkt_data; + unsigned int i; + + avp_dev_buffer_sanity_check(avp, buf); + + /* setup the first source buffer */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + total_length = pkt_buf->pkt_len; + src_offset = 0; + + if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { + ol_flags = PKT_RX_VLAN_PKT; + vlan_tci = pkt_buf->vlan_tci; + } else { + ol_flags = 0; + vlan_tci = 0; + } + + for (i = 0; (i < count) && (buf != NULL); i++) { + /* fill each destination buffer */ + m = mbufs[i]; + + if (m_previous != NULL) + m_previous->next = m; + + m_previous = m; + + do { + /* + * Copy as many source buffers as will fit in the + * destination buffer. + */ + copy_length = RTE_MIN((avp->guest_mbuf_size - + rte_pktmbuf_data_len(m)), + (pkt_buf->data_len - + src_offset)); + rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), + rte_pktmbuf_data_len(m)), + RTE_PTR_ADD(pkt_data, src_offset), + copy_length); + rte_pktmbuf_data_len(m) += copy_length; + src_offset += copy_length; + + if (likely(src_offset == pkt_buf->data_len)) { + /* need a new source buffer */ + buf = pkt_buf->next; + if (buf != NULL) { + pkt_buf = avp_dev_translate_buffer( + avp, buf); + pkt_data = avp_dev_translate_buffer( + avp, pkt_buf->data); + src_offset = 0; + } + } + + if (unlikely(rte_pktmbuf_data_len(m) == + avp->guest_mbuf_size)) { + /* need a new destination mbuf */ + break; + } + + } while (buf != NULL); + } + + m = mbufs[0]; + m->ol_flags = ol_flags; + m->nb_segs = count; + rte_pktmbuf_pkt_len(m) = total_length; + m->vlan_tci = vlan_tci; + + __rte_mbuf_sanity_check(m, 1); + + return m; +} + +static uint16_t +avp_recv_scattered_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; + struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS]; + struct avp_dev *avp = rxq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *free_q; + struct rte_avp_fifo *rx_q; + struct rte_avp_desc *buf; + unsigned int count, avail, n; + unsigned int guest_mbuf_size; + struct rte_mbuf *m; + unsigned int required; + unsigned int buf_len; + unsigned int port_id; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + return 0; + } + + guest_mbuf_size = avp->guest_mbuf_size; + port_id = avp->port_id; + rx_q = avp->rx_q[rxq->queue_id]; + free_q = avp->free_q[rxq->queue_id]; + + /* setup next queue to service */ + rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? + (rxq->queue_id + 1) : rxq->queue_base; + + /* determine how many slots are available in the free queue */ + count = avp_fifo_free_count(free_q); + + /* determine how many packets are available in the rx queue */ + avail = avp_fifo_count(rx_q); + + /* determine how many packets can be received */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); + + if (unlikely(count == 0)) { + /* no free buffers, or no buffers on the rx queue */ + return 0; + } + + /* retrieve pending packets */ + n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); + PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", + count, rx_q); + + count = 0; + for (i = 0; i < n; i++) { + /* prefetch next entry while processing current one */ + if (i + 1 < n) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + buf = avp_bufs[i]; + + /* Peek into the first buffer to determine the total length */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + buf_len = pkt_buf->pkt_len; + + /* Allocate enough mbufs to receive the entire packet */ + required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; + if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { + rxq->dev_data->rx_mbuf_alloc_failed++; + continue; + } + + /* Copy the data from the buffers to our mbufs */ + m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); + + /* finalize mbuf */ + m->port = port_id; + + if (_avp_mac_filter(avp, m) != 0) { + /* silently discard packets not destined to our MAC */ + rte_pktmbuf_free(m); + continue; + } + + /* return new mbuf to caller */ + rx_pkts[count++] = m; + rxq->bytes += buf_len; + } + + rxq->packets += count; + + /* return the buffers to the free queue */ + avp_fifo_put(free_q, (void **)&avp_bufs[0], n); + + return count; +} + + +static uint16_t +avp_recv_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; + struct avp_dev *avp = rxq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *free_q; + struct rte_avp_fifo *rx_q; + unsigned int count, avail, n; + unsigned int pkt_len; + struct rte_mbuf *m; + char *pkt_data; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + return 0; + } + + rx_q = avp->rx_q[rxq->queue_id]; + free_q = avp->free_q[rxq->queue_id]; + + /* setup next queue to service */ + rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? + (rxq->queue_id + 1) : rxq->queue_base; + + /* determine how many slots are available in the free queue */ + count = avp_fifo_free_count(free_q); + + /* determine how many packets are available in the rx queue */ + avail = avp_fifo_count(rx_q); + + /* determine how many packets can be received */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); + + if (unlikely(count == 0)) { + /* no free buffers, or no buffers on the rx queue */ + return 0; + } + + /* retrieve pending packets */ + n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); + PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", + count, rx_q); + + count = 0; + for (i = 0; i < n; i++) { + /* prefetch next entry while processing current one */ + if (i < n - 1) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* Adjust host pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + pkt_len = pkt_buf->pkt_len; + + if (unlikely((pkt_len > avp->guest_mbuf_size) || + (pkt_buf->nb_segs > 1))) { + /* + * application should be using the scattered receive + * function + */ + rxq->errors++; + continue; + } + + /* process each packet to be transmitted */ + m = rte_pktmbuf_alloc(avp->pool); + if (unlikely(m == NULL)) { + rxq->dev_data->rx_mbuf_alloc_failed++; + continue; + } + + /* copy data out of the host buffer to our buffer */ + m->data_off = RTE_PKTMBUF_HEADROOM; + rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len); + + /* initialize the local mbuf */ + rte_pktmbuf_data_len(m) = pkt_len; + rte_pktmbuf_pkt_len(m) = pkt_len; + m->port = avp->port_id; + + if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { + m->ol_flags = PKT_RX_VLAN_PKT; + m->vlan_tci = pkt_buf->vlan_tci; + } + + if (_avp_mac_filter(avp, m) != 0) { + /* silently discard packets not destined to our MAC */ + rte_pktmbuf_free(m); + continue; + } + + /* return new mbuf to caller */ + rx_pkts[count++] = m; + rxq->bytes += pkt_len; + } + + rxq->packets += count; + + /* return the buffers to the free queue */ + avp_fifo_put(free_q, (void **)&avp_bufs[0], n); + + return count; +} + +/* + * Copy a chained mbuf to a set of host buffers. This function assumes that + * there are sufficient destination buffers to contain the entire source + * packet. + */ +static inline uint16_t +avp_dev_copy_to_buffers(struct avp_dev *avp, + struct rte_mbuf *mbuf, + struct rte_avp_desc **buffers, + unsigned int count) +{ + struct rte_avp_desc *previous_buf = NULL; + struct rte_avp_desc *first_buf = NULL; + struct rte_avp_desc *pkt_buf; + struct rte_avp_desc *buf; + size_t total_length; + struct rte_mbuf *m; + size_t copy_length; + size_t src_offset; + char *pkt_data; + unsigned int i; + + __rte_mbuf_sanity_check(mbuf, 1); + + m = mbuf; + src_offset = 0; + total_length = rte_pktmbuf_pkt_len(m); + for (i = 0; (i < count) && (m != NULL); i++) { + /* fill each destination buffer */ + buf = buffers[i]; + + if (i < count - 1) { + /* prefetch next entry while processing this one */ + pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + + /* setup the buffer chain */ + if (previous_buf != NULL) + previous_buf->next = buf; + else + first_buf = pkt_buf; + + previous_buf = pkt_buf; + + do { + /* + * copy as many source mbuf segments as will fit in the + * destination buffer. + */ + copy_length = RTE_MIN((avp->host_mbuf_size - + pkt_buf->data_len), + (rte_pktmbuf_data_len(m) - + src_offset)); + rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len), + RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), + src_offset), + copy_length); + pkt_buf->data_len += copy_length; + src_offset += copy_length; + + if (likely(src_offset == rte_pktmbuf_data_len(m))) { + /* need a new source buffer */ + m = m->next; + src_offset = 0; + } + + if (unlikely(pkt_buf->data_len == + avp->host_mbuf_size)) { + /* need a new destination buffer */ + break; + } + + } while (m != NULL); + } + + first_buf->nb_segs = count; + first_buf->pkt_len = total_length; + + if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { + first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; + first_buf->vlan_tci = mbuf->vlan_tci; + } + + avp_dev_buffer_sanity_check(avp, buffers[0]); + + return total_length; +} + + +static uint16_t +avp_xmit_scattered_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * + RTE_AVP_MAX_MBUF_SEGMENTS)]; + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; + struct rte_avp_fifo *alloc_q; + struct rte_avp_fifo *tx_q; + unsigned int count, avail, n; + unsigned int orig_nb_pkts; + struct rte_mbuf *m; + unsigned int required; + unsigned int segments; + unsigned int tx_bytes; + unsigned int i; + + orig_nb_pkts = nb_pkts; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop? */ + txq->errors += nb_pkts; + return 0; + } + + tx_q = avp->tx_q[txq->queue_id]; + alloc_q = avp->alloc_q[txq->queue_id]; + + /* limit the number of transmitted packets to the max burst size */ + if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) + nb_pkts = AVP_MAX_TX_BURST; + + /* determine how many buffers are available to copy into */ + avail = avp_fifo_count(alloc_q); + if (unlikely(avail > (AVP_MAX_TX_BURST * + RTE_AVP_MAX_MBUF_SEGMENTS))) + avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS; + + /* determine how many slots are available in the transmit queue */ + count = avp_fifo_free_count(tx_q); + + /* determine how many packets can be sent */ + nb_pkts = RTE_MIN(count, nb_pkts); + + /* determine how many packets will fit in the available buffers */ + count = 0; + segments = 0; + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + if (likely(i < (unsigned int)nb_pkts - 1)) { + /* prefetch next entry while processing this one */ + rte_prefetch0(tx_pkts[i + 1]); + } + required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / + avp->host_mbuf_size; + + if (unlikely((required == 0) || + (required > RTE_AVP_MAX_MBUF_SEGMENTS))) + break; + else if (unlikely(required + segments > avail)) + break; + segments += required; + count++; + } + nb_pkts = count; + + if (unlikely(nb_pkts == 0)) { + /* no available buffers, or no space on the tx queue */ + txq->errors += orig_nb_pkts; + return 0; + } + + PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", + nb_pkts, tx_q); + + /* retrieve sufficient send buffers */ + n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments); + if (unlikely(n != segments)) { + PMD_TX_LOG(DEBUG, "Failed to allocate buffers " + "n=%u, segments=%u, orig=%u\n", + n, segments, orig_nb_pkts); + txq->errors += orig_nb_pkts; + return 0; + } + + tx_bytes = 0; + count = 0; + for (i = 0; i < nb_pkts; i++) { + /* process each packet to be transmitted */ + m = tx_pkts[i]; + + /* determine how many buffers are required for this packet */ + required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / + avp->host_mbuf_size; + + tx_bytes += avp_dev_copy_to_buffers(avp, m, + &avp_bufs[count], required); + tx_bufs[i] = avp_bufs[count]; + count += required; + + /* free the original mbuf */ + rte_pktmbuf_free(m); + } + + txq->packets += nb_pkts; + txq->bytes += tx_bytes; + +#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS + for (i = 0; i < nb_pkts; i++) + avp_dev_buffer_sanity_check(avp, tx_bufs[i]); +#endif + + /* send the packets */ + n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts); + if (unlikely(n != orig_nb_pkts)) + txq->errors += (orig_nb_pkts - n); + + return n; +} + + +static uint16_t +avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *alloc_q; + struct rte_avp_fifo *tx_q; + unsigned int count, avail, n; + struct rte_mbuf *m; + unsigned int pkt_len; + unsigned int tx_bytes; + char *pkt_data; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop?! */ + txq->errors++; + return 0; + } + + tx_q = avp->tx_q[txq->queue_id]; + alloc_q = avp->alloc_q[txq->queue_id]; + + /* limit the number of transmitted packets to the max burst size */ + if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) + nb_pkts = AVP_MAX_TX_BURST; + + /* determine how many buffers are available to copy into */ + avail = avp_fifo_count(alloc_q); + + /* determine how many slots are available in the transmit queue */ + count = avp_fifo_free_count(tx_q); + + /* determine how many packets can be sent */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + + if (unlikely(count == 0)) { + /* no available buffers, or no space on the tx queue */ + txq->errors += nb_pkts; + return 0; + } + + PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", + count, tx_q); + + /* retrieve sufficient send buffers */ + n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); + if (unlikely(n != count)) { + txq->errors++; + return 0; + } + + tx_bytes = 0; + for (i = 0; i < count; i++) { + /* prefetch next entry while processing the current one */ + if (i < count - 1) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* process each packet to be transmitted */ + m = tx_pkts[i]; + + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + pkt_len = rte_pktmbuf_pkt_len(m); + + if (unlikely((pkt_len > avp->guest_mbuf_size) || + (pkt_len > avp->host_mbuf_size))) { + /* + * application should be using the scattered transmit + * function; send it truncated to avoid the performance + * hit of having to manage returning the already + * allocated buffer to the free list. This should not + * happen since the application should have set the + * max_rx_pkt_len based on its MTU and it should be + * policing its own packet sizes. + */ + txq->errors++; + pkt_len = RTE_MIN(avp->guest_mbuf_size, + avp->host_mbuf_size); + } + + /* copy data out of our mbuf and into the AVP buffer */ + rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); + pkt_buf->pkt_len = pkt_len; + pkt_buf->data_len = pkt_len; + pkt_buf->nb_segs = 1; + pkt_buf->next = NULL; + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; + pkt_buf->vlan_tci = m->vlan_tci; + } + + tx_bytes += pkt_len; + + /* free the original mbuf */ + rte_pktmbuf_free(m); + } + + txq->packets += count; + txq->bytes += tx_bytes; + + /* send the packets */ + n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); + + return n; +} + +static void +avp_dev_rx_queue_release(void *rx_queue) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct avp_dev *avp = rxq->avp; + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + if (data->rx_queues[i] == rxq) + data->rx_queues[i] = NULL; + } +} + +static void +avp_dev_tx_queue_release(void *tx_queue) +{ + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct avp_dev *avp = txq->avp; + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_tx_queues; i++) { + if (data->tx_queues[i] == txq) + data->tx_queues[i] = NULL; + } +} + +static int +avp_dev_configure(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_info *host_info; + struct rte_avp_device_config config; + int mask = 0; + void *addr; + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + + addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; + host_info = (struct rte_avp_device_info *)addr; + + /* Setup required number of queues */ + _avp_set_queue_counts(eth_dev); + + mask = (ETH_VLAN_STRIP_MASK | + ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK); + avp_vlan_offload_set(eth_dev, mask); + + /* update device config */ + memset(&config, 0, sizeof(config)); + config.device_id = host_info->device_id; + config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; + config.driver_version = AVP_DPDK_DRIVER_VERSION; + config.features = avp->features; + config.num_tx_queues = avp->num_tx_queues; + config.num_rx_queues = avp->num_rx_queues; + + ret = avp_dev_ctrl_set_config(eth_dev, &config); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", + ret); + goto unlock; + } + + avp->flags |= AVP_F_CONFIGURED; + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static int +avp_dev_start(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + + /* disable features that we do not support */ + eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; + eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; + eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0; + eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0; + + /* update link state */ + ret = avp_dev_ctrl_set_link_state(eth_dev, 1); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", + ret); + goto unlock; + } + + /* remember current link state */ + avp->flags |= AVP_F_LINKUP; + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +avp_dev_stop(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + + /* remember current link state */ + avp->flags &= ~AVP_F_LINKUP; + + /* update link state */ + ret = avp_dev_ctrl_set_link_state(eth_dev, 0); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", + ret); + } + +unlock: + rte_spinlock_unlock(&avp->lock); +} + +static void +avp_dev_close(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + + /* remember current link state */ + avp->flags &= ~AVP_F_LINKUP; + avp->flags &= ~AVP_F_CONFIGURED; + + ret = avp_dev_disable_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); + /* continue */ + } + + /* update device state */ + ret = avp_dev_ctrl_shutdown(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", + ret); + /* continue */ + } + +unlock: + rte_spinlock_unlock(&avp->lock); +} + +static int +avp_dev_link_update(struct rte_eth_dev *eth_dev, + __rte_unused int wait_to_complete) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_eth_link *link = ð_dev->data->dev_link; + + link->link_speed = ETH_SPEED_NUM_10G; + link->link_duplex = ETH_LINK_FULL_DUPLEX; + link->link_status = !!(avp->flags & AVP_F_LINKUP); + + return -1; +} + +static void +avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + rte_spinlock_lock(&avp->lock); + if ((avp->flags & AVP_F_PROMISC) == 0) { + avp->flags |= AVP_F_PROMISC; + PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", + eth_dev->data->port_id); + } + rte_spinlock_unlock(&avp->lock); +} + +static void +avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + rte_spinlock_lock(&avp->lock); + if ((avp->flags & AVP_F_PROMISC) != 0) { + avp->flags &= ~AVP_F_PROMISC; + PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", + eth_dev->data->port_id); + } + rte_spinlock_unlock(&avp->lock); +} + +static void +avp_dev_info_get(struct rte_eth_dev *eth_dev, + struct rte_eth_dev_info *dev_info) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + dev_info->driver_name = "rte_avp_pmd"; + dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + dev_info->max_rx_queues = avp->max_rx_queues; + dev_info->max_tx_queues = avp->max_tx_queues; + dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = avp->max_rx_pkt_len; + dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS; + if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; + dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; + } +} + +static void +avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + if (mask & ETH_VLAN_STRIP_MASK) { + if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { + if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) + avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; + else + avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; + } else { + PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n"); + } + } + + if (mask & ETH_VLAN_FILTER_MASK) { + if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) + PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); + } + + if (mask & ETH_VLAN_EXTEND_MASK) { + if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) + PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); + } +} + +static void +avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + struct avp_queue *rxq = avp->dev_data->rx_queues[i]; + + if (rxq) { + stats->ipackets += rxq->packets; + stats->ibytes += rxq->bytes; + stats->ierrors += rxq->errors; + + stats->q_ipackets[i] += rxq->packets; + stats->q_ibytes[i] += rxq->bytes; + stats->q_errors[i] += rxq->errors; + } + } + + for (i = 0; i < avp->num_tx_queues; i++) { + struct avp_queue *txq = avp->dev_data->tx_queues[i]; + + if (txq) { + stats->opackets += txq->packets; + stats->obytes += txq->bytes; + stats->oerrors += txq->errors; + + stats->q_opackets[i] += txq->packets; + stats->q_obytes[i] += txq->bytes; + stats->q_errors[i] += txq->errors; + } + } +} + +static void +avp_dev_stats_reset(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + struct avp_queue *rxq = avp->dev_data->rx_queues[i]; + + if (rxq) { + rxq->bytes = 0; + rxq->packets = 0; + rxq->errors = 0; + } + } + + for (i = 0; i < avp->num_tx_queues; i++) { + struct avp_queue *txq = avp->dev_data->tx_queues[i]; + + if (txq) { + txq->bytes = 0; + txq->packets = 0; + txq->errors = 0; + } + } +} + +RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd); +RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); diff --git a/src/seastar/dpdk/drivers/net/avp/avp_logs.h b/src/seastar/dpdk/drivers/net/avp/avp_logs.h new file mode 100644 index 00000000..252cab7d --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/avp_logs.h @@ -0,0 +1,59 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2013-2015, Wind River Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3) Neither the name of Wind River Systems nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _AVP_LOGS_H_ +#define _AVP_LOGS_H_ + +#include <rte_log.h> + +#ifdef RTE_LIBRTE_AVP_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() rx: " fmt, __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_AVP_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() tx: " fmt, __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_AVP_DEBUG_DRIVER +#define PMD_DRV_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt, __func__, ## args) +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while (0) +#endif + +#endif /* _AVP_LOGS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/avp/rte_avp_common.h b/src/seastar/dpdk/drivers/net/avp/rte_avp_common.h new file mode 100644 index 00000000..488d7216 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/rte_avp_common.h @@ -0,0 +1,432 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014-2017 Wind River Systems, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * Contact Information: + * Wind River Systems, Inc. + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014-2017 Wind River Systems, Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_AVP_COMMON_H_ +#define _RTE_AVP_COMMON_H_ + +#ifdef __KERNEL__ +#include <linux/if.h> +#define RTE_STD_C11 +#else +#include <stdint.h> +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_ether.h> +#include <rte_atomic.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * AVP name is part of network device name. + */ +#define RTE_AVP_NAMESIZE 32 + +/** + * AVP alias is a user-defined value used for lookups from secondary + * processes. Typically, this is a UUID. + */ +#define RTE_AVP_ALIASSIZE 128 + +/* + * Request id. + */ +enum rte_avp_req_id { + RTE_AVP_REQ_UNKNOWN = 0, + RTE_AVP_REQ_CHANGE_MTU, + RTE_AVP_REQ_CFG_NETWORK_IF, + RTE_AVP_REQ_CFG_DEVICE, + RTE_AVP_REQ_SHUTDOWN_DEVICE, + RTE_AVP_REQ_MAX, +}; + +/**@{ AVP device driver types */ +#define RTE_AVP_DRIVER_TYPE_UNKNOWN 0 +#define RTE_AVP_DRIVER_TYPE_DPDK 1 +#define RTE_AVP_DRIVER_TYPE_KERNEL 2 +#define RTE_AVP_DRIVER_TYPE_QEMU 3 +/**@} */ + +/**@{ AVP device operational modes */ +#define RTE_AVP_MODE_HOST 0 /**< AVP interface created in host */ +#define RTE_AVP_MODE_GUEST 1 /**< AVP interface created for export to guest */ +#define RTE_AVP_MODE_TRACE 2 /**< AVP interface created for packet tracing */ +/**@} */ + +/* + * Structure for AVP queue configuration query request/result + */ +struct rte_avp_device_config { + uint64_t device_id; /**< Unique system identifier */ + uint32_t driver_type; /**< Device Driver type */ + uint32_t driver_version; /**< Device Driver version */ + uint32_t features; /**< Negotiated features */ + uint16_t num_tx_queues; /**< Number of active transmit queues */ + uint16_t num_rx_queues; /**< Number of active receive queues */ + uint8_t if_up; /**< 1: interface up, 0: interface down */ +} __attribute__ ((__packed__)); + +/* + * Structure for AVP request. + */ +struct rte_avp_request { + uint32_t req_id; /**< Request id */ + RTE_STD_C11 + union { + uint32_t new_mtu; /**< New MTU */ + uint8_t if_up; /**< 1: interface up, 0: interface down */ + struct rte_avp_device_config config; /**< Queue configuration */ + }; + int32_t result; /**< Result for processing request */ +} __attribute__ ((__packed__)); + +/* + * FIFO struct mapped in a shared memory. It describes a circular buffer FIFO + * Write and read should wrap around. FIFO is empty when write == read + * Writing should never overwrite the read position + */ +struct rte_avp_fifo { + volatile unsigned int write; /**< Next position to be written*/ + volatile unsigned int read; /**< Next position to be read */ + unsigned int len; /**< Circular buffer length */ + unsigned int elem_size; /**< Pointer size - for 32/64 bit OS */ + void *volatile buffer[]; /**< The buffer contains mbuf pointers */ +}; + + +/* + * AVP packet buffer header used to define the exchange of packet data. + */ +struct rte_avp_desc { + uint64_t pad0; + void *pkt_mbuf; /**< Reference to packet mbuf */ + uint8_t pad1[14]; + uint16_t ol_flags; /**< Offload features. */ + void *next; /**< Reference to next buffer in chain */ + void *data; /**< Start address of data in segment buffer. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + uint8_t nb_segs; /**< Number of segments */ + uint8_t pad2; + uint16_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ + uint32_t pad3; + uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order). */ + uint32_t pad4; +} __attribute__ ((__aligned__(RTE_CACHE_LINE_SIZE), __packed__)); + + +/**{ AVP device features */ +#define RTE_AVP_FEATURE_VLAN_OFFLOAD (1 << 0) /**< Emulated HW VLAN offload */ +/**@} */ + + +/**@{ Offload feature flags */ +#define RTE_AVP_TX_VLAN_PKT 0x0001 /**< TX packet is a 802.1q VLAN packet. */ +#define RTE_AVP_RX_VLAN_PKT 0x0800 /**< RX packet is a 802.1q VLAN packet. */ +/**@} */ + + +/**@{ AVP PCI identifiers */ +#define RTE_AVP_PCI_VENDOR_ID 0x1af4 +#define RTE_AVP_PCI_DEVICE_ID 0x1110 +/**@} */ + +/**@{ AVP PCI subsystem identifiers */ +#define RTE_AVP_PCI_SUB_VENDOR_ID RTE_AVP_PCI_VENDOR_ID +#define RTE_AVP_PCI_SUB_DEVICE_ID 0x1104 +/**@} */ + +/**@{ AVP PCI BAR definitions */ +#define RTE_AVP_PCI_MMIO_BAR 0 +#define RTE_AVP_PCI_MSIX_BAR 1 +#define RTE_AVP_PCI_MEMORY_BAR 2 +#define RTE_AVP_PCI_MEMMAP_BAR 4 +#define RTE_AVP_PCI_DEVICE_BAR 5 +#define RTE_AVP_PCI_MAX_BAR 6 +/**@} */ + +/**@{ AVP PCI BAR name definitions */ +#define RTE_AVP_MMIO_BAR_NAME "avp-mmio" +#define RTE_AVP_MSIX_BAR_NAME "avp-msix" +#define RTE_AVP_MEMORY_BAR_NAME "avp-memory" +#define RTE_AVP_MEMMAP_BAR_NAME "avp-memmap" +#define RTE_AVP_DEVICE_BAR_NAME "avp-device" +/**@} */ + +/**@{ AVP PCI MSI-X vectors */ +#define RTE_AVP_MIGRATION_MSIX_VECTOR 0 /**< Migration interrupts */ +#define RTE_AVP_MAX_MSIX_VECTORS 1 +/**@} */ + +/**@} AVP Migration status/ack register values */ +#define RTE_AVP_MIGRATION_NONE 0 /**< Migration never executed */ +#define RTE_AVP_MIGRATION_DETACHED 1 /**< Device attached during migration */ +#define RTE_AVP_MIGRATION_ATTACHED 2 /**< Device reattached during migration */ +#define RTE_AVP_MIGRATION_ERROR 3 /**< Device failed to attach/detach */ +/**@} */ + +/**@} AVP MMIO Register Offsets */ +#define RTE_AVP_REGISTER_BASE 0 +#define RTE_AVP_INTERRUPT_MASK_OFFSET (RTE_AVP_REGISTER_BASE + 0) +#define RTE_AVP_INTERRUPT_STATUS_OFFSET (RTE_AVP_REGISTER_BASE + 4) +#define RTE_AVP_MIGRATION_STATUS_OFFSET (RTE_AVP_REGISTER_BASE + 8) +#define RTE_AVP_MIGRATION_ACK_OFFSET (RTE_AVP_REGISTER_BASE + 12) +/**@} */ + +/**@} AVP Interrupt Status Mask */ +#define RTE_AVP_MIGRATION_INTERRUPT_MASK (1 << 1) +#define RTE_AVP_APP_INTERRUPTS_MASK 0xFFFFFFFF +#define RTE_AVP_NO_INTERRUPTS_MASK 0 +/**@} */ + +/* + * Maximum number of memory regions to export + */ +#define RTE_AVP_MAX_MAPS 2048 + +/* + * Description of a single memory region + */ +struct rte_avp_memmap { + void *addr; + phys_addr_t phys_addr; + uint64_t length; +}; + +/* + * AVP memory mapping validation marker + */ +#define RTE_AVP_MEMMAP_MAGIC 0x20131969 + +/**@{ AVP memory map versions */ +#define RTE_AVP_MEMMAP_VERSION_1 1 +#define RTE_AVP_MEMMAP_VERSION RTE_AVP_MEMMAP_VERSION_1 +/**@} */ + +/* + * Defines a list of memory regions exported from the host to the guest + */ +struct rte_avp_memmap_info { + uint32_t magic; /**< Memory validation marker */ + uint32_t version; /**< Data format version */ + uint32_t nb_maps; + struct rte_avp_memmap maps[RTE_AVP_MAX_MAPS]; +}; + +/* + * AVP device memory validation marker + */ +#define RTE_AVP_DEVICE_MAGIC 0x20131975 + +/**@{ AVP device map versions + * WARNING: do not change the format or names of these variables. They are + * automatically parsed from the build system to generate the SDK package + * name. + **/ +#define RTE_AVP_RELEASE_VERSION_1 1 +#define RTE_AVP_RELEASE_VERSION RTE_AVP_RELEASE_VERSION_1 +#define RTE_AVP_MAJOR_VERSION_0 0 +#define RTE_AVP_MAJOR_VERSION_1 1 +#define RTE_AVP_MAJOR_VERSION_2 2 +#define RTE_AVP_MAJOR_VERSION RTE_AVP_MAJOR_VERSION_2 +#define RTE_AVP_MINOR_VERSION_0 0 +#define RTE_AVP_MINOR_VERSION_1 1 +#define RTE_AVP_MINOR_VERSION_13 13 +#define RTE_AVP_MINOR_VERSION RTE_AVP_MINOR_VERSION_13 +/**@} */ + + +/** + * Generates a 32-bit version number from the specified version number + * components + */ +#define RTE_AVP_MAKE_VERSION(_release, _major, _minor) \ +((((_release) & 0xffff) << 16) | (((_major) & 0xff) << 8) | ((_minor) & 0xff)) + + +/** + * Represents the current version of the AVP host driver + * WARNING: in the current development branch the host and guest driver + * version should always be the same. When patching guest features back to + * GA releases the host version number should not be updated unless there was + * an actual change made to the host driver. + */ +#define RTE_AVP_CURRENT_HOST_VERSION \ +RTE_AVP_MAKE_VERSION(RTE_AVP_RELEASE_VERSION_1, \ + RTE_AVP_MAJOR_VERSION_0, \ + RTE_AVP_MINOR_VERSION_1) + + +/** + * Represents the current version of the AVP guest drivers + */ +#define RTE_AVP_CURRENT_GUEST_VERSION \ +RTE_AVP_MAKE_VERSION(RTE_AVP_RELEASE_VERSION_1, \ + RTE_AVP_MAJOR_VERSION_2, \ + RTE_AVP_MINOR_VERSION_13) + +/** + * Access AVP device version values + */ +#define RTE_AVP_GET_RELEASE_VERSION(_version) (((_version) >> 16) & 0xffff) +#define RTE_AVP_GET_MAJOR_VERSION(_version) (((_version) >> 8) & 0xff) +#define RTE_AVP_GET_MINOR_VERSION(_version) ((_version) & 0xff) +/**@}*/ + + +/** + * Remove the minor version number so that only the release and major versions + * are used for comparisons. + */ +#define RTE_AVP_STRIP_MINOR_VERSION(_version) ((_version) >> 8) + + +/** + * Defines the number of mbuf pools supported per device (1 per socket) + */ +#define RTE_AVP_MAX_MEMPOOLS 8 + +/* + * Defines address translation parameters for each support mbuf pool + */ +struct rte_avp_mempool_info { + void *addr; + phys_addr_t phys_addr; + uint64_t length; +}; + +/* + * Struct used to create a AVP device. Passed to the kernel in IOCTL call or + * via inter-VM shared memory when used in a guest. + */ +struct rte_avp_device_info { + uint32_t magic; /**< Memory validation marker */ + uint32_t version; /**< Data format version */ + + char ifname[RTE_AVP_NAMESIZE]; /**< Network device name for AVP */ + + phys_addr_t tx_phys; + phys_addr_t rx_phys; + phys_addr_t alloc_phys; + phys_addr_t free_phys; + + uint32_t features; /**< Supported feature bitmap */ + uint8_t min_rx_queues; /**< Minimum supported receive/free queues */ + uint8_t num_rx_queues; /**< Recommended number of receive/free queues */ + uint8_t max_rx_queues; /**< Maximum supported receive/free queues */ + uint8_t min_tx_queues; /**< Minimum supported transmit/alloc queues */ + uint8_t num_tx_queues; + /**< Recommended number of transmit/alloc queues */ + uint8_t max_tx_queues; /**< Maximum supported transmit/alloc queues */ + + uint32_t tx_size; /**< Size of each transmit queue */ + uint32_t rx_size; /**< Size of each receive queue */ + uint32_t alloc_size; /**< Size of each alloc queue */ + uint32_t free_size; /**< Size of each free queue */ + + /* Used by Ethtool */ + phys_addr_t req_phys; + phys_addr_t resp_phys; + phys_addr_t sync_phys; + void *sync_va; + + /* mbuf mempool (used when a single memory area is supported) */ + void *mbuf_va; + phys_addr_t mbuf_phys; + + /* mbuf mempools */ + struct rte_avp_mempool_info pool[RTE_AVP_MAX_MEMPOOLS]; + +#ifdef __KERNEL__ + /* Ethernet info */ + char ethaddr[ETH_ALEN]; +#else + char ethaddr[ETHER_ADDR_LEN]; +#endif + + uint8_t mode; /**< device mode, i.e guest, host, trace */ + + /* mbuf size */ + unsigned int mbuf_size; + + /* + * unique id to differentiate between two instantiations of the same + * AVP device (i.e., the guest needs to know if the device has been + * deleted and recreated). + */ + uint64_t device_id; + + uint32_t max_rx_pkt_len; /**< Maximum receive unit size */ +}; + +#define RTE_AVP_MAX_QUEUES 8 /**< Maximum number of queues per device */ + +/** Maximum number of chained mbufs in a packet */ +#define RTE_AVP_MAX_MBUF_SEGMENTS 5 + +#define RTE_AVP_DEVICE "avp" + +#define RTE_AVP_IOCTL_TEST _IOWR(0, 1, int) +#define RTE_AVP_IOCTL_CREATE _IOWR(0, 2, struct rte_avp_device_info) +#define RTE_AVP_IOCTL_RELEASE _IOWR(0, 3, struct rte_avp_device_info) +#define RTE_AVP_IOCTL_QUERY _IOWR(0, 4, struct rte_avp_device_config) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_AVP_COMMON_H_ */ diff --git a/src/seastar/dpdk/drivers/net/avp/rte_avp_fifo.h b/src/seastar/dpdk/drivers/net/avp/rte_avp_fifo.h new file mode 100644 index 00000000..803eb80a --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/rte_avp_fifo.h @@ -0,0 +1,169 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014 Wind River Systems, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * Contact Information: + * Wind River Systems, Inc. + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2017 Wind River Systems, Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_AVP_FIFO_H_ +#define _RTE_AVP_FIFO_H_ + +#include "rte_avp_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __KERNEL__ +/* Write memory barrier for kernel compiles */ +#define AVP_WMB() smp_wmb() +/* Read memory barrier for kernel compiles */ +#define AVP_RMB() smp_rmb() +#else +/* Write memory barrier for userspace compiles */ +#define AVP_WMB() rte_wmb() +/* Read memory barrier for userspace compiles */ +#define AVP_RMB() rte_rmb() +#endif + +#ifndef __KERNEL__ +#include <rte_debug.h> + +/** + * Initializes the avp fifo structure + */ +static inline void +avp_fifo_init(struct rte_avp_fifo *fifo, unsigned int size) +{ + /* Ensure size is power of 2 */ + if (size & (size - 1)) + rte_panic("AVP fifo size must be power of 2\n"); + + fifo->write = 0; + fifo->read = 0; + fifo->len = size; + fifo->elem_size = sizeof(void *); +} +#endif + +/** + * Adds num elements into the fifo. Return the number actually written + */ +static inline unsigned +avp_fifo_put(struct rte_avp_fifo *fifo, void **data, unsigned int num) +{ + unsigned int i = 0; + unsigned int fifo_write = fifo->write; + unsigned int fifo_read = fifo->read; + unsigned int new_write = fifo_write; + + for (i = 0; i < num; i++) { + new_write = (new_write + 1) & (fifo->len - 1); + + if (new_write == fifo_read) + break; + fifo->buffer[fifo_write] = data[i]; + fifo_write = new_write; + } + AVP_WMB(); + fifo->write = fifo_write; + return i; +} + +/** + * Get up to num elements from the fifo. Return the number actually read + */ +static inline unsigned int +avp_fifo_get(struct rte_avp_fifo *fifo, void **data, unsigned int num) +{ + unsigned int i = 0; + unsigned int new_read = fifo->read; + unsigned int fifo_write = fifo->write; + + if (new_read == fifo_write) + return 0; /* empty */ + + for (i = 0; i < num; i++) { + if (new_read == fifo_write) + break; + + data[i] = fifo->buffer[new_read]; + new_read = (new_read + 1) & (fifo->len - 1); + } + AVP_RMB(); + fifo->read = new_read; + return i; +} + +/** + * Get the num of elements in the fifo + */ +static inline unsigned int +avp_fifo_count(struct rte_avp_fifo *fifo) +{ + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); +} + +/** + * Get the num of available elements in the fifo + */ +static inline unsigned int +avp_fifo_free_count(struct rte_avp_fifo *fifo) +{ + return (fifo->read - fifo->write - 1) & (fifo->len - 1); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_AVP_FIFO_H_ */ diff --git a/src/seastar/dpdk/drivers/net/avp/rte_pmd_avp_version.map b/src/seastar/dpdk/drivers/net/avp/rte_pmd_avp_version.map new file mode 100644 index 00000000..af8f3f47 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/avp/rte_pmd_avp_version.map @@ -0,0 +1,4 @@ +DPDK_17.05 { + + local: *; +}; |