Diffstat (limited to 'src/spdk/dpdk/drivers/net/memif')
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/Makefile                  |   26
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/memif.h                   |  179
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/memif_socket.c            | 1115
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/memif_socket.h            |  109
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/meson.build               |   12
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/rte_eth_memif.c           | 1816
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/rte_eth_memif.h           |  215
-rw-r--r--  src/spdk/dpdk/drivers/net/memif/rte_pmd_memif_version.map |    3
8 files changed, 3475 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/net/memif/Makefile b/src/spdk/dpdk/drivers/net/memif/Makefile
new file mode 100644
index 000000000..3bf4ddce4
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_memif.a
+
+EXPORT_MAP := rte_pmd_memif_version.map
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
+LDLIBS += -lrte_ethdev -lrte_kvargs -lrte_net
+LDLIBS += -lrte_hash
+LDLIBS += -lrte_bus_vdev
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/spdk/dpdk/drivers/net/memif/memif.h b/src/spdk/dpdk/drivers/net/memif/memif.h
new file mode 100644
index 000000000..b91230890
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/memif.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+ */
+
+#ifndef _MEMIF_H_
+#define _MEMIF_H_
+
+#define MEMIF_COOKIE		0x3E31F20
+#define MEMIF_VERSION_MAJOR	2
+#define MEMIF_VERSION_MINOR	0
+#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
+#define MEMIF_NAME_SZ		32
+
+/*
+ * S2M: direction slave -> master
+ * M2S: direction master -> slave
+ */
+
+/*
+ *  Type definitions
+ */
+
+typedef enum memif_msg_type {
+	MEMIF_MSG_TYPE_NONE,
+	MEMIF_MSG_TYPE_ACK,
+	MEMIF_MSG_TYPE_HELLO,
+	MEMIF_MSG_TYPE_INIT,
+	MEMIF_MSG_TYPE_ADD_REGION,
+	MEMIF_MSG_TYPE_ADD_RING,
+	MEMIF_MSG_TYPE_CONNECT,
+	MEMIF_MSG_TYPE_CONNECTED,
+	MEMIF_MSG_TYPE_DISCONNECT,
+} memif_msg_type_t;
+
+typedef enum {
+	MEMIF_RING_S2M, /**< buffer ring in direction slave -> master */
+	MEMIF_RING_M2S, /**< buffer ring in direction master -> slave */
+} memif_ring_type_t;
+
+typedef enum {
+	MEMIF_INTERFACE_MODE_ETHERNET,
+	MEMIF_INTERFACE_MODE_IP,
+	MEMIF_INTERFACE_MODE_PUNT_INJECT,
+} memif_interface_mode_t;
+
+typedef uint16_t memif_region_index_t;
+typedef uint32_t memif_region_offset_t;
+typedef uint64_t memif_region_size_t;
+typedef uint16_t memif_ring_index_t;
+typedef uint32_t memif_interface_id_t;
+typedef uint16_t memif_version_t;
+typedef uint8_t memif_log2_ring_size_t;
+
+/*
+ *  Socket messages
+ */
+
+/**
+ * M2S
+ * Contains the master interface's configuration.
+ */
+typedef struct __rte_packed {
+	uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */
+	memif_version_t min_version; /**< lowest supported memif version */
+	memif_version_t max_version; /**< highest supported memif version */
+	memif_region_index_t max_region; /**< maximum num of regions */
+	memif_ring_index_t max_m2s_ring; /**< maximum num of M2S rings */
+	memif_ring_index_t max_s2m_ring; /**< maximum num of S2M rings */
+	memif_log2_ring_size_t max_log2_ring_size; /**< maximum ring size (as log2) */
+} memif_msg_hello_t;
+
+/**
+ * S2M
+ * Contains information required to identify the interface
+ * to which the slave wants to connect.
+ */
+typedef struct __rte_packed {
+	memif_version_t version; /**< memif version */
+	memif_interface_id_t id; /**< interface id */
+	memif_interface_mode_t mode:8; /**< interface mode */
+	uint8_t secret[24]; /**< optional security parameter */
+	uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */
+} memif_msg_init_t;
+
+/**
+ * S2M
+ * Request master to add new shared memory region to master interface.
+ * The shared file's file descriptor is passed in cmsghdr.
+ */
+typedef struct __rte_packed {
+	memif_region_index_t index; /**< shm regions index */
+	memif_region_size_t size; /**< shm region size */
+} memif_msg_add_region_t;
+
+/**
+ * S2M
+ * Request master to add new ring to master interface.
+ */
+typedef struct __rte_packed {
+	uint16_t flags; /**< flags */
+#define MEMIF_MSG_ADD_RING_FLAG_S2M 1 /**< ring is in S2M direction */
+	memif_ring_index_t index; /**< ring index */
+	memif_region_index_t region; /**< region index on which this ring is located */
+	memif_region_offset_t offset; /**< buffer start offset */
+	memif_log2_ring_size_t log2_ring_size; /**< ring size (log2) */
+	uint16_t private_hdr_size; /**< used for private metadata */
+} memif_msg_add_ring_t;
+
+/**
+ * S2M
+ * Finalize connection establishment.
+ */
+typedef struct __rte_packed {
+	uint8_t if_name[MEMIF_NAME_SZ]; /**< slave interface name */
+} memif_msg_connect_t;
+
+/**
+ * M2S
+ * Finalize connection establishment.
+ */
+typedef struct __rte_packed {
+	uint8_t if_name[MEMIF_NAME_SZ]; /**< master interface name */
+} memif_msg_connected_t;
+
+/**
+ * S2M & M2S
+ * Disconnect interfaces.
+ */
+typedef struct __rte_packed {
+	uint32_t code; /**< error code */
+	uint8_t string[96]; /**< disconnect reason */
+} memif_msg_disconnect_t;
+
+typedef struct __rte_packed __rte_aligned(128)
+{
+	memif_msg_type_t type:16;
+	union {
+		memif_msg_hello_t hello;
+		memif_msg_init_t init;
+		memif_msg_add_region_t add_region;
+		memif_msg_add_ring_t add_ring;
+		memif_msg_connect_t connect;
+		memif_msg_connected_t connected;
+		memif_msg_disconnect_t disconnect;
+	};
+} memif_msg_t;
+
+/*
+ *  Ring and Descriptor Layout
+ */
+
+/**
+ * Buffer descriptor.
+ */
+typedef struct __rte_packed {
+	uint16_t flags; /**< flags */
+#define MEMIF_DESC_FLAG_NEXT 1 /**< is chained buffer */
+	memif_region_index_t region; /**< region index on which the buffer is located */
+	uint32_t length; /**< buffer length */
+	memif_region_offset_t offset; /**< buffer offset */
+	uint32_t metadata;
+} memif_desc_t;
+
+#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
+	RTE_MARKER mark __rte_cache_aligned;
+
+typedef struct {
+	MEMIF_CACHELINE_ALIGN_MARK(cacheline0);
+	uint32_t cookie;		/**< MEMIF_COOKIE */
+	uint16_t flags;			/**< flags */
+#define MEMIF_RING_FLAG_MASK_INT 1	/**< disable interrupt mode */
+	uint16_t head;			/**< pointer to ring buffer head */
+	MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
+	uint16_t tail;			/**< pointer to ring buffer tail */
+	MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
+	memif_desc_t desc[0];		/**< buffer descriptors */
+} memif_ring_t;
+
+#endif /* _MEMIF_H_ */
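Editor's note: region 0 holds all rings back to back, each ring being one memif_ring_t header followed by 2^log2_ring_size descriptors, with S2M rings laid out before M2S rings. A minimal sketch of that address arithmetic (not part of the driver, it mirrors memif_get_ring() in rte_eth_memif.c below and assumes memif.h is included):

	#include <stddef.h>
	#include <stdint.h>

	/* Size of one ring block in region 0: the ring header followed by
	 * 2^log2_ring_size buffer descriptors. */
	static size_t
	ring_block_size(memif_log2_ring_size_t log2_ring_size)
	{
		return sizeof(memif_ring_t) +
		       sizeof(memif_desc_t) * ((size_t)1 << log2_ring_size);
	}

	/* Offset of ring 'num' of the given type, S2M rings first. */
	static size_t
	ring_offset(memif_ring_type_t type, uint16_t num,
		    uint16_t num_s2m_rings, memif_log2_ring_size_t log2_ring_size)
	{
		return (num + (size_t)type * num_s2m_rings) *
		       ring_block_size(log2_ring_size);
	}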
diff --git a/src/spdk/dpdk/drivers/net/memif/memif_socket.c b/src/spdk/dpdk/drivers/net/memif/memif_socket.c
new file mode 100644
index 000000000..67794cb6f
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/memif_socket.c
@@ -0,0 +1,1115 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_string_fns.h>
+
+#include "rte_eth_memif.h"
+#include "memif_socket.h"
+
+static void memif_intr_handler(void *arg);
+
+static ssize_t
+memif_msg_send(int fd, memif_msg_t *msg, int afd)
+{
+	struct msghdr mh = { 0 };
+	struct iovec iov[1];
+	struct cmsghdr *cmsg;
+	char ctl[CMSG_SPACE(sizeof(int))];
+
+	iov[0].iov_base = msg;
+	iov[0].iov_len = sizeof(memif_msg_t);
+	mh.msg_iov = iov;
+	mh.msg_iovlen = 1;
+
+	if (afd > 0) {
+		memset(&ctl, 0, sizeof(ctl));
+		mh.msg_control = ctl;
+		mh.msg_controllen = sizeof(ctl);
+		cmsg = CMSG_FIRSTHDR(&mh);
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
+	}
+
+	return sendmsg(fd, &mh, 0);
+}
+
+static int
+memif_msg_send_from_queue(struct memif_control_channel *cc)
+{
+	ssize_t size;
+	int ret = 0;
+	struct memif_msg_queue_elt *e;
+
+	e = TAILQ_FIRST(&cc->msg_queue);
+	if (e == NULL)
+		return 0;
+
+	size = memif_msg_send(cc->intr_handle.fd, &e->msg, e->fd);
+	if (size != sizeof(memif_msg_t)) {
+		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
+		ret = -1;
+	} else {
+		MIF_LOG(DEBUG, "Sent msg type %u.", e->msg.type);
+	}
+	TAILQ_REMOVE(&cc->msg_queue, e, next);
+	rte_free(e);
+
+	return ret;
+}
+
+static struct memif_msg_queue_elt *
+memif_msg_enq(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *e;
+
+	e = rte_zmalloc("memif_msg", sizeof(struct memif_msg_queue_elt), 0);
+	if (e == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control message.");
+		return NULL;
+	}
+
+	e->fd = -1;
+	TAILQ_INSERT_TAIL(&cc->msg_queue, e, next);
+
+	return e;
+}
+
+void
+memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
+			 int err_code)
+{
+	struct memif_msg_queue_elt *e;
+	struct pmd_internals *pmd;
+	memif_msg_disconnect_t *d;
+
+	if (cc == NULL) {
+		MIF_LOG(DEBUG, "Missing control channel.");
+		return;
+	}
+
+	e = memif_msg_enq(cc);
+	if (e == NULL) {
+		MIF_LOG(WARNING, "Failed to enqueue disconnect message.");
+		return;
+	}
+
+	d = &e->msg.disconnect;
+
+	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
+	d->code = err_code;
+
+	if (reason != NULL) {
+		strlcpy((char *)d->string, reason, sizeof(d->string));
+		if (cc->dev != NULL) {
+			pmd = cc->dev->data->dev_private;
+			strlcpy(pmd->local_disc_string, reason,
+				sizeof(pmd->local_disc_string));
+		}
+	}
+}
+
+static int
+memif_msg_enq_hello(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
+	memif_msg_hello_t *h;
+
+	if (e == NULL)
+		return -1;
+
+	h = &e->msg.hello;
+
+	e->msg.type = MEMIF_MSG_TYPE_HELLO;
+	h->min_version = MEMIF_VERSION;
+	h->max_version = MEMIF_VERSION;
+	h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	h->max_region = ETH_MEMIF_MAX_REGION_NUM - 1;
+	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
+
+	strlcpy((char *)h->name, rte_version(), sizeof(h->name));
+
+	return 0;
+}
+
+static int
+memif_msg_receive_hello(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	memif_msg_hello_t *h = &msg->hello;
+
+	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
+		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version", 0);
+		return -1;
+	}
+
+	/* Set parameters for active connection */
+	pmd->run.num_s2m_rings = RTE_MIN(h->max_s2m_ring + 1,
+					   pmd->cfg.num_s2m_rings);
+	pmd->run.num_m2s_rings = RTE_MIN(h->max_m2s_ring + 1,
+					   pmd->cfg.num_m2s_rings);
+	pmd->run.log2_ring_size = RTE_MIN(h->max_log2_ring_size,
+					    pmd->cfg.log2_ring_size);
+	pmd->run.pkt_buffer_size = pmd->cfg.pkt_buffer_size;
+
+	strlcpy(pmd->remote_name, (char *)h->name, sizeof(pmd->remote_name));
+
+	MIF_LOG(DEBUG, "Connecting to %s.", pmd->remote_name);
+
+	return 0;
+}
+
+static int
+memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg)
+{
+	memif_msg_init_t *i = &msg->init;
+	struct memif_socket_dev_list_elt *elt;
+	struct pmd_internals *pmd;
+	struct rte_eth_dev *dev;
+
+	if (i->version != MEMIF_VERSION) {
+		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
+		return -1;
+	}
+
+	if (cc->socket == NULL) {
+		memif_msg_enq_disconnect(cc, "Device error", 0);
+		return -1;
+	}
+
+	/* Find device with requested ID */
+	TAILQ_FOREACH(elt, &cc->socket->dev_queue, next) {
+		dev = elt->dev;
+		pmd = dev->data->dev_private;
+		if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) &&
+		    (pmd->id == i->id) && (pmd->role == MEMIF_ROLE_MASTER)) {
+			if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
+					   ETH_MEMIF_FLAG_CONNECTED)) {
+				memif_msg_enq_disconnect(cc,
+							 "Already connected", 0);
+				return -1;
+			}
+
+			/* assign control channel to device */
+			cc->dev = dev;
+			pmd->cc = cc;
+
+			if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
+				memif_msg_enq_disconnect(pmd->cc,
+							 "Only ethernet mode supported",
+							 0);
+				return -1;
+			}
+
+			strlcpy(pmd->remote_name, (char *)i->name,
+				sizeof(pmd->remote_name));
+
+			if (*pmd->secret != '\0') {
+				if (*i->secret == '\0') {
+					memif_msg_enq_disconnect(pmd->cc,
+								 "Secret required", 0);
+					return -1;
+				}
+				if (strncmp(pmd->secret, (char *)i->secret,
+						ETH_MEMIF_SECRET_SIZE) != 0) {
+					memif_msg_enq_disconnect(pmd->cc,
+								 "Incorrect secret", 0);
+					return -1;
+				}
+			}
+
+			pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
+			return 0;
+		}
+	}
+
+	/* ID not found on this socket */
+	MIF_LOG(DEBUG, "ID %u not found.", i->id);
+	memif_msg_enq_disconnect(cc, "ID not found", 0);
+	return -1;
+}
+
+static int
+memif_msg_receive_add_region(struct rte_eth_dev *dev, memif_msg_t *msg,
+			     int fd)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct pmd_process_private *proc_private = dev->process_private;
+	memif_msg_add_region_t *ar = &msg->add_region;
+	struct memif_region *r;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
+		return -1;
+	}
+
+	if (ar->index >= ETH_MEMIF_MAX_REGION_NUM ||
+			ar->index != proc_private->regions_num ||
+			proc_private->regions[ar->index] != NULL) {
+		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
+		return -1;
+	}
+
+	r = rte_zmalloc("region", sizeof(struct memif_region), 0);
+	if (r == NULL) {
+		memif_msg_enq_disconnect(pmd->cc, "Failed to alloc memif region.", 0);
+		return -ENOMEM;
+	}
+
+	r->fd = fd;
+	r->region_size = ar->size;
+	r->addr = NULL;
+
+	proc_private->regions[ar->index] = r;
+	proc_private->regions_num++;
+
+	return 0;
+}
+
+static int
+memif_msg_receive_add_ring(struct rte_eth_dev *dev, memif_msg_t *msg, int fd)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	memif_msg_add_ring_t *ar = &msg->add_ring;
+	struct memif_queue *mq;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
+		return -1;
+	}
+
+	/* check if we have enough queues */
+	if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) {
+		if (ar->index >= pmd->cfg.num_s2m_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+			return -1;
+		}
+		pmd->run.num_s2m_rings++;
+	} else {
+		if (ar->index >= pmd->cfg.num_m2s_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+			return -1;
+		}
+		pmd->run.num_m2s_rings++;
+	}
+
+	mq = (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) ?
+	     dev->data->rx_queues[ar->index] : dev->data->tx_queues[ar->index];
+
+	mq->intr_handle.fd = fd;
+	mq->log2_ring_size = ar->log2_ring_size;
+	mq->region = ar->region;
+	mq->ring_offset = ar->offset;
+
+	return 0;
+}
+
+static int
+memif_msg_receive_connect(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	memif_msg_connect_t *c = &msg->connect;
+	int ret;
+
+	ret = memif_connect(dev);
+	if (ret < 0)
+		return ret;
+
+	strlcpy(pmd->remote_if_name, (char *)c->if_name,
+		sizeof(pmd->remote_if_name));
+	MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
+
+	return 0;
+}
+
+static int
+memif_msg_receive_connected(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	memif_msg_connected_t *c = &msg->connected;
+	int ret;
+
+	ret = memif_connect(dev);
+	if (ret < 0)
+		return ret;
+
+	strlcpy(pmd->remote_if_name, (char *)c->if_name,
+		sizeof(pmd->remote_if_name));
+	MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
+
+	return 0;
+}
+
+static int
+memif_msg_receive_disconnect(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	memif_msg_disconnect_t *d = &msg->disconnect;
+
+	memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
+	strlcpy(pmd->remote_disc_string, (char *)d->string,
+		sizeof(pmd->remote_disc_string));
+
+	MIF_LOG(INFO, "Disconnect received: %s", pmd->remote_disc_string);
+
+	memset(pmd->local_disc_string, 0, 96);
+	memif_disconnect(dev);
+	return 0;
+}
+
+static int
+memif_msg_enq_ack(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	e->msg.type = MEMIF_MSG_TYPE_ACK;
+
+	return 0;
+}
+
+static int
+memif_msg_enq_init(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	memif_msg_init_t *i;
+
+	if (e == NULL)
+		return -1;
+
+	i = &e->msg.init;
+	e->msg.type = MEMIF_MSG_TYPE_INIT;
+	i->version = MEMIF_VERSION;
+	i->id = pmd->id;
+	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+	strlcpy((char *)i->name, rte_version(), sizeof(i->name));
+
+	if (*pmd->secret != '\0')
+		strlcpy((char *)i->secret, pmd->secret, sizeof(i->secret));
+
+	return 0;
+}
+
+static int
+memif_msg_enq_add_region(struct rte_eth_dev *dev, uint8_t idx)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct pmd_process_private *proc_private = dev->process_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	memif_msg_add_region_t *ar;
+	struct memif_region *mr = proc_private->regions[idx];
+
+	if (e == NULL)
+		return -1;
+
+	ar = &e->msg.add_region;
+	e->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
+	e->fd = mr->fd;
+	ar->index = idx;
+	ar->size = mr->region_size;
+
+	return 0;
+}
+
+static int
+memif_msg_enq_add_ring(struct rte_eth_dev *dev, uint8_t idx,
+		       memif_ring_type_t type)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	struct memif_queue *mq;
+	memif_msg_add_ring_t *ar;
+
+	if (e == NULL)
+		return -1;
+
+	ar = &e->msg.add_ring;
+	mq = (type == MEMIF_RING_S2M) ? dev->data->tx_queues[idx] :
+	     dev->data->rx_queues[idx];
+
+	e->msg.type = MEMIF_MSG_TYPE_ADD_RING;
+	e->fd = mq->intr_handle.fd;
+	ar->index = idx;
+	ar->offset = mq->ring_offset;
+	ar->region = mq->region;
+	ar->log2_ring_size = mq->log2_ring_size;
+	ar->flags = (type == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0;
+	ar->private_hdr_size = 0;
+
+	return 0;
+}
+
+static int
+memif_msg_enq_connect(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	memif_msg_connect_t *c;
+
+	if (e == NULL)
+		return -1;
+
+	c = &e->msg.connect;
+	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
+	strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
+
+	return 0;
+}
+
+static int
+memif_msg_enq_connected(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	memif_msg_connected_t *c;
+
+	if (e == NULL)
+		return -1;
+
+	c = &e->msg.connected;
+	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
+	strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
+
+	return 0;
+}
+
+static void
+memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
+{
+	struct memif_msg_queue_elt *elt;
+	struct memif_control_channel *cc = arg;
+
+	/* close control channel fd */
+	close(intr_handle->fd);
+	/* clear message queue */
+	while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
+		TAILQ_REMOVE(&cc->msg_queue, elt, next);
+		rte_free(elt);
+	}
+	/* free control channel */
+	rte_free(cc);
+}
+
+void
+memif_disconnect(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *elt, *next;
+	struct memif_queue *mq;
+	struct rte_intr_handle *ih;
+	int i;
+	int ret;
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
+
+	rte_spinlock_lock(&pmd->cc_lock);
+	if (pmd->cc != NULL) {
+		/* Clear control message queue (except disconnect message if any). */
+		for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL; elt = next) {
+			next = TAILQ_NEXT(elt, next);
+			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
+				TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
+				rte_free(elt);
+			}
+		}
+		/* send disconnect message (if there is any in queue) */
+		memif_msg_send_from_queue(pmd->cc);
+
+		/* at this point, there should be no more messages in queue */
+		if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
+			MIF_LOG(WARNING,
+				"Unexpected message(s) in message queue.");
+		}
+
+		ih = &pmd->cc->intr_handle;
+		if (ih->fd > 0) {
+			ret = rte_intr_callback_unregister(ih,
+							memif_intr_handler,
+							pmd->cc);
+			/*
+			 * If callback is active (disconnecting based on
+			 * received control message).
+			 */
+			if (ret == -EAGAIN) {
+				ret = rte_intr_callback_unregister_pending(ih,
+							memif_intr_handler,
+							pmd->cc,
+							memif_intr_unregister_handler);
+			} else if (ret > 0) {
+				close(ih->fd);
+				rte_free(pmd->cc);
+			}
+			pmd->cc = NULL;
+			if (ret <= 0)
+				MIF_LOG(WARNING,
+					"Failed to unregister control channel callback.");
+		}
+	}
+	rte_spinlock_unlock(&pmd->cc_lock);
+
+	/* unconfig interrupts */
+	for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
+		if (pmd->role == MEMIF_ROLE_SLAVE) {
+			if (dev->data->tx_queues != NULL)
+				mq = dev->data->tx_queues[i];
+			else
+				continue;
+		} else {
+			if (dev->data->rx_queues != NULL)
+				mq = dev->data->rx_queues[i];
+			else
+				continue;
+		}
+		if (mq->intr_handle.fd > 0) {
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+	}
+	for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
+		if (pmd->role == MEMIF_ROLE_MASTER) {
+			if (dev->data->tx_queues != NULL)
+				mq = dev->data->tx_queues[i];
+			else
+				continue;
+		} else {
+			if (dev->data->rx_queues != NULL)
+				mq = dev->data->rx_queues[i];
+			else
+				continue;
+		}
+		if (mq->intr_handle.fd > 0) {
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+	}
+
+	memif_free_regions(dev);
+
+	/* reset connection configuration */
+	memset(&pmd->run, 0, sizeof(pmd->run));
+
+	MIF_LOG(DEBUG, "Disconnected, id: %d, role: %s.", pmd->id,
+		(pmd->role == MEMIF_ROLE_MASTER) ? "master" : "slave");
+}
+
+static int
+memif_msg_receive(struct memif_control_channel *cc)
+{
+	char ctl[CMSG_SPACE(sizeof(int)) +
+		 CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+	struct msghdr mh = { 0 };
+	struct iovec iov[1];
+	memif_msg_t msg = { 0 };
+	ssize_t size;
+	int ret = 0;
+	struct ucred *cr __rte_unused;
+	cr = 0;
+	struct cmsghdr *cmsg;
+	int afd = -1;
+	int i;
+	struct pmd_internals *pmd;
+	struct pmd_process_private *proc_private;
+
+	iov[0].iov_base = (void *)&msg;
+	iov[0].iov_len = sizeof(memif_msg_t);
+	mh.msg_iov = iov;
+	mh.msg_iovlen = 1;
+	mh.msg_control = ctl;
+	mh.msg_controllen = sizeof(ctl);
+
+	size = recvmsg(cc->intr_handle.fd, &mh, 0);
+	if (size != sizeof(memif_msg_t)) {
+		MIF_LOG(DEBUG, "Invalid message size = %zd", size);
+		if (size > 0)
+			/* 0 means end-of-file, negative size means error,
+			 * don't send further disconnect message in such cases.
+			 */
+			memif_msg_enq_disconnect(cc, "Invalid message size", 0);
+		return -1;
+	}
+	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
+
+	cmsg = CMSG_FIRSTHDR(&mh);
+	while (cmsg) {
+		if (cmsg->cmsg_level == SOL_SOCKET) {
+			if (cmsg->cmsg_type == SCM_CREDENTIALS)
+				cr = (struct ucred *)CMSG_DATA(cmsg);
+			else if (cmsg->cmsg_type == SCM_RIGHTS)
+				rte_memcpy(&afd, CMSG_DATA(cmsg), sizeof(int));
+		}
+		cmsg = CMSG_NXTHDR(&mh, cmsg);
+	}
+
+	if (cc->dev == NULL && msg.type != MEMIF_MSG_TYPE_INIT) {
+		MIF_LOG(DEBUG, "Unexpected message.");
+		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
+		return -1;
+	}
+
+	/* get device from hash data */
+	switch (msg.type) {
+	case MEMIF_MSG_TYPE_ACK:
+		break;
+	case MEMIF_MSG_TYPE_HELLO:
+		ret = memif_msg_receive_hello(cc->dev, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_init_regions_and_queues(cc->dev);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_init(cc->dev);
+		if (ret < 0)
+			goto exit;
+		pmd = cc->dev->data->dev_private;
+		proc_private = cc->dev->process_private;
+		for (i = 0; i < proc_private->regions_num; i++) {
+			ret = memif_msg_enq_add_region(cc->dev, i);
+			if (ret < 0)
+				goto exit;
+		}
+		for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+			ret = memif_msg_enq_add_ring(cc->dev, i,
+						     MEMIF_RING_S2M);
+			if (ret < 0)
+				goto exit;
+		}
+		for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+			ret = memif_msg_enq_add_ring(cc->dev, i,
+						     MEMIF_RING_M2S);
+			if (ret < 0)
+				goto exit;
+		}
+		ret = memif_msg_enq_connect(cc->dev);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_INIT:
+		/*
+		 * This cc does not have an interface associated with it.
+		 * If a suitable interface is found, it will be assigned here.
+		 */
+		ret = memif_msg_receive_init(cc, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->dev);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_ADD_REGION:
+		ret = memif_msg_receive_add_region(cc->dev, &msg, afd);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->dev);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_ADD_RING:
+		ret = memif_msg_receive_add_ring(cc->dev, &msg, afd);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->dev);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_CONNECT:
+		ret = memif_msg_receive_connect(cc->dev, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_connected(cc->dev);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_CONNECTED:
+		ret = memif_msg_receive_connected(cc->dev, &msg);
+		break;
+	case MEMIF_MSG_TYPE_DISCONNECT:
+		ret = memif_msg_receive_disconnect(cc->dev, &msg);
+		if (ret < 0)
+			goto exit;
+		break;
+	default:
+		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
+		ret = -1;
+		goto exit;
+	}
+
+ exit:
+	return ret;
+}
+
+static void
+memif_intr_handler(void *arg)
+{
+	struct memif_control_channel *cc = arg;
+	int ret;
+
+	ret = memif_msg_receive(cc);
+	/* if driver failed to assign device */
+	if (cc->dev == NULL) {
+		memif_msg_send_from_queue(cc);
+		ret = rte_intr_callback_unregister_pending(&cc->intr_handle,
+							   memif_intr_handler,
+							   cc,
+							   memif_intr_unregister_handler);
+		if (ret < 0)
+			MIF_LOG(WARNING,
+				"Failed to unregister control channel callback.");
+		return;
+	}
+	/* if memif_msg_receive failed */
+	if (ret < 0)
+		goto disconnect;
+
+	ret = memif_msg_send_from_queue(cc);
+	if (ret < 0)
+		goto disconnect;
+
+	return;
+
+ disconnect:
+	if (cc->dev == NULL) {
+		MIF_LOG(WARNING, "eth dev not allocated");
+		return;
+	}
+	memif_disconnect(cc->dev);
+}
+
+static void
+memif_listener_handler(void *arg)
+{
+	struct memif_socket *socket = arg;
+	int sockfd;
+	int addr_len;
+	struct sockaddr_un client;
+	struct memif_control_channel *cc;
+	int ret;
+
+	addr_len = sizeof(client);
+	sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
+			(socklen_t *)&addr_len);
+	if (sockfd < 0) {
+		MIF_LOG(ERR,
+			"Failed to accept connection request on socket fd %d",
+			socket->intr_handle.fd);
+		return;
+	}
+
+	MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
+
+	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
+	if (cc == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control channel.");
+		goto error;
+	}
+
+	cc->intr_handle.fd = sockfd;
+	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	cc->socket = socket;
+	cc->dev = NULL;
+	TAILQ_INIT(&cc->msg_queue);
+
+	ret = rte_intr_callback_register(&cc->intr_handle, memif_intr_handler, cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to register control channel callback.");
+		goto error;
+	}
+
+	ret = memif_msg_enq_hello(cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to enqueue hello message.");
+		goto error;
+	}
+	ret = memif_msg_send_from_queue(cc);
+	if (ret < 0)
+		goto error;
+
+	return;
+
+ error:
+	if (sockfd >= 0) {
+		close(sockfd);
+		sockfd = -1;
+	}
+	if (cc != NULL)
+		rte_free(cc);
+}
+
+static struct memif_socket *
+memif_socket_create(char *key, uint8_t listener)
+{
+	struct memif_socket *sock;
+	struct sockaddr_un un;
+	int sockfd;
+	int ret;
+	int on = 1;
+
+	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
+	if (sock == NULL) {
+		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
+		return NULL;
+	}
+
+	sock->listener = listener;
+	strlcpy(sock->filename, key, MEMIF_SOCKET_UN_SIZE);
+	TAILQ_INIT(&sock->dev_queue);
+
+	if (listener != 0) {
+		sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+		if (sockfd < 0)
+			goto error;
+
+		un.sun_family = AF_UNIX;
+		strlcpy(un.sun_path, sock->filename, MEMIF_SOCKET_UN_SIZE);
+
+		ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
+				 sizeof(on));
+		if (ret < 0)
+			goto error;
+
+		ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+		if (ret < 0)
+			goto error;
+
+		ret = listen(sockfd, 1);
+		if (ret < 0)
+			goto error;
+
+		MIF_LOG(DEBUG, "Memif listener socket %s created.", sock->filename);
+
+		sock->intr_handle.fd = sockfd;
+		sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
+		ret = rte_intr_callback_register(&sock->intr_handle,
+						 memif_listener_handler, sock);
+		if (ret < 0) {
+			MIF_LOG(ERR, "Failed to register interrupt "
+				"callback for listener socket");
+			return NULL;
+		}
+	}
+
+	return sock;
+
+ error:
+	MIF_LOG(ERR, "Failed to setup socket %s: %s", key, strerror(errno));
+	if (sock != NULL)
+		rte_free(sock);
+	if (sockfd >= 0)
+		close(sockfd);
+	return NULL;
+}
+
+static struct rte_hash *
+memif_create_socket_hash(void)
+{
+	struct rte_hash_parameters params = { 0 };
+
+	params.name = MEMIF_SOCKET_HASH_NAME;
+	params.entries = 256;
+	params.key_len = MEMIF_SOCKET_UN_SIZE;
+	params.hash_func = rte_jhash;
+	params.hash_func_init_val = 0;
+	return rte_hash_create(&params);
+}
+
+int
+memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_socket *socket = NULL;
+	struct memif_socket_dev_list_elt *elt;
+	struct pmd_internals *tmp_pmd;
+	struct rte_hash *hash;
+	int ret;
+	char key[MEMIF_SOCKET_UN_SIZE];
+
+	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL) {
+		hash = memif_create_socket_hash();
+		if (hash == NULL) {
+			MIF_LOG(ERR, "Failed to create memif socket hash.");
+			return -1;
+		}
+	}
+
+	memset(key, 0, MEMIF_SOCKET_UN_SIZE);
+	strlcpy(key, socket_filename, MEMIF_SOCKET_UN_SIZE);
+	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
+	if (ret < 0) {
+		socket = memif_socket_create(key,
+					     (pmd->role == MEMIF_ROLE_SLAVE) ? 0 : 1);
+		if (socket == NULL)
+			return -1;
+		ret = rte_hash_add_key_data(hash, key, socket);
+		if (ret < 0) {
+			MIF_LOG(ERR, "Failed to add socket to socket hash.");
+			return ret;
+		}
+	}
+	pmd->socket_filename = socket->filename;
+
+	TAILQ_FOREACH(elt, &socket->dev_queue, next) {
+		tmp_pmd = elt->dev->data->dev_private;
+		if (tmp_pmd->id == pmd->id && tmp_pmd->role == pmd->role) {
+			MIF_LOG(ERR, "Two interfaces with the same id (%d) can "
+				"not have the same role.", pmd->id);
+			return -1;
+		}
+	}
+
+	elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_dev_list_elt), 0);
+	if (elt == NULL) {
+		MIF_LOG(ERR, "Failed to add device to socket device list.");
+		return -1;
+	}
+	elt->dev = dev;
+	TAILQ_INSERT_TAIL(&socket->dev_queue, elt, next);
+
+	return 0;
+}
+
+void
+memif_socket_remove_device(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_socket *socket = NULL;
+	struct memif_socket_dev_list_elt *elt, *next;
+	struct rte_hash *hash;
+	int ret;
+
+	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL)
+		return;
+
+	if (pmd->socket_filename == NULL)
+		return;
+
+	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) < 0)
+		return;
+
+	for (elt = TAILQ_FIRST(&socket->dev_queue); elt != NULL; elt = next) {
+		next = TAILQ_NEXT(elt, next);
+		if (elt->dev == dev) {
+			TAILQ_REMOVE(&socket->dev_queue, elt, next);
+			rte_free(elt);
+			pmd->socket_filename = NULL;
+		}
+	}
+
+	/* remove socket, if this was the last device using it */
+	if (TAILQ_EMPTY(&socket->dev_queue)) {
+		rte_hash_del_key(hash, socket->filename);
+		if (socket->listener) {
+			/* remove listener socket file,
+			 * so we can create new one later.
+			 */
+			ret = remove(socket->filename);
+			if (ret < 0)
+				MIF_LOG(ERR, "Failed to remove socket file: %s",
+					socket->filename);
+		}
+		rte_free(socket);
+	}
+}
+
+int
+memif_connect_master(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+	memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+	return 0;
+}
+
+int
+memif_connect_slave(struct rte_eth_dev *dev)
+{
+	int sockfd;
+	int ret;
+	struct sockaddr_un sun;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+	memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (sockfd < 0) {
+		MIF_LOG(ERR, "Failed to open socket.");
+		return -1;
+	}
+
+	sun.sun_family = AF_UNIX;
+
+	memcpy(sun.sun_path, pmd->socket_filename, sizeof(sun.sun_path) - 1);
+
+	ret = connect(sockfd, (struct sockaddr *)&sun,
+		      sizeof(struct sockaddr_un));
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to connect socket: %s.", pmd->socket_filename);
+		goto error;
+	}
+
+	MIF_LOG(DEBUG, "Memif socket: %s connected.", pmd->socket_filename);
+
+	pmd->cc = rte_zmalloc("memif-cc",
+			      sizeof(struct memif_control_channel), 0);
+	if (pmd->cc == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control channel.");
+		goto error;
+	}
+
+	pmd->cc->intr_handle.fd = sockfd;
+	pmd->cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	pmd->cc->socket = NULL;
+	pmd->cc->dev = dev;
+	TAILQ_INIT(&pmd->cc->msg_queue);
+
+	ret = rte_intr_callback_register(&pmd->cc->intr_handle,
+					 memif_intr_handler, pmd->cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to register interrupt callback for control fd");
+		goto error;
+	}
+
+	return 0;
+
+ error:
+	if (sockfd >= 0) {
+		close(sockfd);
+		sockfd = -1;
+	}
+	if (pmd->cc != NULL) {
+		rte_free(pmd->cc);
+		pmd->cc = NULL;
+	}
+	return -1;
+}
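Editor's note: the control channel above rides on a SOCK_SEQPACKET UNIX socket and passes region/interrupt file descriptors as SCM_RIGHTS ancillary data. A minimal standalone sketch of the receive side of that pattern, assuming memif.h is included (error handling elided; the driver's own memif_msg_receive() also collects SCM_CREDENTIALS):

	#include <string.h>
	#include <sys/socket.h>

	static int
	recv_msg_with_fd(int sock, memif_msg_t *msg, int *fd)
	{
		char ctl[CMSG_SPACE(sizeof(int))];
		struct iovec iov = { .iov_base = msg, .iov_len = sizeof(*msg) };
		struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
				     .msg_control = ctl, .msg_controllen = sizeof(ctl) };
		struct cmsghdr *cmsg;

		*fd = -1;
		if (recvmsg(sock, &mh, 0) != (ssize_t)sizeof(*msg))
			return -1;
		for (cmsg = CMSG_FIRSTHDR(&mh); cmsg != NULL;
		     cmsg = CMSG_NXTHDR(&mh, cmsg)) {
			if (cmsg->cmsg_level == SOL_SOCKET &&
			    cmsg->cmsg_type == SCM_RIGHTS)
				memcpy(fd, CMSG_DATA(cmsg), sizeof(int));
		}
		return 0;
	}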
diff --git a/src/spdk/dpdk/drivers/net/memif/memif_socket.h b/src/spdk/dpdk/drivers/net/memif/memif_socket.h
new file mode 100644
index 000000000..5c49ec24e
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/memif_socket.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+ */
+
+#ifndef _MEMIF_SOCKET_H_
+#define _MEMIF_SOCKET_H_
+
+#include <sys/queue.h>
+#include <sys/un.h>
+
+/**
+ * Remove device from socket device list. If no device is left on the socket,
+ * remove the socket as well.
+ *
+ * @param dev
+ *   memif device
+ */
+void memif_socket_remove_device(struct rte_eth_dev *dev);
+
+/**
+ * Enqueue disconnect message to control channel message queue.
+ *
+ * @param cc
+ *   control channel
+ * @param reason
+ *   const string stating disconnect reason (96 characters)
+ * @param err_code
+ *   error code
+ */
+void memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
+			      int err_code);
+
+/**
+ * Initialize memif socket for specified device. If socket doesn't exist, create socket.
+ *
+ * @param dev
+ *   memif device
+ * @param socket_filename
+ *   socket filename
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
+
+/**
+ * Disconnect memif device. Close control channel and shared memory.
+ *
+ * @param dev
+ *   memif device
+ */
+void memif_disconnect(struct rte_eth_dev *dev);
+
+/**
+ * If device is properly configured, enable connection establishment.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect_master(struct rte_eth_dev *dev);
+
+/**
+ * If device is properly configured, send connection request.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect_slave(struct rte_eth_dev *dev);
+
+struct memif_socket_dev_list_elt {
+	TAILQ_ENTRY(memif_socket_dev_list_elt) next;
+	struct rte_eth_dev *dev;		/**< pointer to device internals */
+	char dev_name[RTE_ETH_NAME_MAX_LEN];
+};
+
+#define MEMIF_SOCKET_HASH_NAME			"memif-sh"
+#define MEMIF_SOCKET_UN_SIZE	\
+	(sizeof(struct sockaddr_un) - offsetof(struct sockaddr_un, sun_path))
+
+struct memif_socket {
+	struct rte_intr_handle intr_handle;	/**< interrupt handle */
+	char filename[MEMIF_SOCKET_UN_SIZE];	/**< socket filename */
+
+	TAILQ_HEAD(, memif_socket_dev_list_elt) dev_queue;
+	/**< Queue of devices using this socket */
+	uint8_t listener;			/**< if not zero socket is listener */
+};
+
+/* Control message queue. */
+struct memif_msg_queue_elt {
+	memif_msg_t msg;			/**< control message */
+	TAILQ_ENTRY(memif_msg_queue_elt) next;
+	int fd;					/**< fd to be sent to peer */
+};
+
+struct memif_control_channel {
+	struct rte_intr_handle intr_handle;	/**< interrupt handle */
+	TAILQ_HEAD(, memif_msg_queue_elt) msg_queue; /**< control message queue */
+	struct memif_socket *socket;		/**< pointer to socket */
+	struct rte_eth_dev *dev;		/**< pointer to device */
+};
+
+#endif /* MEMIF_SOCKET_H */
diff --git a/src/spdk/dpdk/drivers/net/memif/meson.build b/src/spdk/dpdk/drivers/net/memif/meson.build
new file mode 100644
index 000000000..9c3ba432d
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/meson.build
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+
+if not is_linux
+	build = false
+	reason = 'only supported on Linux'
+endif
+
+sources = files('rte_eth_memif.c',
+		'memif_socket.c')
+
+deps += ['hash']
diff --git a/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.c b/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.c
new file mode 100644
index 000000000..b6da9a8b4
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.c
@@ -0,0 +1,1816 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/if_ether.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal_memconfig.h>
+
+#include "rte_eth_memif.h"
+#include "memif_socket.h"
+
+#define ETH_MEMIF_ID_ARG		"id"
+#define ETH_MEMIF_ROLE_ARG		"role"
+#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG	"bsize"
+#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
+#define ETH_MEMIF_SOCKET_ARG		"socket"
+#define ETH_MEMIF_MAC_ARG		"mac"
+#define ETH_MEMIF_ZC_ARG		"zero-copy"
+#define ETH_MEMIF_SECRET_ARG		"secret"
+
+static const char * const valid_arguments[] = {
+	ETH_MEMIF_ID_ARG,
+	ETH_MEMIF_ROLE_ARG,
+	ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
+	ETH_MEMIF_RING_SIZE_ARG,
+	ETH_MEMIF_SOCKET_ARG,
+	ETH_MEMIF_MAC_ARG,
+	ETH_MEMIF_ZC_ARG,
+	ETH_MEMIF_SECRET_ARG,
+	NULL
+};
+
+static const struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_AUTONEG
+};
+
+#define MEMIF_MP_SEND_REGION		"memif_mp_send_region"
+
+
+static int memif_region_init_zc(const struct rte_memseg_list *msl,
+				const struct rte_memseg *ms, void *arg);
+
+const char *
+memif_version(void)
+{
+	return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
+}
+
+/* Message header to synchronize regions */
+struct mp_region_msg {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	memif_region_index_t idx;
+	memif_region_size_t size;
+};
+
+static int
+memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *proc_private;
+	const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
+	struct rte_mp_msg reply;
+	struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
+	uint16_t port_id;
+	int ret;
+
+	/* Get requested port */
+	ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
+	if (ret) {
+		MIF_LOG(ERR, "Failed to get port id for %s",
+			msg_param->port_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	proc_private = dev->process_private;
+
+	memset(&reply, 0, sizeof(reply));
+	strlcpy(reply.name, msg->name, sizeof(reply.name));
+	reply_param->idx = msg_param->idx;
+	if (proc_private->regions[msg_param->idx] != NULL) {
+		reply_param->size = proc_private->regions[msg_param->idx]->region_size;
+		reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
+		reply.num_fds = 1;
+	}
+	reply.len_param = sizeof(*reply_param);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		MIF_LOG(ERR, "Failed to reply to an add region request");
+		return -1;
+	}
+
+	return 0;
+}
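Editor's note: the kvargs above are what an application passes when creating a memif port. A hypothetical usage sketch (device name, id and socket path are illustrative only); rte_vdev_init() is the runtime equivalent of an EAL command line such as --vdev=net_memif0,role=slave,id=0,socket=/run/memif.sock:

	#include <rte_bus_vdev.h>

	/* Create a memif slave port at runtime. The master side would use
	 * role=master on the same socket and id. */
	static int
	create_memif_port(void)
	{
		return rte_vdev_init("net_memif0",
				     "role=slave,id=0,socket=/run/memif.sock");
	}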
+/*
+ * Request regions
+ * Called by secondary process, when ports link status goes up.
+ */
+static int
+memif_mp_request_regions(struct rte_eth_dev *dev)
+{
+	int ret, i;
+	struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
+	struct rte_mp_msg msg, *reply;
+	struct rte_mp_reply replies;
+	struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
+	struct mp_region_msg *reply_param;
+	struct memif_region *r;
+	struct pmd_process_private *proc_private = dev->process_private;
+	struct pmd_internals *pmd = dev->data->dev_private;
+	/* in case of zero-copy slave, only request region 0 */
+	uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
+				   1 : ETH_MEMIF_MAX_REGION_NUM;
+
+	MIF_LOG(DEBUG, "Requesting memory regions");
+
+	for (i = 0; i < max_region_num; i++) {
+		/* Prepare the message */
+		memset(&msg, 0, sizeof(msg));
+		strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
+		strlcpy(msg_param->port_name, dev->data->name,
+			sizeof(msg_param->port_name));
+		msg_param->idx = i;
+		msg.len_param = sizeof(*msg_param);
+
+		/* Send message */
+		ret = rte_mp_request_sync(&msg, &replies, &timeout);
+		if (ret < 0 || replies.nb_received != 1) {
+			MIF_LOG(ERR, "Failed to send mp msg: %d",
+				rte_errno);
+			return -1;
+		}
+
+		reply = &replies.msgs[0];
+		reply_param = (struct mp_region_msg *)reply->param;
+
+		if (reply_param->size > 0) {
+			r = rte_zmalloc("region", sizeof(struct memif_region), 0);
+			if (r == NULL) {
+				MIF_LOG(ERR, "Failed to alloc memif region.");
+				free(reply);
+				return -ENOMEM;
+			}
+			r->region_size = reply_param->size;
+			if (reply->num_fds < 1) {
+				MIF_LOG(ERR, "Missing file descriptor.");
+				free(reply);
+				return -1;
+			}
+			r->fd = reply->fds[0];
+			r->addr = NULL;
+
+			proc_private->regions[reply_param->idx] = r;
+			proc_private->regions_num++;
+		}
+		free(reply);
+	}
+
+	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+		ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
+		if (ret < 0)
+			return ret;
+	}
+
+	return memif_connect(dev);
+}
+
+static int
+memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
+{
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
+	dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	dev_info->min_rx_bufsize = 0;
+
+	return 0;
+}
+
+static memif_ring_t *
+memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
+	       memif_ring_type_t type, uint16_t ring_num)
+{
+	/* rings only in region 0 */
+	void *p = proc_private->regions[0]->addr;
+	int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
+	    (1 << pmd->run.log2_ring_size);
+
+	p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
+
+	return (memif_ring_t *)p;
+}
+
+static memif_region_offset_t
+memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
+		      memif_ring_type_t type, uint16_t num)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct pmd_process_private *proc_private = dev->process_private;
+
+	return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
+		(uint8_t *)proc_private->regions[mq->region]->addr);
+}
+
+static memif_ring_t *
+memif_get_ring_from_queue(struct pmd_process_private *proc_private,
+			  struct memif_queue *mq)
+{
+	struct memif_region *r;
+
+	r = proc_private->regions[mq->region];
+	if (r == NULL)
+		return NULL;
+
+	return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
+}
+
+static void *
+memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
+{
+	return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
+}
+
+/* Free mbufs received by master */
+static void
+memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
+{
+	uint16_t mask = (1 << mq->log2_ring_size) - 1;
+	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
+
+	/* FIXME: improve performance */
+	while (mq->last_tail != ring->tail) {
+		RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
+		/* Decrement refcnt and free mbuf. (current segment) */
+		rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
+		rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
+		mq->last_tail++;
+	}
+}
+
+static int
+memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
+		    struct rte_mbuf *tail)
+{
+	/* Check for number-of-segments-overflow */
+	if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
+		return -EOVERFLOW;
+
+	/* Chain 'tail' onto the old tail */
+	cur_tail->next = tail;
+
+	/* accumulate number of segments and total length. */
+	head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
+
+	tail->pkt_len = tail->data_len;
+	head->pkt_len += tail->pkt_len;
+
+	return 0;
+}
+
+static uint16_t
+eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct memif_queue *mq = queue;
+	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
+	struct pmd_process_private *proc_private =
+		rte_eth_devices[mq->in_port].process_private;
+	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
+	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
+	uint16_t n_rx_pkts = 0;
+	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
+		RTE_PKTMBUF_HEADROOM;
+	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+	memif_ring_type_t type = mq->type;
+	memif_desc_t *d0;
+	struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
+	uint64_t b;
+	ssize_t size __rte_unused;
+	uint16_t head;
+	int ret;
+	struct rte_eth_link link;
+
+	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+		return 0;
+	if (unlikely(ring == NULL)) {
+		/* Secondary process will attempt to request regions. */
+		ret = rte_eth_link_get(mq->in_port, &link);
+		if (ret < 0)
+			MIF_LOG(ERR, "Failed to get port %u link info: %s",
+				mq->in_port, rte_strerror(-ret));
+		return 0;
+	}
+
+	/* consume interrupt */
+	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
+		size = read(mq->intr_handle.fd, &b, sizeof(b));
+
+	ring_size = 1 << mq->log2_ring_size;
+	mask = ring_size - 1;
+
+	if (type == MEMIF_RING_S2M) {
+		cur_slot = mq->last_head;
+		last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
+	} else {
+		cur_slot = mq->last_tail;
+		last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
+	}
+
+	if (cur_slot == last_slot)
+		goto refill;
+	n_slots = last_slot - cur_slot;
+
+	while (n_slots && n_rx_pkts < nb_pkts) {
+		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+		if (unlikely(mbuf_head == NULL))
+			goto no_free_bufs;
+		mbuf = mbuf_head;
+		mbuf->port = mq->in_port;
+
+next_slot:
+		s0 = cur_slot & mask;
+		d0 = &ring->desc[s0];
+
+		src_len = d0->length;
+		dst_off = 0;
+		src_off = 0;
+
+		do {
+			dst_len = mbuf_size - dst_off;
+			if (dst_len == 0) {
+				dst_off = 0;
+				dst_len = mbuf_size;
+
+				/* store pointer to tail */
+				mbuf_tail = mbuf;
+				mbuf = rte_pktmbuf_alloc(mq->mempool);
+				if (unlikely(mbuf == NULL))
+					goto no_free_bufs;
+				mbuf->port = mq->in_port;
+				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+				if (unlikely(ret < 0)) {
+					MIF_LOG(ERR, "number-of-segments-overflow");
+					rte_pktmbuf_free(mbuf);
+					goto no_free_bufs;
+				}
+			}
+			cp_len = RTE_MIN(dst_len, src_len);
+
+			rte_pktmbuf_data_len(mbuf) += cp_len;
+			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+			if (mbuf != mbuf_head)
+				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+
+			memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
+			       (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
+			       cp_len);
+
+			src_off += cp_len;
+			dst_off += cp_len;
+			src_len -= cp_len;
+		} while (src_len);
+
+		cur_slot++;
+		n_slots--;
+
+		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+			goto next_slot;
+
+		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+		*bufs++ = mbuf_head;
+		n_rx_pkts++;
+	}
+
+no_free_bufs:
+	if (type == MEMIF_RING_S2M) {
+		__atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
+		mq->last_head = cur_slot;
+	} else {
+		mq->last_tail = cur_slot;
+	}
+
+refill:
+	if (type == MEMIF_RING_M2S) {
+		head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
+		n_slots = ring_size - head + mq->last_tail;
+
+		while (n_slots--) {
+			s0 = head++ & mask;
+			d0 = &ring->desc[s0];
+			d0->length = pmd->run.pkt_buffer_size;
+		}
+		__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
+	}
+
+	mq->n_pkts += n_rx_pkts;
+	return n_rx_pkts;
+}
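Editor's note: the burst paths above plug into the generic ethdev API. A sketch of driving a connected memif port from an application, echoing received packets back out of the same port (port id, queue 0 and the burst size of 32 are assumptions for illustration):

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	static void
	echo_loop(uint16_t port_id)
	{
		struct rte_mbuf *bufs[32];
		uint16_t nb_rx, nb_tx;

		for (;;) {
			nb_rx = rte_eth_rx_burst(port_id, 0, bufs, 32);
			if (nb_rx == 0)
				continue;
			nb_tx = rte_eth_tx_burst(port_id, 0, bufs, nb_rx);
			/* free anything the TX ring could not absorb */
			while (nb_tx < nb_rx)
				rte_pktmbuf_free(bufs[nb_tx++]);
		}
	}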
*/ + rte_eth_link_get(mq->in_port, &link); + return 0; + } + + /* consume interrupt */ + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + uint64_t b; + ssize_t size __rte_unused; + size = read(mq->intr_handle.fd, &b, sizeof(b)); + } + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + cur_slot = mq->last_tail; + last_slot = ring->tail; + if (cur_slot == last_slot) + goto refill; + n_slots = last_slot - cur_slot; + + while (n_slots && n_rx_pkts < nb_pkts) { + s0 = cur_slot & mask; + + d0 = &ring->desc[s0]; + mbuf_head = mq->buffers[s0]; + mbuf = mbuf_head; + +next_slot: + /* prefetch next descriptor */ + if (n_rx_pkts + 1 < nb_pkts) + rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]); + + mbuf->port = mq->in_port; + rte_pktmbuf_data_len(mbuf) = d0->length; + rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); + + mq->n_bytes += rte_pktmbuf_data_len(mbuf); + + cur_slot++; + n_slots--; + if (d0->flags & MEMIF_DESC_FLAG_NEXT) { + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; + mbuf_tail = mbuf; + mbuf = mq->buffers[s0]; + ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); + if (unlikely(ret < 0)) { + MIF_LOG(ERR, "number-of-segments-overflow"); + goto refill; + } + goto next_slot; + } + + *bufs++ = mbuf_head; + n_rx_pkts++; + } + + mq->last_tail = cur_slot; + +/* Supply master with new buffers */ +refill: + head = ring->head; + n_slots = ring_size - head + mq->last_tail; + + if (n_slots < 32) + goto no_free_mbufs; + + ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots); + if (unlikely(ret < 0)) + goto no_free_mbufs; + + while (n_slots--) { + s0 = head++ & mask; + if (n_slots > 0) + rte_prefetch0(mq->buffers[head & mask]); + d0 = &ring->desc[s0]; + /* store buffer header */ + mbuf = mq->buffers[s0]; + /* populate descriptor */ + d0->length = rte_pktmbuf_data_room_size(mq->mempool) - + RTE_PKTMBUF_HEADROOM; + d0->region = 1; + d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - + (uint8_t *)proc_private->regions[d0->region]->addr; + } +no_free_mbufs: + rte_mb(); + ring->head = head; + + mq->n_pkts += n_rx_pkts; + + return n_rx_pkts; +} + +static uint16_t +eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; + struct pmd_process_private *proc_private = + rte_eth_devices[mq->in_port].process_private; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0; + uint16_t src_len, src_off, dst_len, dst_off, cp_len; + memif_ring_type_t type = mq->type; + memif_desc_t *d0; + struct rte_mbuf *mbuf; + struct rte_mbuf *mbuf_head; + uint64_t a; + ssize_t size; + struct rte_eth_link link; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) { + int ret; + + /* Secondary process will attempt to request regions. 
*/ + ret = rte_eth_link_get(mq->in_port, &link); + if (ret < 0) + MIF_LOG(ERR, "Failed to get port %u link info: %s", + mq->in_port, rte_strerror(-ret)); + return 0; + } + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + n_free = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE) - mq->last_tail; + mq->last_tail += n_free; + + if (type == MEMIF_RING_S2M) { + slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); + n_free = ring_size - slot + mq->last_tail; + } else { + slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); + n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot; + } + + while (n_tx_pkts < nb_pkts && n_free) { + mbuf_head = *bufs++; + mbuf = mbuf_head; + + saved_slot = slot; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_len = (type == MEMIF_RING_S2M) ? + pmd->run.pkt_buffer_size : d0->length; + +next_in_chain: + src_off = 0; + src_len = rte_pktmbuf_data_len(mbuf); + + while (src_len) { + if (dst_len == 0) { + if (n_free) { + slot++; + n_free--; + d0->flags |= MEMIF_DESC_FLAG_NEXT; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_len = (type == MEMIF_RING_S2M) ? + pmd->run.pkt_buffer_size : d0->length; + d0->flags = 0; + } else { + slot = saved_slot; + goto no_free_slots; + } + } + cp_len = RTE_MIN(dst_len, src_len); + + memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off, + rte_pktmbuf_mtod_offset(mbuf, void *, src_off), + cp_len); + + mq->n_bytes += cp_len; + src_off += cp_len; + dst_off += cp_len; + src_len -= cp_len; + dst_len -= cp_len; + + d0->length = dst_off; + } + + if (rte_pktmbuf_is_contiguous(mbuf) == 0) { + mbuf = mbuf->next; + goto next_in_chain; + } + + n_tx_pkts++; + slot++; + n_free--; + rte_pktmbuf_free(mbuf_head); + } + +no_free_slots: + if (type == MEMIF_RING_S2M) + __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE); + else + __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE); + + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + a = 1; + size = write(mq->intr_handle.fd, &a, sizeof(a)); + if (unlikely(size < 0)) { + MIF_LOG(WARNING, + "Failed to send interrupt. %s", strerror(errno)); + } + } + + mq->n_pkts += n_tx_pkts; + return n_tx_pkts; +} + + +static int +memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq, + memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask, + uint16_t slot, uint16_t n_free) +{ + memif_desc_t *d0; + int used_slots = 1; + +next_in_chain: + /* store pointer to mbuf to free it later */ + mq->buffers[slot & mask] = mbuf; + /* Increment refcnt to make sure the buffer is not freed before master + * receives it. 
(current segment) + */ + rte_mbuf_refcnt_update(mbuf, 1); + /* populate descriptor */ + d0 = &ring->desc[slot & mask]; + d0->length = rte_pktmbuf_data_len(mbuf); + /* FIXME: get region index */ + d0->region = 1; + d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - + (uint8_t *)proc_private->regions[d0->region]->addr; + d0->flags = 0; + + /* check if buffer is chained */ + if (rte_pktmbuf_is_contiguous(mbuf) == 0) { + if (n_free < 2) + return 0; + /* mark buffer as chained */ + d0->flags |= MEMIF_DESC_FLAG_NEXT; + /* advance mbuf */ + mbuf = mbuf->next; + /* update counters */ + used_slots++; + slot++; + n_free--; + goto next_in_chain; + } + return used_slots; +} + +static uint16_t +eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; + struct pmd_process_private *proc_private = + rte_eth_devices[mq->in_port].process_private; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0; + memif_ring_type_t type = mq->type; + struct rte_eth_link link; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) { + /* Secondary process will attempt to request regions. */ + rte_eth_link_get(mq->in_port, &link); + return 0; + } + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + /* free mbufs received by master */ + memif_free_stored_mbufs(proc_private, mq); + + /* ring type always MEMIF_RING_S2M */ + slot = ring->head; + n_free = ring_size - ring->head + mq->last_tail; + + int used_slots; + + while (n_free && (n_tx_pkts < nb_pkts)) { + while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) { + if ((nb_pkts - n_tx_pkts) > 8) { + rte_prefetch0(*bufs + 4); + rte_prefetch0(*bufs + 5); + rte_prefetch0(*bufs + 6); + rte_prefetch0(*bufs + 7); + } + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + } + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + } + +no_free_slots: + rte_mb(); + /* update ring pointers */ + if (type == MEMIF_RING_S2M) + ring->head = slot; + else + ring->tail = slot; + + /* Send interrupt, if enabled. */ + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + uint64_t a = 1; + ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a)); + if (unlikely(size < 0)) { + MIF_LOG(WARNING, + "Failed to send interrupt. 
%s", strerror(errno)); + } + } + + /* increment queue counters */ + mq->n_pkts += n_tx_pkts; + + return n_tx_pkts; +} + +void +memif_free_regions(struct rte_eth_dev *dev) +{ + struct pmd_process_private *proc_private = dev->process_private; + struct pmd_internals *pmd = dev->data->dev_private; + int i; + struct memif_region *r; + + /* regions are allocated contiguously, so it's + * enough to loop until 'proc_private->regions_num' + */ + for (i = 0; i < proc_private->regions_num; i++) { + r = proc_private->regions[i]; + if (r != NULL) { + /* This is memzone */ + if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { + r->addr = NULL; + if (r->fd > 0) + close(r->fd); + } + if (r->addr != NULL) { + munmap(r->addr, r->region_size); + if (r->fd > 0) { + close(r->fd); + r->fd = -1; + } + } + rte_free(r); + proc_private->regions[i] = NULL; + } + } + proc_private->regions_num = 0; +} + +static int +memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) +{ + struct pmd_process_private *proc_private = (struct pmd_process_private *)arg; + struct memif_region *r; + + if (proc_private->regions_num < 1) { + MIF_LOG(ERR, "Missing descriptor region"); + return -1; + } + + r = proc_private->regions[proc_private->regions_num - 1]; + + if (r->addr != msl->base_va) + r = proc_private->regions[++proc_private->regions_num - 1]; + + if (r == NULL) { + r = rte_zmalloc("region", sizeof(struct memif_region), 0); + if (r == NULL) { + MIF_LOG(ERR, "Failed to alloc memif region."); + return -ENOMEM; + } + + r->addr = msl->base_va; + r->region_size = ms->len; + r->fd = rte_memseg_get_fd(ms); + if (r->fd < 0) + return -1; + r->pkt_buffer_offset = 0; + + proc_private->regions[proc_private->regions_num - 1] = r; + } else { + r->region_size += ms->len; + } + + return 0; +} + +static int +memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct pmd_process_private *proc_private = dev->process_private; + char shm_name[ETH_MEMIF_SHM_NAME_SIZE]; + int ret = 0; + struct memif_region *r; + + if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) { + MIF_LOG(ERR, "Too many regions."); + return -1; + } + + r = rte_zmalloc("region", sizeof(struct memif_region), 0); + if (r == NULL) { + MIF_LOG(ERR, "Failed to alloc memif region."); + return -ENOMEM; + } + + /* calculate buffer offset */ + r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) * + (sizeof(memif_ring_t) + sizeof(memif_desc_t) * + (1 << pmd->run.log2_ring_size)); + + r->region_size = r->pkt_buffer_offset; + /* if region has buffers, add buffers size to region_size */ + if (has_buffers == 1) + r->region_size += (uint32_t)(pmd->run.pkt_buffer_size * + (1 << pmd->run.log2_ring_size) * + (pmd->run.num_s2m_rings + + pmd->run.num_m2s_rings)); + + memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE); + snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d", + proc_private->regions_num); + + r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING); + if (r->fd < 0) { + MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno)); + ret = -1; + goto error; + } + + ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno)); + goto error; + } + + ret = ftruncate(r->fd, r->region_size); + if (ret < 0) { + MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno)); + goto error; + } + + r->addr = mmap(NULL, r->region_size, PROT_READ | + 
		       PROT_WRITE, MAP_SHARED, r->fd, 0);
+	if (r->addr == MAP_FAILED) {
+		MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno));
+		ret = -1;
+		goto error;
+	}
+
+	proc_private->regions[proc_private->regions_num] = r;
+	proc_private->regions_num++;
+
+	return ret;
+
+error:
+	if (r->fd > 0)
+		close(r->fd);
+	r->fd = -1;
+
+	return ret;
+}
+
+static int
+memif_regions_init(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int ret;
+
+	/*
+	 * Zero-copy exposes dpdk memory.
+	 * Each memseg list will be represented by memif region.
+	 * Zero-copy regions indexing: memseg list idx + 1,
+	 * as we already have region 0 reserved for descriptors.
+	 */
+	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+		/* create region idx 0 containing descriptors */
+		ret = memif_region_init_shm(dev, 0);
+		if (ret < 0)
+			return ret;
+		ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
+		if (ret < 0)
+			return ret;
+	} else {
+		/* create one memory region containing rings and buffers */
+		ret = memif_region_init_shm(dev, /* has buffers */ 1);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+memif_init_rings(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct pmd_process_private *proc_private = dev->process_private;
+	memif_ring_t *ring;
+	int i, j;
+	uint16_t slot;
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i);
+		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
+		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
+		ring->cookie = MEMIF_COOKIE;
+		ring->flags = 0;
+
+		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
+			continue;
+
+		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
+			slot = i * (1 << pmd->run.log2_ring_size) + j;
+			ring->desc[j].region = 0;
+			ring->desc[j].offset =
+				proc_private->regions[0]->pkt_buffer_offset +
+				(uint32_t)(slot * pmd->run.pkt_buffer_size);
+			ring->desc[j].length = pmd->run.pkt_buffer_size;
+		}
+	}
+
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i);
+		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
+		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
+		ring->cookie = MEMIF_COOKIE;
+		ring->flags = 0;
+
+		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
+			continue;
+
+		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
+			slot = (i + pmd->run.num_s2m_rings) *
+				(1 << pmd->run.log2_ring_size) + j;
+			ring->desc[j].region = 0;
+			ring->desc[j].offset =
+				proc_private->regions[0]->pkt_buffer_offset +
+				(uint32_t)(slot * pmd->run.pkt_buffer_size);
+			ring->desc[j].length = pmd->run.pkt_buffer_size;
+		}
+	}
+}
+
+/* called only by slave */
+static int
+memif_init_queues(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *mq;
+	int i;
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		mq = dev->data->tx_queues[i];
+		mq->log2_ring_size = pmd->run.log2_ring_size;
+		/* queues located only in region 0 */
+		mq->region = 0;
+		mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i);
+		mq->last_head = 0;
+		mq->last_tail = 0;
+		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
+		if (mq->intr_handle.fd < 0) {
+			MIF_LOG(WARNING,
+				"Failed to create eventfd for tx queue %d: %s.", i,
+				strerror(errno));
+		}
+		mq->buffers = NULL;
+		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+			mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
+						  (1 << mq->log2_ring_size), 0);
+			if (mq->buffers == NULL)
+				return 
-ENOMEM; + } + } + + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = dev->data->rx_queues[i]; + mq->log2_ring_size = pmd->run.log2_ring_size; + /* queues located only in region 0 */ + mq->region = 0; + mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i); + mq->last_head = 0; + mq->last_tail = 0; + mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); + if (mq->intr_handle.fd < 0) { + MIF_LOG(WARNING, + "Failed to create eventfd for rx queue %d: %s.", i, + strerror(errno)); + } + mq->buffers = NULL; + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) * + (1 << mq->log2_ring_size), 0); + if (mq->buffers == NULL) + return -ENOMEM; + } + } + return 0; +} + +int +memif_init_regions_and_queues(struct rte_eth_dev *dev) +{ + int ret; + + ret = memif_regions_init(dev); + if (ret < 0) + return ret; + + memif_init_rings(dev); + + ret = memif_init_queues(dev); + if (ret < 0) + return ret; + + return 0; +} + +int +memif_connect(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct pmd_process_private *proc_private = dev->process_private; + struct memif_region *mr; + struct memif_queue *mq; + memif_ring_t *ring; + int i; + + for (i = 0; i < proc_private->regions_num; i++) { + mr = proc_private->regions[i]; + if (mr != NULL) { + if (mr->addr == NULL) { + if (mr->fd < 0) + return -1; + mr->addr = mmap(NULL, mr->region_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, mr->fd, 0); + if (mr->addr == MAP_FAILED) { + MIF_LOG(ERR, "mmap failed: %s\n", + strerror(errno)); + return -1; + } + } + if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { + /* close memseg file */ + close(mr->fd); + mr->fd = -1; + } + } + } + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->tx_queues[i] : dev->data->rx_queues[i]; + ring = memif_get_ring_from_queue(proc_private, mq); + if (ring == NULL || ring->cookie != MEMIF_COOKIE) { + MIF_LOG(ERR, "Wrong ring"); + return -1; + } + __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); + __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); + mq->last_head = 0; + mq->last_tail = 0; + /* enable polling mode */ + if (pmd->role == MEMIF_ROLE_MASTER) + ring->flags = MEMIF_RING_FLAG_MASK_INT; + } + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? 
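+			/* An M2S ring is an RX queue from the slave's point
+			 * of view and a TX queue from the master's.
+			 */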
+ dev->data->rx_queues[i] : dev->data->tx_queues[i]; + ring = memif_get_ring_from_queue(proc_private, mq); + if (ring == NULL || ring->cookie != MEMIF_COOKIE) { + MIF_LOG(ERR, "Wrong ring"); + return -1; + } + __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); + __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); + mq->last_head = 0; + mq->last_tail = 0; + /* enable polling mode */ + if (pmd->role == MEMIF_ROLE_SLAVE) + ring->flags = MEMIF_RING_FLAG_MASK_INT; + } + + pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING; + pmd->flags |= ETH_MEMIF_FLAG_CONNECTED; + dev->data->dev_link.link_status = ETH_LINK_UP; + } + MIF_LOG(INFO, "Connected."); + return 0; +} + +static int +memif_dev_start(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int ret = 0; + + switch (pmd->role) { + case MEMIF_ROLE_SLAVE: + ret = memif_connect_slave(dev); + break; + case MEMIF_ROLE_MASTER: + ret = memif_connect_master(dev); + break; + default: + MIF_LOG(ERR, "Unknown role: %d.", pmd->role); + ret = -1; + break; + } + + return ret; +} + +static void +memif_dev_close(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int i; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + memif_msg_enq_disconnect(pmd->cc, "Device closed", 0); + memif_disconnect(dev); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]); + for (i = 0; i < dev->data->nb_tx_queues; i++) + (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]); + + memif_socket_remove_device(dev); + } else { + memif_disconnect(dev); + } + + rte_free(dev->process_private); +} + +static int +memif_dev_configure(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + + /* + * SLAVE - TXQ + * MASTER - RXQ + */ + pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->nb_tx_queues : dev->data->nb_rx_queues; + + /* + * SLAVE - RXQ + * MASTER - TXQ + */ + pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->nb_rx_queues : dev->data->nb_tx_queues; + + return 0; +} + +static int +memif_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t qid, + uint16_t nb_tx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf __rte_unused) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + + mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0); + if (mq == NULL) { + MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid); + return -ENOMEM; + } + + mq->type = + (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->intr_handle.fd = -1; + mq->intr_handle.type = RTE_INTR_HANDLE_EXT; + mq->in_port = dev->data->port_id; + dev->data->tx_queues[qid] = mq; + + return 0; +} + +static int +memif_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t qid, + uint16_t nb_rx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_rxconf *rx_conf __rte_unused, + struct rte_mempool *mb_pool) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + + mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0); + if (mq == NULL) { + MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid); + return -ENOMEM; + } + + mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? 
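+	/* An RX queue consumes an M2S ring on the slave and an S2M ring
+	 * on the master, mirroring the TX queue setup above.
+	 */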
MEMIF_RING_M2S : MEMIF_RING_S2M; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->intr_handle.fd = -1; + mq->intr_handle.type = RTE_INTR_HANDLE_EXT; + mq->mempool = mb_pool; + mq->in_port = dev->data->port_id; + dev->data->rx_queues[qid] = mq; + + return 0; +} + +static void +memif_queue_release(void *queue) +{ + struct memif_queue *mq = (struct memif_queue *)queue; + + if (!mq) + return; + + rte_free(mq); +} + +static int +memif_link_update(struct rte_eth_dev *dev, + int wait_to_complete __rte_unused) +{ + struct pmd_process_private *proc_private; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + proc_private = dev->process_private; + if (dev->data->dev_link.link_status == ETH_LINK_UP && + proc_private->regions_num == 0) { + memif_mp_request_regions(dev); + } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN && + proc_private->regions_num > 0) { + memif_free_regions(dev); + } + } + return 0; +} + +static int +memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + int i; + uint8_t tmp, nq; + + stats->ipackets = 0; + stats->ibytes = 0; + stats->opackets = 0; + stats->obytes = 0; + + tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings : + pmd->run.num_m2s_rings; + nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : + RTE_ETHDEV_QUEUE_STAT_CNTRS; + + /* RX stats */ + for (i = 0; i < nq; i++) { + mq = dev->data->rx_queues[i]; + stats->q_ipackets[i] = mq->n_pkts; + stats->q_ibytes[i] = mq->n_bytes; + stats->ipackets += mq->n_pkts; + stats->ibytes += mq->n_bytes; + } + + tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings : + pmd->run.num_s2m_rings; + nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : + RTE_ETHDEV_QUEUE_STAT_CNTRS; + + /* TX stats */ + for (i = 0; i < nq; i++) { + mq = dev->data->tx_queues[i]; + stats->q_opackets[i] = mq->n_pkts; + stats->q_obytes[i] = mq->n_bytes; + stats->opackets += mq->n_pkts; + stats->obytes += mq->n_bytes; + } + return 0; +} + +static int +memif_stats_reset(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int i; + struct memif_queue *mq; + + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] : + dev->data->rx_queues[i]; + mq->n_pkts = 0; + mq->n_bytes = 0; + } + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? 
dev->data->rx_queues[i] :
+			dev->data->tx_queues[i];
+		mq->n_pkts = 0;
+		mq->n_bytes = 0;
+	}
+
+	return 0;
+}
+
+static int
+memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
+			   uint16_t qid __rte_unused)
+{
+	MIF_LOG(WARNING, "Interrupt mode not supported.");
+
+	return -1;
+}
+
+static int
+memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+	struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {
+	.dev_start = memif_dev_start,
+	.dev_close = memif_dev_close,
+	.dev_infos_get = memif_dev_info,
+	.dev_configure = memif_dev_configure,
+	.tx_queue_setup = memif_tx_queue_setup,
+	.rx_queue_setup = memif_rx_queue_setup,
+	.rx_queue_release = memif_queue_release,
+	.tx_queue_release = memif_queue_release,
+	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
+	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
+	.link_update = memif_link_update,
+	.stats_get = memif_stats_get,
+	.stats_reset = memif_stats_reset,
+};
+
+static int
+memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
+	     memif_interface_id_t id, uint32_t flags,
+	     const char *socket_filename,
+	     memif_log2_ring_size_t log2_ring_size,
+	     uint16_t pkt_buffer_size, const char *secret,
+	     struct rte_ether_addr *ether_addr)
+{
+	int ret = 0;
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct pmd_internals *pmd;
+	struct pmd_process_private *process_private;
+	const unsigned int numa_node = vdev->device.numa_node;
+	const char *name = rte_vdev_device_name(vdev);
+
+	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
+	if (eth_dev == NULL) {
+		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
+		return -1;
+	}
+
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc(name, sizeof(struct pmd_process_private),
+			    RTE_CACHE_LINE_SIZE);
+
+	if (process_private == NULL) {
+		MIF_LOG(ERR, "Failed to alloc memory for process private");
+		return -1;
+	}
+	eth_dev->process_private = process_private;
+
+	pmd = eth_dev->data->dev_private;
+	memset(pmd, 0, sizeof(*pmd));
+
+	pmd->id = id;
+	pmd->flags = flags;
+	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
+	pmd->role = role;
+	/* Zero-copy flag irrelevant to master. 
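+	 * Zero-copy is only implemented for the slave role in this PMD
+	 * (the slave exposes its own memory to the peer), so the flag is
+	 * simply cleared when the device is created as master. Note that
+	 * zero-copy also requires the EAL to run with single-file segments,
+	 * e.g. (illustrative):
+	 *   testpmd --single-file-segments \
+	 *       --vdev=net_memif0,role=slave,zero-copy=yes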
*/ + if (pmd->role == MEMIF_ROLE_MASTER) + pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; + + ret = memif_socket_init(eth_dev, socket_filename); + if (ret < 0) + return ret; + + memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE); + if (secret != NULL) + strlcpy(pmd->secret, secret, sizeof(pmd->secret)); + + pmd->cfg.log2_ring_size = log2_ring_size; + /* set in .dev_configure() */ + pmd->cfg.num_s2m_rings = 0; + pmd->cfg.num_m2s_rings = 0; + + pmd->cfg.pkt_buffer_size = pkt_buffer_size; + rte_spinlock_init(&pmd->cc_lock); + + data = eth_dev->data; + data->dev_private = pmd; + data->numa_node = numa_node; + data->dev_link = pmd_link; + data->mac_addrs = ether_addr; + data->promiscuous = 1; + + eth_dev->dev_ops = &ops; + eth_dev->device = &vdev->device; + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + eth_dev->rx_pkt_burst = eth_memif_rx_zc; + eth_dev->tx_pkt_burst = eth_memif_tx_zc; + } else { + eth_dev->rx_pkt_burst = eth_memif_rx; + eth_dev->tx_pkt_burst = eth_memif_tx; + } + + + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + rte_eth_dev_probing_finish(eth_dev); + + return 0; +} + +static int +memif_set_role(const char *key __rte_unused, const char *value, + void *extra_args) +{ + enum memif_role_t *role = (enum memif_role_t *)extra_args; + + if (strstr(value, "master") != NULL) { + *role = MEMIF_ROLE_MASTER; + } else if (strstr(value, "slave") != NULL) { + *role = MEMIF_ROLE_SLAVE; + } else { + MIF_LOG(ERR, "Unknown role: %s.", value); + return -EINVAL; + } + return 0; +} + +static int +memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args) +{ + uint32_t *flags = (uint32_t *)extra_args; + + if (strstr(value, "yes") != NULL) { + if (!rte_mcfg_get_single_file_segments()) { + MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments."); + return -ENOTSUP; + } + *flags |= ETH_MEMIF_FLAG_ZERO_COPY; + } else if (strstr(value, "no") != NULL) { + *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; + } else { + MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value); + return -EINVAL; + } + return 0; +} + +static int +memif_set_id(const char *key __rte_unused, const char *value, void *extra_args) +{ + memif_interface_id_t *id = (memif_interface_id_t *)extra_args; + + /* even if parsing fails, 0 is a valid id */ + *id = strtoul(value, NULL, 10); + return 0; +} + +static int +memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args) +{ + unsigned long tmp; + uint16_t *pkt_buffer_size = (uint16_t *)extra_args; + + tmp = strtoul(value, NULL, 10); + if (tmp == 0 || tmp > 0xFFFF) { + MIF_LOG(ERR, "Invalid buffer size: %s.", value); + return -EINVAL; + } + *pkt_buffer_size = tmp; + return 0; +} + +static int +memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args) +{ + unsigned long tmp; + memif_log2_ring_size_t *log2_ring_size = + (memif_log2_ring_size_t *)extra_args; + + tmp = strtoul(value, NULL, 10); + if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) { + MIF_LOG(ERR, "Invalid ring size: %s (max %u).", + value, ETH_MEMIF_MAX_LOG2_RING_SIZE); + return -EINVAL; + } + *log2_ring_size = tmp; + return 0; +} + +/* check if directory exists and if we have permission to read/write */ +static int +memif_check_socket_filename(const char *filename) +{ + char *dir = NULL, *tmp; + uint32_t idx; + int ret = 0; + + if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) { + MIF_LOG(ERR, "Unix socket address too long (max 108)."); + return -1; + } + + tmp = strrchr(filename, '/'); + if (tmp != NULL) { + idx = tmp - filename; + dir = 
rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0); + if (dir == NULL) { + MIF_LOG(ERR, "Failed to allocate memory."); + return -1; + } + strlcpy(dir, filename, sizeof(char) * (idx + 1)); + } + + if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK | + W_OK, AT_EACCESS) < 0)) { + MIF_LOG(ERR, "Invalid socket directory."); + ret = -EINVAL; + } + + if (dir != NULL) + rte_free(dir); + + return ret; +} + +static int +memif_set_socket_filename(const char *key __rte_unused, const char *value, + void *extra_args) +{ + const char **socket_filename = (const char **)extra_args; + + *socket_filename = value; + return memif_check_socket_filename(*socket_filename); +} + +static int +memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args) +{ + struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args; + + if (rte_ether_unformat_addr(value, ether_addr) < 0) + MIF_LOG(WARNING, "Failed to parse mac '%s'.", value); + return 0; +} + +static int +memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args) +{ + const char **secret = (const char **)extra_args; + + *secret = value; + return 0; +} + +static int +rte_pmd_memif_probe(struct rte_vdev_device *vdev) +{ + RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128); + RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16); + int ret = 0; + struct rte_kvargs *kvlist; + const char *name = rte_vdev_device_name(vdev); + enum memif_role_t role = MEMIF_ROLE_SLAVE; + memif_interface_id_t id = 0; + uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE; + memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE; + const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME; + uint32_t flags = 0; + const char *secret = NULL; + struct rte_ether_addr *ether_addr = rte_zmalloc("", + sizeof(struct rte_ether_addr), 0); + struct rte_eth_dev *eth_dev; + + rte_eth_random_addr(ether_addr->addr_bytes); + + MIF_LOG(INFO, "Initialize MEMIF: %s.", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + MIF_LOG(ERR, "Failed to probe %s", name); + return -1; + } + + eth_dev->dev_ops = &ops; + eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = eth_memif_rx; + eth_dev->tx_pkt_burst = eth_memif_tx; + + if (!rte_eal_primary_proc_alive(NULL)) { + MIF_LOG(ERR, "Primary process is missing"); + return -1; + } + + eth_dev->process_private = (struct pmd_process_private *) + rte_zmalloc(name, + sizeof(struct pmd_process_private), + RTE_CACHE_LINE_SIZE); + if (eth_dev->process_private == NULL) { + MIF_LOG(ERR, + "Failed to alloc memory for process private"); + return -1; + } + + rte_eth_dev_probing_finish(eth_dev); + + return 0; + } + + ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region); + /* + * Primary process can continue probing, but secondary process won't + * be able to get memory regions information + */ + if (ret < 0 && rte_errno != EEXIST) + MIF_LOG(WARNING, "Failed to register mp action callback: %s", + strerror(rte_errno)); + + kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments); + + /* parse parameters */ + if (kvlist != NULL) { + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG, + &memif_set_role, &role); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG, + &memif_set_id, &id); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG, + &memif_set_bs, &pkt_buffer_size); + if (ret < 0) + goto exit; + ret = 
rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG, + &memif_set_rs, &log2_ring_size); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG, + &memif_set_socket_filename, + (void *)(&socket_filename)); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG, + &memif_set_mac, ether_addr); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG, + &memif_set_zc, &flags); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG, + &memif_set_secret, (void *)(&secret)); + if (ret < 0) + goto exit; + } + + /* create interface */ + ret = memif_create(vdev, role, id, flags, socket_filename, + log2_ring_size, pkt_buffer_size, secret, ether_addr); + +exit: + if (kvlist != NULL) + rte_kvargs_free(kvlist); + return ret; +} + +static int +rte_pmd_memif_remove(struct rte_vdev_device *vdev) +{ + struct rte_eth_dev *eth_dev; + + eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev)); + if (eth_dev == NULL) + return 0; + + rte_eth_dev_close(eth_dev->data->port_id); + + return 0; +} + +static struct rte_vdev_driver pmd_memif_drv = { + .probe = rte_pmd_memif_probe, + .remove = rte_pmd_memif_remove, +}; + +RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv); + +RTE_PMD_REGISTER_PARAM_STRING(net_memif, + ETH_MEMIF_ID_ARG "=<int>" + ETH_MEMIF_ROLE_ARG "=master|slave" + ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>" + ETH_MEMIF_RING_SIZE_ARG "=<int>" + ETH_MEMIF_SOCKET_ARG "=<string>" + ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx" + ETH_MEMIF_ZC_ARG "=yes|no" + ETH_MEMIF_SECRET_ARG "=<string>"); + +int memif_logtype; + +RTE_INIT(memif_init_log) +{ + memif_logtype = rte_log_register("pmd.net.memif"); + if (memif_logtype >= 0) + rte_log_set_level(memif_logtype, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.h b/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.h new file mode 100644 index 000000000..6f45b7072 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/memif/rte_eth_memif.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#ifndef _RTE_ETH_MEMIF_H_ +#define _RTE_ETH_MEMIF_H_ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif /* GNU_SOURCE */ + +#include <sys/queue.h> + +#include <rte_ethdev_driver.h> +#include <rte_ether.h> +#include <rte_interrupts.h> + +#include "memif.h" + +#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME "/run/memif.sock" +#define ETH_MEMIF_DEFAULT_RING_SIZE 10 +#define ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE 2048 + +#define ETH_MEMIF_MAX_NUM_Q_PAIRS 255 +#define ETH_MEMIF_MAX_LOG2_RING_SIZE 14 +#define ETH_MEMIF_MAX_REGION_NUM 256 + +#define ETH_MEMIF_SHM_NAME_SIZE 32 +#define ETH_MEMIF_DISC_STRING_SIZE 96 +#define ETH_MEMIF_SECRET_SIZE 24 + +extern int memif_logtype; + +#define MIF_LOG(level, fmt, args...) 
\
+	rte_log(RTE_LOG_ ## level, memif_logtype,	\
+		"%s(): " fmt "\n", __func__, ##args)
+
+enum memif_role_t {
+	MEMIF_ROLE_MASTER,
+	MEMIF_ROLE_SLAVE,
+};
+
+struct memif_region {
+	void *addr;				/**< shared memory address */
+	memif_region_size_t region_size;	/**< shared memory size */
+	int fd;					/**< shared memory file descriptor */
+	uint32_t pkt_buffer_offset;
+	/**< offset from 'addr' to first packet buffer */
+};
+
+struct memif_queue {
+	struct rte_mempool *mempool;		/**< mempool for RX packets */
+	struct pmd_internals *pmd;		/**< device internals */
+
+	memif_ring_type_t type;			/**< ring type */
+	memif_region_index_t region;		/**< shared memory region index */
+
+	uint16_t in_port;			/**< port id */
+
+	memif_region_offset_t ring_offset;
+	/**< ring offset from start of shm region (ring - memif_region.addr) */
+
+	uint16_t last_head;			/**< last ring head */
+	uint16_t last_tail;			/**< last ring tail */
+
+	struct rte_mbuf **buffers;
+	/**< Stored mbufs. Used in zero-copy tx. Slave stores transmitted
+	 * mbufs to free them once master has received them.
+	 */
+
+	/* rx/tx info */
+	uint64_t n_pkts;			/**< number of rx/tx packets */
+	uint64_t n_bytes;			/**< number of rx/tx bytes */
+
+	struct rte_intr_handle intr_handle;	/**< interrupt handle */
+
+	memif_log2_ring_size_t log2_ring_size;	/**< log2 of ring size */
+};
+
+struct pmd_internals {
+	memif_interface_id_t id;		/**< unique id */
+	enum memif_role_t role;			/**< device role */
+	uint32_t flags;				/**< device status flags */
+#define ETH_MEMIF_FLAG_CONNECTING	(1 << 0)
+/**< device is connecting */
+#define ETH_MEMIF_FLAG_CONNECTED	(1 << 1)
+/**< device is connected */
+#define ETH_MEMIF_FLAG_ZERO_COPY	(1 << 2)
+/**< device is zero-copy enabled */
+#define ETH_MEMIF_FLAG_DISABLED		(1 << 3)
+/**< device has not been configured and can not accept connection requests */
+
+	char *socket_filename;			/**< pointer to socket filename */
+	char secret[ETH_MEMIF_SECRET_SIZE];	/**< secret (optional security parameter) */
+
+	struct memif_control_channel *cc;	/**< control channel */
+	rte_spinlock_t cc_lock;			/**< control channel lock */
+
+	/* remote info */
+	char remote_name[RTE_DEV_NAME_MAX_LEN];		/**< remote app name */
+	char remote_if_name[RTE_DEV_NAME_MAX_LEN];	/**< remote peer name */
+
+	struct {
+		memif_log2_ring_size_t log2_ring_size;	/**< log2 of ring size */
+		uint8_t num_s2m_rings;		/**< number of slave to master rings */
+		uint8_t num_m2s_rings;		/**< number of master to slave rings */
+		uint16_t pkt_buffer_size;	/**< buffer size */
+	} cfg;					/**< Configured parameters (max values) */
+
+	struct {
+		memif_log2_ring_size_t log2_ring_size;	/**< log2 of ring size */
+		uint8_t num_s2m_rings;		/**< number of slave to master rings */
+		uint8_t num_m2s_rings;		/**< number of master to slave rings */
+		uint16_t pkt_buffer_size;	/**< buffer size */
+	} run;
+	/**< Parameters used in active connection */
+
+	char local_disc_string[ETH_MEMIF_DISC_STRING_SIZE];
+	/**< local disconnect reason */
+	char remote_disc_string[ETH_MEMIF_DISC_STRING_SIZE];
+	/**< remote disconnect reason */
+};
+
+struct pmd_process_private {
+	struct memif_region *regions[ETH_MEMIF_MAX_REGION_NUM];
+	/**< shared memory regions */
+	memif_region_index_t regions_num;	/**< number of regions */
+};
+
+/**
+ * Unmap shared memory and free regions from memory.
+ *
+ * @param dev
+ *   memif device
+ */
+void memif_free_regions(struct rte_eth_dev *dev);
+
+/**
+ * Finalize connection establishment process. Map shared memory file
+ * (master role), initialize ring queue, set link status up.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect(struct rte_eth_dev *dev);
+
+/**
+ * Create shared memory file and initialize ring queue.
+ * Only called by slave when establishing connection.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_init_regions_and_queues(struct rte_eth_dev *dev);
+
+/**
+ * Get memif version string.
+ *
+ * @return
+ *   - memif version string
+ */
+const char *memif_version(void);
+
+#ifndef MFD_HUGETLB
+#ifndef __NR_memfd_create
+
+#if defined __x86_64__
+#define __NR_memfd_create 319
+#elif defined __x86_32__
+#define __NR_memfd_create 1073742143
+#elif defined __arm__
+#define __NR_memfd_create 385
+#elif defined __aarch64__
+#define __NR_memfd_create 279
+#elif defined __powerpc__
+#define __NR_memfd_create 360
+#elif defined __i386__
+#define __NR_memfd_create 356
+#else
+#error "__NR_memfd_create unknown for this architecture"
+#endif
+
+#endif /* __NR_memfd_create */
+
+static inline int memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+#endif /* MFD_HUGETLB */
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING	0x0002U
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+#endif
+
+#endif /* RTE_ETH_MEMIF_H */
diff --git a/src/spdk/dpdk/drivers/net/memif/rte_pmd_memif_version.map b/src/spdk/dpdk/drivers/net/memif/rte_pmd_memif_version.map
new file mode 100644
index 000000000..f9f17e4f6
--- /dev/null
+++ b/src/spdk/dpdk/drivers/net/memif/rte_pmd_memif_version.map
@@ -0,0 +1,3 @@
+DPDK_20.0 {
+	local: *;
+};