diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/seastar/dpdk/examples/netmap_compat | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
7 files changed, 1865 insertions, 0 deletions
diff --git a/src/seastar/dpdk/examples/netmap_compat/Makefile b/src/seastar/dpdk/examples/netmap_compat/Makefile new file mode 100644 index 00000000..52d80869 --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk +unexport RTE_SRCDIR RTE_OUTPUT RTE_EXTMK + +DIRS-y += bridge + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +$(DIRS-y): + $(MAKE) -C $@ $(MAKECMDGOALS) O=$(RTE_OUTPUT) diff --git a/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile b/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile new file mode 100644 index 00000000..1d4ddfff --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile @@ -0,0 +1,63 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define the RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(info This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +clean: +else + +# binary name +APP = bridge + +# for compat_netmap.c +VPATH := $(SRCDIR)/../lib + +# all source are stored in SRCS-y +SRCS-y := bridge.c +SRCS-y += compat_netmap.c + +CFLAGS += -O3 -I$(SRCDIR)/../lib -I$(SRCDIR)/../netmap +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c b/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c new file mode 100644 index 00000000..2f2b6baa --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c @@ -0,0 +1,377 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fcntl.h> +#include <getopt.h> +#include <inttypes.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_mbuf.h> +#include <rte_mempool.h> +#include <rte_string_fns.h> +#include "compat_netmap.h" + + +#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE (BUF_SIZE + RTE_PKTMBUF_HEADROOM) + +#define MBUF_PER_POOL 8192 + +struct rte_eth_conf eth_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, + .hw_ip_checksum = 0, + .hw_vlan_filter = 0, + .jumbo_frame = 0, + .hw_strip_crc = 1, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +#define MAX_QUEUE_NUM 1 +#define RX_QUEUE_NUM 1 +#define TX_QUEUE_NUM 1 + +#define MAX_DESC_NUM 0x400 +#define RX_DESC_NUM 0x100 +#define TX_DESC_NUM 0x200 + +#define RX_SYNC_NUM 0x20 +#define TX_SYNC_NUM 0x20 + +struct rte_netmap_port_conf port_conf = { + .eth_conf = ð_conf, + .socket_id = SOCKET_ID_ANY, + .nr_tx_rings = TX_QUEUE_NUM, + .nr_rx_rings = RX_QUEUE_NUM, + .nr_tx_slots = TX_DESC_NUM, + .nr_rx_slots = RX_DESC_NUM, + .tx_burst = TX_SYNC_NUM, + .rx_burst = RX_SYNC_NUM, +}; + +struct rte_netmap_conf netmap_conf = { + .socket_id = SOCKET_ID_ANY, + .max_bufsz = BUF_SIZE, + .max_rings = MAX_QUEUE_NUM, + .max_slots = MAX_DESC_NUM, +}; + +static int stop = 0; + +#define MAX_PORT_NUM 2 + +struct netmap_port { + int fd; + struct netmap_if *nmif; + struct netmap_ring *rx_ring; + struct netmap_ring *tx_ring; + const char *str; + uint8_t id; +}; + +static struct { + uint32_t num; + struct netmap_port p[MAX_PORT_NUM]; + void *mem; +} ports; + +static void +usage(const char *prgname) +{ + fprintf(stderr, "Usage: %s [EAL args] -- [OPTION]...\n" + "-h, --help \t Show this help message and exit\n" + "-i INTERFACE_A \t Interface (DPDK port number) to use\n" + "[ -i INTERFACE_B \t Interface (DPDK port number) to use ]\n", + prgname); +} + +static uint8_t +parse_portid(const char *portid_str) +{ + char *end; + unsigned id; + + id = strtoul(portid_str, &end, 10); + + if (end == portid_str || *end != '\0' || id > RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, "Invalid port number\n"); + + return (uint8_t) id; +} + +static int +parse_args(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "hi:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + rte_exit(EXIT_SUCCESS, "exiting..."); + break; + case 'i': + if (ports.num >= RTE_DIM(ports.p)) { + usage(argv[0]); + rte_exit(EXIT_FAILURE, "configs with %u " + "ports are not supported\n", + ports.num + 1); + + } + + ports.p[ports.num].str = optarg; + ports.p[ports.num].id = parse_portid(optarg); + ports.num++; + break; + default: + usage(argv[0]); + rte_exit(EXIT_FAILURE, "invalid option: %c\n", opt); + } + } + + return 0; +} + +static void sigint_handler(__rte_unused int sig) +{ + stop = 1; + signal(SIGINT, SIG_DFL); +} + +static void move(int n, struct netmap_ring *rx, struct netmap_ring *tx) +{ + uint32_t tmp; + + while (n-- > 0) { + tmp = tx->slot[tx->cur].buf_idx; + + tx->slot[tx->cur].buf_idx = rx->slot[rx->cur].buf_idx; + tx->slot[tx->cur].len = rx->slot[rx->cur].len; + tx->slot[tx->cur].flags |= NS_BUF_CHANGED; + tx->cur = NETMAP_RING_NEXT(tx, tx->cur); + tx->avail--; + + rx->slot[rx->cur].buf_idx = tmp; + rx->slot[rx->cur].flags |= NS_BUF_CHANGED; + rx->cur = NETMAP_RING_NEXT(rx, rx->cur); + rx->avail--; + } +} + +static int +netmap_port_open(uint32_t idx) +{ + int err; + struct netmap_port *port; + struct nmreq req; + + port = ports.p + idx; + + port->fd = rte_netmap_open("/dev/netmap", O_RDWR); + + snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); + req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req); + if (err) { + printf("[E] NIOCGINFO ioctl failed (error %d)\n", err); + return err; + } + + snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); + req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req); + if (err) { + printf("[E] NIOCREGIF ioctl failed (error %d)\n", err); + return err; + } + + /* mmap only once. */ + if (ports.mem == NULL) + ports.mem = rte_netmap_mmap(NULL, req.nr_memsize, + PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0); + + if (ports.mem == MAP_FAILED) { + printf("[E] NETMAP mmap failed for fd: %d)\n", port->fd); + return -ENOMEM; + } + + port->nmif = NETMAP_IF(ports.mem, req.nr_offset); + + port->tx_ring = NETMAP_TXRING(port->nmif, 0); + port->rx_ring = NETMAP_RXRING(port->nmif, 0); + + return 0; +} + + +int main(int argc, char *argv[]) +{ + int err, ret; + uint32_t i, pmsk; + struct nmreq req; + struct pollfd pollfd[MAX_PORT_NUM]; + struct rte_mempool *pool; + struct netmap_ring *rx_ring, *tx_ring; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + argc -= ret; + argv += ret; + + parse_args(argc, argv); + + if (ports.num == 0) + rte_exit(EXIT_FAILURE, "no ports specified\n"); + + if (rte_eth_dev_count() < 1) + rte_exit(EXIT_FAILURE, "Not enough ethernet ports available\n"); + + pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0, + MBUF_DATA_SIZE, rte_socket_id()); + if (pool == NULL) + rte_exit(EXIT_FAILURE, "Couldn't create mempool\n"); + + netmap_conf.socket_id = rte_socket_id(); + err = rte_netmap_init(&netmap_conf); + + if (err < 0) + rte_exit(EXIT_FAILURE, + "Couldn't initialize librte_compat_netmap\n"); + else + printf("librte_compat_netmap initialized\n"); + + port_conf.pool = pool; + port_conf.socket_id = rte_socket_id(); + + for (i = 0; i != ports.num; i++) { + + err = rte_netmap_init_port(ports.p[i].id, &port_conf); + if (err < 0) + rte_exit(EXIT_FAILURE, "Couldn't setup port %hhu\n", + ports.p[i].id); + + rte_eth_promiscuous_enable(ports.p[i].id); + } + + for (i = 0; i != ports.num; i++) { + + err = netmap_port_open(i); + if (err) { + rte_exit(EXIT_FAILURE, "Couldn't set port %hhu " + "under NETMAP control\n", + ports.p[i].id); + } + else + printf("Port %hhu now in Netmap mode\n", ports.p[i].id); + } + + memset(pollfd, 0, sizeof(pollfd)); + + for (i = 0; i != ports.num; i++) { + pollfd[i].fd = ports.p[i].fd; + pollfd[i].events = POLLIN | POLLOUT; + } + + signal(SIGINT, sigint_handler); + + pmsk = ports.num - 1; + + printf("Bridge up and running!\n"); + + while (!stop) { + uint32_t n_pkts; + + pollfd[0].revents = 0; + pollfd[1].revents = 0; + + ret = rte_netmap_poll(pollfd, ports.num, 0); + if (ret < 0) { + stop = 1; + printf("[E] poll returned with error %d\n", ret); + } + + if (((pollfd[0].revents | pollfd[1].revents) & POLLERR) != 0) { + printf("POLLERR!\n"); + } + + if ((pollfd[0].revents & POLLIN) != 0 && + (pollfd[pmsk].revents & POLLOUT) != 0) { + + rx_ring = ports.p[0].rx_ring; + tx_ring = ports.p[pmsk].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, tx_ring); + } + + if (pmsk != 0 && (pollfd[pmsk].revents & POLLIN) != 0 && + (pollfd[0].revents & POLLOUT) != 0) { + + rx_ring = ports.p[pmsk].rx_ring; + tx_ring = ports.p[0].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, tx_ring); + } + } + + printf("Bridge stopped!\n"); + + for (i = 0; i != ports.num; i++) { + err = rte_netmap_ioctl(ports.p[i].fd, NIOCUNREGIF, &req); + if (err) { + printf("[E] NIOCUNREGIF ioctl failed (error %d)\n", + err); + } + else + printf("Port %hhu unregistered from Netmap mode\n", ports.p[i].id); + + rte_netmap_close(ports.p[i].fd); + } + return 0; +} diff --git a/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c new file mode 100644 index 00000000..112c551f --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c @@ -0,0 +1,911 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <errno.h> +#include <inttypes.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <net/if.h> +#include <sys/types.h> +#include <sys/resource.h> +#include <sys/mman.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_memzone.h> +#include <rte_spinlock.h> +#include <rte_string_fns.h> + +#include "compat_netmap.h" + +struct netmap_port { + struct rte_mempool *pool; + struct netmap_if *nmif; + struct rte_eth_conf eth_conf; + struct rte_eth_txconf tx_conf; + struct rte_eth_rxconf rx_conf; + int32_t socket_id; + uint16_t nr_tx_rings; + uint16_t nr_rx_rings; + uint32_t nr_tx_slots; + uint32_t nr_rx_slots; + uint16_t tx_burst; + uint16_t rx_burst; + uint32_t fd; +}; + +struct fd_port { + uint32_t port; +}; + +#ifndef POLLRDNORM +#define POLLRDNORM 0x0040 +#endif + +#ifndef POLLWRNORM +#define POLLWRNORM 0x0100 +#endif + +#define FD_PORT_FREE UINT32_MAX +#define FD_PORT_RSRV (FD_PORT_FREE - 1) + +struct netmap_state { + struct rte_netmap_conf conf; + uintptr_t buf_start; + void *mem; + uint32_t mem_sz; + uint32_t netif_memsz; +}; + + +#define COMPAT_NETMAP_MAX_NOFILE (2 * RTE_MAX_ETHPORTS) +#define COMPAT_NETMAP_MAX_BURST 64 +#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST) + +static struct netmap_port ports[RTE_MAX_ETHPORTS]; +static struct netmap_state netmap; + +static struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE]; +static const int next_fd_start = RLIMIT_NOFILE + 1; +static rte_spinlock_t netmap_lock; + +#define IDX_TO_FD(x) ((x) + next_fd_start) +#define FD_TO_IDX(x) ((x) - next_fd_start) +#define FD_VALID(x) ((x) >= next_fd_start && \ + (x) < (typeof (x))(RTE_DIM(fd_port) + next_fd_start)) + +#define PORT_NUM_RINGS (2 * netmap.conf.max_rings) +#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots) + +#define BUF_IDX(port, ring, slot) \ + (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \ + (slot)) + +#define NETMAP_IF_RING_OFS(rid, rings, slots) ({\ + struct netmap_if *_if; \ + struct netmap_ring *_rg; \ + sizeof(*_if) + \ + (rings) * sizeof(_if->ring_ofs[0]) + \ + (rid) * sizeof(*_rg) + \ + (slots) * sizeof(_rg->slot[0]); \ + }) + +static void netmap_unregif(uint32_t idx, uint32_t port); + + +static int32_t +ifname_to_portid(const char *ifname, uint8_t *port) +{ + char *endptr; + uint64_t portid; + + errno = 0; + portid = strtoul(ifname, &endptr, 10); + if (endptr == ifname || *endptr != '\0' || + portid >= RTE_DIM(ports) || errno != 0) + return -EINVAL; + + *port = (uint8_t)portid; + return 0; +} + +/** + * Given a dpdk mbuf, fill in the Netmap slot in ring r and its associated + * buffer with the data held by the mbuf. + * Note that mbuf chains are not supported. + */ +static void +mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index) +{ + char *data; + uint16_t length; + + data = rte_pktmbuf_mtod(mbuf, char *); + length = rte_pktmbuf_data_len(mbuf); + + if (length > r->nr_buf_size) + length = 0; + + r->slot[index].len = length; + rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx), data, length); +} + +/** + * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf + * from the data held in the buffer associated with the slot. + * Allocation/deallocation of the dpdk mbuf are the responsability of the + * caller. + * Note that mbuf chains are not supported. + */ +static void +slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf) +{ + char *data; + uint16_t length; + + rte_pktmbuf_reset(mbuf); + length = r->slot[index].len; + data = rte_pktmbuf_append(mbuf, length); + + if (data != NULL) + rte_memcpy(data, NETMAP_BUF(r, r->slot[index].buf_idx), length); +} + +static int32_t +fd_reserve(void) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(fd_port) && fd_port[i].port != FD_PORT_FREE; + i++) + ; + + if (i == RTE_DIM(fd_port)) + return -ENOMEM; + + fd_port[i].port = FD_PORT_RSRV; + return IDX_TO_FD(i); +} + +static int32_t +fd_release(int32_t fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + + if (!FD_VALID(fd) || (port = fd_port[idx].port) == FD_PORT_FREE) + return -EINVAL; + + /* if we still have a valid port attached, release the port */ + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + } + + fd_port[idx].port = FD_PORT_FREE; + return 0; +} + +static int +check_nmreq(struct nmreq *req, uint8_t *port) +{ + int32_t rc; + uint8_t portid; + + if (req == NULL) + return -EINVAL; + + if (req->nr_version != NETMAP_API) { + req->nr_version = NETMAP_API; + return -EINVAL; + } + + if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) { + RTE_LOG(ERR, USER1, "Invalid interface name:\"%s\" " + "in NIOCGINFO call\n", req->nr_name); + return rc; + } + + if (ports[portid].pool == NULL) { + RTE_LOG(ERR, USER1, "Misconfigured portid %hhu\n", portid); + return -EINVAL; + } + + *port = portid; + return 0; +} + +/** + * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an interface + * name (a port number in our case), fill the struct nmreq in with advisory + * information about the interface: number of rings and their size, total memory + * required in the map, ... + * Those are preconfigured using rte_eth_{,tx,rx}conf and + * rte_netmap_port_conf structures + * and calls to rte_netmap_init_port() in the Netmap application. + */ +static int +ioctl_niocginfo(__rte_unused int fd, void * param) +{ + uint8_t portid; + struct nmreq *req; + int32_t rc; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return rc; + + req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1); + req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1); + req->nr_tx_slots = ports[portid].nr_tx_slots; + req->nr_rx_slots = ports[portid].nr_rx_slots; + + /* in current implementation we have all NETIFs shared aone region. */ + req->nr_memsize = netmap.mem_sz; + req->nr_offset = 0; + + return 0; +} + +static void +netmap_ring_setup(struct netmap_ring *ring, uint8_t port, uint32_t ringid, + uint32_t num_slots) +{ + uint32_t j; + + ring->buf_ofs = netmap.buf_start - (uintptr_t)ring; + ring->num_slots = num_slots; + ring->cur = 0; + ring->reserved = 0; + ring->nr_buf_size = netmap.conf.max_bufsz; + ring->flags = 0; + ring->ts.tv_sec = 0; + ring->ts.tv_usec = 0; + + for (j = 0; j < ring->num_slots; j++) { + ring->slot[j].buf_idx = BUF_IDX(port, ringid, j); + ring->slot[j].len = 0; + ring->flags = 0; + } +} + +static int +netmap_regif(struct nmreq *req, uint32_t idx, uint8_t port) +{ + struct netmap_if *nmif; + struct netmap_ring *ring; + uint32_t i, slots, start_ring; + int32_t rc; + + if (ports[port].fd < RTE_DIM(fd_port)) { + RTE_LOG(ERR, USER1, "port %hhu already in use by fd: %u\n", + port, IDX_TO_FD(ports[port].fd)); + return -EBUSY; + } + if (fd_port[idx].port != FD_PORT_RSRV) { + RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n", + IDX_TO_FD(idx)); + return -EBUSY; + } + + nmif = ports[port].nmif; + + /* setup netmap_if fields. */ + memset(nmif, 0, netmap.netif_memsz); + + /* only ALL rings supported right now. */ + if (req->nr_ringid != 0) + return -EINVAL; + + snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name); + nmif->ni_version = req->nr_version; + + /* Netmap uses ni_(r|t)x_rings + 1 */ + nmif->ni_rx_rings = ports[port].nr_rx_rings - 1; + nmif->ni_tx_rings = ports[port].nr_tx_rings - 1; + + /* + * Setup TX rings and slots. + * Refer to the comments in netmap.h for details + */ + + slots = 0; + for (i = 0; i < nmif->ni_tx_rings + 1; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_TXRING(nmif, i); + netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots); + ring->avail = ring->num_slots; + + slots += ports[port].nr_tx_slots; + } + + /* + * Setup RX rings and slots. + * Refer to the comments in netmap.h for details + */ + + start_ring = i; + + for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_RXRING(nmif, (i - start_ring)); + netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots); + ring->avail = 0; + + slots += ports[port].nr_rx_slots; + } + + if ((rc = rte_eth_dev_start(port)) < 0) { + RTE_LOG(ERR, USER1, + "Couldn't start ethernet device %s (error %d)\n", + req->nr_name, rc); + return rc; + } + + /* setup fdi <--> port relationtip. */ + ports[port].fd = idx; + fd_port[idx].port = port; + + req->nr_memsize = netmap.mem_sz; + req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem; + + return 0; +} + +/** + * Simulate a Netmap NIOCREGIF ioctl: + */ +static int +ioctl_niocregif(int32_t fd, void * param) +{ + uint8_t portid; + int32_t rc; + uint32_t idx; + struct nmreq *req; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return rc; + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + rc = netmap_regif(req, idx, portid); + rte_spinlock_unlock(&netmap_lock); + + return rc; +} + +static void +netmap_unregif(uint32_t idx, uint32_t port) +{ + fd_port[idx].port = FD_PORT_RSRV; + ports[port].fd = UINT32_MAX; + rte_eth_dev_stop((uint8_t)port); +} + +/** + * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap + * mode back in "normal" mode. In our case, we just stop the port associated + * with this file descriptor. + */ +static int +ioctl_niocunregif(int fd) +{ + uint32_t idx, port; + int32_t rc; + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + + port = fd_port[idx].port; + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + rc = 0; + } else { + RTE_LOG(ERR, USER1, + "%s: %d is not associated with valid port\n", + __func__, fd); + rc = -EINVAL; + } + + rte_spinlock_unlock(&netmap_lock); + return rc; +} + +/** + * A call to rx_sync_ring will try to fill a Netmap RX ring with as many + * packets as it can hold coming from its dpdk port. + */ +static inline int +rx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number, + uint16_t max_burst) +{ + int32_t i, n_rx; + uint16_t burst_size; + uint32_t cur_slot, n_free_slots; + struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST]; + + n_free_slots = ring->num_slots - (ring->avail + ring->reserved); + n_free_slots = RTE_MIN(n_free_slots, max_burst); + cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1); + + while (n_free_slots) { + burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs)); + + /* receive up to burst_size packets from the NIC's queue */ + n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs, + burst_size); + + if (n_rx == 0) + return 0; + if (unlikely(n_rx < 0)) + return -1; + + /* Put those n_rx packets in the Netmap structures */ + for (i = 0; i < n_rx ; i++) { + mbuf_to_slot(rx_mbufs[i], ring, cur_slot); + rte_pktmbuf_free(rx_mbufs[i]); + cur_slot = NETMAP_RING_NEXT(ring, cur_slot); + } + + /* Update the Netmap ring structure to reflect the change */ + ring->avail += n_rx; + n_free_slots -= n_rx; + } + + return 0; +} + +static inline int +rx_sync_if(uint32_t port) +{ + uint16_t burst; + uint32_t i, rc; + struct netmap_if *nifp; + struct netmap_ring *r; + + nifp = ports[port].nmif; + burst = ports[port].rx_burst; + rc = 0; + + for (i = 0; i < nifp->ni_rx_rings + 1; i++) { + r = NETMAP_RXRING(nifp, i); + rx_sync_ring(r, (uint8_t)port, (uint16_t)i, burst); + rc += r->avail; + } + + return rc; +} + +/** + * Simulate a Netmap NIOCRXSYNC ioctl: + */ +static int +ioctl_niocrxsync(int fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + if ((port = fd_port[idx].port) < RTE_DIM(ports) && + ports[port].fd == idx) { + return rx_sync_if(fd_port[idx].port); + } else { + return -EINVAL; + } +} + +/** + * A call to tx_sync_ring will try to empty a Netmap TX ring by converting its + * buffers into rte_mbufs and sending them out on the rings's dpdk port. + */ +static int +tx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number, + struct rte_mempool *pool, uint16_t max_burst) +{ + uint32_t i, n_tx; + uint16_t burst_size; + uint32_t cur_slot, n_used_slots; + struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST]; + + n_used_slots = ring->num_slots - ring->avail; + n_used_slots = RTE_MIN(n_used_slots, max_burst); + cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1); + + while (n_used_slots) { + burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs)); + + for (i = 0; i < burst_size; i++) { + tx_mbufs[i] = rte_pktmbuf_alloc(pool); + if (tx_mbufs[i] == NULL) + goto err; + + slot_to_mbuf(ring, cur_slot, tx_mbufs[i]); + cur_slot = NETMAP_RING_NEXT(ring, cur_slot); + } + + n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs, + burst_size); + + /* Update the Netmap ring structure to reflect the change */ + ring->avail += n_tx; + n_used_slots -= n_tx; + + /* Return the mbufs that failed to transmit to their pool */ + if (unlikely(n_tx != burst_size)) { + for (i = n_tx; i < burst_size; i++) + rte_pktmbuf_free(tx_mbufs[i]); + break; + } + } + + return 0; + +err: + for (; i == 0; --i) + rte_pktmbuf_free(tx_mbufs[i]); + + RTE_LOG(ERR, USER1, + "Couldn't get mbuf from mempool is the mempool too small?\n"); + return -1; +} + +static int +tx_sync_if(uint32_t port) +{ + uint16_t burst; + uint32_t i, rc; + struct netmap_if *nifp; + struct netmap_ring *r; + struct rte_mempool *mp; + + nifp = ports[port].nmif; + mp = ports[port].pool; + burst = ports[port].tx_burst; + rc = 0; + + for (i = 0; i < nifp->ni_tx_rings + 1; i++) { + r = NETMAP_TXRING(nifp, i); + tx_sync_ring(r, (uint8_t)port, (uint16_t)i, mp, burst); + rc += r->avail; + } + + return rc; +} + +/** + * Simulate a Netmap NIOCTXSYNC ioctl: + */ +static inline int +ioctl_nioctxsync(int fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + if ((port = fd_port[idx].port) < RTE_DIM(ports) && + ports[port].fd == idx) { + return tx_sync_if(fd_port[idx].port); + } else { + return -EINVAL; + } +} + +/** + * Give the library a mempool of rte_mbufs with which it can do the + * rte_mbuf <--> netmap slot conversions. + */ +int +rte_netmap_init(const struct rte_netmap_conf *conf) +{ + size_t buf_ofs, nmif_sz, sz; + size_t port_rings, port_slots, port_bufs; + uint32_t i, port_num; + + port_num = RTE_MAX_ETHPORTS; + port_rings = 2 * conf->max_rings; + port_slots = port_rings * conf->max_slots; + port_bufs = port_slots; + + nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots); + sz = nmif_sz * port_num; + + buf_ofs = RTE_ALIGN_CEIL(sz, RTE_CACHE_LINE_SIZE); + sz = buf_ofs + port_bufs * conf->max_bufsz * port_num; + + if (sz > UINT32_MAX || + (netmap.mem = rte_zmalloc_socket(__func__, sz, + RTE_CACHE_LINE_SIZE, conf->socket_id)) == NULL) { + RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n", + __func__, sz); + return -ENOMEM; + } + + netmap.mem_sz = sz; + netmap.netif_memsz = nmif_sz; + netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs; + netmap.conf = *conf; + + rte_spinlock_init(&netmap_lock); + + /* Mark all ports as unused and set NETIF pointer. */ + for (i = 0; i != RTE_DIM(ports); i++) { + ports[i].fd = UINT32_MAX; + ports[i].nmif = (struct netmap_if *) + ((uintptr_t)netmap.mem + nmif_sz * i); + } + + /* Mark all fd_ports as unused. */ + for (i = 0; i != RTE_DIM(fd_port); i++) { + fd_port[i].port = FD_PORT_FREE; + } + + return 0; +} + + +int +rte_netmap_init_port(uint8_t portid, const struct rte_netmap_port_conf *conf) +{ + int32_t ret; + uint16_t i; + uint16_t rx_slots, tx_slots; + + if (conf == NULL || + portid >= RTE_DIM(ports) || + conf->nr_tx_rings > netmap.conf.max_rings || + conf->nr_rx_rings > netmap.conf.max_rings) { + RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n", + __func__, portid); + return -EINVAL; + } + + rx_slots = (uint16_t)rte_align32pow2(conf->nr_rx_slots); + tx_slots = (uint16_t)rte_align32pow2(conf->nr_tx_slots); + + if (tx_slots > netmap.conf.max_slots || + rx_slots > netmap.conf.max_slots) { + RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n", + __func__, portid); + return -EINVAL; + } + + ret = rte_eth_dev_configure(portid, conf->nr_rx_rings, + conf->nr_tx_rings, conf->eth_conf); + + if (ret < 0) { + RTE_LOG(ERR, USER1, "Couldn't configure port %hhu\n", portid); + return ret; + } + + for (i = 0; i < conf->nr_tx_rings; i++) { + ret = rte_eth_tx_queue_setup(portid, i, tx_slots, + conf->socket_id, NULL); + + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Couldn't configure TX queue %"PRIu16" of " + "port %"PRIu8"\n", + i, portid); + return ret; + } + + ret = rte_eth_rx_queue_setup(portid, i, rx_slots, + conf->socket_id, NULL, conf->pool); + + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Couldn't configure RX queue %"PRIu16" of " + "port %"PRIu8"\n", + i, portid); + return ret; + } + } + + /* copy config to the private storage. */ + ports[portid].eth_conf = conf->eth_conf[0]; + ports[portid].pool = conf->pool; + ports[portid].socket_id = conf->socket_id; + ports[portid].nr_tx_rings = conf->nr_tx_rings; + ports[portid].nr_rx_rings = conf->nr_rx_rings; + ports[portid].nr_tx_slots = tx_slots; + ports[portid].nr_rx_slots = rx_slots; + ports[portid].tx_burst = conf->tx_burst; + ports[portid].rx_burst = conf->rx_burst; + + return 0; +} + +int +rte_netmap_close(__rte_unused int fd) +{ + int32_t rc; + + rte_spinlock_lock(&netmap_lock); + rc = fd_release(fd); + rte_spinlock_unlock(&netmap_lock); + + if (rc < 0) { + errno =-rc; + rc = -1; + } + return rc; +} + +int rte_netmap_ioctl(int fd, uint32_t op, void *param) +{ + int ret; + + if (!FD_VALID(fd)) { + errno = EBADF; + return -1; + } + + switch (op) { + + case NIOCGINFO: + ret = ioctl_niocginfo(fd, param); + break; + + case NIOCREGIF: + ret = ioctl_niocregif(fd, param); + break; + + case NIOCUNREGIF: + ret = ioctl_niocunregif(fd); + break; + + case NIOCRXSYNC: + ret = ioctl_niocrxsync(fd); + break; + + case NIOCTXSYNC: + ret = ioctl_nioctxsync(fd); + break; + + default: + ret = -ENOTTY; + } + + if (ret < 0) { + errno = -ret; + ret = -1; + } else { + ret = 0; + } + + return ret; +} + +void * +rte_netmap_mmap(void *addr, size_t length, + int prot, int flags, int fd, off_t offset) +{ + static const int cprot = PROT_WRITE | PROT_READ; + + if (!FD_VALID(fd) || length + offset > netmap.mem_sz || + (prot & cprot) != cprot || + ((flags & MAP_FIXED) != 0 && addr != NULL)) { + + errno = EINVAL; + return MAP_FAILED; + } + + return (void *)((uintptr_t)netmap.mem + (uintptr_t)offset); +} + +/** + * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that + * any attempt to use that file descriptor with the usual API will fail. + */ +int +rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags) +{ + int fd; + + rte_spinlock_lock(&netmap_lock); + fd = fd_reserve(); + rte_spinlock_unlock(&netmap_lock); + + if (fd < 0) { + errno = -fd; + fd = -1; + } + return fd; +} + +/** + * Doesn't support timeout other than 0 or infinite (negative) timeout + */ +int +rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout) +{ + int32_t count_it, ret; + uint32_t i, idx, port; + uint32_t want_rx, want_tx; + + if (timeout > 0) + return -1; + + ret = 0; + do { + for (i = 0; i < nfds; i++) { + + count_it = 0; + + if (!FD_VALID(fds[i].fd) || fds[i].events == 0) { + fds[i].revents = 0; + continue; + } + + idx = FD_TO_IDX(fds[i].fd); + if ((port = fd_port[idx].port) >= RTE_DIM(ports) || + ports[port].fd != idx) { + + fds[i].revents |= POLLERR; + ret++; + continue; + } + + want_rx = fds[i].events & (POLLIN | POLLRDNORM); + want_tx = fds[i].events & (POLLOUT | POLLWRNORM); + + if (want_rx && rx_sync_if(port) > 0) { + fds[i].revents = (uint16_t) + (fds[i].revents | want_rx); + count_it = 1; + } + if (want_tx && tx_sync_if(port) > 0) { + fds[i].revents = (uint16_t) + (fds[i].revents | want_tx); + count_it = 1; + } + + ret += count_it; + } + } + while ((ret == 0 && timeout < 0) || timeout); + + return ret; +} diff --git a/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h new file mode 100644 index 00000000..3dc7a2f4 --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMPAT_NETMAP_H_ + +#include <poll.h> +#include <linux/ioctl.h> +#include <net/if.h> + +#include <rte_ethdev.h> +#include <rte_mempool.h> + +#include "netmap.h" +#include "netmap_user.h" + +/** + * One can overwrite Netmap macros here as needed + */ + +struct rte_netmap_conf { + int32_t socket_id; + uint32_t max_rings; /* number of rings(queues) per netmap_if(port) */ + uint32_t max_slots; /* number of slots(descriptors) per netmap ring. */ + uint16_t max_bufsz; /* size of each netmap buffer. */ +}; + +struct rte_netmap_port_conf { + struct rte_eth_conf *eth_conf; + struct rte_mempool *pool; + int32_t socket_id; + uint16_t nr_tx_rings; + uint16_t nr_rx_rings; + uint32_t nr_tx_slots; + uint32_t nr_rx_slots; + uint16_t tx_burst; + uint16_t rx_burst; +}; + +int rte_netmap_init(const struct rte_netmap_conf *conf); +int rte_netmap_init_port(uint8_t portid, + const struct rte_netmap_port_conf *conf); + +int rte_netmap_close(int fd); +int rte_netmap_ioctl(int fd, uint32_t op, void *param); +int rte_netmap_open(const char *pathname, int flags); +int rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout); +void *rte_netmap_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset); + +#endif /* _RTE_COMPAT_NETMAP_H_ */ diff --git a/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h new file mode 100644 index 00000000..677c8a9f --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of the authors nor the names of their contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $ + * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +/* + * --- Netmap data structures --- + * + * The data structures used by netmap are shown below. Those in + * capital letters are in an mmapp()ed area shared with userspace, + * while others are private to the kernel. + * Shared structures do not contain pointers but only memory + * offsets, so that addressing is portable between kernel and userspace. + + + softc ++----------------+ +| standard fields| +| if_pspare[0] ----------+ ++----------------+ | + | ++----------------+<------+ +|(netmap_adapter)| +| | netmap_kring +| tx_rings *--------------------------------->+---------------+ +| | netmap_kring | ring *---------. +| rx_rings *--------->+---------------+ | nr_hwcur | | ++----------------+ | ring *--------. | nr_hwavail | V + | nr_hwcur | | | selinfo | | + | nr_hwavail | | +---------------+ . + | selinfo | | | ... | . + +---------------+ | |(ntx+1 entries)| + | .... | | | | + |(nrx+1 entries)| | +---------------+ + | | | + KERNEL +---------------+ | + | + ==================================================================== + | + USERSPACE | NETMAP_RING + +---->+-------------+ + / | cur | + NETMAP_IF (nifp, one per file desc.) / | avail | + +---------------+ / | buf_ofs | + | ni_tx_rings | / +=============+ + | ni_rx_rings | / | buf_idx | slot[0] + | | / | len, flags | + | | / +-------------+ + +===============+ / | buf_idx | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | len, flags | + | txring_ofs[1] | +-------------+ + (num_rings+1 entries) (nr_num_slots entries) + | txring_ofs[n] | | buf_idx | slot[n-1] + +---------------+ | len, flags | + | rxring_ofs[0] | +-------------+ + | rxring_ofs[1] | + (num_rings+1 entries) + | txring_ofs[n] | + +---------------+ + + * The private descriptor ('softc' or 'adapter') of each interface + * is extended with a "struct netmap_adapter" containing netmap-related + * info (see description in dev/netmap/netmap_kernel.h. + * Among other things, tx_rings and rx_rings point to the arrays of + * "struct netmap_kring" which in turn reache the various + * "struct netmap_ring", shared with userspace. + + * The NETMAP_RING is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer, its length and some flags. + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE + * In the kernel, buffers do not necessarily need to be contiguous, + * and the virtual and physical addresses are derived through + * a lookup table. + * To associate a different buffer to a slot, applications must + * write the new index in buf_idx, and set NS_BUF_CHANGED flag to + * make sure that the kernel updates the hardware ring as needed. + * + * Normally the driver is not requested to report the result of + * transmissions (this can dramatically speed up operation). + * However the user may request to report completion by setting + * NS_REPORT. + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* packet length, to be copied to/from the hw ring */ + uint16_t flags; /* buf changed, etc. */ +#define NS_BUF_CHANGED 0x0001 /* must resync the map, buffer changed */ +#define NS_REPORT 0x0002 /* ask the hardware to report results + * e.g. by generating an interrupt + */ +}; + +/* + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level, two fields are important: avail and cur. + * + * In TX rings: + * avail indicates the number of slots available for transmission. + * It is updated by the kernel after every netmap system call. + * It MUST BE decremented by the application when it appends a + * packet. + * cur indicates the slot to use for the next packet + * to send (i.e. the "tail" of the queue). + * It MUST BE incremented by the application before + * netmap system calls to reflect the number of newly + * sent packets. + * It is checked by the kernel on netmap system calls + * (normally unmodified by the kernel unless invalid). + * + * The kernel side of netmap uses two additional fields in its own + * private ring structure, netmap_kring: + * nr_hwcur is a copy of nr_cur on an NIOCTXSYNC. + * nr_hwavail is the number of slots known as available by the + * hardware. It is updated on an INTR (inc by the + * number of packets sent) and on a NIOCTXSYNC + * (decrease by nr_cur - nr_hwcur) + * A special case, nr_hwavail is -1 if the transmit + * side is idle (no pending transmits). + * + * In RX rings: + * avail is the number of packets available (possibly 0). + * It MUST BE decremented by the application when it consumes + * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC + * cur indicates the first slot that contains a packet not + * processed yet (the "head" of the queue). + * It MUST BE incremented by the software when it consumes + * a packet. + * reserved indicates the number of buffers before 'cur' + * that the application has still in use. Normally 0, + * it MUST BE incremented by the application when it + * does not return the buffer immediately, and decremented + * when the buffer is finally freed. + * + * The kernel side of netmap uses two additional fields in the kring: + * nr_hwcur is a copy of nr_cur on an NIOCRXSYNC + * nr_hwavail is the number of packets available. It is updated + * on INTR (inc by the number of new packets arrived) + * and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur). + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring is owned by the user program and it is only + * accessed or modified in the upper half of the kernel during + * a system call. + * + * The netmap_kring is only modified by the upper half of the kernel. + */ +struct netmap_ring { + /* + * nr_buf_base_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + ssize_t buf_ofs; + uint32_t num_slots; /* number of slots in the ring. */ + uint32_t avail; /* number of usable slots */ + uint32_t cur; /* 'current' r/w position */ + uint32_t reserved; /* not refilled before current */ + + uint16_t nr_buf_size; + uint16_t flags; +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + + struct timeval ts; /* time of last *sync() */ + + /* the slots follow. This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * Netmap representation of an interface and its queue(s). + * There is one netmap_if for each file descriptor on which we want + * to select/poll. We assume that on each interface has the same number + * of receive and transmit queues. + * select/poll operates on one or all pairs depending on the value of + * nmr_queueid passed on the ioctl. + */ +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + u_int ni_version; /* API version, currently unused */ + u_int ni_rx_rings; /* number of rx rings */ + u_int ni_tx_rings; /* if zero, same as ni_rx_rings */ + /* + * The following array contains the offset of each netmap ring + * from this structure. The first ni_tx_queues+1 entries refer + * to the tx rings, the next ni_rx_queues+1 refer to the rx rings + * (the last entry in each block refers to the host stack rings). + * The area is filled up by the kernel on NIOCREG, + * and then only read by userspace code. + */ + ssize_t ring_ofs[0]; +}; + +#ifndef NIOCREGIF +/* + * ioctl names and related fields + * + * NIOCGINFO takes a struct ifreq, the interface name is the input, + * the outputs are number of queues and number of descriptor + * for each queue (useful to set number of threads etc.). + * + * NIOCREGIF takes an interface name within a struct ifreq, + * and activates netmap mode on the interface (if possible). + * + * NIOCUNREGIF unregisters the interface associated to the fd. + * + * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, + * whose identity is set in NIOCREGIF through nr_ringid + */ + +/* + * struct nmreq overlays a struct ifreq + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ +#define NETMAP_API 3 /* current version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */ +#define NETMAP_SW_RING 0x2000 /* process the sw ring */ +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ +#define NETMAP_RING_MASK 0xfff /* the ring number */ + uint16_t spare1; + uint32_t spare2[4]; +}; + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCUNREGIF _IO('i', 147) /* interface unregister */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#endif /* !NIOCREGIF */ + +#endif /* _NET_NETMAP_H_ */ diff --git a/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h new file mode 100644 index 00000000..f369592e --- /dev/null +++ b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of the authors nor the names of their contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap_user.h 231198 2012-02-08 11:43:29Z luigi $ + * $Id: netmap_user.h 10879 2012-04-12 22:48:59Z luigi $ + * + * This header contains the macros used to manipulate netmap structures + * and packets in userspace. See netmap(4) for more information. + * + * The address of the struct netmap_if, say nifp, is computed from the + * value returned from ioctl(.., NIOCREG, ...) and the mmap region: + * ioctl(fd, NIOCREG, &req); + * mem = mmap(0, ... ); + * nifp = NETMAP_IF(mem, req.nr_nifp); + * (so simple, we could just do it manually) + * + * From there: + * struct netmap_ring *NETMAP_TXRING(nifp, index) + * struct netmap_ring *NETMAP_RXRING(nifp, index) + * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags + * + * ring->slot[i] gives us the i-th slot (we can access + * directly plen, flags, bufindex) + * + * char *buf = NETMAP_BUF(ring, index) returns a pointer to + * the i-th buffer + * + * Since rings are circular, we have macros to compute the next index + * i = NETMAP_RING_NEXT(ring, i); + */ + +#ifndef _NET_NETMAP_USER_H_ +#define _NET_NETMAP_USER_H_ + +#define NETMAP_IF(b, o) (struct netmap_if *)((char *)(b) + (o)) + +#define NETMAP_TXRING(nifp, index) \ + ((struct netmap_ring *)((char *)(nifp) + \ + (nifp)->ring_ofs[index] ) ) + +#define NETMAP_RXRING(nifp, index) \ + ((struct netmap_ring *)((char *)(nifp) + \ + (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +#define NETMAP_RING_NEXT(r, i) \ + ((i)+1 == (r)->num_slots ? 0 : (i) + 1 ) + +#define NETMAP_RING_FIRST_RESERVED(r) \ + ( (r)->cur < (r)->reserved ? \ + (r)->cur + (r)->num_slots - (r)->reserved : \ + (r)->cur - (r)->reserved ) + +/* + * Return 1 if the given tx ring is empty. + */ +#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1) + +#endif /* _NET_NETMAP_USER_H_ */ |