summaryrefslogtreecommitdiffstats
path: root/src/seastar/dpdk/examples/netmap_compat
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/seastar/dpdk/examples/netmap_compat
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/examples/netmap_compat')
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/Makefile22
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/bridge/Makefile35
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c343
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c899
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h51
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/meson.build10
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h289
-rw-r--r--src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h95
8 files changed, 1744 insertions, 0 deletions
diff --git a/src/seastar/dpdk/examples/netmap_compat/Makefile b/src/seastar/dpdk/examples/netmap_compat/Makefile
new file mode 100644
index 000000000..b9f78d173
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, detect a build directory, by looking for a path with a .config
+RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))
+
+include $(RTE_SDK)/mk/rte.vars.mk
+unexport RTE_SRCDIR RTE_OUTPUT RTE_EXTMK
+
+DIRS-y += bridge
+
+.PHONY: all clean $(DIRS-y)
+
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+$(DIRS-y):
+ $(MAKE) -C $@ $(MAKECMDGOALS) O=$(RTE_OUTPUT)
diff --git a/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile b/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile
new file mode 100644
index 000000000..7ed30e57b
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/bridge/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define the RTE_SDK environment variable")
+endif
+
+# Default target, detect a build directory, by looking for a path with a .config
+RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV_LINUX),y)
+$(info This application can only operate in a linux environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+clean:
+else
+
+# binary name
+APP = bridge
+
+# for compat_netmap.c
+VPATH := $(SRCDIR)/../lib
+
+# all source are stored in SRCS-y
+SRCS-y := bridge.c
+SRCS-y += compat_netmap.c
+
+CFLAGS += -O3 -I$(SRCDIR)/../lib -I$(SRCDIR)/../netmap
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c b/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c
new file mode 100644
index 000000000..d40e163b0
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/bridge/bridge.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_string_fns.h>
+#include "compat_netmap.h"
+
+
+#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM
+#define MBUF_DATA_SIZE (BUF_SIZE + RTE_PKTMBUF_HEADROOM)
+
+#define MBUF_PER_POOL 8192
+
+struct rte_eth_conf eth_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+#define MAX_QUEUE_NUM 1
+#define RX_QUEUE_NUM 1
+#define TX_QUEUE_NUM 1
+
+#define MAX_DESC_NUM 0x400
+#define RX_DESC_NUM 0x100
+#define TX_DESC_NUM 0x200
+
+#define RX_SYNC_NUM 0x20
+#define TX_SYNC_NUM 0x20
+
+struct rte_netmap_port_conf port_conf = {
+ .eth_conf = &eth_conf,
+ .socket_id = SOCKET_ID_ANY,
+ .nr_tx_rings = TX_QUEUE_NUM,
+ .nr_rx_rings = RX_QUEUE_NUM,
+ .nr_tx_slots = TX_DESC_NUM,
+ .nr_rx_slots = RX_DESC_NUM,
+ .tx_burst = TX_SYNC_NUM,
+ .rx_burst = RX_SYNC_NUM,
+};
+
+struct rte_netmap_conf netmap_conf = {
+ .socket_id = SOCKET_ID_ANY,
+ .max_bufsz = BUF_SIZE,
+ .max_rings = MAX_QUEUE_NUM,
+ .max_slots = MAX_DESC_NUM,
+};
+
+/*
+ * Main-loop exit flag. It is written by sigint_handler(), so it has to be a
+ * volatile sig_atomic_t for the store to be well defined and visible.
+ */
+static volatile sig_atomic_t stop = 0;
+
+#define MAX_PORT_NUM 2
+
+struct netmap_port {
+ int fd;
+ struct netmap_if *nmif;
+ struct netmap_ring *rx_ring;
+ struct netmap_ring *tx_ring;
+ const char *str;
+ uint8_t id;
+};
+
+static struct {
+ uint32_t num;
+ struct netmap_port p[MAX_PORT_NUM];
+ void *mem;
+} ports;
+
+/**
+ * Print the command-line synopsis to stderr.
+ *
+ * @param prgname
+ *   Program name substituted into the "Usage:" line.
+ */
+static void
+usage(const char *prgname)
+{
+ fprintf(stderr, "Usage: %s [EAL args] -- [OPTION]...\n"
+ "-h, --help \t Show this help message and exit\n"
+ "-i INTERFACE_A \t Interface (DPDK port number) to use\n"
+ "[ -i INTERFACE_B \t Interface (DPDK port number) to use ]\n",
+ prgname);
+}
+
+/**
+ * Parse a decimal DPDK port number given on the command line.
+ *
+ * The application is terminated through rte_exit() when the string is empty,
+ * has trailing characters, or the value is not a usable port id: valid ids
+ * are 0 .. RTE_MAX_ETHPORTS - 1 and must also fit the uint8_t return type.
+ *
+ * @param portid_str
+ *   NUL-terminated option argument.
+ * @return
+ *   The parsed port id.
+ */
+static uint8_t
+parse_portid(const char *portid_str)
+{
+	char *end;
+	unsigned long id;
+
+	/*
+	 * Keep the full unsigned long: narrowing before the range check would
+	 * let huge inputs wrap around into the valid range.
+	 */
+	id = strtoul(portid_str, &end, 10);
+
+	if (end == portid_str || *end != '\0' ||
+			id >= RTE_MAX_ETHPORTS || id > UINT8_MAX)
+		rte_exit(EXIT_FAILURE, "Invalid port number\n");
+
+	return (uint8_t) id;
+}
+
+/**
+ * Parse the application options (those following the EAL arguments).
+ *
+ * Every "-i" records the option string and the parsed port id in the next
+ * free entry of ports.p[] and increments ports.num. "-h", an unknown option,
+ * or more "-i" options than ports.p[] can hold end the program via rte_exit().
+ *
+ * @return
+ *   Always 0; every failure path exits the process instead of returning.
+ */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hi:")) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ rte_exit(EXIT_SUCCESS, "exiting...");
+ break;
+ case 'i':
+ /* refuse the option before writing past the end of ports.p[] */
+ if (ports.num >= RTE_DIM(ports.p)) {
+ usage(argv[0]);
+ rte_exit(EXIT_FAILURE, "configs with %u "
+ "ports are not supported\n",
+ ports.num + 1);
+
+ }
+
+ /* the string is kept as well: it is later used as the netmap interface name */
+ ports.p[ports.num].str = optarg;
+ ports.p[ports.num].id = parse_portid(optarg);
+ ports.num++;
+ break;
+ default:
+ usage(argv[0]);
+ rte_exit(EXIT_FAILURE, "invalid option: %c\n", opt);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * SIGINT handler: ask the main loop to finish and restore the default
+ * disposition so that a second SIGINT terminates the process.
+ */
+static void
+sigint_handler(__rte_unused int sig)
+{
+	signal(SIGINT, SIG_DFL);
+	stop = 1;
+}
+
+/*
+ * Forward n packets from the rx ring to the tx ring without copying data:
+ * for each packet the buffer indexes of the current rx and tx slots are
+ * swapped, both slots are flagged NS_BUF_CHANGED, and both rings advance by
+ * one slot with their avail counters decremented. The caller must make sure
+ * that both rings have at least n slots available.
+ */
+static void move(int n, struct netmap_ring *rx, struct netmap_ring *tx)
+{
+	for (; n > 0; n--) {
+		const uint32_t rxi = rx->cur;
+		const uint32_t txi = tx->cur;
+		const uint32_t spare = tx->slot[txi].buf_idx;
+
+		/* the tx slot takes over the received buffer and its length */
+		tx->slot[txi].buf_idx = rx->slot[rxi].buf_idx;
+		tx->slot[txi].len = rx->slot[rxi].len;
+		tx->slot[txi].flags |= NS_BUF_CHANGED;
+
+		/* the rx slot gets the buffer the tx slot used to own */
+		rx->slot[rxi].buf_idx = spare;
+		rx->slot[rxi].flags |= NS_BUF_CHANGED;
+
+		tx->cur = NETMAP_RING_NEXT(tx, txi);
+		tx->avail--;
+		rx->cur = NETMAP_RING_NEXT(rx, rxi);
+		rx->avail--;
+	}
+}
+
+/**
+ * Put ports.p[idx] under netmap control.
+ *
+ * Opens a netmap handle, queries the interface (NIOCGINFO), registers it
+ * (NIOCREGIF), maps the shared netmap region on the first call, and caches
+ * the interface pointer together with its first TX and RX ring.
+ *
+ * @param idx
+ *   Index into ports.p[].
+ * @return
+ *   0 on success, non-zero on failure (a message is printed).
+ */
+static int
+netmap_port_open(uint32_t idx)
+{
+	int err;
+	struct netmap_port *port;
+	struct nmreq req;
+
+	port = ports.p + idx;
+
+	port->fd = rte_netmap_open("/dev/netmap", O_RDWR);
+	if (port->fd < 0) {
+		printf("[E] rte_netmap_open failed (errno %d)\n", errno);
+		return -1;
+	}
+
+	/* never pass stack garbage in the request fields we do not set */
+	memset(&req, 0, sizeof(req));
+	strlcpy(req.nr_name, port->str, sizeof(req.nr_name));
+	req.nr_version = NETMAP_API;
+	req.nr_ringid = 0;
+
+	err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req);
+	if (err) {
+		printf("[E] NIOCGINFO ioctl failed (error %d)\n", err);
+		return err;
+	}
+
+	/* NIOCGINFO filled in output fields; restate the inputs for NIOCREGIF */
+	strlcpy(req.nr_name, port->str, sizeof(req.nr_name));
+	req.nr_version = NETMAP_API;
+	req.nr_ringid = 0;
+
+	err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req);
+	if (err) {
+		printf("[E] NIOCREGIF ioctl failed (error %d)\n", err);
+		return err;
+	}
+
+	/* mmap only once. */
+	if (ports.mem == NULL)
+		ports.mem = rte_netmap_mmap(NULL, req.nr_memsize,
+			PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0);
+
+	if (ports.mem == MAP_FAILED) {
+		printf("[E] NETMAP mmap failed for fd: %d\n", port->fd);
+		return -ENOMEM;
+	}
+
+	port->nmif = NETMAP_IF(ports.mem, req.nr_offset);
+
+	port->tx_ring = NETMAP_TXRING(port->nmif, 0);
+	port->rx_ring = NETMAP_RXRING(port->nmif, 0);
+
+	return 0;
+}
+
+
+/*
+ * Entry point of the bridge example.
+ *
+ * Initialises the EAL, parses the "-i" options, creates the mbuf pool,
+ * initialises the netmap compatibility layer and every requested port, puts
+ * the ports in netmap mode and then forwards packets between them until
+ * SIGINT is received (or poll fails). On exit every port is unregistered and
+ * its netmap handle closed.
+ */
+int main(int argc, char *argv[])
+{
+ int err, ret;
+ uint32_t i, pmsk;
+ struct nmreq req;
+ struct pollfd pollfd[MAX_PORT_NUM];
+ struct rte_mempool *pool;
+ struct netmap_ring *rx_ring, *tx_ring;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n");
+
+ /* skip the arguments consumed by the EAL */
+ argc -= ret;
+ argv += ret;
+
+ parse_args(argc, argv);
+
+ if (ports.num == 0)
+ rte_exit(EXIT_FAILURE, "no ports specified\n");
+
+ if (rte_eth_dev_count_avail() < 1)
+ rte_exit(EXIT_FAILURE, "Not enough ethernet ports available\n");
+
+ pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0,
+ MBUF_DATA_SIZE, rte_socket_id());
+ if (pool == NULL)
+ rte_exit(EXIT_FAILURE, "Couldn't create mempool\n");
+
+ netmap_conf.socket_id = rte_socket_id();
+ err = rte_netmap_init(&netmap_conf);
+
+ if (err < 0)
+ rte_exit(EXIT_FAILURE,
+ "Couldn't initialize librte_compat_netmap\n");
+ else
+ printf("librte_compat_netmap initialized\n");
+
+ port_conf.pool = pool;
+ port_conf.socket_id = rte_socket_id();
+
+ /* configure the DPDK side of every requested port */
+ for (i = 0; i != ports.num; i++) {
+
+ err = rte_netmap_init_port(ports.p[i].id, &port_conf);
+ if (err < 0)
+ rte_exit(EXIT_FAILURE, "Couldn't setup port %hhu\n",
+ ports.p[i].id);
+
+ rte_eth_promiscuous_enable(ports.p[i].id);
+ }
+
+ /* open a netmap handle per port and register the port on it */
+ for (i = 0; i != ports.num; i++) {
+
+ err = netmap_port_open(i);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Couldn't set port %hhu "
+ "under NETMAP control\n",
+ ports.p[i].id);
+ }
+ else
+ printf("Port %hhu now in Netmap mode\n", ports.p[i].id);
+ }
+
+ /* zero both entries: pollfd[1] is read below even with a single port */
+ memset(pollfd, 0, sizeof(pollfd));
+
+ for (i = 0; i != ports.num; i++) {
+ pollfd[i].fd = ports.p[i].fd;
+ pollfd[i].events = POLLIN | POLLOUT;
+ }
+
+ signal(SIGINT, sigint_handler);
+
+ /*
+ * Index of the peer of port 0: 0 with a single port (packets go back
+ * out of the port they arrived on), 1 with two ports.
+ */
+ pmsk = ports.num - 1;
+
+ printf("Bridge up and running!\n");
+
+ while (!stop) {
+ uint32_t n_pkts;
+
+ pollfd[0].revents = 0;
+ pollfd[1].revents = 0;
+
+ /* timeout 0: non-blocking, the loop busy-polls */
+ ret = rte_netmap_poll(pollfd, ports.num, 0);
+ if (ret < 0) {
+ stop = 1;
+ printf("[E] poll returned with error %d\n", ret);
+ }
+
+ if (((pollfd[0].revents | pollfd[1].revents) & POLLERR) != 0) {
+ printf("POLLERR!\n");
+ }
+
+ /* port 0 -> peer */
+ if ((pollfd[0].revents & POLLIN) != 0 &&
+ (pollfd[pmsk].revents & POLLOUT) != 0) {
+
+ rx_ring = ports.p[0].rx_ring;
+ tx_ring = ports.p[pmsk].tx_ring;
+
+ n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail);
+ move(n_pkts, rx_ring, tx_ring);
+ }
+
+ /* peer -> port 0, only when the peer is a different port */
+ if (pmsk != 0 && (pollfd[pmsk].revents & POLLIN) != 0 &&
+ (pollfd[0].revents & POLLOUT) != 0) {
+
+ rx_ring = ports.p[pmsk].rx_ring;
+ tx_ring = ports.p[0].tx_ring;
+
+ n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail);
+ move(n_pkts, rx_ring, tx_ring);
+ }
+ }
+
+ printf("Bridge stopped!\n");
+
+ for (i = 0; i != ports.num; i++) {
+ /* req is passed uninitialised here; NOTE(review): confirm NIOCUNREGIF never reads it */
+ err = rte_netmap_ioctl(ports.p[i].fd, NIOCUNREGIF, &req);
+ if (err) {
+ printf("[E] NIOCUNREGIF ioctl failed (error %d)\n",
+ err);
+ }
+ else
+ printf("Port %hhu unregistered from Netmap mode\n", ports.p[i].id);
+
+ rte_netmap_close(ports.p[i].fd);
+ }
+ return 0;
+}
diff --git a/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c
new file mode 100644
index 000000000..10a437943
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.c
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/mman.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+
+#include "compat_netmap.h"
+
+struct netmap_port {
+ struct rte_mempool *pool;
+ struct netmap_if *nmif;
+ struct rte_eth_conf eth_conf;
+ struct rte_eth_txconf tx_conf;
+ struct rte_eth_rxconf rx_conf;
+ int32_t socket_id;
+ uint16_t nr_tx_rings;
+ uint16_t nr_rx_rings;
+ uint32_t nr_tx_slots;
+ uint32_t nr_rx_slots;
+ uint16_t tx_burst;
+ uint16_t rx_burst;
+ uint32_t fd;
+};
+
+struct fd_port {
+ uint32_t port;
+};
+
+#ifndef POLLRDNORM
+#define POLLRDNORM 0x0040
+#endif
+
+#ifndef POLLWRNORM
+#define POLLWRNORM 0x0100
+#endif
+
+#define FD_PORT_FREE UINT32_MAX
+#define FD_PORT_RSRV (FD_PORT_FREE - 1)
+
+struct netmap_state {
+ struct rte_netmap_conf conf;
+ uintptr_t buf_start;
+ void *mem;
+ uint32_t mem_sz;
+ uint32_t netif_memsz;
+};
+
+
+#define COMPAT_NETMAP_MAX_NOFILE (2 * RTE_MAX_ETHPORTS)
+#define COMPAT_NETMAP_MAX_BURST 64
+#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST)
+
+static struct netmap_port ports[RTE_MAX_ETHPORTS];
+static struct netmap_state netmap;
+
+static struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE];
+static const int next_fd_start = RLIMIT_NOFILE + 1;
+static rte_spinlock_t netmap_lock;
+
+#define IDX_TO_FD(x) ((x) + next_fd_start)
+#define FD_TO_IDX(x) ((x) - next_fd_start)
+#define FD_VALID(x) ((x) >= next_fd_start && \
+ (x) < (typeof (x))(RTE_DIM(fd_port) + next_fd_start))
+
+#define PORT_NUM_RINGS (2 * netmap.conf.max_rings)
+#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots)
+
+#define BUF_IDX(port, ring, slot) \
+ (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \
+ (slot))
+
+#define NETMAP_IF_RING_OFS(rid, rings, slots) ({\
+ struct netmap_if *_if; \
+ struct netmap_ring *_rg; \
+ sizeof(*_if) + \
+ (rings) * sizeof(_if->ring_ofs[0]) + \
+ (rid) * sizeof(*_rg) + \
+ (slots) * sizeof(_rg->slot[0]); \
+ })
+
+static void netmap_unregif(uint32_t idx, uint32_t port);
+
+
+/*
+ * Convert a netmap interface name, which for this library is a decimal port
+ * number, into a port id. Returns 0 and stores the id in *port on success,
+ * -EINVAL when the name is not a plain number or does not index ports[].
+ */
+static int32_t
+ifname_to_portid(const char *ifname, uint16_t *port)
+{
+	char *tail;
+	uint64_t id;
+
+	errno = 0;
+	id = strtoul(ifname, &tail, 10);
+
+	/* no digits at all, or trailing characters after the number */
+	if (tail == ifname)
+		return -EINVAL;
+	if (*tail != '\0')
+		return -EINVAL;
+
+	/* out of range for the port table, or strtoul() reported an error */
+	if (id >= RTE_DIM(ports) || errno != 0)
+		return -EINVAL;
+
+	*port = id;
+	return 0;
+}
+
+/**
+ * Copy the data of a dpdk mbuf into the buffer of slot 'index' of Netmap
+ * ring r and record its length in the slot.
+ * A packet longer than the ring's buffer size is stored as a zero-length
+ * slot. Only the first segment is considered: mbuf chains are not supported.
+ */
+static void
+mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index)
+{
+	uint16_t length = rte_pktmbuf_data_len(mbuf);
+
+	if (length > r->nr_buf_size)
+		length = 0;
+
+	r->slot[index].len = length;
+	rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx),
+		rte_pktmbuf_mtod(mbuf, char *), length);
+}
+
+/**
+ * Fill a dpdk mbuf with the packet held in the buffer of slot 'index' of
+ * Netmap ring r. The mbuf is reset first; when it cannot hold the slot's
+ * length it is left empty.
+ * The caller owns the mbuf (allocation and release).
+ * Note that mbuf chains are not supported.
+ */
+static void
+slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf)
+{
+	const uint16_t length = r->slot[index].len;
+	char *dst;
+
+	rte_pktmbuf_reset(mbuf);
+
+	dst = rte_pktmbuf_append(mbuf, length);
+	if (dst == NULL)
+		return;
+
+	rte_memcpy(dst, NETMAP_BUF(r, r->slot[index].buf_idx), length);
+}
+
+/*
+ * Claim the first free entry of fd_port[] and return the fake file
+ * descriptor that maps to it, or -ENOMEM when every entry is in use.
+ * The caller is expected to hold netmap_lock.
+ */
+static int32_t
+fd_reserve(void)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_DIM(fd_port); i++) {
+		if (fd_port[i].port == FD_PORT_FREE) {
+			fd_port[i].port = FD_PORT_RSRV;
+			return IDX_TO_FD(i);
+		}
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Give a fake file descriptor back. A port still registered on it is
+ * unregistered first. Returns 0 on success, -EINVAL when fd is not one of
+ * ours or is not currently reserved.
+ */
+static int32_t
+fd_release(int32_t fd)
+{
+	uint32_t idx, port;
+
+	if (!FD_VALID(fd))
+		return -EINVAL;
+
+	idx = FD_TO_IDX(fd);
+	port = fd_port[idx].port;
+	if (port == FD_PORT_FREE)
+		return -EINVAL;
+
+	/* if we still have a valid port attached, release the port */
+	if (port < RTE_DIM(ports) && ports[port].fd == idx)
+		netmap_unregif(idx, port);
+
+	fd_port[idx].port = FD_PORT_FREE;
+	return 0;
+}
+
+/*
+ * Validate a struct nmreq and resolve the port it names.
+ *
+ * Returns 0 and stores the port id in *port on success. Returns -EINVAL when
+ * req is NULL, when its version differs from NETMAP_API (the field is then
+ * overwritten with the supported version), or when the port has not been
+ * set up (its pool is NULL); returns the ifname_to_portid() error when the
+ * name is not a valid port number.
+ */
+static int
+check_nmreq(struct nmreq *req, uint16_t *port)
+{
+ int32_t rc;
+ uint16_t portid;
+
+ if (req == NULL)
+ return -EINVAL;
+
+ if (req->nr_version != NETMAP_API) {
+ /* tell the caller which API version is supported */
+ req->nr_version = NETMAP_API;
+ return -EINVAL;
+ }
+
+ /* NOTE(review): the message names NIOCGINFO although ioctl_niocregif() calls this helper too */
+ if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) {
+ RTE_LOG(ERR, USER1, "Invalid interface name:\"%s\" "
+ "in NIOCGINFO call\n", req->nr_name);
+ return rc;
+ }
+
+ if (ports[portid].pool == NULL) {
+ RTE_LOG(ERR, USER1, "Misconfigured portid %u\n", portid);
+ return -EINVAL;
+ }
+
+ *port = portid;
+ return 0;
+}
+
+/**
+ * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an interface
+ * name (a port number in our case), fill the struct nmreq in with advisory
+ * information about the interface: number of rings and their size, total memory
+ * required in the map, ...
+ * Those are preconfigured using rte_eth_{,tx,rx}conf and
+ * rte_netmap_port_conf structures
+ * and calls to rte_netmap_init_port() in the Netmap application.
+ *
+ * Returns 0 on success or the negative error reported by check_nmreq().
+ */
+static int
+ioctl_niocginfo(__rte_unused int fd, void * param)
+{
+ uint16_t portid;
+ struct nmreq *req;
+ int32_t rc;
+
+ req = (struct nmreq *)param;
+ if ((rc = check_nmreq(req, &portid)) != 0)
+ return rc;
+
+ /* ring counts are reported minus one, matching netmap_regif() */
+ req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1);
+ req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1);
+ req->nr_tx_slots = ports[portid].nr_tx_slots;
+ req->nr_rx_slots = ports[portid].nr_rx_slots;
+
+ /* in the current implementation all NETIFs share one region. */
+ req->nr_memsize = netmap.mem_sz;
+ req->nr_offset = 0;
+
+ return 0;
+}
+
+/*
+ * Initialise the header and the slots of one Netmap ring.
+ *
+ * buf_ofs is set to the distance from the ring to the start of the shared
+ * buffer area, and every slot is given the buffer reserved for
+ * (port, ringid, slot) with a zero length and cleared flags.
+ * ring->avail is left for the caller to set.
+ */
+static void
+netmap_ring_setup(struct netmap_ring *ring, uint16_t port, uint32_t ringid,
+	uint32_t num_slots)
+{
+	uint32_t j;
+
+	ring->buf_ofs = netmap.buf_start - (uintptr_t)ring;
+	ring->num_slots = num_slots;
+	ring->cur = 0;
+	ring->reserved = 0;
+	ring->nr_buf_size = netmap.conf.max_bufsz;
+	ring->flags = 0;
+	ring->ts.tv_sec = 0;
+	ring->ts.tv_usec = 0;
+
+	for (j = 0; j < ring->num_slots; j++) {
+		ring->slot[j].buf_idx = BUF_IDX(port, ringid, j);
+		ring->slot[j].len = 0;
+		/* per-slot flags; the ring-level flags were cleared above */
+		ring->slot[j].flags = 0;
+	}
+}
+
+/*
+ * Attach 'port' to the fake file descriptor slot 'idx' and start the device.
+ *
+ * The port's netmap_if area is wiped and rebuilt: name, version, ring counts,
+ * then the TX rings followed by the RX rings, each with its offset recorded
+ * in ring_ofs[]. On success the fd <--> port association is stored and the
+ * request is updated with the size of the shared region and the offset of
+ * this interface inside it.
+ *
+ * Returns 0 on success, -EBUSY when the port or the fd is already in use,
+ * -EINVAL for a non-zero nr_ringid, or the rte_eth_dev_start() error.
+ * The caller is expected to hold netmap_lock.
+ */
+static int
+netmap_regif(struct nmreq *req, uint32_t idx, uint16_t port)
+{
+ struct netmap_if *nmif;
+ struct netmap_ring *ring;
+ uint32_t i, slots, start_ring;
+ int32_t rc;
+
+ /* an unused port holds UINT32_MAX here, which is never a valid index */
+ if (ports[port].fd < RTE_DIM(fd_port)) {
+ RTE_LOG(ERR, USER1, "port %u already in use by fd: %u\n",
+ port, IDX_TO_FD(ports[port].fd));
+ return -EBUSY;
+ }
+ if (fd_port[idx].port != FD_PORT_RSRV) {
+ RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n",
+ IDX_TO_FD(idx));
+ return -EBUSY;
+ }
+
+ nmif = ports[port].nmif;
+
+ /* setup netmap_if fields. */
+ memset(nmif, 0, netmap.netif_memsz);
+
+ /* only ALL rings supported right now. */
+ if (req->nr_ringid != 0)
+ return -EINVAL;
+
+ strlcpy(nmif->ni_name, req->nr_name, sizeof(nmif->ni_name));
+ nmif->ni_version = req->nr_version;
+
+ /* Netmap uses ni_(r|t)x_rings + 1 */
+ nmif->ni_rx_rings = ports[port].nr_rx_rings - 1;
+ nmif->ni_tx_rings = ports[port].nr_tx_rings - 1;
+
+ /*
+ * Setup TX rings and slots.
+ * Refer to the comments in netmap.h for details
+ */
+
+ /* running count of slots placed before the ring being set up */
+ slots = 0;
+ for (i = 0; i < nmif->ni_tx_rings + 1; i++) {
+
+ nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
+ PORT_NUM_RINGS, slots);
+
+ ring = NETMAP_TXRING(nmif, i);
+ netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots);
+ /* a fresh TX ring is entirely available to the application */
+ ring->avail = ring->num_slots;
+
+ slots += ports[port].nr_tx_slots;
+ }
+
+ /*
+ * Setup RX rings and slots.
+ * Refer to the comments in netmap.h for details
+ */
+
+ /* RX rings continue the ring numbering after the last TX ring */
+ start_ring = i;
+
+ for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) {
+
+ nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
+ PORT_NUM_RINGS, slots);
+
+ ring = NETMAP_RXRING(nmif, (i - start_ring));
+ netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots);
+ /* a fresh RX ring holds no received packets yet */
+ ring->avail = 0;
+
+ slots += ports[port].nr_rx_slots;
+ }
+
+ if ((rc = rte_eth_dev_start(port)) < 0) {
+ RTE_LOG(ERR, USER1,
+ "Couldn't start ethernet device %s (error %d)\n",
+ req->nr_name, rc);
+ return rc;
+ }
+
+ /* setup fd <--> port relationship. */
+ ports[port].fd = idx;
+ fd_port[idx].port = port;
+
+ req->nr_memsize = netmap.mem_sz;
+ req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem;
+
+ return 0;
+}
+
+/**
+ * Simulate a Netmap NIOCREGIF ioctl: validate the request, then register the
+ * port it names on the given file descriptor while holding netmap_lock.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+ioctl_niocregif(int32_t fd, void * param)
+{
+	struct nmreq *req = (struct nmreq *)param;
+	uint16_t portid;
+	int32_t rc;
+
+	rc = check_nmreq(req, &portid);
+	if (rc != 0)
+		return rc;
+
+	rte_spinlock_lock(&netmap_lock);
+	rc = netmap_regif(req, FD_TO_IDX(fd), portid);
+	rte_spinlock_unlock(&netmap_lock);
+
+	return rc;
+}
+
+/*
+ * Break the association between fd slot 'idx' and 'port': the slot goes back
+ * to the reserved (open but unattached) state, the port is marked unused and
+ * the ethernet device is stopped.
+ */
+static void
+netmap_unregif(uint32_t idx, uint32_t port)
+{
+ fd_port[idx].port = FD_PORT_RSRV;
+ ports[port].fd = UINT32_MAX;
+ rte_eth_dev_stop(port);
+}
+
+/**
+ * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap
+ * mode back in "normal" mode. In our case, we just stop the port associated
+ * with this file descriptor.
+ * Returns 0 on success, -EINVAL when no port is registered on fd.
+ */
+static int
+ioctl_niocunregif(int fd)
+{
+	const uint32_t idx = FD_TO_IDX(fd);
+	uint32_t port;
+	int32_t rc = -EINVAL;
+
+	rte_spinlock_lock(&netmap_lock);
+
+	port = fd_port[idx].port;
+	if (port >= RTE_DIM(ports) || ports[port].fd != idx) {
+		RTE_LOG(ERR, USER1,
+			"%s: %d is not associated with valid port\n",
+			__func__, fd);
+	} else {
+		netmap_unregif(idx, port);
+		rc = 0;
+	}
+
+	rte_spinlock_unlock(&netmap_lock);
+	return rc;
+}
+
+/**
+ * A call to rx_sync_ring will try to fill a Netmap RX ring with as many
+ * packets as it can hold coming from its dpdk port, limited to max_burst
+ * packets per call.
+ *
+ * Received mbufs are copied into the free slots that follow the available
+ * ones and are then released; ring->avail grows by the number of packets
+ * stored. Always returns 0: rte_eth_rx_burst() reports an unsigned packet
+ * count and has no error return.
+ */
+static inline int
+rx_sync_ring(struct netmap_ring *ring, uint16_t port, uint16_t ring_number,
+	uint16_t max_burst)
+{
+	uint16_t i, n_rx;
+	uint16_t burst_size;
+	uint32_t cur_slot, n_free_slots;
+	struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST];
+
+	n_free_slots = ring->num_slots - (ring->avail + ring->reserved);
+	n_free_slots = RTE_MIN(n_free_slots, max_burst);
+	/* first free slot; num_slots is a power of two so the mask wraps */
+	cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);
+
+	while (n_free_slots) {
+		burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs));
+
+		/* receive up to burst_size packets from the NIC's queue */
+		n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs,
+			burst_size);
+
+		/* queue drained */
+		if (n_rx == 0)
+			break;
+
+		/* Put those n_rx packets in the Netmap structures */
+		for (i = 0; i < n_rx; i++) {
+			mbuf_to_slot(rx_mbufs[i], ring, cur_slot);
+			rte_pktmbuf_free(rx_mbufs[i]);
+			cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
+		}
+
+		/* Update the Netmap ring structure to reflect the change */
+		ring->avail += n_rx;
+		n_free_slots -= n_rx;
+	}
+
+	return 0;
+}
+
+/*
+ * Refill every RX ring of 'port' from the device and return the total number
+ * of slots available for reading across those rings.
+ */
+static inline int
+rx_sync_if(uint32_t port)
+{
+	struct netmap_if *nifp = ports[port].nmif;
+	const uint16_t burst = ports[port].rx_burst;
+	uint32_t ring_id;
+	uint32_t total = 0;
+
+	for (ring_id = 0; ring_id < nifp->ni_rx_rings + 1; ring_id++) {
+		struct netmap_ring *r = NETMAP_RXRING(nifp, ring_id);
+
+		rx_sync_ring(r, port, (uint16_t)ring_id, burst);
+		total += r->avail;
+	}
+
+	return total;
+}
+
+/**
+ * Simulate a Netmap NIOCRXSYNC ioctl: synchronise the RX rings of the port
+ * registered on fd. Returns the rx_sync_if() result, or -EINVAL when no port
+ * is registered on fd.
+ */
+static int
+ioctl_niocrxsync(int fd)
+{
+	const uint32_t idx = FD_TO_IDX(fd);
+	const uint32_t port = fd_port[idx].port;
+
+	if (port >= RTE_DIM(ports) || ports[port].fd != idx)
+		return -EINVAL;
+
+	return rx_sync_if(port);
+}
+
+/**
+ * A call to tx_sync_ring will try to empty a Netmap TX ring by converting its
+ * buffers into rte_mbufs and sending them out on the ring's dpdk port.
+ *
+ * At most max_burst slots are processed per call. Mbufs are taken from
+ * 'pool'; those the device does not accept are freed and the remaining slots
+ * are left for a later call.
+ *
+ * Returns 0 on success, -1 when an mbuf could not be allocated (the mbufs
+ * already allocated for the current burst are returned to the pool).
+ */
+static int
+tx_sync_ring(struct netmap_ring *ring, uint16_t port, uint16_t ring_number,
+	struct rte_mempool *pool, uint16_t max_burst)
+{
+	uint32_t i, n_tx;
+	uint16_t burst_size;
+	uint32_t cur_slot, n_used_slots;
+	struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST];
+
+	n_used_slots = ring->num_slots - ring->avail;
+	n_used_slots = RTE_MIN(n_used_slots, max_burst);
+	/* first slot waiting to be sent; num_slots is a power of two */
+	cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);
+
+	while (n_used_slots) {
+		burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs));
+
+		for (i = 0; i < burst_size; i++) {
+			tx_mbufs[i] = rte_pktmbuf_alloc(pool);
+			if (tx_mbufs[i] == NULL)
+				goto err;
+
+			slot_to_mbuf(ring, cur_slot, tx_mbufs[i]);
+			cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
+		}
+
+		n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs,
+			burst_size);
+
+		/* Update the Netmap ring structure to reflect the change */
+		ring->avail += n_tx;
+		n_used_slots -= n_tx;
+
+		/* Return the mbufs that failed to transmit to their pool */
+		if (unlikely(n_tx != burst_size)) {
+			for (i = n_tx; i < burst_size; i++)
+				rte_pktmbuf_free(tx_mbufs[i]);
+			break;
+		}
+	}
+
+	return 0;
+
+err:
+	/* tx_mbufs[0 .. i-1] were allocated for this burst and never sent */
+	while (i-- > 0)
+		rte_pktmbuf_free(tx_mbufs[i]);
+
+	RTE_LOG(ERR, USER1,
+		"Couldn't get mbuf from mempool is the mempool too small?\n");
+	return -1;
+}
+
+/*
+ * Flush every TX ring of 'port' to the device and return the total number of
+ * slots available for writing across those rings.
+ */
+static int
+tx_sync_if(uint32_t port)
+{
+	struct netmap_if *nifp = ports[port].nmif;
+	struct rte_mempool *mp = ports[port].pool;
+	const uint16_t burst = ports[port].tx_burst;
+	uint32_t ring_id;
+	uint32_t total = 0;
+
+	for (ring_id = 0; ring_id < nifp->ni_tx_rings + 1; ring_id++) {
+		struct netmap_ring *r = NETMAP_TXRING(nifp, ring_id);
+
+		tx_sync_ring(r, port, (uint16_t)ring_id, mp, burst);
+		total += r->avail;
+	}
+
+	return total;
+}
+
+/**
+ * Simulate a Netmap NIOCTXSYNC ioctl: synchronise the TX rings of the port
+ * registered on fd. Returns the tx_sync_if() result, or -EINVAL when no port
+ * is registered on fd.
+ */
+static inline int
+ioctl_nioctxsync(int fd)
+{
+	const uint32_t idx = FD_TO_IDX(fd);
+	const uint32_t port = fd_port[idx].port;
+
+	if (port >= RTE_DIM(ports) || ports[port].fd != idx)
+		return -EINVAL;
+
+	return tx_sync_if(port);
+}
+
+/**
+ * Initialise the library from the given configuration.
+ *
+ * One zeroed region is allocated on conf->socket_id. It starts with a
+ * netmap_if/ring area for each of the RTE_MAX_ETHPORTS ports and continues,
+ * cache-line aligned, with the packet buffers (one per slot, conf->max_bufsz
+ * bytes each). The configuration is copied, the lock initialised, and every
+ * port and fake file descriptor is marked unused.
+ *
+ * Returns 0 on success, -ENOMEM when the region would exceed UINT32_MAX
+ * bytes or cannot be allocated.
+ */
+int
+rte_netmap_init(const struct rte_netmap_conf *conf)
+{
+ size_t buf_ofs, nmif_sz, sz;
+ size_t port_rings, port_slots, port_bufs;
+ uint32_t i, port_num;
+
+ port_num = RTE_MAX_ETHPORTS;
+ /* each port has max_rings TX rings plus max_rings RX rings */
+ port_rings = 2 * conf->max_rings;
+ port_slots = port_rings * conf->max_slots;
+ port_bufs = port_slots;
+
+ /* size of one port's netmap_if, ring headers and slots */
+ nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots);
+ sz = nmif_sz * port_num;
+
+ buf_ofs = RTE_ALIGN_CEIL(sz, RTE_CACHE_LINE_SIZE);
+ sz = buf_ofs + port_bufs * conf->max_bufsz * port_num;
+
+ /* mem_sz is 32 bits wide, so larger regions cannot be described */
+ if (sz > UINT32_MAX ||
+ (netmap.mem = rte_zmalloc_socket(__func__, sz,
+ RTE_CACHE_LINE_SIZE, conf->socket_id)) == NULL) {
+ RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n",
+ __func__, sz);
+ return -ENOMEM;
+ }
+
+ netmap.mem_sz = sz;
+ netmap.netif_memsz = nmif_sz;
+ netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs;
+ netmap.conf = *conf;
+
+ rte_spinlock_init(&netmap_lock);
+
+ /* Mark all ports as unused and set NETIF pointer. */
+ for (i = 0; i != RTE_DIM(ports); i++) {
+ ports[i].fd = UINT32_MAX;
+ ports[i].nmif = (struct netmap_if *)
+ ((uintptr_t)netmap.mem + nmif_sz * i);
+ }
+
+ /* Mark all fd_ports as unused. */
+ for (i = 0; i != RTE_DIM(fd_port); i++) {
+ fd_port[i].port = FD_PORT_FREE;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Configure a DPDK port for later use through the netmap API.
+ *
+ * The ring and slot counts are validated against the limits given to
+ * rte_netmap_init(); slot counts are rounded up to a power of two. The
+ * device is configured, its TX and RX queues are set up, and the resulting
+ * configuration is stored in the library's private port table.
+ * Note that conf->eth_conf is updated when the device supports
+ * DEV_TX_OFFLOAD_MBUF_FAST_FREE.
+ *
+ * @param portid
+ *   DPDK port id, below RTE_MAX_ETHPORTS.
+ * @param conf
+ *   Port configuration; conf->pool is used for the RX queues.
+ * @return
+ *   0 on success, -EINVAL for invalid parameters, or the negative error of
+ *   the failing ethdev call.
+ */
+int
+rte_netmap_init_port(uint16_t portid, const struct rte_netmap_port_conf *conf)
+{
+	int32_t ret;
+	uint16_t i;
+	uint32_t rx_slots32, tx_slots32;
+	uint16_t rx_slots, tx_slots;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	if (conf == NULL ||
+			portid >= RTE_DIM(ports) ||
+			conf->nr_tx_rings > netmap.conf.max_rings ||
+			conf->nr_rx_rings > netmap.conf.max_rings) {
+		RTE_LOG(ERR, USER1, "%s(%u): invalid parameters\n",
+			__func__, portid);
+		return -EINVAL;
+	}
+
+	/* range-check the full 32-bit values before narrowing to uint16_t */
+	rx_slots32 = rte_align32pow2(conf->nr_rx_slots);
+	tx_slots32 = rte_align32pow2(conf->nr_tx_slots);
+
+	if (tx_slots32 > netmap.conf.max_slots ||
+			rx_slots32 > netmap.conf.max_slots ||
+			tx_slots32 > UINT16_MAX || rx_slots32 > UINT16_MAX) {
+		RTE_LOG(ERR, USER1, "%s(%u): invalid parameters\n",
+			__func__, portid);
+		return -EINVAL;
+	}
+
+	rx_slots = (uint16_t)rx_slots32;
+	tx_slots = (uint16_t)tx_slots32;
+
+	rte_eth_dev_info_get(portid, &dev_info);
+	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+		conf->eth_conf->txmode.offloads |=
+			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+	ret = rte_eth_dev_configure(portid, conf->nr_rx_rings,
+		conf->nr_tx_rings, conf->eth_conf);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, USER1, "Couldn't configure port %u\n", portid);
+		return ret;
+	}
+
+	ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &rx_slots, &tx_slots);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, USER1,
+			"Couldn't adjust number of descriptors for port %u\n",
+			portid);
+		return ret;
+	}
+
+	/* the driver may have raised the counts beyond what the rings can hold */
+	if (tx_slots > netmap.conf.max_slots ||
+			rx_slots > netmap.conf.max_slots) {
+		RTE_LOG(ERR, USER1, "%s(%u): invalid parameters\n",
+			__func__, portid);
+		return -EINVAL;
+	}
+
+	rxq_conf = dev_info.default_rxconf;
+	rxq_conf.offloads = conf->eth_conf->rxmode.offloads;
+	txq_conf = dev_info.default_txconf;
+	txq_conf.offloads = conf->eth_conf->txmode.offloads;
+
+	/* TX and RX ring counts may differ, so each gets its own loop */
+	for (i = 0; i < conf->nr_tx_rings; i++) {
+		ret = rte_eth_tx_queue_setup(portid, i, tx_slots,
+			conf->socket_id, &txq_conf);
+
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"fail to configure TX queue %u of port %u\n",
+				i, portid);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < conf->nr_rx_rings; i++) {
+		ret = rte_eth_rx_queue_setup(portid, i, rx_slots,
+			conf->socket_id, &rxq_conf, conf->pool);
+
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"fail to configure RX queue %u of port %u\n",
+				i, portid);
+			return ret;
+		}
+	}
+
+	/* copy config to the private storage. */
+	ports[portid].eth_conf = conf->eth_conf[0];
+	ports[portid].pool = conf->pool;
+	ports[portid].socket_id = conf->socket_id;
+	ports[portid].nr_tx_rings = conf->nr_tx_rings;
+	ports[portid].nr_rx_rings = conf->nr_rx_rings;
+	ports[portid].nr_tx_slots = tx_slots;
+	ports[portid].nr_rx_slots = rx_slots;
+	ports[portid].tx_burst = conf->tx_burst;
+	ports[portid].rx_burst = conf->rx_burst;
+
+	return 0;
+}
+
+/**
+ * Close a fake netmap file descriptor, unregistering the port attached to it
+ * if there is one.
+ *
+ * @return
+ *   0 on success; -1 with errno set (EINVAL) when fd is not an open
+ *   descriptor of this library.
+ */
+int
+rte_netmap_close(int fd)
+{
+	int32_t rc;
+
+	rte_spinlock_lock(&netmap_lock);
+	rc = fd_release(fd);
+	rte_spinlock_unlock(&netmap_lock);
+
+	if (rc < 0) {
+		errno = -rc;
+		rc = -1;
+	}
+	return rc;
+}
+
+/*
+ * ioctl() replacement for fake netmap file descriptors.
+ *
+ * Dispatches NIOCGINFO, NIOCREGIF, NIOCUNREGIF, NIOCRXSYNC and NIOCTXSYNC to
+ * their handlers. Returns 0 on success and -1 with errno set on failure:
+ * EBADF for a descriptor outside the library's range, ENOTTY for an unknown
+ * request, otherwise the handler's error code.
+ */
+int rte_netmap_ioctl(int fd, uint32_t op, void *param)
+{
+ int ret;
+
+ if (!FD_VALID(fd)) {
+ errno = EBADF;
+ return -1;
+ }
+
+ switch (op) {
+
+ case NIOCGINFO:
+ ret = ioctl_niocginfo(fd, param);
+ break;
+
+ case NIOCREGIF:
+ ret = ioctl_niocregif(fd, param);
+ break;
+
+ case NIOCUNREGIF:
+ ret = ioctl_niocunregif(fd);
+ break;
+
+ case NIOCRXSYNC:
+ ret = ioctl_niocrxsync(fd);
+ break;
+
+ case NIOCTXSYNC:
+ ret = ioctl_nioctxsync(fd);
+ break;
+
+ default:
+ ret = -ENOTTY;
+ }
+
+ /* handlers return negative error codes; positive sync counts collapse to 0 */
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/**
+ * mmap() replacement for fake netmap file descriptors.
+ *
+ * Nothing is mapped: the function returns a pointer into the region
+ * allocated by rte_netmap_init(), 'offset' bytes from its start.
+ *
+ * @return
+ *   The address on success; MAP_FAILED with errno set to EINVAL when fd is
+ *   outside the library's range, [offset, offset + length) does not lie
+ *   inside the region, prot lacks PROT_READ or PROT_WRITE, or MAP_FIXED is
+ *   requested together with a non-NULL addr.
+ */
+void *
+rte_netmap_mmap(void *addr, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	static const int cprot = PROT_WRITE | PROT_READ;
+
+	/*
+	 * The range test is written so that neither a negative offset nor an
+	 * overflowing length + offset can slip through.
+	 */
+	if (!FD_VALID(fd) || offset < 0 ||
+			(uint64_t)offset > netmap.mem_sz ||
+			length > netmap.mem_sz - (uint64_t)offset ||
+			(prot & cprot) != cprot ||
+			((flags & MAP_FIXED) != 0 && addr != NULL)) {
+
+		errno = EINVAL;
+		return MAP_FAILED;
+	}
+
+	return (void *)((uintptr_t)netmap.mem + (uintptr_t)offset);
+}
+
+/**
+ * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that
+ * any attempt to use that file descriptor with the usual API will fail.
+ *
+ * Both arguments are ignored. Returns the descriptor on success, or -1 with
+ * errno set (ENOMEM) when no descriptor slot is free.
+ *
+ * NOTE(review): descriptors start at next_fd_start, which is
+ * RLIMIT_NOFILE + 1. RLIMIT_NOFILE from <sys/resource.h> is the resource
+ * selector passed to getrlimit(), not the process's descriptor limit, so the
+ * returned values can presumably collide with real descriptors - confirm.
+ */
+int
+rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags)
+{
+ int fd;
+
+ rte_spinlock_lock(&netmap_lock);
+ fd = fd_reserve();
+ rte_spinlock_unlock(&netmap_lock);
+
+ /* fd_reserve() reports failure as a negative error code */
+ if (fd < 0) {
+ errno = -fd;
+ fd = -1;
+ }
+ return fd;
+}
+
+/**
+ * poll() replacement for fake netmap file descriptors.
+ *
+ * For every entry whose descriptor has a port registered, the RX and/or TX
+ * rings are synchronised according to the requested events, and the matching
+ * events are reported in revents when slots are available. An entry whose
+ * descriptor has no port registered gets POLLERR. As with poll(2), revents
+ * is an output only: it is cleared before an entry is examined.
+ *
+ * Doesn't support timeout other than 0 or infinite (negative) timeout:
+ * 0 performs a single pass, a negative value repeats until at least one
+ * entry has something to report.
+ *
+ * @return
+ *   Number of entries with a non-zero revents, or -1 for a positive timeout.
+ */
+int
+rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout)
+{
+	int32_t count_it, ret;
+	uint32_t i, idx, port;
+	uint32_t want_rx, want_tx;
+
+	if (timeout > 0)
+		return -1;
+
+	ret = 0;
+	do {
+		for (i = 0; i < nfds; i++) {
+
+			count_it = 0;
+			fds[i].revents = 0;
+
+			if (!FD_VALID(fds[i].fd) || fds[i].events == 0)
+				continue;
+
+			idx = FD_TO_IDX(fds[i].fd);
+			if ((port = fd_port[idx].port) >= RTE_DIM(ports) ||
+					ports[port].fd != idx) {
+
+				fds[i].revents |= POLLERR;
+				ret++;
+				continue;
+			}
+
+			want_rx = fds[i].events & (POLLIN | POLLRDNORM);
+			want_tx = fds[i].events & (POLLOUT | POLLWRNORM);
+
+			if (want_rx && rx_sync_if(port) > 0) {
+				fds[i].revents = (uint16_t)
+					(fds[i].revents | want_rx);
+				count_it = 1;
+			}
+			if (want_tx && tx_sync_if(port) > 0) {
+				fds[i].revents = (uint16_t)
+					(fds[i].revents | want_tx);
+				count_it = 1;
+			}
+
+			ret += count_it;
+		}
+	}
+	/* timeout is <= 0 here; only a blocking call with no events repeats */
+	while (ret == 0 && timeout < 0);
+
+	return ret;
+}
diff --git a/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h
new file mode 100644
index 000000000..12b618b68
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/lib/compat_netmap.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_COMPAT_NETMAP_H_
+/* Define the guard symbol so a second inclusion skips the declarations. */
+#define _RTE_COMPAT_NETMAP_H_
+
+#include <poll.h>
+#include <linux/ioctl.h>
+#include <net/if.h>
+
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+
+#include "netmap.h"
+#include "netmap_user.h"
+
+/**
+ * One can overwrite Netmap macros here as needed
+ */
+
+/**
+ * Library-wide limits handed to rte_netmap_init().
+ */
+struct rte_netmap_conf {
+ int32_t socket_id; /* NOTE(review): presumably the NUMA socket to allocate from -- confirm */
+ uint32_t max_rings; /* number of rings(queues) per netmap_if(port) */
+ uint32_t max_slots; /* number of slots(descriptors) per netmap ring. */
+ uint16_t max_bufsz; /* size of each netmap buffer. */
+};
+
+/**
+ * Per-port settings handed to rte_netmap_init_port().
+ *
+ * NOTE(review): the field notes below are inferred from the names only;
+ * confirm them against rte_netmap_init_port().
+ */
+struct rte_netmap_port_conf {
+ struct rte_eth_conf *eth_conf; /* presumably the ethdev configuration for the port */
+ struct rte_mempool *pool; /* presumably the mempool backing the port's queues */
+ int32_t socket_id;
+ uint16_t nr_tx_rings; /* presumably the number of TX rings (queues) */
+ uint16_t nr_rx_rings; /* presumably the number of RX rings (queues) */
+ uint32_t nr_tx_slots; /* presumably the number of slots per TX ring */
+ uint32_t nr_rx_slots; /* presumably the number of slots per RX ring */
+ uint16_t tx_burst;
+ uint16_t rx_burst;
+};
+
+/* Library and per-port setup. */
+int rte_netmap_init(const struct rte_netmap_conf *conf);
+int rte_netmap_init_port(uint16_t portid,
+ const struct rte_netmap_port_conf *conf);
+
+/*
+ * Stand-ins for close/ioctl/open/poll/mmap operating on the library's own
+ * descriptors:
+ *  - rte_netmap_open() ignores both arguments and returns a "fake"
+ *    descriptor, or -1 with errno set on failure.
+ *  - rte_netmap_poll() accepts only a zero or negative timeout and returns
+ *    -1 for a positive one.
+ *  - rte_netmap_mmap() returns MAP_FAILED with errno set to EINVAL when its
+ *    arguments are rejected (for instance when prot lacks PROT_READ or
+ *    PROT_WRITE).
+ */
+int rte_netmap_close(int fd);
+int rte_netmap_ioctl(int fd, uint32_t op, void *param);
+int rte_netmap_open(const char *pathname, int flags);
+int rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout);
+void *rte_netmap_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset);
+
+#endif /* _RTE_COMPAT_NETMAP_H_ */
diff --git a/src/seastar/dpdk/examples/netmap_compat/meson.build b/src/seastar/dpdk/examples/netmap_compat/meson.build
new file mode 100644
index 000000000..c370d7476
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+# Example app currently unsupported by meson build
+build = false
diff --git a/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h
new file mode 100644
index 000000000..677c8a9fb
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. Neither the name of the authors nor the names of their contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $
+ * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The data structures used by netmap are shown below. Those in
+ * capital letters are in an mmap()ed area shared with userspace,
+ * while others are private to the kernel.
+ * Shared structures do not contain pointers but only memory
+ * offsets, so that addressing is portable between kernel and userspace.
+
+
+ softc
++----------------+
+| standard fields|
+| if_pspare[0] ----------+
++----------------+ |
+ |
++----------------+<------+
+|(netmap_adapter)|
+| | netmap_kring
+| tx_rings *--------------------------------->+---------------+
+| | netmap_kring | ring *---------.
+| rx_rings *--------->+---------------+ | nr_hwcur | |
++----------------+ | ring *--------. | nr_hwavail | V
+ | nr_hwcur | | | selinfo | |
+ | nr_hwavail | | +---------------+ .
+ | selinfo | | | ... | .
+ +---------------+ | |(ntx+1 entries)|
+ | .... | | | |
+ |(nrx+1 entries)| | +---------------+
+ | | |
+ KERNEL +---------------+ |
+ |
+ ====================================================================
+ |
+ USERSPACE | NETMAP_RING
+ +---->+-------------+
+ / | cur |
+ NETMAP_IF (nifp, one per file desc.) / | avail |
+ +---------------+ / | buf_ofs |
+ | ni_tx_rings | / +=============+
+ | ni_rx_rings | / | buf_idx | slot[0]
+ | | / | len, flags |
+ | | / +-------------+
+ +===============+ / | buf_idx | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | len, flags |
+ | txring_ofs[1] | +-------------+
+ (num_rings+1 entries) (nr_num_slots entries)
+ | txring_ofs[n] | | buf_idx | slot[n-1]
+ +---------------+ | len, flags |
+ | rxring_ofs[0] | +-------------+
+ | rxring_ofs[1] |
+ (num_rings+1 entries)
+ | rxring_ofs[n] |
+ +---------------+
+
+ * The private descriptor ('softc' or 'adapter') of each interface
+ * is extended with a "struct netmap_adapter" containing netmap-related
+ * info (see description in dev/netmap/netmap_kernel.h).
+ * Among other things, tx_rings and rx_rings point to the arrays of
+ * "struct netmap_kring" which in turn reach the various
+ * "struct netmap_ring", shared with userspace.
+
+ * The NETMAP_RING is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer, its length and some flags.
+ * In user space, the buffer address is computed as
+ * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
+ * In the kernel, buffers do not necessarily need to be contiguous,
+ * and the virtual and physical addresses are derived through
+ * a lookup table.
+ * To associate a different buffer to a slot, applications must
+ * write the new index in buf_idx, and set NS_BUF_CHANGED flag to
+ * make sure that the kernel updates the hardware ring as needed.
+ *
+ * Normally the driver is not requested to report the result of
+ * transmissions (this can dramatically speed up operation).
+ * However the user may request to report completion by setting
+ * NS_REPORT.
+ */
+/*
+ * One ring slot: the index of the buffer holding the packet, the packet
+ * length and per-slot flags (NS_* below).
+ */
+struct netmap_slot {
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* packet length, to be copied to/from the hw ring */
+ uint16_t flags; /* buf changed, etc. */
+#define NS_BUF_CHANGED 0x0001 /* must resync the map, buffer changed */
+#define NS_REPORT 0x0002 /* ask the hardware to report results
+ * e.g. by generating an interrupt
+ */
+};
+
+/*
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level, two fields are important: avail and cur.
+ *
+ * In TX rings:
+ * avail indicates the number of slots available for transmission.
+ * It is updated by the kernel after every netmap system call.
+ * It MUST BE decremented by the application when it appends a
+ * packet.
+ * cur indicates the slot to use for the next packet
+ * to send (i.e. the "tail" of the queue).
+ * It MUST BE incremented by the application before
+ * netmap system calls to reflect the number of newly
+ * sent packets.
+ * It is checked by the kernel on netmap system calls
+ * (normally unmodified by the kernel unless invalid).
+ *
+ * The kernel side of netmap uses two additional fields in its own
+ * private ring structure, netmap_kring:
+ * nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
+ * nr_hwavail is the number of slots known as available by the
+ * hardware. It is updated on an INTR (inc by the
+ * number of packets sent) and on a NIOCTXSYNC
+ * (decrease by nr_cur - nr_hwcur)
+ * A special case, nr_hwavail is -1 if the transmit
+ * side is idle (no pending transmits).
+ *
+ * In RX rings:
+ * avail is the number of packets available (possibly 0).
+ * It MUST BE decremented by the application when it consumes
+ * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ * cur indicates the first slot that contains a packet not
+ * processed yet (the "head" of the queue).
+ * It MUST BE incremented by the software when it consumes
+ * a packet.
+ * reserved indicates the number of buffers before 'cur'
+ * that the application has still in use. Normally 0,
+ * it MUST BE incremented by the application when it
+ * does not return the buffer immediately, and decremented
+ * when the buffer is finally freed.
+ *
+ * The kernel side of netmap uses two additional fields in the kring:
+ * nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
+ * nr_hwavail is the number of packets available. It is updated
+ * on INTR (inc by the number of new packets arrived)
+ * and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ *
+ * DATA OWNERSHIP/LOCKING:
+ * The netmap_ring is owned by the user program and it is only
+ * accessed or modified in the upper half of the kernel during
+ * a system call.
+ *
+ * The netmap_kring is only modified by the upper half of the kernel.
+ */
+struct netmap_ring {
+ /*
+ * buf_ofs is meant to be used through macros (see NETMAP_BUF).
+ * It contains the offset of the buffer region from this
+ * descriptor.
+ */
+ ssize_t buf_ofs;
+ uint32_t num_slots; /* number of slots in the ring. */
+ uint32_t avail; /* number of usable slots */
+ uint32_t cur; /* 'current' r/w position */
+ uint32_t reserved; /* not refilled before current */
+
+ uint16_t nr_buf_size; /* size of one buffer; NETMAP_BUF scales the index by it */
+ uint16_t flags;
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+
+ struct timeval ts; /* time of last *sync() */
+
+ /* the slots follow. This struct has variable size */
+ struct netmap_slot slot[0]; /* array of slots. */
+};
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll. We assume that each interface has the same number
+ * of receive and transmit queues.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ u_int ni_version; /* API version, currently unused */
+ u_int ni_rx_rings; /* number of rx rings */
+ u_int ni_tx_rings; /* if zero, same as ni_rx_rings */
+ /*
+ * The following array contains the offset of each netmap ring
+ * from this structure. The first ni_tx_rings+1 entries refer
+ * to the tx rings, the next ni_rx_rings+1 refer to the rx rings
+ * (the last entry in each block refers to the host stack rings).
+ * The area is filled up by the kernel on NIOCREGIF,
+ * and then only read by userspace code.
+ */
+ ssize_t ring_ofs[0];
+};
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ * the outputs are number of queues and number of descriptor
+ * for each queue (useful to set number of threads etc.).
+ *
+ * NIOCREGIF takes an interface name within a struct ifreq,
+ * and activates netmap mode on the interface (if possible).
+ *
+ * NIOCUNREGIF unregisters the interface associated to the fd.
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid
+ */
+
+/*
+ * struct nmreq overlays a struct ifreq.
+ * It is the argument carried by the NIOCGINFO and NIOCREGIF requests.
+ */
+struct nmreq {
+ char nr_name[IFNAMSIZ];
+ uint32_t nr_version; /* API version */
+#define NETMAP_API 3 /* current version */
+ uint32_t nr_offset; /* nifp offset in the shared region */
+ uint32_t nr_memsize; /* size of the shared region */
+ uint32_t nr_tx_slots; /* slots in tx rings */
+ uint32_t nr_rx_slots; /* slots in rx rings */
+ uint16_t nr_tx_rings; /* number of tx rings */
+ uint16_t nr_rx_rings; /* number of rx rings */
+ uint16_t nr_ringid; /* ring(s) we care about */
+#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
+#define NETMAP_SW_RING 0x2000 /* process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
+#define NETMAP_RING_MASK 0xfff /* the ring number */
+ uint16_t spare1;
+ uint32_t spare2[4];
+};
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions.
+ *
+ * NIOCGINFO and NIOCREGIF are read/write requests carrying a struct nmreq;
+ * the remaining requests are declared with _IO and carry no argument.
+ */
+#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCUNREGIF _IO('i', 147) /* interface unregister */
+#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#endif /* !NIOCREGIF */
+
+#endif /* _NET_NETMAP_H_ */
diff --git a/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h
new file mode 100644
index 000000000..f369592e3
--- /dev/null
+++ b/src/seastar/dpdk/examples/netmap_compat/netmap/netmap_user.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. Neither the name of the authors nor the names of their contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap_user.h 231198 2012-02-08 11:43:29Z luigi $
+ * $Id: netmap_user.h 10879 2012-04-12 22:48:59Z luigi $
+ *
+ * This header contains the macros used to manipulate netmap structures
+ * and packets in userspace. See netmap(4) for more information.
+ *
+ * The address of the struct netmap_if, say nifp, is computed from the
+ * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
+ * ioctl(fd, NIOCREGIF, &req);
+ * mem = mmap(0, ... );
+ * nifp = NETMAP_IF(mem, req.nr_offset);
+ * (so simple, we could just do it manually)
+ *
+ * From there:
+ * struct netmap_ring *NETMAP_TXRING(nifp, index)
+ * struct netmap_ring *NETMAP_RXRING(nifp, index)
+ * we can access ring->cur, ring->avail, ring->flags
+ *
+ * ring->slot[i] gives us the i-th slot (we can access
+ * directly len, flags, buf_idx)
+ *
+ * char *buf = NETMAP_BUF(ring, index) returns a pointer to
+ * the buffer with the given index
+ *
+ * Since rings are circular, we have macros to compute the next index
+ * i = NETMAP_RING_NEXT(ring, i);
+ */
+
+#ifndef _NET_NETMAP_USER_H_
+#define _NET_NETMAP_USER_H_
+
+/*
+ * Address of the struct netmap_if located 'o' bytes into the mapped region
+ * 'b'. The whole expansion is parenthesized so that postfix operators applied
+ * to the result (e.g. NETMAP_IF(b, o)->ni_name) bind to the cast pointer
+ * rather than to the char * sum.
+ */
+#define NETMAP_IF(b, o) ((struct netmap_if *)((char *)(b) + (o)))
+
+/* TX ring 'index' of 'nifp': nifp plus the byte offset stored in ring_ofs[index]. */
+#define NETMAP_TXRING(nifp, index) \
+ ((struct netmap_ring *)((char *)(nifp) + \
+ (nifp)->ring_ofs[index] ) )
+
+/*
+ * RX ring 'index' of 'nifp'. The RX offsets are looked up in ring_ofs[]
+ * after skipping ni_tx_rings + 1 entries. 'index' is parenthesized so that
+ * an argument containing a lower-precedence operator (e.g. "a ? b : c")
+ * still selects the intended entry.
+ */
+#define NETMAP_RXRING(nifp, index) \
+	((struct netmap_ring *)((char *)(nifp) + \
+	(nifp)->ring_ofs[(index) + (nifp)->ni_tx_rings + 1]))
+
+/* Address of buffer 'index': ring + buf_ofs + index * nr_buf_size. */
+#define NETMAP_BUF(ring, index) \
+ ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+/* Inverse of NETMAP_BUF: index of the buffer that starts at address 'buf'. */
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
+/* Slot index following 'i' in ring 'r', wrapping to 0 at num_slots. */
+#define NETMAP_RING_NEXT(r, i) \
+ ((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
+
+/*
+ * Index of the slot 'reserved' positions before 'cur', wrapping around the
+ * ring when cur is smaller than reserved.
+ */
+#define NETMAP_RING_FIRST_RESERVED(r) \
+ ( (r)->cur < (r)->reserved ? \
+ (r)->cur + (r)->num_slots - (r)->reserved : \
+ (r)->cur - (r)->reserved )
+
+/*
+ * Return 1 if the given tx ring is empty, i.e. at least num_slots - 1 of its
+ * slots are available.
+ */
+#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1)
+
+#endif /* _NET_NETMAP_USER_H_ */