summaryrefslogtreecommitdiffstats
path: root/src/seastar/dpdk/drivers/net/failsafe
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/seastar/dpdk/drivers/net/failsafe
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/drivers/net/failsafe')
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/Makefile46
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe.c414
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_args.c521
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_eal.c168
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_ether.c624
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_flow.c255
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_intr.c536
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_ops.c1260
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_private.h497
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/failsafe_rxtx.c178
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/meson.build23
-rw-r--r--src/seastar/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map4
12 files changed, 4526 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/failsafe/Makefile b/src/seastar/dpdk/drivers/net/failsafe/Makefile
new file mode 100644
index 000000000..0d840a272
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+EXPORT_MAP := rte_pmd_failsafe_version.map
+
+LIBABIVER := 1
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_intr.c
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUX),y)
+CFLAGS += -DLINUX
+else
+CFLAGS += -DBSD
+endif
+
+# No exported include files
+
+# Basic CFLAGS:
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -std=gnu99 -Wextra
+CFLAGS += -O3
+CFLAGS += -I.
+CFLAGS += -D_DEFAULT_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
+LDLIBS += -lpthread
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe.c
new file mode 100644
index 000000000..42dfaca30
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <stdbool.h>
+
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+
+#include "failsafe_private.h"
+
+int failsafe_logtype;
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_AUTONEG,
+};
+
+static int
+fs_sub_device_alloc(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+ int i;
+ struct sub_device *sdev;
+ uint8_t sdev_iterator;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ /* Initiate static sub devices linked list. */
+ for (i = 1; i < nb_subs; i++)
+ PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs + i;
+ PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs;
+
+ FOREACH_SUBDEV(sdev, sdev_iterator, dev) {
+ sdev->sdev_port_id = RTE_MAX_ETHPORTS;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(failsafe_hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ if (fs_lock(dev, 1) != 0)
+ goto reinstall;
+ ret = failsafe_eth_dev_state_sync(dev);
+ fs_unlock(dev, 1);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ failsafe_dev_remove(dev);
+reinstall:
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
+static int
+fs_mutex_init(struct fs_priv *priv)
+{
+ int ret;
+ pthread_mutexattr_t attr;
+
+ ret = pthread_mutexattr_init(&attr);
+ if (ret) {
+ ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
+ return ret;
+ }
+ /* Allow mutex relocks for the thread holding the mutex. */
+ ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+ if (ret) {
+ ERROR("Cannot set mutex type - %s", strerror(ret));
+ return ret;
+ }
+ ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
+ if (ret) {
+ ERROR("Cannot initiate mutex - %s", strerror(ret));
+ return ret;
+ }
+ return 0;
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u", socket_id);
+ params = rte_vdev_device_args(vdev);
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ return -1;
+ }
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = PRIV(dev);
+ priv->data = dev->data;
+ dev->dev_ops = &failsafe_ops;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ ret = fs_sub_device_alloc(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = rte_eth_dev_owner_new(&priv->my_owner.id);
+ if (ret) {
+ ERROR("Failed to get unique owner identifier");
+ goto free_args;
+ }
+ snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
+ FAILSAFE_OWNER_NAME);
+ DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
+ priv->my_owner.name, priv->my_owner.id);
+ ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+ failsafe_eth_new_event_callback,
+ dev);
+ if (ret) {
+ ERROR("Failed to register NEW callback");
+ goto free_args;
+ }
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto unregister_new_callback;
+ ret = fs_mutex_init(priv);
+ if (ret)
+ goto unregister_new_callback;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto unregister_new_callback;
+ }
+ mac = &dev->data->mac_addrs[0];
+ if (failsafe_mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto cancel_alarm;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+ PRIV(dev)->intr_handle = (struct rte_intr_handle){
+ .fd = -1,
+ .type = RTE_INTR_HANDLE_EXT,
+ };
+ rte_eth_dev_probing_finish(dev);
+ return 0;
+cancel_alarm:
+ failsafe_hotplug_alarm_cancel(dev);
+unregister_new_callback:
+ rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+ failsafe_eth_new_event_callback, dev);
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ /* mac_addrs must not be freed alone because part of dev_private */
+ dev->data->mac_addrs = NULL;
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+ failsafe_eth_new_event_callback, dev);
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
+ if (ret)
+ ERROR("Error while destroying hotplug mutex");
+ rte_free(PRIV(dev)->mcast_addrs);
+ /* mac_addrs must not be freed alone because part of dev_private */
+ dev->data->mac_addrs = NULL;
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static bool
+devargs_already_listed(struct rte_devargs *devargs)
+{
+ struct rte_devargs *list_da;
+
+ RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) {
+ if (strcmp(list_da->name, devargs->name) == 0)
+ /* devargs already in the list */
+ return true;
+ }
+ return false;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+ struct rte_eth_dev *eth_dev;
+ struct sub_device *sdev;
+ struct rte_devargs devargs;
+ uint8_t i;
+ int ret;
+
+ name = rte_vdev_device_name(vdev);
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+ strlen(rte_vdev_device_args(vdev)) == 0) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (!eth_dev) {
+ ERROR("Failed to probe %s", name);
+ return -1;
+ }
+ eth_dev->dev_ops = &failsafe_ops;
+ eth_dev->device = &vdev->device;
+ eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ /*
+ * Failsafe will attempt to probe all of its sub-devices.
+ * Any failure in sub-devices is not a fatal error.
+ * A sub-device can be plugged later.
+ */
+ FOREACH_SUBDEV(sdev, i, eth_dev) {
+ /* rebuild devargs to be able to get the bus name. */
+ ret = rte_devargs_parse(&devargs,
+ sdev->devargs.name);
+ if (ret != 0) {
+ ERROR("Failed to parse devargs %s",
+ devargs.name);
+ continue;
+ }
+ if (!devargs_already_listed(&devargs)) {
+ ret = rte_dev_probe(devargs.name);
+ if (ret != 0) {
+ ERROR("Failed to probe devargs %s",
+ devargs.name);
+ continue;
+ }
+ }
+ }
+ rte_eth_dev_probing_finish(eth_dev);
+ return 0;
+ }
+
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
+
+RTE_INIT(failsafe_init_log)
+{
+ failsafe_logtype = rte_log_register("pmd.net.failsafe");
+ if (failsafe_logtype >= 0)
+ rte_log_set_level(failsafe_logtype, RTE_LOG_NOTICE);
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_args.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 000000000..3351c5bca
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,521 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <rte_debug.h>
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_string_fns.h>
+
+#include "failsafe_private.h"
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+uint64_t failsafe_hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
+int failsafe_mac_from_arg;
+
+static const char * const pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_HOTPLUG_POLL_KVARG,
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_devargs_parse(d, args);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static void
+fs_sanitize_cmdline(char *args)
+{
+ char *nl;
+
+ nl = strrchr(args, '\n');
+ if (nl)
+ nl[0] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int ret;
+
+ RTE_ASSERT(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ sdev->cmdline = calloc(1, len);
+ if (sdev->cmdline == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ strlcpy(sdev->cmdline, cmdline, len);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = -errno;
+ ERROR("popen: %s", strerror(errno));
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ ret = -ENODEV;
+ goto ret_pclose;
+ }
+ fs_sanitize_cmdline(output);
+ if (output[0] == '\0') {
+ ret = -ENODEV;
+ goto ret_pclose;
+ }
+ ret = fs_parse_device(sdev, output);
+ if (ret)
+ ERROR("Parsing device '%s' failed", output);
+ret_pclose:
+ if (pclose(fp) == -1)
+ ERROR("pclose: %s", strerror(errno));
+ return ret;
+}
+
+static int
+fs_read_fd(struct sub_device *sdev, char *fd_str)
+{
+ FILE *fp = NULL;
+ int fd = -1;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ int err = -ENODEV;
+ int oflags;
+ int lcount;
+
+ RTE_ASSERT(fd_str != NULL || sdev->fd_str != NULL);
+ if (sdev->fd_str == NULL) {
+ sdev->fd_str = strdup(fd_str);
+ if (sdev->fd_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ }
+ errno = 0;
+ fd = strtol(fd_str, &fd_str, 0);
+ if (errno || *fd_str || fd < 0) {
+ ERROR("Parsing FD number failed");
+ goto error;
+ }
+ /* Fiddle with copy of file descriptor */
+ fd = dup(fd);
+ if (fd == -1)
+ goto error;
+ oflags = fcntl(fd, F_GETFL);
+ if (oflags == -1)
+ goto error;
+ if (fcntl(fd, F_SETFL, oflags | O_NONBLOCK) == -1)
+ goto error;
+ fp = fdopen(fd, "r");
+ if (fp == NULL)
+ goto error;
+ fd = -1;
+ /* Only take the last line into account */
+ lcount = 0;
+ while (fgets(output, sizeof(output), fp))
+ ++lcount;
+ if (lcount == 0)
+ goto error;
+ else if (ferror(fp) && errno != EAGAIN)
+ goto error;
+ /* Line must end with a newline character */
+ fs_sanitize_cmdline(output);
+ if (output[0] == '\0')
+ goto error;
+ err = fs_parse_device(sdev, output);
+ if (err)
+ ERROR("Parsing device '%s' failed", output);
+error:
+ if (fp)
+ fclose(fp);
+ if (fd != -1)
+ close(fd);
+ return err;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
+ } else if (strncmp(param, "fd(", 3) == 0) {
+ ret = fs_read_fd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from FD failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0') {
+ size_t len = b - a;
+
+ if (i > 0)
+ len += 1;
+ snprintf(&buffer[i], len + 1, "%s%s",
+ i ? "," : "", &params[a]);
+ i += len;
+ } else if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ strlcpy(params, buffer, DEVARGS_MAXLEN);
+ return 0;
+}
+
+static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ n = strlcpy(mut_params, params, sizeof(mut_params));
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_HOTPLUG_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_HOTPLUG_POLL_KVARG,
+ &fs_get_u64_arg, &failsafe_hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count > 0) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+
+ failsafe_mac_from_arg = 1;
+ }
+ }
+ PRIV(dev)->state = DEV_PARSED;
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->cmdline);
+ sdev->cmdline = NULL;
+ free(sdev->fd_str);
+ sdev->fd_str = NULL;
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) != 0 &&
+ strncmp(param, "exec", b) != 0 &&
+ strncmp(param, "fd(", b) != 0) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
+
+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else if (sdev->fd_str)
+ ret = fs_read_fd(sdev, sdev->fd_str);
+ else
+ ret = fs_parse_sub_device(sdev);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_eal.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 000000000..820a915f7
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <rte_string_fns.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_ethdev_portid_get(const char *name, uint16_t *port_id)
+{
+ uint16_t pid;
+ size_t len;
+
+ if (name == NULL) {
+ DEBUG("Null pointer is specified\n");
+ return -EINVAL;
+ }
+ len = strlen(name);
+ for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
+ if (rte_eth_dev_is_valid_port(pid) &&
+ !strncmp(name, rte_eth_devices[pid].device->name, len)) {
+ *port_id = pid;
+ return 0;
+ }
+ }
+ return -ENODEV;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_devargs *da;
+ uint8_t i;
+ uint16_t pid;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ da = &sdev->devargs;
+ if (fs_ethdev_portid_get(da->name, &pid) != 0) {
+ struct rte_eth_dev_owner pid_owner;
+
+ ret = rte_eal_hotplug_add(da->bus->name,
+ da->name,
+ da->args);
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ rte_errno ? "(" : "",
+ rte_errno ? strerror(rte_errno) : "",
+ rte_errno ? ")" : "");
+ continue;
+ }
+ if (fs_ethdev_portid_get(da->name, &pid) != 0) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ /*
+ * The NEW callback tried to take ownership, check
+ * whether it succeed or didn't.
+ */
+ rte_eth_dev_owner_get(pid, &pid_owner);
+ if (pid_owner.id != PRIV(dev)->my_owner.id) {
+ INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
+ " owner(%s_%016"PRIX64"), will try again later",
+ i, pid_owner.name, pid_owner.id,
+ PRIV(dev)->my_owner.name,
+ PRIV(dev)->my_owner.id);
+ continue;
+ }
+ } else {
+ /* The sub-device port was found. */
+ char devstr[DEVARGS_MAXLEN] = "";
+ struct rte_devargs *probed_da =
+ rte_eth_devices[pid].device->devargs;
+
+ /* Take control of probed device. */
+ free(da->args);
+ memset(da, 0, sizeof(*da));
+ if (probed_da != NULL)
+ snprintf(devstr, sizeof(devstr), "%s,%s",
+ probed_da->name, probed_da->args);
+ else
+ strlcpy(devstr,
+ rte_eth_devices[pid].device->name,
+ sizeof(devstr));
+ ret = rte_devargs_parse(da, devstr);
+ if (ret) {
+ ERROR("Probed devargs parsing failed with code"
+ " %d", ret);
+ return ret;
+ }
+ INFO("Taking control of a probed sub device"
+ " %d named %s", i, da->name);
+ ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
+ if (ret < 0) {
+ INFO("sub_device %d owner set failed (%s), "
+ "will try again later", i, strerror(-ret));
+ continue;
+ } else if (strncmp(rte_eth_devices[pid].device->name,
+ da->name, strlen(da->name)) != 0) {
+ /*
+ * The device probably was removed and its port
+ * id was reallocated before ownership set.
+ */
+ rte_eth_dev_owner_unset(pid,
+ PRIV(dev)->my_owner.id);
+ INFO("sub_device %d was removed before taking"
+ " ownership, will try again later", i);
+ continue;
+ }
+ }
+ sdev->sdev_port_id = pid;
+ SUB_ID(sdev) = i;
+ sdev->fs_port_id = dev->data->port_id;
+ sdev->dev = ETH(sdev)->device;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev, NULL);
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+ int sdev_ret;
+ int ret = 0;
+
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+ sdev_ret = rte_dev_remove(sdev->dev);
+ if (sdev_ret) {
+ ERROR("Failed to remove requested device %s (err: %d)",
+ sdev->dev->name, sdev_ret);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return ret;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ PRIV(dev)->state = DEV_PROBED - 1;
+ return ret;
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_ether.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 000000000..7ac23d49a
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,624 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <unistd.h>
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_cycles.h>
+
+#include "failsafe_private.h"
+
+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
+static int
+eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_flow_error ferror;
+ int ret;
+
+ if (!PRIV(dev)->flow_isolated) {
+ DEBUG("Flow isolation already disabled");
+ } else {
+ DEBUG("Enabling flow isolation");
+ ret = rte_flow_isolate(PORT_ID(sdev),
+ PRIV(dev)->flow_isolated,
+ &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ return ret;
+ }
+ }
+ /*
+ * Propagate multicast MAC addresses to sub-devices,
+ * if non zero number of addresses is set.
+ * The condition is required to avoid breakage of failsafe
+ * for sub-devices which do not support the operation
+ * if the feature is really not used.
+ */
+ if (PRIV(dev)->nb_mcast_addr > 0) {
+ DEBUG("Configuring multicast MAC addresses");
+ ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+ PRIV(dev)->mcast_addrs,
+ PRIV(dev)->nb_mcast_addr);
+ if (ret) {
+ ERROR("Failed to apply multicast MAC addresses");
+ return ret;
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ flow->rule.attr,
+ flow->rule.pattern,
+ flow->rule.actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ int ret;
+
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ failsafe_rx_intr_uninstall_subdevice(sdev);
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ failsafe_eth_dev_unregister_callbacks(sdev);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ ret = rte_dev_remove(sdev->dev);
+ if (ret) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ rte_eth_dev_release_port(ETH(sdev));
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ sdev->sdev_port_id = RTE_MAX_ETHPORTS;
+ /* the end */
+ break;
+ }
+ sdev->remove = 0;
+ failsafe_hotplug_alarm_install(fs_dev(sdev));
+}
+
+static void
+fs_dev_stats_save(struct sub_device *sdev)
+{
+ struct rte_eth_stats stats;
+ int err;
+
+ /* Attempt to read current stats. */
+ err = rte_eth_stats_get(PORT_ID(sdev), &stats);
+ if (err) {
+ uint64_t timestamp = sdev->stats_snapshot.timestamp;
+
+ WARN("Could not access latest statistics from sub-device %d.\n",
+ SUB_ID(sdev));
+ if (timestamp != 0)
+ WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
+ (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
+ }
+ failsafe_stats_increment
+ (&PRIV(fs_dev(sdev))->stats_accumulator,
+ err ? &sdev->stats_snapshot.stats : &stats);
+ memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
+{
+ int ret;
+
+ if (sdev == NULL)
+ return;
+ if (sdev->rmv_callback) {
+ ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to unregister RMV callback for sub_device"
+ " %d", SUB_ID(sdev));
+ sdev->rmv_callback = 0;
+ }
+ if (sdev->lsc_callback) {
+ ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_LSC,
+ failsafe_eth_lsc_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to unregister LSC callback for sub_device"
+ " %d", SUB_ID(sdev));
+ sdev->lsc_callback = 0;
+ }
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ if (sdev->remove && fs_rxtx_clean(sdev)) {
+ if (fs_lock(dev, 1) != 0)
+ return;
+ fs_dev_stats_save(sdev);
+ fs_dev_remove(sdev);
+ fs_unlock(dev, 1);
+ }
+}
+
+static int
+failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
+{
+ struct rxq *rxq;
+ int ret;
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+
+ if (rxq->info.conf.rx_deferred_start &&
+ dev->data->rx_queue_state[i] ==
+ RTE_ETH_QUEUE_STATE_STARTED) {
+ /*
+ * The subdevice Rx queue does not launch on device
+ * start if deferred start flag is set. It needs to be
+ * started manually in case an appropriate failsafe Rx
+ * queue has been started earlier.
+ */
+ ret = dev->dev_ops->rx_queue_start(dev, i);
+ if (ret) {
+ ERROR("Could not synchronize Rx queue %d", i);
+ return ret;
+ }
+ } else if (dev->data->rx_queue_state[i] ==
+ RTE_ETH_QUEUE_STATE_STOPPED) {
+ /*
+ * The subdevice Rx queue needs to be stopped manually
+ * in case an appropriate failsafe Rx queue has been
+ * stopped earlier.
+ */
+ ret = dev->dev_ops->rx_queue_stop(dev, i);
+ if (ret) {
+ ERROR("Could not synchronize Rx queue %d", i);
+ return ret;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
+{
+ struct txq *txq;
+ int ret;
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+
+ if (txq->info.conf.tx_deferred_start &&
+ dev->data->tx_queue_state[i] ==
+ RTE_ETH_QUEUE_STATE_STARTED) {
+ /*
+ * The subdevice Tx queue does not launch on device
+ * start if deferred start flag is set. It needs to be
+ * started manually in case an appropriate failsafe Tx
+ * queue has been started earlier.
+ */
+ ret = dev->dev_ops->tx_queue_start(dev, i);
+ if (ret) {
+ ERROR("Could not synchronize Tx queue %d", i);
+ return ret;
+ }
+ } else if (dev->data->tx_queue_state[i] ==
+ RTE_ETH_QUEUE_STATE_STOPPED) {
+ /*
+ * The subdevice Tx queue needs to be stopped manually
+ * in case an appropriate failsafe Tx queue has been
+ * stopped earlier.
+ */
+ ret = dev->dev_ops->tx_queue_stop(dev, i);
+ if (ret) {
+ ERROR("Could not synchronize Tx queue %d", i);
+ return ret;
+ }
+ }
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ goto err_remove;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto err_remove;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state == DEV_PROBED) {
+ inactive |= UINT32_C(1) << i;
+ ret = eth_dev_flow_isolate_set(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ goto err_remove;
+ }
+ }
+ }
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ goto err_remove;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ goto err_remove;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ goto err_remove;
+ ret = failsafe_eth_dev_rx_queues_sync(dev);
+ if (ret)
+ goto err_remove;
+ ret = failsafe_eth_dev_tx_queues_sync(dev);
+ if (ret)
+ goto err_remove;
+ return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+void
+failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
+{
+ uint32_t i;
+
+ RTE_ASSERT(to != NULL && from != NULL);
+ to->ipackets += from->ipackets;
+ to->opackets += from->opackets;
+ to->ibytes += from->ibytes;
+ to->obytes += from->obytes;
+ to->imissed += from->imissed;
+ to->ierrors += from->ierrors;
+ to->oerrors += from->oerrors;
+ to->rx_nombuf += from->rx_nombuf;
+ for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
+ to->q_ipackets[i] += from->q_ipackets[i];
+ to->q_opackets[i] += from->q_opackets[i];
+ to->q_ibytes[i] += from->q_ibytes[i];
+ to->q_obytes[i] += from->q_obytes[i];
+ to->q_errors[i] += from->q_errors[i];
+ }
+}
+
+int
+failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg, void *out __rte_unused)
+{
+ struct sub_device *sdev = cb_arg;
+
+ fs_lock(fs_dev(sdev), 0);
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(fs_dev(sdev), sdev);
+ /* Use safe bursts in any case. */
+ failsafe_set_burst_fn(fs_dev(sdev), 1);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
+ fs_unlock(fs_dev(sdev), 0);
+ return 0;
+}
+
+int
+failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg, void *out __rte_unused)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ int ret;
+
+ ret = dev->dev_ops->link_update(dev, 0);
+ /* We must pass on the LSC event */
+ if (ret)
+ return _rte_eth_dev_callback_process(dev,
+ RTE_ETH_EVENT_INTR_LSC,
+ NULL);
+ else
+ return 0;
+}
+
+/* Take sub-device ownership before it becomes exposed to the application. */
+int
+failsafe_eth_new_event_callback(uint16_t port_id,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg, void *out __rte_unused)
+{
+ struct rte_eth_dev *fs_dev = cb_arg;
+ struct sub_device *sdev;
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ uint8_t i;
+
+ FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
+ if (sdev->state >= DEV_PROBED)
+ continue;
+ if (strcmp(sdev->devargs.name, dev->device->name) != 0)
+ continue;
+ rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
+ /* The actual owner will be checked after the port probing. */
+ break;
+ }
+ return 0;
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_flow.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 000000000..5e2b5f7c6
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ const struct rte_flow_conv_rule rule = {
+ .attr_ro = attr,
+ .pattern_ro = items,
+ .actions_ro = actions,
+ };
+ struct rte_flow_error error;
+ int ret;
+
+ ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
+ if (ret < 0) {
+ ERROR("Unable to process flow rule (%s): %s",
+ error.message ? error.message : "unspecified",
+ strerror(rte_errno));
+ return NULL;
+ }
+ flow = rte_zmalloc(NULL, offsetof(struct rte_flow, rule) + ret,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->rule, ret, &rule,
+ &error);
+ if (ret < 0) {
+ ERROR("Failed to copy flow rule (%s): %s",
+ error.message ? error.message : "unspecified",
+ strerror(rte_errno));
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ fs_unlock(dev, 0);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ fs_unlock(dev, 0);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if ((local_ret = fs_err(sdev, local_ret))) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *action,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ fs_lock(dev, 0);
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ int ret = rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)],
+ action, arg, error);
+
+ if ((ret = fs_err(sdev, ret))) {
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+static int
+fs_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state < DEV_PROBED)
+ continue;
+ DEBUG("Calling rte_flow_isolate on sub_device %d", i);
+ if (PRIV(dev)->flow_isolated != sdev->flow_isolated)
+ WARN("flow isolation mode of sub_device %d in incoherent state.",
+ i);
+ ret = rte_flow_isolate(PORT_ID(sdev), set, error);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_flow_isolate failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ sdev->flow_isolated = set;
+ }
+ PRIV(dev)->flow_isolated = set;
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+ .isolate = fs_flow_isolate,
+};
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_intr.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_intr.c
new file mode 100644
index 000000000..0f34c5bba
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_intr.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+/**
+ * @file
+ * Interrupts handling for failsafe driver.
+ */
+
+#if defined(LINUX)
+#include <sys/epoll.h>
+#endif
+#include <unistd.h>
+
+#include <rte_alarm.h>
+#include <rte_config.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_interrupts.h>
+#include <rte_io.h>
+#include <rte_service_component.h>
+
+#include "failsafe_private.h"
+
+#define NUM_RX_PROXIES (FAILSAFE_MAX_ETHPORTS * RTE_MAX_RXTX_INTR_VEC_ID)
+
+
+/**
+ * Open an epoll file descriptor.
+ *
+ * @param flags
+ * Flags for defining epoll behavior.
+ * @return
+ * 0 on success, negative errno value otherwise.
+ */
+static int
+fs_epoll_create1(int flags)
+{
+#if defined(LINUX)
+ return epoll_create1(flags);
+#elif defined(BSD)
+ RTE_SET_USED(flags);
+ return -ENOTSUP;
+#endif
+}
+
+/**
+ * Install failsafe Rx event proxy service.
+ * The Rx event proxy is the service that listens to Rx events from the
+ * subdevices and triggers failsafe Rx events accordingly.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ * @return
+ * 0 on success, negative errno value otherwise.
+ */
+static int
+fs_rx_event_proxy_routine(void *data)
+{
+ struct fs_priv *priv;
+ struct rxq *rxq;
+ struct rte_epoll_event *events;
+ uint64_t u64;
+ int i, n;
+ int rc = 0;
+
+ u64 = 1;
+ priv = data;
+ events = priv->rxp.evec;
+ n = rte_epoll_wait(priv->rxp.efd, events, NUM_RX_PROXIES, -1);
+ for (i = 0; i < n; i++) {
+ rxq = events[i].epdata.data;
+ if (rxq->enable_events && rxq->event_fd != -1) {
+ if (write(rxq->event_fd, &u64, sizeof(u64)) !=
+ sizeof(u64)) {
+ ERROR("Failed to proxy Rx event to socket %d",
+ rxq->event_fd);
+ rc = -EIO;
+ }
+ }
+ }
+ return rc;
+}
+
+/**
+ * Uninstall failsafe Rx event proxy service.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ */
+static void
+fs_rx_event_proxy_service_uninstall(struct fs_priv *priv)
+{
+ /* Unregister the event service. */
+ switch (priv->rxp.sstate) {
+ case SS_RUNNING:
+ rte_service_map_lcore_set(priv->rxp.sid, priv->rxp.scid, 0);
+ /* fall through */
+ case SS_READY:
+ rte_service_runstate_set(priv->rxp.sid, 0);
+ rte_service_set_stats_enable(priv->rxp.sid, 0);
+ rte_service_component_runstate_set(priv->rxp.sid, 0);
+ /* fall through */
+ case SS_REGISTERED:
+ rte_service_component_unregister(priv->rxp.sid);
+ /* fall through */
+ default:
+ break;
+ }
+}
+
+/**
+ * Install the failsafe Rx event proxy service.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ * @return
+ * 0 on success, negative errno value otherwise.
+ */
+static int
+fs_rx_event_proxy_service_install(struct fs_priv *priv)
+{
+ struct rte_service_spec service;
+ int32_t num_service_cores;
+ int ret = 0;
+
+ num_service_cores = rte_service_lcore_count();
+ if (num_service_cores <= 0) {
+ ERROR("Failed to install Rx interrupts, "
+ "no service core found");
+ return -ENOTSUP;
+ }
+ /* prepare service info */
+ memset(&service, 0, sizeof(struct rte_service_spec));
+ snprintf(service.name, sizeof(service.name), "%s_Rx_service",
+ priv->data->name);
+ service.socket_id = priv->data->numa_node;
+ service.callback = fs_rx_event_proxy_routine;
+ service.callback_userdata = priv;
+
+ if (priv->rxp.sstate == SS_NO_SERVICE) {
+ uint32_t service_core_list[num_service_cores];
+
+ /* get a service core to work with */
+ ret = rte_service_lcore_list(service_core_list,
+ num_service_cores);
+ if (ret <= 0) {
+ ERROR("Failed to install Rx interrupts, "
+ "service core list empty or corrupted");
+ return -ENOTSUP;
+ }
+ priv->rxp.scid = service_core_list[0];
+ ret = rte_service_lcore_add(priv->rxp.scid);
+ if (ret && ret != -EALREADY) {
+ ERROR("Failed adding service core");
+ return ret;
+ }
+ /* service core may be in "stopped" state, start it */
+ ret = rte_service_lcore_start(priv->rxp.scid);
+ if (ret && (ret != -EALREADY)) {
+ ERROR("Failed to install Rx interrupts, "
+ "service core not started");
+ return ret;
+ }
+ /* register our service */
+ int32_t ret = rte_service_component_register(&service,
+ &priv->rxp.sid);
+ if (ret) {
+ ERROR("service register() failed");
+ return -ENOEXEC;
+ }
+ priv->rxp.sstate = SS_REGISTERED;
+ /* run the service */
+ ret = rte_service_component_runstate_set(priv->rxp.sid, 1);
+ if (ret < 0) {
+ ERROR("Failed Setting component runstate\n");
+ return ret;
+ }
+ ret = rte_service_set_stats_enable(priv->rxp.sid, 1);
+ if (ret < 0) {
+ ERROR("Failed enabling stats\n");
+ return ret;
+ }
+ ret = rte_service_runstate_set(priv->rxp.sid, 1);
+ if (ret < 0) {
+ ERROR("Failed to run service\n");
+ return ret;
+ }
+ priv->rxp.sstate = SS_READY;
+ /* map the service with the service core */
+ ret = rte_service_map_lcore_set(priv->rxp.sid,
+ priv->rxp.scid, 1);
+ if (ret) {
+ ERROR("Failed to install Rx interrupts, "
+ "could not map service core");
+ return ret;
+ }
+ priv->rxp.sstate = SS_RUNNING;
+ }
+ return 0;
+}
+
+/**
+ * Install failsafe Rx event proxy subsystem.
+ * This is the way the failsafe PMD generates Rx events on behalf of its
+ * subdevices.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+fs_rx_event_proxy_install(struct fs_priv *priv)
+{
+ int rc = 0;
+
+ /*
+ * Create the epoll fd and event vector for the proxy service to
+ * wait on for Rx events generated by the subdevices.
+ */
+ priv->rxp.efd = fs_epoll_create1(0);
+ if (priv->rxp.efd < 0) {
+ rte_errno = errno;
+ ERROR("Failed to create epoll,"
+ " Rx interrupts will not be supported");
+ return -rte_errno;
+ }
+ priv->rxp.evec = calloc(NUM_RX_PROXIES, sizeof(*priv->rxp.evec));
+ if (priv->rxp.evec == NULL) {
+ ERROR("Failed to allocate memory for event vectors,"
+ " Rx interrupts will not be supported");
+ rc = -ENOMEM;
+ goto error;
+ }
+ rc = fs_rx_event_proxy_service_install(priv);
+ if (rc < 0)
+ goto error;
+ return 0;
+error:
+ if (priv->rxp.efd >= 0) {
+ close(priv->rxp.efd);
+ priv->rxp.efd = -1;
+ }
+ if (priv->rxp.evec != NULL) {
+ free(priv->rxp.evec);
+ priv->rxp.evec = NULL;
+ }
+ rte_errno = -rc;
+ return rc;
+}
+
+/**
+ * RX Interrupt control per subdevice.
+ *
+ * @param sdev
+ * Pointer to sub-device structure.
+ * @param op
+ * The operation be performed for the vector.
+ * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int
+failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op)
+{
+ struct rte_eth_dev *dev;
+ struct rte_eth_dev *fsdev;
+ int epfd;
+ uint16_t pid;
+ uint16_t qid;
+ struct rxq *fsrxq;
+ int rc;
+ int ret = 0;
+
+ fsdev = fs_dev(sdev);
+ if (sdev == NULL || (ETH(sdev) == NULL) ||
+ fsdev == NULL || (PRIV(fsdev) == NULL)) {
+ ERROR("Called with invalid arguments");
+ return -EINVAL;
+ }
+ dev = ETH(sdev);
+ epfd = PRIV(fsdev)->rxp.efd;
+ pid = PORT_ID(sdev);
+
+ if (epfd <= 0) {
+ if (op == RTE_INTR_EVENT_ADD) {
+ ERROR("Proxy events are not initialized");
+ return -EBADF;
+ } else {
+ return 0;
+ }
+ }
+ if (dev->data->nb_rx_queues > fsdev->data->nb_rx_queues) {
+ ERROR("subdevice has too many queues,"
+ " Interrupts will not be enabled");
+ return -E2BIG;
+ }
+ for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+ fsrxq = fsdev->data->rx_queues[qid];
+ rc = rte_eth_dev_rx_intr_ctl_q(pid, qid, epfd,
+ op, (void *)fsrxq);
+ if (rc) {
+ ERROR("rte_eth_dev_rx_intr_ctl_q failed for "
+ "port %d queue %d, epfd %d, error %d",
+ pid, qid, epfd, rc);
+ ret = rc;
+ }
+ }
+ return ret;
+}
+
+/**
+ * Install Rx interrupts subsystem for a subdevice.
+ * This is a support for dynamically adding subdevices.
+ *
+ * @param sdev
+ * Pointer to subdevice structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int failsafe_rx_intr_install_subdevice(struct sub_device *sdev)
+{
+ int rc;
+ int qid;
+ struct rte_eth_dev *fsdev;
+ struct rxq **rxq;
+ const struct rte_intr_conf *const intr_conf =
+ &ETH(sdev)->data->dev_conf.intr_conf;
+
+ fsdev = fs_dev(sdev);
+ rxq = (struct rxq **)fsdev->data->rx_queues;
+ if (intr_conf->rxq == 0)
+ return 0;
+ rc = failsafe_eth_rx_intr_ctl_subdevice(sdev, RTE_INTR_EVENT_ADD);
+ if (rc)
+ return rc;
+ /* enable interrupts on already-enabled queues */
+ for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
+ if (rxq[qid]->enable_events) {
+ int ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev),
+ qid);
+ if (ret && (ret != -ENOTSUP)) {
+ ERROR("Failed to enable interrupts on "
+ "port %d queue %d", PORT_ID(sdev), qid);
+ rc = ret;
+ }
+ }
+ }
+ return rc;
+}
+
+/**
+ * Uninstall Rx interrupts subsystem for a subdevice.
+ * This is a support for dynamically removing subdevices.
+ *
+ * @param sdev
+ * Pointer to subdevice structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev)
+{
+ int qid;
+ struct rte_eth_dev *fsdev;
+ struct rxq *fsrxq;
+
+ fsdev = fs_dev(sdev);
+ for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
+ if (qid < fsdev->data->nb_rx_queues) {
+ fsrxq = fsdev->data->rx_queues[qid];
+ if (fsrxq != NULL && fsrxq->enable_events)
+ rte_eth_dev_rx_intr_disable(PORT_ID(sdev),
+ qid);
+ }
+ }
+ failsafe_eth_rx_intr_ctl_subdevice(sdev, RTE_INTR_EVENT_DEL);
+}
+
+/**
+ * Uninstall failsafe Rx event proxy.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ */
+static void
+fs_rx_event_proxy_uninstall(struct fs_priv *priv)
+{
+ fs_rx_event_proxy_service_uninstall(priv);
+ if (priv->rxp.evec != NULL) {
+ free(priv->rxp.evec);
+ priv->rxp.evec = NULL;
+ }
+ if (priv->rxp.efd > 0) {
+ close(priv->rxp.efd);
+ priv->rxp.efd = -1;
+ }
+}
+
+/**
+ * Uninstall failsafe interrupt vector.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ */
+static void
+fs_rx_intr_vec_uninstall(struct fs_priv *priv)
+{
+ struct rte_intr_handle *intr_handle;
+
+ intr_handle = &priv->intr_handle;
+ if (intr_handle->intr_vec != NULL) {
+ free(intr_handle->intr_vec);
+ intr_handle->intr_vec = NULL;
+ }
+ intr_handle->nb_efd = 0;
+}
+
+/**
+ * Installs failsafe interrupt vector to be registered with EAL later on.
+ *
+ * @param priv
+ * Pointer to failsafe private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+fs_rx_intr_vec_install(struct fs_priv *priv)
+{
+ unsigned int i;
+ unsigned int rxqs_n;
+ unsigned int n;
+ unsigned int count;
+ struct rte_intr_handle *intr_handle;
+
+ rxqs_n = priv->data->nb_rx_queues;
+ n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+ count = 0;
+ intr_handle = &priv->intr_handle;
+ RTE_ASSERT(intr_handle->intr_vec == NULL);
+ /* Allocate the interrupt vector of the failsafe Rx proxy interrupts */
+ intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
+ if (intr_handle->intr_vec == NULL) {
+ fs_rx_intr_vec_uninstall(priv);
+ rte_errno = ENOMEM;
+ ERROR("Failed to allocate memory for interrupt vector,"
+ " Rx interrupts will not be supported");
+ return -rte_errno;
+ }
+ for (i = 0; i < n; i++) {
+ struct rxq *rxq = priv->data->rx_queues[i];
+
+ /* Skip queues that cannot request interrupts. */
+ if (rxq == NULL || rxq->event_fd < 0) {
+ /* Use invalid intr_vec[] index to disable entry. */
+ intr_handle->intr_vec[i] =
+ RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID;
+ continue;
+ }
+ if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
+ rte_errno = E2BIG;
+ ERROR("Too many Rx queues for interrupt vector size"
+ " (%d), Rx interrupts cannot be enabled",
+ RTE_MAX_RXTX_INTR_VEC_ID);
+ fs_rx_intr_vec_uninstall(priv);
+ return -rte_errno;
+ }
+ intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
+ intr_handle->efds[count] = rxq->event_fd;
+ count++;
+ }
+ if (count == 0) {
+ fs_rx_intr_vec_uninstall(priv);
+ } else {
+ intr_handle->nb_efd = count;
+ intr_handle->efd_counter_size = sizeof(uint64_t);
+ }
+ return 0;
+}
+
+
+/**
+ * Uninstall failsafe Rx interrupts subsystem.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+void
+failsafe_rx_intr_uninstall(struct rte_eth_dev *dev)
+{
+ struct fs_priv *priv;
+ struct rte_intr_handle *intr_handle;
+
+ priv = PRIV(dev);
+ intr_handle = &priv->intr_handle;
+ rte_intr_free_epoll_fd(intr_handle);
+ fs_rx_event_proxy_uninstall(priv);
+ fs_rx_intr_vec_uninstall(priv);
+ dev->intr_handle = NULL;
+}
+
+/**
+ * Install failsafe Rx interrupts subsystem.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+failsafe_rx_intr_install(struct rte_eth_dev *dev)
+{
+ struct fs_priv *priv = PRIV(dev);
+ const struct rte_intr_conf *const intr_conf =
+ &priv->data->dev_conf.intr_conf;
+
+ if (intr_conf->rxq == 0 || dev->intr_handle != NULL)
+ return 0;
+ if (fs_rx_intr_vec_install(priv) < 0)
+ return -rte_errno;
+ if (fs_rx_event_proxy_install(priv) < 0) {
+ fs_rx_intr_vec_uninstall(priv);
+ return -rte_errno;
+ }
+ dev->intr_handle = &priv->intr_handle;
+ return 0;
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_ops.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 000000000..43d6a828f
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,1260 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_ethdev_driver.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_MACSEC_STRIP |
+ DEV_RX_OFFLOAD_HEADER_SPLIT |
+ DEV_RX_OFFLOAD_VLAN_FILTER |
+ DEV_RX_OFFLOAD_VLAN_EXTEND |
+ DEV_RX_OFFLOAD_JUMBO_FRAME |
+ DEV_RX_OFFLOAD_SCATTER |
+ DEV_RX_OFFLOAD_TIMESTAMP |
+ DEV_RX_OFFLOAD_SECURITY,
+ .rx_queue_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_MACSEC_STRIP |
+ DEV_RX_OFFLOAD_HEADER_SPLIT |
+ DEV_RX_OFFLOAD_VLAN_FILTER |
+ DEV_RX_OFFLOAD_VLAN_EXTEND |
+ DEV_RX_OFFLOAD_JUMBO_FRAME |
+ DEV_RX_OFFLOAD_SCATTER |
+ DEV_RX_OFFLOAD_TIMESTAMP |
+ DEV_RX_OFFLOAD_SECURITY,
+ .tx_offload_capa =
+ DEV_TX_OFFLOAD_MULTI_SEGS |
+ DEV_TX_OFFLOAD_MBUF_FAST_FREE |
+ DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_TSO,
+ .flow_type_rss_offloads =
+ ETH_RSS_IP |
+ ETH_RSS_UDP |
+ ETH_RSS_TCP,
+ .dev_capa =
+ RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
+ RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV(sdev, i, dev) {
+ int rmv_interrupt = 0;
+ int lsc_interrupt = 0;
+ int lsc_enabled;
+
+ if (sdev->state != DEV_PROBED &&
+ !(PRIV(dev)->alarm_lock == 0 && sdev->state == DEV_ACTIVE))
+ continue;
+
+ rmv_interrupt = ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_RMV;
+ if (rmv_interrupt) {
+ DEBUG("Enabling RMV interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.rmv = 1;
+ } else {
+ DEBUG("sub_device %d does not support RMV event", i);
+ }
+ lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
+ lsc_interrupt = lsc_enabled &&
+ (ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_LSC);
+ if (lsc_interrupt) {
+ DEBUG("Enabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 1;
+ } else if (lsc_enabled && !lsc_interrupt) {
+ DEBUG("Disabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 0;
+ }
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ if (!fs_err(sdev, ret))
+ continue;
+ ERROR("Could not configure sub_device %d", i);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ if (rmv_interrupt && sdev->rmv_callback == 0) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to register RMV callback for sub_device %d",
+ SUB_ID(sdev));
+ else
+ sdev->rmv_callback = 1;
+ }
+ dev->data->dev_conf.intr_conf.rmv = 0;
+ if (lsc_interrupt && sdev->lsc_callback == 0) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_LSC,
+ failsafe_eth_lsc_event_callback,
+ dev);
+ if (ret)
+ WARN("Failed to register LSC callback for sub_device %d",
+ SUB_ID(sdev));
+ else
+ sdev->lsc_callback = 1;
+ }
+ dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
+ sdev->state = DEV_ACTIVE;
+ }
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void
+fs_set_queues_state_start(struct rte_eth_dev *dev)
+{
+ struct rxq *rxq;
+ struct txq *txq;
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ if (rxq != NULL && !rxq->info.conf.rx_deferred_start)
+ dev->data->rx_queue_state[i] =
+ RTE_ETH_QUEUE_STATE_STARTED;
+ }
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ if (txq != NULL && !txq->info.conf.tx_deferred_start)
+ dev->data->tx_queue_state[i] =
+ RTE_ETH_QUEUE_STATE_STARTED;
+ }
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ ret = failsafe_rx_intr_install(dev);
+ if (ret) {
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret) {
+ if (!fs_err(sdev, ret))
+ continue;
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ ret = failsafe_rx_intr_install_subdevice(sdev);
+ if (ret) {
+ if (!fs_err(sdev, ret))
+ continue;
+ rte_eth_dev_stop(PORT_ID(sdev));
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ sdev->state = DEV_STARTED;
+ }
+ if (PRIV(dev)->state < DEV_STARTED) {
+ PRIV(dev)->state = DEV_STARTED;
+ fs_set_queues_state_start(dev);
+ }
+ fs_switch_dev(dev, NULL);
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void
+fs_set_queues_state_stop(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++)
+ if (dev->data->rx_queues[i] != NULL)
+ dev->data->rx_queue_state[i] =
+ RTE_ETH_QUEUE_STATE_STOPPED;
+ for (i = 0; i < dev->data->nb_tx_queues; i++)
+ if (dev->data->tx_queues[i] != NULL)
+ dev->data->tx_queue_state[i] =
+ RTE_ETH_QUEUE_STATE_STOPPED;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ PRIV(dev)->state = DEV_STARTED - 1;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ failsafe_rx_intr_uninstall_subdevice(sdev);
+ sdev->state = DEV_STARTED - 1;
+ }
+ failsafe_rx_intr_uninstall(dev);
+ fs_set_queues_state_stop(dev);
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ failsafe_eth_dev_unregister_callbacks(sdev);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+ int err = 0;
+ bool failure = true;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ uint16_t port_id = ETH(sdev)->data->port_id;
+
+ ret = rte_eth_dev_rx_queue_stop(port_id, rx_queue_id);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Rx queue stop failed for subdevice %d", i);
+ err = ret;
+ } else {
+ failure = false;
+ }
+ }
+ dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+ fs_unlock(dev, 0);
+ /* Return 0 in case of at least one successful queue stop */
+ return (failure) ? err : 0;
+}
+
+static int
+fs_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ uint16_t port_id = ETH(sdev)->data->port_id;
+
+ ret = rte_eth_dev_rx_queue_start(port_id, rx_queue_id);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Rx queue start failed for subdevice %d", i);
+ fs_rx_queue_stop(dev, rx_queue_id);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+ int err = 0;
+ bool failure = true;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ uint16_t port_id = ETH(sdev)->data->port_id;
+
+ ret = rte_eth_dev_tx_queue_stop(port_id, tx_queue_id);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Tx queue stop failed for subdevice %d", i);
+ err = ret;
+ } else {
+ failure = false;
+ }
+ }
+ dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+ fs_unlock(dev, 0);
+ /* Return 0 in case of at least one successful queue stop */
+ return (failure) ? err : 0;
+}
+
+static int
+fs_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ uint16_t port_id = ETH(sdev)->data->port_id;
+
+ ret = rte_eth_dev_tx_queue_start(port_id, tx_queue_id);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Tx queue start failed for subdevice %d", i);
+ fs_tx_queue_stop(dev, tx_queue_id);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = &rte_eth_devices[rxq->priv->data->port_id];
+ fs_lock(dev, 0);
+ if (rxq->event_fd > 0)
+ close(rxq->event_fd);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ if (ETH(sdev)->data->rx_queues != NULL &&
+ ETH(sdev)->data->rx_queues[rxq->qid] != NULL) {
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ }
+ }
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ /*
+ * FIXME: Add a proper interface in rte_eal_interrupts for
+ * allocating eventfd as an interrupt vector.
+ * For the time being, fake as if we are using MSIX interrupts,
+ * this will cause rte_intr_efd_enable to allocate an eventfd for us.
+ */
+ struct rte_intr_handle intr_handle = {
+ .type = RTE_INTR_HANDLE_VFIO_MSIX,
+ .efds = { -1, },
+ };
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ if (rx_conf->rx_deferred_start) {
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+ if (SUBOPS(sdev, rx_queue_start) == NULL) {
+ ERROR("Rx queue deferred start is not "
+ "supported for subdevice %d", i);
+ fs_unlock(dev, 0);
+ return -EINVAL;
+ }
+ }
+ }
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL,
+ sizeof(*rxq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL) {
+ fs_unlock(dev, 0);
+ return -ENOMEM;
+ }
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&rxq->refcnt[i]);
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ rxq->sdev = PRIV(dev)->subs;
+ ret = rte_intr_efd_enable(&intr_handle, 1);
+ if (ret < 0) {
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ rxq->event_fd = intr_handle.efds[0];
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static int
+fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct rxq *rxq;
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+ int rc = 0;
+
+ fs_lock(dev, 0);
+ if (idx >= dev->data->nb_rx_queues) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+ rxq = dev->data->rx_queues[idx];
+ if (rxq == NULL || rxq->event_fd <= 0) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+ /* Fail if proxy service is nor running. */
+ if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
+ ERROR("failsafe interrupt services are not running");
+ rc = -EAGAIN;
+ goto unlock;
+ }
+ rxq->enable_events = 1;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), idx);
+ ret = fs_err(sdev, ret);
+ if (ret)
+ rc = ret;
+ }
+unlock:
+ fs_unlock(dev, 0);
+ if (rc)
+ rte_errno = -rc;
+ return rc;
+}
+
+static int
+fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct rxq *rxq;
+ struct sub_device *sdev;
+ uint64_t u64;
+ uint8_t i;
+ int rc = 0;
+ int ret;
+
+ fs_lock(dev, 0);
+ if (idx >= dev->data->nb_rx_queues) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+ rxq = dev->data->rx_queues[idx];
+ if (rxq == NULL || rxq->event_fd <= 0) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+ rxq->enable_events = 0;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_rx_intr_disable(PORT_ID(sdev), idx);
+ ret = fs_err(sdev, ret);
+ if (ret)
+ rc = ret;
+ }
+ /* Clear pending events */
+ while (read(rxq->event_fd, &u64, sizeof(uint64_t)) > 0)
+ ;
+unlock:
+ fs_unlock(dev, 0);
+ if (rc)
+ rte_errno = -rc;
+ return rc;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = &rte_eth_devices[txq->priv->data->port_id];
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ if (ETH(sdev)->data->tx_queues != NULL &&
+ ETH(sdev)->data->tx_queues[txq->qid] != NULL) {
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ }
+ }
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ if (tx_conf->tx_deferred_start) {
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+ if (SUBOPS(sdev, tx_queue_start) == NULL) {
+ ERROR("Tx queue deferred start is not "
+ "supported for subdevice %d", i);
+ fs_unlock(dev, 0);
+ return -EINVAL;
+ }
+ }
+ }
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue",
+ sizeof(*txq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL) {
+ fs_unlock(dev, 0);
+ return -ENOMEM;
+ }
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&txq->refcnt[i]);
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ fs_unlock(dev, 0);
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ fs_unlock(dev, 0);
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ fs_unlock(dev, 0);
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1 && sdev->remove == 0 &&
+ rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ fs_unlock(dev, 0);
+ return 0;
+ }
+ }
+ fs_unlock(dev, 0);
+ return -1;
+}
+
+static int
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ struct rte_eth_stats backup;
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
+ uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
+
+ rte_memcpy(&backup, snapshot, sizeof(backup));
+ ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
+ if (ret) {
+ if (!fs_err(sdev, ret)) {
+ rte_memcpy(snapshot, &backup, sizeof(backup));
+ goto inc;
+ }
+ ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
+ i, ret);
+ *timestamp = 0;
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ *timestamp = rte_rdtsc();
+inc:
+ failsafe_stats_increment(stats, snapshot);
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ rte_eth_stats_reset(PORT_ID(sdev));
+ memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
+ }
+ memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
+ fs_unlock(dev, 0);
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ * Uses a logical AND of device capabilities among
+ * all sub_devices and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint64_t rx_offload_capa;
+ uint64_t rxq_offload_capa;
+ uint64_t rss_hf_offload_capa;
+ uint64_t dev_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ rxq_offload_capa = default_infos.rx_queue_offload_capa;
+ rss_hf_offload_capa = default_infos.flow_type_rss_offloads;
+ dev_capa = default_infos.dev_capa;
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ rxq_offload_capa &=
+ PRIV(dev)->infos.rx_queue_offload_capa;
+ rss_hf_offload_capa &=
+ PRIV(dev)->infos.flow_type_rss_offloads;
+ dev_capa &= PRIV(dev)->infos.dev_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.rx_queue_offload_capa = rxq_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads = rss_hf_offload_capa;
+ PRIV(dev)->infos.dev_capa = dev_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.tx_queue_offload_capa &=
+ default_infos.tx_queue_offload_capa;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+ const uint32_t *ret;
+
+ fs_lock(dev, 0);
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ ret = NULL;
+ goto unlock;
+ }
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
+ ret = NULL;
+ goto unlock;
+ }
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+unlock:
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
+ i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ int ret;
+
+ fs_lock(dev, 0);
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ ret = 0;
+ goto unlock;
+ }
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
+ ret = -ENOTSUP;
+ goto unlock;
+ }
+ ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+unlock:
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ fs_lock(dev, 0);
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+ fs_unlock(dev, 0);
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if ((ret = fs_err(sdev, ret))) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ fs_unlock(dev, 0);
+ return 0;
+}
+
+static int
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_set failed for sub_device %d with error %d",
+ i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+
+ return 0;
+}
+
+static int
+fs_set_mc_addr_list(struct rte_eth_dev *dev,
+ struct ether_addr *mc_addr_set, uint32_t nb_mc_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+ void *mcast_addrs;
+
+ fs_lock(dev, 0);
+
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+ mc_addr_set, nb_mc_addr);
+ if (ret != 0) {
+ ERROR("Operation rte_eth_dev_set_mc_addr_list failed for sub_device %d with error %d",
+ i, ret);
+ goto rollback;
+ }
+ }
+
+ mcast_addrs = rte_realloc(PRIV(dev)->mcast_addrs,
+ nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0]), 0);
+ if (mcast_addrs == NULL && nb_mc_addr > 0) {
+ ret = -ENOMEM;
+ goto rollback;
+ }
+ rte_memcpy(mcast_addrs, mc_addr_set,
+ nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0]));
+ PRIV(dev)->nb_mcast_addr = nb_mc_addr;
+ PRIV(dev)->mcast_addrs = mcast_addrs;
+
+ fs_unlock(dev, 0);
+ return 0;
+
+rollback:
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ int rc = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+ PRIV(dev)->mcast_addrs, PRIV(dev)->nb_mcast_addr);
+ if (rc != 0) {
+ ERROR("Multicast MAC address list rollback for sub_device %d failed with error %d",
+ i, rc);
+ }
+ }
+
+ fs_unlock(dev, 0);
+ return ret;
+}
+
+static int
+fs_rss_hash_update(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ fs_lock(dev, 0);
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_rss_hash_update(PORT_ID(sdev), rss_conf);
+ ret = fs_err(sdev, ret);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_rss_hash_update"
+ " failed for sub_device %d with error %d",
+ i, ret);
+ fs_unlock(dev, 0);
+ return ret;
+ }
+ }
+ fs_unlock(dev, 0);
+
+ return 0;
+}
+
+static int
+fs_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ return -ENOTSUP;
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_start = fs_rx_queue_start,
+ .rx_queue_stop = fs_rx_queue_stop,
+ .tx_queue_start = fs_tx_queue_start,
+ .tx_queue_stop = fs_tx_queue_stop,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .rx_queue_intr_enable = fs_rx_intr_enable,
+ .rx_queue_intr_disable = fs_rx_intr_disable,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+ .set_mc_addr_list = fs_set_mc_addr_list,
+ .rss_hash_update = fs_rss_hash_update,
+ .filter_ctrl = fs_filter_ctrl,
+};
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_private.h b/src/seastar/dpdk/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 000000000..4f58a5c91
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <stdint.h>
+#include <sys/queue.h>
+#include <pthread.h>
+
+#include <rte_atomic.h>
+#include <rte_dev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_devargs.h>
+#include <rte_flow.h>
+#include <rte_interrupts.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+#define FAILSAFE_OWNER_NAME "Fail-safe"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "exec(<shell command>)," \
+ "fd(<fd number>)," \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
+ ""
+
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+#define DEVARGS_MAXLEN 4096
+
+enum rxp_service_state {
+ SS_NO_SERVICE = 0,
+ SS_REGISTERED,
+ SS_READY,
+ SS_RUNNING,
+};
+
+/* TYPES */
+
+struct rx_proxy {
+ /* epoll file descriptor */
+ int efd;
+ /* event vector to be used by epoll */
+ struct rte_epoll_event *evec;
+ /* rte service id */
+ uint32_t sid;
+ /* service core id */
+ uint32_t scid;
+ enum rxp_service_state sstate;
+};
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* next sub_device to poll */
+ struct sub_device *sdev;
+ unsigned int socket_id;
+ int event_fd;
+ unsigned int enable_events:1;
+ struct rte_eth_rxq_info info;
+ rte_atomic64_t refcnt[];
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+ rte_atomic64_t refcnt[];
+};
+
+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_conv_rule rule;
+ uint8_t rule_data[];
+};
+
+enum dev_state {
+ DEV_UNDEFINED,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct fs_stats {
+ struct rte_eth_stats stats;
+ uint64_t timestamp;
+};
+
+/*
+ * Allocated in shared memory.
+ */
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct sub_device *next;
+ struct rte_devargs devargs;
+ struct rte_bus *bus; /* for primary process only. */
+ struct rte_device *dev; /* for primary process only. */
+ uint8_t sid;
+ /* Device state machine */
+ enum dev_state state;
+ /* Last stats snapshot passed to user */
+ struct fs_stats stats_snapshot;
+ /* Some device are defined as a command line */
+ char *cmdline;
+ /* Others are retrieved through a file descriptor */
+ char *fd_str;
+ /* fail-safe device backreference */
+ uint16_t fs_port_id; /* shared between processes */
+ /* sub device port id*/
+ uint16_t sdev_port_id; /* shared between processes */
+ /* flag calling for recollection */
+ volatile unsigned int remove:1;
+ /* flow isolation state */
+ int flow_isolated:1;
+ /* RMV callback registration state */
+ unsigned int rmv_callback:1;
+ /* LSC callback registration state */
+ unsigned int lsc_callback:1;
+};
+
+/*
+ * This is referenced by eth_dev->data->dev_private
+ * This is shared between processes.
+ */
+struct fs_priv {
+ struct rte_eth_dev_data *data; /* backreference to shared data. */
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs; /* shared between processes */
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ uint32_t nb_mcast_addr;
+ struct ether_addr *mcast_addrs;
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+ struct rte_eth_dev_owner my_owner; /* Unique owner. */
+ struct rte_intr_handle intr_handle; /* Port interrupt handle. */
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ struct rte_eth_stats stats_accumulator;
+ /*
+ * Rx interrupts/events proxy.
+ * The PMD issues Rx events to the EAL on behalf of its subdevices,
+ * it does that by registering an event-fd for each of its queues with
+ * the EAL. A PMD service thread listens to all the Rx events from the
+ * subdevices, when an Rx event is issued by a subdevice it will be
+ * caught by this service with will trigger an Rx event in the
+ * appropriate failsafe Rx queue.
+ */
+ struct rx_proxy rxp;
+ pthread_mutex_t hotplug_mutex;
+ /* Hot-plug mutex is locked by the alarm mechanism. */
+ volatile unsigned int alarm_lock:1;
+ unsigned int pending_alarm:1; /* An alarm is pending */
+ /* flow isolation state */
+ int flow_isolated:1;
+};
+
+/* FAILSAFE_INTR */
+
+int failsafe_rx_intr_install(struct rte_eth_dev *dev);
+void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
+int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
+void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);
+
+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
+/* RX / TX */
+
+void failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
+void failsafe_dev_remove(struct rte_eth_dev *dev);
+void failsafe_stats_increment(struct rte_eth_stats *to,
+ struct rte_eth_stats *from);
+int failsafe_eth_rmv_event_callback(uint16_t port_id,
+ enum rte_eth_event_type type,
+ void *arg, void *out);
+int failsafe_eth_lsc_event_callback(uint16_t port_id,
+ enum rte_eth_event_type event,
+ void *cb_arg, void *out);
+int failsafe_eth_new_event_callback(uint16_t port_id,
+ enum rte_eth_event_type event,
+ void *cb_arg, void *out);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
+extern uint64_t failsafe_hotplug_poll;
+extern int failsafe_mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \
+ NULL : &rte_eth_devices[(sdev)->sdev_port_id])
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ ((sdev)->sdev_port_id)
+
+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
+ for (s = fs_find_next((dev), 0, state, &i); \
+ s != NULL; \
+ s = fs_find_next((dev), i + 1, state, &i))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+/**
+ * Atomic guard
+ */
+
+/**
+ * a: (rte_atomic64_t)
+ */
+#define FS_ATOMIC_P(a) \
+ rte_atomic64_set(&(a), 1)
+
+/**
+ * a: (rte_atomic64_t)
+ */
+#define FS_ATOMIC_V(a) \
+ rte_atomic64_set(&(a), 0)
+
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_RX(s, i) \
+ rte_atomic64_read( \
+ &((struct rxq *) \
+ (fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid])
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_TX(s, i) \
+ rte_atomic64_read( \
+ &((struct txq *) \
+ (fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid])
+
+#ifdef RTE_EXEC_ENV_FREEBSD
+#define FS_THREADID_TYPE void*
+#define FS_THREADID_FMT "p"
+#else
+#define FS_THREADID_TYPE unsigned long
+#define FS_THREADID_FMT "lu"
+#endif
+
+extern int failsafe_logtype;
+
+#define LOG__(l, m, ...) \
+ rte_log(RTE_LOG_ ## l, failsafe_logtype, \
+ "net_failsafe: " m "%c", __VA_ARGS__)
+
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+
+/* inlined functions */
+
+static inline struct sub_device *
+fs_find_next(struct rte_eth_dev *dev,
+ uint8_t sid,
+ enum dev_state min_state,
+ uint8_t *sid_out)
+{
+ struct sub_device *subs;
+ uint8_t tail;
+
+ subs = PRIV(dev)->subs;
+ tail = PRIV(dev)->subs_tail;
+ while (sid < tail) {
+ if (subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ *sid_out = sid;
+ if (sid >= tail)
+ return NULL;
+ return &subs[sid];
+}
+
+static inline struct rte_eth_dev *
+fs_dev(struct sub_device *sdev) {
+ return &rte_eth_devices[sdev->fs_port_id];
+}
+
+/*
+ * Lock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline int
+fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+ int ret;
+
+ if (is_alarm) {
+ ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
+ if (ret) {
+ DEBUG("Hot-plug mutex lock trying failed(%s), will try"
+ " again later...", strerror(ret));
+ return ret;
+ }
+ PRIV(dev)->alarm_lock = 1;
+ } else {
+ ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
+ if (ret) {
+ ERROR("Cannot lock mutex(%s)", strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Unlock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline void
+fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+ int ret;
+
+ if (is_alarm) {
+ RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
+ PRIV(dev)->alarm_lock = 0;
+ }
+ ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
+ if (ret)
+ ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
+}
+
+/*
+ * Switch emitting device.
+ * If banned is set, banned must not be considered for
+ * the role of emitting device.
+ */
+static inline void
+fs_switch_dev(struct rte_eth_dev *dev,
+ struct sub_device *banned)
+{
+ struct sub_device *txd;
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ txd = TX_SUBDEV(dev);
+ if (PREFERRED_SUBDEV(dev)->state >= req_state &&
+ PREFERRED_SUBDEV(dev) != banned) {
+ if (txd != PREFERRED_SUBDEV(dev) &&
+ (txd == NULL ||
+ (req_state == DEV_STARTED) ||
+ (txd && txd->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((txd && txd->state < req_state) ||
+ txd == NULL ||
+ txd == banned) {
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
+ if (sdev == banned)
+ continue;
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ }
+ } else {
+ return;
+ }
+ failsafe_set_burst_fn(dev, 0);
+ rte_wmb();
+}
+
+/*
+ * Adjust error value and rte_errno to the fail-safe actual error value.
+ */
+static inline int
+fs_err(struct sub_device *sdev, int err)
+{
+ /* A device removal shouldn't be reported as an error. */
+ if (sdev->remove == 1 || err == -EIO)
+ return rte_errno = 0;
+ return err;
+}
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/src/seastar/dpdk/drivers/net/failsafe/failsafe_rxtx.c b/src/seastar/dpdk/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 000000000..fee08fa23
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <rte_atomic.h>
+#include <rte_debug.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev_driver.h>
+
+#include "failsafe_private.h"
+
+static inline int
+fs_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED) ||
+ (sdev->remove != 0);
+}
+
+static inline int
+fs_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
+/*
+ * Override source port in Rx packets.
+ *
+ * Make Rx packets originate from this PMD instance instead of one of its
+ * sub-devices. This is mandatory to avoid breaking applications.
+ */
+static void
+failsafe_rx_set_port(struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint16_t port)
+{
+ unsigned int i;
+
+ for (i = 0; i != nb_pkts; ++i)
+ rx_pkts[i]->port = port;
+}
+
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+
+ rxq = queue;
+ sdev = rxq->sdev;
+ do {
+ if (fs_rx_unsafe(sdev)) {
+ nb_rx = 0;
+ sdev = sdev->next;
+ continue;
+ }
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
+ sdev = sdev->next;
+ } while (nb_rx == 0 && sdev != rxq->sdev);
+ rxq->sdev = sdev;
+ if (nb_rx)
+ failsafe_rx_set_port(rx_pkts, nb_rx,
+ rxq->priv->data->port_id);
+ return nb_rx;
+}
+
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+
+ rxq = queue;
+ sdev = rxq->sdev;
+ do {
+ RTE_ASSERT(!fs_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
+ sdev = sdev->next;
+ } while (nb_rx == 0 && sdev != rxq->sdev);
+ rxq->sdev = sdev;
+ if (nb_rx)
+ failsafe_rx_set_port(rx_pkts, nb_rx,
+ rxq->priv->data->port_id);
+ return nb_rx;
+}
+
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+ uint16_t nb_tx;
+
+ txq = queue;
+ sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
+ if (unlikely(fs_tx_unsafe(sdev)))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
+}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+ uint16_t nb_tx;
+
+ txq = queue;
+ sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
+ RTE_ASSERT(!fs_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
+}
diff --git a/src/seastar/dpdk/drivers/net/failsafe/meson.build b/src/seastar/dpdk/drivers/net/failsafe/meson.build
new file mode 100644
index 000000000..dfd4067cb
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/meson.build
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+cflags += '-std=gnu99'
+cflags += '-D_DEFAULT_SOURCE'
+cflags += '-D_XOPEN_SOURCE=700'
+cflags += '-pedantic'
+if is_linux
+ cflags += '-DLINUX'
+else
+ cflags += '-DBSD'
+endif
+
+allow_experimental_apis = true
+
+sources = files('failsafe_args.c',
+ 'failsafe.c',
+ 'failsafe_eal.c',
+ 'failsafe_ether.c',
+ 'failsafe_flow.c',
+ 'failsafe_intr.c',
+ 'failsafe_ops.c',
+ 'failsafe_rxtx.c')
diff --git a/src/seastar/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map b/src/seastar/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map
new file mode 100644
index 000000000..b6d2840be
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map
@@ -0,0 +1,4 @@
+DPDK_17.08 {
+
+ local: *;
+};