diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/dpdk/drivers/net/failsafe | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/drivers/net/failsafe')
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/Makefile | 43 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe.c | 419 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_args.c | 517 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_eal.c | 168 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_ether.c | 638 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_flow.c | 255 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_intr.c | 535 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_ops.c | 1511 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_private.h | 504 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/failsafe_rxtx.c | 178 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/meson.build | 21 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map | 3 |
12 files changed, 4792 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/net/failsafe/Makefile b/src/spdk/dpdk/drivers/net/failsafe/Makefile new file mode 100644 index 000000000..464fd0515 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/Makefile @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017 6WIND S.A. +# Copyright 2017 Mellanox Technologies, Ltd + +include $(RTE_SDK)/mk/rte.vars.mk + +# Library name +LIB = librte_pmd_failsafe.a + +EXPORT_MAP := rte_pmd_failsafe_version.map + +# Sources are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_intr.c +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUX),y) +CFLAGS += -DLINUX +else +CFLAGS += -DBSD +endif + +# No exported include files + +# Basic CFLAGS: +CFLAGS += -std=gnu99 -Wextra +CFLAGS += -O3 +CFLAGS += -I. +CFLAGS += -D_DEFAULT_SOURCE +CFLAGS += -D_XOPEN_SOURCE=700 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -Wno-strict-prototypes +CFLAGS += -pedantic +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_bus_vdev +LDLIBS += -lpthread + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe.c new file mode 100644 index 000000000..72362f35d --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe.c @@ -0,0 +1,419 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. 
+ * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <stdbool.h> + +#include <rte_alarm.h> +#include <rte_malloc.h> +#include <rte_ethdev_driver.h> +#include <rte_ethdev_vdev.h> +#include <rte_devargs.h> +#include <rte_kvargs.h> +#include <rte_bus_vdev.h> + +#include "failsafe_private.h" + +int failsafe_logtype; + +const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME; +static const struct rte_eth_link eth_link = { + .link_speed = ETH_SPEED_NUM_10G, + .link_duplex = ETH_LINK_FULL_DUPLEX, + .link_status = ETH_LINK_UP, + .link_autoneg = ETH_LINK_AUTONEG, +}; + +static int +fs_sub_device_alloc(struct rte_eth_dev *dev, + const char *params) +{ + uint8_t nb_subs; + int ret; + int i; + struct sub_device *sdev; + uint8_t sdev_iterator; + + ret = failsafe_args_count_subdevice(dev, params); + if (ret) + return ret; + if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) { + ERROR("Cannot allocate more than %d ports", + FAILSAFE_MAX_ETHPORTS); + return -ENOSPC; + } + nb_subs = PRIV(dev)->subs_tail; + PRIV(dev)->subs = rte_zmalloc(NULL, + sizeof(struct sub_device) * nb_subs, + RTE_CACHE_LINE_SIZE); + if (PRIV(dev)->subs == NULL) { + ERROR("Could not allocate sub_devices"); + return -ENOMEM; + } + /* Initiate static sub devices linked list. 
*/ + for (i = 1; i < nb_subs; i++) + PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs + i; + PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs; + + FOREACH_SUBDEV(sdev, sdev_iterator, dev) { + sdev->sdev_port_id = RTE_MAX_ETHPORTS; + } + return 0; +} + +static void +fs_sub_device_free(struct rte_eth_dev *dev) +{ + rte_free(PRIV(dev)->subs); +} + +static void fs_hotplug_alarm(void *arg); + +int +failsafe_hotplug_alarm_install(struct rte_eth_dev *dev) +{ + int ret; + + if (dev == NULL) + return -EINVAL; + if (PRIV(dev)->pending_alarm) + return 0; + ret = rte_eal_alarm_set(failsafe_hotplug_poll * 1000, + fs_hotplug_alarm, + dev); + if (ret) { + ERROR("Could not set up plug-in event detection"); + return ret; + } + PRIV(dev)->pending_alarm = 1; + return 0; +} + +int +failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev) +{ + int ret = 0; + + rte_errno = 0; + rte_eal_alarm_cancel(fs_hotplug_alarm, dev); + if (rte_errno) { + ERROR("rte_eal_alarm_cancel failed (errno: %s)", + strerror(rte_errno)); + ret = -rte_errno; + } else { + PRIV(dev)->pending_alarm = 0; + } + return ret; +} + +static void +fs_hotplug_alarm(void *arg) +{ + struct rte_eth_dev *dev = arg; + struct sub_device *sdev; + int ret; + uint8_t i; + + if (!PRIV(dev)->pending_alarm) + return; + PRIV(dev)->pending_alarm = 0; + FOREACH_SUBDEV(sdev, i, dev) + if (sdev->state != PRIV(dev)->state) + break; + /* if we have non-probed device */ + if (i != PRIV(dev)->subs_tail) { + if (fs_lock(dev, 1) != 0) + goto reinstall; + ret = failsafe_eth_dev_state_sync(dev); + fs_unlock(dev, 1); + if (ret) + ERROR("Unable to synchronize sub_device state"); + } + failsafe_dev_remove(dev); +reinstall: + ret = failsafe_hotplug_alarm_install(dev); + if (ret) + ERROR("Unable to set up next alarm"); +} + +static int +fs_mutex_init(struct fs_priv *priv) +{ + int ret; + pthread_mutexattr_t attr; + + ret = pthread_mutexattr_init(&attr); + if (ret) { + ERROR("Cannot initiate mutex attributes - %s", strerror(ret)); + return ret; + } + /* Allow 
mutex relocks for the thread holding the mutex. */ + ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + if (ret) { + ERROR("Cannot set mutex type - %s", strerror(ret)); + return ret; + } + ret = pthread_mutex_init(&priv->hotplug_mutex, &attr); + if (ret) { + ERROR("Cannot initiate mutex - %s", strerror(ret)); + return ret; + } + return 0; +} + +static int +fs_eth_dev_create(struct rte_vdev_device *vdev) +{ + struct rte_eth_dev *dev; + struct rte_ether_addr *mac; + struct fs_priv *priv; + struct sub_device *sdev; + const char *params; + unsigned int socket_id; + uint8_t i; + int ret; + + dev = NULL; + priv = NULL; + socket_id = rte_socket_id(); + INFO("Creating fail-safe device on NUMA socket %u", socket_id); + params = rte_vdev_device_args(vdev); + if (params == NULL) { + ERROR("This PMD requires sub-devices, none provided"); + return -1; + } + dev = rte_eth_vdev_allocate(vdev, sizeof(*priv)); + if (dev == NULL) { + ERROR("Unable to allocate rte_eth_dev"); + return -1; + } + priv = PRIV(dev); + priv->data = dev->data; + priv->rxp = FS_RX_PROXY_INIT; + dev->dev_ops = &failsafe_ops; + dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; + dev->data->dev_link = eth_link; + PRIV(dev)->nb_mac_addr = 1; + TAILQ_INIT(&PRIV(dev)->flow_list); + dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst; + dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst; + ret = fs_sub_device_alloc(dev, params); + if (ret) { + ERROR("Could not allocate sub_devices"); + goto free_dev; + } + ret = failsafe_args_parse(dev, params); + if (ret) + goto free_subs; + ret = rte_eth_dev_owner_new(&priv->my_owner.id); + if (ret) { + ERROR("Failed to get unique owner identifier"); + goto free_args; + } + snprintf(priv->my_owner.name, sizeof(priv->my_owner.name), + FAILSAFE_OWNER_NAME); + DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id, + priv->my_owner.name, priv->my_owner.id); + ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW, + 
+ failsafe_eth_new_event_callback, + dev); + if (ret) { + ERROR("Failed to register NEW callback"); + goto free_args; + } + ret = failsafe_eal_init(dev); + if (ret) + goto unregister_new_callback; + ret = fs_mutex_init(priv); + if (ret) + goto unregister_new_callback; + ret = failsafe_hotplug_alarm_install(dev); + if (ret) { + ERROR("Could not set up plug-in event detection"); + goto unregister_new_callback; + } + mac = &dev->data->mac_addrs[0]; + if (failsafe_mac_from_arg) { + /* + * If MAC address was provided as a parameter, + * apply to all probed slaves. + */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) { + ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), + mac); + if (ret) { + ERROR("Failed to set default MAC address"); + goto cancel_alarm; + } + } + } else { + /* + * Use the ether_addr from first probed + * device, either preferred or fallback. + */ + FOREACH_SUBDEV(sdev, i, dev) + if (sdev->state >= DEV_PROBED) { + rte_ether_addr_copy( + &ETH(sdev)->data->mac_addrs[0], mac); + break; + } + /* + * If no device has been probed and no ether_addr + * has been provided on the command line, use a random + * valid one. + * It will be applied during future slave state syncs to + * probed slaves. 
+ */ + if (i == priv->subs_tail) + rte_eth_random_addr(&mac->addr_bytes[0]); + } + INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x", + mac->addr_bytes[0], mac->addr_bytes[1], + mac->addr_bytes[2], mac->addr_bytes[3], + mac->addr_bytes[4], mac->addr_bytes[5]); + dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + PRIV(dev)->intr_handle = (struct rte_intr_handle){ + .fd = -1, + .type = RTE_INTR_HANDLE_EXT, + }; + rte_eth_dev_probing_finish(dev); + return 0; +cancel_alarm: + failsafe_hotplug_alarm_cancel(dev); +unregister_new_callback: + rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW, + failsafe_eth_new_event_callback, dev); +free_args: + failsafe_args_free(dev); +free_subs: + fs_sub_device_free(dev); +free_dev: + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; + rte_eth_dev_release_port(dev); + return -1; +} + +static int +fs_rte_eth_free(const char *name) +{ + struct rte_eth_dev *dev; + int ret; + + dev = rte_eth_dev_allocated(name); + if (dev == NULL) + return -ENODEV; + rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW, + failsafe_eth_new_event_callback, dev); + ret = failsafe_eal_uninit(dev); + if (ret) + ERROR("Error while uninitializing sub-EAL"); + failsafe_args_free(dev); + fs_sub_device_free(dev); + ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex); + if (ret) + ERROR("Error while destroying hotplug mutex"); + rte_free(PRIV(dev)->mcast_addrs); + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; + rte_eth_dev_release_port(dev); + return ret; +} + +static bool +devargs_already_listed(struct rte_devargs *devargs) +{ + struct rte_devargs *list_da; + + RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) { + if (strcmp(list_da->name, devargs->name) == 0) + /* devargs already in the list */ + return true; + } + return false; +} + +static int +rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) +{ + const char *name; + struct 
rte_eth_dev *eth_dev; + struct sub_device *sdev; + struct rte_devargs devargs; + uint8_t i; + int ret; + + name = rte_vdev_device_name(vdev); + INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", + name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY && + strlen(rte_vdev_device_args(vdev)) == 0) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + ERROR("Failed to probe %s", name); + return -1; + } + eth_dev->dev_ops = &failsafe_ops; + eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst; + eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst; + /* + * Failsafe will attempt to probe all of its sub-devices. + * Any failure in sub-devices is not a fatal error. + * A sub-device can be plugged later. + */ + FOREACH_SUBDEV(sdev, i, eth_dev) { + /* skip empty devargs */ + if (sdev->devargs.name[0] == '\0') + continue; + + /* rebuild devargs to be able to get the bus name. */ + ret = rte_devargs_parse(&devargs, + sdev->devargs.name); + if (ret != 0) { + ERROR("Failed to parse devargs %s", + devargs.name); + continue; + } + if (!devargs_already_listed(&devargs)) { + ret = rte_dev_probe(devargs.name); + if (ret < 0) { + ERROR("Failed to probe devargs %s", + devargs.name); + continue; + } + } + } + rte_eth_dev_probing_finish(eth_dev); + return 0; + } + + return fs_eth_dev_create(vdev); +} + +static int +rte_pmd_failsafe_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name); + return fs_rte_eth_free(name); +} + +static struct rte_vdev_driver failsafe_drv = { + .probe = rte_pmd_failsafe_probe, + .remove = rte_pmd_failsafe_remove, +}; + +RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv); +RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING); + +RTE_INIT(failsafe_init_log) +{ + failsafe_logtype = rte_log_register("pmd.net.failsafe"); + if (failsafe_logtype >= 0) + 
rte_log_set_level(failsafe_logtype, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_args.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_args.c new file mode 100644 index 000000000..707490b94 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_args.c @@ -0,0 +1,517 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> + +#include <rte_debug.h> +#include <rte_devargs.h> +#include <rte_malloc.h> +#include <rte_kvargs.h> +#include <rte_string_fns.h> + +#include "failsafe_private.h" + +/* Callback used when a new device is found in devargs */ +typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params, + uint8_t head); + +uint64_t failsafe_hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS; +int failsafe_mac_from_arg; + +static const char * const pmd_failsafe_init_parameters[] = { + PMD_FAILSAFE_HOTPLUG_POLL_KVARG, + PMD_FAILSAFE_MAC_KVARG, + NULL, +}; + +/* + * input: text. 
+ * output: 0: if text[0] != '(', + * 0: if there are no corresponding ')' + * n: distance to corresponding ')' otherwise + */ +static size_t +closing_paren(const char *text) +{ + int nb_open = 0; + size_t i = 0; + + while (text[i] != '\0') { + if (text[i] == '(') + nb_open++; + if (text[i] == ')') + nb_open--; + if (nb_open == 0) + return i; + i++; + } + return 0; +} + +static int +fs_parse_device(struct sub_device *sdev, char *args) +{ + struct rte_devargs *d; + int ret; + + d = &sdev->devargs; + DEBUG("%s", args); + ret = rte_devargs_parse(d, args); + if (ret) { + DEBUG("devargs parsing failed with code %d", ret); + return ret; + } + sdev->bus = d->bus; + sdev->state = DEV_PARSED; + return 0; +} + +static void +fs_sanitize_cmdline(char *args) +{ + char *nl; + + nl = strrchr(args, '\n'); + if (nl) + nl[0] = '\0'; +} + +static int +fs_execute_cmd(struct sub_device *sdev, char *cmdline) +{ + FILE *fp; + /* store possible newline as well */ + char output[DEVARGS_MAXLEN + 1]; + size_t len; + int ret; + + RTE_ASSERT(cmdline != NULL || sdev->cmdline != NULL); + if (sdev->cmdline == NULL) { + size_t i; + + len = strlen(cmdline) + 1; + sdev->cmdline = calloc(1, len); + if (sdev->cmdline == NULL) { + ERROR("Command line allocation failed"); + return -ENOMEM; + } + strlcpy(sdev->cmdline, cmdline, len); + /* Replace all commas in the command line by spaces */ + for (i = 0; i < len; i++) + if (sdev->cmdline[i] == ',') + sdev->cmdline[i] = ' '; + } + DEBUG("'%s'", sdev->cmdline); + fp = popen(sdev->cmdline, "r"); + if (fp == NULL) { + ret = -errno; + ERROR("popen: %s", strerror(errno)); + return ret; + } + /* We only read one line */ + if (fgets(output, sizeof(output) - 1, fp) == NULL) { + DEBUG("Could not read command output"); + ret = -ENODEV; + goto ret_pclose; + } + fs_sanitize_cmdline(output); + if (output[0] == '\0') { + ret = -ENODEV; + goto ret_pclose; + } + ret = fs_parse_device(sdev, output); + if (ret) + ERROR("Parsing device '%s' failed", output); +ret_pclose: + 
+ if (pclose(fp) == -1) + ERROR("pclose: %s", strerror(errno)); + return ret; +} + +static int +fs_read_fd(struct sub_device *sdev, char *fd_str) +{ + FILE *fp = NULL; + int fd = -1; + /* store possible newline as well */ + char output[DEVARGS_MAXLEN + 1]; + int err = -ENODEV; + int oflags; + int lcount; + + RTE_ASSERT(fd_str != NULL || sdev->fd_str != NULL); + if (sdev->fd_str == NULL) { + sdev->fd_str = strdup(fd_str); + if (sdev->fd_str == NULL) { + ERROR("Command line allocation failed"); + return -ENOMEM; + } + } + errno = 0; + fd = strtol(fd_str, &fd_str, 0); + if (errno || *fd_str || fd < 0) { + ERROR("Parsing FD number failed"); + goto error; + } + /* Fiddle with copy of file descriptor */ + fd = dup(fd); + if (fd == -1) + goto error; + oflags = fcntl(fd, F_GETFL); + if (oflags == -1) + goto error; + if (fcntl(fd, F_SETFL, oflags | O_NONBLOCK) == -1) + goto error; + fp = fdopen(fd, "r"); + if (fp == NULL) + goto error; + fd = -1; + /* Only take the last line into account */ + lcount = 0; + while (fgets(output, sizeof(output), fp)) + ++lcount; + if (lcount == 0) + goto error; + else if (ferror(fp) && errno != EAGAIN) + goto error; + /* Line must end with a newline character */ + fs_sanitize_cmdline(output); + if (output[0] == '\0') + goto error; + err = fs_parse_device(sdev, output); + if (err) + ERROR("Parsing device '%s' failed", output); +error: + if (fp) + fclose(fp); + if (fd != -1) + close(fd); + return err; +} + +static int +fs_parse_device_param(struct rte_eth_dev *dev, const char *param, + uint8_t head) +{ + struct fs_priv *priv; + struct sub_device *sdev; + char *args = NULL; + size_t a, b; + int ret; + + priv = PRIV(dev); + a = 0; + b = 0; + ret = 0; + while (param[b] != '(' && + param[b] != '\0') + b++; + a = b; + b += closing_paren(&param[b]); + if (a == b) { + ERROR("Dangling parenthesis"); + return -EINVAL; + } + a += 1; + args = strndup(&param[a], b - a); + if (args == NULL) { + ERROR("Not enough memory for parameter parsing"); + return -ENOMEM; + } + 
+ sdev = &priv->subs[head]; + if (strncmp(param, "dev", 3) == 0) { + ret = fs_parse_device(sdev, args); + if (ret) + goto free_args; + } else if (strncmp(param, "exec", 4) == 0) { + ret = fs_execute_cmd(sdev, args); + if (ret == -ENODEV) { + DEBUG("Reading device info from command line failed"); + ret = 0; + } + if (ret) + goto free_args; + } else if (strncmp(param, "fd(", 3) == 0) { + ret = fs_read_fd(sdev, args); + if (ret == -ENODEV) { + DEBUG("Reading device info from FD failed"); + ret = 0; + } + if (ret) + goto free_args; + } else { + ERROR("Unrecognized device type: %.*s", (int)b, param); + return -EINVAL; + } +free_args: + free(args); + return ret; +} + +static int +fs_parse_sub_devices(parse_cb *cb, + struct rte_eth_dev *dev, const char *params) +{ + size_t a, b; + uint8_t head; + int ret; + + a = 0; + head = 0; + ret = 0; + while (params[a] != '\0') { + b = a; + while (params[b] != '(' && + params[b] != ',' && + params[b] != '\0') + b++; + if (b == a) { + ERROR("Invalid parameter"); + return -EINVAL; + } + if (params[b] == ',') { + a = b + 1; + continue; + } + if (params[b] == '(') { + size_t start = b; + + b += closing_paren(&params[b]); + if (b == start) { + ERROR("Dangling parenthesis"); + return -EINVAL; + } + ret = (*cb)(dev, &params[a], head); + if (ret) + return ret; + head += 1; + b += 1; + if (params[b] == '\0') + return 0; + } + a = b + 1; + } + return 0; +} + +static int +fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN]) +{ + char buffer[DEVARGS_MAXLEN] = {0}; + size_t a, b; + int i; + + a = 0; + i = 0; + while (params[a] != '\0') { + b = a; + while (params[b] != '(' && + params[b] != ',' && + params[b] != '\0') + b++; + if (b == a) { + ERROR("Invalid parameter"); + return -EINVAL; + } + if (params[b] == ',' || params[b] == '\0') { + size_t len = b - a; + + if (i > 0) + len += 1; + snprintf(&buffer[i], len + 1, "%s%s", + i ? 
+ "," : "", &params[a]); + i += len; + } else if (params[b] == '(') { + size_t start = b; + + b += closing_paren(&params[b]); + if (b == start) + return -EINVAL; + b += 1; + if (params[b] == '\0') + goto out; + } + a = b + 1; + } +out: + strlcpy(params, buffer, DEVARGS_MAXLEN); + return 0; +} + +static int +fs_get_u64_arg(const char *key __rte_unused, + const char *value, void *out) +{ + uint64_t *u64 = out; + char *endptr = NULL; + + if ((value == NULL) || (out == NULL)) + return -EINVAL; + errno = 0; + *u64 = strtoull(value, &endptr, 0); + if (errno != 0) + return -errno; + if (endptr == value) + return -1; + return 0; +} + +static int +fs_get_mac_addr_arg(const char *key __rte_unused, + const char *value, void *out) +{ + struct rte_ether_addr *ea = out; + + if ((value == NULL) || (out == NULL)) + return -EINVAL; + + return rte_ether_unformat_addr(value, ea); +} + +int +failsafe_args_parse(struct rte_eth_dev *dev, const char *params) +{ + struct fs_priv *priv; + char mut_params[DEVARGS_MAXLEN] = ""; + struct rte_kvargs *kvlist = NULL; + unsigned int arg_count; + size_t n; + int ret; + + priv = PRIV(dev); + ret = 0; + priv->subs_tx = FAILSAFE_MAX_ETHPORTS; + /* default parameters */ + n = strlcpy(mut_params, params, sizeof(mut_params)); + if (n >= sizeof(mut_params)) { + ERROR("Parameter string too long (>=%zu)", + sizeof(mut_params)); + return -ENOMEM; + } + ret = fs_parse_sub_devices(fs_parse_device_param, + dev, params); + if (ret < 0) + return ret; + ret = fs_remove_sub_devices_definition(mut_params); + if (ret < 0) + return ret; + if (strnlen(mut_params, sizeof(mut_params)) > 0) { + kvlist = rte_kvargs_parse(mut_params, + pmd_failsafe_init_parameters); + if (kvlist == NULL) { + ERROR("Error parsing parameters, usage:\n" + PMD_FAILSAFE_PARAM_STRING); + return -1; + } + /* PLUG_IN event poll timer */ + arg_count = rte_kvargs_count(kvlist, + PMD_FAILSAFE_HOTPLUG_POLL_KVARG); + if (arg_count == 1) { + ret = rte_kvargs_process(kvlist, + PMD_FAILSAFE_HOTPLUG_POLL_KVARG, + 
&fs_get_u64_arg, &failsafe_hotplug_poll); + if (ret < 0) + goto free_kvlist; + } + /* MAC addr */ + arg_count = rte_kvargs_count(kvlist, + PMD_FAILSAFE_MAC_KVARG); + if (arg_count > 0) { + ret = rte_kvargs_process(kvlist, + PMD_FAILSAFE_MAC_KVARG, + &fs_get_mac_addr_arg, + &dev->data->mac_addrs[0]); + if (ret < 0) + goto free_kvlist; + + failsafe_mac_from_arg = 1; + } + } + PRIV(dev)->state = DEV_PARSED; +free_kvlist: + rte_kvargs_free(kvlist); + return ret; +} + +void +failsafe_args_free(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + FOREACH_SUBDEV(sdev, i, dev) { + free(sdev->cmdline); + sdev->cmdline = NULL; + free(sdev->fd_str); + sdev->fd_str = NULL; + free(sdev->devargs.args); + sdev->devargs.args = NULL; + } +} + +static int +fs_count_device(struct rte_eth_dev *dev, const char *param, + uint8_t head __rte_unused) +{ + size_t b = 0; + + while (param[b] != '(' && + param[b] != '\0') + b++; + if (strncmp(param, "dev", b) != 0 && + strncmp(param, "exec", b) != 0 && + strncmp(param, "fd(", b) != 0) { + ERROR("Unrecognized device type: %.*s", (int)b, param); + return -EINVAL; + } + PRIV(dev)->subs_tail += 1; + return 0; +} + +int +failsafe_args_count_subdevice(struct rte_eth_dev *dev, + const char *params) +{ + return fs_parse_sub_devices(fs_count_device, + dev, params); +} + +static int +fs_parse_sub_device(struct sub_device *sdev) +{ + struct rte_devargs *da; + char devstr[DEVARGS_MAXLEN] = ""; + + da = &sdev->devargs; + snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args); + return fs_parse_device(sdev, devstr); +} + +int +failsafe_args_parse_subs(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret = 0; + + FOREACH_SUBDEV(sdev, i, dev) { + if (sdev->state >= DEV_PARSED) + continue; + if (sdev->cmdline) + ret = fs_execute_cmd(sdev, sdev->cmdline); + else if (sdev->fd_str) + ret = fs_read_fd(sdev, sdev->fd_str); + else + ret = fs_parse_sub_device(sdev); + if (ret == 0) + sdev->state = DEV_PARSED; + } + 
return 0; +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_eal.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_eal.c new file mode 100644 index 000000000..b9fc50867 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_eal.c @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <rte_string_fns.h> +#include <rte_malloc.h> + +#include "failsafe_private.h" + +static int +fs_ethdev_portid_get(const char *name, uint16_t *port_id) +{ + uint16_t pid; + size_t len; + + if (name == NULL) { + DEBUG("Null pointer is specified\n"); + return -EINVAL; + } + len = strlen(name); + for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) { + if (rte_eth_dev_is_valid_port(pid) && + !strncmp(name, rte_eth_devices[pid].device->name, len)) { + *port_id = pid; + return 0; + } + } + return -ENODEV; +} + +static int +fs_bus_init(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + struct rte_devargs *da; + uint8_t i; + uint16_t pid; + int ret; + + FOREACH_SUBDEV(sdev, i, dev) { + if (sdev->state != DEV_PARSED) + continue; + da = &sdev->devargs; + if (fs_ethdev_portid_get(da->name, &pid) != 0) { + struct rte_eth_dev_owner pid_owner; + + ret = rte_eal_hotplug_add(da->bus->name, + da->name, + da->args); + if (ret < 0) { + ERROR("sub_device %d probe failed %s%s%s", i, + rte_errno ? "(" : "", + rte_errno ? strerror(rte_errno) : "", + rte_errno ? ")" : ""); + continue; + } + if (fs_ethdev_portid_get(da->name, &pid) != 0) { + ERROR("sub_device %d init went wrong", i); + return -ENODEV; + } + /* + * The NEW callback tried to take ownership, check + * whether it succeed or didn't. 
+ */ + rte_eth_dev_owner_get(pid, &pid_owner); + if (pid_owner.id != PRIV(dev)->my_owner.id) { + INFO("sub_device %d owner(%s_%016"PRIX64") is not my," + " owner(%s_%016"PRIX64"), will try again later", + i, pid_owner.name, pid_owner.id, + PRIV(dev)->my_owner.name, + PRIV(dev)->my_owner.id); + continue; + } + } else { + /* The sub-device port was found. */ + char devstr[DEVARGS_MAXLEN] = ""; + struct rte_devargs *probed_da = + rte_eth_devices[pid].device->devargs; + + /* Take control of probed device. */ + free(da->args); + memset(da, 0, sizeof(*da)); + if (probed_da != NULL) + snprintf(devstr, sizeof(devstr), "%s,%s", + probed_da->name, probed_da->args); + else + strlcpy(devstr, + rte_eth_devices[pid].device->name, + sizeof(devstr)); + ret = rte_devargs_parse(da, devstr); + if (ret) { + ERROR("Probed devargs parsing failed with code" + " %d", ret); + return ret; + } + INFO("Taking control of a probed sub device" + " %d named %s", i, da->name); + ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner); + if (ret < 0) { + INFO("sub_device %d owner set failed (%s), " + "will try again later", i, strerror(-ret)); + continue; + } else if (strncmp(rte_eth_devices[pid].device->name, + da->name, strlen(da->name)) != 0) { + /* + * The device probably was removed and its port + * id was reallocated before ownership set. 
+ */ + rte_eth_dev_owner_unset(pid, + PRIV(dev)->my_owner.id); + INFO("sub_device %d was removed before taking" + " ownership, will try again later", i); + continue; + } + } + sdev->sdev_port_id = pid; + SUB_ID(sdev) = i; + sdev->fs_port_id = dev->data->port_id; + sdev->dev = ETH(sdev)->device; + sdev->state = DEV_PROBED; + } + return 0; +} + +int +failsafe_eal_init(struct rte_eth_dev *dev) +{ + int ret; + + ret = fs_bus_init(dev); + if (ret) + return ret; + if (PRIV(dev)->state < DEV_PROBED) + PRIV(dev)->state = DEV_PROBED; + fs_switch_dev(dev, NULL); + return 0; +} + +static int +fs_bus_uninit(struct rte_eth_dev *dev) +{ + struct sub_device *sdev = NULL; + uint8_t i; + int sdev_ret; + int ret = 0; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) { + sdev_ret = rte_dev_remove(sdev->dev); + if (sdev_ret < 0) { + ERROR("Failed to remove requested device %s (err: %d)", + sdev->dev->name, sdev_ret); + continue; + } + sdev->state = DEV_PROBED - 1; + } + return ret; +} + +int +failsafe_eal_uninit(struct rte_eth_dev *dev) +{ + int ret; + + ret = fs_bus_uninit(dev); + PRIV(dev)->state = DEV_PROBED - 1; + return ret; +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_ether.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_ether.c new file mode 100644 index 000000000..2b748bd8b --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_ether.c @@ -0,0 +1,638 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <unistd.h> + +#include <rte_flow.h> +#include <rte_flow_driver.h> +#include <rte_cycles.h> + +#include "failsafe_private.h" + +/** Print a message out of a flow error. 
*/ +static int +fs_flow_complain(struct rte_flow_error *error) +{ + static const char *const errstrlist[] = { + [RTE_FLOW_ERROR_TYPE_NONE] = "no error", + [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified", + [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)", + [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field", + [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field", + [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field", + [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field", + [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure", + [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length", + [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item", + [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions", + [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action", + }; + const char *errstr; + char buf[32]; + int err = rte_errno; + + if ((unsigned int)error->type >= RTE_DIM(errstrlist) || + !errstrlist[error->type]) + errstr = "unknown type"; + else + errstr = errstrlist[error->type]; + ERROR("Caught error type %d (%s): %s%s\n", + error->type, errstr, + error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ", + error->cause), buf) : "", + error->message ? 
error->message : "(no stated reason)"); + return -err; +} + +static int +eth_dev_flow_isolate_set(struct rte_eth_dev *dev, + struct sub_device *sdev) +{ + struct rte_flow_error ferror; + int ret; + + if (!PRIV(dev)->flow_isolated) { + DEBUG("Flow isolation already disabled"); + } else { + DEBUG("Enabling flow isolation"); + ret = rte_flow_isolate(PORT_ID(sdev), + PRIV(dev)->flow_isolated, + &ferror); + if (ret) { + fs_flow_complain(&ferror); + return ret; + } + } + return 0; +} + +static int +fs_eth_dev_conf_apply(struct rte_eth_dev *dev, + struct sub_device *sdev) +{ + struct rte_eth_dev *edev; + struct rte_vlan_filter_conf *vfc1; + struct rte_vlan_filter_conf *vfc2; + struct rte_flow *flow; + struct rte_flow_error ferror; + uint32_t i; + int ret; + + edev = ETH(sdev); + /* RX queue setup */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct rxq *rxq; + + rxq = dev->data->rx_queues[i]; + ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i, + rxq->info.nb_desc, rxq->socket_id, + &rxq->info.conf, rxq->info.mp); + if (ret) { + ERROR("rx_queue_setup failed"); + return ret; + } + } + /* TX queue setup */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct txq *txq; + + txq = dev->data->tx_queues[i]; + ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i, + txq->info.nb_desc, txq->socket_id, + &txq->info.conf); + if (ret) { + ERROR("tx_queue_setup failed"); + return ret; + } + } + /* dev_link.link_status */ + if (dev->data->dev_link.link_status != + edev->data->dev_link.link_status) { + DEBUG("Configuring link_status"); + if (dev->data->dev_link.link_status) + ret = rte_eth_dev_set_link_up(PORT_ID(sdev)); + else + ret = rte_eth_dev_set_link_down(PORT_ID(sdev)); + if (ret) { + ERROR("Failed to apply link_status"); + return ret; + } + } else { + DEBUG("link_status already set"); + } + /* promiscuous */ + if (dev->data->promiscuous != edev->data->promiscuous) { + DEBUG("Configuring promiscuous"); + if (dev->data->promiscuous) + ret = 
rte_eth_promiscuous_enable(PORT_ID(sdev)); + else + ret = rte_eth_promiscuous_disable(PORT_ID(sdev)); + if (ret != 0) { + ERROR("Failed to apply promiscuous mode"); + return ret; + } + } else { + DEBUG("promiscuous already set"); + } + /* all_multicast */ + if (dev->data->all_multicast != edev->data->all_multicast) { + DEBUG("Configuring all_multicast"); + if (dev->data->all_multicast) + ret = rte_eth_allmulticast_enable(PORT_ID(sdev)); + else + ret = rte_eth_allmulticast_disable(PORT_ID(sdev)); + if (ret != 0) { + ERROR("Failed to apply allmulticast mode"); + return ret; + } + } else { + DEBUG("all_multicast already set"); + } + /* MTU */ + if (dev->data->mtu != edev->data->mtu) { + DEBUG("Configuring MTU"); + ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu); + if (ret) { + ERROR("Failed to apply MTU"); + return ret; + } + } else { + DEBUG("MTU already set"); + } + /* default MAC */ + DEBUG("Configuring default MAC address"); + ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), + &dev->data->mac_addrs[0]); + if (ret) { + ERROR("Setting default MAC address failed"); + return ret; + } + /* additional MAC */ + if (PRIV(dev)->nb_mac_addr > 1) + DEBUG("Configure additional MAC address%s", + (PRIV(dev)->nb_mac_addr > 2 ? "es" : "")); + for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) { + struct rte_ether_addr *ea; + + ea = &dev->data->mac_addrs[i]; + ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea, + PRIV(dev)->mac_addr_pool[i]); + if (ret) { + char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE]; + + rte_ether_format_addr(ea_fmt, + RTE_ETHER_ADDR_FMT_SIZE, ea); + ERROR("Adding MAC address %s failed", ea_fmt); + return ret; + } + } + /* + * Propagate multicast MAC addresses to sub-devices, + * if non zero number of addresses is set. + * The condition is required to avoid breakage of failsafe + * for sub-devices which do not support the operation + * if the feature is really not used. 
+ */ + if (PRIV(dev)->nb_mcast_addr > 0) { + DEBUG("Configuring multicast MAC addresses"); + ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev), + PRIV(dev)->mcast_addrs, + PRIV(dev)->nb_mcast_addr); + if (ret) { + ERROR("Failed to apply multicast MAC addresses"); + return ret; + } + } + /* VLAN filter */ + vfc1 = &dev->data->vlan_filter_conf; + vfc2 = &edev->data->vlan_filter_conf; + if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) { + uint64_t vbit; + uint64_t ids; + size_t i; + uint16_t vlan_id; + + DEBUG("Configuring VLAN filter"); + for (i = 0; i < RTE_DIM(vfc1->ids); i++) { + if (vfc1->ids[i] == 0) + continue; + ids = vfc1->ids[i]; + while (ids) { + vlan_id = 64 * i; + /* count trailing zeroes */ + vbit = ~ids & (ids - 1); + /* clear least significant bit set */ + ids ^= (ids ^ (ids - 1)) ^ vbit; + for (; vbit; vlan_id++) + vbit >>= 1; + ret = rte_eth_dev_vlan_filter( + PORT_ID(sdev), vlan_id, 1); + if (ret) { + ERROR("Failed to apply VLAN filter %hu", + vlan_id); + return ret; + } + } + } + } else { + DEBUG("VLAN filter already set"); + } + /* rte_flow */ + if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) { + DEBUG("rte_flow already set"); + } else { + DEBUG("Resetting rte_flow configuration"); + ret = rte_flow_flush(PORT_ID(sdev), &ferror); + if (ret) { + fs_flow_complain(&ferror); + return ret; + } + i = 0; + rte_errno = 0; + DEBUG("Configuring rte_flow"); + TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) { + DEBUG("Creating flow #%" PRIu32, i++); + flow->flows[SUB_ID(sdev)] = + rte_flow_create(PORT_ID(sdev), + flow->rule.attr, + flow->rule.pattern, + flow->rule.actions, + &ferror); + ret = rte_errno; + if (ret) + break; + } + if (ret) { + fs_flow_complain(&ferror); + return ret; + } + } + return 0; +} + +static void +fs_dev_remove(struct sub_device *sdev) +{ + int ret; + + if (sdev == NULL) + return; + switch (sdev->state) { + case DEV_STARTED: + failsafe_rx_intr_uninstall_subdevice(sdev); + rte_eth_dev_stop(PORT_ID(sdev)); + sdev->state = DEV_ACTIVE; + 
/* fallthrough */ + case DEV_ACTIVE: + failsafe_eth_dev_unregister_callbacks(sdev); + rte_eth_dev_close(PORT_ID(sdev)); + sdev->state = DEV_PROBED; + /* fallthrough */ + case DEV_PROBED: + ret = rte_dev_remove(sdev->dev); + if (ret < 0) { + ERROR("Bus detach failed for sub_device %u", + SUB_ID(sdev)); + } else { + rte_eth_dev_release_port(ETH(sdev)); + } + sdev->state = DEV_PARSED; + /* fallthrough */ + case DEV_PARSED: + case DEV_UNDEFINED: + sdev->state = DEV_UNDEFINED; + sdev->sdev_port_id = RTE_MAX_ETHPORTS; + /* the end */ + break; + } + sdev->remove = 0; + failsafe_hotplug_alarm_install(fs_dev(sdev)); +} + +static void +fs_dev_stats_save(struct sub_device *sdev) +{ + struct rte_eth_stats stats; + int err; + + /* Attempt to read current stats. */ + err = rte_eth_stats_get(PORT_ID(sdev), &stats); + if (err) { + uint64_t timestamp = sdev->stats_snapshot.timestamp; + + WARN("Could not access latest statistics from sub-device %d.\n", + SUB_ID(sdev)); + if (timestamp != 0) + WARN("Using latest snapshot taken before %"PRIu64" seconds.\n", + (rte_rdtsc() - timestamp) / rte_get_tsc_hz()); + } + failsafe_stats_increment + (&PRIV(fs_dev(sdev))->stats_accumulator, + err ? 
&sdev->stats_snapshot.stats : &stats); + memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); +} + +static inline int +fs_rxtx_clean(struct sub_device *sdev) +{ + uint16_t i; + + for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++) + if (FS_ATOMIC_RX(sdev, i)) + return 0; + for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++) + if (FS_ATOMIC_TX(sdev, i)) + return 0; + return 1; +} + +void +failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev) +{ + int ret; + + if (sdev == NULL) + return; + if (sdev->rmv_callback) { + ret = rte_eth_dev_callback_unregister(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_RMV, + failsafe_eth_rmv_event_callback, + sdev); + if (ret) + WARN("Failed to unregister RMV callback for sub_device" + " %d", SUB_ID(sdev)); + sdev->rmv_callback = 0; + } + if (sdev->lsc_callback) { + ret = rte_eth_dev_callback_unregister(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_LSC, + failsafe_eth_lsc_event_callback, + sdev); + if (ret) + WARN("Failed to unregister LSC callback for sub_device" + " %d", SUB_ID(sdev)); + sdev->lsc_callback = 0; + } +} + +void +failsafe_dev_remove(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) + if (sdev->remove && fs_rxtx_clean(sdev)) { + if (fs_lock(dev, 1) != 0) + return; + fs_dev_stats_save(sdev); + fs_dev_remove(sdev); + fs_unlock(dev, 1); + } +} + +static int +failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev) +{ + struct rxq *rxq; + int ret; + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + + if (rxq->info.conf.rx_deferred_start && + dev->data->rx_queue_state[i] == + RTE_ETH_QUEUE_STATE_STARTED) { + /* + * The subdevice Rx queue does not launch on device + * start if deferred start flag is set. It needs to be + * started manually in case an appropriate failsafe Rx + * queue has been started earlier. 
+ */ + ret = dev->dev_ops->rx_queue_start(dev, i); + if (ret) { + ERROR("Could not synchronize Rx queue %d", i); + return ret; + } + } else if (dev->data->rx_queue_state[i] == + RTE_ETH_QUEUE_STATE_STOPPED) { + /* + * The subdevice Rx queue needs to be stopped manually + * in case an appropriate failsafe Rx queue has been + * stopped earlier. + */ + ret = dev->dev_ops->rx_queue_stop(dev, i); + if (ret) { + ERROR("Could not synchronize Rx queue %d", i); + return ret; + } + } + } + return 0; +} + +static int +failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev) +{ + struct txq *txq; + int ret; + uint16_t i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + + if (txq->info.conf.tx_deferred_start && + dev->data->tx_queue_state[i] == + RTE_ETH_QUEUE_STATE_STARTED) { + /* + * The subdevice Tx queue does not launch on device + * start if deferred start flag is set. It needs to be + * started manually in case an appropriate failsafe Tx + * queue has been started earlier. + */ + ret = dev->dev_ops->tx_queue_start(dev, i); + if (ret) { + ERROR("Could not synchronize Tx queue %d", i); + return ret; + } + } else if (dev->data->tx_queue_state[i] == + RTE_ETH_QUEUE_STATE_STOPPED) { + /* + * The subdevice Tx queue needs to be stopped manually + * in case an appropriate failsafe Tx queue has been + * stopped earlier. 
+ */ + ret = dev->dev_ops->tx_queue_stop(dev, i); + if (ret) { + ERROR("Could not synchronize Tx queue %d", i); + return ret; + } + } + } + return 0; +} + +int +failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint32_t inactive; + int ret; + uint8_t i; + + if (PRIV(dev)->state < DEV_PARSED) + return 0; + + ret = failsafe_args_parse_subs(dev); + if (ret) + goto err_remove; + + if (PRIV(dev)->state < DEV_PROBED) + return 0; + ret = failsafe_eal_init(dev); + if (ret) + goto err_remove; + if (PRIV(dev)->state < DEV_ACTIVE) + return 0; + inactive = 0; + FOREACH_SUBDEV(sdev, i, dev) { + if (sdev->state == DEV_PROBED) { + inactive |= UINT32_C(1) << i; + ret = eth_dev_flow_isolate_set(dev, sdev); + if (ret) { + ERROR("Could not apply configuration to sub_device %d", + i); + goto err_remove; + } + } + } + ret = dev->dev_ops->dev_configure(dev); + if (ret) + goto err_remove; + FOREACH_SUBDEV(sdev, i, dev) { + if (inactive & (UINT32_C(1) << i)) { + ret = fs_eth_dev_conf_apply(dev, sdev); + if (ret) { + ERROR("Could not apply configuration to sub_device %d", + i); + goto err_remove; + } + } + } + /* + * If new devices have been configured, check if + * the link state has changed. 
+ */ + if (inactive) + dev->dev_ops->link_update(dev, 1); + if (PRIV(dev)->state < DEV_STARTED) + return 0; + ret = dev->dev_ops->dev_start(dev); + if (ret) + goto err_remove; + ret = failsafe_eth_dev_rx_queues_sync(dev); + if (ret) + goto err_remove; + ret = failsafe_eth_dev_tx_queues_sync(dev); + if (ret) + goto err_remove; + return 0; +err_remove: + FOREACH_SUBDEV(sdev, i, dev) + if (sdev->state != PRIV(dev)->state) + sdev->remove = 1; + return ret; +} + +void +failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from) +{ + uint32_t i; + + RTE_ASSERT(to != NULL && from != NULL); + to->ipackets += from->ipackets; + to->opackets += from->opackets; + to->ibytes += from->ibytes; + to->obytes += from->obytes; + to->imissed += from->imissed; + to->ierrors += from->ierrors; + to->oerrors += from->oerrors; + to->rx_nombuf += from->rx_nombuf; + for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) { + to->q_ipackets[i] += from->q_ipackets[i]; + to->q_opackets[i] += from->q_opackets[i]; + to->q_ibytes[i] += from->q_ibytes[i]; + to->q_obytes[i] += from->q_obytes[i]; + to->q_errors[i] += from->q_errors[i]; + } +} + +int +failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused, + enum rte_eth_event_type event __rte_unused, + void *cb_arg, void *out __rte_unused) +{ + struct sub_device *sdev = cb_arg; + + fs_lock(fs_dev(sdev), 0); + /* Switch as soon as possible tx_dev. */ + fs_switch_dev(fs_dev(sdev), sdev); + /* Use safe bursts in any case. */ + failsafe_set_burst_fn(fs_dev(sdev), 1); + /* + * Async removal, the sub-PMD will try to unregister + * the callback at the source of the current thread context. 
+ */ + sdev->remove = 1; + fs_unlock(fs_dev(sdev), 0); + return 0; +} + +int +failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused, + enum rte_eth_event_type event __rte_unused, + void *cb_arg, void *out __rte_unused) +{ + struct rte_eth_dev *dev = cb_arg; + int ret; + + ret = dev->dev_ops->link_update(dev, 0); + /* We must pass on the LSC event */ + if (ret) + return _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); + else + return 0; +} + +/* Take sub-device ownership before it becomes exposed to the application. */ +int +failsafe_eth_new_event_callback(uint16_t port_id, + enum rte_eth_event_type event __rte_unused, + void *cb_arg, void *out __rte_unused) +{ + struct rte_eth_dev *fs_dev = cb_arg; + struct sub_device *sdev; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + uint8_t i; + + FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) { + if (sdev->state >= DEV_PROBED) + continue; + if (dev->device == NULL) { + WARN("Trying to probe malformed device %s.\n", + sdev->devargs.name); + continue; + } + if (strcmp(sdev->devargs.name, dev->device->name) != 0) + continue; + rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner); + /* The actual owner will be checked after the port probing. */ + break; + } + return 0; +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_flow.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_flow.c new file mode 100644 index 000000000..5e2b5f7c6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_flow.c @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. 
+ * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <stddef.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_errno.h> +#include <rte_malloc.h> +#include <rte_tailq.h> +#include <rte_flow.h> +#include <rte_flow_driver.h> + +#include "failsafe_private.h" + +static struct rte_flow * +fs_flow_allocate(const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions) +{ + struct rte_flow *flow; + const struct rte_flow_conv_rule rule = { + .attr_ro = attr, + .pattern_ro = items, + .actions_ro = actions, + }; + struct rte_flow_error error; + int ret; + + ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error); + if (ret < 0) { + ERROR("Unable to process flow rule (%s): %s", + error.message ? error.message : "unspecified", + strerror(rte_errno)); + return NULL; + } + flow = rte_zmalloc(NULL, offsetof(struct rte_flow, rule) + ret, + RTE_CACHE_LINE_SIZE); + if (flow == NULL) { + ERROR("Could not allocate new flow"); + return NULL; + } + ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->rule, ret, &rule, + &error); + if (ret < 0) { + ERROR("Failed to copy flow rule (%s): %s", + error.message ? 
error.message : "unspecified", + strerror(rte_errno)); + rte_free(flow); + return NULL; + } + return flow; +} + +static void +fs_flow_release(struct rte_flow **flow) +{ + rte_free(*flow); + *flow = NULL; +} + +static int +fs_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item patterns[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_flow_validate on sub_device %d", i); + ret = rte_flow_validate(PORT_ID(sdev), + attr, patterns, actions, error); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_flow_validate failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + return 0; +} + +static struct rte_flow * +fs_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item patterns[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct sub_device *sdev; + struct rte_flow *flow; + uint8_t i; + + fs_lock(dev, 0); + flow = fs_flow_allocate(attr, patterns, actions); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + flow->flows[i] = rte_flow_create(PORT_ID(sdev), + attr, patterns, actions, error); + if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) { + ERROR("Failed to create flow on sub_device %d", + i); + goto err; + } + } + TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next); + fs_unlock(dev, 0); + return flow; +err: + FOREACH_SUBDEV(sdev, i, dev) { + if (flow->flows[i] != NULL) + rte_flow_destroy(PORT_ID(sdev), + flow->flows[i], error); + } + fs_flow_release(&flow); + fs_unlock(dev, 0); + return NULL; +} + +static int +fs_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + if (flow == NULL) { + 
ERROR("Invalid flow"); + return -EINVAL; + } + ret = 0; + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + int local_ret; + + if (flow->flows[i] == NULL) + continue; + local_ret = rte_flow_destroy(PORT_ID(sdev), + flow->flows[i], error); + if ((local_ret = fs_err(sdev, local_ret))) { + ERROR("Failed to destroy flow on sub_device %d: %d", + i, local_ret); + if (ret == 0) + ret = local_ret; + } + } + TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next); + fs_flow_release(&flow); + fs_unlock(dev, 0); + return ret; +} + +static int +fs_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + struct sub_device *sdev; + struct rte_flow *flow; + void *tmp; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_flow_flush on sub_device %d", i); + ret = rte_flow_flush(PORT_ID(sdev), error); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_flow_flush failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) { + TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next); + fs_flow_release(&flow); + } + fs_unlock(dev, 0); + return 0; +} + +static int +fs_flow_query(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_action *action, + void *arg, + struct rte_flow_error *error) +{ + struct sub_device *sdev; + + fs_lock(dev, 0); + sdev = TX_SUBDEV(dev); + if (sdev != NULL) { + int ret = rte_flow_query(PORT_ID(sdev), + flow->flows[SUB_ID(sdev)], + action, arg, error); + + if ((ret = fs_err(sdev, ret))) { + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + WARN("No active sub_device to query about its flow"); + return -1; +} + +static int +fs_flow_isolate(struct rte_eth_dev *dev, + int set, + struct rte_flow_error *error) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV(sdev, i, dev) { + if (sdev->state < 
DEV_PROBED) + continue; + DEBUG("Calling rte_flow_isolate on sub_device %d", i); + if (PRIV(dev)->flow_isolated != sdev->flow_isolated) + WARN("flow isolation mode of sub_device %d in incoherent state.", + i); + ret = rte_flow_isolate(PORT_ID(sdev), set, error); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_flow_isolate failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + sdev->flow_isolated = set; + } + PRIV(dev)->flow_isolated = set; + fs_unlock(dev, 0); + return 0; +} + +const struct rte_flow_ops fs_flow_ops = { + .validate = fs_flow_validate, + .create = fs_flow_create, + .destroy = fs_flow_destroy, + .flush = fs_flow_flush, + .query = fs_flow_query, + .isolate = fs_flow_isolate, +}; diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_intr.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_intr.c new file mode 100644 index 000000000..602c04033 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_intr.c @@ -0,0 +1,535 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Mellanox Technologies, Ltd + */ + +/** + * @file + * Interrupts handling for failsafe driver. + */ + +#if defined(LINUX) +#include <sys/epoll.h> +#endif +#include <unistd.h> + +#include <rte_alarm.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_interrupts.h> +#include <rte_io.h> +#include <rte_service_component.h> + +#include "failsafe_private.h" + +#define NUM_RX_PROXIES (FAILSAFE_MAX_ETHPORTS * RTE_MAX_RXTX_INTR_VEC_ID) + + +/** + * Open an epoll file descriptor. + * + * @param flags + * Flags for defining epoll behavior. + * @return + * 0 on success, negative errno value otherwise. + */ +static int +fs_epoll_create1(int flags) +{ +#if defined(LINUX) + return epoll_create1(flags); +#elif defined(BSD) + RTE_SET_USED(flags); + return -ENOTSUP; +#endif +} + +/** + * Install failsafe Rx event proxy service. 
+ * The Rx event proxy is the service that listens to Rx events from the + * subdevices and triggers failsafe Rx events accordingly. + * + * @param priv + * Pointer to failsafe private structure. + * @return + * 0 on success, negative errno value otherwise. + */ +static int +fs_rx_event_proxy_routine(void *data) +{ + struct fs_priv *priv; + struct rxq *rxq; + struct rte_epoll_event *events; + uint64_t u64; + int i, n; + int rc = 0; + + u64 = 1; + priv = data; + events = priv->rxp.evec; + n = rte_epoll_wait(priv->rxp.efd, events, NUM_RX_PROXIES, -1); + for (i = 0; i < n; i++) { + rxq = events[i].epdata.data; + if (rxq->enable_events && rxq->event_fd != -1) { + if (write(rxq->event_fd, &u64, sizeof(u64)) != + sizeof(u64)) { + ERROR("Failed to proxy Rx event to socket %d", + rxq->event_fd); + rc = -EIO; + } + } + } + return rc; +} + +/** + * Uninstall failsafe Rx event proxy service. + * + * @param priv + * Pointer to failsafe private structure. + */ +static void +fs_rx_event_proxy_service_uninstall(struct fs_priv *priv) +{ + /* Unregister the event service. */ + switch (priv->rxp.sstate) { + case SS_RUNNING: + rte_service_map_lcore_set(priv->rxp.sid, priv->rxp.scid, 0); + /* fall through */ + case SS_READY: + rte_service_runstate_set(priv->rxp.sid, 0); + rte_service_set_stats_enable(priv->rxp.sid, 0); + rte_service_component_runstate_set(priv->rxp.sid, 0); + /* fall through */ + case SS_REGISTERED: + rte_service_component_unregister(priv->rxp.sid); + /* fall through */ + default: + break; + } +} + +/** + * Install the failsafe Rx event proxy service. + * + * @param priv + * Pointer to failsafe private structure. + * @return + * 0 on success, negative errno value otherwise. 
+ */ +static int +fs_rx_event_proxy_service_install(struct fs_priv *priv) +{ + struct rte_service_spec service; + int32_t num_service_cores; + int ret = 0; + + num_service_cores = rte_service_lcore_count(); + if (num_service_cores <= 0) { + ERROR("Failed to install Rx interrupts, " + "no service core found"); + return -ENOTSUP; + } + /* prepare service info */ + memset(&service, 0, sizeof(struct rte_service_spec)); + snprintf(service.name, sizeof(service.name), "%s_Rx_service", + priv->data->name); + service.socket_id = priv->data->numa_node; + service.callback = fs_rx_event_proxy_routine; + service.callback_userdata = priv; + + if (priv->rxp.sstate == SS_NO_SERVICE) { + uint32_t service_core_list[num_service_cores]; + + /* get a service core to work with */ + ret = rte_service_lcore_list(service_core_list, + num_service_cores); + if (ret <= 0) { + ERROR("Failed to install Rx interrupts, " + "service core list empty or corrupted"); + return -ENOTSUP; + } + priv->rxp.scid = service_core_list[0]; + ret = rte_service_lcore_add(priv->rxp.scid); + if (ret && ret != -EALREADY) { + ERROR("Failed adding service core"); + return ret; + } + /* service core may be in "stopped" state, start it */ + ret = rte_service_lcore_start(priv->rxp.scid); + if (ret && (ret != -EALREADY)) { + ERROR("Failed to install Rx interrupts, " + "service core not started"); + return ret; + } + /* register our service */ + int32_t ret = rte_service_component_register(&service, + &priv->rxp.sid); + if (ret) { + ERROR("service register() failed"); + return -ENOEXEC; + } + priv->rxp.sstate = SS_REGISTERED; + /* run the service */ + ret = rte_service_component_runstate_set(priv->rxp.sid, 1); + if (ret < 0) { + ERROR("Failed Setting component runstate\n"); + return ret; + } + ret = rte_service_set_stats_enable(priv->rxp.sid, 1); + if (ret < 0) { + ERROR("Failed enabling stats\n"); + return ret; + } + ret = rte_service_runstate_set(priv->rxp.sid, 1); + if (ret < 0) { + ERROR("Failed to run service\n"); + 
return ret; + } + priv->rxp.sstate = SS_READY; + /* map the service with the service core */ + ret = rte_service_map_lcore_set(priv->rxp.sid, + priv->rxp.scid, 1); + if (ret) { + ERROR("Failed to install Rx interrupts, " + "could not map service core"); + return ret; + } + priv->rxp.sstate = SS_RUNNING; + } + return 0; +} + +/** + * Install failsafe Rx event proxy subsystem. + * This is the way the failsafe PMD generates Rx events on behalf of its + * subdevices. + * + * @param priv + * Pointer to failsafe private structure. + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. + */ +static int +fs_rx_event_proxy_install(struct fs_priv *priv) +{ + int rc = 0; + + /* + * Create the epoll fd and event vector for the proxy service to + * wait on for Rx events generated by the subdevices. + */ + priv->rxp.efd = fs_epoll_create1(0); + if (priv->rxp.efd < 0) { + rte_errno = errno; + ERROR("Failed to create epoll," + " Rx interrupts will not be supported"); + return -rte_errno; + } + priv->rxp.evec = calloc(NUM_RX_PROXIES, sizeof(*priv->rxp.evec)); + if (priv->rxp.evec == NULL) { + ERROR("Failed to allocate memory for event vectors," + " Rx interrupts will not be supported"); + rc = -ENOMEM; + goto error; + } + rc = fs_rx_event_proxy_service_install(priv); + if (rc < 0) + goto error; + return 0; +error: + if (priv->rxp.efd >= 0) { + close(priv->rxp.efd); + priv->rxp.efd = -1; + } + if (priv->rxp.evec != NULL) { + free(priv->rxp.evec); + priv->rxp.evec = NULL; + } + rte_errno = -rc; + return rc; +} + +/** + * RX Interrupt control per subdevice. + * + * @param sdev + * Pointer to sub-device structure. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +static int +failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev *fsdev; + int epfd; + uint16_t pid; + uint16_t qid; + struct rxq *fsrxq; + int rc; + int ret = 0; + + fsdev = fs_dev(sdev); + if (sdev == NULL || (ETH(sdev) == NULL) || + fsdev == NULL || (PRIV(fsdev) == NULL)) { + ERROR("Called with invalid arguments"); + return -EINVAL; + } + dev = ETH(sdev); + epfd = PRIV(fsdev)->rxp.efd; + pid = PORT_ID(sdev); + + if (epfd <= 0) { + if (op == RTE_INTR_EVENT_ADD) { + ERROR("Proxy events are not initialized"); + return -EBADF; + } else { + return 0; + } + } + if (dev->data->nb_rx_queues > fsdev->data->nb_rx_queues) { + ERROR("subdevice has too many queues," + " Interrupts will not be enabled"); + return -E2BIG; + } + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + fsrxq = fsdev->data->rx_queues[qid]; + rc = rte_eth_dev_rx_intr_ctl_q(pid, qid, epfd, + op, (void *)fsrxq); + if (rc) { + ERROR("rte_eth_dev_rx_intr_ctl_q failed for " + "port %d queue %d, epfd %d, error %d", + pid, qid, epfd, rc); + ret = rc; + } + } + return ret; +} + +/** + * Install Rx interrupts subsystem for a subdevice. + * This is a support for dynamically adding subdevices. + * + * @param sdev + * Pointer to subdevice structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. 
+ */ +int failsafe_rx_intr_install_subdevice(struct sub_device *sdev) +{ + int rc; + int qid; + struct rte_eth_dev *fsdev; + struct rxq **rxq; + const struct rte_intr_conf *const intr_conf = + &ETH(sdev)->data->dev_conf.intr_conf; + + fsdev = fs_dev(sdev); + rxq = (struct rxq **)fsdev->data->rx_queues; + if (intr_conf->rxq == 0) + return 0; + rc = failsafe_eth_rx_intr_ctl_subdevice(sdev, RTE_INTR_EVENT_ADD); + if (rc) + return rc; + /* enable interrupts on already-enabled queues */ + for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { + if (rxq[qid]->enable_events) { + int ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), + qid); + if (ret && (ret != -ENOTSUP)) { + ERROR("Failed to enable interrupts on " + "port %d queue %d", PORT_ID(sdev), qid); + rc = ret; + } + } + } + return rc; +} + +/** + * Uninstall Rx interrupts subsystem for a subdevice. + * This is a support for dynamically removing subdevices. + * + * @param sdev + * Pointer to subdevice structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. + */ +void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev) +{ + int qid; + struct rte_eth_dev *fsdev; + struct rxq *fsrxq; + + fsdev = fs_dev(sdev); + for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { + if (qid < fsdev->data->nb_rx_queues) { + fsrxq = fsdev->data->rx_queues[qid]; + if (fsrxq != NULL && fsrxq->enable_events) + rte_eth_dev_rx_intr_disable(PORT_ID(sdev), + qid); + } + } + failsafe_eth_rx_intr_ctl_subdevice(sdev, RTE_INTR_EVENT_DEL); +} + +/** + * Uninstall failsafe Rx event proxy. + * + * @param priv + * Pointer to failsafe private structure. + */ +static void +fs_rx_event_proxy_uninstall(struct fs_priv *priv) +{ + fs_rx_event_proxy_service_uninstall(priv); + if (priv->rxp.evec != NULL) { + free(priv->rxp.evec); + priv->rxp.evec = NULL; + } + if (priv->rxp.efd >= 0) { + close(priv->rxp.efd); + priv->rxp.efd = -1; + } +} + +/** + * Uninstall failsafe interrupt vector. 
+ * + * @param priv + * Pointer to failsafe private structure. + */ +static void +fs_rx_intr_vec_uninstall(struct fs_priv *priv) +{ + struct rte_intr_handle *intr_handle; + + intr_handle = &priv->intr_handle; + if (intr_handle->intr_vec != NULL) { + free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; + } + intr_handle->nb_efd = 0; +} + +/** + * Installs failsafe interrupt vector to be registered with EAL later on. + * + * @param priv + * Pointer to failsafe private structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. + */ +static int +fs_rx_intr_vec_install(struct fs_priv *priv) +{ + unsigned int i; + unsigned int rxqs_n; + unsigned int n; + unsigned int count; + struct rte_intr_handle *intr_handle; + + rxqs_n = priv->data->nb_rx_queues; + n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + count = 0; + intr_handle = &priv->intr_handle; + RTE_ASSERT(intr_handle->intr_vec == NULL); + /* Allocate the interrupt vector of the failsafe Rx proxy interrupts */ + intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); + if (intr_handle->intr_vec == NULL) { + fs_rx_intr_vec_uninstall(priv); + rte_errno = ENOMEM; + ERROR("Failed to allocate memory for interrupt vector," + " Rx interrupts will not be supported"); + return -rte_errno; + } + for (i = 0; i < n; i++) { + struct rxq *rxq = priv->data->rx_queues[i]; + + /* Skip queues that cannot request interrupts. */ + if (rxq == NULL || rxq->event_fd < 0) { + /* Use invalid intr_vec[] index to disable entry. 
*/ + intr_handle->intr_vec[i] = + RTE_INTR_VEC_RXTX_OFFSET + + RTE_MAX_RXTX_INTR_VEC_ID; + continue; + } + if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { + rte_errno = E2BIG; + ERROR("Too many Rx queues for interrupt vector size" + " (%d), Rx interrupts cannot be enabled", + RTE_MAX_RXTX_INTR_VEC_ID); + fs_rx_intr_vec_uninstall(priv); + return -rte_errno; + } + intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; + intr_handle->efds[count] = rxq->event_fd; + count++; + } + if (count == 0) { + fs_rx_intr_vec_uninstall(priv); + } else { + intr_handle->nb_efd = count; + intr_handle->efd_counter_size = sizeof(uint64_t); + } + return 0; +} + + +/** + * Uninstall failsafe Rx interrupts subsystem. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. + */ +void +failsafe_rx_intr_uninstall(struct rte_eth_dev *dev) +{ + struct fs_priv *priv; + struct rte_intr_handle *intr_handle; + + priv = PRIV(dev); + intr_handle = &priv->intr_handle; + rte_intr_free_epoll_fd(intr_handle); + fs_rx_event_proxy_uninstall(priv); + fs_rx_intr_vec_uninstall(priv); + dev->intr_handle = NULL; +} + +/** + * Install failsafe Rx interrupts subsystem. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. 
+ */ +int +failsafe_rx_intr_install(struct rte_eth_dev *dev) +{ + struct fs_priv *priv = PRIV(dev); + const struct rte_intr_conf *const intr_conf = + &priv->data->dev_conf.intr_conf; + + if (intr_conf->rxq == 0 || dev->intr_handle != NULL) + return 0; + if (fs_rx_intr_vec_install(priv) < 0) + return -rte_errno; + if (fs_rx_event_proxy_install(priv) < 0) { + fs_rx_intr_vec_uninstall(priv); + return -rte_errno; + } + dev->intr_handle = &priv->intr_handle; + return 0; +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_ops.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_ops.c new file mode 100644 index 000000000..e1d08e46c --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_ops.c @@ -0,0 +1,1511 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> + +#include <rte_debug.h> +#include <rte_atomic.h> +#include <rte_ethdev_driver.h> +#include <rte_malloc.h> +#include <rte_flow.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_string_fns.h> + +#include "failsafe_private.h" + +static int +fs_dev_configure(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV(sdev, i, dev) { + int rmv_interrupt = 0; + int lsc_interrupt = 0; + int lsc_enabled; + + if (sdev->state != DEV_PROBED && + !(PRIV(dev)->alarm_lock == 0 && sdev->state == DEV_ACTIVE)) + continue; + + rmv_interrupt = ETH(sdev)->data->dev_flags & + RTE_ETH_DEV_INTR_RMV; + if (rmv_interrupt) { + DEBUG("Enabling RMV interrupts for sub_device %d", i); + dev->data->dev_conf.intr_conf.rmv = 1; + } else { + DEBUG("sub_device %d does not support RMV event", i); + } + lsc_enabled = dev->data->dev_conf.intr_conf.lsc; + lsc_interrupt = lsc_enabled && + (ETH(sdev)->data->dev_flags & + RTE_ETH_DEV_INTR_LSC); + if (lsc_interrupt) { + DEBUG("Enabling LSC interrupts for sub_device %d", i); + 
dev->data->dev_conf.intr_conf.lsc = 1; + } else if (lsc_enabled && !lsc_interrupt) { + DEBUG("Disabling LSC interrupts for sub_device %d", i); + dev->data->dev_conf.intr_conf.lsc = 0; + } + DEBUG("Configuring sub-device %d", i); + ret = rte_eth_dev_configure(PORT_ID(sdev), + dev->data->nb_rx_queues, + dev->data->nb_tx_queues, + &dev->data->dev_conf); + if (ret) { + if (!fs_err(sdev, ret)) + continue; + ERROR("Could not configure sub_device %d", i); + fs_unlock(dev, 0); + return ret; + } + if (rmv_interrupt && sdev->rmv_callback == 0) { + ret = rte_eth_dev_callback_register(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_RMV, + failsafe_eth_rmv_event_callback, + sdev); + if (ret) + WARN("Failed to register RMV callback for sub_device %d", + SUB_ID(sdev)); + else + sdev->rmv_callback = 1; + } + dev->data->dev_conf.intr_conf.rmv = 0; + if (lsc_interrupt && sdev->lsc_callback == 0) { + ret = rte_eth_dev_callback_register(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_LSC, + failsafe_eth_lsc_event_callback, + dev); + if (ret) + WARN("Failed to register LSC callback for sub_device %d", + SUB_ID(sdev)); + else + sdev->lsc_callback = 1; + } + dev->data->dev_conf.intr_conf.lsc = lsc_enabled; + sdev->state = DEV_ACTIVE; + } + if (PRIV(dev)->state < DEV_ACTIVE) + PRIV(dev)->state = DEV_ACTIVE; + fs_unlock(dev, 0); + return 0; +} + +static void +fs_set_queues_state_start(struct rte_eth_dev *dev) +{ + struct rxq *rxq; + struct txq *txq; + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (rxq != NULL && !rxq->info.conf.rx_deferred_start) + dev->data->rx_queue_state[i] = + RTE_ETH_QUEUE_STATE_STARTED; + } + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if (txq != NULL && !txq->info.conf.tx_deferred_start) + dev->data->tx_queue_state[i] = + RTE_ETH_QUEUE_STATE_STARTED; + } +} + +static int +fs_dev_start(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + ret = 
failsafe_rx_intr_install(dev); + if (ret) { + fs_unlock(dev, 0); + return ret; + } + FOREACH_SUBDEV(sdev, i, dev) { + if (sdev->state != DEV_ACTIVE) + continue; + DEBUG("Starting sub_device %d", i); + ret = rte_eth_dev_start(PORT_ID(sdev)); + if (ret) { + if (!fs_err(sdev, ret)) + continue; + fs_unlock(dev, 0); + return ret; + } + ret = failsafe_rx_intr_install_subdevice(sdev); + if (ret) { + if (!fs_err(sdev, ret)) + continue; + rte_eth_dev_stop(PORT_ID(sdev)); + fs_unlock(dev, 0); + return ret; + } + sdev->state = DEV_STARTED; + } + if (PRIV(dev)->state < DEV_STARTED) { + PRIV(dev)->state = DEV_STARTED; + fs_set_queues_state_start(dev); + } + fs_switch_dev(dev, NULL); + fs_unlock(dev, 0); + return 0; +} + +static void +fs_set_queues_state_stop(struct rte_eth_dev *dev) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) + if (dev->data->rx_queues[i] != NULL) + dev->data->rx_queue_state[i] = + RTE_ETH_QUEUE_STATE_STOPPED; + for (i = 0; i < dev->data->nb_tx_queues; i++) + if (dev->data->tx_queues[i] != NULL) + dev->data->tx_queue_state[i] = + RTE_ETH_QUEUE_STATE_STOPPED; +} + +static void +fs_dev_stop(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + fs_lock(dev, 0); + PRIV(dev)->state = DEV_STARTED - 1; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) { + rte_eth_dev_stop(PORT_ID(sdev)); + failsafe_rx_intr_uninstall_subdevice(sdev); + sdev->state = DEV_STARTED - 1; + } + failsafe_rx_intr_uninstall(dev); + fs_set_queues_state_stop(dev); + fs_unlock(dev, 0); +} + +static int +fs_dev_set_link_up(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i); + ret = rte_eth_dev_set_link_up(PORT_ID(sdev)); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + 
fs_unlock(dev, 0); + return 0; +} + +static int +fs_dev_set_link_down(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i); + ret = rte_eth_dev_set_link_down(PORT_ID(sdev)); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + return 0; +} + +static void fs_dev_free_queues(struct rte_eth_dev *dev); +static void +fs_dev_close(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + fs_lock(dev, 0); + failsafe_hotplug_alarm_cancel(dev); + if (PRIV(dev)->state == DEV_STARTED) + dev->dev_ops->dev_stop(dev); + PRIV(dev)->state = DEV_ACTIVE - 1; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Closing sub_device %d", i); + failsafe_eth_dev_unregister_callbacks(sdev); + rte_eth_dev_close(PORT_ID(sdev)); + sdev->state = DEV_ACTIVE - 1; + } + fs_dev_free_queues(dev); + fs_unlock(dev, 0); +} + +static int +fs_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + int err = 0; + bool failure = true; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + uint16_t port_id = ETH(sdev)->data->port_id; + + ret = rte_eth_dev_rx_queue_stop(port_id, rx_queue_id); + ret = fs_err(sdev, ret); + if (ret) { + ERROR("Rx queue stop failed for subdevice %d", i); + err = ret; + } else { + failure = false; + } + } + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + fs_unlock(dev, 0); + /* Return 0 in case of at least one successful queue stop */ + return (failure) ? 
err : 0; +} + +static int +fs_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + uint16_t port_id = ETH(sdev)->data->port_id; + + ret = rte_eth_dev_rx_queue_start(port_id, rx_queue_id); + ret = fs_err(sdev, ret); + if (ret) { + ERROR("Rx queue start failed for subdevice %d", i); + fs_rx_queue_stop(dev, rx_queue_id); + fs_unlock(dev, 0); + return ret; + } + } + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; + fs_unlock(dev, 0); + return 0; +} + +static int +fs_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + int err = 0; + bool failure = true; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + uint16_t port_id = ETH(sdev)->data->port_id; + + ret = rte_eth_dev_tx_queue_stop(port_id, tx_queue_id); + ret = fs_err(sdev, ret); + if (ret) { + ERROR("Tx queue stop failed for subdevice %d", i); + err = ret; + } else { + failure = false; + } + } + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + fs_unlock(dev, 0); + /* Return 0 in case of at least one successful queue stop */ + return (failure) ? 
err : 0; +} + +static int +fs_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + uint16_t port_id = ETH(sdev)->data->port_id; + + ret = rte_eth_dev_tx_queue_start(port_id, tx_queue_id); + ret = fs_err(sdev, ret); + if (ret) { + ERROR("Tx queue start failed for subdevice %d", i); + fs_tx_queue_stop(dev, tx_queue_id); + fs_unlock(dev, 0); + return ret; + } + } + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; + fs_unlock(dev, 0); + return 0; +} + +static void +fs_rx_queue_release(void *queue) +{ + struct rte_eth_dev *dev; + struct sub_device *sdev; + uint8_t i; + struct rxq *rxq; + + if (queue == NULL) + return; + rxq = queue; + dev = &rte_eth_devices[rxq->priv->data->port_id]; + fs_lock(dev, 0); + if (rxq->event_fd >= 0) + close(rxq->event_fd); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + if (ETH(sdev)->data->rx_queues != NULL && + ETH(sdev)->data->rx_queues[rxq->qid] != NULL) { + SUBOPS(sdev, rx_queue_release) + (ETH(sdev)->data->rx_queues[rxq->qid]); + } + } + dev->data->rx_queues[rxq->qid] = NULL; + rte_free(rxq); + fs_unlock(dev, 0); +} + +static int +fs_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool) +{ + /* + * FIXME: Add a proper interface in rte_eal_interrupts for + * allocating eventfd as an interrupt vector. + * For the time being, fake as if we are using MSIX interrupts, + * this will cause rte_intr_efd_enable to allocate an eventfd for us. 
+ */ + struct rte_intr_handle intr_handle = { + .type = RTE_INTR_HANDLE_VFIO_MSIX, + .efds = { -1, }, + }; + struct sub_device *sdev; + struct rxq *rxq; + uint8_t i; + int ret; + + fs_lock(dev, 0); + if (rx_conf->rx_deferred_start) { + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) { + if (SUBOPS(sdev, rx_queue_start) == NULL) { + ERROR("Rx queue deferred start is not " + "supported for subdevice %d", i); + fs_unlock(dev, 0); + return -EINVAL; + } + } + } + rxq = dev->data->rx_queues[rx_queue_id]; + if (rxq != NULL) { + fs_rx_queue_release(rxq); + dev->data->rx_queues[rx_queue_id] = NULL; + } + rxq = rte_zmalloc(NULL, + sizeof(*rxq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) { + fs_unlock(dev, 0); + return -ENOMEM; + } + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&rxq->refcnt[i]); + rxq->qid = rx_queue_id; + rxq->socket_id = socket_id; + rxq->info.mp = mb_pool; + rxq->info.conf = *rx_conf; + rxq->info.nb_desc = nb_rx_desc; + rxq->priv = PRIV(dev); + rxq->sdev = PRIV(dev)->subs; + ret = rte_intr_efd_enable(&intr_handle, 1); + if (ret < 0) { + fs_unlock(dev, 0); + return ret; + } + rxq->event_fd = intr_handle.efds[0]; + dev->data->rx_queues[rx_queue_id] = rxq; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_rx_queue_setup(PORT_ID(sdev), + rx_queue_id, + nb_rx_desc, socket_id, + rx_conf, mb_pool); + if ((ret = fs_err(sdev, ret))) { + ERROR("RX queue setup failed for sub_device %d", i); + goto free_rxq; + } + } + fs_unlock(dev, 0); + return 0; +free_rxq: + fs_rx_queue_release(rxq); + fs_unlock(dev, 0); + return ret; +} + +static int +fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx) +{ + struct rxq *rxq; + struct sub_device *sdev; + uint8_t i; + int ret; + int rc = 0; + + fs_lock(dev, 0); + if (idx >= dev->data->nb_rx_queues) { + rc = -EINVAL; + goto unlock; + } + rxq = dev->data->rx_queues[idx]; + if (rxq == NULL || rxq->event_fd <= 0) { + rc = -EINVAL; + goto unlock; + } + /* Fail if 
proxy service is nor running. */ + if (PRIV(dev)->rxp.sstate != SS_RUNNING) { + ERROR("failsafe interrupt services are not running"); + rc = -EAGAIN; + goto unlock; + } + rxq->enable_events = 1; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), idx); + ret = fs_err(sdev, ret); + if (ret) + rc = ret; + } +unlock: + fs_unlock(dev, 0); + if (rc) + rte_errno = -rc; + return rc; +} + +static int +fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx) +{ + struct rxq *rxq; + struct sub_device *sdev; + uint64_t u64; + uint8_t i; + int rc = 0; + int ret; + + fs_lock(dev, 0); + if (idx >= dev->data->nb_rx_queues) { + rc = -EINVAL; + goto unlock; + } + rxq = dev->data->rx_queues[idx]; + if (rxq == NULL || rxq->event_fd <= 0) { + rc = -EINVAL; + goto unlock; + } + rxq->enable_events = 0; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_rx_intr_disable(PORT_ID(sdev), idx); + ret = fs_err(sdev, ret); + if (ret) + rc = ret; + } + /* Clear pending events */ + while (read(rxq->event_fd, &u64, sizeof(uint64_t)) > 0) + ; +unlock: + fs_unlock(dev, 0); + if (rc) + rte_errno = -rc; + return rc; +} + +static void +fs_tx_queue_release(void *queue) +{ + struct rte_eth_dev *dev; + struct sub_device *sdev; + uint8_t i; + struct txq *txq; + + if (queue == NULL) + return; + txq = queue; + dev = &rte_eth_devices[txq->priv->data->port_id]; + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + if (ETH(sdev)->data->tx_queues != NULL && + ETH(sdev)->data->tx_queues[txq->qid] != NULL) { + SUBOPS(sdev, tx_queue_release) + (ETH(sdev)->data->tx_queues[txq->qid]); + } + } + dev->data->tx_queues[txq->qid] = NULL; + rte_free(txq); + fs_unlock(dev, 0); +} + +static int +fs_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct sub_device *sdev; + struct txq *txq; + uint8_t i; + int ret; + + fs_lock(dev, 
0); + if (tx_conf->tx_deferred_start) { + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) { + if (SUBOPS(sdev, tx_queue_start) == NULL) { + ERROR("Tx queue deferred start is not " + "supported for subdevice %d", i); + fs_unlock(dev, 0); + return -EINVAL; + } + } + } + txq = dev->data->tx_queues[tx_queue_id]; + if (txq != NULL) { + fs_tx_queue_release(txq); + dev->data->tx_queues[tx_queue_id] = NULL; + } + txq = rte_zmalloc("ethdev TX queue", + sizeof(*txq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, + RTE_CACHE_LINE_SIZE); + if (txq == NULL) { + fs_unlock(dev, 0); + return -ENOMEM; + } + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&txq->refcnt[i]); + txq->qid = tx_queue_id; + txq->socket_id = socket_id; + txq->info.conf = *tx_conf; + txq->info.nb_desc = nb_tx_desc; + txq->priv = PRIV(dev); + dev->data->tx_queues[tx_queue_id] = txq; + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_tx_queue_setup(PORT_ID(sdev), + tx_queue_id, + nb_tx_desc, socket_id, + tx_conf); + if ((ret = fs_err(sdev, ret))) { + ERROR("TX queue setup failed for sub_device %d", i); + goto free_txq; + } + } + fs_unlock(dev, 0); + return 0; +free_txq: + fs_tx_queue_release(txq); + fs_unlock(dev, 0); + return ret; +} + +static void +fs_dev_free_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + fs_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + for (i = 0; i < dev->data->nb_tx_queues; i++) { + fs_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; +} + +static int +fs_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret = 0; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_promiscuous_enable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) { + ERROR("Promiscuous mode enable failed for subdevice %d", + 
PORT_ID(sdev)); + break; + } + } + if (ret != 0) { + /* Rollback in the case of failure */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_promiscuous_disable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) + ERROR("Promiscuous mode disable during rollback failed for subdevice %d", + PORT_ID(sdev)); + } + } + fs_unlock(dev, 0); + + return ret; +} + +static int +fs_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret = 0; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_promiscuous_disable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) { + ERROR("Promiscuous mode disable failed for subdevice %d", + PORT_ID(sdev)); + break; + } + } + if (ret != 0) { + /* Rollback in the case of failure */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_promiscuous_enable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) + ERROR("Promiscuous mode enable during rollback failed for subdevice %d", + PORT_ID(sdev)); + } + } + fs_unlock(dev, 0); + + return ret; +} + +static int +fs_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret = 0; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_allmulticast_enable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) { + ERROR("All-multicast mode enable failed for subdevice %d", + PORT_ID(sdev)); + break; + } + } + if (ret != 0) { + /* Rollback in the case of failure */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_allmulticast_disable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) + ERROR("All-multicast mode disable during rollback failed for subdevice %d", + PORT_ID(sdev)); + } + } + fs_unlock(dev, 0); + + return ret; +} + +static int +fs_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret = 0; + + fs_lock(dev, 0); + 
FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_allmulticast_disable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) { + ERROR("All-multicast mode disable failed for subdevice %d", + PORT_ID(sdev)); + break; + } + } + if (ret != 0) { + /* Rollback in the case of failure */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_allmulticast_enable(PORT_ID(sdev)); + ret = fs_err(sdev, ret); + if (ret != 0) + ERROR("All-multicast mode enable during rollback failed for subdevice %d", + PORT_ID(sdev)); + } + } + fs_unlock(dev, 0); + + return ret; +} + +static int +fs_link_update(struct rte_eth_dev *dev, + int wait_to_complete) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling link_update on sub_device %d", i); + ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete); + if (ret && ret != -1 && sdev->remove == 0 && + rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) { + ERROR("Link update failed for sub_device %d with error %d", + i, ret); + fs_unlock(dev, 0); + return ret; + } + } + if (TX_SUBDEV(dev)) { + struct rte_eth_link *l1; + struct rte_eth_link *l2; + + l1 = &dev->data->dev_link; + l2 = &ETH(TX_SUBDEV(dev))->data->dev_link; + if (memcmp(l1, l2, sizeof(*l1))) { + *l1 = *l2; + fs_unlock(dev, 0); + return 0; + } + } + fs_unlock(dev, 0); + return -1; +} + +static int +fs_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats) +{ + struct rte_eth_stats backup; + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats)); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats; + uint64_t *timestamp = &sdev->stats_snapshot.timestamp; + + rte_memcpy(&backup, snapshot, sizeof(backup)); + ret = rte_eth_stats_get(PORT_ID(sdev), snapshot); + if (ret) { + if (!fs_err(sdev, ret)) { +
rte_memcpy(snapshot, &backup, sizeof(backup)); + goto inc; + } + ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d", + i, ret); + *timestamp = 0; + fs_unlock(dev, 0); + return ret; + } + *timestamp = rte_rdtsc(); +inc: + failsafe_stats_increment(stats, snapshot); + } + fs_unlock(dev, 0); + return 0; +} + +static int +fs_stats_reset(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_stats_reset(PORT_ID(sdev)); + if (ret) { + if (!fs_err(sdev, ret)) + continue; + + ERROR("Operation rte_eth_stats_reset failed for sub_device %d with error %d", + i, ret); + fs_unlock(dev, 0); + return ret; + } + memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats)); + } + memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats)); + fs_unlock(dev, 0); + + return 0; +} + +static int +__fs_xstats_count(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + int count = 0; + uint8_t i; + int ret; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_xstats_get_names(PORT_ID(sdev), NULL, 0); + if (ret < 0) + return ret; + count += ret; + } + + return count; +} + +static int +__fs_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int limit) +{ + struct sub_device *sdev; + unsigned int count = 0; + uint8_t i; + + /* Caller only cares about count */ + if (!xstats_names) + return __fs_xstats_count(dev); + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + struct rte_eth_xstat_name *sub_names = xstats_names + count; + int j, r; + + if (count >= limit) + break; + + r = rte_eth_xstats_get_names(PORT_ID(sdev), + sub_names, limit - count); + if (r < 0) + return r; + + /* add subN_ prefix to names */ + for (j = 0; j < r; j++) { + char *xname = sub_names[j].name; + char tmp[RTE_ETH_XSTATS_NAME_SIZE]; + + if ((xname[0] == 't' || xname[0] == 'r') && + xname[1] == 'x' && xname[2] == 
'_') + snprintf(tmp, sizeof(tmp), "%.3ssub%u_%s", + xname, i, xname + 3); + else + snprintf(tmp, sizeof(tmp), "sub%u_%s", + i, xname); + + strlcpy(xname, tmp, RTE_ETH_XSTATS_NAME_SIZE); + } + count += r; + } + return count; +} + +static int +fs_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int limit) +{ + int ret; + + fs_lock(dev, 0); + ret = __fs_xstats_get_names(dev, xstats_names, limit); + fs_unlock(dev, 0); + return ret; +} + +static int +__fs_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + unsigned int count = 0; + struct sub_device *sdev; + uint8_t i; + int j, ret; + + ret = __fs_xstats_count(dev); + /* + * if error + * or caller did not give enough space + * or just querying + */ + if (ret < 0 || ret > (int)n || xstats == NULL) + return ret; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_xstats_get(PORT_ID(sdev), xstats, n); + if (ret < 0) + return ret; + + if (ret > (int)n) + return n + count; + + /* add offset to id's from sub-device */ + for (j = 0; j < ret; j++) + xstats[j].id += count; + + xstats += ret; + n -= ret; + count += ret; + } + + return count; +} + +static int +fs_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + int ret; + + fs_lock(dev, 0); + ret = __fs_xstats_get(dev, xstats, n); + fs_unlock(dev, 0); + + return ret; +} + + +static int +fs_xstats_reset(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + int r = 0; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + r = rte_eth_xstats_reset(PORT_ID(sdev)); + if (r < 0) + break; + } + fs_unlock(dev, 0); + + return r; +} + +static void +fs_dev_merge_desc_lim(struct rte_eth_desc_lim *to, + const struct rte_eth_desc_lim *from) +{ + to->nb_max = RTE_MIN(to->nb_max, from->nb_max); + to->nb_min = RTE_MAX(to->nb_min, from->nb_min); + to->nb_align = RTE_MAX(to->nb_align, from->nb_align); + + to->nb_seg_max = 
RTE_MIN(to->nb_seg_max, from->nb_seg_max); + to->nb_mtu_seg_max = RTE_MIN(to->nb_mtu_seg_max, from->nb_mtu_seg_max); +} + +/* + * Merge the information from sub-devices. + * + * The reported values must be the common subset of all sub devices + */ +static void +fs_dev_merge_info(struct rte_eth_dev_info *info, + const struct rte_eth_dev_info *sinfo) +{ + info->max_rx_pktlen = RTE_MIN(info->max_rx_pktlen, sinfo->max_rx_pktlen); + info->max_rx_queues = RTE_MIN(info->max_rx_queues, sinfo->max_rx_queues); + info->max_tx_queues = RTE_MIN(info->max_tx_queues, sinfo->max_tx_queues); + info->max_mac_addrs = RTE_MIN(info->max_mac_addrs, sinfo->max_mac_addrs); + info->max_hash_mac_addrs = RTE_MIN(info->max_hash_mac_addrs, + sinfo->max_hash_mac_addrs); + info->max_vmdq_pools = RTE_MIN(info->max_vmdq_pools, sinfo->max_vmdq_pools); + info->max_vfs = RTE_MIN(info->max_vfs, sinfo->max_vfs); + + fs_dev_merge_desc_lim(&info->rx_desc_lim, &sinfo->rx_desc_lim); + fs_dev_merge_desc_lim(&info->tx_desc_lim, &sinfo->tx_desc_lim); + + info->rx_offload_capa &= sinfo->rx_offload_capa; + info->tx_offload_capa &= sinfo->tx_offload_capa; + info->rx_queue_offload_capa &= sinfo->rx_queue_offload_capa; + info->tx_queue_offload_capa &= sinfo->tx_queue_offload_capa; + info->flow_type_rss_offloads &= sinfo->flow_type_rss_offloads; + info->hash_key_size = RTE_MIN(info->hash_key_size, + sinfo->hash_key_size); +} + +/** + * Fail-safe dev_infos_get rules: + * + * No sub_device: + * Numerables: + * Use the maximum possible values for any field, so as not + * to impede any further configuration effort. + * Capabilities: + * Limits capabilities to those that are understood by the + * fail-safe PMD. This understanding stems from the fail-safe + * being capable of verifying that the related capability is + * expressed within the device configuration (struct rte_eth_conf). 
+ * + * At least one probed sub_device: + * Numerables: + * Uses values from the active probed sub_device + * The rationale here is that if any sub_device is less capable + * (for example concerning the number of queues) than the active + * sub_device, then its subsequent configuration will fail. + * It is impossible to foresee this failure when the failing sub_device + * is supposed to be plugged-in later on, so the configuration process + * is the single point of failure and error reporting. + * Capabilities: + * Uses a logical AND of RX capabilities among + * all sub_devices and the default capabilities. + * Uses a logical AND of TX capabilities among + * the active probed sub_device and the default capabilities. + * Uses a logical AND of device capabilities among + * all sub_devices and the default capabilities. + * + */ +static int +fs_dev_infos_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *infos) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + /* Use maximum upper bounds by default */ + infos->max_rx_pktlen = UINT32_MAX; + infos->max_rx_queues = RTE_MAX_QUEUES_PER_PORT; + infos->max_tx_queues = RTE_MAX_QUEUES_PER_PORT; + infos->max_mac_addrs = FAILSAFE_MAX_ETHADDR; + infos->max_hash_mac_addrs = UINT32_MAX; + infos->max_vfs = UINT16_MAX; + infos->max_vmdq_pools = UINT16_MAX; + infos->hash_key_size = UINT8_MAX; + + /* + * Set of capabilities that can be verified upon + * configuring a sub-device. 
+ */ + infos->rx_offload_capa = + DEV_RX_OFFLOAD_VLAN_STRIP | + DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_TCP_LRO | + DEV_RX_OFFLOAD_QINQ_STRIP | + DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_MACSEC_STRIP | + DEV_RX_OFFLOAD_HEADER_SPLIT | + DEV_RX_OFFLOAD_VLAN_FILTER | + DEV_RX_OFFLOAD_VLAN_EXTEND | + DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP | + DEV_RX_OFFLOAD_SECURITY; + + infos->rx_queue_offload_capa = + DEV_RX_OFFLOAD_VLAN_STRIP | + DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_TCP_LRO | + DEV_RX_OFFLOAD_QINQ_STRIP | + DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_MACSEC_STRIP | + DEV_RX_OFFLOAD_HEADER_SPLIT | + DEV_RX_OFFLOAD_VLAN_FILTER | + DEV_RX_OFFLOAD_VLAN_EXTEND | + DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP | + DEV_RX_OFFLOAD_SECURITY; + + infos->tx_offload_capa = + DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_MBUF_FAST_FREE | + DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO; + + infos->flow_type_rss_offloads = + ETH_RSS_IP | + ETH_RSS_UDP | + ETH_RSS_TCP; + infos->dev_capa = + RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP | + RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) { + struct rte_eth_dev_info sub_info; + + ret = rte_eth_dev_info_get(PORT_ID(sdev), &sub_info); + ret = fs_err(sdev, ret); + if (ret != 0) + return ret; + + fs_dev_merge_info(infos, &sub_info); + } + + return 0; +} + +static const uint32_t * +fs_dev_supported_ptypes_get(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + struct rte_eth_dev *edev; + const uint32_t *ret; + + fs_lock(dev, 0); + sdev = TX_SUBDEV(dev); + if (sdev == NULL) { + ret = NULL; + goto unlock; + } + edev = ETH(sdev); + /* ENOTSUP: counts as no supported ptypes */ + if (SUBOPS(sdev, 
dev_supported_ptypes_get) == NULL) { + ret = NULL; + goto unlock; + } + /* + * The API does not permit to do a clean AND of all ptypes, + * It is also incomplete by design and we do not really care + * to have a best possible value in this context. + * We just return the ptypes of the device of highest + * priority, usually the PREFERRED device. + */ + ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev); +unlock: + fs_unlock(dev, 0); + return ret; +} + +static int +fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i); + ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d", + i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + return 0; +} + +static int +fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i); + ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + return 0; +} + +static int +fs_flow_ctrl_get(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf) +{ + struct sub_device *sdev; + int ret; + + fs_lock(dev, 0); + sdev = TX_SUBDEV(dev); + if (sdev == NULL) { + ret = 0; + goto unlock; + } + if (SUBOPS(sdev, flow_ctrl_get) == NULL) { + ret = -ENOTSUP; + goto unlock; + } + ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf); +unlock: + fs_unlock(dev, 0); + return ret; +} + +static int +fs_flow_ctrl_set(struct rte_eth_dev *dev, + struct 
rte_eth_fc_conf *fc_conf) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i); + ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d" + " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + return 0; +} + +static void +fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) +{ + struct sub_device *sdev; + uint8_t i; + + fs_lock(dev, 0); + /* No check: already done within the rte_eth_dev_mac_addr_remove + * call for the fail-safe device. + */ + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) + rte_eth_dev_mac_addr_remove(PORT_ID(sdev), + &dev->data->mac_addrs[index]); + PRIV(dev)->mac_addr_pool[index] = 0; + fs_unlock(dev, 0); +} + +static int +fs_mac_addr_add(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr, + uint32_t index, + uint32_t vmdq) +{ + struct sub_device *sdev; + int ret; + uint8_t i; + + RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR); + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq); + if ((ret = fs_err(sdev, ret))) { + ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %" + PRIu8 " with error %d", i, ret); + fs_unlock(dev, 0); + return ret; + } + } + if (index >= PRIV(dev)->nb_mac_addr) { + DEBUG("Growing mac_addrs array"); + PRIV(dev)->nb_mac_addr = index; + } + PRIV(dev)->mac_addr_pool[index] = vmdq; + fs_unlock(dev, 0); + return 0; +} + +static int +fs_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr); + ret = fs_err(sdev, ret); + if (ret) { + 
ERROR("Operation rte_eth_dev_mac_addr_set failed for sub_device %d with error %d", + i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + + return 0; +} + +static int +fs_set_mc_addr_list(struct rte_eth_dev *dev, + struct rte_ether_addr *mc_addr_set, uint32_t nb_mc_addr) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + void *mcast_addrs; + + fs_lock(dev, 0); + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev), + mc_addr_set, nb_mc_addr); + if (ret != 0) { + ERROR("Operation rte_eth_dev_set_mc_addr_list failed for sub_device %d with error %d", + i, ret); + goto rollback; + } + } + + mcast_addrs = rte_realloc(PRIV(dev)->mcast_addrs, + nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0]), 0); + if (mcast_addrs == NULL && nb_mc_addr > 0) { + ret = -ENOMEM; + goto rollback; + } + rte_memcpy(mcast_addrs, mc_addr_set, + nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0])); + PRIV(dev)->nb_mcast_addr = nb_mc_addr; + PRIV(dev)->mcast_addrs = mcast_addrs; + + fs_unlock(dev, 0); + return 0; + +rollback: + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + int rc = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev), + PRIV(dev)->mcast_addrs, PRIV(dev)->nb_mcast_addr); + if (rc != 0) { + ERROR("Multicast MAC address list rollback for sub_device %d failed with error %d", + i, rc); + } + } + + fs_unlock(dev, 0); + return ret; +} + +static int +fs_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct sub_device *sdev; + uint8_t i; + int ret; + + fs_lock(dev, 0); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + ret = rte_eth_dev_rss_hash_update(PORT_ID(sdev), rss_conf); + ret = fs_err(sdev, ret); + if (ret) { + ERROR("Operation rte_eth_dev_rss_hash_update" + " failed for sub_device %d with error %d", + i, ret); + fs_unlock(dev, 0); + return ret; + } + } + fs_unlock(dev, 0); + + return 0; +} + +static int +fs_filter_ctrl(struct rte_eth_dev *dev __rte_unused, + enum rte_filter_type 
type, + enum rte_filter_op op, + void *arg) +{ + if (type == RTE_ETH_FILTER_GENERIC && + op == RTE_ETH_FILTER_GET) { + *(const void **)arg = &fs_flow_ops; + return 0; + } + return -ENOTSUP; +} + +const struct eth_dev_ops failsafe_ops = { + .dev_configure = fs_dev_configure, + .dev_start = fs_dev_start, + .dev_stop = fs_dev_stop, + .dev_set_link_down = fs_dev_set_link_down, + .dev_set_link_up = fs_dev_set_link_up, + .dev_close = fs_dev_close, + .promiscuous_enable = fs_promiscuous_enable, + .promiscuous_disable = fs_promiscuous_disable, + .allmulticast_enable = fs_allmulticast_enable, + .allmulticast_disable = fs_allmulticast_disable, + .link_update = fs_link_update, + .stats_get = fs_stats_get, + .stats_reset = fs_stats_reset, + .xstats_get = fs_xstats_get, + .xstats_get_names = fs_xstats_get_names, + .xstats_reset = fs_xstats_reset, + .dev_infos_get = fs_dev_infos_get, + .dev_supported_ptypes_get = fs_dev_supported_ptypes_get, + .mtu_set = fs_mtu_set, + .vlan_filter_set = fs_vlan_filter_set, + .rx_queue_start = fs_rx_queue_start, + .rx_queue_stop = fs_rx_queue_stop, + .tx_queue_start = fs_tx_queue_start, + .tx_queue_stop = fs_tx_queue_stop, + .rx_queue_setup = fs_rx_queue_setup, + .tx_queue_setup = fs_tx_queue_setup, + .rx_queue_release = fs_rx_queue_release, + .tx_queue_release = fs_tx_queue_release, + .rx_queue_intr_enable = fs_rx_intr_enable, + .rx_queue_intr_disable = fs_rx_intr_disable, + .flow_ctrl_get = fs_flow_ctrl_get, + .flow_ctrl_set = fs_flow_ctrl_set, + .mac_addr_remove = fs_mac_addr_remove, + .mac_addr_add = fs_mac_addr_add, + .mac_addr_set = fs_mac_addr_set, + .set_mc_addr_list = fs_set_mc_addr_list, + .rss_hash_update = fs_rss_hash_update, + .filter_ctrl = fs_filter_ctrl, +}; diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_private.h b/src/spdk/dpdk/drivers/net/failsafe/failsafe_private.h new file mode 100644 index 000000000..651578a12 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_private.h @@ -0,0 +1,504 @@ +/* 
SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#ifndef _ETH_FAILSAFE_PRIVATE_H_ +#define _ETH_FAILSAFE_PRIVATE_H_ + +#include <stdint.h> +#include <sys/queue.h> +#include <pthread.h> + +#include <rte_atomic.h> +#include <rte_dev.h> +#include <rte_ethdev_driver.h> +#include <rte_devargs.h> +#include <rte_flow.h> +#include <rte_interrupts.h> + +#define FAILSAFE_DRIVER_NAME "Fail-safe PMD" +#define FAILSAFE_OWNER_NAME "Fail-safe" + +#define PMD_FAILSAFE_MAC_KVARG "mac" +#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll" +#define PMD_FAILSAFE_PARAM_STRING \ + "dev(<ifc>)," \ + "exec(<shell command>)," \ + "fd(<fd number>)," \ + "mac=mac_addr," \ + "hotplug_poll=u64" \ + "" + +#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000 + +#define FAILSAFE_MAX_ETHPORTS 2 +#define FAILSAFE_MAX_ETHADDR 128 + +#define DEVARGS_MAXLEN 4096 + +enum rxp_service_state { + SS_NO_SERVICE = 0, + SS_REGISTERED, + SS_READY, + SS_RUNNING, +}; + +/* TYPES */ + +struct rx_proxy { + /* epoll file descriptor */ + int efd; + /* event vector to be used by epoll */ + struct rte_epoll_event *evec; + /* rte service id */ + uint32_t sid; + /* service core id */ + uint32_t scid; + enum rxp_service_state sstate; +}; + +#define FS_RX_PROXY_INIT (struct rx_proxy){ \ + .efd = -1, \ + .evec = NULL, \ + .sid = 0, \ + .scid = 0, \ + .sstate = SS_NO_SERVICE, \ +} + +struct rxq { + struct fs_priv *priv; + uint16_t qid; + /* next sub_device to poll */ + struct sub_device *sdev; + unsigned int socket_id; + int event_fd; + unsigned int enable_events:1; + struct rte_eth_rxq_info info; + rte_atomic64_t refcnt[]; +}; + +struct txq { + struct fs_priv *priv; + uint16_t qid; + unsigned int socket_id; + struct rte_eth_txq_info info; + rte_atomic64_t refcnt[]; +}; + +struct rte_flow { + TAILQ_ENTRY(rte_flow) next; + /* sub_flows */ + struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS]; + /* flow description for synchronization */ + struct 
rte_flow_conv_rule rule; + uint8_t rule_data[]; +}; + +enum dev_state { + DEV_UNDEFINED, + DEV_PARSED, + DEV_PROBED, + DEV_ACTIVE, + DEV_STARTED, +}; + +struct fs_stats { + struct rte_eth_stats stats; + uint64_t timestamp; +}; + +/* + * Allocated in shared memory. + */ +struct sub_device { + /* Exhaustive DPDK device description */ + struct sub_device *next; + struct rte_devargs devargs; + struct rte_bus *bus; /* for primary process only. */ + struct rte_device *dev; /* for primary process only. */ + uint8_t sid; + /* Device state machine */ + enum dev_state state; + /* Last stats snapshot passed to user */ + struct fs_stats stats_snapshot; + /* Some device are defined as a command line */ + char *cmdline; + /* Others are retrieved through a file descriptor */ + char *fd_str; + /* fail-safe device backreference */ + uint16_t fs_port_id; /* shared between processes */ + /* sub device port id*/ + uint16_t sdev_port_id; /* shared between processes */ + /* flag calling for recollection */ + volatile unsigned int remove:1; + /* flow isolation state */ + int flow_isolated:1; + /* RMV callback registration state */ + unsigned int rmv_callback:1; + /* LSC callback registration state */ + unsigned int lsc_callback:1; +}; + +/* + * This is referenced by eth_dev->data->dev_private + * This is shared between processes. + */ +struct fs_priv { + struct rte_eth_dev_data *data; /* backreference to shared data. */ + /* + * Set of sub_devices. + * subs[0] is the preferred device + * any other is just another slave + */ + struct sub_device *subs; /* shared between processes */ + uint8_t subs_head; /* if head == tail, no subs */ + uint8_t subs_tail; /* first invalid */ + uint8_t subs_tx; /* current emitting device */ + uint8_t current_probed; + /* flow mapping */ + TAILQ_HEAD(sub_flows, rte_flow) flow_list; + /* current number of mac_addr slots allocated. 
*/ + uint32_t nb_mac_addr; + struct rte_ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR]; + uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR]; + uint32_t nb_mcast_addr; + struct rte_ether_addr *mcast_addrs; + /* current capabilities */ + struct rte_eth_dev_owner my_owner; /* Unique owner. */ + struct rte_intr_handle intr_handle; /* Port interrupt handle. */ + /* + * Fail-safe state machine. + * This level will be tracking state of the EAL and eth + * layer at large as defined by the user application. + * It will then steer the sub_devices toward the same + * synchronized state. + */ + enum dev_state state; + struct rte_eth_stats stats_accumulator; + /* + * Rx interrupts/events proxy. + * The PMD issues Rx events to the EAL on behalf of its subdevices, + * it does that by registering an event-fd for each of its queues with + * the EAL. A PMD service thread listens to all the Rx events from the + * subdevices, when an Rx event is issued by a subdevice it will be + * caught by this service with will trigger an Rx event in the + * appropriate failsafe Rx queue. + */ + struct rx_proxy rxp; + pthread_mutex_t hotplug_mutex; + /* Hot-plug mutex is locked by the alarm mechanism. 
*/ + volatile unsigned int alarm_lock:1; + unsigned int pending_alarm:1; /* An alarm is pending */ + /* flow isolation state */ + int flow_isolated:1; +}; + +/* FAILSAFE_INTR */ + +int failsafe_rx_intr_install(struct rte_eth_dev *dev); +void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev); +int failsafe_rx_intr_install_subdevice(struct sub_device *sdev); +void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev); + +/* MISC */ + +int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev); +int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev); + +/* RX / TX */ + +void failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe); + +uint16_t failsafe_rx_burst(void *rxq, + struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t failsafe_tx_burst(void *txq, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts); + +uint16_t failsafe_rx_burst_fast(void *rxq, + struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t failsafe_tx_burst_fast(void *txq, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts); + +/* ARGS */ + +int failsafe_args_parse(struct rte_eth_dev *dev, const char *params); +void failsafe_args_free(struct rte_eth_dev *dev); +int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params); +int failsafe_args_parse_subs(struct rte_eth_dev *dev); + +/* EAL */ + +int failsafe_eal_init(struct rte_eth_dev *dev); +int failsafe_eal_uninit(struct rte_eth_dev *dev); + +/* ETH_DEV */ + +int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev); +void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev); +void failsafe_dev_remove(struct rte_eth_dev *dev); +void failsafe_stats_increment(struct rte_eth_stats *to, + struct rte_eth_stats *from); +int failsafe_eth_rmv_event_callback(uint16_t port_id, + enum rte_eth_event_type type, + void *arg, void *out); +int failsafe_eth_lsc_event_callback(uint16_t port_id, + enum rte_eth_event_type event, + void *cb_arg, void *out); +int failsafe_eth_new_event_callback(uint16_t port_id, + enum 
rte_eth_event_type event, + void *cb_arg, void *out); + +/* GLOBALS */ + +extern const char pmd_failsafe_driver_name[]; +extern const struct eth_dev_ops failsafe_ops; +extern const struct rte_flow_ops fs_flow_ops; +extern uint64_t failsafe_hotplug_poll; +extern int failsafe_mac_from_arg; + +/* HELPERS */ + +/* dev: (struct rte_eth_dev *) fail-safe device */ +#define PRIV(dev) \ + ((struct fs_priv *)(dev)->data->dev_private) + +/* sdev: (struct sub_device *) */ +#define ETH(sdev) \ + ((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \ + NULL : &rte_eth_devices[(sdev)->sdev_port_id]) + +/* sdev: (struct sub_device *) */ +#define PORT_ID(sdev) \ + ((sdev)->sdev_port_id) + +/* sdev: (struct sub_device *) */ +#define SUB_ID(sdev) \ + ((sdev)->sid) + +/** + * Stateful iterator construct over fail-safe sub-devices: + * s: (struct sub_device *), iterator + * i: (uint8_t), increment + * dev: (struct rte_eth_dev *), fail-safe ethdev + * state: (enum dev_state), minimum acceptable device state + */ +#define FOREACH_SUBDEV_STATE(s, i, dev, state) \ + for (s = fs_find_next((dev), 0, state, &i); \ + s != NULL; \ + s = fs_find_next((dev), i + 1, state, &i)) + +/** + * Iterator construct over fail-safe sub-devices: + * s: (struct sub_device *), iterator + * i: (uint8_t), increment + * dev: (struct rte_eth_dev *), fail-safe ethdev + */ +#define FOREACH_SUBDEV(s, i, dev) \ + FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED) + +/* dev: (struct rte_eth_dev *) fail-safe device */ +#define PREFERRED_SUBDEV(dev) \ + (&PRIV(dev)->subs[0]) + +/* dev: (struct rte_eth_dev *) fail-safe device */ +#define TX_SUBDEV(dev) \ + (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \ + : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? 
NULL \ + : &PRIV(dev)->subs[PRIV(dev)->subs_tx])) + +/** + * s: (struct sub_device *) + * ops: (struct eth_dev_ops) member + */ +#define SUBOPS(s, ops) \ + (ETH(s)->dev_ops->ops) + +/** + * Atomic guard + */ + +/** + * a: (rte_atomic64_t) + */ +#define FS_ATOMIC_P(a) \ + rte_atomic64_set(&(a), 1) + +/** + * a: (rte_atomic64_t) + */ +#define FS_ATOMIC_V(a) \ + rte_atomic64_set(&(a), 0) + +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_RX(s, i) \ + rte_atomic64_read( \ + &((struct rxq *) \ + (fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid]) +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_TX(s, i) \ + rte_atomic64_read( \ + &((struct txq *) \ + (fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid]) + +#ifdef RTE_EXEC_ENV_FREEBSD +#define FS_THREADID_TYPE void* +#define FS_THREADID_FMT "p" +#else +#define FS_THREADID_TYPE unsigned long +#define FS_THREADID_FMT "lu" +#endif + +extern int failsafe_logtype; + +#define LOG__(l, m, ...) \ + rte_log(RTE_LOG_ ## l, failsafe_logtype, \ + "net_failsafe: " m "%c", __VA_ARGS__) + +#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n') +#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__) +#define INFO(...) LOG_(INFO, __VA_ARGS__) +#define WARN(...) LOG_(WARNING, __VA_ARGS__) +#define ERROR(...) LOG_(ERR, __VA_ARGS__) + +/* inlined functions */ + +static inline struct sub_device * +fs_find_next(struct rte_eth_dev *dev, + uint8_t sid, + enum dev_state min_state, + uint8_t *sid_out) +{ + struct sub_device *subs; + uint8_t tail; + + subs = PRIV(dev)->subs; + tail = PRIV(dev)->subs_tail; + while (sid < tail) { + if (subs[sid].state >= min_state) + break; + sid++; + } + *sid_out = sid; + if (sid >= tail) + return NULL; + return &subs[sid]; +} + +static inline struct rte_eth_dev * +fs_dev(struct sub_device *sdev) { + return &rte_eth_devices[sdev->fs_port_id]; +} + +/* + * Lock hot-plug mutex. + * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. 
+ */ +static inline int +fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm) +{ + int ret; + + if (is_alarm) { + ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex); + if (ret) { + DEBUG("Hot-plug mutex lock trying failed(%s), will try" + " again later...", strerror(ret)); + return ret; + } + PRIV(dev)->alarm_lock = 1; + } else { + ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex); + if (ret) { + ERROR("Cannot lock mutex(%s)", strerror(ret)); + return ret; + } + } + return ret; +} + +/* + * Unlock hot-plug mutex. + * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. + */ +static inline void +fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm) +{ + int ret; + + if (is_alarm) { + RTE_ASSERT(PRIV(dev)->alarm_lock == 1); + PRIV(dev)->alarm_lock = 0; + } + ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex); + if (ret) + ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret)); +} + +/* + * Switch emitting device. + * If banned is set, banned must not be considered for + * the role of emitting device. 
+ */ +static inline void +fs_switch_dev(struct rte_eth_dev *dev, + struct sub_device *banned) +{ + struct sub_device *txd; + enum dev_state req_state; + + req_state = PRIV(dev)->state; + txd = TX_SUBDEV(dev); + if (PREFERRED_SUBDEV(dev)->state >= req_state && + PREFERRED_SUBDEV(dev) != banned) { + if (txd != PREFERRED_SUBDEV(dev) && + (txd == NULL || + (req_state == DEV_STARTED) || + (txd && txd->state < DEV_STARTED))) { + DEBUG("Switching tx_dev to preferred sub_device"); + PRIV(dev)->subs_tx = 0; + } + } else if ((txd && txd->state < req_state) || + txd == NULL || + txd == banned) { + struct sub_device *sdev = NULL; + uint8_t i; + + /* Using acceptable device */ + FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) { + if (sdev == banned) + continue; + DEBUG("Switching tx_dev to sub_device %d", + i); + PRIV(dev)->subs_tx = i; + break; + } + if (i >= PRIV(dev)->subs_tail || sdev == NULL) { + DEBUG("No device ready, deactivating tx_dev"); + PRIV(dev)->subs_tx = PRIV(dev)->subs_tail; + } + } else { + return; + } + failsafe_set_burst_fn(dev, 0); + rte_wmb(); +} + +/* + * Adjust error value and rte_errno to the fail-safe actual error value. + */ +static inline int +fs_err(struct sub_device *sdev, int err) +{ + /* A device removal shouldn't be reported as an error. */ + if (sdev->remove == 1 || err == -EIO) + return rte_errno = 0; + return err; +} +#endif /* _ETH_FAILSAFE_PRIVATE_H_ */ diff --git a/src/spdk/dpdk/drivers/net/failsafe/failsafe_rxtx.c b/src/spdk/dpdk/drivers/net/failsafe/failsafe_rxtx.c new file mode 100644 index 000000000..fee08fa23 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/failsafe_rxtx.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. 
+ * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include <rte_atomic.h> +#include <rte_debug.h> +#include <rte_mbuf.h> +#include <rte_ethdev_driver.h> + +#include "failsafe_private.h" + +static inline int +fs_rx_unsafe(struct sub_device *sdev) +{ + return (ETH(sdev) == NULL) || + (ETH(sdev)->rx_pkt_burst == NULL) || + (sdev->state != DEV_STARTED) || + (sdev->remove != 0); +} + +static inline int +fs_tx_unsafe(struct sub_device *sdev) +{ + return (sdev == NULL) || + (ETH(sdev) == NULL) || + (ETH(sdev)->tx_pkt_burst == NULL) || + (sdev->state != DEV_STARTED); +} + +void +failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe) +{ + struct sub_device *sdev; + uint8_t i; + int need_safe; + int safe_set; + + need_safe = force_safe; + FOREACH_SUBDEV(sdev, i, dev) + need_safe |= fs_rx_unsafe(sdev); + safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst); + if (need_safe && !safe_set) { + DEBUG("Using safe RX bursts%s", + (force_safe ? " (forced)" : "")); + dev->rx_pkt_burst = &failsafe_rx_burst; + } else if (!need_safe && safe_set) { + DEBUG("Using fast RX bursts"); + dev->rx_pkt_burst = &failsafe_rx_burst_fast; + } + need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev)); + safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst); + if (need_safe && !safe_set) { + DEBUG("Using safe TX bursts%s", + (force_safe ? " (forced)" : "")); + dev->tx_pkt_burst = &failsafe_tx_burst; + } else if (!need_safe && safe_set) { + DEBUG("Using fast TX bursts"); + dev->tx_pkt_burst = &failsafe_tx_burst_fast; + } + rte_wmb(); +} + +/* + * Override source port in Rx packets. + * + * Make Rx packets originate from this PMD instance instead of one of its + * sub-devices. This is mandatory to avoid breaking applications. 
+ */ +static void +failsafe_rx_set_port(struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint16_t port) +{ + unsigned int i; + + for (i = 0; i != nb_pkts; ++i) + rx_pkts[i]->port = port; +} + +uint16_t +failsafe_rx_burst(void *queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct sub_device *sdev; + struct rxq *rxq; + void *sub_rxq; + uint16_t nb_rx; + + rxq = queue; + sdev = rxq->sdev; + do { + if (fs_rx_unsafe(sdev)) { + nb_rx = 0; + sdev = sdev->next; + continue; + } + sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); + nb_rx = ETH(sdev)-> + rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); + sdev = sdev->next; + } while (nb_rx == 0 && sdev != rxq->sdev); + rxq->sdev = sdev; + if (nb_rx) + failsafe_rx_set_port(rx_pkts, nb_rx, + rxq->priv->data->port_id); + return nb_rx; +} + +uint16_t +failsafe_rx_burst_fast(void *queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct sub_device *sdev; + struct rxq *rxq; + void *sub_rxq; + uint16_t nb_rx; + + rxq = queue; + sdev = rxq->sdev; + do { + RTE_ASSERT(!fs_rx_unsafe(sdev)); + sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); + nb_rx = ETH(sdev)-> + rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); + sdev = sdev->next; + } while (nb_rx == 0 && sdev != rxq->sdev); + rxq->sdev = sdev; + if (nb_rx) + failsafe_rx_set_port(rx_pkts, nb_rx, + rxq->priv->data->port_id); + return nb_rx; +} + +uint16_t +failsafe_tx_burst(void *queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct sub_device *sdev; + struct txq *txq; + void *sub_txq; + uint16_t nb_tx; + + txq = queue; + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); + if (unlikely(fs_tx_unsafe(sdev))) + return 0; + sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); 
+ return nb_tx; +} + +uint16_t +failsafe_tx_burst_fast(void *queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct sub_device *sdev; + struct txq *txq; + void *sub_txq; + uint16_t nb_tx; + + txq = queue; + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); + RTE_ASSERT(!fs_tx_unsafe(sdev)); + sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); + return nb_tx; +} diff --git a/src/spdk/dpdk/drivers/net/failsafe/meson.build b/src/spdk/dpdk/drivers/net/failsafe/meson.build new file mode 100644 index 000000000..56010e212 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/meson.build @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +cflags += '-std=gnu99' +cflags += '-D_DEFAULT_SOURCE' +cflags += '-D_XOPEN_SOURCE=700' +cflags += '-pedantic' +if is_linux + cflags += '-DLINUX' +else + cflags += '-DBSD' +endif + +sources = files('failsafe_args.c', + 'failsafe.c', + 'failsafe_eal.c', + 'failsafe_ether.c', + 'failsafe_flow.c', + 'failsafe_intr.c', + 'failsafe_ops.c', + 'failsafe_rxtx.c') diff --git a/src/spdk/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map b/src/spdk/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/failsafe/rte_pmd_failsafe_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; |