summaryrefslogtreecommitdiffstats
path: root/src/seastar/dpdk/drivers/bus/vdev/vdev.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/seastar/dpdk/drivers/bus/vdev/vdev.c
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/drivers/bus/vdev/vdev.c')
-rw-r--r--src/seastar/dpdk/drivers/bus/vdev/vdev.c561
1 files changed, 561 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/bus/vdev/vdev.c b/src/seastar/dpdk/drivers/bus/vdev/vdev.c
new file mode 100644
index 000000000..04f76a63f
--- /dev/null
+++ b/src/seastar/dpdk/drivers/bus/vdev/vdev.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 RehiveTech. All rights reserved.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus.h>
+#include <rte_common.h>
+#include <rte_devargs.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#include "rte_bus_vdev.h"
+#include "vdev_logs.h"
+#include "vdev_private.h"
+
+#define VDEV_MP_KEY "bus_vdev_mp"
+
+int vdev_logtype_bus;
+
+/* Forward declare to access virtual bus name */
+static struct rte_bus rte_vdev_bus;
+
+/** Double linked list of virtual device drivers. */
+TAILQ_HEAD(vdev_device_list, rte_vdev_device);
+
+static struct vdev_device_list vdev_device_list =
+ TAILQ_HEAD_INITIALIZER(vdev_device_list);
+/* The lock needs to be recursive because a vdev can manage another vdev. */
+static rte_spinlock_recursive_t vdev_device_list_lock =
+ RTE_SPINLOCK_RECURSIVE_INITIALIZER;
+
+static struct vdev_driver_list vdev_driver_list =
+ TAILQ_HEAD_INITIALIZER(vdev_driver_list);
+
+struct vdev_custom_scan {
+ TAILQ_ENTRY(vdev_custom_scan) next;
+ rte_vdev_scan_callback callback;
+ void *user_arg;
+};
+TAILQ_HEAD(vdev_custom_scans, vdev_custom_scan);
+static struct vdev_custom_scans vdev_custom_scans =
+ TAILQ_HEAD_INITIALIZER(vdev_custom_scans);
+static rte_spinlock_t vdev_custom_scan_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* register a driver */
+void
+rte_vdev_register(struct rte_vdev_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
+}
+
+/* unregister a driver */
+void
+rte_vdev_unregister(struct rte_vdev_driver *driver)
+{
+ TAILQ_REMOVE(&vdev_driver_list, driver, next);
+}
+
+int
+rte_vdev_add_custom_scan(rte_vdev_scan_callback callback, void *user_arg)
+{
+ struct vdev_custom_scan *custom_scan;
+
+ rte_spinlock_lock(&vdev_custom_scan_lock);
+
+ /* check if already registered */
+ TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) {
+ if (custom_scan->callback == callback &&
+ custom_scan->user_arg == user_arg)
+ break;
+ }
+
+ if (custom_scan == NULL) {
+ custom_scan = malloc(sizeof(struct vdev_custom_scan));
+ if (custom_scan != NULL) {
+ custom_scan->callback = callback;
+ custom_scan->user_arg = user_arg;
+ TAILQ_INSERT_TAIL(&vdev_custom_scans, custom_scan, next);
+ }
+ }
+
+ rte_spinlock_unlock(&vdev_custom_scan_lock);
+
+ return (custom_scan == NULL) ? -1 : 0;
+}
+
+int
+rte_vdev_remove_custom_scan(rte_vdev_scan_callback callback, void *user_arg)
+{
+ struct vdev_custom_scan *custom_scan, *tmp_scan;
+
+ rte_spinlock_lock(&vdev_custom_scan_lock);
+ TAILQ_FOREACH_SAFE(custom_scan, &vdev_custom_scans, next, tmp_scan) {
+ if (custom_scan->callback != callback ||
+ (custom_scan->user_arg != (void *)-1 &&
+ custom_scan->user_arg != user_arg))
+ continue;
+ TAILQ_REMOVE(&vdev_custom_scans, custom_scan, next);
+ free(custom_scan);
+ }
+ rte_spinlock_unlock(&vdev_custom_scan_lock);
+
+ return 0;
+}
+
+static int
+vdev_parse(const char *name, void *addr)
+{
+ struct rte_vdev_driver **out = addr;
+ struct rte_vdev_driver *driver = NULL;
+
+ TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+ if (strncmp(driver->driver.name, name,
+ strlen(driver->driver.name)) == 0)
+ break;
+ if (driver->driver.alias &&
+ strncmp(driver->driver.alias, name,
+ strlen(driver->driver.alias)) == 0)
+ break;
+ }
+ if (driver != NULL &&
+ addr != NULL)
+ *out = driver;
+ return driver == NULL;
+}
+
+static int
+vdev_probe_all_drivers(struct rte_vdev_device *dev)
+{
+ const char *name;
+ struct rte_vdev_driver *driver;
+ int ret;
+
+ if (rte_dev_is_probed(&dev->device))
+ return -EEXIST;
+
+ name = rte_vdev_device_name(dev);
+ VDEV_LOG(DEBUG, "Search driver to probe device %s", name);
+
+ if (vdev_parse(name, &driver))
+ return -1;
+ ret = driver->probe(dev);
+ if (ret == 0)
+ dev->device.driver = &driver->driver;
+ return ret;
+}
+
+/* The caller shall be responsible for thread-safe */
+static struct rte_vdev_device *
+find_vdev(const char *name)
+{
+ struct rte_vdev_device *dev;
+
+ if (!name)
+ return NULL;
+
+ TAILQ_FOREACH(dev, &vdev_device_list, next) {
+ const char *devname = rte_vdev_device_name(dev);
+
+ if (!strcmp(devname, name))
+ return dev;
+ }
+
+ return NULL;
+}
+
+static struct rte_devargs *
+alloc_devargs(const char *name, const char *args)
+{
+ struct rte_devargs *devargs;
+ int ret;
+
+ devargs = calloc(1, sizeof(*devargs));
+ if (!devargs)
+ return NULL;
+
+ devargs->bus = &rte_vdev_bus;
+ if (args)
+ devargs->args = strdup(args);
+ else
+ devargs->args = strdup("");
+
+ ret = strlcpy(devargs->name, name, sizeof(devargs->name));
+ if (ret < 0 || ret >= (int)sizeof(devargs->name)) {
+ free(devargs->args);
+ free(devargs);
+ return NULL;
+ }
+
+ return devargs;
+}
+
+static int
+insert_vdev(const char *name, const char *args,
+ struct rte_vdev_device **p_dev,
+ bool init)
+{
+ struct rte_vdev_device *dev;
+ struct rte_devargs *devargs;
+ int ret;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ devargs = alloc_devargs(name, args);
+ if (!devargs)
+ return -ENOMEM;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ dev->device.bus = &rte_vdev_bus;
+ dev->device.numa_node = SOCKET_ID_ANY;
+ dev->device.name = devargs->name;
+
+ if (find_vdev(name)) {
+ /*
+ * A vdev is expected to have only one port.
+ * So there is no reason to try probing again,
+ * even with new arguments.
+ */
+ ret = -EEXIST;
+ goto fail;
+ }
+
+ if (init)
+ rte_devargs_insert(&devargs);
+ dev->device.devargs = devargs;
+ TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+
+ if (p_dev)
+ *p_dev = dev;
+
+ return 0;
+fail:
+ free(devargs->args);
+ free(devargs);
+ free(dev);
+ return ret;
+}
+
+int
+rte_vdev_init(const char *name, const char *args)
+{
+ struct rte_vdev_device *dev;
+ int ret;
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+ ret = insert_vdev(name, args, &dev, true);
+ if (ret == 0) {
+ ret = vdev_probe_all_drivers(dev);
+ if (ret) {
+ if (ret > 0)
+ VDEV_LOG(ERR, "no driver found for %s", name);
+ /* If fails, remove it from vdev list */
+ TAILQ_REMOVE(&vdev_device_list, dev, next);
+ rte_devargs_remove(dev->device.devargs);
+ free(dev);
+ }
+ }
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ return ret;
+}
+
+static int
+vdev_remove_driver(struct rte_vdev_device *dev)
+{
+ const char *name = rte_vdev_device_name(dev);
+ const struct rte_vdev_driver *driver;
+
+ if (!dev->device.driver) {
+ VDEV_LOG(DEBUG, "no driver attach to device %s", name);
+ return 1;
+ }
+
+ driver = container_of(dev->device.driver, const struct rte_vdev_driver,
+ driver);
+ return driver->remove(dev);
+}
+
+int
+rte_vdev_uninit(const char *name)
+{
+ struct rte_vdev_device *dev;
+ int ret;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+
+ dev = find_vdev(name);
+ if (!dev) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ ret = vdev_remove_driver(dev);
+ if (ret)
+ goto unlock;
+
+ TAILQ_REMOVE(&vdev_device_list, dev, next);
+ rte_devargs_remove(dev->device.devargs);
+ free(dev);
+
+unlock:
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ return ret;
+}
+
+struct vdev_param {
+#define VDEV_SCAN_REQ 1
+#define VDEV_SCAN_ONE 2
+#define VDEV_SCAN_REP 3
+ int type;
+ int num;
+ char name[RTE_DEV_NAME_MAX_LEN];
+};
+
+static int vdev_plug(struct rte_device *dev);
+
+/**
+ * This function works as the action for both primary and secondary process
+ * for static vdev discovery when a secondary process is booting.
+ *
+ * step 1, secondary process sends a sync request to ask for vdev in primary;
+ * step 2, primary process receives the request, and send vdevs one by one;
+ * step 3, primary process sends back reply, which indicates how many vdevs
+ * are sent.
+ */
+static int
+vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_vdev_device *dev;
+ struct rte_mp_msg mp_resp;
+ struct vdev_param *ou = (struct vdev_param *)&mp_resp.param;
+ const struct vdev_param *in = (const struct vdev_param *)mp_msg->param;
+ const char *devname;
+ int num;
+ int ret;
+
+ strlcpy(mp_resp.name, VDEV_MP_KEY, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*ou);
+ mp_resp.num_fds = 0;
+
+ switch (in->type) {
+ case VDEV_SCAN_REQ:
+ ou->type = VDEV_SCAN_ONE;
+ ou->num = 1;
+ num = 0;
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+ TAILQ_FOREACH(dev, &vdev_device_list, next) {
+ devname = rte_vdev_device_name(dev);
+ if (strlen(devname) == 0) {
+ VDEV_LOG(INFO, "vdev with no name is not sent");
+ continue;
+ }
+ VDEV_LOG(INFO, "send vdev, %s", devname);
+ strlcpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN);
+ if (rte_mp_sendmsg(&mp_resp) < 0)
+ VDEV_LOG(ERR, "send vdev, %s, failed, %s",
+ devname, strerror(rte_errno));
+ num++;
+ }
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+
+ ou->type = VDEV_SCAN_REP;
+ ou->num = num;
+ if (rte_mp_reply(&mp_resp, peer) < 0)
+ VDEV_LOG(ERR, "Failed to reply a scan request");
+ break;
+ case VDEV_SCAN_ONE:
+ VDEV_LOG(INFO, "receive vdev, %s", in->name);
+ ret = insert_vdev(in->name, NULL, NULL, false);
+ if (ret == -EEXIST)
+ VDEV_LOG(DEBUG, "device already exist, %s", in->name);
+ else if (ret < 0)
+ VDEV_LOG(ERR, "failed to add vdev, %s", in->name);
+ break;
+ default:
+ VDEV_LOG(ERR, "vdev cannot recognize this message");
+ }
+
+ return 0;
+}
+
+static int
+vdev_scan(void)
+{
+ struct rte_vdev_device *dev;
+ struct rte_devargs *devargs;
+ struct vdev_custom_scan *custom_scan;
+
+ if (rte_mp_action_register(VDEV_MP_KEY, vdev_action) < 0 &&
+ rte_errno != EEXIST) {
+ VDEV_LOG(ERR, "Failed to add vdev mp action");
+ return -1;
+ }
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vdev_param *req = (struct vdev_param *)mp_req.param;
+ struct vdev_param *resp;
+
+ strlcpy(mp_req.name, VDEV_MP_KEY, sizeof(mp_req.name));
+ mp_req.len_param = sizeof(*req);
+ mp_req.num_fds = 0;
+ req->type = VDEV_SCAN_REQ;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ resp = (struct vdev_param *)mp_rep->param;
+ VDEV_LOG(INFO, "Received %d vdevs", resp->num);
+ free(mp_reply.msgs);
+ } else
+ VDEV_LOG(ERR, "Failed to request vdev from primary");
+
+ /* Fall through to allow private vdevs in secondary process */
+ }
+
+ /* call custom scan callbacks if any */
+ rte_spinlock_lock(&vdev_custom_scan_lock);
+ TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) {
+ if (custom_scan->callback != NULL)
+ /*
+ * the callback should update devargs list
+ * by calling rte_devargs_insert() with
+ * devargs.bus = rte_bus_find_by_name("vdev");
+ * devargs.type = RTE_DEVTYPE_VIRTUAL;
+ * devargs.policy = RTE_DEV_WHITELISTED;
+ */
+ custom_scan->callback(custom_scan->user_arg);
+ }
+ rte_spinlock_unlock(&vdev_custom_scan_lock);
+
+ /* for virtual devices we scan the devargs_list populated via cmdline */
+ RTE_EAL_DEVARGS_FOREACH("vdev", devargs) {
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev)
+ return -1;
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+
+ if (find_vdev(devargs->name)) {
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ free(dev);
+ continue;
+ }
+
+ dev->device.bus = &rte_vdev_bus;
+ dev->device.devargs = devargs;
+ dev->device.numa_node = SOCKET_ID_ANY;
+ dev->device.name = devargs->name;
+
+ TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+ }
+
+ return 0;
+}
+
+static int
+vdev_probe(void)
+{
+ struct rte_vdev_device *dev;
+ int r, ret = 0;
+
+ /* call the init function for each virtual device */
+ TAILQ_FOREACH(dev, &vdev_device_list, next) {
+ /* we don't use the vdev lock here, as it's only used in DPDK
+ * initialization; and we don't want to hold such a lock when
+ * we call each driver probe.
+ */
+
+ r = vdev_probe_all_drivers(dev);
+ if (r != 0) {
+ if (r == -EEXIST)
+ continue;
+ VDEV_LOG(ERR, "failed to initialize %s device",
+ rte_vdev_device_name(dev));
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+struct rte_device *
+rte_vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data)
+{
+ const struct rte_vdev_device *vstart;
+ struct rte_vdev_device *dev;
+
+ rte_spinlock_recursive_lock(&vdev_device_list_lock);
+ if (start != NULL) {
+ vstart = RTE_DEV_TO_VDEV_CONST(start);
+ dev = TAILQ_NEXT(vstart, next);
+ } else {
+ dev = TAILQ_FIRST(&vdev_device_list);
+ }
+ while (dev != NULL) {
+ if (cmp(&dev->device, data) == 0)
+ break;
+ dev = TAILQ_NEXT(dev, next);
+ }
+ rte_spinlock_recursive_unlock(&vdev_device_list_lock);
+
+ return dev ? &dev->device : NULL;
+}
+
+static int
+vdev_plug(struct rte_device *dev)
+{
+ return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
+}
+
+static int
+vdev_unplug(struct rte_device *dev)
+{
+ return rte_vdev_uninit(dev->name);
+}
+
+static struct rte_bus rte_vdev_bus = {
+ .scan = vdev_scan,
+ .probe = vdev_probe,
+ .find_device = rte_vdev_find_device,
+ .plug = vdev_plug,
+ .unplug = vdev_unplug,
+ .parse = vdev_parse,
+ .dev_iterate = rte_vdev_dev_iterate,
+};
+
+RTE_REGISTER_BUS(vdev, rte_vdev_bus);
+
+RTE_INIT(vdev_init_log)
+{
+ vdev_logtype_bus = rte_log_register("bus.vdev");
+ if (vdev_logtype_bus >= 0)
+ rte_log_set_level(vdev_logtype_bus, RTE_LOG_NOTICE);
+}