author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/dpdk/lib/librte_eal/common
parent Initial commit. (diff)
Adding upstream version 16.2.11+ds.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/lib/librte_eal/common')
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c | 279
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c | 62
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c | 39
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c | 793
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c | 403
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c | 50
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c | 1510
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c | 77
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c | 22
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c | 92
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c | 211
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c | 481
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c | 170
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c | 363
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c | 939
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c | 420
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c | 1861
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c | 1217
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c | 66
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c | 171
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c | 230
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c | 116
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c | 498
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c | 488
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c | 115
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c | 448
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c | 167
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h | 107
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h | 40
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h | 91
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h | 96
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h | 98
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_options.h | 105
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_private.h | 423
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_thread.h | 60
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/eal_trace.h | 120
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c | 465
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h | 55
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c | 682
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h | 190
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c | 1367
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h | 107
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c | 751
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h | 86
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/meson.build | 58
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c | 162
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c | 668
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/rte_random.c | 211
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c | 122
-rw-r--r-- src/spdk/dpdk/lib/librte_eal/common/rte_service.c | 919
50 files changed, 18271 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
new file mode 100644
index 000000000..baa5b532a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2016 NXP
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_bus.h>
+#include <rte_debug.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#include "eal_private.h"
+
+static struct rte_bus_list rte_bus_list =
+ TAILQ_HEAD_INITIALIZER(rte_bus_list);
+
+void
+rte_bus_register(struct rte_bus *bus)
+{
+ RTE_VERIFY(bus);
+ RTE_VERIFY(bus->name && strlen(bus->name));
+ /* A bus must at least implement the scan operation */
+ RTE_VERIFY(bus->scan);
+ RTE_VERIFY(bus->probe);
+ RTE_VERIFY(bus->find_device);
+ /* Buses supporting driver plug also require unplug. */
+ RTE_VERIFY(!bus->plug || bus->unplug);
+
+ TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
+}
+
+void
+rte_bus_unregister(struct rte_bus *bus)
+{
+ TAILQ_REMOVE(&rte_bus_list, bus, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name);
+}
+
+/* Scan all the buses for registered devices */
+int
+rte_bus_scan(void)
+{
+ int ret;
+ struct rte_bus *bus = NULL;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ ret = bus->scan();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
+ bus->name);
+ }
+
+ return 0;
+}
+
+/* Probe all devices of all buses */
+int
+rte_bus_probe(void)
+{
+ int ret;
+ struct rte_bus *bus, *vbus = NULL;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ if (!strcmp(bus->name, "vdev")) {
+ vbus = bus;
+ continue;
+ }
+
+ ret = bus->probe();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+ bus->name);
+ }
+
+ if (vbus) {
+ ret = vbus->probe();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+ vbus->name);
+ }
+
+ return 0;
+}
+
+/* Dump information of a single bus */
+static int
+bus_dump_one(FILE *f, struct rte_bus *bus)
+{
+ int ret;
+
+ /* For now, dump only the bus name */
+ ret = fprintf(f, " %s\n", bus->name);
+
+ /* Return an error if writing to the stream failed */
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void
+rte_bus_dump(FILE *f)
+{
+ int ret;
+ struct rte_bus *bus;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ ret = bus_dump_one(f, bus);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n",
+ ret);
+ break;
+ }
+ }
+}
+
+struct rte_bus *
+rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+ const void *data)
+{
+ struct rte_bus *bus;
+
+ if (start != NULL)
+ bus = TAILQ_NEXT(start, next);
+ else
+ bus = TAILQ_FIRST(&rte_bus_list);
+ while (bus != NULL) {
+ if (cmp(bus, data) == 0)
+ break;
+ bus = TAILQ_NEXT(bus, next);
+ }
+ return bus;
+}
+
+static int
+cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
+{
+ const struct rte_device *dev2 = _dev2;
+
+ return dev1 != dev2;
+}
+
+static int
+bus_find_device(const struct rte_bus *bus, const void *_dev)
+{
+ struct rte_device *dev;
+
+ dev = bus->find_device(NULL, cmp_rte_device, _dev);
+ return dev == NULL;
+}
+
+struct rte_bus *
+rte_bus_find_by_device(const struct rte_device *dev)
+{
+ return rte_bus_find(NULL, bus_find_device, (const void *)dev);
+}
+
+static int
+cmp_bus_name(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(bus->name, name);
+}
+
+struct rte_bus *
+rte_bus_find_by_name(const char *busname)
+{
+ return rte_bus_find(NULL, cmp_bus_name, (const void *)busname);
+}
+
+static int
+bus_can_parse(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return !(bus->parse && bus->parse(name, NULL) == 0);
+}
+
+struct rte_bus *
+rte_bus_find_by_device_name(const char *str)
+{
+ char name[RTE_DEV_NAME_MAX_LEN];
+ char *c;
+
+ strlcpy(name, str, sizeof(name));
+ c = strchr(name, ',');
+ if (c != NULL)
+ c[0] = '\0';
+ return rte_bus_find(NULL, bus_can_parse, name);
+}
+
+
+/*
+ * Get iommu class of devices on the bus.
+ */
+enum rte_iova_mode
+rte_bus_get_iommu_class(void)
+{
+ enum rte_iova_mode mode = RTE_IOVA_DC;
+ bool buses_want_va = false;
+ bool buses_want_pa = false;
+ struct rte_bus *bus;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ enum rte_iova_mode bus_iova_mode;
+
+ if (bus->get_iommu_class == NULL)
+ continue;
+
+ bus_iova_mode = bus->get_iommu_class();
+ RTE_LOG(DEBUG, EAL, "Bus %s wants IOVA as '%s'\n",
+ bus->name,
+ bus_iova_mode == RTE_IOVA_DC ? "DC" :
+ (bus_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
+ if (bus_iova_mode == RTE_IOVA_PA)
+ buses_want_pa = true;
+ else if (bus_iova_mode == RTE_IOVA_VA)
+ buses_want_va = true;
+ }
+ if (buses_want_va && !buses_want_pa) {
+ mode = RTE_IOVA_VA;
+ } else if (buses_want_pa && !buses_want_va) {
+ mode = RTE_IOVA_PA;
+ } else {
+ mode = RTE_IOVA_DC;
+ if (buses_want_va) {
+ RTE_LOG(WARNING, EAL, "Some buses want 'VA' but forcing 'DC' because other buses want 'PA'.\n");
+ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all buses may be able to initialize.\n");
+ }
+ }
+
+ return mode;
+}
+
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+ const void *failure_addr)
+{
+ int ret;
+
+ if (!bus->sigbus_handler)
+ return -1;
+
+ ret = bus->sigbus_handler(failure_addr);
+
+ /* a bus was found but its handler failed; keep the errno set. */
+ if (ret < 0 && rte_errno == 0)
+ rte_errno = ENOTSUP;
+
+ return ret > 0;
+}
+
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+ struct rte_bus *bus;
+
+ int ret = 0;
+ int old_errno = rte_errno;
+
+ rte_errno = 0;
+
+ bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+ /* no bus could handle the failure address. */
+ if (!bus)
+ return 1;
+ /* a bus was found but its handler failed; pass on the new errno. */
+ else if (rte_errno != 0)
+ return -1;
+
+ /* restore the old errno. */
+ rte_errno = old_errno;
+
+ return ret;
+}
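
The bus list above is only reachable through the accessors in this file. As a minimal sketch of how a caller might walk it with rte_bus_find(), assuming only the public <rte_bus.h> API (the prefix comparator and the "pci" match string are hypothetical):

    #include <stdio.h>
    #include <string.h>
    #include <rte_bus.h>

    /* hypothetical comparator: match buses whose name starts with 'prefix';
     * rte_bus_find() treats a return value of 0 as a match. */
    static int
    cmp_name_prefix(const struct rte_bus *bus, const void *data)
    {
            const char *prefix = data;

            return strncmp(bus->name, prefix, strlen(prefix));
    }

    static void
    dump_matching_buses(void)
    {
            struct rte_bus *bus = NULL;

            /* rte_bus_find() resumes after 'start', so feeding the previous
             * result back in visits every match in registration order. */
            while ((bus = rte_bus_find(bus, cmp_name_prefix, "pci")) != NULL)
                    printf("matched bus: %s\n", bus->name);
    }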
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 000000000..0187076af
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+static struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
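
Classes normally register themselves at shared-object load time through a constructor. A sketch under that assumption, using the RTE_REGISTER_CLASS helper from <rte_class.h> (the class name and the empty dev_iterate stub are hypothetical):

    #include <rte_class.h>

    /* hypothetical iterator: this example class exposes no devices */
    static void *
    my_dev_iterate(const void *start, const char *str,
                   const struct rte_dev_iterator *it)
    {
            (void)start; (void)str; (void)it;
            return NULL;
    }

    static struct rte_class my_class = {
            .dev_iterate = my_dev_iterate,
    };

    /* expands to a constructor that fills in .name and calls
     * rte_class_register(&my_class) before main() runs */
    RTE_REGISTER_CLASS(myclass, my_class);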
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
new file mode 100644
index 000000000..dc5f75d05
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+int
+rte_cpu_is_supported(void)
+{
+ /* This is generated at compile-time by the build system */
+ static const enum rte_cpu_flag_t compile_time_flags[] = {
+ RTE_COMPILE_TIME_CPUFLAGS
+ };
+ unsigned count = RTE_DIM(compile_time_flags), i;
+ int ret;
+
+ for (i = 0; i < count; i++) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ return 0;
+ }
+ if (!ret) {
+ fprintf(stderr,
+ "ERROR: This system does not support \"%s\".\n"
+ "Please check that RTE_MACHINE is set correctly.\n",
+ rte_cpu_get_flag_name(compile_time_flags[i]));
+ return 0;
+ }
+ }
+
+ return 1;
+}
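
rte_cpu_is_supported() is called by rte_eal_init() to refuse startup on an unsupported machine; the same per-flag query can also gate optional fast paths at run time. A sketch, assuming an x86 target (RTE_CPUFLAG_AVX2 is defined only there):

    #include <rte_cpuflags.h>

    /* returns 1 when the AVX2 code path may be used on this CPU */
    static int
    use_avx2_path(void)
    {
    #ifdef RTE_ARCH_X86
            return rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) > 0;
    #else
            return 0;
    #endif
    }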
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
new file mode 100644
index 000000000..9e4f09d83
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
@@ -0,0 +1,793 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_compat.h>
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "hotplug_mp.h"
+
+/**
+ * The device event callback description.
+ *
+ * It contains the callback address registered by the user application,
+ * the pointer to the callback's parameter, and the device name.
+ */
+struct dev_event_callback {
+ TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
+ rte_dev_event_cb_fn cb_fn; /**< Callback address */
+ void *cb_arg; /**< Callback parameter */
+ char *dev_name; /**< Callback device name, NULL means all devices */
+ uint32_t active; /**< Callback is executing */
+};
+
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
+
+/* The device event callback list for all registered callbacks. */
+static struct dev_event_cb_list dev_event_cbs;
+
+/* spinlock for device callbacks */
+static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
+
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
+
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
+
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+int
+rte_dev_is_probed(const struct rte_device *dev)
+{
+ /* The field driver should be set only when the probe is successful. */
+ return dev->driver != NULL;
+}
+
+/* helper function to build devargs, caller should free the memory */
+static int
+build_devargs(const char *busname, const char *devname,
+ const char *drvargs, char **devargs)
+{
+ int length;
+
+ length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs);
+ if (length < 0)
+ return -EINVAL;
+
+ *devargs = malloc(length + 1);
+ if (*devargs == NULL)
+ return -ENOMEM;
+
+ length = snprintf(*devargs, length + 1, "%s:%s,%s",
+ busname, devname, drvargs);
+ if (length < 0) {
+ free(*devargs);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *drvargs)
+{
+
+ char *devargs;
+ int ret;
+
+ ret = build_devargs(busname, devname, drvargs, &devargs);
+ if (ret != 0)
+ return ret;
+
+ ret = rte_dev_probe(devargs);
+ free(devargs);
+
+ return ret;
+}
+
+/* probe a device in the local process. */
+int
+local_dev_probe(const char *devargs, struct rte_device **new_dev)
+{
+ struct rte_device *dev;
+ struct rte_devargs *da;
+ int ret;
+
+ *new_dev = NULL;
+ da = calloc(1, sizeof(*da));
+ if (da == NULL)
+ return -ENOMEM;
+
+ ret = rte_devargs_parse(da, devargs);
+ if (ret)
+ goto err_devarg;
+
+ if (da->bus->plug == NULL) {
+ RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+ da->bus->name);
+ ret = -ENOTSUP;
+ goto err_devarg;
+ }
+
+ ret = rte_devargs_insert(&da);
+ if (ret)
+ goto err_devarg;
+
+ /* the rte_devargs will be referenced in the matching rte_device */
+ ret = da->bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = da->bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
+ da->name);
+ ret = -ENODEV;
+ goto err_devarg;
+ }
+ /* Since there is a matching device, it is now its responsibility
+ * to manage the devargs we've just inserted. From this point
+ * those devargs shouldn't be removed manually anymore.
+ */
+
+ ret = dev->bus->plug(dev);
+ if (ret > 0)
+ ret = -ENOTSUP;
+
+ if (ret && !rte_dev_is_probed(dev)) { /* if probing never succeeded */
+ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+ dev->name);
+ return ret;
+ }
+
+ *new_dev = dev;
+ return ret;
+
+err_devarg:
+ if (rte_devargs_remove(da) != 0) {
+ free(da->args);
+ free(da);
+ }
+ return ret;
+}
+
+int
+rte_dev_probe(const char *devargs)
+{
+ struct eal_dev_mp_req req;
+ struct rte_device *dev;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug add device\n");
+ return req.result;
+ }
+
+ /* attaching a device from the primary process starts here: */
+
+ /* primary attach the new device itself. */
+ ret = local_dev_probe(devargs, &dev);
+
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on primary process\n");
+
+ /**
+ * A secondary process may have failed to attach a device that
+ * the primary process already had at initialization, so for
+ * the -EEXIST case we still need to sync with the secondary
+ * processes.
+ */
+ if (ret != -EEXIST)
+ return ret;
+ }
+
+ /* primary send attach sync request to secondary. */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /* if any communication error, we need to rollback. */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug add request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to attach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on secondary process\n");
+ ret = req.result;
+
+ /* for -EEXIST, we don't need to rollback. */
+ if (ret == -EEXIST)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on secondary. "
+ "Devices in secondary may not sync with primary\n");
+
+ /* primary rollback itself. */
+ if (local_dev_remove(dev) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on primary. "
+ "Devices in secondary may not sync with primary\n");
+
+ return ret;
+}
+
+int
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+ struct rte_device *dev;
+ struct rte_bus *bus;
+
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+ return -ENOENT;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
+ return -EINVAL;
+ }
+
+ return rte_dev_remove(dev);
+}
+
+/* remove a device in the local process. */
+int
+local_dev_remove(struct rte_device *dev)
+{
+ int ret;
+
+ if (dev->bus->unplug == NULL) {
+ RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+ dev->bus->name);
+ return -ENOTSUP;
+ }
+
+ ret = dev->bus->unplug(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+ dev->name);
+ return (ret < 0) ? ret : -ENOENT;
+ }
+
+ return 0;
+}
+
+int
+rte_dev_remove(struct rte_device *dev)
+{
+ struct eal_dev_mp_req req;
+ char *devargs;
+ int ret;
+
+ if (!rte_dev_is_probed(dev)) {
+ RTE_LOG(ERR, EAL, "Device is not probed\n");
+ return -ENOENT;
+ }
+
+ ret = build_devargs(dev->bus->name, dev->name, "", &devargs);
+ if (ret != 0)
+ return ret;
+
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_DETACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+ free(devargs);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug remove device\n");
+ return req.result;
+ }
+
+ /* detaching a device from the primary process starts here: */
+
+ /* primary send detach sync request to secondary */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /**
+ * on a communication error we need to roll back, because some
+ * of the secondary processes may still have detached the
+ * device successfully.
+ */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send device detach request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to detach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on secondary process\n");
+ ret = req.result;
+ /**
+ * if -ENOENT, we don't need to rollback, since the device
+ * is already detached on the secondary processes.
+ */
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ /* primary detach the device itself. */
+ ret = local_dev_remove(dev);
+
+ /* if primary failed, still need to consider if rollback is necessary */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on primary process\n");
+ /* if -ENOENT, we don't need to rollback */
+ if (ret == -ENOENT)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device detach on secondary. "
+ "Devices in secondary may not sync with primary\n");
+
+ return ret;
+}
+
+int
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct dev_event_callback *event_cb;
+ int ret;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ if (TAILQ_EMPTY(&dev_event_cbs))
+ TAILQ_INIT(&dev_event_cbs);
+
+ TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
+ if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) {
+ if (device_name == NULL && event_cb->dev_name == NULL)
+ break;
+ if (device_name == NULL || event_cb->dev_name == NULL)
+ continue;
+ if (!strcmp(event_cb->dev_name, device_name))
+ break;
+ }
+ }
+
+ /* create a new callback. */
+ if (event_cb == NULL) {
+ event_cb = malloc(sizeof(struct dev_event_callback));
+ if (event_cb != NULL) {
+ event_cb->cb_fn = cb_fn;
+ event_cb->cb_arg = cb_arg;
+ event_cb->active = 0;
+ if (!device_name) {
+ event_cb->dev_name = NULL;
+ } else {
+ event_cb->dev_name = strdup(device_name);
+ if (event_cb->dev_name == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next);
+ } else {
+ RTE_LOG(ERR, EAL,
+ "Failed to allocate memory for device "
+ "event callback.\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+ } else {
+ RTE_LOG(ERR, EAL,
+ "The callback already exists; no need "
+ "to register again.\n");
+ ret = -EEXIST;
+ }
+
+ rte_spinlock_unlock(&dev_event_lock);
+ return 0;
+error:
+ free(event_cb);
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
+
+int
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ int ret = 0;
+ struct dev_event_callback *event_cb, *next;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+ /* walk through the callbacks and remove all that match. */
+ for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
+ event_cb = next) {
+
+ next = TAILQ_NEXT(event_cb, next);
+
+ if (device_name != NULL && event_cb->dev_name != NULL) {
+ if (!strcmp(event_cb->dev_name, device_name)) {
+ if (event_cb->cb_fn != cb_fn ||
+ (cb_arg != (void *)-1 &&
+ event_cb->cb_arg != cb_arg))
+ continue;
+ }
+ } else if (device_name != NULL) {
+ continue;
+ }
+
+ /*
+ * if this callback is not executing right now,
+ * then remove it.
+ */
+ if (event_cb->active == 0) {
+ TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
+ free(event_cb);
+ ret++;
+ } else {
+ continue;
+ }
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
+
+void
+rte_dev_event_callback_process(const char *device_name,
+ enum rte_dev_event_type event)
+{
+ struct dev_event_callback *cb_lst;
+
+ if (device_name == NULL)
+ return;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
+ if (cb_lst->dev_name) {
+ if (strcmp(cb_lst->dev_name, device_name))
+ continue;
+ }
+ cb_lst->active = 1;
+ rte_spinlock_unlock(&dev_event_lock);
+ cb_lst->cb_fn(device_name, event,
+ cb_lst->cb_arg);
+ rte_spinlock_lock(&dev_event_lock);
+ cb_lst->active = 0;
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+}
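+
+/*
+ * Usage sketch (hypothetical handler): a callback registered with a NULL
+ * device name receives events for every device, e.g.
+ *
+ * static void on_dev_event(const char *name,
+ * enum rte_dev_event_type event, void *arg)
+ * {
+ * if (event == RTE_DEV_EVENT_REMOVE)
+ * handle_removal(name);
+ * }
+ *
+ * rte_dev_event_callback_register(NULL, on_dev_event, NULL);
+ */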
+
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal; set them
+ * to NULL here so the iterator stays marked invalid unless
+ * everything goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
+
+int
+rte_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
+ size_t len)
+{
+ if (dev->bus->dma_map == NULL || len == 0) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ /* Memory must be registered through rte_extmem_* APIs */
+ if (rte_mem_virt2memseg_list(addr) == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return dev->bus->dma_map(dev, addr, iova, len);
+}
+
+int
+rte_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
+ size_t len)
+{
+ if (dev->bus->dma_unmap == NULL || len == 0) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ /* Memory must be registered through rte_extmem_* APIs */
+ if (rte_mem_virt2memseg_list(addr) == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return dev->bus->dma_unmap(dev, addr, iova, len);
+}
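
The iterator pair above is what the RTE_DEV_FOREACH() convenience macro in <rte_dev.h> is built on. A minimal sketch of using it directly, assuming a PCI bus is registered ("bus=pci" is just an example devstr):

    #include <stdio.h>
    #include <rte_dev.h>

    static void
    list_pci_devices(void)
    {
            struct rte_dev_iterator it;
            struct rte_device *dev;

            if (rte_dev_iterator_init(&it, "bus=pci") != 0)
                    return; /* -rte_errno was returned; iterator is invalid */

            while ((dev = rte_dev_iterator_next(&it)) != NULL)
                    printf("device: %s (probed: %d)\n",
                           dev->name, rte_dev_is_probed(dev));
    }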
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
new file mode 100644
index 000000000..2123773ef
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2014 6WIND S.A.
+ */
+
+/* This file manages the list of devices and their arguments, as given
+ * by the user at startup
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_tailq.h>
+#include "eal_private.h"
+
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
+/** Global list of user devices */
+static struct rte_devargs_list devargs_list =
+ TAILQ_HEAD_INITIALIZER(devargs_list);
+
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split each sub-list. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs already points at devstr as its source
+ * data, it should not allocate anything and keep referring
+ * only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, clean up the layer strings
+ * a bit to ease their later parsing.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
+static int
+bus_name_cmp(const struct rte_bus *bus, const void *name)
+{
+ return strncmp(bus->name, name, strlen(bus->name));
+}
+
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
+{
+ struct rte_bus *bus = NULL;
+ const char *devname;
+ const size_t maxlen = sizeof(da->name);
+ size_t i;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ /* Retrieve bus info, if any */
+ do {
+ devname = dev;
+ bus = rte_bus_find(bus, bus_name_cmp, dev);
+ if (bus == NULL)
+ break;
+ devname = dev + strlen(bus->name) + 1;
+ if (rte_bus_find_by_device_name(devname) == bus)
+ break;
+ } while (1);
+ /* Store device name */
+ i = 0;
+ while (devname[i] != '\0' && devname[i] != ',') {
+ da->name[i] = devname[i];
+ i++;
+ if (i == maxlen) {
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
+ dev, maxlen);
+ da->name[i - 1] = '\0';
+ return -EINVAL;
+ }
+ }
+ da->name[i] = '\0';
+ if (bus == NULL) {
+ bus = rte_bus_find_by_device_name(da->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
+ da->name);
+ return -EFAULT;
+ }
+ }
+ da->bus = bus;
+ /* Parse device arguments, if any */
+ if (devname[i] == ',')
+ da->args = strdup(&devname[i + 1]);
+ else
+ da->args = strdup("");
+ if (da->args == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+ int ret;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ ret = rte_devargs_parse(da, dev);
+
+ free(dev);
+ return ret;
+}
+
+int
+rte_devargs_insert(struct rte_devargs **da)
+{
+ struct rte_devargs *listed_da;
+ void *tmp;
+
+ if (*da == NULL || (*da)->bus == NULL)
+ return -1;
+
+ TAILQ_FOREACH_SAFE(listed_da, &devargs_list, next, tmp) {
+ if (listed_da == *da)
+ /* devargs already in the list */
+ return 0;
+ if (strcmp(listed_da->bus->name, (*da)->bus->name) == 0 &&
+ strcmp(listed_da->name, (*da)->name) == 0) {
+ /* device already in devargs list, must be updated */
+ listed_da->type = (*da)->type;
+ listed_da->policy = (*da)->policy;
+ free(listed_da->args);
+ listed_da->args = (*da)->args;
+ listed_da->bus = (*da)->bus;
+ listed_da->cls = (*da)->cls;
+ listed_da->bus_str = (*da)->bus_str;
+ listed_da->cls_str = (*da)->cls_str;
+ listed_da->data = (*da)->data;
+ /* replace provided devargs with found one */
+ free(*da);
+ *da = listed_da;
+ return 0;
+ }
+ }
+ /* new device in the list */
+ TAILQ_INSERT_TAIL(&devargs_list, *da, next);
+ return 0;
+}
+
+/* store a whitelist parameter for later parsing */
+int
+rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+{
+ struct rte_devargs *devargs = NULL;
+ struct rte_bus *bus = NULL;
+ const char *dev = devargs_str;
+
+ /* use calloc instead of rte_zmalloc as it's called early at init */
+ devargs = calloc(1, sizeof(*devargs));
+ if (devargs == NULL)
+ goto fail;
+
+ if (rte_devargs_parse(devargs, dev))
+ goto fail;
+ devargs->type = devtype;
+ bus = devargs->bus;
+ if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+ devargs->policy = RTE_DEV_BLACKLISTED;
+ if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) {
+ if (devargs->policy == RTE_DEV_WHITELISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST;
+ else if (devargs->policy == RTE_DEV_BLACKLISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST;
+ }
+ TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+ return 0;
+
+fail:
+ if (devargs) {
+ free(devargs->args);
+ free(devargs);
+ }
+
+ return -1;
+}
+
+int
+rte_devargs_remove(struct rte_devargs *devargs)
+{
+ struct rte_devargs *d;
+ void *tmp;
+
+ if (devargs == NULL || devargs->bus == NULL)
+ return -1;
+
+ TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
+ if (strcmp(d->bus->name, devargs->bus->name) == 0 &&
+ strcmp(d->name, devargs->name) == 0) {
+ TAILQ_REMOVE(&devargs_list, d, next);
+ free(d->args);
+ free(d);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* count the number of devices of a specified type */
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype)
+{
+ struct rte_devargs *devargs;
+ unsigned int count = 0;
+
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ if (devargs->type != devtype)
+ continue;
+ count++;
+ }
+ return count;
+}
+
+/* dump the user devices on the console */
+void
+rte_devargs_dump(FILE *f)
+{
+ struct rte_devargs *devargs;
+
+ fprintf(f, "User device list:\n");
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ fprintf(f, " [%s]: %s %s\n",
+ (devargs->bus ? devargs->bus->name : "??"),
+ devargs->name, devargs->args);
+ }
+}
+
+/* bus-aware rte_devargs iterator. */
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start)
+{
+ struct rte_devargs *da;
+
+ if (start != NULL)
+ da = TAILQ_NEXT(start, next);
+ else
+ da = TAILQ_FIRST(&devargs_list);
+ while (da != NULL) {
+ if (busname == NULL ||
+ (strcmp(busname, da->bus->name) == 0))
+ return da;
+ da = TAILQ_NEXT(da, next);
+ }
+ return NULL;
+}
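
A sketch of the expected calling convention for rte_devargs_parse(); the PCI address and argument string are hypothetical, and da.args is strdup'd so the caller owns it:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <rte_devargs.h>

    static void
    parse_example(void)
    {
            struct rte_devargs da;

            memset(&da, 0, sizeof(da));
            if (rte_devargs_parse(&da, "0000:01:00.0,rx_queues=4") != 0)
                    return;

            /* da.bus was resolved through rte_bus_find_by_device_name() */
            printf("bus=%s name=%s args=%s\n",
                   da.bus->name, da.name, da.args);
            free(da.args);
    }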
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 000000000..2a10fb823
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/* Use XSI-compliant portable version of strerror_r() */
+#undef _GNU_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+const char *
+rte_strerror(int errnum)
+{
+ /* BSD puts a colon in the "unknown error" messages, Linux doesn't */
+#ifdef RTE_EXEC_ENV_FREEBSD
+ static const char *sep = ":";
+#else
+ static const char *sep = "";
+#endif
+#define RETVAL_SZ 256
+ static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+ char *ret = RTE_PER_LCORE(retval);
+
+ /* since some implementations of strerror_r fail themselves
+ * if errnum is too big, we handle that case here */
+ if (errnum >= RTE_MAX_ERRNO)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum);
+ else
+ switch (errnum) {
+ case E_RTE_SECONDARY:
+ return "Invalid call in secondary process";
+ case E_RTE_NO_CONFIG:
+ return "Missing rte_config structure";
+ default:
+ if (strerror_r(errnum, ret, RETVAL_SZ) != 0)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d",
+ sep, errnum);
+ }
+
+ return ret;
+}
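
A short sketch of how rte_strerror() pairs with APIs that return negative errno values (the devargs string passed in is hypothetical):

    #include <rte_dev.h>
    #include <rte_errno.h>
    #include <rte_log.h>

    static void
    probe_and_report(const char *devargs)
    {
            int ret = rte_dev_probe(devargs);

            if (ret < 0)
                    RTE_LOG(ERR, USER1, "probe of %s failed: %s\n",
                            devargs, rte_strerror(-ret));
    }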
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 000000000..4f8f1af73
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,1510 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1ULL << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
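+
+/*
+ * Worked example: with MASK_SHIFT == 6, element index 70 lives in mask
+ * word MASK_LEN_TO_IDX(70) == 1 at bit MASK_LEN_TO_MOD(70) == 6, and
+ * MASK_GET_IDX(1, 6) recovers 70.
+ */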
+
+/*
+ * We use this to keep track of created/attached memory areas to prevent user
+ * errors in API usage.
+ */
+struct mem_area {
+ TAILQ_ENTRY(mem_area) next;
+ void *addr;
+ size_t len;
+ int fd;
+};
+TAILQ_HEAD(mem_area_head, mem_area);
+/* local per-process tailq */
+static struct mem_area_head mem_area_tailq =
+ TAILQ_HEAD_INITIALIZER(mem_area_tailq);
+static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * This is a mask that is always stored at the end of the array, to
+ * provide a fast way of finding free/used spots without looping
+ * through each element.
+ */
+
+struct used_mask {
+ unsigned int n_masks;
+ uint64_t data[];
+};
+
+static size_t
+calc_mask_size(unsigned int len)
+{
+ /* mask must be multiple of MASK_ALIGN, even though length of array
+ * itself may not be aligned on that boundary.
+ */
+ len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+ return sizeof(struct used_mask) +
+ sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
+{
+ size_t data_sz = elt_sz * len;
+ size_t msk_sz = calc_mask_size(len);
+ return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
+{
+ return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+ void *map_addr;
+
+ if (ftruncate(fd, len)) {
+ RTE_LOG(ERR, EAL, "Cannot truncate fbarray file: %s\n",
+ strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+
+ map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr != addr) {
+ RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+overlap(const struct mem_area *ma, const void *start, size_t len)
+{
+ const void *end = RTE_PTR_ADD(start, len);
+ const void *ma_start = ma->addr;
+ const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);
+
+ /* start overlap? */
+ if (start >= ma_start && start < ma_end)
+ return 1;
+ /* end overlap? */
+ if (end >= ma_start && end < ma_end)
+ return 1;
+ return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookahead_idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-1ULL << last_mod);
+
+ for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+ uint64_t cur_msk, lookahead_msk;
+ unsigned int run_start, clz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits for arbitrary n is
+ * a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * rshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count leading zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * end of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits right at the start, so we will do
+ * (n-k-1) rshift-ands and check if first bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
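+ /*
+ * Worked example on a hypothetical 8-bit mask: to find n = 3
+ * consecutive set bits in 0b00111010, do n - 1 = 2 rshift-ands:
+ * 0b00111010 & 0b00011101 = 0b00011000, then
+ * 0b00011000 & 0b00001100 = 0b00001000. The surviving bit
+ * (ctz = 3) marks the start of the run covering bits 3..5.
+ */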
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* combine current ignore mask with last index ignore mask */
+ if (msk_idx == last)
+ ignore_msk |= last_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk >> 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ run_start = __builtin_ctzll(tmp_msk);
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count leading zeroes on inverted mask */
+ if (~cur_msk == 0)
+ clz = sizeof(cur_msk) * 8;
+ else
+ clz = __builtin_clzll(~cur_msk);
+
+ /* if there aren't any runs at the end either, just continue */
+ if (clz == 0)
+ continue;
+
+ /* we have a partial run at the end, so try looking ahead */
+ run_start = MASK_ALIGN - clz;
+ left -= clz;
+
+ for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+ lookahead_idx++) {
+ unsigned int s_idx, need;
+ lookahead_msk = msk->data[lookahead_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookahead_msk = ~lookahead_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookahead_msk &= lookahead_msk >> 1ULL;
+
+ /* if first bit is not set, we've lost the run */
+ if ((lookahead_msk & 1) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookahead-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = ~((1ULL << need) - 1);
+ msk_idx = lookahead_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1ULL);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ for (idx = first; idx < msk->n_masks; idx++) {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find first set bit - that will correspond to whatever it is
+ * that we're looking for.
+ */
+ found = __builtin_ctzll(cur);
+ return MASK_GET_IDX(idx, found);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk;
+ unsigned int need_len, result = 0;
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* if this is last mask, ignore everything after last bit */
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first) {
+ cur >>= first_mod;
+ /* at the start, we don't need the full mask len */
+ need_len -= first_mod;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ continue;
+
+ /*
+ * see if current run ends before mask end.
+ */
+ run_len = __builtin_ctzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
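+
+/*
+ * Editorial sketch (not part of the upstream file) of the lshift-and
+ * trick used in the searches above: after (n - 1) rounds of
+ * "msk &= msk << 1", every bit still set in msk marks the end of a run
+ * of at least n consecutive set bits in the original value, so the
+ * highest survivor gives us the topmost such run. Here, 64 stands in
+ * for MASK_ALIGN.
+ */
+static int
+find_run_sketch(uint64_t msk, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n - 1; i++)
+		msk &= msk << 1;
+	/* no run of n consecutive set bits anywhere in the mask */
+	if (msk == 0)
+		return -1;
+	/* highest surviving bit ends a run; back up to where it starts */
+	return 64 - __builtin_clzll(msk) - n;
+}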
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+	/*
+	 * the mask only has a granularity of MASK_ALIGN, but start may not be
+	 * aligned on that boundary, so construct a special mask that excludes
+	 * anything we don't want to see, to avoid confusing clz.
+	 */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+		/*
+		 * find the last set bit - that will correspond to whatever it
+		 * is we're looking for. clz counts zeroes from the top of the
+		 * mask, so the value we get is an offset from the end; convert
+		 * it to a position from the start of the mask.
+		 */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+	} while (idx-- != 0); /* decrement after check to include zero */
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
+set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
+{
+ struct used_mask *msk;
+ uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+ unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
+ bool already_used;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ ret = 0;
+
+ /* prevent array from changing under us */
+ rte_rwlock_write_lock(&arr->rwlock);
+
+ already_used = (msk->data[msk_idx] & msk_bit) != 0;
+
+ /* nothing to be done */
+ if (used == already_used)
+ goto out;
+
+ if (used) {
+ msk->data[msk_idx] |= msk_bit;
+ arr->count++;
+ } else {
+ msk->data[msk_idx] &= ~msk_bit;
+ arr->count--;
+ }
+out:
+ rte_rwlock_write_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
+{
+ if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ struct used_mask *msk;
+ struct mem_area *ma = NULL;
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (fully_validate(name, elt_sz, len))
+ return -1;
+
+ /* allocate mem area before doing anything */
+ ma = malloc(sizeof(*ma));
+ if (ma == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1) {
+ free(ma);
+ return -1;
+ }
+
+ /* calculate our memory limits */
+ mmap_len = calc_data_size(page_sz, elt_sz, len);
+
+ data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
+ if (data == NULL) {
+ free(ma);
+ return -1;
+ }
+
+ rte_spinlock_lock(&mem_area_lock);
+
+ fd = -1;
+
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
+
+		/*
+		 * Each fbarray is unique to a process namespace, i.e. the
+		 * filename depends on the process prefix. Try to take out a
+		 * lock and see if we succeed. If we don't, someone else is
+		 * using it already.
+		 */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
+
+		/* take out a non-exclusive lock, so that other processes can
+		 * still attach to it, but no other process can reinitialize
+		 * it.
+		 */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+ }
+ ma->addr = data;
+ ma->len = mmap_len;
+ ma->fd = fd;
+
+ /* do not close fd - keep it until detach/destroy */
+ TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
+
+ /* initialize the data */
+ memset(data, 0, mmap_len);
+
+ /* populate data structure */
+ strlcpy(arr->name, name, sizeof(arr->name));
+ arr->data = data;
+ arr->len = len;
+ arr->elt_sz = elt_sz;
+ arr->count = 0;
+
+ msk = get_used_mask(data, elt_sz, len);
+ msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
+
+ rte_rwlock_init(&arr->rwlock);
+
+ rte_spinlock_unlock(&mem_area_lock);
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ free(ma);
+
+ rte_spinlock_unlock(&mem_area_lock);
+ return -1;
+}
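+
+/*
+ * A minimal usage sketch (not part of the upstream file); error handling
+ * is abbreviated and the name "example" is made up. Create an array of
+ * 1024 64-bit slots, claim the first free one, then tear it down.
+ */
+static void
+fbarray_usage_sketch(void)
+{
+	struct rte_fbarray arr;
+	int idx;
+
+	if (rte_fbarray_init(&arr, "example", 1024, sizeof(uint64_t)) < 0)
+		return;
+
+	idx = rte_fbarray_find_next_free(&arr, 0);
+	if (idx >= 0) {
+		uint64_t *elt = rte_fbarray_get(&arr, idx);
+		*elt = 42;
+		rte_fbarray_set_used(&arr, idx);
+	}
+
+	rte_fbarray_destroy(&arr);
+}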
+
+int
+rte_fbarray_attach(struct rte_fbarray *arr)
+{
+ struct mem_area *ma = NULL, *tmp = NULL;
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+	/*
+	 * we don't need to synchronize attach, as the two values we need
+	 * (element size and array length) are constant for the lifetime of
+	 * the array, so the parts we care about will not race.
+	 */
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len))
+ return -1;
+
+ ma = malloc(sizeof(*ma));
+ if (ma == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1) {
+ free(ma);
+ return -1;
+ }
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* check the tailq - maybe user has already mapped this address space */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (overlap(tmp, arr->data, mmap_len)) {
+ rte_errno = EEXIST;
+ goto fail;
+ }
+ }
+
+ /* we know this memory area is unique, so proceed */
+
+ data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ /* lock the file, to let others know we're using it */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ /* store our new memory area */
+ ma->addr = data;
+ ma->fd = fd; /* keep fd until detach/destroy */
+ ma->len = mmap_len;
+
+ TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
+
+ /* we're done */
+
+ rte_spinlock_unlock(&mem_area_lock);
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ free(ma);
+ rte_spinlock_unlock(&mem_area_lock);
+ return -1;
+}
+
+int
+rte_fbarray_detach(struct rte_fbarray *arr)
+{
+ struct mem_area *tmp = NULL;
+ size_t mmap_len;
+ int ret = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+	/*
+	 * we don't need to synchronize detach, as the two values we need
+	 * (element size and total capacity) are constant for the lifetime of
+	 * the array, so the parts we care about will not race. if the user
+	 * is detaching while doing something else in the same process, we
+	 * can't really do anything about it; things will blow up either way.
+	 */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* does this area exist? */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (tmp->addr == arr->data && tmp->len == mmap_len)
+ break;
+ }
+ if (tmp == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto out;
+ }
+
+ munmap(arr->data, mmap_len);
+
+ /* area is unmapped, close fd and remove the tailq entry */
+ if (tmp->fd >= 0)
+ close(tmp->fd);
+ TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+ free(tmp);
+
+ ret = 0;
+out:
+ rte_spinlock_unlock(&mem_area_lock);
+ return ret;
+}
+
+int
+rte_fbarray_destroy(struct rte_fbarray *arr)
+{
+ struct mem_area *tmp = NULL;
+ size_t mmap_len;
+ int fd, ret;
+ char path[PATH_MAX];
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+	/*
+	 * we don't need to synchronize destroy, as the two values we need
+	 * (element size and total capacity) are constant for the lifetime of
+	 * the array, so the parts we care about will not race. if the user
+	 * is destroying while doing something else in the same process, we
+	 * can't really do anything about it; things will blow up either way.
+	 */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* does this area exist? */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (tmp->addr == arr->data && tmp->len == mmap_len)
+ break;
+ }
+ if (tmp == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto out;
+ }
+ /* with no shconf, there were never any files to begin with */
+ if (!internal_config.no_shconf) {
+ /*
+ * attempt to get an exclusive lock on the file, to ensure it
+ * has been detached by all other processes
+ */
+ fd = tmp->fd;
+ if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+ rte_errno = EBUSY;
+ ret = -1;
+ goto out;
+ }
+
+ /* we're OK to destroy the file */
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ /*
+ * we're still holding an exclusive lock, so drop it to
+ * shared.
+ */
+ flock(fd, LOCK_SH | LOCK_NB);
+
+ ret = -1;
+ goto out;
+ }
+ close(fd);
+ }
+ munmap(arr->data, mmap_len);
+
+ /* area is unmapped, remove the tailq entry */
+ TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+ free(tmp);
+ ret = 0;
+
+ /* reset the fbarray structure */
+ memset(arr, 0, sizeof(*arr));
+out:
+ rte_spinlock_unlock(&mem_area_lock);
+ return ret;
+}
+
+void *
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
+{
+ void *ret = NULL;
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (idx >= arr->len) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
+
+ return ret;
+}
+
+int
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, true);
+}
+
+int
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, false);
+}
+
+int
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ struct used_mask *msk;
+ int msk_idx;
+ uint64_t msk_bit;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ msk_idx = MASK_LEN_TO_IDX(idx);
+ msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+
+ ret = (msk->data[msk_idx] & msk_bit) != 0;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
+ }
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, true);
+}
+
+int
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, false);
+}
+
+int
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, true);
+}
+
+int
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
+}
+
+int
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+static int
+fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used,
+ bool rev)
+{
+ int cur_idx, next_idx, cur_len, biggest_idx, biggest_len;
+	/* don't stack 'if' conditions - use function pointers instead */
+ int (*find_func)(struct rte_fbarray *, unsigned int);
+ int (*find_contig_func)(struct rte_fbarray *, unsigned int);
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* the other API calls already do their fair share of cheap checks, so
+ * no need to do them here.
+ */
+
+	/* the APIs called here are thread-safe, but something may still happen
+	 * between the calls, so lock the fbarray. all other APIs take a read
+	 * lock on the fbarray, so a read lock here is OK.
+	 */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* pick out appropriate functions */
+ if (used) {
+ if (rev) {
+ find_func = rte_fbarray_find_prev_used;
+ find_contig_func = rte_fbarray_find_rev_contig_used;
+ } else {
+ find_func = rte_fbarray_find_next_used;
+ find_contig_func = rte_fbarray_find_contig_used;
+ }
+ } else {
+ if (rev) {
+ find_func = rte_fbarray_find_prev_free;
+ find_contig_func = rte_fbarray_find_rev_contig_free;
+ } else {
+ find_func = rte_fbarray_find_next_free;
+ find_contig_func = rte_fbarray_find_contig_free;
+ }
+ }
+
+ cur_idx = start;
+ biggest_idx = -1; /* default is error */
+ biggest_len = 0;
+ for (;;) {
+ cur_idx = find_func(arr, cur_idx);
+
+ /* block found, check its length */
+ if (cur_idx >= 0) {
+ cur_len = find_contig_func(arr, cur_idx);
+ /* decide where we go next */
+ next_idx = rev ? cur_idx - cur_len : cur_idx + cur_len;
+ /* move current index to start of chunk */
+ cur_idx = rev ? next_idx + 1 : cur_idx;
+
+ if (cur_len > biggest_len) {
+ biggest_idx = cur_idx;
+ biggest_len = cur_len;
+ }
+ cur_idx = next_idx;
+ /* in reverse mode, next_idx may be -1 if chunk started
+ * at array beginning. this means there's no more work
+ * to do.
+ */
+ if (cur_idx < 0)
+ break;
+ } else {
+ /* nothing more to find, stop. however, a failed API
+ * call has set rte_errno, which we want to ignore, as
+ * reaching the end of fbarray is not an error.
+ */
+ rte_errno = 0;
+ break;
+ }
+ }
+ /* if we didn't find anything at all, set rte_errno */
+ if (biggest_idx < 0)
+ rte_errno = used ? ENOENT : ENOSPC;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return biggest_idx;
+}
+
+int
+rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, false, true);
+}
+
+int
+rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, true, true);
+}
+
+int
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
+
+int
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
+}
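+
+/*
+ * A minimal usage sketch (not part of the upstream file): locate the
+ * biggest free run and report its start and length.
+ */
+static void
+biggest_free_sketch(struct rte_fbarray *arr, FILE *f)
+{
+	int idx = rte_fbarray_find_biggest_free(arr, 0);
+
+	if (idx >= 0) {
+		int len = rte_fbarray_find_contig_free(arr, idx);
+		fprintf(f, "biggest free run: start %d, len %d\n", idx, len);
+	}
+}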
+
+int
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
+{
+ void *end;
+ int ret = -1;
+
+ /*
+ * no need to synchronize as it doesn't matter if underlying data
+ * changes - we're doing pointer arithmetic here.
+ */
+
+ if (arr == NULL || elt == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
+ if (elt < arr->data || elt >= end) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
+
+ return ret;
+}
+
+void
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
+{
+ struct used_mask *msk;
+ unsigned int i;
+
+ if (arr == NULL || f == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
+ fprintf(f, "Invalid file-backed array\n");
+ goto out;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ fprintf(f, "File-backed array: %s\n", arr->name);
+ fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
+ arr->len, arr->count, arr->elt_sz);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+
+ for (i = 0; i < msk->n_masks; i++)
+ fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c
new file mode 100644
index 000000000..2d2179d41
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <rte_hexdump.h>
+#include <rte_string_fns.h>
+
+#define LINE_LEN 128
+
+void
+rte_hexdump(FILE *f, const char *title, const void *buf, unsigned int len)
+{
+ unsigned int i, out, ofs;
+ const unsigned char *data = buf;
+ char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */
+
+ fprintf(f, "%s at [%p], len=%u\n",
+ title ? : " Dump data", data, len);
+ ofs = 0;
+ while (ofs < len) {
+ /* format the line in the buffer */
+ out = snprintf(line, LINE_LEN, "%08X:", ofs);
+ for (i = 0; i < 16; i++) {
+ if (ofs + i < len)
+ snprintf(line + out, LINE_LEN - out,
+ " %02X", (data[ofs + i] & 0xff));
+ else
+ strcpy(line + out, " ");
+ out += 3;
+ }
+
+ for (; i <= 16; i++)
+ out += snprintf(line + out, LINE_LEN - out, " | ");
+
+ for (i = 0; ofs < len && i < 16; i++, ofs++) {
+ unsigned char c = data[ofs];
+
+ if (c < ' ' || c > '~')
+ c = '.';
+ out += snprintf(line + out, LINE_LEN - out, "%c", c);
+ }
+ fprintf(f, "%s\n", line);
+ }
+ fflush(f);
+}
+
+void
+rte_memdump(FILE *f, const char *title, const void *buf, unsigned int len)
+{
+ unsigned int i, out;
+ const unsigned char *data = buf;
+ char line[LINE_LEN];
+
+ if (title)
+ fprintf(f, "%s: ", title);
+
+ line[0] = '\0';
+ for (i = 0, out = 0; i < len; i++) {
+ /* Make sure we do not overrun the line buffer length. */
+ if (out >= LINE_LEN - 4) {
+ fprintf(f, "%s", line);
+ out = 0;
+ line[out] = '\0';
+ }
+ out += snprintf(line + out, LINE_LEN - out, "%02x%s",
+ (data[i] & 0xff), ((i + 1) < len) ? ":" : "");
+ }
+ if (out > 0)
+ fprintf(f, "%s", line);
+ fprintf(f, "\n");
+
+ fflush(f);
+}
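+
+/*
+ * A short usage sketch (not part of the upstream file): dump the same
+ * buffer in both formats to stdout.
+ */
+static void
+hexdump_usage_sketch(void)
+{
+	const char msg[] = "hello, hexdump";
+
+	rte_hexdump(stdout, "message", msg, sizeof(msg));
+	rte_memdump(stdout, "message", msg, sizeof(msg));
+}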
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c
new file mode 100644
index 000000000..5388b81a5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include "rte_hypervisor.h"
+
+const char *
+rte_hypervisor_get_name(enum rte_hypervisor id)
+{
+ switch (id) {
+ case RTE_HYPERVISOR_NONE:
+ return "none";
+ case RTE_HYPERVISOR_KVM:
+ return "KVM";
+ case RTE_HYPERVISOR_HYPERV:
+ return "Hyper-V";
+ case RTE_HYPERVISOR_VMWARE:
+ return "VMware";
+ default:
+ return "unknown";
+ }
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c
new file mode 100644
index 000000000..cf52d717f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_pause.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+
+/*
+ * Wait until an lcore has finished its job.
+ */
+int
+rte_eal_wait_lcore(unsigned slave_id)
+{
+ if (lcore_config[slave_id].state == WAIT)
+ return 0;
+
+ while (lcore_config[slave_id].state != WAIT &&
+ lcore_config[slave_id].state != FINISHED)
+ rte_pause();
+
+ rte_rmb();
+
+ /* we are in finished state, go to wait state */
+ lcore_config[slave_id].state = WAIT;
+ return lcore_config[slave_id].ret;
+}
+
+/*
+ * Check that every SLAVE lcore is in WAIT state, then call
+ * rte_eal_remote_launch() for each of them. If call_master is true
+ * (set to CALL_MASTER), also call the function on the master lcore.
+ */
+int
+rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
+ enum rte_rmt_call_master_t call_master)
+{
+ int lcore_id;
+ int master = rte_get_master_lcore();
+
+ /* check state of lcores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (lcore_config[lcore_id].state != WAIT)
+ return -EBUSY;
+ }
+
+ /* send messages to cores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(f, arg, lcore_id);
+ }
+
+ if (call_master == CALL_MASTER) {
+ lcore_config[master].ret = f(arg);
+ lcore_config[master].state = FINISHED;
+ }
+
+ return 0;
+}
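+
+/*
+ * A minimal usage sketch (not part of the upstream file): lcore_hello is
+ * a made-up worker, launched on every slave lcore and the master, after
+ * which the caller waits for all of them to finish.
+ */
+static int
+lcore_hello(void *arg __rte_unused)
+{
+	printf("hello from lcore %u\n", rte_lcore_id());
+	return 0;
+}
+
+static void
+launch_usage_sketch(void)
+{
+	rte_eal_mp_remote_launch(lcore_hello, NULL, CALL_MASTER);
+	rte_eal_mp_wait_lcore();
+}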
+
+/*
+ * Return the state of the lcore identified by slave_id.
+ */
+enum rte_lcore_state_t
+rte_eal_get_lcore_state(unsigned lcore_id)
+{
+ return lcore_config[lcore_id].state;
+}
+
+/*
+ * Do a rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ */
+void
+rte_eal_mp_wait_lcore(void)
+{
+ unsigned lcore_id;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_wait_lcore(lcore_id);
+ }
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c
new file mode 100644
index 000000000..5404922a8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+unsigned int rte_get_master_lcore(void)
+{
+ return rte_eal_get_configuration()->master_lcore;
+}
+
+unsigned int rte_lcore_count(void)
+{
+ return rte_eal_get_configuration()->lcore_count;
+}
+
+int rte_lcore_index(int lcore_id)
+{
+ if (unlikely(lcore_id >= RTE_MAX_LCORE))
+ return -1;
+
+ if (lcore_id < 0)
+ lcore_id = (int)rte_lcore_id();
+
+ return lcore_config[lcore_id].core_index;
+}
+
+int rte_lcore_to_cpu_id(int lcore_id)
+{
+ if (unlikely(lcore_id >= RTE_MAX_LCORE))
+ return -1;
+
+ if (lcore_id < 0)
+ lcore_id = (int)rte_lcore_id();
+
+ return lcore_config[lcore_id].core_id;
+}
+
+rte_cpuset_t rte_lcore_cpuset(unsigned int lcore_id)
+{
+ return lcore_config[lcore_id].cpuset;
+}
+
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned int lcore_id)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return ROLE_OFF;
+ return cfg->lcore_role[lcore_id];
+}
+
+int rte_lcore_is_enabled(unsigned int lcore_id)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return 0;
+ return cfg->lcore_role[lcore_id] == ROLE_RTE;
+}
+
+unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap)
+{
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+
+ while (i < RTE_MAX_LCORE) {
+ if (!rte_lcore_is_enabled(i) ||
+ (skip_master && (i == rte_get_master_lcore()))) {
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+ continue;
+ }
+ break;
+ }
+ return i;
+}
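+
+/*
+ * Illustrative sketch (not part of the upstream file): this is the loop
+ * shape behind the RTE_LCORE_FOREACH family of macros, which iterate
+ * over enabled lcores by repeatedly calling rte_get_next_lcore().
+ * Passing -1 wraps to UINT_MAX, so the first call starts the scan at
+ * lcore 0.
+ */
+static void
+lcore_iter_sketch(void)
+{
+	unsigned int i;
+
+	for (i = rte_get_next_lcore(-1, 0, 0);
+			i < RTE_MAX_LCORE;
+			i = rte_get_next_lcore(i, 0, 0))
+		RTE_LOG(DEBUG, EAL, "lcore %u is enabled\n", i);
+}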
+
+unsigned int
+rte_lcore_to_socket_id(unsigned int lcore_id)
+{
+ return lcore_config[lcore_id].socket_id;
+}
+
+static int
+socket_id_cmp(const void *a, const void *b)
+{
+ const int *lcore_id_a = a;
+ const int *lcore_id_b = b;
+
+ if (*lcore_id_a < *lcore_id_b)
+ return -1;
+ if (*lcore_id_a > *lcore_id_b)
+ return 1;
+ return 0;
+}
+
+/*
+ * Parse /sys/devices/system/cpu to get the number of physical and logical
+ * processors on the machine, and fill in the per-lcore configuration
+ * accordingly.
+ */
+int
+rte_eal_cpu_init(void)
+{
+ /* pointer to global configuration */
+ struct rte_config *config = rte_eal_get_configuration();
+ unsigned lcore_id;
+ unsigned count = 0;
+ unsigned int socket_id, prev_socket_id;
+ int lcore_to_socket_id[RTE_MAX_LCORE];
+
+ /*
+ * Parse the maximum set of logical cores, detect the subset of running
+ * ones and enable them by default.
+ */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ lcore_config[lcore_id].core_index = count;
+
+ /* init cpuset for per lcore config */
+ CPU_ZERO(&lcore_config[lcore_id].cpuset);
+
+ /* find socket first */
+ socket_id = eal_cpu_socket_id(lcore_id);
+ lcore_to_socket_id[lcore_id] = socket_id;
+
+ if (eal_cpu_detected(lcore_id) == 0) {
+ config->lcore_role[lcore_id] = ROLE_OFF;
+ lcore_config[lcore_id].core_index = -1;
+ continue;
+ }
+
+		/* By default, each lcore maps 1:1 to a cpu id */
+ CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);
+
+ /* By default, each detected core is enabled */
+ config->lcore_role[lcore_id] = ROLE_RTE;
+ lcore_config[lcore_id].core_role = ROLE_RTE;
+ lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
+ lcore_config[lcore_id].socket_id = socket_id;
+ RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
+ "core %u on socket %u\n",
+ lcore_id, lcore_config[lcore_id].core_id,
+ lcore_config[lcore_id].socket_id);
+ count++;
+ }
+ for (; lcore_id < CPU_SETSIZE; lcore_id++) {
+ if (eal_cpu_detected(lcore_id) == 0)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Skipped lcore %u as core %u on socket %u\n",
+ lcore_id, eal_cpu_core_id(lcore_id),
+ eal_cpu_socket_id(lcore_id));
+ }
+
+ /* Set the count of enabled logical cores of the EAL configuration */
+ config->lcore_count = count;
+ RTE_LOG(DEBUG, EAL,
+ "Support maximum %u logical core(s) by configuration.\n",
+ RTE_MAX_LCORE);
+ RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+
+ /* sort all socket id's in ascending order */
+ qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id),
+ sizeof(lcore_to_socket_id[0]), socket_id_cmp);
+
+ prev_socket_id = -1;
+ config->numa_node_count = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ socket_id = lcore_to_socket_id[lcore_id];
+ if (socket_id != prev_socket_id)
+ config->numa_nodes[config->numa_node_count++] =
+ socket_id;
+ prev_socket_id = socket_id;
+ }
+ RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count);
+
+ return 0;
+}
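+
+/*
+ * Standalone sketch of the NUMA-node counting above (not part of the
+ * upstream file): sort the socket ids, then count how many times the
+ * value changes while walking the sorted array. Assumes all ids are
+ * non-negative.
+ */
+static unsigned int
+count_unique_sockets_sketch(int *ids, unsigned int n)
+{
+	unsigned int i, count = 0;
+	int prev = -1;
+
+	qsort(ids, n, sizeof(ids[0]), socket_id_cmp);
+	for (i = 0; i < n; i++) {
+		if (ids[i] != prev)
+			count++;
+		prev = ids[i];
+	}
+	return count;
+}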
+
+unsigned int
+rte_socket_count(void)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ return config->numa_node_count;
+}
+
+int
+rte_socket_id_by_idx(unsigned int idx)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ if (idx >= config->numa_node_count) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ return config->numa_nodes[idx];
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c
new file mode 100644
index 000000000..8835c8fff
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <regex.h>
+#include <fnmatch.h>
+
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_per_lcore.h>
+
+#include "eal_private.h"
+
+/* global log structure */
+struct rte_logs rte_logs = {
+ .type = ~0,
+ .level = RTE_LOG_DEBUG,
+ .file = NULL,
+};
+
+struct rte_eal_opt_loglevel {
+ /** Next list entry */
+ TAILQ_ENTRY(rte_eal_opt_loglevel) next;
+ /** Compiled regular expression obtained from the option */
+ regex_t re_match;
+ /** Globbing pattern option */
+ char *pattern;
+ /** Log level value obtained from the option */
+ uint32_t level;
+};
+
+TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel);
+
+/** List of valid EAL log level options */
+static struct rte_eal_opt_loglevel_list opt_loglevel_list =
+ TAILQ_HEAD_INITIALIZER(opt_loglevel_list);
+
+/* Stream to use for logging if rte_logs.file is NULL */
+static FILE *default_log_stream;
+
+/**
+ * This structure stores some information about the message currently
+ * being processed by an lcore; one instance exists per lcore.
+ */
+struct log_cur_msg {
+ uint32_t loglevel; /**< log level - see rte_log.h */
+ uint32_t logtype; /**< log type - see rte_log.h */
+};
+
+struct rte_log_dynamic_type {
+ const char *name;
+ uint32_t loglevel;
+};
+
+/* per-lcore log */
+static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
+
+/* default logs */
+
+/* Change the stream that will be used by logging system */
+int
+rte_openlog_stream(FILE *f)
+{
+ rte_logs.file = f;
+ return 0;
+}
+
+FILE *
+rte_log_get_stream(void)
+{
+ FILE *f = rte_logs.file;
+
+ if (f == NULL) {
+ /*
+ * Grab the current value of stderr here, rather than
+ * just initializing default_log_stream to stderr. This
+ * ensures that we will always use the current value
+ * of stderr, even if the application closes and
+ * reopens it.
+ */
+ return default_log_stream ? : stderr;
+ }
+ return f;
+}
+
+/* Set global log level */
+void
+rte_log_set_global_level(uint32_t level)
+{
+ rte_logs.level = (uint32_t)level;
+}
+
+/* Get global log level */
+uint32_t
+rte_log_get_global_level(void)
+{
+ return rte_logs.level;
+}
+
+int
+rte_log_get_level(uint32_t type)
+{
+ if (type >= rte_logs.dynamic_types_len)
+ return -1;
+
+ return rte_logs.dynamic_types[type].loglevel;
+}
+
+bool
+rte_log_can_log(uint32_t logtype, uint32_t level)
+{
+ int log_level;
+
+ if (level > rte_log_get_global_level())
+ return false;
+
+ log_level = rte_log_get_level(logtype);
+ if (log_level < 0)
+ return false;
+
+ if (level > (uint32_t)log_level)
+ return false;
+
+ return true;
+}
+
+int
+rte_log_set_level(uint32_t type, uint32_t level)
+{
+ if (type >= rte_logs.dynamic_types_len)
+ return -1;
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return 0;
+}
+
+/* set log level by regular expression */
+int
+rte_log_set_level_regexp(const char *regex, uint32_t level)
+{
+ regex_t r;
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ if (regcomp(&r, regex, 0) != 0)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ if (regexec(&r, rte_logs.dynamic_types[i].name, 0,
+ NULL, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ regfree(&r);
+
+ return 0;
+}
+
+/*
+ * Save the type string and the log level for dynamic
+ * logtypes that may register later.
+ */
+static int rte_log_save_level(int priority,
+ const char *regex, const char *pattern)
+{
+ struct rte_eal_opt_loglevel *opt_ll = NULL;
+
+ opt_ll = malloc(sizeof(*opt_ll));
+ if (opt_ll == NULL)
+ goto fail;
+
+ opt_ll->level = priority;
+
+ if (regex) {
+ opt_ll->pattern = NULL;
+ if (regcomp(&opt_ll->re_match, regex, 0) != 0)
+ goto fail;
+ } else if (pattern) {
+ opt_ll->pattern = strdup(pattern);
+ if (opt_ll->pattern == NULL)
+ goto fail;
+ } else
+ goto fail;
+
+ TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next);
+ return 0;
+fail:
+ free(opt_ll);
+ return -1;
+}
+
+int rte_log_save_regexp(const char *regex, int tmp)
+{
+ return rte_log_save_level(tmp, regex, NULL);
+}
+
+/* set log level based on globbing pattern */
+int
+rte_log_set_level_pattern(const char *pattern, uint32_t level)
+{
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+
+ if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ return 0;
+}
+
+int rte_log_save_pattern(const char *pattern, int priority)
+{
+ return rte_log_save_level(priority, NULL, pattern);
+}
+
+/* get the current loglevel for the message being processed */
+int rte_log_cur_msg_loglevel(void)
+{
+ return RTE_PER_LCORE(log_cur_msg).loglevel;
+}
+
+/* get the current logtype for the message being processed */
+int rte_log_cur_msg_logtype(void)
+{
+ return RTE_PER_LCORE(log_cur_msg).logtype;
+}
+
+static int
+rte_log_lookup(const char *name)
+{
+ size_t i;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ if (strcmp(name, rte_logs.dynamic_types[i].name) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+/* register an extended log type, assuming table is large enough, and id
+ * is not yet registered.
+ */
+static int
+__rte_log_register(const char *name, int id)
+{
+ char *dup_name = strdup(name);
+
+ if (dup_name == NULL)
+ return -ENOMEM;
+
+ rte_logs.dynamic_types[id].name = dup_name;
+ rte_logs.dynamic_types[id].loglevel = RTE_LOG_INFO;
+
+ return id;
+}
+
+/* register an extended log type */
+int
+rte_log_register(const char *name)
+{
+ struct rte_log_dynamic_type *new_dynamic_types;
+ int id, ret;
+
+ id = rte_log_lookup(name);
+ if (id >= 0)
+ return id;
+
+ new_dynamic_types = realloc(rte_logs.dynamic_types,
+ sizeof(struct rte_log_dynamic_type) *
+ (rte_logs.dynamic_types_len + 1));
+ if (new_dynamic_types == NULL)
+ return -ENOMEM;
+ rte_logs.dynamic_types = new_dynamic_types;
+
+ ret = __rte_log_register(name, rte_logs.dynamic_types_len);
+ if (ret < 0)
+ return ret;
+
+ rte_logs.dynamic_types_len++;
+
+ return ret;
+}
+
+/* Register an extended log type and try to pick its level from EAL options */
+int
+rte_log_register_type_and_pick_level(const char *name, uint32_t level_def)
+{
+ struct rte_eal_opt_loglevel *opt_ll;
+ uint32_t level = level_def;
+ int type;
+
+ type = rte_log_register(name);
+ if (type < 0)
+ return type;
+
+ TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) {
+ if (opt_ll->level > RTE_LOG_DEBUG)
+ continue;
+
+ if (opt_ll->pattern) {
+ if (fnmatch(opt_ll->pattern, name, 0) == 0)
+ level = opt_ll->level;
+ } else {
+ if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0)
+ level = opt_ll->level;
+ }
+ }
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return type;
+}
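+
+/*
+ * A minimal usage sketch (not part of the upstream file): register a
+ * dynamic log type under a made-up name, then emit a message through it.
+ */
+static void
+log_usage_sketch(void)
+{
+	int my_logtype;
+
+	my_logtype = rte_log_register_type_and_pick_level("user.example",
+			RTE_LOG_INFO);
+	if (my_logtype < 0)
+		return;
+
+	rte_log(RTE_LOG_INFO, my_logtype, "example message: %d\n", 42);
+}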
+
+struct logtype {
+ uint32_t log_id;
+ const char *logtype;
+};
+
+static const struct logtype logtype_strings[] = {
+ {RTE_LOGTYPE_EAL, "lib.eal"},
+ {RTE_LOGTYPE_MALLOC, "lib.malloc"},
+ {RTE_LOGTYPE_RING, "lib.ring"},
+ {RTE_LOGTYPE_MEMPOOL, "lib.mempool"},
+ {RTE_LOGTYPE_TIMER, "lib.timer"},
+ {RTE_LOGTYPE_PMD, "pmd"},
+ {RTE_LOGTYPE_HASH, "lib.hash"},
+ {RTE_LOGTYPE_LPM, "lib.lpm"},
+ {RTE_LOGTYPE_KNI, "lib.kni"},
+ {RTE_LOGTYPE_ACL, "lib.acl"},
+ {RTE_LOGTYPE_POWER, "lib.power"},
+ {RTE_LOGTYPE_METER, "lib.meter"},
+ {RTE_LOGTYPE_SCHED, "lib.sched"},
+ {RTE_LOGTYPE_PORT, "lib.port"},
+ {RTE_LOGTYPE_TABLE, "lib.table"},
+ {RTE_LOGTYPE_PIPELINE, "lib.pipeline"},
+ {RTE_LOGTYPE_MBUF, "lib.mbuf"},
+ {RTE_LOGTYPE_CRYPTODEV, "lib.cryptodev"},
+ {RTE_LOGTYPE_EFD, "lib.efd"},
+ {RTE_LOGTYPE_EVENTDEV, "lib.eventdev"},
+ {RTE_LOGTYPE_GSO, "lib.gso"},
+ {RTE_LOGTYPE_USER1, "user1"},
+ {RTE_LOGTYPE_USER2, "user2"},
+ {RTE_LOGTYPE_USER3, "user3"},
+ {RTE_LOGTYPE_USER4, "user4"},
+ {RTE_LOGTYPE_USER5, "user5"},
+ {RTE_LOGTYPE_USER6, "user6"},
+ {RTE_LOGTYPE_USER7, "user7"},
+ {RTE_LOGTYPE_USER8, "user8"}
+};
+
+/* Logging should be the first initializer (before drivers and bus) */
+RTE_INIT_PRIO(rte_log_init, LOG)
+{
+ uint32_t i;
+
+ rte_log_set_global_level(RTE_LOG_DEBUG);
+
+ rte_logs.dynamic_types = calloc(RTE_LOGTYPE_FIRST_EXT_ID,
+ sizeof(struct rte_log_dynamic_type));
+ if (rte_logs.dynamic_types == NULL)
+ return;
+
+ /* register legacy log types */
+ for (i = 0; i < RTE_DIM(logtype_strings); i++)
+ __rte_log_register(logtype_strings[i].logtype,
+ logtype_strings[i].log_id);
+
+ rte_logs.dynamic_types_len = RTE_LOGTYPE_FIRST_EXT_ID;
+}
+
+static const char *
+loglevel_to_string(uint32_t level)
+{
+ switch (level) {
+ case 0: return "disabled";
+ case RTE_LOG_EMERG: return "emerg";
+ case RTE_LOG_ALERT: return "alert";
+ case RTE_LOG_CRIT: return "critical";
+ case RTE_LOG_ERR: return "error";
+ case RTE_LOG_WARNING: return "warning";
+ case RTE_LOG_NOTICE: return "notice";
+ case RTE_LOG_INFO: return "info";
+ case RTE_LOG_DEBUG: return "debug";
+ default: return "unknown";
+ }
+}
+
+/* dump global level and registered log types */
+void
+rte_log_dump(FILE *f)
+{
+ size_t i;
+
+ fprintf(f, "global log level is %s\n",
+ loglevel_to_string(rte_log_get_global_level()));
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ fprintf(f, "id %zu: %s, level is %s\n",
+ i, rte_logs.dynamic_types[i].name,
+ loglevel_to_string(rte_logs.dynamic_types[i].loglevel));
+ }
+}
+
+/*
+ * Generates a log message. The message will be sent to the stream
+ * defined by the previous call to rte_openlog_stream().
+ */
+int
+rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+{
+ FILE *f = rte_log_get_stream();
+ int ret;
+
+ if (logtype >= rte_logs.dynamic_types_len)
+ return -1;
+ if (!rte_log_can_log(logtype, level))
+ return 0;
+
+ /* save loglevel and logtype in a global per-lcore variable */
+ RTE_PER_LCORE(log_cur_msg).loglevel = level;
+ RTE_PER_LCORE(log_cur_msg).logtype = logtype;
+
+ ret = vfprintf(f, format, ap);
+ fflush(f);
+ return ret;
+}
+
+/*
+ * Generates a log message. The message will be sent to the stream
+ * defined by the previous call to rte_openlog_stream().
+ * No need to check the level here; that is done by rte_vlog().
+ */
+int
+rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, format);
+ ret = rte_vlog(level, logtype, format, ap);
+ va_end(ap);
+ return ret;
+}
+
+/*
+ * Called by environment-specific initialization functions.
+ */
+void
+eal_log_set_default(FILE *default_log)
+{
+ default_log_stream = default_log;
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+ RTE_LOG(NOTICE, EAL,
+ "Debug dataplane logs available - lower performance\n");
+#endif
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c
new file mode 100644
index 000000000..49d3ed0ce
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_eal_memconfig.h>
+#include <rte_version.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+void
+eal_mcfg_complete(void)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ struct rte_mem_config *mcfg = cfg->mem_config;
+
+ /* ALL shared mem_config related INIT DONE */
+ if (cfg->process_type == RTE_PROC_PRIMARY)
+ mcfg->magic = RTE_MAGIC;
+
+ internal_config.init_complete = 1;
+}
+
+void
+eal_mcfg_wait_complete(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* wait until the shared mem_config finishes initializing */
+ while (mcfg->magic != RTE_MAGIC)
+ rte_pause();
+}
+
+int
+eal_mcfg_check_version(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* check if the version from memconfig matches the compiled-in macro */
+ if (mcfg->version != RTE_VERSION)
+ return -1;
+
+ return 0;
+}
+
+void
+eal_mcfg_update_internal(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ internal_config.legacy_mem = mcfg->legacy_mem;
+ internal_config.single_file_segments = mcfg->single_file_segments;
+}
+
+void
+eal_mcfg_update_from_internal(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ mcfg->legacy_mem = internal_config.legacy_mem;
+ mcfg->single_file_segments = internal_config.single_file_segments;
+ /* record current DPDK version */
+ mcfg->version = RTE_VERSION;
+}
+
+void
+rte_mcfg_mem_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_tailq_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_mempool_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_timer_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_spinlock_lock(&mcfg->tlock);
+}
+
+void
+rte_mcfg_timer_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_spinlock_unlock(&mcfg->tlock);
+}
+
+bool
+rte_mcfg_get_single_file_segments(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ return (bool)mcfg->single_file_segments;
+}
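+
+/*
+ * A minimal usage sketch (not part of the upstream file): the wrappers
+ * above are meant to bracket accesses to the shared structures they
+ * guard, e.g. walking a shared tailq under the read lock.
+ */
+static void
+tailq_lock_usage_sketch(void)
+{
+	rte_mcfg_tailq_read_lock();
+	/* ... safely read shared tailq entries here ... */
+	rte_mcfg_tailq_read_unlock();
+}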
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c
new file mode 100644
index 000000000..55189d072
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_fbarray.h>
+#include <rte_memzone.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
+#include <rte_rwlock.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+
+struct mem_event_callback_entry {
+ TAILQ_ENTRY(mem_event_callback_entry) next;
+ char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN];
+ rte_mem_event_callback_t clb;
+ void *arg;
+};
+
+struct mem_alloc_validator_entry {
+ TAILQ_ENTRY(mem_alloc_validator_entry) next;
+ char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN];
+ rte_mem_alloc_validator_t clb;
+ int socket_id;
+ size_t limit;
+};
+
+/** Doubly-linked lists of registered callbacks. */
+TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry);
+TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry);
+
+static struct mem_event_callback_entry_list mem_event_callback_list =
+ TAILQ_HEAD_INITIALIZER(mem_event_callback_list);
+static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_alloc_validator_entry_list mem_alloc_validator_list =
+ TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list);
+static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_event_callback_entry *
+find_mem_event_callback(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *r;
+
+ TAILQ_FOREACH(r, &mem_event_callback_list, next) {
+ if (!strcmp(r->name, name) && r->arg == arg)
+ break;
+ }
+ return r;
+}
+
+static struct mem_alloc_validator_entry *
+find_mem_alloc_validator(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *r;
+
+ TAILQ_FOREACH(r, &mem_alloc_validator_list, next) {
+ if (!strcmp(r->name, name) && r->socket_id == socket_id)
+ break;
+ }
+ return r;
+}
+
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len)
+{
+ void *end, *aligned_start, *aligned_end;
+ size_t pgsz = (size_t)msl->page_sz;
+ const struct rte_memseg *ms;
+
+ /* for IOVA_VA, it's always contiguous */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA && !msl->external)
+ return true;
+
+ /* for legacy memory, it's always contiguous */
+ if (internal_config.legacy_mem)
+ return true;
+
+ end = RTE_PTR_ADD(start, len);
+
+ /* for nohuge, we check pagemap, otherwise check memseg */
+ if (!rte_eal_has_hugepages()) {
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ cur = rte_mem_virt2iova(aligned_start);
+ expected = cur + pgsz;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+
+ while (aligned_start < aligned_end) {
+ cur = rte_mem_virt2iova(aligned_start);
+ if (cur != expected)
+ return false;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+ expected += pgsz;
+ }
+ } else {
+ int start_seg, end_seg, cur_seg;
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) /
+ pgsz;
+ end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) /
+ pgsz;
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ ms = rte_fbarray_get(&msl->memseg_arr, start_seg);
+ cur = ms->iova;
+ expected = cur + pgsz;
+
+ /* if we can't access IOVA addresses, assume non-contiguous */
+ if (cur == RTE_BAD_IOVA)
+ return false;
+
+ for (cur_seg = start_seg + 1; cur_seg < end_seg;
+ cur_seg++, expected += pgsz) {
+ ms = rte_fbarray_get(&msl->memseg_arr, cur_seg);
+
+ if (ms->iova != expected)
+ return false;
+ }
+ }
+ return true;
+}
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->arg = arg;
+ strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+
+ if (name == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_event_callback_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len)
+{
+ struct mem_event_callback_entry *entry;
+
+ rte_rwlock_read_lock(&mem_event_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_event_callback_list, next) {
+ RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n",
+ entry->name, entry->arg);
+ entry->clb(event, start, len, entry->arg);
+ }
+
+ rte_rwlock_read_unlock(&mem_event_rwlock);
+}
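+
+/*
+ * A minimal usage sketch (not part of the upstream file): a callback of
+ * the rte_mem_event_callback_t shape, registered under a made-up name.
+ */
+static void
+example_mem_event_cb(enum rte_mem_event event, const void *addr, size_t len,
+		void *arg __rte_unused)
+{
+	RTE_LOG(DEBUG, EAL, "mem event %d at %p, len %zu\n",
+		(int)event, addr, len);
+}
+
+static void
+mem_event_usage_sketch(void)
+{
+	eal_memalloc_mem_event_callback_register("example-cb",
+			example_mem_event_cb, NULL);
+}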
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->socket_id = socket_id;
+ entry->limit = limit;
+ strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n",
+ name, socket_id, limit);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+
+ if (name == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_alloc_validator_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n",
+ name, socket_id);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret = 0;
+
+ rte_rwlock_read_lock(&mem_alloc_validator_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) {
+ if (entry->socket_id != socket_id || entry->limit > new_len)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n",
+ entry->name, entry->socket_id);
+ if (entry->clb(socket_id, entry->limit, new_len) < 0)
+ ret = -1;
+ }
+
+ rte_rwlock_read_unlock(&mem_alloc_validator_rwlock);
+
+ return ret;
+}
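+
+/*
+ * Illustrative sketch, not part of upstream: a minimal validator callback
+ * of the shape consumed by eal_memalloc_mem_alloc_validate() above. The
+ * function name and the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_validator(int socket_id, size_t cur_limit, size_t new_len)
+{
+ /* the walk above only invokes us once new_len has reached the limit */
+ RTE_LOG(DEBUG, EAL, "socket %i would grow to %zu (limit %zu)\n",
+ socket_id, new_len, cur_limit);
+ return new_len > cur_limit ? -1 : 0;
+}
+#endif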
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c
new file mode 100644
index 000000000..4c897a13f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c
@@ -0,0 +1,939 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_fbarray.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+
+#include "eal_memalloc.h"
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memcfg.h"
+#include "malloc_heap.h"
+
+/*
+ * Try to mmap *size bytes of anonymous memory. If successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Otherwise, retry
+ * with a smaller zone: decrease *size by page_sz until it reaches 0, in
+ * which case return NULL. Note: this function returns an address that is
+ * a multiple of the requested page size.
+ */
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+
+static void *next_baseaddr;
+static uint64_t system_page_sz;
+
+#ifdef RTE_EXEC_ENV_LINUX
+#define RTE_DONTDUMP MADV_DONTDUMP
+#elif defined RTE_EXEC_ENV_FREEBSD
+#define RTE_DONTDUMP MADV_NOCORE
+#else
+#error "madvise doesn't support this OS"
+#endif
+
+#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags)
+{
+ bool addr_is_hint, allow_shrink, unmap, no_align;
+ uint64_t map_sz;
+ void *mapped_addr, *aligned_addr;
+ uint8_t try = 0;
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+ addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
+ allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
+ unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
+
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+#ifdef RTE_ARCH_64
+ if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) eal_get_baseaddr();
+#endif
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
+ requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
+ addr_is_hint = true;
+ }
+
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to the system page size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
+ */
+ no_align = (requested_addr != NULL &&
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
+ page_sz == system_page_sz;
+
+ do {
+ map_sz = no_align ? *size : *size + page_sz;
+ if (map_sz > SIZE_MAX) {
+ RTE_LOG(ERR, EAL, "Map size too big\n");
+ rte_errno = E2BIG;
+ return NULL;
+ }
+
+ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
+ mmap_flags, -1, 0);
+ if (mapped_addr == MAP_FAILED && allow_shrink)
+ *size -= page_sz;
+
+ if (mapped_addr != MAP_FAILED && addr_is_hint &&
+ mapped_addr != requested_addr) {
+ try++;
+ next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+ if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
+ /* hint was not used. Try with another offset */
+ munmap(mapped_addr, map_sz);
+ mapped_addr = MAP_FAILED;
+ requested_addr = next_baseaddr;
+ }
+ }
+ } while ((allow_shrink || addr_is_hint) &&
+ mapped_addr == MAP_FAILED && *size > 0);
+
+ /* align resulting address - if map failed, we will ignore the value
+ * anyway, so no need to add additional checks.
+ */
+ aligned_addr = no_align ? mapped_addr :
+ RTE_PTR_ALIGN(mapped_addr, page_sz);
+
+ if (*size == 0) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ return NULL;
+ } else if (mapped_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+ strerror(errno));
+ /* pass errno up the call chain */
+ rte_errno = errno;
+ return NULL;
+ } else if (requested_addr != NULL && !addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
+ requested_addr, aligned_addr);
+ munmap(mapped_addr, map_sz);
+ rte_errno = EADDRNOTAVAIL;
+ return NULL;
+ } else if (requested_addr != NULL && addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
+ requested_addr, aligned_addr);
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ aligned_addr, *size);
+
+ if (unmap) {
+ munmap(mapped_addr, map_sz);
+ } else if (!no_align) {
+ void *map_end, *aligned_end;
+ size_t before_len, after_len;
+
+ /* when we reserve space with alignment, we add alignment to
+ * mapping size. On 32-bit, if 1GB alignment was requested, this
+ * would waste 1GB of address space, which is a luxury we cannot
+ * afford. So, if alignment was performed, check if any unneeded
+ * address space can be unmapped back.
+ */
+
+ map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
+ aligned_end = RTE_PTR_ADD(aligned_addr, *size);
+
+ /* unmap space before aligned mmap address */
+ before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
+ if (before_len > 0)
+ munmap(mapped_addr, before_len);
+
+ /* unmap space after aligned end mmap address */
+ after_len = RTE_PTR_DIFF(map_end, aligned_end);
+ if (after_len > 0)
+ munmap(aligned_end, after_len);
+ }
+
+ if (!unmap) {
+ /* Exclude these pages from a core dump. */
+ if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+ }
+
+ return aligned_addr;
+}
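+
+/*
+ * Illustrative sketch, not part of upstream: reserving an address-space
+ * window with eal_get_virtual_area() above. EAL_VIRTUAL_AREA_ALLOW_SHRINK
+ * lets the call fall back to a smaller area; the function name and the
+ * EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static void *
+example_reserve_window(size_t *sz)
+{
+ size_t page_sz = (size_t)sysconf(_SC_PAGESIZE);
+ int flags = EAL_VIRTUAL_AREA_ADDR_IS_HINT |
+ EAL_VIRTUAL_AREA_ALLOW_SHRINK;
+
+ /* NULL address means "anywhere", subject to the base address hint */
+ return eal_get_virtual_area(NULL, sz, page_sz, flags, 0);
+}
+#endif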
+
+static struct rte_memseg *
+virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ const struct rte_fbarray *arr;
+ void *start, *end;
+ int ms_idx;
+
+ if (msl == NULL)
+ return NULL;
+
+ /* a memseg list was specified, check if it's the right one */
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, msl->len);
+
+ if (addr < start || addr >= end)
+ return NULL;
+
+ /* now, calculate index */
+ arr = &msl->memseg_arr;
+ ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
+ return rte_fbarray_get(arr, ms_idx);
+}
+
+static struct rte_memseg_list *
+virt2memseg_list(const void *addr)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ int msl_idx;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ void *start, *end;
+ msl = &mcfg->memsegs[msl_idx];
+
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, msl->len);
+ if (addr >= start && addr < end)
+ break;
+ }
+ /* if we didn't find our memseg list */
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS)
+ return NULL;
+ return msl;
+}
+
+struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *addr)
+{
+ return virt2memseg_list(addr);
+}
+
+struct virtiova {
+ rte_iova_t iova;
+ void *virt;
+};
+static int
+find_virt(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+static int
+find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, size_t len, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+
+void *
+rte_mem_iova2virt(rte_iova_t iova)
+{
+ struct virtiova vi;
+
+ memset(&vi, 0, sizeof(vi));
+
+ vi.iova = iova;
+ /* for legacy mem, we can get away with scanning VA-contiguous segments,
+ * as we know they are PA-contiguous as well
+ */
+ if (internal_config.legacy_mem)
+ rte_memseg_contig_walk(find_virt_legacy, &vi);
+ else
+ rte_memseg_walk(find_virt, &vi);
+
+ return vi.virt;
+}
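+
+/*
+ * Illustrative sketch, not part of upstream: a segment's base IOVA must
+ * resolve back to its base VA through rte_mem_iova2virt(). The function
+ * name and the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_iova_roundtrip(const struct rte_memseg *ms)
+{
+ return rte_mem_iova2virt(ms->iova) == ms->addr ? 0 : -1;
+}
+#endif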
+
+struct rte_memseg *
+rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ return virt2memseg(addr, msl != NULL ? msl :
+ rte_mem_virt2memseg_list(addr));
+}
+
+static int
+physmem_size(const struct rte_memseg_list *msl, void *arg)
+{
+ uint64_t *total_len = arg;
+
+ if (msl->external)
+ return 0;
+
+ *total_len += msl->memseg_arr.count * msl->page_sz;
+
+ return 0;
+}
+
+/* get the total size of memory */
+uint64_t
+rte_eal_get_physmem_size(void)
+{
+ uint64_t total_len = 0;
+
+ rte_memseg_list_walk(physmem_size, &total_len);
+
+ return total_len;
+}
+
+static int
+dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx, ms_idx, fd;
+ FILE *f = arg;
+
+ msl_idx = msl - mcfg->memsegs;
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+ if (ms_idx < 0)
+ return -1;
+
+ fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
+ fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32" fd:%i\n",
+ msl_idx, ms_idx,
+ ms->iova,
+ ms->len,
+ ms->addr,
+ ms->socket_id,
+ ms->hugepage_sz,
+ ms->nchannel,
+ ms->nrank,
+ fd);
+
+ return 0;
+}
+
+/*
+ * Defined here because it is declared in rte_memory.h, while the actual
+ * implementation is in eal_common_memalloc.c, like all other memalloc
+ * internals.
+ */
+int
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_register(name, clb, arg);
+}
+
+int
+rte_mem_event_callback_unregister(const char *name, void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_unregister(name, arg);
+}
+
+int
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
+ limit);
+}
+
+int
+rte_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
+}
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+ rte_memseg_walk(dump_memseg, f);
+}
+
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ uint64_t *mask = arg;
+ rte_iova_t iova;
+
+ /* highest address within segment */
+ iova = (ms->iova + ms->len) - 1;
+ if (!(iova & *mask))
+ return 0;
+
+ RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+ ms->iova, ms->len);
+
+ RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+ return 1;
+}
+
+#define MAX_DMA_MASK_BITS 63
+
+/* check memseg iovas are within the required range based on dma mask */
+static int
+check_dma_mask(uint8_t maskbits, bool thread_unsafe)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t mask;
+ int ret;
+
+ /* Sanity check: we only allow mask widths that can be managed with
+ * 64-bit variables. Any higher value is almost certainly wrong. */
+ if (maskbits > MAX_DMA_MASK_BITS) {
+ RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+ maskbits, MAX_DMA_MASK_BITS);
+ return -1;
+ }
+
+ /* create dma mask */
+ mask = ~((1ULL << maskbits) - 1);
+
+ if (thread_unsafe)
+ ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
+ else
+ ret = rte_memseg_walk(check_iova, &mask);
+
+ if (ret)
+ /*
+ * DMA mask precludes hugepage usage.
+ * This device cannot be used and we do not need to keep
+ * the dma mask.
+ */
+ return 1;
+
+ /*
+ * we need to keep the most restrictive maskbits for checking
+ * potential dynamic memory allocations in the future.
+ */
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+ return 0;
+}
+
+int
+rte_mem_check_dma_mask(uint8_t maskbits)
+{
+ return check_dma_mask(maskbits, false);
+}
+
+int
+rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
+{
+ return check_dma_mask(maskbits, true);
+}
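+
+/*
+ * Illustrative sketch, not part of upstream: a driver limited to 40-bit
+ * IOVAs would call the check above at probe time. The function name and
+ * the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_probe_check_dma(void)
+{
+ /* fail the probe if any existing memseg lies above 2^40 */
+ if (rte_mem_check_dma_mask(40) != 0)
+ return -ENODEV;
+ return 0;
+}
+#endif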
+
+/*
+ * Set dma mask to use when memory initialization is done.
+ *
+ * This function should ONLY be used by code executed before the memory
+ * initialization. PMDs should use rte_mem_check_dma_mask if the device
+ * has addressing limitations.
+ */
+void
+rte_mem_set_dma_mask(uint8_t maskbits)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+}
+
+/* return the number of memory channels */
+unsigned rte_memory_get_nchannel(void)
+{
+ return rte_eal_get_configuration()->mem_config->nchannel;
+}
+
+/* return the number of memory ranks */
+unsigned rte_memory_get_nrank(void)
+{
+ return rte_eal_get_configuration()->mem_config->nrank;
+}
+
+static int
+rte_eal_memdevice_init(void)
+{
+ struct rte_config *config;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ config = rte_eal_get_configuration();
+ config->mem_config->nchannel = internal_config.force_nchannel;
+ config->mem_config->nrank = internal_config.force_nrank;
+
+ return 0;
+}
+
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+ unsigned long virtual = (unsigned long)virt;
+ int page_size = getpagesize();
+ unsigned long aligned = (virtual & ~(page_size - 1));
+ return mlock((void *)aligned, page_size);
+}
+
+int
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ int n_segs;
+ size_t len;
+
+ ms = rte_fbarray_get(arr, ms_idx);
+
+ /* find how many more segments there are, starting with
+ * this one.
+ */
+ n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
+ len = n_segs * msl->page_sz;
+
+ ret = func(msl, ms, len, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr,
+ ms_idx + n_segs);
+ }
+ }
+ return 0;
+}
+
+int
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
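+
+/*
+ * Illustrative sketch, not part of upstream: a contig-walk callback that
+ * sums the lengths of all VA-contiguous chunks, used e.g. as
+ * size_t t = 0; rte_memseg_contig_walk(example_sum_contig, &t);
+ * The function name and the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_sum_contig(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms __rte_unused, size_t len, void *arg)
+{
+ size_t *total = arg;
+
+ *total += len;
+ return 0; /* non-zero would stop the walk */
+}
+#endif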
+
+int
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ ms = rte_fbarray_get(arr, ms_idx);
+ ret = func(msl, ms, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
+ }
+ }
+ return 0;
+}
+
+int
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->base_va == NULL)
+ continue;
+
+ ret = func(msl, arg);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+rte_memseg_get_fd(const struct rte_memseg *ms)
+{
+ int ret;
+
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_get_fd_thread_unsafe(ms);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL || offset == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
+{
+ int ret;
+
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
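+
+/*
+ * Illustrative sketch, not part of upstream: fetching a segment's backing
+ * file descriptor and offset, e.g. to pass the mapping to another process.
+ * The function name and the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_get_backing_file(const struct rte_memseg *ms, size_t *off)
+{
+ int fd = rte_memseg_get_fd(ms);
+
+ if (fd < 0)
+ return -rte_errno;
+ if (rte_memseg_get_fd_offset(ms, off) < 0)
+ return -rte_errno;
+ return fd;
+}
+#endif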
+
+int
+rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int socket_id, n;
+ int ret = 0;
+
+ if (va_addr == NULL || page_sz == 0 || len == 0 ||
+ !rte_is_power_of_2(page_sz) ||
+ RTE_ALIGN(len, page_sz) != len ||
+ ((len / page_sz) != n_pages && iova_addrs != NULL) ||
+ !rte_is_aligned(va_addr, page_sz)) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* make sure the segment doesn't already exist */
+ if (malloc_heap_find_external_seg(va_addr, len) != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* get next available socket ID */
+ socket_id = mcfg->next_socket_id;
+ if (socket_id > INT32_MAX) {
+ RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+ rte_errno = ENOSPC;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* we can create a new memseg */
+ n = len / page_sz;
+ if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
+ page_sz, "extmem", socket_id) == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ /* memseg list successfully created - increment next socket ID */
+ mcfg->next_socket_id++;
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
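+
+/*
+ * Illustrative sketch, not part of upstream: registering an anonymous
+ * mapping as external DPDK memory. It assumes page_sz is the system page
+ * size so the mmap result is suitably aligned; passing NULL iova_addrs
+ * leaves the IOVAs unknown. The function name and the EAL_DOC_EXAMPLES
+ * guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_register_extmem(size_t len, size_t page_sz)
+{
+ void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (va == MAP_FAILED)
+ return -1;
+ if (rte_extmem_register(va, len, NULL, len / page_sz, page_sz) != 0) {
+ munmap(va, len);
+ return -1;
+ }
+ return 0;
+}
+#endif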
+
+int
+rte_extmem_unregister(void *va_addr, size_t len)
+{
+ struct rte_memseg_list *msl;
+ int ret = 0;
+
+ if (va_addr == NULL || len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our segment */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = malloc_heap_destroy_external_seg(msl);
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+static int
+sync_memory(void *va_addr, size_t len, bool attach)
+{
+ struct rte_memseg_list *msl;
+ int ret = 0;
+
+ if (va_addr == NULL || len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our segment */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (attach)
+ ret = rte_fbarray_attach(&msl->memseg_arr);
+ else
+ ret = rte_fbarray_detach(&msl->memseg_arr);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+int
+rte_extmem_attach(void *va_addr, size_t len)
+{
+ return sync_memory(va_addr, len, true);
+}
+
+int
+rte_extmem_detach(void *va_addr, size_t len)
+{
+ return sync_memory(va_addr, len, false);
+}
+
+/* init memory subsystem */
+int
+rte_eal_memory_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int retval;
+ RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
+
+ if (!mcfg)
+ return -1;
+
+ /* lock mem hotplug here, to prevent races while we init */
+ rte_mcfg_mem_read_lock();
+
+ if (rte_eal_memseg_init() < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ rte_eal_hugepage_init() :
+ rte_eal_hugepage_attach();
+ if (retval < 0)
+ goto fail;
+
+ if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
+ goto fail;
+
+ return 0;
+fail:
+ rte_mcfg_mem_read_unlock();
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c
new file mode 100644
index 000000000..7c21aa921
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,420 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_eal_trace.h>
+
+#include "malloc_heap.h"
+#include "malloc_elem.h"
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ const struct rte_memzone *mz;
+ int i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ /*
+ * the algorithm is not optimal (linear), but there are few
+ * zones and this function should be called at init only
+ */
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ mz = rte_fbarray_get(arr, i);
+ if (mz->addr != NULL &&
+ !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+ return mz;
+ i = rte_fbarray_find_next_used(arr, i + 1);
+ }
+ return NULL;
+}
+
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+ int socket_id, unsigned int flags, unsigned int align,
+ unsigned int bound)
+{
+ struct rte_memzone *mz;
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ void *mz_addr;
+ size_t requested_len;
+ int mz_idx;
+ bool contig;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ /* no more room in config */
+ if (arr->count >= arr->len) {
+ RTE_LOG(ERR, EAL,
+ "%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
+ __func__);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ if (strlen(name) > sizeof(mz->name) - 1) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n",
+ __func__, name);
+ rte_errno = ENAMETOOLONG;
+ return NULL;
+ }
+
+ /* zone already exists */
+ if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+ __func__, name);
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ /* if alignment is not a power of two */
+ if (align && !rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+ align);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* alignment less than cache size is not allowed */
+ if (align < RTE_CACHE_LINE_SIZE)
+ align = RTE_CACHE_LINE_SIZE;
+
+ /* align length on cache boundary. Check for overflow before doing so */
+ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+ rte_errno = EINVAL; /* requested size too big */
+ return NULL;
+ }
+
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
+
+ /* save minimal requested length */
+ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
+
+ /* check that boundary condition is valid */
+ if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* only set socket to SOCKET_ID_ANY if we aren't allocating for an
+ * external heap.
+ */
+ if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
+ socket_id = SOCKET_ID_ANY;
+
+ contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
+ /* malloc only cares about size flags, remove contig flag from flags */
+ flags &= ~RTE_MEMZONE_IOVA_CONTIG;
+
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
+ requested_len = bound;
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
+ }
+ if (mz_addr == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+
+ /* fill the zone in config */
+ mz_idx = rte_fbarray_find_next_free(arr, 0);
+
+ if (mz_idx < 0) {
+ mz = NULL;
+ } else {
+ rte_fbarray_set_used(arr, mz_idx);
+ mz = rte_fbarray_get(arr, mz_idx);
+ }
+
+ if (mz == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
+ malloc_heap_free(elem);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ strlcpy(mz->name, name, sizeof(mz->name));
+ mz->iova = rte_malloc_virt2iova(mz_addr);
+ mz->addr = mz_addr;
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
+ mz->hugepage_sz = elem->msl->page_sz;
+ mz->socket_id = elem->msl->socket_id;
+ mz->flags = 0;
+
+ return mz;
+}
+
+static const struct rte_memzone *
+rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, bound);
+
+ rte_eal_trace_memzone_reserve(name, len, socket_id, flags, align,
+ bound, mz);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align, unsigned bound)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, bound);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, 0);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve(const char *name, size_t len, int socket_id,
+ unsigned flags)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, RTE_CACHE_LINE_SIZE, 0);
+}
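+
+/*
+ * Illustrative sketch, not part of upstream: a typical reserve/use/free
+ * cycle with the wrappers above. The zone name, the size and the
+ * EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_memzone_cycle(void)
+{
+ const struct rte_memzone *mz;
+
+ mz = rte_memzone_reserve("example_mz", 4096, SOCKET_ID_ANY, 0);
+ if (mz == NULL)
+ return -rte_errno;
+ memset(mz->addr, 0, mz->len);
+ return rte_memzone_free(mz);
+}
+#endif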
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+ char name[RTE_MEMZONE_NAMESIZE];
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ struct rte_memzone *found_mz;
+ int ret = 0;
+ void *addr = NULL;
+ unsigned idx;
+
+ if (mz == NULL)
+ return -EINVAL;
+
+ rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ idx = rte_fbarray_find_idx(arr, mz);
+ found_mz = rte_fbarray_get(arr, idx);
+
+ if (found_mz == NULL) {
+ ret = -EINVAL;
+ } else if (found_mz->addr == NULL) {
+ RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
+ ret = -EINVAL;
+ } else {
+ addr = found_mz->addr;
+ memset(found_mz, 0, sizeof(*found_mz));
+ rte_fbarray_set_free(arr, idx);
+ }
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ if (addr != NULL)
+ rte_free(addr);
+
+ rte_eal_trace_memzone_free(name, addr, ret);
+ return ret;
+}
+
+/*
+ * Look up the memzone identified by the given name
+ */
+const struct rte_memzone *
+rte_memzone_lookup(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *memzone = NULL;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+
+ memzone = memzone_lookup_thread_unsafe(name);
+
+ rte_rwlock_read_unlock(&mcfg->mlock);
+
+ rte_eal_trace_memzone_lookup(name, memzone);
+ return memzone;
+}
+
+static void
+dump_memzone(const struct rte_memzone *mz, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl = NULL;
+ void *cur_addr, *mz_end;
+ struct rte_memseg *ms;
+ int mz_idx, ms_idx;
+ size_t page_sz;
+ FILE *f = arg;
+
+ mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
+
+ fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, "
+ "socket_id:%"PRId32", flags:%"PRIx32"\n",
+ mz_idx,
+ mz->name,
+ mz->len,
+ mz->addr,
+ mz->socket_id,
+ mz->flags);
+
+ /* go through each page occupied by this memzone */
+ msl = rte_mem_virt2memseg_list(mz->addr);
+ if (!msl) {
+ RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
+ return;
+ }
+ page_sz = (size_t)mz->hugepage_sz;
+ cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
+ mz_end = RTE_PTR_ADD(cur_addr, mz->len);
+
+ fprintf(f, "physical segments used:\n");
+ ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
+ ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
+
+ do {
+ fprintf(f, " addr: %p iova: 0x%" PRIx64 " "
+ "len: 0x%zx "
+ "pagesz: 0x%zx\n",
+ cur_addr, ms->iova, ms->len, page_sz);
+
+ /* advance VA to next page */
+ cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
+
+ /* memzones occupy contiguous segments */
+ ++ms;
+ } while (cur_addr < mz_end);
+}
+
+/* Dump all reserved memory zones on console */
+void
+rte_memzone_dump(FILE *f)
+{
+ rte_memzone_walk(dump_memzone, f);
+}
+
+/*
+ * Init the memzone subsystem
+ */
+int
+rte_eal_memzone_init(void)
+{
+ struct rte_mem_config *mcfg;
+ int ret = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ rte_fbarray_init(&mcfg->memzones, "memzone",
+ RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
+ ret = -1;
+ } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+ rte_fbarray_attach(&mcfg->memzones)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
+ ret = -1;
+ }
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return ret;
+}
+
+/* Walk all reserved memory zones */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
+ void *arg)
+{
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ int i;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ struct rte_memzone *mz = rte_fbarray_get(arr, i);
+ (*func)(mz, arg);
+ i = rte_fbarray_find_next_used(arr, i + 1);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c
new file mode 100644
index 000000000..8f2cbd1c6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c
@@ -0,0 +1,1861 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <syslog.h>
+#endif
+#include <ctype.h>
+#include <limits.h>
+#include <errno.h>
+#include <getopt.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <dlfcn.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include <rte_string_fns.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+#include <rte_version.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_telemetry.h>
+#endif
+
+#include "eal_internal_cfg.h"
+#include "eal_options.h"
+#include "eal_filesystem.h"
+#include "eal_private.h"
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include "eal_trace.h"
+#endif
+
+#define BITS_PER_HEX 4
+#define LCORE_OPT_LST 1
+#define LCORE_OPT_MSK 2
+#define LCORE_OPT_MAP 3
+
+const char
+eal_short_options[] =
+ "b:" /* pci-blacklist */
+ "c:" /* coremask */
+ "s:" /* service coremask */
+ "d:" /* driver */
+ "h" /* help */
+ "l:" /* corelist */
+ "S:" /* service corelist */
+ "m:" /* memory size */
+ "n:" /* memory channels */
+ "r:" /* memory ranks */
+ "v" /* version */
+ "w:" /* pci-whitelist */
+ ;
+
+const struct option
+eal_long_options[] = {
+ {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM },
+ {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM },
+ {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM },
+ {OPT_HELP, 0, NULL, OPT_HELP_NUM },
+ {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM },
+ {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM },
+ {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM },
+ {OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
+ {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
+ {OPT_TRACE, 1, NULL, OPT_TRACE_NUM },
+ {OPT_TRACE_DIR, 1, NULL, OPT_TRACE_DIR_NUM },
+ {OPT_TRACE_BUF_SIZE, 1, NULL, OPT_TRACE_BUF_SIZE_NUM },
+ {OPT_TRACE_MODE, 1, NULL, OPT_TRACE_MODE_NUM },
+ {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
+ {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM},
+ {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM },
+ {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
+ {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
+ {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
+ {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
+ {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
+ {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
+ {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
+ {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
+ {OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
+ {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
+ {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
+ {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM },
+ {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM},
+ {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
+ {OPT_TELEMETRY, 0, NULL, OPT_TELEMETRY_NUM },
+ {OPT_NO_TELEMETRY, 0, NULL, OPT_NO_TELEMETRY_NUM },
+ {0, 0, NULL, 0 }
+};
+
+TAILQ_HEAD(shared_driver_list, shared_driver);
+
+/* Definition for shared object drivers. */
+struct shared_driver {
+ TAILQ_ENTRY(shared_driver) next;
+
+ char name[PATH_MAX];
+ void* lib_handle;
+};
+
+/* List of external loadable drivers */
+static struct shared_driver_list solib_list =
+TAILQ_HEAD_INITIALIZER(solib_list);
+
+/* Default path of external loadable drivers */
+static const char *default_solib_dir = RTE_EAL_PMD_PATH;
+
+/*
+ * Stringified version of solib path used by dpdk-pmdinfo.py
+ * Note: PLEASE DO NOT ALTER THIS without making a corresponding
+ * change to usertools/dpdk-pmdinfo.py
+ */
+static const char dpdk_solib_path[] __rte_used =
+"DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH;
+
+TAILQ_HEAD(device_option_list, device_option);
+
+struct device_option {
+ TAILQ_ENTRY(device_option) next;
+
+ enum rte_devtype type;
+ char arg[];
+};
+
+static struct device_option_list devopt_list =
+TAILQ_HEAD_INITIALIZER(devopt_list);
+
+static int master_lcore_parsed;
+static int mem_parsed;
+static int core_parsed;
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+static char **eal_args;
+static char **eal_app_args;
+
+#define EAL_PARAM_REQ "/eal/params"
+#define EAL_APP_PARAM_REQ "/eal/app_params"
+
+/* callback handler for telemetry library to report out EAL flags */
+int
+handle_eal_info_request(const char *cmd, const char *params __rte_unused,
+ struct rte_tel_data *d)
+{
+ char **args;
+ int used = 0;
+ int i = 0;
+
+ if (strcmp(cmd, EAL_PARAM_REQ) == 0)
+ args = eal_args;
+ else
+ args = eal_app_args;
+
+ rte_tel_data_start_array(d, RTE_TEL_STRING_VAL);
+ if (args == NULL || args[0] == NULL)
+ return 0;
+
+ for ( ; args[i] != NULL; i++)
+ used = rte_tel_data_add_array_string(d, args[i]);
+ return used;
+}
+
+int
+eal_save_args(int argc, char **argv)
+{
+ int i, j;
+
+ rte_telemetry_register_cmd(EAL_PARAM_REQ, handle_eal_info_request,
+ "Returns EAL commandline parameters used. Takes no parameters");
+ rte_telemetry_register_cmd(EAL_APP_PARAM_REQ, handle_eal_info_request,
+ "Returns app commandline parameters used. Takes no parameters");
+
+ /* clone argv to report out later. We overprovision, but
+ * this does not waste huge amounts of memory
+ */
+ eal_args = calloc(argc + 1, sizeof(*eal_args));
+ if (eal_args == NULL)
+ return -1;
+
+ for (i = 0; i < argc; i++) {
+ eal_args[i] = strdup(argv[i]);
+ if (strcmp(argv[i], "--") == 0)
+ break;
+ }
+ eal_args[i++] = NULL; /* always finish with NULL */
+
+ /* allow reporting of any app args we know about too */
+ if (i >= argc)
+ return 0;
+
+ eal_app_args = calloc(argc - i + 1, sizeof(*eal_app_args));
+ if (eal_app_args == NULL)
+ return -1;
+
+ for (j = 0; i < argc; j++, i++)
+ eal_app_args[j] = strdup(argv[i]);
+ eal_app_args[j] = NULL;
+
+ return 0;
+}
+#endif
+
+static int
+eal_option_device_add(enum rte_devtype type, const char *optarg)
+{
+ struct device_option *devopt;
+ size_t optlen;
+ int ret;
+
+ optlen = strlen(optarg) + 1;
+ devopt = calloc(1, sizeof(*devopt) + optlen);
+ if (devopt == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate device option\n");
+ return -ENOMEM;
+ }
+
+ devopt->type = type;
+ ret = strlcpy(devopt->arg, optarg, optlen);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Unable to copy device option\n");
+ free(devopt);
+ return -EINVAL;
+ }
+ TAILQ_INSERT_TAIL(&devopt_list, devopt, next);
+ return 0;
+}
+
+int
+eal_option_device_parse(void)
+{
+ struct device_option *devopt;
+ void *tmp;
+ int ret = 0;
+
+ TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) {
+ if (ret == 0) {
+ ret = rte_devargs_add(devopt->type, devopt->arg);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n",
+ devopt->arg);
+ }
+ TAILQ_REMOVE(&devopt_list, devopt, next);
+ free(devopt);
+ }
+ return ret;
+}
+
+const char *
+eal_get_hugefile_prefix(void)
+{
+ if (internal_config.hugefile_prefix != NULL)
+ return internal_config.hugefile_prefix;
+ return HUGEFILE_PREFIX_DEFAULT;
+}
+
+void
+eal_reset_internal_config(struct internal_config *internal_cfg)
+{
+ int i;
+
+ internal_cfg->memory = 0;
+ internal_cfg->force_nrank = 0;
+ internal_cfg->force_nchannel = 0;
+ internal_cfg->hugefile_prefix = NULL;
+ internal_cfg->hugepage_dir = NULL;
+ internal_cfg->force_sockets = 0;
+ /* zero out the NUMA config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
+ /* zero out hugedir descriptors */
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+ memset(&internal_cfg->hugepage_info[i], 0,
+ sizeof(internal_cfg->hugepage_info[0]));
+ internal_cfg->hugepage_info[i].lock_descriptor = -1;
+ }
+ internal_cfg->base_virtaddr = 0;
+
+#ifdef LOG_DAEMON
+ internal_cfg->syslog_facility = LOG_DAEMON;
+#endif
+
+ /* if set to NONE, interrupt mode is determined automatically */
+ internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
+
+#ifdef RTE_LIBEAL_USE_HPET
+ internal_cfg->no_hpet = 0;
+#else
+ internal_cfg->no_hpet = 1;
+#endif
+ internal_cfg->vmware_tsc_map = 0;
+ internal_cfg->create_uio_dev = 0;
+ internal_cfg->iova_mode = RTE_IOVA_DC;
+ internal_cfg->user_mbuf_pool_ops_name = NULL;
+ CPU_ZERO(&internal_cfg->ctrl_cpuset);
+ internal_cfg->init_complete = 0;
+}
+
+static int
+eal_plugin_add(const char *path)
+{
+ struct shared_driver *solib;
+
+ solib = malloc(sizeof(*solib));
+ if (solib == NULL) {
+ RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
+ return -1;
+ }
+ memset(solib, 0, sizeof(*solib));
+ strlcpy(solib->name, path, PATH_MAX-1);
+ solib->name[PATH_MAX-1] = 0;
+ TAILQ_INSERT_TAIL(&solib_list, solib, next);
+
+ return 0;
+}
+
+static int
+eal_plugindir_init(const char *path)
+{
+ DIR *d = NULL;
+ struct dirent *dent = NULL;
+ char sopath[PATH_MAX];
+
+ if (path == NULL || *path == '\0')
+ return 0;
+
+ d = opendir(path);
+ if (d == NULL) {
+ RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n",
+ path, strerror(errno));
+ return -1;
+ }
+
+ while ((dent = readdir(d)) != NULL) {
+ struct stat sb;
+
+ snprintf(sopath, sizeof(sopath), "%s/%s", path, dent->d_name);
+
+ if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode)))
+ continue;
+
+ if (eal_plugin_add(sopath) == -1)
+ break;
+ }
+
+ closedir(d);
+ /* XXX this ignores failures from readdir() itself */
+ return (dent == NULL) ? 0 : -1;
+}
+
+int
+eal_plugins_init(void)
+{
+#ifndef RTE_EXEC_ENV_WINDOWS
+ struct shared_driver *solib = NULL;
+ struct stat sb;
+
+ if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 &&
+ S_ISDIR(sb.st_mode))
+ eal_plugin_add(default_solib_dir);
+
+ TAILQ_FOREACH(solib, &solib_list, next) {
+
+ if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+ if (eal_plugindir_init(solib->name) == -1) {
+ RTE_LOG(ERR, EAL,
+ "Cannot init plugin directory %s\n",
+ solib->name);
+ return -1;
+ }
+ } else {
+ RTE_LOG(DEBUG, EAL, "open shared lib %s\n",
+ solib->name);
+ solib->lib_handle = dlopen(solib->name, RTLD_NOW);
+ if (solib->lib_handle == NULL) {
+ RTE_LOG(ERR, EAL, "%s\n", dlerror());
+ return -1;
+ }
+ }
+
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Parse the coremask given as argument (hexadecimal string) and fill
+ * the global configuration (core role and core count) with the parsed
+ * value.
+ */
+static int xdigit2val(unsigned char c)
+{
+ int val;
+
+ if (isdigit(c))
+ val = c - '0';
+ else if (isupper(c))
+ val = c - 'A' + 10;
+ else
+ val = c - 'a' + 10;
+ return val;
+}
+
+static int
+eal_parse_service_coremask(const char *coremask)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, j, idx = 0;
+ unsigned int count = 0;
+ char c;
+ int val;
+ uint32_t taken_lcore_count = 0;
+
+ if (coremask == NULL)
+ return -1;
+ /* Remove all leading and trailing blank characters.
+ * Remove the 0x/0X prefix if it exists.
+ */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE;
+ j++, idx++) {
+ if ((1 << j) & val) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (master_lcore_parsed &&
+ cfg->master_lcore == lcore) {
+ RTE_LOG(ERR, EAL,
+ "lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+
+ if (eal_cpu_detected(idx) == 0) {
+ RTE_LOG(ERR, EAL,
+ "lcore %u unavailable\n", idx);
+ return -1;
+ }
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
+ lcore_config[idx].core_role = ROLE_SERVICE;
+ count++;
+ }
+ }
+ }
+
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+
+ for (; idx < RTE_MAX_LCORE; idx++)
+ lcore_config[idx].core_index = -1;
+
+ if (count == 0)
+ return -1;
+
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
+ cfg->service_lcore_count = count;
+ return 0;
+}
+
+static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
+update_lcore_config(int *cores)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ unsigned int count = 0;
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (cores[i] != -1) {
+ if (eal_cpu_detected(i) == 0) {
+ RTE_LOG(ERR, EAL, "lcore %u unavailable\n", i);
+ ret = -1;
+ continue;
+ }
+ cfg->lcore_role[i] = ROLE_RTE;
+ count++;
+ } else {
+ cfg->lcore_role[i] = ROLE_OFF;
+ }
+ lcore_config[i].core_index = cores[i];
+ }
+ if (!ret)
+ cfg->lcore_count = count;
+ return ret;
+}
+
+static int
+eal_parse_coremask(const char *coremask, int *cores)
+{
+ unsigned count = 0;
+ int i, j, idx;
+ int val;
+ char c;
+
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++)
+ cores[idx] = -1;
+ idx = 0;
+
+ /* Remove all leading and trailing blank characters.
+ * Remove the 0x/0X prefix if it exists.
+ */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
+ {
+ if ((1 << j) & val) {
+ cores[idx] = count;
+ count++;
+ }
+ }
+ }
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+ if (count == 0)
+ return -1;
+ return 0;
+}
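+
+/*
+ * Illustrative sketch, not part of upstream: coremask "0xA" has bits 1 and
+ * 3 set, so the parser above yields cores[1] == 0 and cores[3] == 1, i.e.
+ * two enabled lcores with consecutive core indexes. The function name and
+ * the EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static void
+example_coremask(void)
+{
+ int cores[RTE_MAX_LCORE];
+
+ if (eal_parse_coremask("0xA", cores) == 0)
+ RTE_LOG(DEBUG, EAL, "core1=%d core3=%d\n",
+ cores[1], cores[3]);
+}
+#endif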
+
+static int
+eal_parse_service_corelist(const char *corelist)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, idx = 0;
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+ uint32_t taken_lcore_count = 0;
+
+ if (corelist == NULL)
+ return -1;
+
+ /* Remove all leading and trailing blank characters */
+ while (isblank(*corelist))
+ corelist++;
+ i = strlen(corelist);
+ while ((i > 0) && isblank(corelist[i - 1]))
+ i--;
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtoul(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cfg->lcore_role[idx] != ROLE_SERVICE) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (cfg->master_lcore == lcore &&
+ master_lcore_parsed) {
+ RTE_LOG(ERR, EAL,
+ "Error: lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
+ lcore_config[idx].core_role =
+ ROLE_SERVICE;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
+ return 0;
+}
+
+static int
+eal_parse_corelist(const char *corelist, int *cores)
+{
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+ int idx;
+
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++)
+ cores[idx] = -1;
+
+ /* Remove all leading blank characters */
+ while (isblank(*corelist))
+ corelist++;
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtol(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ if (idx < 0 || idx >= RTE_MAX_LCORE)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cores[idx] == -1) {
+ cores[idx] = count;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+ return 0;
+}
+
+/* Changes the lcore id of the master thread */
+static int
+eal_parse_master_lcore(const char *arg)
+{
+ char *parsing_end;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ errno = 0;
+ cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0);
+ if (errno || parsing_end[0] != 0)
+ return -1;
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
+ master_lcore_parsed = 1;
+
+ /* ensure master core is not used as service core */
+ if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse elem; an elem can be a single number/range or a '( )' group:
+ * 1) A single number elem is just a simple digit, e.g. 9
+ * 2) A single range elem is two digits with a '-' between, e.g. 2-6
+ * 3) A group elem combines multiple 1) or 2) with '( )', e.g. (0,2-4,6)
+ * Within a group elem, '-' is used as a range separator and
+ * ',' separates single numbers.
+ */
+static int
+eal_parse_set(const char *input, rte_cpuset_t *set)
+{
+ unsigned idx;
+ const char *str = input;
+ char *end = NULL;
+ unsigned min, max;
+
+ CPU_ZERO(set);
+
+ while (isblank(*str))
+ str++;
+
+ /* only a digit or a left bracket qualifies as a start point */
+ if ((!isdigit(*str) && *str != '(') || *str == '\0')
+ return -1;
+
+ /* process a single number or a single range of numbers */
+ if (*str != '(') {
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+ else {
+ while (isblank(*end))
+ end++;
+
+ min = idx;
+ max = idx;
+ if (*end == '-') {
+ /* process single <number>-<number> */
+ end++;
+ while (isblank(*end))
+ end++;
+ if (!isdigit(*end))
+ return -1;
+
+ errno = 0;
+ idx = strtoul(end, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+ max = idx;
+ while (isblank(*end))
+ end++;
+ if (*end != ',' && *end != '\0')
+ return -1;
+ }
+
+ if (*end != ',' && *end != '\0' &&
+ *end != '@')
+ return -1;
+
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ CPU_SET(idx, set);
+
+ return end - input;
+ }
+ }
+
+ /* process a set within brackets */
+ str++;
+ while (isblank(*str))
+ str++;
+ if (*str == '\0')
+ return -1;
+
+ min = RTE_MAX_LCORE;
+ do {
+
+ /* go ahead to the first digit */
+ while (isblank(*str))
+ str++;
+ if (!isdigit(*str))
+ return -1;
+
+ /* get the digit value */
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+
+ /* advance to a separator: '-', ',' or ')' */
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ else /* avoid consecutive '-' */
+ return -1;
+ } else if ((*end == ',') || (*end == ')')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ CPU_SET(idx, set);
+
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+
+ str = end + 1;
+ } while (*end != '\0' && *end != ')');
+
+ /*
+ * skip trailing blanks so they do not make the end-character
+ * check in eal_parse_lcores() fail
+ */
+ while (isblank(*str))
+ str++;
+
+ return str - input;
+}
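+
+/*
+ * Illustrative sketch, not part of upstream: "(0,2-4)" parsed by the
+ * function above sets CPUs 0, 2, 3 and 4. The function name and the
+ * EAL_DOC_EXAMPLES guard are hypothetical.
+ */
+#ifdef EAL_DOC_EXAMPLES
+static int
+example_parse_group(void)
+{
+ rte_cpuset_t set;
+
+ if (eal_parse_set("(0,2-4)", &set) < 0)
+ return -1;
+ return CPU_COUNT(&set) == 4 ? 0 : -1;
+}
+#endif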
+
+static int
+check_cpuset(rte_cpuset_t *set)
+{
+ unsigned int idx;
+
+ for (idx = 0; idx < CPU_SETSIZE; idx++) {
+ if (!CPU_ISSET(idx, set))
+ continue;
+
+ if (eal_cpu_detected(idx) == 0) {
+ RTE_LOG(ERR, EAL, "core %u "
+ "unavailable\n", idx);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]'
+ * lcores and cpus can each be a single number/range or a group.
+ * '(' and ')' are required around a group.
+ * If '@cpus' is not supplied, cpus defaults to the same value as lcores.
+ * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' starts 9 EAL threads as below
+ * lcore 0 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 1 runs on cpuset 0x2 (cpu 1)
+ * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7)
+ * lcores 3,4,5 run on cpuset 0x5 (cpu 0,2)
+ * lcore 6 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 7 runs on cpuset 0x80 (cpu 7)
+ * lcore 8 runs on cpuset 0x100 (cpu 8)
+ */
+static int
+eal_parse_lcores(const char *lcores)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ rte_cpuset_t lcore_set;
+ unsigned int set_count;
+ unsigned idx = 0;
+ unsigned count = 0;
+ const char *lcore_start = NULL;
+ const char *end = NULL;
+ int offset;
+ rte_cpuset_t cpuset;
+ int lflags;
+ int ret = -1;
+
+ if (lcores == NULL)
+ return -1;
+
+ /* Remove all blank characters ahead and after */
+ while (isblank(*lcores))
+ lcores++;
+
+ CPU_ZERO(&cpuset);
+
+ /* Reset lcore config */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ CPU_ZERO(&lcore_config[idx].cpuset);
+ }
+
+ /* Get list of cores */
+ do {
+ while (isblank(*lcores))
+ lcores++;
+ if (*lcores == '\0')
+ goto err;
+
+ lflags = 0;
+
+ /* record lcore_set start point */
+ lcore_start = lcores;
+
+ /* go across a complete bracket */
+ if (*lcore_start == '(') {
+ lcores += strcspn(lcores, ")");
+ if (*lcores++ == '\0')
+ goto err;
+ }
+
+ /* scan the separator '@', ','(next) or '\0'(finish) */
+ lcores += strcspn(lcores, "@,");
+
+ if (*lcores == '@') {
+ /* explicitly assign the cpuset and update the end cursor */
+ offset = eal_parse_set(lcores + 1, &cpuset);
+ if (offset < 0)
+ goto err;
+ end = lcores + 1 + offset;
+ } else { /* ',' or '\0' */
+ /* no cpuset given; this iteration is done */
+ end = lcores;
+
+ /* go back to check <number>-<number> */
+ offset = strcspn(lcore_start, "(-");
+ if (offset < (end - lcore_start) &&
+ *(lcore_start + offset) != '(')
+ lflags = 1;
+ }
+
+ if (*end != ',' && *end != '\0')
+ goto err;
+
+ /* parse lcore_set from start point */
+ if (eal_parse_set(lcore_start, &lcore_set) < 0)
+ goto err;
+
+ /* without '@', the lcore_set is used as the cpuset by default */
+ if (*lcores != '@')
+ rte_memcpy(&cpuset, &lcore_set, sizeof(cpuset));
+
+ set_count = CPU_COUNT(&lcore_set);
+ /* start to update lcore_set */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (!CPU_ISSET(idx, &lcore_set))
+ continue;
+ set_count--;
+
+ if (cfg->lcore_role[idx] != ROLE_RTE) {
+ lcore_config[idx].core_index = count;
+ cfg->lcore_role[idx] = ROLE_RTE;
+ count++;
+ }
+
+ if (lflags) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(idx, &cpuset);
+ }
+
+ if (check_cpuset(&cpuset) < 0)
+ goto err;
+ rte_memcpy(&lcore_config[idx].cpuset, &cpuset,
+ sizeof(rte_cpuset_t));
+ }
+
+ /* some cores from the lcore_set can't be handled by EAL */
+ if (set_count != 0)
+ goto err;
+
+ lcores = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ goto err;
+
+ cfg->lcore_count = count;
+ ret = 0;
+
+err:
+
+ return ret;
+}
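+
+/*
+ * A short illustrative call, assuming the mapping described in the
+ * format comment above:
+ *
+ *   if (eal_parse_lcores("1,2@(5-7),(3-5)@(0,2)") == 0) {
+ *       // lcore 1 is pinned to cpu 1, lcore 2 floats over cpus 5-7,
+ *       // and lcores 3,4,5 all float over cpus 0 and 2
+ *   }
+ */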
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+static int
+eal_parse_syslog(const char *facility, struct internal_config *conf)
+{
+ int i;
+ static const struct {
+ const char *name;
+ int value;
+ } map[] = {
+ { "auth", LOG_AUTH },
+ { "cron", LOG_CRON },
+ { "daemon", LOG_DAEMON },
+ { "ftp", LOG_FTP },
+ { "kern", LOG_KERN },
+ { "lpr", LOG_LPR },
+ { "mail", LOG_MAIL },
+ { "news", LOG_NEWS },
+ { "syslog", LOG_SYSLOG },
+ { "user", LOG_USER },
+ { "uucp", LOG_UUCP },
+ { "local0", LOG_LOCAL0 },
+ { "local1", LOG_LOCAL1 },
+ { "local2", LOG_LOCAL2 },
+ { "local3", LOG_LOCAL3 },
+ { "local4", LOG_LOCAL4 },
+ { "local5", LOG_LOCAL5 },
+ { "local6", LOG_LOCAL6 },
+ { "local7", LOG_LOCAL7 },
+ { NULL, 0 }
+ };
+
+ for (i = 0; map[i].name; i++) {
+ if (!strcmp(facility, map[i].name)) {
+ conf->syslog_facility = map[i].value;
+ return 0;
+ }
+ }
+ return -1;
+}
+#endif
+
+static int
+eal_parse_log_priority(const char *level)
+{
+ static const char * const levels[] = {
+ [RTE_LOG_EMERG] = "emergency",
+ [RTE_LOG_ALERT] = "alert",
+ [RTE_LOG_CRIT] = "critical",
+ [RTE_LOG_ERR] = "error",
+ [RTE_LOG_WARNING] = "warning",
+ [RTE_LOG_NOTICE] = "notice",
+ [RTE_LOG_INFO] = "info",
+ [RTE_LOG_DEBUG] = "debug",
+ };
+ size_t len = strlen(level);
+ unsigned long tmp;
+ char *end;
+ unsigned int i;
+
+ if (len == 0)
+ return -1;
+
+ /* look for named values, skip 0 which is not a valid level */
+ for (i = 1; i < RTE_DIM(levels); i++) {
+ if (strncmp(levels[i], level, len) == 0)
+ return i;
+ }
+
+ /* not a string, maybe it is numeric */
+ errno = 0;
+ tmp = strtoul(level, &end, 0);
+
+ /* check for errors */
+ if (errno != 0 || end == NULL || *end != '\0' ||
+ tmp >= UINT32_MAX)
+ return -1;
+
+ return tmp;
+}
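+
+/*
+ * A few illustrative calls; values follow the RTE_LOG_* numbering used
+ * in the table above:
+ *
+ *   eal_parse_log_priority("debug");  // returns RTE_LOG_DEBUG (8)
+ *   eal_parse_log_priority("deb");    // prefix match, same result
+ *   eal_parse_log_priority("8");      // numeric form, returns 8
+ *   eal_parse_log_priority("bogus");  // returns -1
+ */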
+
+static int
+eal_parse_log_level(const char *arg)
+{
+ const char *pattern = NULL;
+ const char *regex = NULL;
+ char *str, *level;
+ int priority;
+
+ str = strdup(arg);
+ if (str == NULL)
+ return -1;
+
+ if ((level = strchr(str, ','))) {
+ regex = str;
+ *level++ = '\0';
+ } else if ((level = strchr(str, ':'))) {
+ pattern = str;
+ *level++ = '\0';
+ } else {
+ level = str;
+ }
+
+ priority = eal_parse_log_priority(level);
+ if (priority < 0) {
+ fprintf(stderr, "invalid log priority: %s\n", level);
+ goto fail;
+ }
+
+ if (regex) {
+ if (rte_log_set_level_regexp(regex, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s,%d\n",
+ regex, priority);
+ goto fail;
+ }
+ if (rte_log_save_regexp(regex, priority) < 0)
+ goto fail;
+ } else if (pattern) {
+ if (rte_log_set_level_pattern(pattern, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s:%d\n",
+ pattern, priority);
+ goto fail;
+ }
+ if (rte_log_save_pattern(pattern, priority) < 0)
+ goto fail;
+ } else {
+ rte_log_set_global_level(priority);
+ }
+
+ free(str);
+ return 0;
+
+fail:
+ free(str);
+ return -1;
+}
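+
+/*
+ * The three accepted argument shapes, shown with hypothetical values:
+ *
+ *   eal_parse_log_level("7");              // global level only
+ *   eal_parse_log_level("lib.eal.*:info"); // glob pattern and level
+ *   eal_parse_log_level("^lib\\.eal.*,6"); // regex and level
+ */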
+
+static enum rte_proc_type_t
+eal_parse_proc_type(const char *arg)
+{
+ if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
+ return RTE_PROC_PRIMARY;
+ if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
+ return RTE_PROC_SECONDARY;
+ if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
+ return RTE_PROC_AUTO;
+
+ return RTE_PROC_INVALID;
+}
+
+static int
+eal_parse_iova_mode(const char *name)
+{
+ int mode;
+
+ if (name == NULL)
+ return -1;
+
+ if (!strcmp("pa", name))
+ mode = RTE_IOVA_PA;
+ else if (!strcmp("va", name))
+ mode = RTE_IOVA_VA;
+ else
+ return -1;
+
+ internal_config.iova_mode = mode;
+ return 0;
+}
+
+static int
+eal_parse_base_virtaddr(const char *arg)
+{
+ char *end;
+ uint64_t addr;
+
+ errno = 0;
+ addr = strtoull(arg, &end, 16);
+
+ /* check for errors */
+ if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
+ return -1;
+
+ /* make sure we don't exceed 32-bit boundary on 32-bit target */
+#ifndef RTE_ARCH_64
+ if (addr >= UINTPTR_MAX)
+ return -1;
+#endif
+
+ /* align the addr on 16M boundary, 16MB is the minimum huge page
+ * size on IBM Power architecture. If the addr is aligned to 16MB,
+ * it can align to 2MB for x86. So this alignment can also be used
+ * on x86 and other architectures.
+ */
+ internal_config.base_virtaddr =
+ RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
+
+ return 0;
+}
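+
+/*
+ * The 16M round-up in action, with an illustrative address:
+ *
+ *   eal_parse_base_virtaddr("0x100001000");
+ *   // 0x100001000 is not 16MB-aligned, so base_virtaddr becomes
+ *   // 0x101000000, the next 16MB boundary
+ */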
+
+/* caller is responsible for freeing the returned string */
+static char *
+available_cores(void)
+{
+ char *str = NULL;
+ int previous;
+ int sequence;
+ char *tmp;
+ int idx;
+
+ /* find the first available cpu */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (eal_cpu_detected(idx) == 0)
+ continue;
+ break;
+ }
+ if (idx >= RTE_MAX_LCORE)
+ return NULL;
+
+ /* first sequence */
+ if (asprintf(&str, "%d", idx) < 0)
+ return NULL;
+ previous = idx;
+ sequence = 0;
+
+ for (idx++ ; idx < RTE_MAX_LCORE; idx++) {
+ if (eal_cpu_detected(idx) == 0)
+ continue;
+
+ if (idx == previous + 1) {
+ previous = idx;
+ sequence = 1;
+ continue;
+ }
+
+ /* finish current sequence */
+ if (sequence) {
+ if (asprintf(&tmp, "%s-%d", str, previous) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ }
+
+ /* new sequence */
+ if (asprintf(&tmp, "%s,%d", str, idx) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ previous = idx;
+ sequence = 0;
+ }
+
+ /* finish last sequence */
+ if (sequence) {
+ if (asprintf(&tmp, "%s-%d", str, previous) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ }
+
+ return str;
+}
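+
+/*
+ * A sketch of the output shape, assuming cpus 0-3 and 8 are the ones
+ * detected:
+ *
+ *   char *cores = available_cores();
+ *   // cores == "0-3,8"; the caller must free() it
+ */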
+
+int
+eal_parse_common_option(int opt, const char *optarg,
+ struct internal_config *conf)
+{
+ static int b_used;
+ static int w_used;
+
+ switch (opt) {
+ /* blacklist */
+ case 'b':
+ if (w_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ b_used = 1;
+ break;
+ /* whitelist */
+ case 'w':
+ if (b_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ w_used = 1;
+ break;
+ /* coremask */
+ case 'c': {
+ int lcore_indexes[RTE_MAX_LCORE];
+
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. Please ensure -c is before -s or -S\n");
+ if (eal_parse_coremask(optarg, lcore_indexes) < 0) {
+ RTE_LOG(ERR, EAL, "invalid coremask syntax\n");
+ return -1;
+ }
+ if (update_lcore_config(lcore_indexes) < 0) {
+ char *available = available_cores();
+
+ RTE_LOG(ERR, EAL,
+ "invalid coremask, please check specified cores are part of %s\n",
+ available);
+ free(available);
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option -c is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_LST) ? "-l" :
+ (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+ "-c");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_MSK;
+ break;
+ }
+ /* corelist */
+ case 'l': {
+ int lcore_indexes[RTE_MAX_LCORE];
+
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. Please ensure -l is before -s or -S\n");
+
+ if (eal_parse_corelist(optarg, lcore_indexes) < 0) {
+ RTE_LOG(ERR, EAL, "invalid core list syntax\n");
+ return -1;
+ }
+ if (update_lcore_config(lcore_indexes) < 0) {
+ char *available = available_cores();
+
+ RTE_LOG(ERR, EAL,
+ "invalid core list, please check specified cores are part of %s\n",
+ available);
+ free(available);
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option -l is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_MSK) ? "-c" :
+ (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+ "-l");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_LST;
+ break;
+ }
+ /* service coremask */
+ case 's':
+ if (eal_parse_service_coremask(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service coremask\n");
+ return -1;
+ }
+ break;
+ /* service corelist */
+ case 'S':
+ if (eal_parse_service_corelist(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service core list\n");
+ return -1;
+ }
+ break;
+ /* size of memory */
+ case 'm':
+ conf->memory = atoi(optarg);
+ conf->memory *= 1024ULL;
+ conf->memory *= 1024ULL;
+ mem_parsed = 1;
+ break;
+ /* force number of channels */
+ case 'n':
+ conf->force_nchannel = atoi(optarg);
+ if (conf->force_nchannel == 0) {
+ RTE_LOG(ERR, EAL, "invalid channel number\n");
+ return -1;
+ }
+ break;
+ /* force number of ranks */
+ case 'r':
+ conf->force_nrank = atoi(optarg);
+ if (conf->force_nrank == 0 ||
+ conf->force_nrank > 16) {
+ RTE_LOG(ERR, EAL, "invalid rank number\n");
+ return -1;
+ }
+ break;
+ /* force loading of external driver */
+ case 'd':
+ if (eal_plugin_add(optarg) == -1)
+ return -1;
+ break;
+ case 'v':
+ /* since the message is explicitly requested by the user,
+ * write it at the highest log level so it is always
+ * visible, even if info or warning messages are disabled */
+ RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
+ break;
+
+ /* long options */
+ case OPT_HUGE_UNLINK_NUM:
+ conf->hugepage_unlink = 1;
+ break;
+
+ case OPT_NO_HUGE_NUM:
+ conf->no_hugetlbfs = 1;
+ /* no-huge is legacy mem */
+ conf->legacy_mem = 1;
+ break;
+
+ case OPT_NO_PCI_NUM:
+ conf->no_pci = 1;
+ break;
+
+ case OPT_NO_HPET_NUM:
+ conf->no_hpet = 1;
+ break;
+
+ case OPT_VMWARE_TSC_MAP_NUM:
+ conf->vmware_tsc_map = 1;
+ break;
+
+ case OPT_NO_SHCONF_NUM:
+ conf->no_shconf = 1;
+ break;
+
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
+ case OPT_PROC_TYPE_NUM:
+ conf->process_type = eal_parse_proc_type(optarg);
+ break;
+
+ case OPT_MASTER_LCORE_NUM:
+ if (eal_parse_master_lcore(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_MASTER_LCORE "\n");
+ return -1;
+ }
+ break;
+
+ case OPT_VDEV_NUM:
+ if (eal_option_device_add(RTE_DEVTYPE_VIRTUAL,
+ optarg) < 0) {
+ return -1;
+ }
+ break;
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ case OPT_SYSLOG_NUM:
+ if (eal_parse_syslog(optarg, conf) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SYSLOG "\n");
+ return -1;
+ }
+ break;
+#endif
+
+ case OPT_LOG_LEVEL_NUM: {
+ if (eal_parse_log_level(optarg) < 0) {
+ RTE_LOG(ERR, EAL,
+ "invalid parameters for --"
+ OPT_LOG_LEVEL "\n");
+ return -1;
+ }
+ break;
+ }
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ case OPT_TRACE_NUM: {
+ if (eal_trace_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_DIR_NUM: {
+ if (eal_trace_dir_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_DIR "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_BUF_SIZE_NUM: {
+ if (eal_trace_bufsz_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_BUF_SIZE "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_MODE_NUM: {
+ if (eal_trace_mode_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_MODE "\n");
+ return -1;
+ }
+ break;
+ }
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
+ case OPT_LCORES_NUM:
+ if (eal_parse_lcores(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_LCORES "\n");
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option --lcore is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_LST) ? "-l" :
+ (core_parsed == LCORE_OPT_MSK) ? "-c" :
+ "--lcore");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_MAP;
+ break;
+ case OPT_LEGACY_MEM_NUM:
+ conf->legacy_mem = 1;
+ break;
+ case OPT_SINGLE_FILE_SEGMENTS_NUM:
+ conf->single_file_segments = 1;
+ break;
+ case OPT_IOVA_MODE_NUM:
+ if (eal_parse_iova_mode(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_IOVA_MODE "\n");
+ return -1;
+ }
+ break;
+ case OPT_BASE_VIRTADDR_NUM:
+ if (eal_parse_base_virtaddr(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_BASE_VIRTADDR "\n");
+ return -1;
+ }
+ break;
+ case OPT_TELEMETRY_NUM:
+ break;
+ case OPT_NO_TELEMETRY_NUM:
+ conf->no_telemetry = 1;
+ break;
+
+ /* don't know what to do, leave this to caller */
+ default:
+ return 1;
+
+ }
+
+ return 0;
+bw_used:
+ RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
+ "cannot be used at the same time\n");
+ return -1;
+}
+
+static void
+eal_auto_detect_cores(struct rte_config *cfg)
+{
+ unsigned int lcore_id;
+ unsigned int removed = 0;
+ rte_cpuset_t affinity_set;
+
+ if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+ &affinity_set))
+ CPU_ZERO(&affinity_set);
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (cfg->lcore_role[lcore_id] == ROLE_RTE &&
+ !CPU_ISSET(lcore_id, &affinity_set)) {
+ cfg->lcore_role[lcore_id] = ROLE_OFF;
+ removed++;
+ }
+ }
+
+ cfg->lcore_count -= removed;
+}
+
+static void
+compute_ctrl_threads_cpuset(struct internal_config *internal_cfg)
+{
+ rte_cpuset_t *cpuset = &internal_cfg->ctrl_cpuset;
+ rte_cpuset_t default_set;
+ unsigned int lcore_id;
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_has_role(lcore_id, ROLE_OFF))
+ continue;
+ RTE_CPU_OR(cpuset, cpuset, &lcore_config[lcore_id].cpuset);
+ }
+ RTE_CPU_NOT(cpuset, cpuset);
+
+ if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+ &default_set))
+ CPU_ZERO(&default_set);
+
+ RTE_CPU_AND(cpuset, cpuset, &default_set);
+
+ /* if no cpu remains, fall back to the master lcore's cpu affinity */
+ if (!CPU_COUNT(cpuset)) {
+ memcpy(cpuset, &lcore_config[rte_get_master_lcore()].cpuset,
+ sizeof(*cpuset));
+ }
+}
+
+int
+eal_cleanup_config(struct internal_config *internal_cfg)
+{
+ if (internal_cfg->hugefile_prefix != NULL)
+ free(internal_cfg->hugefile_prefix);
+ if (internal_cfg->hugepage_dir != NULL)
+ free(internal_cfg->hugepage_dir);
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL)
+ free(internal_cfg->user_mbuf_pool_ops_name);
+
+ return 0;
+}
+
+int
+eal_adjust_config(struct internal_config *internal_cfg)
+{
+ int i;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (!core_parsed)
+ eal_auto_detect_cores(cfg);
+
+ if (internal_config.process_type == RTE_PROC_AUTO)
+ internal_config.process_type = eal_proc_type_detect();
+
+ /* default master lcore is the first one */
+ if (!master_lcore_parsed) {
+ cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
+ lcore_config[cfg->master_lcore].core_role = ROLE_RTE;
+ }
+
+ compute_ctrl_threads_cpuset(internal_cfg);
+
+ /* if no memory amounts were requested, this will result in 0 and
+ * will be overridden later, right after eal_hugepage_info_init() */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->memory += internal_cfg->socket_mem[i];
+
+ return 0;
+}
+
+int
+eal_check_common_options(struct internal_config *internal_cfg)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) {
+ RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n");
+ return -1;
+ }
+
+ if (internal_cfg->process_type == RTE_PROC_INVALID) {
+ RTE_LOG(ERR, EAL, "Invalid process type specified\n");
+ return -1;
+ }
+ if (internal_cfg->hugefile_prefix != NULL &&
+ strlen(internal_cfg->hugefile_prefix) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n");
+ return -1;
+ }
+ if (internal_cfg->hugepage_dir != NULL &&
+ strlen(internal_cfg->hugepage_dir) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n");
+ return -1;
+ }
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL &&
+ strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n");
+ return -1;
+ }
+ if (index(eal_get_hugefile_prefix(), '%') != NULL) {
+ RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
+ "option\n");
+ return -1;
+ }
+ if (mem_parsed && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot "
+ "be specified at the same time\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+ if (internal_config.force_socket_limits && internal_config.legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with --"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem &&
+ internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_IN_MEMORY"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_NO_HUGE" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem && internal_cfg->memory == 0) {
+ RTE_LOG(NOTICE, EAL, "Static memory layout is selected, "
+ "amount of reserved memory can be adjusted with "
+ "-m or --"OPT_SOCKET_MEM"\n");
+ }
+
+ return 0;
+}
+
+void
+eal_common_usage(void)
+{
+ printf("[options]\n\n"
+ "EAL common options:\n"
+ " -c COREMASK Hexadecimal bitmask of cores to run on\n"
+ " -l CORELIST List of cores to run on\n"
+ " The argument format is <c1>[-c2][,c3[-c4],...]\n"
+ " where c1, c2, etc are core indexes between 0 and %d\n"
+ " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n"
+ " The argument format is\n"
+ " '<lcores[@cpus]>[<,lcores[@cpus]>...]'\n"
+ " lcores and cpus list are grouped by '(' and ')'\n"
+ " Within the group, '-' is used for range separator,\n"
+ " ',' is used for single number separator.\n"
+ " '( )' can be omitted for single element group,\n"
+ " '@' can be omitted if cpus and lcores have the same value\n"
+ " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n"
+ " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
+ " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n"
+ " -n CHANNELS Number of memory channels\n"
+ " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
+ " -r RANKS Force number of memory ranks (don't detect)\n"
+ " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
+ " Prevent EAL from using this PCI device. The argument\n"
+ " format is <domain:bus:devid.func>.\n"
+ " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
+ " Only use the specified PCI devices. The argument format\n"
+ " is <[domain:]bus:devid.func>. This option can be present\n"
+ " several times (once per device).\n"
+ " [NOTE: PCI whitelist cannot be used with -b option]\n"
+ " --"OPT_VDEV" Add a virtual device.\n"
+ " The argument format is <driver><id>[,key=val,...]\n"
+ " (ex: --vdev=net_pcap0,iface=eth2).\n"
+ " --"OPT_IOVA_MODE" Set IOVA mode. 'pa' for IOVA_PA\n"
+ " 'va' for IOVA_VA\n"
+ " -d LIB.so|DIR Add a driver or driver directory\n"
+ " (can be used multiple times)\n"
+ " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n"
+ " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+ " --"OPT_SYSLOG" Set syslog facility\n"
+#endif
+ " --"OPT_LOG_LEVEL"=<int> Set global log level\n"
+ " --"OPT_LOG_LEVEL"=<type-match>:<int>\n"
+ " Set specific log level\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+ " --"OPT_TRACE"=<regex-match>\n"
+ " Enable trace based on regular expression trace name.\n"
+ " By default, the trace is disabled.\n"
+ " User must specify this option to enable trace.\n"
+ " --"OPT_TRACE_DIR"=<directory path>\n"
+ " Specify trace directory for trace output.\n"
+ " By default, trace output will created at\n"
+ " $HOME directory and parameter must be\n"
+ " specified once only.\n"
+ " --"OPT_TRACE_BUF_SIZE"=<int>\n"
+ " Specify maximum size of allocated memory\n"
+ " for trace output for each thread. Valid\n"
+ " unit can be either 'B|K|M' for 'Bytes',\n"
+ " 'KBytes' and 'MBytes' respectively.\n"
+ " Default is 1MB and parameter must be\n"
+ " specified once only.\n"
+ " --"OPT_TRACE_MODE"=<o[verwrite] | d[iscard]>\n"
+ " Specify the mode of update of trace\n"
+ " output file. Either update on a file can\n"
+ " be wrapped or discarded when file size\n"
+ " reaches its maximum limit.\n"
+ " Default mode is 'overwrite' and parameter\n"
+ " must be specified once only.\n"
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+ " -v Display version information on startup\n"
+ " -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
+ " --"OPT_BASE_VIRTADDR" Base virtual address\n"
+ " --"OPT_TELEMETRY" Enable telemetry support (on by default)\n"
+ " --"OPT_NO_TELEMETRY" Disable telemetry support\n"
+ "\nEAL options for DEBUG use only:\n"
+ " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
+ " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
+ " --"OPT_NO_PCI" Disable PCI\n"
+ " --"OPT_NO_HPET" Disable HPET\n"
+ " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
+ "\n", RTE_MAX_LCORE);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c
new file mode 100644
index 000000000..935e8fefe
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c
@@ -0,0 +1,1217 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Intel Corporation
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_tailq.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+
+static int mp_fd = -1;
+static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */
+static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
+static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
+static char peer_name[PATH_MAX];
+
+struct action_entry {
+ TAILQ_ENTRY(action_entry) next;
+ char action_name[RTE_MP_MAX_NAME_LEN];
+ rte_mp_t action;
+};
+
+/** Double linked list of actions. */
+TAILQ_HEAD(action_entry_list, action_entry);
+
+static struct action_entry_list action_entry_list =
+ TAILQ_HEAD_INITIALIZER(action_entry_list);
+
+enum mp_type {
+ MP_MSG, /* Share message with peers, will not block */
+ MP_REQ, /* Request for information, will block for a reply */
+ MP_REP, /* Response to previously-received request */
+ MP_IGN, /* Response telling requester to ignore this response */
+};
+
+struct mp_msg_internal {
+ int type;
+ struct rte_mp_msg msg;
+};
+
+struct async_request_param {
+ rte_mp_async_reply_t clb;
+ struct rte_mp_reply user_reply;
+ struct timespec end;
+ int n_responses_processed;
+};
+
+struct pending_request {
+ TAILQ_ENTRY(pending_request) next;
+ enum {
+ REQUEST_TYPE_SYNC,
+ REQUEST_TYPE_ASYNC
+ } type;
+ char dst[PATH_MAX];
+ struct rte_mp_msg *request;
+ struct rte_mp_msg *reply;
+ int reply_received;
+ RTE_STD_C11
+ union {
+ struct {
+ struct async_request_param *param;
+ } async;
+ struct {
+ pthread_cond_t cond;
+ } sync;
+ };
+};
+
+TAILQ_HEAD(pending_request_list, pending_request);
+
+static struct {
+ struct pending_request_list requests;
+ pthread_mutex_t lock;
+} pending_requests = {
+ .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ /**< used in async requests only */
+};
+
+/* forward declarations */
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
+
+static struct pending_request *
+find_pending_request(const char *dst, const char *act_name)
+{
+ struct pending_request *r;
+
+ TAILQ_FOREACH(r, &pending_requests.requests, next) {
+ if (!strcmp(r->dst, dst) &&
+ !strcmp(r->request->name, act_name))
+ break;
+ }
+
+ return r;
+}
+
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+ const char *prefix = eal_mp_socket_path();
+
+ if (strlen(name) > 0)
+ snprintf(buf, len, "%s_%s", prefix, name);
+ else
+ strlcpy(buf, prefix, len);
+}
+
+int
+rte_eal_primary_proc_alive(const char *config_file_path)
+{
+ int config_fd;
+
+ if (config_file_path)
+ config_fd = open(config_file_path, O_RDONLY);
+ else {
+ const char *path;
+
+ path = eal_runtime_config_path();
+ config_fd = open(path, O_RDONLY);
+ }
+ if (config_fd < 0)
+ return 0;
+
+ int ret = lockf(config_fd, F_TEST, 0);
+ close(config_fd);
+
+ return !!ret;
+}
+
+static struct action_entry *
+find_action_entry_by_name(const char *name)
+{
+ struct action_entry *entry;
+
+ TAILQ_FOREACH(entry, &action_entry_list, next) {
+ if (strncmp(entry->action_name, name, RTE_MP_MAX_NAME_LEN) == 0)
+ break;
+ }
+
+ return entry;
+}
+
+static int
+validate_action_name(const char *name)
+{
+ if (name == NULL) {
+ RTE_LOG(ERR, EAL, "Action name cannot be NULL\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (strnlen(name, RTE_MP_MAX_NAME_LEN) == 0) {
+ RTE_LOG(ERR, EAL, "Length of action name is zero\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (strnlen(name, RTE_MP_MAX_NAME_LEN) == RTE_MP_MAX_NAME_LEN) {
+ rte_errno = E2BIG;
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_mp_action_register(const char *name, rte_mp_t action)
+{
+ struct action_entry *entry;
+
+ if (validate_action_name(name) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ entry = malloc(sizeof(struct action_entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+ strlcpy(entry->action_name, name, sizeof(entry->action_name));
+ entry->action = action;
+
+ pthread_mutex_lock(&mp_mutex_action);
+ if (find_action_entry_by_name(name) != NULL) {
+ pthread_mutex_unlock(&mp_mutex_action);
+ rte_errno = EEXIST;
+ free(entry);
+ return -1;
+ }
+ TAILQ_INSERT_TAIL(&action_entry_list, entry, next);
+ pthread_mutex_unlock(&mp_mutex_action);
+ return 0;
+}
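+
+/*
+ * A minimal registration sketch; the action name and handler below are
+ * hypothetical:
+ *
+ *   static int
+ *   my_handler(const struct rte_mp_msg *msg, const void *peer)
+ *   {
+ *       // inspect msg->param / msg->fds, optionally rte_mp_reply()
+ *       return 0;
+ *   }
+ *
+ *   rte_mp_action_register("my_app_action", my_handler);
+ */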
+
+void
+rte_mp_action_unregister(const char *name)
+{
+ struct action_entry *entry;
+
+ if (validate_action_name(name) != 0)
+ return;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return;
+ }
+
+ pthread_mutex_lock(&mp_mutex_action);
+ entry = find_action_entry_by_name(name);
+ if (entry == NULL) {
+ pthread_mutex_unlock(&mp_mutex_action);
+ return;
+ }
+ TAILQ_REMOVE(&action_entry_list, entry, next);
+ pthread_mutex_unlock(&mp_mutex_action);
+ free(entry);
+}
+
+static int
+read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+{
+ int msglen;
+ struct iovec iov;
+ struct msghdr msgh;
+ char control[CMSG_SPACE(sizeof(m->msg.fds))];
+ struct cmsghdr *cmsg;
+ int buflen = sizeof(*m) - sizeof(m->msg.fds);
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = m;
+ iov.iov_len = buflen;
+
+ msgh.msg_name = s;
+ msgh.msg_namelen = sizeof(*s);
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ msglen = recvmsg(mp_fd, &msgh, 0);
+ if (msglen < 0) {
+ RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (msglen != buflen || (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+ RTE_LOG(ERR, EAL, "truncated msg\n");
+ return -1;
+ }
+
+ /* read auxiliary FDs if any */
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ memcpy(m->msg.fds, CMSG_DATA(cmsg), sizeof(m->msg.fds));
+ break;
+ }
+ }
+ /* sanity-check the response */
+ if (m->msg.num_fds < 0 || m->msg.num_fds > RTE_MP_MAX_FD_NUM) {
+ RTE_LOG(ERR, EAL, "invalid number of fd's received\n");
+ return -1;
+ }
+ if (m->msg.len_param < 0 || m->msg.len_param > RTE_MP_MAX_PARAM_LEN) {
+ RTE_LOG(ERR, EAL, "invalid received data length\n");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+{
+ struct pending_request *pending_req;
+ struct action_entry *entry;
+ struct rte_mp_msg *msg = &m->msg;
+ rte_mp_t action = NULL;
+
+ RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
+
+ if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ pending_req = find_pending_request(s->sun_path, msg->name);
+ if (pending_req) {
+ memcpy(pending_req->reply, msg, sizeof(*msg));
+ /* -1 indicates that we've been asked to ignore */
+ pending_req->reply_received =
+ m->type == MP_REP ? 1 : -1;
+
+ if (pending_req->type == REQUEST_TYPE_SYNC)
+ pthread_cond_signal(&pending_req->sync.cond);
+ else if (pending_req->type == REQUEST_TYPE_ASYNC)
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
+ } else
+ RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+ return;
+ }
+
+ pthread_mutex_lock(&mp_mutex_action);
+ entry = find_action_entry_by_name(msg->name);
+ if (entry != NULL)
+ action = entry->action;
+ pthread_mutex_unlock(&mp_mutex_action);
+
+ if (!action) {
+ if (m->type == MP_REQ && !internal_config.init_complete) {
+ /* if this is a request, and init is not yet complete,
+ * and callback wasn't registered, we should tell the
+ * requester to ignore our existence because we're not
+ * yet ready to process this request.
+ */
+ struct rte_mp_msg dummy;
+
+ memset(&dummy, 0, sizeof(dummy));
+ strlcpy(dummy.name, msg->name, sizeof(dummy.name));
+ mp_send(&dummy, s->sun_path, MP_IGN);
+ } else {
+ RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+ msg->name);
+ }
+ } else if (action(msg, s->sun_path) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+ }
+}
+
+static void *
+mp_handle(void *arg __rte_unused)
+{
+ struct mp_msg_internal msg;
+ struct sockaddr_un sa;
+
+ while (1) {
+ if (read_msg(&msg, &sa) == 0)
+ process_msg(&msg, &sa);
+ }
+
+ return NULL;
+}
+
+static int
+timespec_cmp(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec < b->tv_sec)
+ return -1;
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ return 0;
+}
+
+enum async_action {
+ ACTION_FREE, /**< free the action entry, but don't trigger callback */
+ ACTION_TRIGGER /**< trigger callback, then free action entry */
+};
+
+static enum async_action
+process_async_request(struct pending_request *sr, const struct timespec *now)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ bool timeout, last_msg;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ /* did we timeout? */
+ timeout = timespec_cmp(&param->end, now) <= 0;
+
+ /* if we received a response, adjust relevant data and copy message. */
+ if (sr->reply_received == 1 && sr->reply) {
+ struct rte_mp_msg *msg, *user_msgs, *tmp;
+
+ msg = sr->reply;
+ user_msgs = reply->msgs;
+
+ tmp = realloc(user_msgs, sizeof(*msg) *
+ (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ sr->dst, sr->request->name);
+ /* this entry is going to be removed and its message
+ * dropped, but we don't want to leak memory, so
+ * continue.
+ */
+ } else {
+ user_msgs = tmp;
+ reply->msgs = user_msgs;
+ memcpy(&user_msgs[reply->nb_received],
+ msg, sizeof(*msg));
+ reply->nb_received++;
+ }
+
+ /* mark this request as processed */
+ param->n_responses_processed++;
+ } else if (sr->reply_received == -1) {
+ /* we were asked to ignore this process */
+ reply->nb_sent--;
+ } else if (timeout) {
+ /* count it as processed response, but don't increment
+ * nb_received.
+ */
+ param->n_responses_processed++;
+ }
+
+ free(sr->reply);
+
+ last_msg = param->n_responses_processed == reply->nb_sent;
+
+ return last_msg ? ACTION_TRIGGER : ACTION_FREE;
+}
+
+static void
+trigger_async_action(struct pending_request *sr)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ param->clb(sr->request, reply);
+
+ /* clean up */
+ free(sr->async.param->user_reply.msgs);
+ free(sr->async.param);
+ free(sr->request);
+ free(sr);
+}
+
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg)
+{
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
+ }
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
+
+ action = process_async_request(req, &ts_now);
+
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
+
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
+ }
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
+ }
+
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
+}
+
+static void
+async_reply_handle(void *arg)
+{
+ struct pending_request *req;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+}
+
+static int
+open_socket_fd(void)
+{
+ struct sockaddr_un un;
+
+ peer_name[0] = '\0';
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ snprintf(peer_name, sizeof(peer_name),
+ "%d_%"PRIx64, getpid(), rte_rdtsc());
+
+ mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (mp_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to create unix socket\n");
+ return -1;
+ }
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+
+ create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
+ unlink(un.sun_path); /* May still exist since last run */
+
+ if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+ RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
+ un.sun_path, strerror(errno));
+ close(mp_fd);
+ return -1;
+ }
+
+ RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path);
+ return mp_fd;
+}
+
+static void
+close_socket_fd(void)
+{
+ char path[PATH_MAX];
+
+ if (mp_fd < 0)
+ return;
+
+ close(mp_fd);
+ create_socket_path(peer_name, path, sizeof(path));
+ unlink(path);
+}
+
+int
+rte_mp_channel_init(void)
+{
+ char path[PATH_MAX];
+ int dir_fd;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary process support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ /* create filter path */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_filter, basename(path), sizeof(mp_filter));
+
+ /* path may have been modified, so recreate it */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path));
+
+ /* lock the directory */
+ dir_fd = open(mp_dir_path, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+ mp_dir_path, strerror(errno));
+ return -1;
+ }
+
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+ mp_dir_path, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+
+ if (open_socket_fd() < 0) {
+ close(dir_fd);
+ return -1;
+ }
+
+ if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle",
+ NULL, mp_handle, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
+ strerror(errno));
+ close(mp_fd);
+ close(dir_fd);
+ mp_fd = -1;
+ return -1;
+ }
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ close(dir_fd);
+
+ return 0;
+}
+
+void
+rte_mp_channel_cleanup(void)
+{
+ close_socket_fd();
+}
+
+/**
+ * Return -1 when sending the message fails due to the local side.
+ * Return 0 when sending the message fails due to the remote side.
+ * Return 1 when the message is sent successfully.
+ */
+static int
+send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
+{
+ int snd;
+ struct iovec iov;
+ struct msghdr msgh;
+ struct cmsghdr *cmsg;
+ struct sockaddr_un dst;
+ struct mp_msg_internal m;
+ int fd_size = msg->num_fds * sizeof(int);
+ char control[CMSG_SPACE(fd_size)];
+
+ m.type = type;
+ memcpy(&m.msg, msg, sizeof(*msg));
+
+ memset(&dst, 0, sizeof(dst));
+ dst.sun_family = AF_UNIX;
+ strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path));
+
+ memset(&msgh, 0, sizeof(msgh));
+ memset(control, 0, sizeof(control));
+
+ iov.iov_base = &m;
+ iov.iov_len = sizeof(m) - sizeof(msg->fds);
+
+ msgh.msg_name = &dst;
+ msgh.msg_namelen = sizeof(dst);
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fd_size);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
+
+ do {
+ snd = sendmsg(mp_fd, &msgh, 0);
+ } while (snd < 0 && errno == EINTR);
+
+ if (snd < 0) {
+ rte_errno = errno;
+ /* Check whether it is caused by the peer process exiting */
+ if (errno == ECONNREFUSED &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ unlink(dst_path);
+ return 0;
+ }
+ RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
+ dst_path, strerror(errno));
+ return -1;
+ }
+
+ return 1;
+}
+
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type)
+{
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+
+ if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
+ peer = eal_mp_socket_path();
+
+ if (peer) {
+ if (send_msg(peer, msg, type) < 0)
+ return -1;
+ else
+ return 0;
+ }
+
+ /* broadcast to all secondary processes */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ return -1;
+ }
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ closedir(mp_dir);
+ return -1;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+ if (send_msg(path, msg, type) < 0)
+ ret = -1;
+ }
+ /* unlock the dir */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+ return ret;
+}
+
+static int
+check_input(const struct rte_mp_msg *msg)
+{
+ if (msg == NULL) {
+ RTE_LOG(ERR, EAL, "Msg cannot be NULL\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (validate_action_name(msg->name) != 0)
+ return -1;
+
+ if (msg->len_param < 0) {
+ RTE_LOG(ERR, EAL, "Message data length is negative\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (msg->num_fds < 0) {
+ RTE_LOG(ERR, EAL, "Number of fd's is negative\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (msg->len_param > RTE_MP_MAX_PARAM_LEN) {
+ RTE_LOG(ERR, EAL, "Message data is too long\n");
+ rte_errno = E2BIG;
+ return -1;
+ }
+
+ if (msg->num_fds > RTE_MP_MAX_FD_NUM) {
+ RTE_LOG(ERR, EAL, "Cannot send more than %d FDs\n",
+ RTE_MP_MAX_FD_NUM);
+ rte_errno = E2BIG;
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_mp_sendmsg(struct rte_mp_msg *msg)
+{
+ if (check_input(msg) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ RTE_LOG(DEBUG, EAL, "sendmsg: %s\n", msg->name);
+ return mp_send(msg, NULL, MP_MSG);
+}
+
+static int
+mp_request_async(const char *dst, struct rte_mp_msg *req,
+ struct async_request_param *param, const struct timespec *ts)
+{
+ struct rte_mp_msg *reply_msg;
+ struct pending_request *pending_req, *exist;
+ int ret = -1;
+
+ pending_req = calloc(1, sizeof(*pending_req));
+ reply_msg = calloc(1, sizeof(*reply_msg));
+ if (pending_req == NULL || reply_msg == NULL) {
+ RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto fail;
+ }
+
+ pending_req->type = REQUEST_TYPE_ASYNC;
+ strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
+ pending_req->request = req;
+ pending_req->reply = reply_msg;
+ pending_req->async.param = param;
+
+ /* queue already locked by caller */
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto fail;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ } else if (ret == 0) {
+ ret = 0;
+ goto fail;
+ }
+ param->user_reply.nb_sent++;
+
+ /* if alarm set fails, we simply ignore the reply */
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ }
+ TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next);
+
+ return 0;
+fail:
+ free(pending_req);
+ free(reply_msg);
+ return ret;
+}
+
+static int
+mp_request_sync(const char *dst, struct rte_mp_msg *req,
+ struct rte_mp_reply *reply, const struct timespec *ts)
+{
+ int ret;
+ struct rte_mp_msg msg, *tmp;
+ struct pending_request pending_req, *exist;
+
+ pending_req.type = REQUEST_TYPE_SYNC;
+ pending_req.reply_received = 0;
+ strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
+ pending_req.request = req;
+ pending_req.reply = &msg;
+ pthread_cond_init(&pending_req.sync.cond, NULL);
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ return -1;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ return -1;
+ } else if (ret == 0)
+ return 0;
+
+ TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next);
+
+ reply->nb_sent++;
+
+ do {
+ ret = pthread_cond_timedwait(&pending_req.sync.cond,
+ &pending_requests.lock, ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ TAILQ_REMOVE(&pending_requests.requests, &pending_req, next);
+
+ if (pending_req.reply_received == 0) {
+ RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n",
+ dst, req->name);
+ rte_errno = ETIMEDOUT;
+ return -1;
+ }
+ if (pending_req.reply_received == -1) {
+ RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+ /* not receiving this message is not an error, so decrement
+ * number of sent messages
+ */
+ reply->nb_sent--;
+ return 0;
+ }
+
+ tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ dst, req->name);
+ rte_errno = ENOMEM;
+ return -1;
+ }
+ memcpy(&tmp[reply->nb_received], &msg, sizeof(msg));
+ reply->msgs = tmp;
+ reply->nb_received++;
+ return 0;
+}
+
+int
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+ const struct timespec *ts)
+{
+ int dir_fd, ret = -1;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec end;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ if (check_input(req) != 0)
+ goto end;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get current time\n");
+ rte_errno = errno;
+ goto end;
+ }
+
+ end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end.tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ pthread_mutex_lock(&pending_requests.lock);
+ ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end);
+ pthread_mutex_unlock(&pending_requests.lock);
+ goto end;
+ }
+
+ /* for primary process, broadcast the request and collect replies one by one */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto end;
+ }
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto close_end;
+ }
+
+ pthread_mutex_lock(&pending_requests.lock);
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ /* unlocks the mutex while waiting for response,
+ * locks on receive
+ */
+ if (mp_request_sync(path, req, reply, &end))
+ goto unlock_end;
+ }
+ ret = 0;
+
+unlock_end:
+ pthread_mutex_unlock(&pending_requests.lock);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+close_end:
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+end:
+ if (ret) {
+ free(reply->msgs);
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+ }
+ return ret;
+}
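+
+/*
+ * A typical synchronous request from the caller's side; the action name
+ * and timeout are illustrative:
+ *
+ *   struct rte_mp_msg req;
+ *   struct rte_mp_reply reply;
+ *   struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
+ *
+ *   memset(&req, 0, sizeof(req));
+ *   strlcpy(req.name, "my_app_action", sizeof(req.name));
+ *   if (rte_mp_request_sync(&req, &reply, &ts) == 0) {
+ *       // reply.nb_received messages are in reply.msgs;
+ *       // the caller must free(reply.msgs)
+ *   }
+ */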
+
+int
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb)
+{
+ struct rte_mp_msg *copy;
+ struct pending_request *dummy;
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec *end;
+ bool dummy_used = false;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ if (check_input(req) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get current time\n");
+ rte_errno = errno;
+ return -1;
+ }
+ copy = calloc(1, sizeof(*copy));
+ dummy = calloc(1, sizeof(*dummy));
+ param = calloc(1, sizeof(*param));
+ if (copy == NULL || dummy == NULL || param == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* copy message */
+ memcpy(copy, req, sizeof(*copy));
+
+ param->n_responses_processed = 0;
+ param->clb = clb;
+ end = &param->end;
+ reply = &param->user_reply;
+
+ end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end->tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ /* we have to lock the request queue here, as we will be adding a bunch
+ * of requests to the queue at once, and some of the replies may arrive
+ * before we add all of the requests to the queue.
+ */
+ pthread_mutex_lock(&pending_requests.lock);
+
+ /* we have to ensure the callback gets triggered even if we don't send
+ * anything, which is why we allocated a dummy request earlier. Fill it
+ * in, and put it on the queue if we don't end up sending any requests.
+ */
+ dummy->type = REQUEST_TYPE_ASYNC;
+ dummy->request = copy;
+ dummy->reply = NULL;
+ dummy->async.param = param;
+ dummy->reply_received = 1; /* short-circuit the timeout */
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
+
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
+ next);
+ dummy_used = true;
+ }
+
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* if we couldn't send anything, clean up */
+ if (ret != 0)
+ goto fail;
+ return 0;
+ }
+
+ /* for primary process, broadcast request */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto unlock_fail;
+ }
+ dir_fd = dirfd(mp_dir);
+
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto closedir_fail;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ if (mp_request_async(path, copy, param, ts))
+ ret = -1;
+ }
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
+ dummy_used = true;
+ }
+
+ /* finally, unlock the queue */
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+ /* if dummy was unused, free it */
+ if (!dummy_used)
+ free(dummy);
+
+ return ret;
+closedir_fail:
+ closedir(mp_dir);
+unlock_fail:
+ pthread_mutex_unlock(&pending_requests.lock);
+fail:
+ free(dummy);
+ free(param);
+ free(copy);
+ return -1;
+}
+
+int
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+{
+ RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name);
+
+ if (check_input(msg) != 0)
+ return -1;
+
+ if (peer == NULL) {
+ RTE_LOG(ERR, EAL, "peer is not specified\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
+ return mp_send(msg, peer, MP_REP);
+}
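+
+/*
+ * Inside a registered action, a handler typically sends a reply back to
+ * the peer path it was given; a sketch with a hypothetical handler name:
+ *
+ *   static int
+ *   my_reply_handler(const struct rte_mp_msg *msg, const void *peer)
+ *   {
+ *       struct rte_mp_msg resp;
+ *
+ *       memset(&resp, 0, sizeof(resp));
+ *       // the reply must reuse the request name so the requester
+ *       // can match it in find_pending_request()
+ *       strlcpy(resp.name, msg->name, sizeof(resp.name));
+ *       return rte_mp_reply(&resp, (const char *)peer);
+ *   }
+ */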
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c
new file mode 100644
index 000000000..60c5dd66f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+
+/* split string into tokens */
+int
+rte_strsplit(char *string, int stringlen,
+ char **tokens, int maxtokens, char delim)
+{
+ int i, tok = 0;
+ int tokstart = 1; /* first token is right at start of string */
+
+ if (string == NULL || tokens == NULL)
+ goto einval_error;
+
+ for (i = 0; i < stringlen; i++) {
+ if (string[i] == '\0' || tok >= maxtokens)
+ break;
+ if (tokstart) {
+ tokstart = 0;
+ tokens[tok++] = &string[i];
+ }
+ if (string[i] == delim) {
+ string[i] = '\0';
+ tokstart = 1;
+ }
+ }
+ return tok;
+
+einval_error:
+ errno = EINVAL;
+ return -1;
+}
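+
+/*
+ * A worked example; note the function modifies the buffer in place, so
+ * the input must be writable:
+ *
+ *   char buf[] = "a,b,c";
+ *   char *toks[4];
+ *   int n = rte_strsplit(buf, sizeof(buf), toks, 4, ',');
+ *   // n == 3; toks[0] == "a", toks[1] == "b", toks[2] == "c"
+ */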
+
+/* Copy src string into dst.
+ *
+ * Return a negative value and NUL-terminate dst if it is too short;
+ * otherwise return the number of bytes copied.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize)
+{
+ size_t nleft = dsize;
+ size_t res = 0;
+
+ /* Copy as many bytes as will fit. */
+ while (nleft != 0) {
+ dst[res] = src[res];
+ if (src[res] == '\0')
+ return res;
+ res++;
+ nleft--;
+ }
+
+ /* Not enough room in dst, set NUL and return error. */
+ if (res != 0)
+ dst[res - 1] = '\0';
+ return -E2BIG;
+}
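+
+/*
+ * In contrast to strncpy, the destination is always NUL-terminated and
+ * truncation is reported; an illustrative call:
+ *
+ *   char small[4];
+ *   ssize_t n = rte_strscpy(small, "hello", sizeof(small));
+ *   // n == -E2BIG and small contains "hel" (NUL-terminated)
+ */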
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c
new file mode 100644
index 000000000..ead06897b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem);
+/* local tailq list */
+static struct rte_tailq_elem_head rte_tailq_elem_head =
+ TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head);
+
+/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */
+static int rte_tailqs_count = -1;
+
+struct rte_tailq_head *
+rte_eal_tailq_lookup(const char *name)
+{
+ unsigned i;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ if (!strncmp(name, mcfg->tailq_head[i].name,
+ RTE_TAILQ_NAMESIZE-1))
+ return &mcfg->tailq_head[i];
+ }
+
+ return NULL;
+}
+
+void
+rte_dump_tailq(FILE *f)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_mcfg_tailq_read_lock();
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
+ const struct rte_tailq_entry_head *head = &tailq->tailq_head;
+
+ fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n",
+ i, tailq->name, head->tqh_first, head->tqh_last);
+ }
+ rte_mcfg_tailq_read_unlock();
+}
+
+static struct rte_tailq_head *
+rte_eal_tailq_create(const char *name)
+{
+ struct rte_tailq_head *head = NULL;
+
+ if (!rte_eal_tailq_lookup(name) &&
+ (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) {
+ struct rte_mem_config *mcfg;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ head = &mcfg->tailq_head[rte_tailqs_count];
+ strlcpy(head->name, name, sizeof(head->name) - 1);
+ TAILQ_INIT(&head->tailq_head);
+ rte_tailqs_count++;
+ }
+
+ return head;
+}
+
+/* local registry, used to store "early" tailqs before rte_eal_init() and to
+ * ensure a secondary process only registers tailqs once. */
+static int
+rte_eal_tailq_local_register(struct rte_tailq_elem *t)
+{
+ struct rte_tailq_elem *temp;
+
+ TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) {
+ if (!strncmp(t->name, temp->name, sizeof(temp->name)))
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next);
+ return 0;
+}
+
+static void
+rte_eal_tailq_update(struct rte_tailq_elem *t)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* primary process is the only one that creates */
+ t->head = rte_eal_tailq_create(t->name);
+ } else {
+ t->head = rte_eal_tailq_lookup(t->name);
+ }
+}
+
+int
+rte_eal_tailq_register(struct rte_tailq_elem *t)
+{
+ if (rte_eal_tailq_local_register(t) < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s tailq is already registered\n", t->name);
+ goto error;
+ }
+
+ /* if registration happens after rte_eal_tailqs_init(), we can update
+ * the tailq head immediately */
+ if (rte_tailqs_count >= 0) {
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ TAILQ_REMOVE(&rte_tailq_elem_head, t, next);
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ t->head = NULL;
+ return -1;
+}
+
+int
+rte_eal_tailqs_init(void)
+{
+ struct rte_tailq_elem *t;
+
+ rte_tailqs_count = 0;
+
+ TAILQ_FOREACH(t, &rte_tailq_elem_head, next) {
+ /* second part of register job for "early" tailqs, see
+ * rte_eal_tailq_register and EAL_REGISTER_TAILQ */
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ /* TAILQ_REMOVE not needed, error is already fatal */
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ rte_dump_tailq(stderr);
+ return -1;
+}
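In practice, DPDK libraries do not call rte_eal_tailq_register() directly; they use the EAL_REGISTER_TAILQ() constructor macro from rte_tailq.h, which feeds into the registration path above. A hedged sketch with made-up element type and list names:

```c
#include <sys/queue.h>
#include <rte_tailq.h>

/* Hypothetical element type kept on a process-shared tailq. */
struct my_obj {
	TAILQ_ENTRY(my_obj) next;
	int value;
};

TAILQ_HEAD(my_obj_list, my_obj);

static struct rte_tailq_elem my_obj_tailq = {
	.name = "MY_OBJ_LIST",
};
EAL_REGISTER_TAILQ(my_obj_tailq)

/* After rte_eal_init(), the shared head can be recovered via the
 * mem config slot that rte_eal_tailq_update() filled in. */
static struct my_obj_list *
my_obj_list_get(void)
{
	return RTE_TAILQ_CAST(my_obj_tailq.head, my_obj_list);
}
```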
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c
new file mode 100644
index 000000000..f9f588c17
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <assert.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_trace_point.h>
+#endif
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DECLARE_PER_LCORE(unsigned, _socket_id);
+
+unsigned rte_socket_id(void)
+{
+ return RTE_PER_LCORE(_socket_id);
+}
+
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ return cfg->lcore_role[lcore_id] == role;
+}
+
+static int
+eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+{
+ unsigned cpu = 0;
+ int socket_id = SOCKET_ID_ANY;
+ int sid;
+
+ if (cpusetp == NULL)
+ return SOCKET_ID_ANY;
+
+ do {
+ if (!CPU_ISSET(cpu, cpusetp))
+ continue;
+
+ if (socket_id == SOCKET_ID_ANY)
+ socket_id = eal_cpu_socket_id(cpu);
+
+ sid = eal_cpu_socket_id(cpu);
+ if (socket_id != sid) {
+ socket_id = SOCKET_ID_ANY;
+ break;
+ }
+
+ } while (++cpu < CPU_SETSIZE);
+
+ return socket_id;
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+ int s;
+ unsigned lcore_id;
+ pthread_t tid;
+
+ tid = pthread_self();
+
+ s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
+ if (s != 0) {
+ RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+ return -1;
+ }
+
+ /* store socket_id in TLS for quick access */
+ RTE_PER_LCORE(_socket_id) =
+ eal_cpuset_socket_id(cpusetp);
+
+ /* store cpuset in TLS for quick access */
+ memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
+ sizeof(rte_cpuset_t));
+
+ lcore_id = rte_lcore_id();
+ if (lcore_id != (unsigned)LCORE_ID_ANY) {
+ /* EAL thread will update lcore_config */
+ lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
+ memmove(&lcore_config[lcore_id].cpuset, cpusetp,
+ sizeof(rte_cpuset_t));
+ }
+
+ return 0;
+}
+
+void
+rte_thread_get_affinity(rte_cpuset_t *cpusetp)
+{
+ assert(cpusetp);
+ memmove(cpusetp, &RTE_PER_LCORE(_cpuset),
+ sizeof(rte_cpuset_t));
+}
+
+int
+eal_thread_dump_affinity(char *str, unsigned size)
+{
+ rte_cpuset_t cpuset;
+ unsigned cpu;
+ int ret;
+ unsigned int out = 0;
+
+ rte_thread_get_affinity(&cpuset);
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (!CPU_ISSET(cpu, &cpuset))
+ continue;
+
+ ret = snprintf(str + out,
+ size - out, "%u,", cpu);
+ if (ret < 0 || (unsigned)ret >= size - out) {
+ /* string will be truncated */
+ ret = -1;
+ goto exit;
+ }
+
+ out += ret;
+ }
+
+ ret = 0;
+exit:
+ /* remove the last separator */
+ if (out > 0)
+ str[out - 1] = '\0';
+
+ return ret;
+}
+
+struct rte_thread_ctrl_params {
+ void *(*start_routine)(void *);
+ void *arg;
+ pthread_barrier_t configured;
+};
+
+static void *rte_thread_init(void *arg)
+{
+ int ret;
+ rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
+ struct rte_thread_ctrl_params *params = arg;
+ void *(*start_routine)(void *) = params->start_routine;
+ void *routine_arg = params->arg;
+
+ /* Store cpuset in TLS for quick access */
+ memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ __rte_trace_mem_per_thread_alloc();
+#endif
+ return start_routine(routine_arg);
+}
+
+int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
+ struct rte_thread_ctrl_params *params;
+ int ret;
+
+ params = malloc(sizeof(*params));
+ if (!params)
+ return -ENOMEM;
+
+ params->start_routine = start_routine;
+ params->arg = arg;
+
+ pthread_barrier_init(&params->configured, NULL, 2);
+
+ ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+ if (ret != 0) {
+ free(params);
+ return -ret;
+ }
+
+ if (name != NULL) {
+ ret = rte_thread_setname(*thread, name);
+ if (ret < 0)
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
+ }
+
+ ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
+ if (ret)
+ goto fail;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return 0;
+
+fail:
+ if (PTHREAD_BARRIER_SERIAL_THREAD ==
+ pthread_barrier_wait(&params->configured)) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+ pthread_cancel(*thread);
+ pthread_join(*thread, NULL);
+ return -ret;
+}
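rte_ctrl_thread_create() above pins the new thread to internal_config.ctrl_cpuset and only returns once the barrier guarantees the affinity and name have been applied. A minimal sketch, assuming it runs after rte_eal_init(); the thread body and name are made up:

```c
#include <stdio.h>
#include <pthread.h>
#include <rte_lcore.h>

/* Hypothetical housekeeping thread kept off the data-path lcores. */
static void *
housekeeping(void *arg)
{
	(void)arg;
	printf("ctrl thread running on socket %u\n", rte_socket_id());
	return NULL;
}

static int
spawn_housekeeping(void)
{
	pthread_t tid;

	/* NULL attr: default attributes; the name is set best-effort. */
	return rte_ctrl_thread_create(&tid, "app-housekeep", NULL,
			housekeeping, NULL);
}
```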
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c
new file mode 100644
index 000000000..fa9ee1b22
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <time.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_pause.h>
+#include <rte_eal.h>
+
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz;
+
+/* Pointer to user delay function */
+void (*rte_delay_us)(unsigned int) = NULL;
+
+void
+rte_delay_us_block(unsigned int us)
+{
+ const uint64_t start = rte_get_timer_cycles();
+ const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
+ while ((rte_get_timer_cycles() - start) < ticks)
+ rte_pause();
+}
+
+void
+rte_delay_us_sleep(unsigned int us)
+{
+ struct timespec wait[2];
+ int ind = 0;
+
+ wait[0].tv_sec = 0;
+ if (us >= US_PER_S) {
+ wait[0].tv_sec = us / US_PER_S;
+ us -= wait[0].tv_sec * US_PER_S;
+ }
+ wait[0].tv_nsec = 1000 * us;
+
+ while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) {
+ /*
+ * Sleep was interrupted. Flip the index, so the 'remainder'
+ * will become the 'request' for a next call.
+ */
+ ind = 1 - ind;
+ }
+}
+
+uint64_t
+rte_get_tsc_hz(void)
+{
+ return eal_tsc_resolution_hz;
+}
+
+static uint64_t
+estimate_tsc_freq(void)
+{
+#define CYC_PER_10MHZ 1E7
+ RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly"
+ " - clock timings may be less accurate.\n");
+ /* assume that the sleep(1) will sleep for 1 second */
+ uint64_t start = rte_rdtsc();
+ sleep(1);
+ /* Round to the nearest multiple of 10MHz. 1E7 ~ 10MHz */
+ return RTE_ALIGN_MUL_NEAR(rte_rdtsc() - start, CYC_PER_10MHZ);
+}
+
+void
+set_tsc_freq(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t freq;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ /*
+ * Just use the primary process calculated TSC rate in any
+ * secondary process. It avoids any unnecessary overhead on
+ * systems where arch-specific frequency detection is not
+ * available.
+ */
+ eal_tsc_resolution_hz = mcfg->tsc_hz;
+ return;
+ }
+
+ freq = get_tsc_freq_arch();
+ if (!freq)
+ freq = get_tsc_freq();
+ if (!freq)
+ freq = estimate_tsc_freq();
+
+ RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
+ eal_tsc_resolution_hz = freq;
+ mcfg->tsc_hz = freq;
+}
+
+void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
+{
+ rte_delay_us = userfunc;
+}
+
+RTE_INIT(rte_timer_init)
+{
+ /* set rte_delay_us_block as a delay function */
+ rte_delay_us_callback_register(rte_delay_us_block);
+}
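rte_delay_us is a function pointer that the RTE_INIT constructor above points at the busy-wait rte_delay_us_block(); an application that would rather yield the CPU can swap in the nanosleep()-based variant:

```c
#include <rte_cycles.h>

static void
use_sleeping_delays(void)
{
	/* Replace the default busy-wait with rte_delay_us_sleep(). */
	rte_delay_us_callback_register(rte_delay_us_sleep);

	rte_delay_us(1500);	/* now sleeps instead of spinning */
}
```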
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c
new file mode 100644
index 000000000..875553d7e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <regex.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+
+#include "eal_trace.h"
+
+RTE_DEFINE_PER_LCORE(volatile int, trace_point_sz);
+RTE_DEFINE_PER_LCORE(void *, trace_mem);
+static RTE_DEFINE_PER_LCORE(char, ctf_field[TRACE_CTF_FIELD_SIZE]);
+static RTE_DEFINE_PER_LCORE(int, ctf_count);
+
+static struct trace_point_head tp_list = STAILQ_HEAD_INITIALIZER(tp_list);
+static struct trace trace = { .args = STAILQ_HEAD_INITIALIZER(trace.args), };
+
+struct trace *
+trace_obj_get(void)
+{
+ return &trace;
+}
+
+struct trace_point_head *
+trace_list_head_get(void)
+{
+ return &tp_list;
+}
+
+int
+eal_trace_init(void)
+{
+ struct trace_arg *arg;
+
+ /* Trace memory must be 8-byte aligned for natural alignment */
+ RTE_BUILD_BUG_ON((offsetof(struct __rte_trace_header, mem) % 8) != 0);
+
+ /* One of the trace point registrations failed */
+ if (trace.register_errno) {
+ rte_errno = trace.register_errno;
+ goto fail;
+ }
+
+ if (!STAILQ_EMPTY(&trace.args))
+ trace.status = true;
+
+ if (!rte_trace_is_enabled())
+ return 0;
+
+ rte_spinlock_init(&trace.lock);
+
+ /* Is duplicate trace name registered */
+ if (trace_has_duplicate_entry())
+ goto fail;
+
+ /* Generate UUID ver 4 with total size of events and number of
+ * events
+ */
+ trace_uuid_generate();
+
+ /* Apply buffer size configuration for trace output */
+ trace_bufsz_args_apply();
+
+ /* Generate CTF TDSL metadata */
+ if (trace_metadata_create() < 0)
+ goto fail;
+
+ /* Create trace directory */
+ if (trace_mkdir())
+ goto free_meta;
+
+ /* Save current epoch timestamp for future use */
+ if (trace_epoch_time_save() < 0)
+ goto fail;
+
+ /* Apply global configurations */
+ STAILQ_FOREACH(arg, &trace.args, next)
+ trace_args_apply(arg->val);
+
+ rte_trace_mode_set(trace.mode);
+
+ return 0;
+
+free_meta:
+ trace_metadata_destroy();
+fail:
+ trace_err("failed to initialize trace [%s]", rte_strerror(rte_errno));
+ return -rte_errno;
+}
+
+void
+eal_trace_fini(void)
+{
+ if (!rte_trace_is_enabled())
+ return;
+ trace_mem_per_thread_free();
+ trace_metadata_destroy();
+ eal_trace_args_free();
+}
+
+bool
+rte_trace_is_enabled(void)
+{
+ return trace.status;
+}
+
+static void
+trace_mode_set(rte_trace_point_t *trace, enum rte_trace_mode mode)
+{
+ if (mode == RTE_TRACE_MODE_OVERWRITE)
+ __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_DISCARD,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_DISCARD,
+ __ATOMIC_RELEASE);
+}
+
+void
+rte_trace_mode_set(enum rte_trace_mode mode)
+{
+ struct trace_point *tp;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ STAILQ_FOREACH(tp, &tp_list, next)
+ trace_mode_set(tp->handle, mode);
+
+ trace.mode = mode;
+}
+
+enum rte_trace_mode
+rte_trace_mode_get(void)
+{
+ return trace.mode;
+}
+
+static bool
+trace_point_is_invalid(rte_trace_point_t *t)
+{
+ return (t == NULL) || (trace_id_get(t) >= trace.nb_trace_points);
+}
+
+bool
+rte_trace_point_is_enabled(rte_trace_point_t *trace)
+{
+ uint64_t val;
+
+ if (trace_point_is_invalid(trace))
+ return false;
+
+ val = __atomic_load_n(trace, __ATOMIC_ACQUIRE);
+ return (val & __RTE_TRACE_FIELD_ENABLE_MASK) != 0;
+}
+
+int
+rte_trace_point_enable(rte_trace_point_t *trace)
+{
+ if (trace_point_is_invalid(trace))
+ return -ERANGE;
+
+ __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_MASK,
+ __ATOMIC_RELEASE);
+ return 0;
+}
+
+int
+rte_trace_point_disable(rte_trace_point_t *trace)
+{
+ if (trace_point_is_invalid(trace))
+ return -ERANGE;
+
+ __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_MASK,
+ __ATOMIC_RELEASE);
+ return 0;
+}
+
+int
+rte_trace_pattern(const char *pattern, bool enable)
+{
+ struct trace_point *tp;
+ int rc = 0, found = 0;
+
+ STAILQ_FOREACH(tp, &tp_list, next) {
+ if (fnmatch(pattern, tp->name, 0) == 0) {
+ if (enable)
+ rc = rte_trace_point_enable(tp->handle);
+ else
+ rc = rte_trace_point_disable(tp->handle);
+ found = 1;
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return rc | found;
+}
+
+int
+rte_trace_regexp(const char *regex, bool enable)
+{
+ struct trace_point *tp;
+ int rc = 0, found = 0;
+ regex_t r;
+
+ if (regcomp(&r, regex, 0) != 0)
+ return -EINVAL;
+
+ STAILQ_FOREACH(tp, &tp_list, next) {
+ if (regexec(&r, tp->name, 0, NULL, 0) == 0) {
+ if (enable)
+ rc = rte_trace_point_enable(tp->handle);
+ else
+ rc = rte_trace_point_disable(tp->handle);
+ found = 1;
+ }
+ if (rc < 0) {
+ regfree(&r);
+ return rc;
+ }
+ }
+ regfree(&r);
+
+ return rc | found;
+}
+
+rte_trace_point_t *
+rte_trace_point_lookup(const char *name)
+{
+ struct trace_point *tp;
+
+ if (name == NULL)
+ return NULL;
+
+ STAILQ_FOREACH(tp, &tp_list, next)
+ if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0)
+ return tp->handle;
+
+ return NULL;
+}
+
+static void
+trace_point_dump(FILE *f, struct trace_point *tp)
+{
+ rte_trace_point_t *handle = tp->handle;
+
+ fprintf(f, "\tid %d, %s, size is %d, %s\n",
+ trace_id_get(handle), tp->name,
+ (uint16_t)(*handle & __RTE_TRACE_FIELD_SIZE_MASK),
+ rte_trace_point_is_enabled(handle) ? "enabled" : "disabled");
+}
+
+static void
+trace_lcore_mem_dump(FILE *f)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+
+ if (trace->nb_trace_mem_list == 0)
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+ fprintf(f, "nb_trace_mem_list = %d\n", trace->nb_trace_mem_list);
+ fprintf(f, "\nTrace mem info\n--------------\n");
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ header = trace->lcore_meta[count].mem;
+ fprintf(f, "\tid %d, mem=%p, area=%s, lcore_id=%d, name=%s\n",
+ count, header,
+ trace_area_to_string(trace->lcore_meta[count].area),
+ header->stream_header.lcore_id,
+ header->stream_header.thread_name);
+ }
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+rte_trace_dump(FILE *f)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+
+ fprintf(f, "\nGlobal info\n-----------\n");
+ fprintf(f, "status = %s\n",
+ rte_trace_is_enabled() ? "enabled" : "disabled");
+ fprintf(f, "mode = %s\n",
+ trace_mode_to_string(rte_trace_mode_get()));
+ fprintf(f, "dir = %s\n", trace->dir);
+ fprintf(f, "buffer len = %d\n", trace->buff_len);
+ fprintf(f, "number of trace points = %d\n", trace->nb_trace_points);
+
+ trace_lcore_mem_dump(f);
+ fprintf(f, "\nTrace point info\n----------------\n");
+ STAILQ_FOREACH(tp, tp_list, next)
+ trace_point_dump(f, tp);
+}
+
+void
+__rte_trace_mem_per_thread_alloc(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ if (RTE_PER_LCORE(trace_mem))
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+
+ count = trace->nb_trace_mem_list;
+
+ /* Allocate room for storing the thread trace mem meta */
+ trace->lcore_meta = realloc(trace->lcore_meta,
+ sizeof(trace->lcore_meta[0]) * (count + 1));
+
+ /* Provide dummy space for fast path to consume */
+ if (trace->lcore_meta == NULL) {
+ trace_crit("trace mem meta memory realloc failed");
+ header = NULL;
+ goto fail;
+ }
+
+ /* First attempt from huge page */
+ header = eal_malloc_no_trace(NULL, trace_mem_sz(trace->buff_len), 8);
+ if (header) {
+ trace->lcore_meta[count].area = TRACE_AREA_HUGEPAGE;
+ goto found;
+ }
+
+ /* Second attempt from heap */
+ header = malloc(trace_mem_sz(trace->buff_len));
+ if (header == NULL) {
+ trace_crit("trace mem malloc attempt failed");
+ goto fail;
+ }
+
+ /* Second attempt from heap succeeded */
+ trace->lcore_meta[count].area = TRACE_AREA_HEAP;
+
+ /* Initialize the trace header */
+found:
+ header->offset = 0;
+ header->len = trace->buff_len;
+ header->stream_header.magic = TRACE_CTF_MAGIC;
+ rte_uuid_copy(header->stream_header.uuid, trace->uuid);
+ header->stream_header.lcore_id = rte_lcore_id();
+
+ /* Store the thread name */
+ char *name = header->stream_header.thread_name;
+ memset(name, 0, __RTE_TRACE_EMIT_STRING_LEN_MAX);
+ rte_thread_getname(pthread_self(), name,
+ __RTE_TRACE_EMIT_STRING_LEN_MAX);
+
+ trace->lcore_meta[count].mem = header;
+ trace->nb_trace_mem_list++;
+fail:
+ RTE_PER_LCORE(trace_mem) = header;
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+trace_mem_per_thread_free(void)
+{
+ struct trace *trace = trace_obj_get();
+ uint32_t count;
+ void *mem;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ mem = trace->lcore_meta[count].mem;
+ if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
+ eal_free_no_trace(mem);
+ else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
+ free(mem);
+ }
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+__rte_trace_point_emit_field(size_t sz, const char *in, const char *datatype)
+{
+ char *field = RTE_PER_LCORE(ctf_field);
+ int count = RTE_PER_LCORE(ctf_count);
+ size_t size;
+ int rc;
+
+ size = RTE_MAX(0, TRACE_CTF_FIELD_SIZE - 1 - count);
+ RTE_PER_LCORE(trace_point_sz) += sz;
+ rc = snprintf(RTE_PTR_ADD(field, count), size, "%s %s;", datatype, in);
+ if (rc <= 0 || (size_t)rc >= size) {
+ RTE_PER_LCORE(trace_point_sz) = 0;
+ trace_crit("CTF field is too long");
+ return;
+ }
+ RTE_PER_LCORE(ctf_count) += rc;
+}
+
+int
+__rte_trace_point_register(rte_trace_point_t *handle, const char *name,
+ void (*register_fn)(void))
+{
+ char *field = RTE_PER_LCORE(ctf_field);
+ struct trace_point *tp;
+ uint16_t sz;
+
+ /* Sanity checks of arguments */
+ if (name == NULL || register_fn == NULL || handle == NULL) {
+ trace_err("invalid arguments");
+ rte_errno = EINVAL;
+ goto fail;
+ }
+
+ /* Check the size of the trace point object */
+ RTE_PER_LCORE(trace_point_sz) = 0;
+ RTE_PER_LCORE(ctf_count) = 0;
+ register_fn();
+ if (RTE_PER_LCORE(trace_point_sz) == 0) {
+ trace_err("missing rte_trace_emit_header() in register fn");
+ rte_errno = EBADF;
+ goto fail;
+ }
+
+ /* Check whether the size overflowed */
+ if (RTE_PER_LCORE(trace_point_sz) > UINT16_MAX) {
+ trace_err("trace point size overflowed");
+ rte_errno = ENOSPC;
+ goto fail;
+ }
+
+ /* Are we running out of space to store trace points? */
+ if (trace.nb_trace_points > UINT16_MAX) {
+ trace_err("trace point exceeds the max count");
+ rte_errno = ENOSPC;
+ goto fail;
+ }
+
+ /* Get the size of the trace point */
+ sz = RTE_PER_LCORE(trace_point_sz);
+ tp = calloc(1, sizeof(struct trace_point));
+ if (tp == NULL) {
+ trace_err("fail to allocate trace point memory");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* Initialize the trace point */
+ if (rte_strscpy(tp->name, name, TRACE_POINT_NAME_SIZE) < 0) {
+ trace_err("name is too long");
+ rte_errno = E2BIG;
+ goto free;
+ }
+
+ /* Copy the field data for future use */
+ if (rte_strscpy(tp->ctf_field, field, TRACE_CTF_FIELD_SIZE) < 0) {
+ trace_err("CTF field size is too long");
+ rte_errno = E2BIG;
+ goto free;
+ }
+
+ /* Clear field memory for the next event */
+ memset(field, 0, TRACE_CTF_FIELD_SIZE);
+
+ /* Form the trace handle */
+ *handle = sz;
+ *handle |= trace.nb_trace_points << __RTE_TRACE_FIELD_ID_SHIFT;
+
+ trace.nb_trace_points++;
+ tp->handle = handle;
+
+ /* Add the trace point at tail */
+ STAILQ_INSERT_TAIL(&tp_list, tp, next);
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+
+ /* All Good !!! */
+ return 0;
+free:
+ free(tp);
+fail:
+ if (trace.register_errno == 0)
+ trace.register_errno = rte_errno;
+
+ return -rte_errno;
+}
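Trace points registered through __rte_trace_point_register() can be toggled at runtime, one by one or in bulk. A sketch using the public API implemented in this file; the glob pattern and trace point name are only examples:

```c
#include <stdio.h>
#include <rte_trace.h>
#include <rte_trace_point.h>

static void
enable_eal_traces(void)
{
	rte_trace_point_t *tp;

	/* Enable every EAL trace point matching the glob. */
	if (rte_trace_pattern("lib.eal.*", true) < 0)
		fprintf(stderr, "cannot enable EAL trace points\n");

	/* Or toggle a single trace point by its registered name. */
	tp = rte_trace_point_lookup("lib.eal.generic.u64");
	if (tp != NULL)
		rte_trace_point_enable(tp);

	rte_trace_dump(stdout);	/* global status plus per-point state */
}
```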
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c
new file mode 100644
index 000000000..302e2bb74
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <inttypes.h>
+#include <time.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_time.h>
+#include <rte_trace.h>
+#include <rte_version.h>
+
+#include "eal_trace.h"
+
+__rte_format_printf(2, 0)
+static int
+metadata_printf(char **str, const char *fmt, ...)
+{
+ va_list ap;
+ int rc;
+
+ *str = NULL;
+ va_start(ap, fmt);
+ rc = vasprintf(str, fmt, ap);
+ va_end(ap);
+
+ return rc;
+}
+
+static int
+meta_copy(char **meta, int *offset, char *str, int rc)
+{
+ int count = *offset;
+ char *ptr = *meta;
+
+ if (rc < 0)
+ return rc;
+
+ ptr = realloc(ptr, count + rc);
+ if (ptr == NULL)
+ goto free_str;
+
+ memcpy(RTE_PTR_ADD(ptr, count), str, rc);
+ count += rc;
+ free(str);
+
+ *meta = ptr;
+ *offset = count;
+
+ return rc;
+
+free_str:
+ if (str)
+ free(str);
+ return -ENOMEM;
+}
+
+static int
+meta_data_type_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "/* CTF 1.8 */\n"
+ "typealias integer {size = 8; base = x;}:= uint8_t;\n"
+ "typealias integer {size = 16; base = x;} := uint16_t;\n"
+ "typealias integer {size = 32; base = x;} := uint32_t;\n"
+ "typealias integer {size = 64; base = x;} := uint64_t;\n"
+ "typealias integer {size = 8; signed = true;} := int8_t;\n"
+ "typealias integer {size = 16; signed = true;} := int16_t;\n"
+ "typealias integer {size = 32; signed = true;} := int32_t;\n"
+ "typealias integer {size = 64; signed = true;} := int64_t;\n"
+#ifdef RTE_ARCH_64
+ "typealias integer {size = 64; base = x;} := uintptr_t;\n"
+#else
+ "typealias integer {size = 32; base = x;} := uintptr_t;\n"
+#endif
+#ifdef RTE_ARCH_64
+ "typealias integer {size = 64; base = x;} := long;\n"
+#else
+ "typealias integer {size = 32; base = x;} := long;\n"
+#endif
+ "typealias integer {size = 8; signed = false; encoding = ASCII; } := string_bounded_t;\n\n"
+ "typealias floating_point {\n"
+ " exp_dig = 8;\n"
+ " mant_dig = 24;\n"
+ "} := float;\n\n"
+ "typealias floating_point {\n"
+ " exp_dig = 11;\n"
+ " mant_dig = 53;\n"
+ "} := double;\n\n");
+
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+is_be(void)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+static int
+meta_header_emit(char **meta, int *offset)
+{
+ struct trace *trace = trace_obj_get();
+ char uustr[RTE_UUID_STRLEN];
+ char *str = NULL;
+ int rc;
+
+ rte_uuid_unparse(trace->uuid, uustr, RTE_UUID_STRLEN);
+ rc = metadata_printf(&str,
+ "trace {\n"
+ " major = 1;\n"
+ " minor = 8;\n"
+ " uuid = \"%s\";\n"
+ " byte_order = %s;\n"
+ " packet.header := struct {\n"
+ " uint32_t magic;\n"
+ " uint8_t uuid[16];\n"
+ " };\n"
+ "};\n\n", uustr, is_be() ? "be" : "le");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_env_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "env {\n"
+ " dpdk_version = \"%s\";\n"
+ " tracer_name = \"dpdk\";\n"
+ "};\n\n", rte_version());
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass1_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "clock {\n"
+ " name = \"dpdk\";\n"
+ " freq = ");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass2_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n"
+ " offset_s =", 0);
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass3_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n"
+ " offset =", 0);
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass4_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n};\n\n"
+ "typealias integer {\n"
+ " size = 48; align = 1; signed = false;\n"
+ " map = clock.dpdk.value;\n"
+ "} := uint48_clock_dpdk_t;\n\n", 0);
+
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_stream_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "stream {\n"
+ " packet.context := struct {\n"
+ " uint32_t cpu_id;\n"
+ " string_bounded_t name[32];\n"
+ " };\n"
+ " event.header := struct {\n"
+ " uint48_clock_dpdk_t timestamp;\n"
+ " uint16_t id;\n"
+ " } align(64);\n"
+ "};\n\n");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static void
+string_fixed_replace(char *input, const char *search, const char *replace)
+{
+ char *found;
+ size_t len;
+
+ found = strstr(input, search);
+ if (found == NULL)
+ return;
+
+ if (strlen(found) != strlen(search))
+ return;
+
+ len = strlen(replace);
+ memcpy(found, replace, len);
+ found[len] = '\0';
+}
+
+static void
+ctf_fixup_align(char *str)
+{
+ string_fixed_replace(str, "align", "_align");
+}
+
+static void
+ctf_fixup_arrow_deref(char *str)
+{
+ const char *replace = "_";
+ const char *search = "->";
+ char *found;
+ size_t len;
+
+ found = strstr(str, search);
+ if (found == NULL)
+ return;
+
+ do {
+ memcpy(found, replace, strlen(replace));
+ len = strlen(found + 2);
+ memmove(found + 1, found + 2, len);
+ found[len + 1] = '\0';
+ found = strstr(str, search);
+ } while (found != NULL);
+}
+
+static void
+ctf_fixup_dot_deref(char *str)
+{
+ const char *replace = "_";
+ const char *search = ".";
+ char *found;
+ size_t len;
+
+ found = strstr(str, search);
+ if (found == NULL)
+ return;
+
+ len = strlen(replace);
+ do {
+ memcpy(found, replace, len);
+ found = strstr(str, search);
+ } while (found != NULL);
+}
+
+static void
+ctf_fixup_event(char *str)
+{
+ string_fixed_replace(str, "event", "_event");
+}
+
+static int
+ctf_fixup_keyword(char *str)
+{
+ char dup_str[TRACE_CTF_FIELD_SIZE];
+ char input[TRACE_CTF_FIELD_SIZE];
+ const char *delim = ";";
+ char *from;
+ int len;
+
+ if (str == NULL)
+ return 0;
+
+ len = strlen(str);
+ if (len >= TRACE_CTF_FIELD_SIZE) {
+ trace_err("ctf_field reached its maximum limit");
+ return -EMSGSIZE;
+ }
+
+ /* Create duplicate string */
+ strcpy(dup_str, str);
+
+ len = 0;
+ from = strtok(dup_str, delim);
+ while (from != NULL) {
+ strcpy(input, from);
+ ctf_fixup_align(input);
+ ctf_fixup_dot_deref(input);
+ ctf_fixup_arrow_deref(input);
+ ctf_fixup_event(input);
+
+ strcpy(&input[strlen(input)], delim);
+ if ((len + strlen(input)) >= TRACE_CTF_FIELD_SIZE) {
+ trace_err("ctf_field reached its maximum limit");
+ return -EMSGSIZE;
+ }
+
+ strcpy(str + len, input);
+ len += strlen(input);
+ from = strtok(NULL, delim);
+ }
+
+ return 0;
+}
+
+static int
+meta_event_emit(char **meta, int *offset, struct trace_point *tp)
+{
+ char *str = NULL;
+ int rc;
+
+ /* Fix up the CTF field string in case it uses reserved CTF keywords */
+ rc = ctf_fixup_keyword(tp->ctf_field);
+ if (rc)
+ return rc;
+
+ rc = metadata_printf(&str,
+ "event {\n"
+ " id = %d;\n"
+ " name = \"%s\";\n"
+ " fields := struct {\n"
+ " %s\n"
+ " };\n"
+ "};\n\n", trace_id_get(tp->handle), tp->name, tp->ctf_field);
+ return meta_copy(meta, offset, str, rc);
+}
+
+int
+trace_metadata_create(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+ int rc, offset = 0;
+ char *meta = NULL;
+
+ rc = meta_data_type_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_header_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_env_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_clock_pass1_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq = offset;
+
+ rc = meta_clock_pass2_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq_off_s = offset;
+
+ rc = meta_clock_pass3_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq_off = offset;
+
+ rc = meta_clock_pass4_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_stream_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ STAILQ_FOREACH(tp, tp_list, next)
+ if (meta_event_emit(&meta, &offset, tp) < 0)
+ goto fail;
+
+ trace->ctf_meta = meta;
+ return 0;
+
+fail:
+ if (meta)
+ free(meta);
+ return -EBADF;
+}
+
+void
+trace_metadata_destroy(void)
+{
+ struct trace *trace = trace_obj_get();
+
+ if (trace->ctf_meta) {
+ free(trace->ctf_meta);
+ trace->ctf_meta = NULL;
+ }
+}
+
+static void
+meta_fix_freq(struct trace *trace, char *meta)
+{
+ char *str;
+ int rc;
+
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq);
+ rc = sprintf(str, "%20"PRIu64"", rte_get_timer_hz());
+ str[rc] = ';';
+}
+
+static void
+meta_fix_freq_offset(struct trace *trace, char *meta)
+{
+ uint64_t uptime_ticks_floor, uptime_ticks, freq, uptime_sec;
+ uint64_t offset, offset_s;
+ char *str;
+ int rc;
+
+ uptime_ticks = trace->uptime_ticks &
+ ((1ULL << __RTE_TRACE_EVENT_HEADER_ID_SHIFT) - 1);
+ freq = rte_get_tsc_hz();
+ uptime_ticks_floor = RTE_ALIGN_MUL_FLOOR(uptime_ticks, freq);
+
+ uptime_sec = uptime_ticks_floor / freq;
+ offset_s = trace->epoch_sec - uptime_sec;
+
+ offset = uptime_ticks - uptime_ticks_floor;
+ offset += trace->epoch_nsec * (freq / NSEC_PER_SEC);
+
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off_s);
+ rc = sprintf(str, "%20"PRIu64"", offset_s);
+ str[rc] = ';';
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off);
+ rc = sprintf(str, "%20"PRIu64"", offset);
+ str[rc] = ';';
+}
+
+static void
+meta_fixup(struct trace *trace, char *meta)
+{
+ meta_fix_freq(trace, meta);
+ meta_fix_freq_offset(trace, meta);
+}
+
+int
+rte_trace_metadata_dump(FILE *f)
+{
+ struct trace *trace = trace_obj_get();
+ char *ctf_meta = trace->ctf_meta;
+ int rc;
+
+ if (!rte_trace_is_enabled())
+ return 0;
+
+ if (ctf_meta == NULL)
+ return -EINVAL;
+
+ if (!__atomic_load_n(&trace->ctf_fixup_done, __ATOMIC_SEQ_CST) &&
+ rte_get_timer_hz()) {
+ meta_fixup(trace, ctf_meta);
+ __atomic_store_n(&trace->ctf_fixup_done, 1, __ATOMIC_SEQ_CST);
+ }
+
+ rc = fprintf(f, "%s", ctf_meta);
+ return rc < 0 ? rc : 0;
+}
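To make the output concrete: meta_event_emit() above renders one CTF event block per registered trace point. For a hypothetical trace point with id 42, name app.burst and a single uint16_t field, the generated metadata fragment would take this shape (values illustrative, layout taken from the printf format above):

```
event {
    id = 42;
    name = "app.burst";
    fields := struct {
        uint16_t nb_pkts;
    };
};
```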
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c
new file mode 100644
index 000000000..4a8ce9088
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <rte_trace_point_register.h>
+
+#include <rte_eal_trace.h>
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_void);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u64);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u32);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u16);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u8);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i64);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i32);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i16);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i8);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_int);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_long);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_float);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_double);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_ptr);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_str);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_func);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_set);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_cancel);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_zmalloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_malloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_realloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_free);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_reserve);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_lookup);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_free);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_remote_launch);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_lcore_ready);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_register);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_unregister);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_enable);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_disable);
+
+RTE_INIT(eal_trace_init)
+{
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_void,
+ lib.eal.generic.void);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u64,
+ lib.eal.generic.u64);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u32,
+ lib.eal.generic.u32);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u16,
+ lib.eal.generic.u16);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u8,
+ lib.eal.generic.u8);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i64,
+ lib.eal.generic.i64);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i32,
+ lib.eal.generic.i32);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i16,
+ lib.eal.generic.i16);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i8,
+ lib.eal.generic.i8);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_int,
+ lib.eal.generic.int);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_long,
+ lib.eal.generic.long);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_float,
+ lib.eal.generic.float);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_double,
+ lib.eal.generic.double);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_ptr,
+ lib.eal.generic.ptr);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_str,
+ lib.eal.generic.string);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_func,
+ lib.eal.generic.func);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_set,
+ lib.eal.alarm.set);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_cancel,
+ lib.eal.alarm.cancel);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_zmalloc,
+ lib.eal.mem.zmalloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_malloc,
+ lib.eal.mem.malloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_realloc,
+ lib.eal.mem.realloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_free,
+ lib.eal.mem.free);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_reserve,
+ lib.eal.memzone.reserve);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_lookup,
+ lib.eal.memzone.lookup);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_free,
+ lib.eal.memzone.free);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_remote_launch,
+ lib.eal.thread.remote.launch);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_ready,
+ lib.eal.thread.lcore.ready);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_register,
+ lib.eal.intr.register);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_unregister,
+ lib.eal.intr.unregister);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable,
+ lib.eal.intr.enable);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable,
+ lib.eal.intr.disable);
+}
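Applications can define their own trace points with the same three-step pattern used here: declare with RTE_TRACE_POINT() in a header, then RTE_TRACE_POINT_DEFINE() and RTE_TRACE_POINT_REGISTER() in one compilation unit that includes rte_trace_point_register.h first. A hedged sketch with made-up names:

```c
/* app_trace.h -- hypothetical application trace point */
#include <rte_trace_point.h>

RTE_TRACE_POINT(
	app_trace_rx_burst,
	RTE_TRACE_POINT_ARGS(uint16_t port, uint16_t nb_rx),
	rte_trace_point_emit_u16(port);
	rte_trace_point_emit_u16(nb_rx);
)
```

```c
/* app_trace.c -- the register header must come first */
#include <rte_trace_point_register.h>
#include "app_trace.h"

RTE_TRACE_POINT_DEFINE(app_trace_rx_burst);

RTE_INIT(app_trace_init)
{
	RTE_TRACE_POINT_REGISTER(app_trace_rx_burst, app.rx.burst);
}
```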
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c
new file mode 100644
index 000000000..64f58fb66
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <fnmatch.h>
+#include <pwd.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_filesystem.h"
+#include "eal_trace.h"
+
+const char *
+trace_mode_to_string(enum rte_trace_mode mode)
+{
+ switch (mode) {
+ case RTE_TRACE_MODE_OVERWRITE: return "overwrite";
+ case RTE_TRACE_MODE_DISCARD: return "discard";
+ default: return "unknown";
+ }
+}
+
+const char *
+trace_area_to_string(enum trace_area_e area)
+{
+ switch (area) {
+ case TRACE_AREA_HEAP: return "heap";
+ case TRACE_AREA_HUGEPAGE: return "hugepage";
+ default: return "unknown";
+ }
+}
+
+static bool
+trace_entry_compare(const char *name)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace_point *tp;
+ int count = 0;
+
+ STAILQ_FOREACH(tp, tp_list, next) {
+ if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0)
+ count++;
+ if (count > 1) {
+ trace_err("found duplicate entry %s", name);
+ rte_errno = EEXIST;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool
+trace_has_duplicate_entry(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace_point *tp;
+
+ /* Is duplicate trace name registered */
+ STAILQ_FOREACH(tp, tp_list, next)
+ if (trace_entry_compare(tp->name))
+ return true;
+
+ return false;
+}
+
+void
+trace_uuid_generate(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+ uint64_t sz_total = 0;
+
+ /* Go over the registered trace points to get total size of events */
+ STAILQ_FOREACH(tp, tp_list, next) {
+ const uint16_t sz = *tp->handle & __RTE_TRACE_FIELD_SIZE_MASK;
+ sz_total += sz;
+ }
+
+ rte_uuid_t uuid = RTE_UUID_INIT(sz_total, trace->nb_trace_points,
+ 0x4370, 0x8f50, 0x222ddd514176ULL);
+ rte_uuid_copy(trace->uuid, uuid);
+}
+
+static int
+trace_session_name_generate(char *trace_dir)
+{
+ struct tm *tm_result;
+ time_t tm;
+ int rc;
+
+ tm = time(NULL);
+ if (tm == (time_t)-1)
+ goto fail;
+
+ tm_result = localtime(&tm);
+ if (tm_result == NULL)
+ goto fail;
+
+ rc = rte_strscpy(trace_dir, eal_get_hugefile_prefix(),
+ TRACE_PREFIX_LEN);
+ if (rc == -E2BIG)
+ rc = TRACE_PREFIX_LEN;
+ trace_dir[rc++] = '-';
+
+ rc = strftime(trace_dir + rc, TRACE_DIR_STR_LEN - rc,
+ "%Y-%m-%d-%p-%I-%M-%S", tm_result);
+ if (rc == 0)
+ goto fail;
+
+ return rc;
+fail:
+ rte_errno = errno;
+ return -rte_errno;
+}
+
+static int
+trace_dir_update(const char *str)
+{
+ struct trace *trace = trace_obj_get();
+ int rc, remaining;
+
+ remaining = sizeof(trace->dir) - trace->dir_offset;
+ rc = rte_strscpy(&trace->dir[0] + trace->dir_offset, str, remaining);
+ if (rc < 0)
+ goto fail;
+
+ trace->dir_offset += rc;
+fail:
+ return rc;
+}
+
+int
+eal_trace_args_save(const char *val)
+{
+ struct trace *trace = trace_obj_get();
+ struct trace_arg *arg = malloc(sizeof(*arg));
+
+ if (arg == NULL) {
+ trace_err("failed to allocate memory for %s", val);
+ return -ENOMEM;
+ }
+
+ arg->val = strdup(val);
+ if (arg->val == NULL) {
+ trace_err("failed to allocate memory for %s", val);
+ free(arg);
+ return -ENOMEM;
+ }
+
+ STAILQ_INSERT_TAIL(&trace->args, arg, next);
+ return 0;
+}
+
+void
+eal_trace_args_free(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct trace_arg *arg;
+
+ while (!STAILQ_EMPTY(&trace->args)) {
+ arg = STAILQ_FIRST(&trace->args);
+ STAILQ_REMOVE_HEAD(&trace->args, next);
+ free(arg->val);
+ free(arg);
+ }
+}
+
+int
+trace_args_apply(const char *arg)
+{
+ if (rte_trace_regexp(arg, true) < 0) {
+ trace_err("cannot enable trace for %s", arg);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+eal_trace_bufsz_args_save(char const *val)
+{
+ struct trace *trace = trace_obj_get();
+ uint64_t bufsz;
+
+ bufsz = rte_str_to_size(val);
+ if (bufsz == 0) {
+ trace_err("buffer size cannot be zero");
+ return -EINVAL;
+ }
+
+ trace->buff_len = bufsz;
+ return 0;
+}
+
+void
+trace_bufsz_args_apply(void)
+{
+ struct trace *trace = trace_obj_get();
+
+ if (trace->buff_len == 0)
+ trace->buff_len = 1024 * 1024; /* 1MB */
+}
+
+int
+eal_trace_mode_args_save(const char *val)
+{
+ struct trace *trace = trace_obj_get();
+ size_t len = strlen(val);
+ unsigned long tmp;
+ char *pattern;
+
+ if (len == 0) {
+ trace_err("value is not provided with option");
+ return -EINVAL;
+ }
+
+ pattern = (char *)calloc(1, len + 2);
+ if (pattern == NULL) {
+ trace_err("fail to allocate memory");
+ return -ENOMEM;
+ }
+
+ sprintf(pattern, "%s*", val);
+
+ if (fnmatch(pattern, "overwrite", 0) == 0)
+ tmp = RTE_TRACE_MODE_OVERWRITE;
+ else if (fnmatch(pattern, "discard", 0) == 0)
+ tmp = RTE_TRACE_MODE_DISCARD;
+ else {
+ free(pattern);
+ return -EINVAL;
+ }
+
+ trace->mode = tmp;
+ free(pattern);
+ return 0;
+}
+
+int
+eal_trace_dir_args_save(char const *val)
+{
+ struct trace *trace = trace_obj_get();
+ char *dir_path;
+ int rc;
+
+ if (strlen(val) >= sizeof(trace->dir) - 1) {
+ trace_err("input string is too big");
+ return -ENAMETOOLONG;
+ }
+
+ if (asprintf(&dir_path, "%s/", val) == -1) {
+ trace_err("failed to copy directory: %s", strerror(errno));
+ return -ENOMEM;
+ }
+
+ rc = trace_dir_update(dir_path);
+
+ free(dir_path);
+ return rc;
+}
+
+int
+trace_epoch_time_save(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct timespec epoch = { 0, 0 };
+ uint64_t avg, start, end;
+
+ start = rte_get_tsc_cycles();
+ if (clock_gettime(CLOCK_REALTIME, &epoch) < 0) {
+ trace_err("failed to get the epoch time");
+ return -1;
+ }
+ end = rte_get_tsc_cycles();
+ avg = (start + end) >> 1;
+
+ trace->epoch_sec = (uint64_t) epoch.tv_sec;
+ trace->epoch_nsec = (uint64_t) epoch.tv_nsec;
+ trace->uptime_ticks = avg;
+
+ return 0;
+}
+
+static int
+trace_dir_default_path_get(char *dir_path)
+{
+ struct trace *trace = trace_obj_get();
+ uint32_t size = sizeof(trace->dir);
+ struct passwd *pwd;
+ char *home_dir;
+
+ /* First check for shell environment variable */
+ home_dir = getenv("HOME");
+ if (home_dir == NULL) {
+ /* Fallback to password file entry */
+ pwd = getpwuid(getuid());
+ if (pwd == NULL)
+ return -EINVAL;
+
+ home_dir = pwd->pw_dir;
+ }
+
+ /* Append dpdk-traces to directory */
+ if (snprintf(dir_path, size, "%s/dpdk-traces/", home_dir) < 0)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
+int
+trace_mkdir(void)
+{
+ struct trace *trace = trace_obj_get();
+ char session[TRACE_DIR_STR_LEN];
+ char *dir_path;
+ int rc;
+
+ if (!trace->dir_offset) {
+ dir_path = calloc(1, sizeof(trace->dir));
+ if (dir_path == NULL) {
+ trace_err("fail to allocate memory");
+ return -ENOMEM;
+ }
+
+ rc = trace_dir_default_path_get(dir_path);
+ if (rc < 0) {
+ trace_err("fail to get default path");
+ free(dir_path);
+ return rc;
+ }
+
+ rc = trace_dir_update(dir_path);
+ free(dir_path);
+ if (rc < 0)
+ return rc;
+ }
+
+ /* Create the path if it doesn't exist; no "mkdir -p" available here */
+ rc = mkdir(trace->dir, 0700);
+ if (rc < 0 && errno != EEXIST) {
+ trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno));
+ rte_errno = errno;
+ return -rte_errno;
+ }
+
+ rc = trace_session_name_generate(session);
+ if (rc < 0)
+ return rc;
+ rc = trace_dir_update(session);
+ if (rc < 0)
+ return rc;
+
+ rc = mkdir(trace->dir, 0700);
+ if (rc < 0) {
+ trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno));
+ rte_errno = errno;
+ return -rte_errno;
+ }
+
+ RTE_LOG(INFO, EAL, "Trace dir: %s\n", trace->dir);
+ return 0;
+}
+
+static int
+trace_meta_save(struct trace *trace)
+{
+ char file_name[PATH_MAX];
+ FILE *f;
+ int rc;
+
+ rc = snprintf(file_name, PATH_MAX, "%s/metadata", trace->dir);
+ if (rc < 0)
+ return rc;
+
+ f = fopen(file_name, "w");
+ if (f == NULL)
+ return -errno;
+
+ rc = rte_trace_metadata_dump(f);
+
+ if (fclose(f))
+ rc = -errno;
+
+ return rc;
+}
+
+static inline int
+trace_file_sz(struct __rte_trace_header *hdr)
+{
+ return sizeof(struct __rte_trace_stream_header) + hdr->offset;
+}
+
+static int
+trace_mem_save(struct trace *trace, struct __rte_trace_header *hdr,
+ uint32_t cnt)
+{
+ char file_name[PATH_MAX];
+ FILE *f;
+ int rc;
+
+ rc = snprintf(file_name, PATH_MAX, "%s/channel0_%d", trace->dir, cnt);
+ if (rc < 0)
+ return rc;
+
+ f = fopen(file_name, "w");
+ if (f == NULL)
+ return -errno;
+
+ rc = fwrite(&hdr->stream_header, trace_file_sz(hdr), 1, f);
+ rc = (rc == 1) ? 0 : -EACCES;
+
+ if (fclose(f))
+ rc = -errno;
+
+ return rc;
+}
+
+int
+rte_trace_save(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+ int rc = 0;
+
+ if (trace->nb_trace_mem_list == 0)
+ return rc;
+
+ rc = trace_meta_save(trace);
+ if (rc)
+ return rc;
+
+ rte_spinlock_lock(&trace->lock);
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ header = trace->lcore_meta[count].mem;
+ rc = trace_mem_save(trace, header, count);
+ if (rc)
+ break;
+ }
+ rte_spinlock_unlock(&trace->lock);
+ return rc;
+}
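rte_trace_save() above is what writes the per-lcore channel files alongside the metadata file, producing a directory that CTF tools such as babeltrace can decode. It is typically called once at shutdown; a minimal sketch:

```c
#include <stdio.h>
#include <rte_trace.h>

static void
flush_traces(void)
{
	/* Writes <trace-dir>/metadata and <trace-dir>/channel0_<n>. */
	if (rte_trace_save() != 0)
		fprintf(stderr, "rte_trace_save failed\n");
}
```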
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 000000000..0a80bfbb3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
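A round trip through the parse/unparse pair above; the UUID literal is arbitrary:

```c
#include <stdio.h>
#include <rte_uuid.h>

int main(void)
{
	rte_uuid_t uu;
	char out[RTE_UUID_STRLEN];

	if (rte_uuid_parse("f81d4fae-7dec-11d0-a765-00a0c91e6bf6", uu) != 0)
		return 1;

	rte_uuid_unparse(uu, out, sizeof(out));
	printf("%s\n", out);	/* prints the same string back */
	return 0;
}
```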
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h
new file mode 100644
index 000000000..5d21f07c2
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+/**
+ * @file
+ * Stores functions and path defines for files and directories
+ * on the filesystem for Linux, that are used by the Linux EAL.
+ */
+
+#ifndef EAL_FILESYSTEM_H
+#define EAL_FILESYSTEM_H
+
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <rte_string_fns.h>
+#include "eal_internal_cfg.h"
+
+/* sets up platform-specific runtime data dir */
+int
+eal_create_runtime_dir(void);
+
+int
+eal_clean_runtime_dir(void);
+
+/** Function to return hugefile prefix that's currently set up */
+const char *
+eal_get_hugefile_prefix(void);
+
+/** Path of rte config file. */
+#define RUNTIME_CONFIG_FNAME "config"
+static inline const char *
+eal_runtime_config_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
+ return buffer;
+}
+
+/** Path of primary/secondary communication unix socket file. */
+#define MP_SOCKET_FNAME "mp_socket"
+static inline const char *
+eal_mp_socket_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ MP_SOCKET_FNAME);
+ return buffer;
+}
+
+#define FBARRAY_NAME_FMT "%s/fbarray_%s"
+static inline const char *
+eal_get_fbarray_path(char *buffer, size_t buflen, const char *name)
+{
+ snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(),
+ name);
+ return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FNAME "hugepage_info"
+static inline const char *
+eal_hugepage_info_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ HUGEPAGE_INFO_FNAME);
+ return buffer;
+}
+
+/** Path of hugepage data file. */
+#define HUGEPAGE_DATA_FNAME "hugepage_data"
+static inline const char *
+eal_hugepage_data_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ HUGEPAGE_DATA_FNAME);
+ return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+ snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+ eal_get_hugefile_prefix(), f_id);
+ return buffer;
+}
+
+/** Default filename prefix substituted into the hugefile format strings above. */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+/** Function to read a single numeric value from a file on the filesystem.
+ * Used to read information from files in /sys. */
+int eal_parse_sysfs_value(const char *filename, unsigned long *val);
+
+#endif /* EAL_FILESYSTEM_H */
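Given HUGEFILE_FMT and the default "rte" prefix above, hugepage backing files resolve to paths like /dev/hugepages/rtemap_0. A sketch of the internal helper, assuming a hugetlbfs mount at /dev/hugepages (illustrative only):

```c
#include <limits.h>
#include <stdio.h>

#include "eal_filesystem.h"	/* internal EAL header */

static void
show_hugefile_path(void)
{
	char path[PATH_MAX];

	/* With hugedir "/dev/hugepages" and prefix "rte": ".../rtemap_0" */
	eal_get_hugefile_path(path, sizeof(path), "/dev/hugepages", 0);
	printf("%s\n", path);
}
```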
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h
new file mode 100644
index 000000000..1b560d337
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef EAL_HUGEPAGES_H
+#define EAL_HUGEPAGES_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store information about hugepages that we mapped
+ * through the files in hugetlbfs.
+ */
+struct hugepage_file {
+ void *orig_va; /**< virtual addr of first mmap() */
+ void *final_va; /**< virtual addr of 2nd mmap() */
+ uint64_t physaddr; /**< physical addr */
+ size_t size; /**< the page size */
+ int socket_id; /**< NUMA socket ID */
+ int file_id; /**< the '%d' in HUGEFILE_FMT */
+ char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+};
+
+/**
+ * Read the information on what hugepages are available for the EAL to use,
+ * clearing out any unused ones.
+ */
+int eal_hugepage_info_init(void);
+
+/**
+ * Read whatever information primary process has shared about hugepages into
+ * secondary process.
+ */
+int eal_hugepage_info_read(void);
+
+#endif /* EAL_HUGEPAGES_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h
new file mode 100644
index 000000000..c650bc081
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef EAL_INTERNAL_CFG_H
+#define EAL_INTERNAL_CFG_H
+
+#include <rte_eal.h>
+#include <rte_pci_dev_feature_defs.h>
+
+#include "eal_thread.h"
+
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#define MAX_HUGEPAGE_SIZES 4 /**< support up to 4 page sizes */
+#else
+#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */
+#endif
+
+/*
+ * internal configuration structure for the number, size and
+ * mount points of hugepages
+ */
+struct hugepage_info {
+ uint64_t hugepage_sz; /**< size of a huge page */
+ char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */
+ uint32_t num_pages[RTE_MAX_NUMA_NODES];
+ /**< number of hugepages of that size on each socket */
+ int lock_descriptor; /**< file descriptor for hugepage dir */
+};
+
+/**
+ * internal configuration
+ */
+struct internal_config {
+ volatile size_t memory; /**< amount of asked memory */
+ volatile unsigned force_nchannel; /**< force number of channels */
+ volatile unsigned force_nrank; /**< force number of ranks */
+ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
+ unsigned hugepage_unlink; /**< true to unlink backing files */
+ volatile unsigned no_pci; /**< true to disable PCI */
+ volatile unsigned no_hpet; /**< true to disable HPET */
+ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
+ * instead of native TSC */
+ volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
+ volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
+ volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
+ /** true to try allocating memory on specific sockets */
+ volatile unsigned force_sockets;
+ volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
+ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
+ volatile unsigned legacy_mem;
+ /**< true to enable legacy memory behavior (no dynamic allocation,
+ * IOVA-contiguous segments).
+ */
+ volatile unsigned match_allocations;
+ /**< true to free hugepages exactly as allocated */
+ volatile unsigned single_file_segments;
+ /**< true if storing all pages within single files (per-page-size,
+ * per-node) non-legacy mode only.
+ */
+ volatile int syslog_facility; /**< facility passed to openlog() */
+ /** default interrupt mode for VFIO */
+ volatile enum rte_intr_mode vfio_intr_mode;
+ char *hugefile_prefix; /**< the base filename of hugetlbfs files */
+ char *hugepage_dir; /**< specific hugetlbfs directory to use */
+ char *user_mbuf_pool_ops_name;
+ /**< user defined mbuf pool ops name */
+ unsigned num_hugepage_sizes; /**< how many sizes on this system */
+ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+ enum rte_iova_mode iova_mode; /**< Set IOVA mode on this system */
+ rte_cpuset_t ctrl_cpuset; /**< cpuset for ctrl threads */
+ volatile unsigned int init_complete;
+ /**< indicates whether EAL has completed initialization */
+ unsigned int no_telemetry; /**< true to disable Telemetry */
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+
+void eal_reset_internal_config(struct internal_config *internal_cfg);
+
+#endif /* EAL_INTERNAL_CFG_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h
new file mode 100644
index 000000000..e953cd84e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef EAL_MEMALLOC_H
+#define EAL_MEMALLOC_H
+
+#include <stdbool.h>
+
+#include <rte_memory.h>
+
+/*
+ * Allocate segment of specified page size.
+ */
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket);
+
+/*
+ * Allocate `n_segs` segments.
+ *
+ * Note: `ms` can be NULL.
+ *
+ * Note: it is possible to request a best-effort allocation by setting `exact`
+ * to `false`, in which case the allocator will return however many pages it
+ * managed to allocate successfully.
+ */
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact);
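+
+/*
+ * For illustration only (a sketch, not a contract): a best-effort bulk
+ * allocation might look as follows; the segment count, page size and
+ * socket id are hypothetical values chosen for the example.
+ *
+ *	struct rte_memseg *segs[8];
+ *	int n = eal_memalloc_alloc_seg_bulk(segs, 8, RTE_PGSIZE_2M, 0, false);
+ *	(with exact == false, n is the number of segments actually allocated)
+ */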
+
+/*
+ * Deallocate segment
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms);
+
+/*
+ * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all
+ * segments, -1 on error. Any segments that could be deallocated will be
+ * deallocated, even in case of error.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs);
+
+/*
+ * Check if the memory of length `len` pointed to by `start`, which resides
+ * in memseg list `msl`, is IOVA-contiguous.
+ */
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len);
+
+/* synchronize local memory map to primary process */
+int
+eal_memalloc_sync_with_primary(void);
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg);
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg);
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len);
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
+
+/* returns fd or -errno */
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_list_fd(int list_idx, int fd);
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
+
+int
+eal_memalloc_init(void);
+
+#endif /* EAL_MEMALLOC_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h
new file mode 100644
index 000000000..583fcb595
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef EAL_MEMCFG_H
+#define EAL_MEMCFG_H
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_pause.h>
+#include <rte_spinlock.h>
+#include <rte_rwlock.h>
+#include <rte_tailq.h>
+
+#include "malloc_heap.h"
+
+/**
+ * Memory configuration shared across multiple processes.
+ */
+struct rte_mem_config {
+ volatile uint32_t magic; /**< Magic number - sanity check. */
+ uint32_t version;
+	/**< Prevents secondary processes from using different DPDK versions. */
+
+ /* memory topology */
+ uint32_t nchannel; /**< Number of channels (0 if unknown). */
+ uint32_t nrank; /**< Number of ranks (0 if unknown). */
+
+ /**
+ * current lock nest order
+ * - qlock->mlock (ring/hash/lpm)
+ * - mplock->qlock->mlock (mempool)
+	 * Notice:
+	 * *ALWAYS* obtain qlock first when both qlock and mlock must be held
+ */
+ rte_rwlock_t mlock; /**< used by memzones for thread safety. */
+ rte_rwlock_t qlock; /**< used by tailqs for thread safety. */
+ rte_rwlock_t mplock; /**< used by mempool library for thread safety. */
+ rte_spinlock_t tlock; /**< used by timer library for thread safety. */
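+
+	/*
+	 * Illustrative sketch of the ordering rule above (not meant to be
+	 * compiled as-is; `mcfg` is a hypothetical pointer to this struct):
+	 *
+	 *	rte_rwlock_write_lock(&mcfg->qlock);
+	 *	rte_rwlock_write_lock(&mcfg->mlock);
+	 *	... modify tailq and memzone state ...
+	 *	rte_rwlock_write_unlock(&mcfg->mlock);
+	 *	rte_rwlock_write_unlock(&mcfg->qlock);
+	 */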
+
+ rte_rwlock_t memory_hotplug_lock;
+ /**< Indicates whether memory hotplug request is in progress. */
+
+ /* memory segments and zones */
+ struct rte_fbarray memzones; /**< Memzone descriptors. */
+
+ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
+ /**< List of dynamic arrays holding memsegs */
+
+ struct rte_tailq_head tailq_head[RTE_MAX_TAILQ];
+ /**< Tailqs for objects */
+
+ struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
+ /**< DPDK malloc heaps */
+
+ int next_socket_id; /**< Next socket ID for external malloc heap */
+
+ /* rte_mem_config has to be mapped at the exact same address in all
+ * processes, so we need to store it.
+ */
+ uint64_t mem_cfg_addr; /**< Address of this structure in memory. */
+
+ /* Primary and secondary processes cannot run with different legacy or
+ * single file segments options, so to avoid having to specify these
+ * options to all processes, store them in shared config and update the
+ * internal config at init time.
+ */
+ uint32_t legacy_mem; /**< stored legacy mem parameter. */
+ uint32_t single_file_segments;
+ /**< stored single file segments parameter. */
+
+ uint64_t tsc_hz;
+ /**< TSC rate */
+
+ uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+};
+
+/* update internal config from shared mem config */
+void
+eal_mcfg_update_internal(void);
+
+/* update shared mem config from internal config */
+void
+eal_mcfg_update_from_internal(void);
+
+/* wait until primary process initialization is complete */
+void
+eal_mcfg_wait_complete(void);
+
+/* check if DPDK version of current process matches one stored in the config */
+int
+eal_mcfg_check_version(void);
+
+/* set mem config as complete */
+void
+eal_mcfg_complete(void);
+
+#endif /* EAL_MEMCFG_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_options.h b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h
new file mode 100644
index 000000000..18e6da9ab
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#ifndef EAL_OPTIONS_H
+#define EAL_OPTIONS_H
+
+#include "getopt.h"
+
+struct rte_tel_data;
+
+enum {
+ /* long options mapped to a short option */
+#define OPT_HELP "help"
+ OPT_HELP_NUM = 'h',
+#define OPT_PCI_BLACKLIST "pci-blacklist"
+ OPT_PCI_BLACKLIST_NUM = 'b',
+#define OPT_PCI_WHITELIST "pci-whitelist"
+ OPT_PCI_WHITELIST_NUM = 'w',
+
+	/* the first long-only option value must be >= 256, so that we won't
+	 * conflict with short options */
+ OPT_LONG_MIN_NUM = 256,
+#define OPT_BASE_VIRTADDR "base-virtaddr"
+ OPT_BASE_VIRTADDR_NUM,
+#define OPT_CREATE_UIO_DEV "create-uio-dev"
+ OPT_CREATE_UIO_DEV_NUM,
+#define OPT_FILE_PREFIX "file-prefix"
+ OPT_FILE_PREFIX_NUM,
+#define OPT_HUGE_DIR "huge-dir"
+ OPT_HUGE_DIR_NUM,
+#define OPT_HUGE_UNLINK "huge-unlink"
+ OPT_HUGE_UNLINK_NUM,
+#define OPT_LCORES "lcores"
+ OPT_LCORES_NUM,
+#define OPT_LOG_LEVEL "log-level"
+ OPT_LOG_LEVEL_NUM,
+#define OPT_TRACE "trace"
+ OPT_TRACE_NUM,
+#define OPT_TRACE_DIR "trace-dir"
+ OPT_TRACE_DIR_NUM,
+#define OPT_TRACE_BUF_SIZE "trace-bufsz"
+ OPT_TRACE_BUF_SIZE_NUM,
+#define OPT_TRACE_MODE "trace-mode"
+ OPT_TRACE_MODE_NUM,
+#define OPT_MASTER_LCORE "master-lcore"
+ OPT_MASTER_LCORE_NUM,
+#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name"
+ OPT_MBUF_POOL_OPS_NAME_NUM,
+#define OPT_PROC_TYPE "proc-type"
+ OPT_PROC_TYPE_NUM,
+#define OPT_NO_HPET "no-hpet"
+ OPT_NO_HPET_NUM,
+#define OPT_NO_HUGE "no-huge"
+ OPT_NO_HUGE_NUM,
+#define OPT_NO_PCI "no-pci"
+ OPT_NO_PCI_NUM,
+#define OPT_NO_SHCONF "no-shconf"
+ OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
+#define OPT_SOCKET_MEM "socket-mem"
+ OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
+#define OPT_SYSLOG "syslog"
+ OPT_SYSLOG_NUM,
+#define OPT_VDEV "vdev"
+ OPT_VDEV_NUM,
+#define OPT_VFIO_INTR "vfio-intr"
+ OPT_VFIO_INTR_NUM,
+#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
+ OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_LEGACY_MEM "legacy-mem"
+ OPT_LEGACY_MEM_NUM,
+#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments"
+ OPT_SINGLE_FILE_SEGMENTS_NUM,
+#define OPT_IOVA_MODE "iova-mode"
+ OPT_IOVA_MODE_NUM,
+#define OPT_MATCH_ALLOCATIONS "match-allocations"
+ OPT_MATCH_ALLOCATIONS_NUM,
+#define OPT_TELEMETRY "telemetry"
+ OPT_TELEMETRY_NUM,
+#define OPT_NO_TELEMETRY "no-telemetry"
+ OPT_NO_TELEMETRY_NUM,
+ OPT_LONG_MAX_NUM
+};
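+
+/*
+ * For illustration: each OPT_* name/value pair above is expected to map to
+ * one getopt(3) long-option entry in eal_long_options, roughly of the form
+ * (sketch only):
+ *
+ *	{OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM},
+ */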
+
+extern const char eal_short_options[];
+extern const struct option eal_long_options[];
+
+int eal_parse_common_option(int opt, const char *argv,
+ struct internal_config *conf);
+int eal_option_device_parse(void);
+int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_cleanup_config(struct internal_config *internal_cfg);
+int eal_check_common_options(struct internal_config *internal_cfg);
+void eal_common_usage(void);
+enum rte_proc_type_t eal_proc_type_detect(void);
+int eal_plugins_init(void);
+int eal_save_args(int argc, char **argv);
+int handle_eal_info_request(const char *cmd, const char *params __rte_unused,
+ struct rte_tel_data *d);
+
+#endif /* EAL_OPTIONS_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_private.h b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h
new file mode 100644
index 000000000..869ce183a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _EAL_PRIVATE_H_
+#define _EAL_PRIVATE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_dev.h>
+#include <rte_lcore.h>
+
+/**
+ * Structure storing internal configuration (per-lcore)
+ */
+struct lcore_config {
+ pthread_t thread_id; /**< pthread identifier */
+	int pipe_master2slave[2]; /**< communication pipe, master to slave */
+	int pipe_slave2master[2]; /**< communication pipe, slave to master */
+
+ lcore_function_t * volatile f; /**< function to call */
+ void * volatile arg; /**< argument of function */
+ volatile int ret; /**< return value of function */
+
+ volatile enum rte_lcore_state_t state; /**< lcore state */
+ unsigned int socket_id; /**< physical socket id for this lcore */
+ unsigned int core_id; /**< core number on socket for this lcore */
+ int core_index; /**< relative index, starting from 0 */
+	uint8_t core_role; /**< role of core, e.g. OFF, RTE, SERVICE */
+
+	rte_cpuset_t cpuset; /**< cpu set to which the lcore has affinity */
+};
+
+extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/**
+ * The global RTE configuration structure.
+ */
+struct rte_config {
+ uint32_t master_lcore; /**< Id of the master lcore */
+ uint32_t lcore_count; /**< Number of available logical cores. */
+ uint32_t numa_node_count; /**< Number of detected NUMA nodes. */
+ uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
+ uint32_t service_lcore_count;/**< Number of available service cores. */
+ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
+
+ /** Primary or secondary configuration */
+ enum rte_proc_type_t process_type;
+
+ /** PA or VA mapping mode */
+ enum rte_iova_mode iova_mode;
+
+ /**
+ * Pointer to memory configuration, which may be shared across multiple
+ * DPDK instances
+ */
+ struct rte_mem_config *mem_config;
+} __rte_packed;
+
+/**
+ * Get the global configuration structure.
+ *
+ * @return
+ * A pointer to the global configuration structure.
+ */
+struct rte_config *rte_eal_get_configuration(void);
+
+/**
+ * Initialize the memzone subsystem (private to eal).
+ *
+ * @return
+ * - 0 on success
+ * - Negative on error
+ */
+int rte_eal_memzone_init(void);
+
+/**
+ * Common log initialization function (private to eal). Determines
+ * where log data is written when no call to rte_openlog_stream is
+ * in effect.
+ *
+ * @param default_log
+ * The default log stream to be used.
+ */
+void eal_log_set_default(FILE *default_log);
+
+/**
+ * Fill configuration with number of physical and logical processors
+ *
+ * This function is private to EAL.
+ *
+ * Parse /proc/cpuinfo to get the number of physical and logical
+ * processors on the machine.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_cpu_init(void);
+
+/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
+ * Map memory
+ *
+ * This function is private to EAL.
+ *
+ * Fill the configuration structure with the mapping information, and
+ * return 0 on success.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memory_init(void);
+
+/**
+ * Configure timers
+ *
+ * This function is private to EAL.
+ *
+ * Mmap the memory areas used by the HPET (high precision event timer) that
+ * will provide our time reference, and configure the TSC frequency so that
+ * it can also be used as a reference.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_timer_init(void);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_log_init(const char *id, int facility);
+
+/**
+ * Save the log regexp for later
+ */
+int rte_log_save_regexp(const char *type, int priority);
+int rte_log_save_pattern(const char *pattern, int priority);
+
+/**
+ * Init tail queues for non-EAL library structures. This is to allow
+ * the rings, mempools, etc. lists to be shared among multiple processes
+ *
+ * This function is private to EAL
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_tailqs_init(void);
+
+/**
+ * Init interrupt handling.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_intr_init(void);
+
+/**
+ * Init the alarm mechanism. This allows a callback to be called after a
+ * specific time.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_alarm_init(void);
+
+/**
+ * Check whether a kernel module (e.g. vfio, vfio_iommu_type1)
+ * is loaded.
+ *
+ * @param module_name
+ *	The name of the module to check
+ *
+ * @return
+ *	-1 if an error occurred (NULL pointer or open failure)
+ *	0 if the module is not loaded
+ *	1 if the module is loaded
+ */
+int rte_eal_check_module(const char *module_name);
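+
+/*
+ * Usage sketch (the module name is an example):
+ *
+ *	int loaded = rte_eal_check_module("vfio_pci");
+ *	if (loaded == 1)
+ *		... the module is loaded ...
+ */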
+
+/**
+ * Get virtual area of specified size from the OS.
+ *
+ * This function is private to the EAL.
+ *
+ * @param requested_addr
+ * Address where to request address space.
+ * @param size
+ * Size of requested area.
+ * @param page_sz
+ * Page size on which to align requested virtual area.
+ * @param flags
+ * EAL_VIRTUAL_AREA_* flags.
+ * @param mmap_flags
+ * Extra flags passed directly to mmap().
+ *
+ * @return
+ * Virtual area address if successful.
+ * NULL if unsuccessful.
+ */
+
+#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
+/**< don't fail if the exact requested address cannot be obtained. */
+#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
+/**< try getting smaller (decremented by page size) virtual areas if an area
+ * of the requested size cannot be obtained.
+ */
+#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
+/**< immediately unmap reserved virtual area. */
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags);
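+
+/*
+ * Minimal usage sketch (sizes are illustrative): reserve a 2 MB-aligned
+ * region, letting the OS pick a different address or a smaller size.
+ *
+ *	size_t sz = RTE_PGSIZE_1G;
+ *	void *va = eal_get_virtual_area(NULL, &sz, RTE_PGSIZE_2M,
+ *		EAL_VIRTUAL_AREA_ADDR_IS_HINT | EAL_VIRTUAL_AREA_ALLOW_SHRINK, 0);
+ */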
+
+/**
+ * Get cpu core_id.
+ *
+ * This function is private to the EAL.
+ */
+unsigned eal_cpu_core_id(unsigned lcore_id);
+
+/**
+ * Check if cpu is present.
+ *
+ * This function is private to the EAL.
+ */
+int eal_cpu_detected(unsigned lcore_id);
+
+/**
+ * Set TSC frequency from precise value or estimation
+ *
+ * This function is private to the EAL.
+ */
+void set_tsc_freq(void);
+
+/**
+ * Get precise TSC frequency from system
+ *
+ * This function is private to the EAL.
+ */
+uint64_t get_tsc_freq(void);
+
+/**
+ * Get the TSC frequency if the architecture supports it.
+ *
+ * This function is private to the EAL.
+ *
+ * @return
+ * The number of TSC cycles in one second.
+ * Returns zero if the architecture support is not available.
+ */
+uint64_t get_tsc_freq_arch(void);
+
+/**
+ * Prepare physical memory mapping
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_init(void);
+
+/**
+ * Creates memory mapping in secondary process
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_attach(void);
+
+/**
+ * Find a bus capable of identifying a device.
+ *
+ * @param str
+ * A device identifier (PCI address, virtual PMD name, ...).
+ *
+ * @return
+ * A valid bus handle if found.
+ * NULL if no bus is able to parse this device.
+ */
+struct rte_bus *rte_bus_find_by_device_name(const char *str);
+
+/**
+ * Create the unix channel for primary/secondary communication.
+ *
+ * @return
+ * 0 on success;
+ * (<0) on failure.
+ */
+int rte_mp_channel_init(void);
+
+/**
+ * Primary/secondary communication cleanup.
+ */
+void rte_mp_channel_cleanup(void);
+
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layer for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a working copy of devstr, which will be
+ * used by layer descriptors within the rte_devargs. In this case, the
+ * rte_devargs should be cleaned up before being freed.
+ *
+ * @param devargs
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
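+
+/*
+ * Illustrative only: a layered device string is expected to take the form
+ * "bus=X,paramX=x/class=Y,paramY=y/driver=Z,paramZ=z". A hypothetical
+ * example (PCI address chosen arbitrarily):
+ *
+ *	struct rte_devargs da;
+ *	int ret = rte_devargs_layers_parse(&da,
+ *		"bus=pci,addr=00:02.0/class=eth");
+ */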
+
+/*
+ * Probe a device in the local process.
+ *
+ * @param devargs
+ * Device arguments including bus, class and driver properties.
+ * @param new_dev
+ *   Output pointer for the newly probed device.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_probe(const char *devargs, struct rte_device **new_dev);
+
+/**
+ * Hotplug remove a given device from a specific bus at local process.
+ *
+ * @param dev
+ * Data structure of the device to remove.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_remove(struct rte_device *dev);
+
+/**
+ * Iterate over all buses to find the corresponding bus to handle the sigbus
+ * error.
+ * @param failure_addr
+ *   Pointer to the fault address of the sigbus error.
+ *
+ * @return
+ *	 0 sigbus handled successfully.
+ *	 -1 failed to handle the sigbus.
+ *	 1 no bus can handle the sigbus.
+ */
+int rte_bus_sigbus_handler(const void *failure_addr);
+
+/**
+ * @internal
+ * Register the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_register(void);
+
+/**
+ * @internal
+ * Unregister the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_unregister(void);
+
+/**
+ * Get OS-specific EAL mapping base address.
+ */
+uint64_t
+eal_get_baseaddr(void);
+
+void *
+eal_malloc_no_trace(const char *type, size_t size, unsigned int align);
+
+void eal_free_no_trace(void *addr);
+
+#endif /* _EAL_PRIVATE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h
new file mode 100644
index 000000000..b40ed249e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef EAL_THREAD_H
+#define EAL_THREAD_H
+
+#include <rte_lcore.h>
+
+/**
+ * Basic thread loop, called for each thread by eal_init().
+ *
+ * @param arg
+ * opaque pointer
+ */
+__rte_noreturn void *eal_thread_loop(void *arg);
+
+/**
+ * Init per-lcore info for master thread
+ *
+ * @param lcore_id
+ * identifier of master lcore
+ */
+void eal_thread_init_master(unsigned lcore_id);
+
+/**
+ * Get the NUMA socket id from cpu id.
+ * This function is private to EAL.
+ *
+ * @param cpu_id
+ *   The logical processor id.
+ * @return
+ * socket_id or SOCKET_ID_ANY
+ */
+unsigned eal_cpu_socket_id(unsigned cpu_id);
+
+/**
+ * Default buffer size to use with eal_thread_dump_affinity()
+ */
+#define RTE_CPU_AFFINITY_STR_LEN 256
+
+/**
+ * Dump the current pthread cpuset.
+ * This function is private to EAL.
+ *
+ * Note:
+ *   If the dump size is greater than the size of the given buffer,
+ *   the string will be truncated and terminated with '\0'.
+ *
+ * @param str
+ * The string buffer the cpuset will dump to.
+ * @param size
+ * The string buffer size.
+ * @return
+ * 0 for success, -1 if truncation happens.
+ */
+int
+eal_thread_dump_affinity(char *str, unsigned size);
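+
+/*
+ * Usage sketch, using the default buffer size defined above:
+ *
+ *	char cpus[RTE_CPU_AFFINITY_STR_LEN];
+ *	if (eal_thread_dump_affinity(cpus, sizeof(cpus)) == 0)
+ *		printf("affinity: %s\n", cpus);
+ */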
+
+#endif /* EAL_THREAD_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h
new file mode 100644
index 000000000..8f6061615
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __EAL_TRACE_H
+#define __EAL_TRACE_H
+
+#include <rte_cycles.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <rte_trace.h>
+#include <rte_trace_point.h>
+#include <rte_uuid.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+#define trace_err(fmt, args...) \
+ RTE_LOG(ERR, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args)
+
+#define trace_crit(fmt, args...) \
+ RTE_LOG(CRIT, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args)
+
+#define TRACE_PREFIX_LEN 12
+#define TRACE_DIR_STR_LEN (sizeof("YYYY-mm-dd-AM-HH-MM-SS") + TRACE_PREFIX_LEN)
+#define TRACE_CTF_FIELD_SIZE 384
+#define TRACE_POINT_NAME_SIZE 64
+#define TRACE_CTF_MAGIC 0xC1FC1FC1
+#define TRACE_MAX_ARGS 32
+
+struct trace_point {
+ STAILQ_ENTRY(trace_point) next;
+ rte_trace_point_t *handle;
+ char name[TRACE_POINT_NAME_SIZE];
+ char ctf_field[TRACE_CTF_FIELD_SIZE];
+};
+
+enum trace_area_e {
+ TRACE_AREA_HEAP,
+ TRACE_AREA_HUGEPAGE,
+};
+
+struct thread_mem_meta {
+ void *mem;
+ enum trace_area_e area;
+};
+
+struct trace_arg {
+ STAILQ_ENTRY(trace_arg) next;
+ char *val;
+};
+
+struct trace {
+ char dir[PATH_MAX];
+ int dir_offset;
+ int register_errno;
+ bool status;
+ enum rte_trace_mode mode;
+ rte_uuid_t uuid;
+ uint32_t buff_len;
+ STAILQ_HEAD(, trace_arg) args;
+ uint32_t nb_trace_points;
+ uint32_t nb_trace_mem_list;
+ struct thread_mem_meta *lcore_meta;
+ uint64_t epoch_sec;
+ uint64_t epoch_nsec;
+ uint64_t uptime_ticks;
+ char *ctf_meta;
+ uint32_t ctf_meta_offset_freq;
+ uint32_t ctf_meta_offset_freq_off_s;
+ uint32_t ctf_meta_offset_freq_off;
+ uint16_t ctf_fixup_done;
+ rte_spinlock_t lock;
+};
+
+/* Helper functions */
+static inline uint16_t
+trace_id_get(rte_trace_point_t *trace)
+{
+ return (*trace & __RTE_TRACE_FIELD_ID_MASK) >>
+ __RTE_TRACE_FIELD_ID_SHIFT;
+}
+
+static inline size_t
+trace_mem_sz(uint32_t len)
+{
+ return len + sizeof(struct __rte_trace_header);
+}
+
+/* Trace object functions */
+struct trace *trace_obj_get(void);
+
+/* Trace point list functions */
+STAILQ_HEAD(trace_point_head, trace_point);
+struct trace_point_head *trace_list_head_get(void);
+
+/* Util functions */
+const char *trace_mode_to_string(enum rte_trace_mode mode);
+const char *trace_area_to_string(enum trace_area_e area);
+int trace_args_apply(const char *arg);
+void trace_bufsz_args_apply(void);
+bool trace_has_duplicate_entry(void);
+void trace_uuid_generate(void);
+int trace_metadata_create(void);
+void trace_metadata_destroy(void);
+int trace_mkdir(void);
+int trace_epoch_time_save(void);
+void trace_mem_per_thread_free(void);
+
+/* EAL interface */
+int eal_trace_init(void);
+void eal_trace_fini(void);
+int eal_trace_args_save(const char *val);
+void eal_trace_args_free(void);
+int eal_trace_dir_args_save(const char *val);
+int eal_trace_mode_args_save(const char *val);
+int eal_trace_bufsz_args_save(const char *val);
+
+#endif /* __EAL_TRACE_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c
new file mode 100644
index 000000000..ee791903b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c
@@ -0,0 +1,465 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_alarm.h>
+#include <rte_string_fns.h>
+#include <rte_devargs.h>
+
+#include "hotplug_mp.h"
+#include "eal_private.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 second timeout */
+
+struct mp_reply_bundle {
+ struct rte_mp_msg msg;
+ void *peer;
+};
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+/**
+ * Secondary to primary request.
+ * The flow starts from function eal_dev_hotplug_request_to_primary.
+ *
+ * Device attach on a secondary:
+ * a) the secondary sends a sync request to the primary.
+ * b) the primary receives the request and attaches the new device;
+ *    if that fails, go to i).
+ * c) the primary forwards the attach sync request to all secondaries.
+ * d) each secondary receives the request, attaches the device and replies.
+ * e) the primary checks the replies; if all succeeded, go to j).
+ * f) the primary sends an attach rollback sync request to all secondaries.
+ * g) each secondary receives the request, detaches the device and replies.
+ * h) the primary receives the replies and detaches the device as a
+ *    rollback action.
+ * i) send attach failure to the secondary as a reply to step a), go to k).
+ * j) send attach success to the secondary as a reply to step a).
+ * k) the secondary receives the reply and returns.
+ *
+ * Device detach on a secondary:
+ * a) the secondary sends a sync request to the primary.
+ * b) the primary sends a detach sync request to all secondaries.
+ * c) each secondary detaches the device and replies.
+ * d) the primary checks the replies; if all succeeded, go to g).
+ * e) the primary sends a detach rollback sync request to all secondaries.
+ * f) each secondary receives the request and re-attaches the device;
+ *    go to h).
+ * g) the primary detaches the device; on success go to i), else go to e).
+ * h) the primary sends detach failure to the secondary as a reply to
+ *    step a), go to j).
+ * i) the primary sends detach success to the secondary as a reply to
+ *    step a).
+ * j) the secondary receives the reply and returns.
+ */
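+
+/*
+ * For illustration, step a) of the attach flow above might look as follows
+ * on the secondary side (the PCI address is a hypothetical example):
+ *
+ *	struct eal_dev_mp_req req;
+ *	memset(&req, 0, sizeof(req));
+ *	req.t = EAL_DEV_REQ_TYPE_ATTACH;
+ *	strlcpy(req.devargs, "0000:81:00.0", sizeof(req.devargs));
+ *	if (eal_dev_hotplug_request_to_primary(&req) == 0 && req.result == 0)
+ *		... the device is attached in all processes ...
+ */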
+
+static int
+send_response_to_secondary(const struct eal_dev_mp_req *req,
+ int result,
+ const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ int ret;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ mp_resp.len_param = sizeof(*resp);
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static void
+__handle_secondary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ const struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req tmp_req;
+ struct rte_devargs da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ tmp_req = *req;
+
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ ret = local_dev_probe(req->devargs, &dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n");
+ if (ret != -EEXIST)
+ goto finish;
+ }
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+ if (tmp_req.result != 0) {
+ ret = tmp_req.result;
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n");
+ if (ret != -EEXIST)
+ goto rollback;
+ }
+ } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) {
+ ret = rte_devargs_parse(&da, req->devargs);
+ if (ret != 0)
+ goto finish;
+ free(da.args); /* we don't need those */
+ da.args = NULL;
+
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ bus = rte_bus_find_by_name(da.bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da.bus->name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da.name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da.name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ if (tmp_req.result != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n");
+ ret = tmp_req.result;
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ ret = local_dev_remove(dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n");
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ }
+ goto finish;
+
+rollback:
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ local_dev_remove(dev);
+ } else {
+ tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ }
+
+finish:
+ ret = send_response_to_secondary(&tmp_req, ret, bundle->peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct mp_reply_bundle *bundle;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ int ret = 0;
+
+ bundle = malloc(sizeof(*bundle));
+ if (bundle == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM, peer);
+ }
+
+ bundle->msg = *msg;
+ /**
+	 * We need to send the reply on the interrupt thread, but the peer
+	 * can't be parsed directly, so this is a temporary hack that needs
+	 * to be fixed when it is ready.
+ */
+ bundle->peer = strdup(peer);
+ if (bundle->peer == NULL) {
+ free(bundle);
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM, peer);
+ }
+
+ /**
+	 * We are in the IPC callback thread; sync IPC is not allowed here
+	 * because it would deadlock, so we delegate the task to the
+	 * interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to add mp task\n");
+ free(bundle->peer);
+ free(bundle);
+ return send_response_to_secondary(req, ret, peer);
+ }
+ return 0;
+}
+
+static void __handle_primary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct rte_devargs *da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+
+ switch (req->t) {
+ case EAL_DEV_REQ_TYPE_ATTACH:
+ case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
+ ret = local_dev_probe(req->devargs, &dev);
+ break;
+ case EAL_DEV_REQ_TYPE_DETACH:
+ case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
+ da = calloc(1, sizeof(*da));
+ if (da == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = rte_devargs_parse(da, req->devargs);
+ if (ret != 0)
+ goto quit;
+
+ bus = rte_bus_find_by_name(da->bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ if (!rte_dev_is_probed(dev)) {
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK) {
+ /**
+ * Don't fail the rollback just because there's
+ * nothing to do.
+ */
+ ret = 0;
+ } else
+ ret = -ENODEV;
+
+ goto quit;
+ }
+
+ ret = local_dev_remove(dev);
+quit:
+ free(da->args);
+ free(da);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+ resp->result = ret;
+ if (rte_mp_reply(&mp_resp, bundle->peer) < 0)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct mp_reply_bundle *bundle;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+
+ bundle = calloc(1, sizeof(*bundle));
+ if (bundle == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ resp->result = -ENOMEM;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+
+ bundle->msg = *msg;
+ /**
+	 * We need to send the reply on the interrupt thread, but the peer
+	 * can't be parsed directly, so this is a temporary hack that needs
+	 * to be fixed when it is ready.
+ */
+ bundle->peer = (void *)strdup(peer);
+ if (bundle->peer == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ free(bundle);
+ resp->result = -ENOMEM;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+
+ /**
+	 * We are in the IPC callback thread; sync IPC is not allowed here
+	 * because it would deadlock, so we delegate the task to the
+	 * interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
+ if (ret != 0) {
+ free(bundle->peer);
+ free(bundle);
+ resp->result = ret;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ struct eal_dev_mp_req *resp;
+ int ret;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret || mp_reply.nb_received != 1) {
+ RTE_LOG(ERR, EAL, "Cannot send request to primary\n");
+ if (!ret)
+ return -1;
+ return ret;
+ }
+
+ resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param;
+ req->result = resp->result;
+
+ free(mp_reply.msgs);
+ return ret;
+}
+
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret != 0) {
+ /* if IPC is not supported, behave as if the call succeeded */
+ if (rte_errno != ENOTSUP)
+ RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n");
+ else
+ ret = 0;
+ return ret;
+ }
+
+ if (mp_reply.nb_sent != mp_reply.nb_received) {
+ RTE_LOG(ERR, EAL, "not all secondary reply\n");
+ free(mp_reply.msgs);
+ return -1;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result != 0) {
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+ resp->result == -EEXIST)
+ continue;
+ if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
+ resp->result == -ENOENT)
+ continue;
+ req->result = resp->result;
+ }
+ }
+
+ free(mp_reply.msgs);
+ return 0;
+}
+
+int eal_mp_dev_hotplug_init(void)
+{
+ int ret;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_secondary_request);
+ /* primary is allowed to not support IPC */
+ if (ret != 0 && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ } else {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_primary_request);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h
new file mode 100644
index 000000000..8fcf9b52e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _HOTPLUG_MP_H_
+#define _HOTPLUG_MP_H_
+
+#include "rte_dev.h"
+#include "rte_bus.h"
+
+#define EAL_DEV_MP_ACTION_REQUEST "eal_dev_mp_request"
+#define EAL_DEV_MP_ACTION_RESPONSE "eal_dev_mp_response"
+
+#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
+#define EAL_DEV_MP_BUS_NAME_MAX_LEN 32
+#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128
+
+enum eal_dev_req_type {
+ EAL_DEV_REQ_TYPE_ATTACH,
+ EAL_DEV_REQ_TYPE_DETACH,
+ EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK,
+ EAL_DEV_REQ_TYPE_DETACH_ROLLBACK,
+};
+
+struct eal_dev_mp_req {
+ enum eal_dev_req_type t;
+ char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN];
+ int result;
+};
+
+/**
+ * Register all mp action callbacks for hotplug.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int
+eal_mp_dev_hotplug_init(void);
+
+/**
+ * Synchronous wrapper for a secondary process to send a request
+ * to the primary process; this is invoked when an attach or
+ * detach request is issued from a secondary process.
+ */
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req);
+
+/**
+ * Synchronous wrapper for the primary process to send a request
+ * to all secondary processes; this is invoked when an attach or
+ * detach request is issued from the primary process.
+ */
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req);
+
+
+#endif /* _HOTPLUG_MP_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c
new file mode 100644
index 000000000..51cdfc5d5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <inttypes.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+/*
+ * If debugging is enabled, freed memory is set to a poison value
+ * to catch buggy programs. Otherwise, freed memory is set to zero
+ * to avoid having to zero it in rte_zmalloc().
+ */
+#ifdef RTE_MALLOC_DEBUG
+#define MALLOC_POISON 0x6b
+#else
+#define MALLOC_POISON 0
+#endif
+
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* return if aligned address is already out of malloc element */
+ if (contig_seg_start > data_end)
+ return 0;
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous. however, we can only
+ * make these assumptions about internal memory - externally allocated
+ * segments have to be checked.
+ */
+ if (!elem->msl->external &&
+ (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem &&
+ rte_eal_has_hugepages())))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
+
+/*
+ * Initialize a general malloc_elem header structure
+ */
+void
+malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
+ struct rte_memseg_list *msl, size_t size,
+ struct malloc_elem *orig_elem, size_t orig_size)
+{
+ elem->heap = heap;
+ elem->msl = msl;
+ elem->prev = NULL;
+ elem->next = NULL;
+ memset(&elem->free_list, 0, sizeof(elem->free_list));
+ elem->state = ELEM_FREE;
+ elem->size = size;
+ elem->pad = 0;
+ elem->orig_elem = orig_elem;
+ elem->orig_size = orig_size;
+ set_header(elem);
+ set_trailer(elem);
+}
+
+void
+malloc_elem_insert(struct malloc_elem *elem)
+{
+ struct malloc_elem *prev_elem, *next_elem;
+ struct malloc_heap *heap = elem->heap;
+
+ /* first and last elements must be both NULL or both non-NULL */
+ if ((heap->first == NULL) != (heap->last == NULL)) {
+ RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
+ return;
+ }
+
+ if (heap->first == NULL && heap->last == NULL) {
+ /* if empty heap */
+ heap->first = elem;
+ heap->last = elem;
+ prev_elem = NULL;
+ next_elem = NULL;
+ } else if (elem < heap->first) {
+ /* if lower than start */
+ prev_elem = NULL;
+ next_elem = heap->first;
+ heap->first = elem;
+ } else if (elem > heap->last) {
+ /* if higher than end */
+ prev_elem = heap->last;
+ next_elem = NULL;
+ heap->last = elem;
+ } else {
+ /* the new memory is somewhere between start and end */
+ uint64_t dist_from_start, dist_from_end;
+
+ dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+ dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+ /* check which is closer, and find closest list entries */
+ if (dist_from_start < dist_from_end) {
+ prev_elem = heap->first;
+ while (prev_elem->next < elem)
+ prev_elem = prev_elem->next;
+ next_elem = prev_elem->next;
+ } else {
+ next_elem = heap->last;
+ while (next_elem->prev > elem)
+ next_elem = next_elem->prev;
+ prev_elem = next_elem->prev;
+ }
+ }
+
+ /* insert new element */
+ elem->prev = prev_elem;
+ elem->next = next_elem;
+ if (prev_elem)
+ prev_elem->next = elem;
+ if (next_elem)
+ next_elem->prev = elem;
+}
+
+/*
+ * Attempt to find enough physically contiguous memory in this block to store
+ * our data. Assume that element has at least enough space to fit in the data,
+ * so we just check the page addresses.
+ */
+static bool
+elem_check_phys_contig(const struct rte_memseg_list *msl,
+ void *start, size_t size)
+{
+ return eal_memalloc_is_contig(msl, start, size);
+}
+
+/*
+ * calculate the starting point of where data of the requested size
+ * and alignment would fit in the current element. If the data doesn't
+ * fit, return NULL.
+ */
+static void *
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ size_t elem_size = elem->size;
+
+ /*
+ * we're allocating from the end, so adjust the size of element by
+ * alignment size.
+ */
+ while (elem_size >= size) {
+ const size_t bmask = ~(bound - 1);
+ uintptr_t end_pt = (uintptr_t)elem +
+ elem_size - MALLOC_ELEM_TRAILER_LEN;
+ uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ uintptr_t new_elem_start;
+
+ /* check boundary */
+ if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+ end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+ new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ end_pt = new_data_start + size;
+
+ if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+ return NULL;
+ }
+
+ new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+		/* if the new start point is before the existing start,
+ * it won't fit
+ */
+ if (new_elem_start < (uintptr_t)elem)
+ return NULL;
+
+ if (contig) {
+ size_t new_data_size = end_pt - new_data_start;
+
+ /*
+ * if physical contiguousness was requested and we
+ * couldn't fit all data into one physically contiguous
+ * block, try again with lower addresses.
+ */
+ if (!elem_check_phys_contig(elem->msl,
+ (void *)new_data_start,
+ new_data_size)) {
+ elem_size -= align;
+ continue;
+ }
+ }
+ return (void *)new_elem_start;
+ }
+ return NULL;
+}
+
+/*
+ * use elem_start_pt to determine whether we can meet the size and
+ * alignment request from the current element
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ return elem_start_pt(elem, size, align, bound, contig) != NULL;
+}
+
+/*
+ * split an existing element into two smaller elements at the given
+ * split_pt parameter.
+ */
+static void
+split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
+{
+ struct malloc_elem *next_elem = elem->next;
+ const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+ const size_t new_elem_size = elem->size - old_elem_size;
+
+ malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
+ elem->orig_elem, elem->orig_size);
+ split_pt->prev = elem;
+ split_pt->next = next_elem;
+ if (next_elem)
+ next_elem->prev = split_pt;
+ else
+ elem->heap->last = split_pt;
+ elem->next = split_pt;
+ elem->size = old_elem_size;
+ set_trailer(elem);
+ if (elem->pad) {
+		/* Update the inner padded element's size. */
+ elem = RTE_PTR_ADD(elem, elem->pad);
+ elem->size = old_elem_size - elem->pad;
+ }
+}
+
+/*
+ * our malloc heap is a doubly linked list, so doubly remove our element.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
+ struct malloc_elem *next, *prev;
+ next = elem->next;
+ prev = elem->prev;
+
+ if (next)
+ next->prev = prev;
+ else
+ elem->heap->last = prev;
+ if (prev)
+ prev->next = next;
+ else
+ elem->heap->first = next;
+
+ elem->prev = NULL;
+ elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem->next == RTE_PTR_ADD(elem, elem->size) &&
+ elem->next->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->next->orig_elem);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
+ elem->prev->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->prev->orig_elem);
+}
+
+/*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ * heap->free_head[0] - (0 , 2^8]
+ * heap->free_head[1] - (2^8 , 2^10]
+ * heap->free_head[2] - (2^10, 2^12]
+ * heap->free_head[3] - (2^12, 2^14]
+ * heap->free_head[4] - (2^14, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2 8
+#define MALLOC_LOG2_INCREMENT 2
+
+ size_t log2;
+ size_t index;
+
+ if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+ return 0;
+
+ /* Find next power of 2 >= size. */
+ log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+ /* Compute freelist index, based on log2(size). */
+ index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+ MALLOC_LOG2_INCREMENT;
+
+ return index <= RTE_HEAP_NUM_FREELISTS-1?
+ index: RTE_HEAP_NUM_FREELISTS-1;
+}
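+
+/*
+ * Worked example for the ranges above: size = 3000 falls in (2^10, 2^12],
+ * so log2 = 12 and index = (12 - 8 + 2 - 1) / 2 = 2, i.e. free_head[2].
+ */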
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+ size_t idx;
+
+ idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+ elem->state = ELEM_FREE;
+ LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem)
+{
+ LIST_REMOVE(elem, free_list);
+}
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ * This function is only called from malloc_heap_alloc so parameter checking
+ * is not done here, as it's done there previously.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
+ contig);
+ const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+ const size_t trailer_size = elem->size - old_elem_size - size -
+ MALLOC_ELEM_OVERHEAD;
+
+ malloc_elem_free_list_remove(elem);
+
+ if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split it, too much free space after elem */
+ struct malloc_elem *new_free_elem =
+ RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+ split_elem(elem, new_free_elem);
+ malloc_elem_free_list_insert(new_free_elem);
+
+ if (elem == elem->heap->last)
+ elem->heap->last = new_free_elem;
+ }
+
+ if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* don't split it, pad the element instead */
+ elem->state = ELEM_BUSY;
+ elem->pad = old_elem_size;
+
+ /* put a dummy header in padding, to point to real element header */
+		if (elem->pad > 0) { /* pad will be at least 64 bytes, as everything
+		 * is cache-line aligned */
+ new_elem->pad = elem->pad;
+ new_elem->state = ELEM_PAD;
+ new_elem->size = elem->size - elem->pad;
+ set_header(new_elem);
+ }
+
+ return new_elem;
+ }
+
+ /* we are going to split the element in two. The original element
+ * remains free, and the new element is the one allocated.
+ * Re-insert original element, in case its new size makes it
+ * belong on a different list.
+ */
+ split_elem(elem, new_elem);
+ new_elem->state = ELEM_BUSY;
+ malloc_elem_free_list_insert(elem);
+
+ return new_elem;
+}
+
+/*
+ * join two struct malloc_elem together. elem1 and elem2 must
+ * be contiguous in memory.
+ */
+static inline void
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+{
+ struct malloc_elem *next = elem2->next;
+ elem1->size += elem2->size;
+ if (next)
+ next->prev = elem1;
+ else
+ elem1->heap->last = elem1;
+ elem1->next = next;
+ if (elem1->pad) {
+ struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
+ inner->size = elem1->size - elem1->pad;
+ }
+}
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem)
+{
+ /*
+ * check if next element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+ next_elem_is_adjacent(elem)) {
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase the trailer and header */
+ erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ /* erase header, trailer and pad */
+ memset(erase, MALLOC_POISON, erase_len);
+ }
+
+ /*
+ * check if prev element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+ prev_elem_is_adjacent(elem)) {
+ struct malloc_elem *new_elem;
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase trailer and header */
+ erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->prev);
+
+ new_elem = elem->prev;
+ join_elem(new_elem, elem);
+
+ /* erase header, trailer and pad */
+ memset(erase, MALLOC_POISON, erase_len);
+
+ elem = new_elem;
+ }
+
+ return elem;
+}
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+struct malloc_elem *
+malloc_elem_free(struct malloc_elem *elem)
+{
+ void *ptr;
+ size_t data_len;
+
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ elem->pad = 0;
+
+ /* decrease heap's count of allocated elements */
+ elem->heap->alloc_count--;
+
+ /* poison memory */
+ memset(ptr, MALLOC_POISON, data_len);
+
+ return elem;
+}
+
+/* assume all checks were already done */
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
+{
+ struct malloc_elem *hide_start, *hide_end, *prev, *next;
+ size_t len_before, len_after;
+
+ hide_start = start;
+ hide_end = RTE_PTR_ADD(start, len);
+
+ prev = elem->prev;
+ next = elem->next;
+
+ /* we cannot do anything with non-adjacent elements */
+ if (next && next_elem_is_adjacent(elem)) {
+ len_after = RTE_PTR_DIFF(next, hide_end);
+ if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split after */
+ split_elem(elem, hide_end);
+
+ malloc_elem_free_list_insert(hide_end);
+ } else if (len_after > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ /* we cannot do anything with non-adjacent elements */
+ if (prev && prev_elem_is_adjacent(elem)) {
+ len_before = RTE_PTR_DIFF(hide_start, elem);
+ if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split before */
+ split_elem(elem, hide_start);
+
+ prev = elem;
+ elem = hide_start;
+
+ malloc_elem_free_list_insert(prev);
+ } else if (len_before > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ remove_elem(elem);
+}
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size)
+{
+ const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
+ /* if we request a smaller size, then always return ok */
+ if (elem->size >= new_size)
+ return 0;
+
+ /* check if there is a next element, it's free and adjacent */
+ if (!elem->next || elem->next->state != ELEM_FREE ||
+ !next_elem_is_adjacent(elem))
+ return -1;
+ if (elem->size + elem->next->size < new_size)
+ return -1;
+
+ /* we now know the element fits, so remove from free list,
+ * join the two
+ */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
+		/* now we have a big block together. Let's cut it down a bit by splitting */
+ struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
+ split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
+ split_elem(elem, split_pt);
+ malloc_elem_free_list_insert(split_pt);
+ }
+ return 0;
+}
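+
+/*
+ * Illustrative sketch, not part of the upstream sources: a realloc-style
+ * caller would try to grow an allocation in place via the locked wrapper
+ * malloc_heap_resize() (see malloc_heap.c) and fall back to
+ * alloc-copy-free on failure. The wrapper name below is hypothetical.
+ *
+ *	static void *
+ *	example_try_grow_in_place(void *data, size_t new_size)
+ *	{
+ *		struct malloc_elem *elem = malloc_elem_from_data(data);
+ *
+ *		if (elem == NULL || malloc_heap_resize(elem, new_size) != 0)
+ *			return NULL;
+ *		return data;
+ *	}
+ */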
+
+static inline const char *
+elem_state_to_str(enum elem_state state)
+{
+ switch (state) {
+ case ELEM_PAD:
+ return "PAD";
+ case ELEM_BUSY:
+ return "BUSY";
+ case ELEM_FREE:
+ return "FREE";
+ }
+ return "ERROR";
+}
+
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
+{
+ fprintf(f, "Malloc element at %p (%s)\n", elem,
+ elem_state_to_str(elem->state));
+ fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
+ fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h
new file mode 100644
index 000000000..a1e5f7f02
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef MALLOC_ELEM_H_
+#define MALLOC_ELEM_H_
+
+#include <stdbool.h>
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+
+/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
+struct malloc_heap;
+
+enum elem_state {
+ ELEM_FREE = 0,
+ ELEM_BUSY,
+ ELEM_PAD /* element is a padding-only header */
+};
+
+struct malloc_elem {
+ struct malloc_heap *heap;
+ struct malloc_elem *volatile prev;
+ /**< points to prev elem in memseg */
+ struct malloc_elem *volatile next;
+ /**< points to next elem in memseg */
+ LIST_ENTRY(malloc_elem) free_list;
+ /**< list of free elements in heap */
+ struct rte_memseg_list *msl;
+ volatile enum elem_state state;
+ uint32_t pad;
+ size_t size;
+ struct malloc_elem *orig_elem;
+ size_t orig_size;
+#ifdef RTE_MALLOC_DEBUG
+ uint64_t header_cookie; /* Cookie marking start of data */
+ /* trailer cookie at start + size */
+#endif
+} __rte_cache_aligned;
+
+#ifndef RTE_MALLOC_DEBUG
+static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
+
+/* dummy function - just check if pointer is non-null */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; }
+
+/* dummy function - no header if malloc_debug is not enabled */
+static inline void
+set_header(struct malloc_elem *elem __rte_unused){ }
+
+/* dummy function - no trailer if malloc_debug is not enabled */
+static inline void
+set_trailer(struct malloc_elem *elem __rte_unused){ }
+
+
+#else
+static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE;
+
+#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. */
+#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/
+
+/* define macros to make referencing the header and trailer cookies easier */
+#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \
+ elem->size - MALLOC_ELEM_TRAILER_LEN)))
+#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie)
+
+static inline void
+set_header(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE;
+}
+
+static inline void
+set_trailer(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE;
+}
+
+/* check that the header and trailer cookies are set correctly */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem)
+{
+ return elem != NULL &&
+ MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE &&
+ MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE;
+}
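+
+/*
+ * Illustrative note (not upstream commentary): with RTE_MALLOC_DEBUG
+ * enabled, a buffer overrun past the end of the data area overwrites the
+ * trailer cookie, so the next malloc_elem_cookies_ok() check (e.g. the one
+ * performed in malloc_heap_free()) fails loudly instead of the corruption
+ * going unnoticed.
+ */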
+
+#endif
+
+static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
+#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN)
+
+/*
+ * Given a pointer to the start of a memory block returned by malloc, get
+ * the actual malloc_elem header for that block.
+ */
+static inline struct malloc_elem *
+malloc_elem_from_data(const void *data)
+{
+ if (data == NULL)
+ return NULL;
+
+ struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN);
+ if (!malloc_elem_cookies_ok(elem))
+ return NULL;
+	return elem->state != ELEM_PAD ? elem : RTE_PTR_SUB(elem, elem->pad);
+}
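+
+/*
+ * Illustrative layout sketch (not upstream commentary): a normal element
+ * looks like
+ *
+ *	| struct malloc_elem | data ............ | trailer (debug only) |
+ *	^ elem                ^ pointer handed out to the user
+ *
+ * For allocations that needed padding to satisfy alignment, a small
+ * ELEM_PAD header sits directly before the data instead, and the real
+ * element header lies another elem->pad bytes earlier - hence the final
+ * RTE_PTR_SUB above.
+ */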
+
+/*
+ * initialise a malloc_elem header
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+ struct malloc_heap *heap,
+ struct rte_memseg_list *msl,
+ size_t size,
+ struct malloc_elem *orig_elem,
+ size_t orig_size);
+
+void
+malloc_elem_insert(struct malloc_elem *elem);
+
+/*
+ * return true if the current malloc_elem can hold a block of data
+ * of the requested size and with the requested alignment
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+ unsigned int align, size_t bound, bool contig);
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+ unsigned int align, size_t bound, bool contig);
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after the newly freed block
+ * are also free, the blocks are merged together.
+ */
+struct malloc_elem *
+malloc_elem_free(struct malloc_elem *elem);
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem);
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size);
+
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len);
+
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem);
+
+/*
+ * dump contents of malloc elem to a file.
+ */
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f);
+
+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
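+
+/*
+ * Illustrative note (not upstream commentary): the index is a size class,
+ * so a lookup for a given size only has to scan the lists from that class
+ * upwards - see the loop in find_suitable_element() in malloc_heap.c.
+ */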
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
+#endif /* MALLOC_ELEM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c
new file mode 100644
index 000000000..bd5065698
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c
@@ -0,0 +1,1367 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_atomic.h>
+#include <rte_fbarray.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+#include "malloc_mp.h"
+
+/* start external socket IDs at a very high number */
+#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
+#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
+
+static unsigned
+check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
+{
+ unsigned check_flag = 0;
+
+ if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
+ return 1;
+
+ switch (hugepage_sz) {
+ case RTE_PGSIZE_256K:
+ check_flag = RTE_MEMZONE_256KB;
+ break;
+ case RTE_PGSIZE_2M:
+ check_flag = RTE_MEMZONE_2MB;
+ break;
+ case RTE_PGSIZE_16M:
+ check_flag = RTE_MEMZONE_16MB;
+ break;
+ case RTE_PGSIZE_256M:
+ check_flag = RTE_MEMZONE_256MB;
+ break;
+ case RTE_PGSIZE_512M:
+ check_flag = RTE_MEMZONE_512MB;
+ break;
+ case RTE_PGSIZE_1G:
+ check_flag = RTE_MEMZONE_1GB;
+ break;
+ case RTE_PGSIZE_4G:
+ check_flag = RTE_MEMZONE_4GB;
+ break;
+ case RTE_PGSIZE_16G:
+ check_flag = RTE_MEMZONE_16GB;
+ }
+
+ return check_flag & flags;
+}
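+
+/*
+ * Illustrative examples (not upstream commentary), following directly from
+ * the logic above:
+ *
+ *	check_hugepage_sz(0, RTE_PGSIZE_2M)                = 1 (no flags set)
+ *	check_hugepage_sz(RTE_MEMZONE_2MB, RTE_PGSIZE_2M)  = non-zero
+ *	check_hugepage_sz(RTE_MEMZONE_1GB, RTE_PGSIZE_2M)  = 0
+ */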
+
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (heap->socket_id == socket_id)
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * Expand the heap with a memory area.
+ */
+static struct malloc_elem *
+malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
+ void *start, size_t len)
+{
+ struct malloc_elem *elem = start;
+
+ malloc_elem_init(elem, heap, msl, len, elem, len);
+
+ malloc_elem_insert(elem);
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ return elem;
+}
+
+static int
+malloc_add_seg(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct malloc_heap *heap;
+ int msl_idx, heap_idx;
+
+ if (msl->external)
+ return 0;
+
+ heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+ if (heap_idx < 0) {
+ RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+ return -1;
+ }
+ heap = &mcfg->malloc_heaps[heap_idx];
+
+	/* msl is const, so find its writable counterpart */
+ msl_idx = msl - mcfg->memsegs;
+
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ found_msl = &mcfg->memsegs[msl_idx];
+
+ malloc_heap_add_memory(heap, found_msl, ms->addr, len);
+
+ heap->total_size += len;
+
+ RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
+ msl->socket_id);
+ return 0;
+}
+
+/*
+ * Iterates through the freelist for a heap to find a free element
+ * which can store data of the required size and with the requested alignment.
+ * If size is 0, find the biggest available elem.
+ * Returns NULL on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_suitable_element(struct malloc_heap *heap, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ size_t idx;
+ struct malloc_elem *elem, *alt_elem = NULL;
+
+ for (idx = malloc_elem_free_list_index(size);
+ idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ if (malloc_elem_can_hold(elem, size, align, bound,
+ contig)) {
+ if (check_hugepage_sz(flags,
+ elem->msl->page_sz))
+ return elem;
+ if (alt_elem == NULL)
+ alt_elem = elem;
+ }
+ }
+ }
+
+ if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+ return alt_elem;
+
+ return NULL;
+}
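+
+/*
+ * Illustrative note (not upstream commentary): with
+ * RTE_MEMZONE_SIZE_HINT_ONLY set, a fitting element on a "wrong" page size
+ * (alt_elem) is kept as a fallback and returned when nothing matches the
+ * requested page size; without the flag, a page-size mismatch makes the
+ * lookup fail outright.
+ */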
+
+/*
+ * Iterates through the freelist for a heap to find a free element with the
+ * biggest size and requested alignment. Also sets *size to the size of
+ * the element that was found.
+ * Returns NULL on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
+ !check_hugepage_sz(flags,
+ elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
+ * Main function to allocate a block of memory from the heap.
+ * The caller is expected to hold the heap lock; if the free-list scan
+ * fails, the caller may add new memory to the heap and call this
+ * function again (see malloc_heap_alloc_on_heap_id() below).
+ */
+static void *
+heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+
+ size = RTE_CACHE_LINE_ROUNDUP(size);
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ /* roundup might cause an overflow */
+ if (size == 0)
+ return NULL;
+ elem = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, bound, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+/* this function is exposed in malloc_mp.h */
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len)
+{
+ if (elem != NULL) {
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, map_addr, map_len);
+ }
+
+ eal_memalloc_free_seg_bulk(ms, n_segs);
+}
+
+/* this function is exposed in malloc_mp.h */
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct malloc_elem *elem = NULL;
+ size_t alloc_sz;
+ int allocd_pages;
+ void *ret, *map_addr;
+
+ alloc_sz = (size_t)pg_sz * n_segs;
+
+ /* first, check if we're allowed to allocate this memory */
+ if (eal_memalloc_mem_alloc_validate(socket,
+ heap->total_size + alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n");
+ return NULL;
+ }
+
+ allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
+ socket, true);
+
+ /* make sure we've allocated our pages... */
+ if (allocd_pages < 0)
+ return NULL;
+
+ map_addr = ms[0]->addr;
+ msl = rte_mem_virt2memseg_list(map_addr);
+
+ /* check if we wanted contiguous memory but didn't get it */
+ if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+ __func__);
+ goto fail;
+ }
+
+ /*
+ * Once we have all the memseg lists configured, if there is a dma mask
+ * set, check iova addresses are not out of range. Otherwise the device
+ * setting the dma mask could have problems with the mapped memory.
+ *
+ * There are two situations when this can happen:
+ * 1) memory initialization
+ * 2) dynamic memory allocation
+ *
+	 * For 1), an error when checking the DMA mask implies the app
+	 * cannot be executed. For 2), it implies the new memory cannot be
+	 * added.
+ */
+ if (mcfg->dma_maskbits &&
+ rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+ /*
+ * Currently this can only happen if IOMMU is enabled
+ * and the address width supported by the IOMMU hw is
+ * not enough for using the memory mapped IOVAs.
+ *
+	 * If IOVA is VA, advise trying '--iova-mode pa',
+ * which could solve some situations when IOVA VA is not
+ * really needed.
+ */
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n",
+ __func__);
+
+ /*
+	 * If IOVA is VA and it is possible to run with IOVA PA because
+	 * the user is root, give advice on solving the problem.
+ */
+ if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
+ rte_eal_using_phys_addrs())
+ RTE_LOG(ERR, EAL,
+ "%s(): Please try initializing EAL with --iova-mode=pa parameter\n",
+ __func__);
+ goto fail;
+ }
+
+ /* add newly minted memsegs to malloc heap */
+ elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
+
+ /* try once more, as now we have allocated new memory */
+ ret = find_suitable_element(heap, elt_size, flags, align, bound,
+ contig);
+
+ if (ret == NULL)
+ goto fail;
+
+ return elem;
+
+fail:
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+ return NULL;
+}
+
+static int
+try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ void *map_addr;
+ size_t alloc_sz;
+ int n_segs;
+ bool callback_triggered = false;
+
+ alloc_sz = RTE_ALIGN_CEIL(align + elt_size +
+ MALLOC_ELEM_TRAILER_LEN, pg_sz);
+ n_segs = alloc_sz / pg_sz;
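+
+	/*
+	 * Worked example (illustrative, assuming 2MB pages and no debug
+	 * trailer): for elt_size = 3MB and align = 64, alloc_sz rounds up
+	 * to 4MB and n_segs = 2, even though the element itself only needs
+	 * 1.5 pages - the surplus ends up back on the free list once the
+	 * allocation is carved out.
+	 */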
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL)
+ return -1;
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
+ bound, contig, ms, n_segs);
+
+ if (elem == NULL)
+ goto free_ms;
+
+ map_addr = ms[0]->addr;
+
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* notify other processes that this has happened */
+ if (request_sync()) {
+ /* we couldn't ensure all processes have mapped memory,
+ * so free it back and notify everyone that it's been
+ * freed back.
+ *
+ * technically, we could've avoided adding memory addresses to
+ * the map, but that would've led to inconsistent behavior
+ * between primary and secondary processes, as those get
+ * callbacks during sync. therefore, force primary process to
+ * do alloc-and-rollback syncs as well.
+ */
+ callback_triggered = true;
+ goto free_elem;
+ }
+ heap->total_size += alloc_sz;
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
+ socket, alloc_sz >> 20ULL);
+
+ free(ms);
+
+ return 0;
+
+free_elem:
+ if (callback_triggered)
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ map_addr, alloc_sz);
+
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+
+ request_sync();
+free_ms:
+ free(ms);
+
+ return -1;
+}
+
+static int
+try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_mp_req req;
+ int req_result;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_ALLOC;
+ req.alloc_req.align = align;
+ req.alloc_req.bound = bound;
+ req.alloc_req.contig = contig;
+ req.alloc_req.flags = flags;
+ req.alloc_req.elt_size = elt_size;
+ req.alloc_req.page_sz = pg_sz;
+ req.alloc_req.socket = socket;
+ req.alloc_req.heap = heap; /* it's in shared memory */
+
+ req_result = request_to_primary(&req);
+
+ if (req_result != 0)
+ return -1;
+
+ if (req.result != REQ_RESULT_SUCCESS)
+ return -1;
+
+ return 0;
+}
+
+static int
+try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig)
+{
+ int ret;
+
+ rte_mcfg_mem_write_lock();
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ } else {
+ ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ }
+
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+static int
+compare_pagesz(const void *a, const void *b)
+{
+ const struct rte_memseg_list * const*mpa = a;
+ const struct rte_memseg_list * const*mpb = b;
+ const struct rte_memseg_list *msla = *mpa;
+ const struct rte_memseg_list *mslb = *mpb;
+ uint64_t pg_sz_a = msla->page_sz;
+ uint64_t pg_sz_b = mslb->page_sz;
+
+ if (pg_sz_a < pg_sz_b)
+ return -1;
+ if (pg_sz_a > pg_sz_b)
+ return 1;
+ return 0;
+}
+
+static int
+alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
+ struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
+ uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t prev_pg_sz;
+ int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
+ bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ memset(requested_msls, 0, sizeof(requested_msls));
+ memset(other_msls, 0, sizeof(other_msls));
+ memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
+ memset(other_pg_sz, 0, sizeof(other_pg_sz));
+
+ /*
+ * go through memseg list and take note of all the page sizes available,
+ * and if any of them were specifically requested by the user.
+ */
+ n_requested_msls = 0;
+ n_other_msls = 0;
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->socket_id != socket)
+ continue;
+
+ if (msl->base_va == NULL)
+ continue;
+
+ /* if pages of specific size were requested */
+ if (size_flags != 0 && check_hugepage_sz(size_flags,
+ msl->page_sz))
+ requested_msls[n_requested_msls++] = msl;
+ else if (size_flags == 0 || size_hint)
+ other_msls[n_other_msls++] = msl;
+ }
+
+ /* sort the lists, smallest first */
+ qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
+ compare_pagesz);
+ qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
+ compare_pagesz);
+
+ /* now, extract page sizes we are supposed to try */
+ prev_pg_sz = 0;
+ n_requested_pg_sz = 0;
+ for (i = 0; i < n_requested_msls; i++) {
+ uint64_t pg_sz = requested_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ requested_pg_sz[n_requested_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+ prev_pg_sz = 0;
+ n_other_pg_sz = 0;
+ for (i = 0; i < n_other_msls; i++) {
+ uint64_t pg_sz = other_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ other_pg_sz[n_other_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+
+ /* finally, try allocating memory of specified page sizes, starting from
+ * the smallest sizes
+ */
+ for (i = 0; i < n_requested_pg_sz; i++) {
+ uint64_t pg_sz = requested_pg_sz[i];
+
+ /*
+ * do not pass the size hint here, as user expects other page
+ * sizes first, before resorting to best effort allocation.
+ */
+ if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
+ align, bound, contig))
+ return 0;
+ }
+ if (n_other_pg_sz == 0)
+ return -1;
+
+ /* now, check if we can reserve anything with size hint */
+ ret = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (ret != NULL)
+ return 0;
+
+ /*
+ * we still couldn't reserve memory, so try expanding heap with other
+ * page sizes, if there are any
+ */
+ for (i = 0; i < n_other_pg_sz; i++) {
+ uint64_t pg_sz = other_pg_sz[i];
+
+ if (!try_expand_heap(heap, pg_sz, size, socket, flags,
+ align, bound, contig))
+ return 0;
+ }
+ return -1;
+}
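+
+/*
+ * Worked example (illustrative, not upstream commentary): for a request
+ * carrying RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY on a socket with
+ * both 2M and 1G memseg lists, the function first tries to expand the heap
+ * with 2M pages only, then re-checks whether the request can be satisfied
+ * at all, and only as a last resort expands with 1G pages.
+ */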
+
+/* this will try lower page sizes first */
+static void *
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+ unsigned int heap_id, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ int socket_id;
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ /* for legacy mode, try once and with all flags */
+ if (internal_config.legacy_mem) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+ goto alloc_unlock;
+ }
+
+ /*
+ * we do not pass the size hint here, because even if allocation fails,
+ * we may still be able to allocate memory from appropriate page sizes,
+ * we just need to request more memory first.
+ */
+
+ socket_id = rte_socket_id_by_idx(heap_id);
+ /*
+ * if socket ID is negative, we cannot find a socket ID for this heap -
+ * which means it's an external heap. those can have unexpected page
+ * sizes, so if the user asked to allocate from there - assume user
+ * knows what they're doing, and allow allocating from there with any
+ * page size flags.
+ */
+ if (socket_id < 0)
+ size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
+ ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
+ if (ret != NULL)
+ goto alloc_unlock;
+
+ /* if socket ID is invalid, this is an external heap */
+ if (socket_id < 0)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+ bound, contig)) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+
+ /* this should have succeeded */
+ if (ret == NULL)
+ RTE_LOG(ERR, EAL, "Error allocating from heap\n");
+ }
+alloc_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket_arg,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ int socket, heap_id, i;
+ void *ret;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
+ return NULL;
+
+ ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+ bound, contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps. we are only iterating through native DPDK sockets,
+ * so external heaps won't be included.
+ */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ if (i == heap_id)
+ continue;
+ ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+ bound, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+static void *
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket, heap_id;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+ contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+/* this function is exposed in malloc_mp.h */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
+{
+ int n_segs, seg_idx, max_seg_idx;
+ struct rte_memseg_list *msl;
+ size_t page_sz;
+
+ msl = rte_mem_virt2memseg_list(aligned_start);
+ if (msl == NULL)
+ return -1;
+
+ page_sz = (size_t)msl->page_sz;
+ n_segs = aligned_len / page_sz;
+ seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
+ max_seg_idx = seg_idx + n_segs;
+
+ for (; seg_idx < max_seg_idx; seg_idx++) {
+ struct rte_memseg *ms;
+
+ ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
+ eal_memalloc_free_seg(ms);
+ }
+ return 0;
+}
+
+int
+malloc_heap_free(struct malloc_elem *elem)
+{
+ struct malloc_heap *heap;
+ void *start, *aligned_start, *end, *aligned_end;
+ size_t len, aligned_len, page_sz;
+ struct rte_memseg_list *msl;
+ unsigned int i, n_segs, before_space, after_space;
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ /* elem may be merged with previous element, so keep heap address */
+ heap = elem->heap;
+ msl = elem->msl;
+ page_sz = (size_t)msl->page_sz;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ /* mark element as free */
+ elem->state = ELEM_FREE;
+
+ elem = malloc_elem_free(elem);
+
+ /* anything after this is a bonus */
+ ret = 0;
+
+	/* ...of which we can't avail ourselves if we are in legacy mode, or
+	 * if this is an externally allocated segment.
+ */
+ if (internal_config.legacy_mem || (msl->external > 0))
+ goto free_unlock;
+
+ /* check if we can free any memory back to the system */
+ if (elem->size < page_sz)
+ goto free_unlock;
+
+ /* if user requested to match allocations, the sizes must match - if not,
+ * we will defer freeing these hugepages until the entire original allocation
+ * can be freed
+ */
+ if (internal_config.match_allocations && elem->size != elem->orig_size)
+ goto free_unlock;
+
+	/* we probably can free something, but let's make sure, as we may
+	 * not be using up a full page
+	 */
+ start = elem;
+ len = elem->size;
+ aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
+ end = RTE_PTR_ADD(elem, len);
+ aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
+
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
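+
+	/*
+	 * Worked example (illustrative, assuming a 2MB page size and a
+	 * 2MB-aligned base): an element spanning [base + 0.5MB, base + 5MB)
+	 * gives aligned_start = base + 2MB and aligned_end = base + 4MB, so
+	 * only the single fully covered page in the middle is a candidate
+	 * for being released back to the system.
+	 */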
+
+ /* can't free anything */
+ if (aligned_len < page_sz)
+ goto free_unlock;
+
+ /* we can free something. however, some of these pages may be marked as
+ * unfreeable, so also check that as well
+ */
+ n_segs = aligned_len / page_sz;
+ for (i = 0; i < n_segs; i++) {
+ const struct rte_memseg *tmp =
+ rte_mem_virt2memseg(aligned_start, msl);
+
+ if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ /* this is an unfreeable segment, so move start */
+ aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
+ }
+ }
+
+ /* recalculate length and number of segments */
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+ n_segs = aligned_len / page_sz;
+
+ /* check if we can still free some pages */
+ if (n_segs == 0)
+ goto free_unlock;
+
+ /* We're not done yet. We also have to check if by freeing space we will
+ * be leaving free elements that are too small to store new elements.
+ * Check if we have enough space in the beginning and at the end, or if
+ * start/end are exactly page aligned.
+ */
+ before_space = RTE_PTR_DIFF(aligned_start, elem);
+ after_space = RTE_PTR_DIFF(end, aligned_end);
+ if (before_space != 0 &&
+ before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space before start, but we may be able to
+ * move the start forward by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move start */
+ aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+ if (after_space != 0 && after_space <
+ MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space after end, but we may be able to
+ * move the end backwards by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move end */
+ aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+
+ /* now we can finally free us some pages */
+
+ rte_mcfg_mem_write_lock();
+
+ /*
+ * we allow secondary processes to clear the heap of this allocated
+	 * memory because it is safe to do so: even if notifications about
+	 * unmapped pages don't make it to other processes, the heap is
+	 * shared across all processes and will become empty of this memory
+	 * anyway. Nothing can allocate it back unless the primary process
+	 * is able to deliver the allocation message to every single running
+	 * process.
+ */
+
+ malloc_elem_free_list_remove(elem);
+
+ malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
+
+ heap->total_size -= aligned_len;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ aligned_start, aligned_len);
+
+ /* don't care if any of this fails */
+ malloc_heap_free_pages(aligned_start, aligned_len);
+
+ request_sync();
+ } else {
+ struct malloc_mp_req req;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_FREE;
+ req.free_req.addr = aligned_start;
+ req.free_req.len = aligned_len;
+
+ /*
+ * we request primary to deallocate pages, but we don't do it
+ * in this thread. instead, we notify primary that we would like
+ * to deallocate pages, and this process will receive another
+ * request (in parallel) that will do it for us on another
+ * thread.
+ *
+ * we also don't really care if this succeeds - the data is
+ * already removed from the heap, so it is, for all intents and
+ * purposes, hidden from the rest of DPDK even if some other
+ * process (including this one) may have these pages mapped.
+ *
+ * notifications about deallocated memory happen during sync.
+ */
+ request_to_primary(&req);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n",
+ msl->socket_id, aligned_len >> 20ULL);
+
+ rte_mcfg_mem_write_unlock();
+free_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size)
+{
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ rte_spinlock_lock(&(elem->heap->lock));
+
+ ret = malloc_elem_resize(elem, size);
+
+ rte_spinlock_unlock(&(elem->heap->lock));
+
+ return ret;
+}
+
+/*
+ * Function to retrieve data for a given heap
+ */
+int
+malloc_heap_get_stats(struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ size_t idx;
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ /* Initialise variables for heap */
+ socket_stats->free_count = 0;
+ socket_stats->heap_freesz_bytes = 0;
+ socket_stats->greatest_free_size = 0;
+
+ /* Iterate through free list */
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+				!!elem; elem = LIST_NEXT(elem, free_list)) {
+ socket_stats->free_count++;
+ socket_stats->heap_freesz_bytes += elem->size;
+ if (elem->size > socket_stats->greatest_free_size)
+ socket_stats->greatest_free_size = elem->size;
+ }
+ }
+ /* Get stats on overall heap and allocated memory on this heap */
+ socket_stats->heap_totalsz_bytes = heap->total_size;
+ socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
+ socket_stats->heap_freesz_bytes);
+ socket_stats->alloc_count = heap->alloc_count;
+
+ rte_spinlock_unlock(&heap->lock);
+ return 0;
+}
+
+/*
+ * Function to dump the contents of a given heap
+ */
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f)
+{
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
+ fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
+
+ elem = heap->first;
+ while (elem) {
+ malloc_elem_dump(elem, f);
+ elem = elem->next;
+ }
+
+ rte_spinlock_unlock(&heap->lock);
+}
+
+static int
+destroy_elem(struct malloc_elem *elem, size_t len)
+{
+ struct malloc_heap *heap = elem->heap;
+
+ /* notify all subscribers that a memory area is going to be removed */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
+
+ /* this element can be removed */
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, elem, len);
+
+ heap->total_size -= len;
+
+ memset(elem, 0, sizeof(*elem));
+
+ return 0;
+}
+
+struct rte_memseg_list *
+malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz, const char *seg_name,
+ unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ char fbarray_name[RTE_FBARRAY_NAME_LEN];
+ struct rte_memseg_list *msl = NULL;
+ struct rte_fbarray *arr;
+ size_t seg_len = n_pages * page_sz;
+ unsigned int i;
+
+ /* first, find a free memseg list */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *tmp = &mcfg->memsegs[i];
+ if (tmp->base_va == NULL) {
+ msl = tmp;
+ break;
+ }
+ }
+ if (msl == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n");
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
+ seg_name, va_addr);
+
+ /* create the backing fbarray */
+ if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
+ sizeof(struct rte_memseg)) < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n");
+ return NULL;
+ }
+ arr = &msl->memseg_arr;
+
+ /* fbarray created, fill it up */
+ for (i = 0; i < n_pages; i++) {
+ struct rte_memseg *ms;
+
+ rte_fbarray_set_used(arr, i);
+ ms = rte_fbarray_get(arr, i);
+ ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
+ ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+ ms->socket_id = socket_id;
+ }
+
+ /* set up the memseg list */
+ msl->base_va = va_addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->len = seg_len;
+ msl->version = 0;
+ msl->external = 1;
+
+ return msl;
+}
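+
+/*
+ * Usage sketch (illustrative, not upstream code): the public
+ * external-memory API ends up driving the functions in this file roughly
+ * as below (error handling elided; all values are hypothetical; the
+ * add/remove helpers are defined later in this file).
+ *
+ *	struct rte_memseg_list *msl;
+ *
+ *	msl = malloc_heap_create_external_seg(va_addr, iova_addrs, n_pages,
+ *			page_sz, "ext_seg", heap->socket_id);
+ *	malloc_heap_add_external_memory(heap, msl);
+ *	... allocate from the heap ...
+ *	malloc_heap_remove_external_memory(heap, va_addr, msl->len);
+ *	malloc_heap_destroy_external_seg(msl);
+ */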
+
+struct extseg_walk_arg {
+ void *va_addr;
+ size_t len;
+ struct rte_memseg_list *msl;
+};
+
+static int
+extseg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct extseg_walk_arg *wa = arg;
+
+ if (msl->base_va == wa->va_addr && msl->len == wa->len) {
+ unsigned int found_idx;
+
+ /* msl is const */
+ found_idx = msl - mcfg->memsegs;
+ wa->msl = &mcfg->memsegs[found_idx];
+ return 1;
+ }
+ return 0;
+}
+
+struct rte_memseg_list *
+malloc_heap_find_external_seg(void *va_addr, size_t len)
+{
+ struct extseg_walk_arg wa;
+ int res;
+
+ wa.va_addr = va_addr;
+ wa.len = len;
+
+ res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);
+
+ if (res != 1) {
+ /* 0 means nothing was found, -1 shouldn't happen */
+ if (res == 0)
+ rte_errno = ENOENT;
+ return NULL;
+ }
+ return wa.msl;
+}
+
+int
+malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
+{
+ /* destroy the fbarray backing this memory */
+ if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
+ return -1;
+
+ /* reset the memseg list */
+ memset(msl, 0, sizeof(*msl));
+
+ return 0;
+}
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap,
+ struct rte_memseg_list *msl)
+{
+ /* erase contents of new memory */
+ memset(msl->base_va, 0, msl->len);
+
+ /* now, add newly minted memory to the malloc heap */
+ malloc_heap_add_memory(heap, msl, msl->base_va, msl->len);
+
+ heap->total_size += msl->len;
+
+ /* all done! */
+ RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n",
+ heap->name, msl->base_va);
+
+ /* notify all subscribers that a new memory area has been added */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ msl->base_va, msl->len);
+
+ return 0;
+}
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len)
+{
+ struct malloc_elem *elem = heap->first;
+
+ /* find element with specified va address */
+ while (elem != NULL && elem != va_addr) {
+ elem = elem->next;
+ /* stop if we've blown past our VA */
+ if (elem > (struct malloc_elem *)va_addr) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ }
+ /* check if element was found */
+ if (elem == NULL || elem->msl->len != len) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ /* if element's size is not equal to segment len, segment is busy */
+ if (elem->state == ELEM_BUSY || elem->size != len) {
+ rte_errno = EBUSY;
+ return -1;
+ }
+ return destroy_elem(elem, len);
+}
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint32_t next_socket_id = mcfg->next_socket_id;
+
+ /* prevent overflow. did you really create 2 billion heaps??? */
+ if (next_socket_id > INT32_MAX) {
+ RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+ rte_errno = ENOSPC;
+ return -1;
+ }
+
+ /* initialize empty heap */
+ heap->alloc_count = 0;
+ heap->first = NULL;
+ heap->last = NULL;
+ LIST_INIT(heap->free_head);
+ rte_spinlock_init(&heap->lock);
+ heap->total_size = 0;
+ heap->socket_id = next_socket_id;
+
+ /* we hold a global mem hotplug writelock, so it's safe to increment */
+ mcfg->next_socket_id++;
+
+ /* set up name */
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ return 0;
+}
+
+int
+malloc_heap_destroy(struct malloc_heap *heap)
+{
+ if (heap->alloc_count != 0) {
+ RTE_LOG(ERR, EAL, "Heap is still in use\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->first != NULL || heap->last != NULL) {
+ RTE_LOG(ERR, EAL, "Heap still contains memory segments\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->total_size != 0)
+ RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n");
+
+ /* after this, the lock will be dropped */
+ memset(heap, 0, sizeof(*heap));
+
+ return 0;
+}
+
+int
+rte_eal_malloc_heap_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ if (internal_config.match_allocations) {
+ RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n");
+ }
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* assign min socket ID to external heaps */
+ mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
+
+ /* assign names to default DPDK heaps */
+ for (i = 0; i < rte_socket_count(); i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+ char heap_name[RTE_HEAP_NAME_MAX_LEN];
+ int socket_id = rte_socket_id_by_idx(i);
+
+ snprintf(heap_name, sizeof(heap_name),
+ "socket_%i", socket_id);
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ heap->socket_id = socket_id;
+ }
+ }
+
+
+ if (register_mp_requests()) {
+ RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
+ rte_mcfg_mem_read_unlock();
+ return -1;
+ }
+
+ /* unlock mem hotplug here. it's safe for primary as no requests can
+ * even come before primary itself is fully initialized, and secondaries
+ * do not need to initialize the heap.
+ */
+ rte_mcfg_mem_read_unlock();
+
+ /* secondary process does not need to initialize anything */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ /* add all IOVA-contiguous areas to the heap */
+ return rte_memseg_contig_walk(malloc_add_seg, NULL);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h
new file mode 100644
index 000000000..772736b53
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef MALLOC_HEAP_H_
+#define MALLOC_HEAP_H_
+
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+
+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS 13
+#define RTE_HEAP_NAME_MAX_LEN 32
+
+/* dummy definition, for pointers */
+struct malloc_elem;
+
+/**
+ * Structure to hold malloc heap
+ */
+struct malloc_heap {
+ rte_spinlock_t lock;
+ LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+ struct malloc_elem *volatile first;
+ struct malloc_elem *volatile last;
+
+ unsigned int alloc_count;
+ unsigned int socket_id;
+ size_t total_size;
+ char name[RTE_HEAP_NAME_MAX_LEN];
+} __rte_cache_aligned;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline unsigned
+malloc_get_numa_socket(void)
+{
+ unsigned socket_id = rte_socket_id();
+
+ if (socket_id == (unsigned)SOCKET_ID_ANY)
+ return 0;
+
+ return socket_id;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
+ size_t align, size_t bound, bool contig);
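+
+/*
+ * Usage sketch (illustrative, not upstream code): this is roughly how the
+ * rte_malloc() family drives the heap API. A NULL type, zero flags, zero
+ * boundary and contig = false request a plain, non-IOVA-contiguous
+ * allocation; the wrapper name is hypothetical.
+ *
+ *	static void *
+ *	example_alloc(size_t size, unsigned int align, int socket)
+ *	{
+ *		return malloc_heap_alloc(NULL, size, socket, 0, align, 0,
+ *				false);
+ *	}
+ */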
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
+
+int
+malloc_heap_destroy(struct malloc_heap *heap);
+
+struct rte_memseg_list *
+malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz, const char *seg_name,
+ unsigned int socket_id);
+
+struct rte_memseg_list *
+malloc_heap_find_external_seg(void *va_addr, size_t len);
+
+int
+malloc_heap_destroy_external_seg(struct rte_memseg_list *msl);
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap,
+ struct rte_memseg_list *msl);
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len);
+
+int
+malloc_heap_free(struct malloc_elem *elem);
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size);
+
+int
+malloc_heap_get_stats(struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats);
+
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f);
+
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
+int
+rte_eal_malloc_heap_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MALLOC_HEAP_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c
new file mode 100644
index 000000000..1f212f834
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c
@@ -0,0 +1,751 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/time.h>
+
+#include <rte_alarm.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+
+#include "malloc_elem.h"
+#include "malloc_mp.h"
+
+#define MP_ACTION_SYNC "mp_malloc_sync"
+/**< request sent by primary process to notify of changes in memory map */
+#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
+/**< request sent by primary process to notify of changes in memory map. this is
+ * essentially a regular sync request, but we cannot send sync requests while
+ * another one is in progress, and we might have to - therefore, we do this as
+ * a separate callback.
+ */
+#define MP_ACTION_REQUEST "mp_malloc_request"
+/**< request sent by secondary process to ask for allocation/deallocation */
+#define MP_ACTION_RESPONSE "mp_malloc_response"
+/**< response sent to secondary process to indicate result of request */
+
+/* forward declarations */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+/* when we're allocating, we need to store some state to ensure that we can
+ * roll back later
+ */
+struct primary_alloc_req_state {
+ struct malloc_heap *heap;
+ struct rte_memseg **ms;
+ int ms_len;
+ struct malloc_elem *elem;
+ void *map_addr;
+ size_t map_len;
+};
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct malloc_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+ struct primary_alloc_req_state alloc_state;
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+/**
+ * General workflow is the following:
+ *
+ * Allocation:
+ * S: send request to primary
+ * P: attempt to allocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * if success, sendmsg success
+ * if failure, roll back allocation and send a rollback request
+ * S: if received msg of success, quit
+ * if received rollback request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * Aside from timeouts, there are three points where we can quit:
+ * - if allocation failed straight away
+ * - if allocation and sync request succeeded
+ * - if allocation succeeded, sync request failed, allocation rolled back and
+ * rollback request received (irrespective of whether it succeeded or failed)
+ *
+ * Deallocation:
+ * S: send request to primary
+ * P: attempt to deallocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * There is no "rollback" from deallocation, as it's safe to have some memory
+ * mapped in some processes - it's absent from the heap, so it won't get used.
+ */
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+/* secondary will respond to sync requests thusly */
+static int
+handle_sync(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg reply;
+ const struct malloc_mp_req *req =
+ (const struct malloc_mp_req *)msg->param;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.param;
+ int ret;
+
+ if (req->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+
+ reply.num_fds = 0;
+ strlcpy(reply.name, msg->name, sizeof(reply.name));
+ reply.len_param = sizeof(*resp);
+
+ ret = eal_memalloc_sync_with_primary();
+
+ resp->t = REQ_TYPE_SYNC;
+ resp->id = req->id;
+ resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;
+
+ rte_mp_reply(&reply, peer);
+
+ return 0;
+}
+
+static int
+handle_alloc_request(const struct malloc_mp_req *m,
+ struct mp_request *req)
+{
+ const struct malloc_req_alloc *ar = &m->alloc_req;
+ struct malloc_heap *heap;
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ size_t alloc_sz;
+ int n_segs;
+ void *map_addr;
+
+ alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
+ MALLOC_ELEM_TRAILER_LEN, ar->page_sz);
+ n_segs = alloc_sz / ar->page_sz;
+
+ heap = ar->heap;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
+ goto fail;
+ }
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
+ ar->flags, ar->align, ar->bound, ar->contig, ms,
+ n_segs);
+
+ if (elem == NULL)
+ goto fail;
+
+ map_addr = ms[0]->addr;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* we have succeeded in allocating memory, but we still need to sync
+ * with other processes. however, since DPDK IPC is single-threaded, we
+ * send an asynchronous request and exit this callback.
+ */
+
+ req->alloc_state.ms = ms;
+ req->alloc_state.ms_len = n_segs;
+ req->alloc_state.map_addr = map_addr;
+ req->alloc_state.map_len = alloc_sz;
+ req->alloc_state.elem = elem;
+ req->alloc_state.heap = heap;
+
+ return 0;
+fail:
+ free(ms);
+ return -1;
+}
+
+/* first stage of primary handling requests from secondary */
+static int
+handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+ int ret;
+
+ /* lock access to request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ /* make sure it's not a dupe */
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ RTE_LOG(ERR, EAL, "Duplicate request id\n");
+ goto fail;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
+ goto fail;
+ }
+
+ /* erase all data */
+ memset(entry, 0, sizeof(*entry));
+
+ if (m->t == REQ_TYPE_ALLOC) {
+ ret = handle_alloc_request(m, entry);
+ } else if (m->t == REQ_TYPE_FREE) {
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ m->free_req.addr, m->free_req.len);
+
+ ret = malloc_heap_free_pages(m->free_req.addr,
+ m->free_req.len);
+ } else {
+ RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
+ goto fail;
+ }
+
+ if (ret != 0) {
+ struct rte_mp_msg resp_msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)resp_msg.param;
+
+ /* send failure message straight away */
+ resp_msg.num_fds = 0;
+ resp_msg.len_param = sizeof(*resp);
+ strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
+ sizeof(resp_msg.name));
+
+ resp->t = m->t;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = m->id;
+
+ if (rte_mp_sendmsg(&resp_msg)) {
+ RTE_LOG(ERR, EAL, "Couldn't send response\n");
+ goto fail;
+ }
+ /* we did not modify the request */
+ free(entry);
+ } else {
+ struct rte_mp_msg sr_msg;
+ struct malloc_mp_req *sr =
+ (struct malloc_mp_req *)sr_msg.param;
+ struct timespec ts;
+
+ memset(&sr_msg, 0, sizeof(sr_msg));
+
+ /* we can do something, so send sync request asynchronously */
+ sr_msg.num_fds = 0;
+ sr_msg.len_param = sizeof(*sr);
+ strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ sr->t = REQ_TYPE_SYNC;
+ sr->id = m->id;
+
+		/* there may be a stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&sr_msg, &ts,
+ handle_sync_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
+ if (m->t == REQ_TYPE_ALLOC)
+ free(entry->alloc_state.ms);
+ goto fail;
+ }
+
+ /* mark request as in progress */
+ memcpy(&entry->user_req, m, sizeof(*m));
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ }
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+/* callback for asynchronous sync requests for primary. this will either do a
+ * sendmsg with results, or trigger rollback request.
+ */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ enum malloc_req_result result;
+ struct mp_request *entry;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ int i;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ result = REQ_RESULT_SUCCESS;
+
+ if (reply->nb_received != reply->nb_sent)
+ result = REQ_RESULT_FAIL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->id != entry->user_req.id) {
+ RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->result == REQ_RESULT_FAIL) {
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ }
+
+ if (entry->user_req.t == REQ_TYPE_FREE) {
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* this is a free request, just sendmsg result */
+ resp->t = REQ_TYPE_FREE;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_SUCCESS) {
+ struct malloc_heap *heap = entry->alloc_state.heap;
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ heap->total_size += entry->alloc_state.map_len;
+
+ /* result is success, so just notify secondary about this */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_FAIL) {
+ struct rte_mp_msg rb_msg;
+ struct malloc_mp_req *rb =
+ (struct malloc_mp_req *)rb_msg.param;
+ struct timespec ts;
+ struct primary_alloc_req_state *state =
+ &entry->alloc_state;
+ int ret;
+
+ memset(&rb_msg, 0, sizeof(rb_msg));
+
+ /* we've failed to sync, so do a rollback */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ state->map_addr, state->map_len);
+
+ rollback_expand_heap(state->ms, state->ms_len, state->elem,
+ state->map_addr, state->map_len);
+
+ /* send rollback request */
+ rb_msg.num_fds = 0;
+ rb_msg.len_param = sizeof(*rb);
+ strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ rb->t = REQ_TYPE_SYNC;
+ rb->id = entry->user_req.id;
+
+		/* a stray request from a previous timeout may still be pending */
+ do {
+ ret = rte_mp_request_async(&rb_msg, &ts,
+ handle_rollback_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");
+
+ /* we couldn't send rollback request, but that's OK -
+ * secondary will time out, and memory has been removed
+ * from heap anyway.
+ */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(state->ms);
+ free(entry);
+ goto fail;
+ }
+ } else {
+		RTE_LOG(ERR, EAL, "Unexpected response to sync request of unknown type\n");
+ goto fail;
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ struct mp_request *entry;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ memset(&msg, 0, sizeof(msg));
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ if (entry->user_req.t != REQ_TYPE_ALLOC) {
+ RTE_LOG(ERR, EAL, "Unexpected active request\n");
+ goto fail;
+ }
+
+ /* we don't care if rollback succeeded, request still failed */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = mpreq->id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ /* clean up */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+/* final stage of the request from secondary */
+static int
+handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ /* update request status */
+ entry->user_req.result = m->result;
+
+ entry->state = REQ_STATE_COMPLETE;
+
+ /* trigger thread wakeup */
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
+}
+
+/* synchronously request a memory map sync; this is only called when the
+ * primary process initiates an allocation.
+ */
+int
+request_sync(void)
+{
+ struct rte_mp_msg msg;
+ struct rte_mp_reply reply;
+ struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
+ struct timespec ts;
+ int i, ret = -1;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&reply, 0, sizeof(reply));
+
+ /* no need to create tailq entries as this is entirely synchronous */
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*req);
+ strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));
+
+ /* sync request carries no data */
+ req->t = REQ_TYPE_SYNC;
+ req->id = get_unique_id();
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+	/* a stray request from a previous timeout may still be pending */
+ do {
+ ret = rte_mp_request_sync(&msg, &reply, &ts);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ /* if IPC is unsupported, behave as if the call succeeded */
+ if (rte_errno != ENOTSUP)
+ RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
+ else
+ ret = 0;
+ goto out;
+ }
+
+ if (reply.nb_received != reply.nb_sent) {
+ RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
+ goto out;
+ }
+
+ for (i = 0; i < reply.nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.msgs[i].param;
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
+ goto out;
+ }
+ if (resp->id != req->id) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto out;
+ }
+ if (resp->result != REQ_RESULT_SUCCESS) {
+ RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ free(reply.msgs);
+ return ret;
+}
+
+/* this is a synchronous wrapper around asynchronous requests to the primary
+ * process; it initiates a request and waits until the response arrives.
+ */
+int
+request_to_primary(struct malloc_mp_req *user_req)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
+ goto fail;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto fail;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ /* initialize the request */
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*msg_req);
+ strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));
+
+ /* (attempt to) get a unique id */
+ user_req->id = get_unique_id();
+
+ /* copy contents of user request into the message */
+ memcpy(msg_req, user_req, sizeof(*msg_req));
+
+ if (rte_mp_sendmsg(&msg)) {
+ RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
+ goto fail;
+ }
+
+ /* copy contents of user request into active request */
+ memcpy(&entry->user_req, user_req, sizeof(*user_req));
+
+ /* mark request as in progress */
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ /* finally, wait on timeout */
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "Request timed out\n");
+ ret = -1;
+ } else {
+ ret = 0;
+ user_req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+int
+register_mp_requests(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* it's OK for primary to not support IPC */
+ if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request) &&
+ rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_REQUEST);
+ return -1;
+ }
+ } else {
+ if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_SYNC);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				MP_ACTION_ROLLBACK);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_RESPONSE,
+ handle_response)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_RESPONSE);
+ return -1;
+ }
+ }
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
new file mode 100644
index 000000000..2b86b76f6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef MALLOC_MP_H
+#define MALLOC_MP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+/* forward declarations */
+struct malloc_heap;
+struct rte_memseg;
+
+/* multiprocess synchronization structures for malloc */
+enum malloc_req_type {
+ REQ_TYPE_ALLOC, /**< ask primary to allocate */
+ REQ_TYPE_FREE, /**< ask primary to free */
+ REQ_TYPE_SYNC /**< ask secondary to synchronize its memory map */
+};
+
+enum malloc_req_result {
+ REQ_RESULT_SUCCESS,
+ REQ_RESULT_FAIL
+};
+
+struct malloc_req_alloc {
+ struct malloc_heap *heap;
+ uint64_t page_sz;
+ size_t elt_size;
+ int socket;
+ unsigned int flags;
+ size_t align;
+ size_t bound;
+ bool contig;
+};
+
+struct malloc_req_free {
+ RTE_STD_C11
+ union {
+ void *addr;
+ uint64_t addr_64;
+ };
+ uint64_t len;
+};
+
+struct malloc_mp_req {
+ enum malloc_req_type t;
+ RTE_STD_C11
+ union {
+ struct malloc_req_alloc alloc_req;
+ struct malloc_req_free free_req;
+ };
+ uint64_t id; /**< not to be populated by caller */
+ enum malloc_req_result result;
+};
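+
+/* Illustrative request setup (hypothetical values): a secondary asking the
+ * primary to allocate one 2 MB page might fill the request like this; the
+ * id field is populated by request_to_primary() itself:
+ *
+ *   struct malloc_mp_req req;
+ *
+ *   memset(&req, 0, sizeof(req));
+ *   req.t = REQ_TYPE_ALLOC;
+ *   req.alloc_req.heap = heap;          // target heap
+ *   req.alloc_req.page_sz = 1 << 21;    // 2 MB page size
+ *   req.alloc_req.elt_size = 1 << 21;
+ *   req.alloc_req.socket = 0;
+ *   if (request_to_primary(&req) != 0 ||
+ *           req.result != REQ_RESULT_SUCCESS)
+ *       // allocation failed
+ */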
+
+int
+register_mp_requests(void);
+
+int
+request_to_primary(struct malloc_mp_req *req);
+
+/* synchronous memory map sync request */
+int
+request_sync(void);
+
+/* functions from malloc_heap exposed here */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len);
+
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs);
+
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len);
+
+#endif /* MALLOC_MP_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/meson.build b/src/spdk/dpdk/lib/librte_eal/common/meson.build
new file mode 100644
index 000000000..55aaeb18e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/meson.build
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+includes += include_directories('.')
+
+if is_windows
+ sources += files(
+ 'eal_common_bus.c',
+ 'eal_common_class.c',
+ 'eal_common_devargs.c',
+ 'eal_common_errno.c',
+ 'eal_common_launch.c',
+ 'eal_common_lcore.c',
+ 'eal_common_log.c',
+ 'eal_common_options.c',
+ 'eal_common_thread.c',
+ )
+ subdir_done()
+endif
+
+sources += files(
+ 'eal_common_bus.c',
+ 'eal_common_cpuflags.c',
+ 'eal_common_class.c',
+ 'eal_common_devargs.c',
+ 'eal_common_dev.c',
+ 'eal_common_errno.c',
+ 'eal_common_fbarray.c',
+ 'eal_common_hexdump.c',
+ 'eal_common_hypervisor.c',
+ 'eal_common_launch.c',
+ 'eal_common_lcore.c',
+ 'eal_common_log.c',
+ 'eal_common_mcfg.c',
+ 'eal_common_memalloc.c',
+ 'eal_common_memory.c',
+ 'eal_common_memzone.c',
+ 'eal_common_options.c',
+ 'eal_common_proc.c',
+ 'eal_common_string_fns.c',
+ 'eal_common_tailqs.c',
+ 'eal_common_thread.c',
+ 'eal_common_timer.c',
+ 'eal_common_trace.c',
+ 'eal_common_trace_ctf.c',
+ 'eal_common_trace_points.c',
+ 'eal_common_trace_utils.c',
+ 'eal_common_uuid.c',
+ 'hotplug_mp.c',
+ 'malloc_elem.c',
+ 'malloc_heap.c',
+ 'malloc_mp.c',
+ 'rte_keepalive.c',
+ 'rte_malloc.c',
+ 'rte_random.c',
+ 'rte_reciprocal.c',
+ 'rte_service.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 000000000..e0494b201
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2016 Intel Corporation
+ */
+
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+#include <rte_malloc.h>
+
+struct rte_keepalive {
+ /** Core Liveness. */
+ struct {
+ /*
+ * Each element must be cache aligned to prevent false sharing.
+ */
+ enum rte_keepalive_state core_state __rte_cache_aligned;
+ } live_data[RTE_KEEPALIVE_MAXCORES];
+
+ /** Last-seen-alive timestamps */
+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+ /**
+ * Cores to check.
+ * Indexed by core id, non-zero if the core should be checked.
+ */
+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+ /** Dead core handler. */
+ rte_keepalive_failure_callback_t callback;
+
+ /**
+ * Dead core handler app data.
+ * Pointer is passed to dead core handler.
+ */
+ void *callback_data;
+ uint64_t tsc_initial;
+ uint64_t tsc_mhz;
+
+ /** Core state relay handler. */
+ rte_keepalive_relay_callback_t relay_callback;
+
+ /**
+ * Core state relay handler app data.
+ * Pointer is passed to live core handler.
+ */
+ void *relay_callback_data;
+};
+
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+ RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
+ msg,
+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+ / rte_get_tsc_hz()
+ );
+}
+
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+ void *ptr_data)
+{
+ struct rte_keepalive *keepcfg = ptr_data;
+ int idx_core;
+
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ if (keepcfg->active_cores[idx_core] == 0)
+ continue;
+
+ switch (keepcfg->live_data[idx_core].core_state) {
+ case RTE_KA_STATE_UNUSED:
+ break;
+ case RTE_KA_STATE_ALIVE: /* Alive */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_MISSING;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case RTE_KA_STATE_MISSING: /* MIA */
+ print_trace("Core MIA. ", keepcfg, idx_core);
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_DEAD;
+ break;
+ case RTE_KA_STATE_DEAD: /* Dead */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_GONE;
+ print_trace("Core died. ", keepcfg, idx_core);
+ if (keepcfg->callback)
+ keepcfg->callback(
+ keepcfg->callback_data,
+ idx_core
+ );
+ break;
+ case RTE_KA_STATE_GONE: /* Buried */
+ break;
+ case RTE_KA_STATE_DOZING: /* Core going idle */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_SLEEP;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case RTE_KA_STATE_SLEEP: /* Idled core */
+ break;
+ }
+ if (keepcfg->relay_callback)
+ keepcfg->relay_callback(
+ keepcfg->relay_callback_data,
+ idx_core,
+ keepcfg->live_data[idx_core].core_state,
+ keepcfg->last_alive[idx_core]
+ );
+ }
+}
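+
+/* State transitions applied by the dispatcher above, per monitored core:
+ *
+ *   ALIVE   -> MISSING  (timestamp refreshed; core must mark itself again)
+ *   MISSING -> DEAD     ("Core MIA" logged)
+ *   DEAD    -> GONE     ("Core died" logged, failure callback invoked)
+ *   DOZING  -> SLEEP    (idle core, timestamp refreshed)
+ *   UNUSED, GONE, SLEEP (no change)
+ *
+ * Cores move themselves back to ALIVE/DOZING via rte_keepalive_mark_alive()
+ * and rte_keepalive_mark_sleep() below.
+ */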
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+ void *data)
+{
+ struct rte_keepalive *keepcfg;
+
+ keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
+ sizeof(struct rte_keepalive),
+ RTE_CACHE_LINE_SIZE);
+ if (keepcfg != NULL) {
+ keepcfg->callback = callback;
+ keepcfg->callback_data = data;
+ keepcfg->tsc_initial = rte_rdtsc();
+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+ }
+ return keepcfg;
+}
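+
+/* Minimal usage sketch (hypothetical callback and lcore id); the dispatch
+ * function is expected to run periodically, e.g. from a timer:
+ *
+ *   static void dead_core_cb(void *data, const int id_core) { ... }
+ *
+ *   struct rte_keepalive *ka = rte_keepalive_create(dead_core_cb, NULL);
+ *   rte_keepalive_register_core(ka, 2);
+ *   // on lcore 2, once per loop iteration: rte_keepalive_mark_alive(ka);
+ *   // on the monitor: rte_keepalive_dispatch_pings(NULL, ka);
+ */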
+
+void rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg,
+ rte_keepalive_relay_callback_t callback,
+ void *data)
+{
+ keepcfg->relay_callback = callback;
+ keepcfg->relay_callback_data = data;
+}
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+ if (id_core < RTE_KEEPALIVE_MAXCORES) {
+ keepcfg->active_cores[id_core] = RTE_KA_STATE_ALIVE;
+ keepcfg->last_alive[id_core] = rte_rdtsc();
+ }
+}
+
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+ keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_ALIVE;
+}
+
+void
+rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg)
+{
+ keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_DOZING;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c
new file mode 100644
index 000000000..f1b73168b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c
@@ -0,0 +1,668 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_errno.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+#include <rte_eal_trace.h>
+
+#include <rte_malloc.h>
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+
+/* Free the memory space back to heap */
+static void
+mem_free(void *addr, const bool trace_ena)
+{
+ if (trace_ena)
+ rte_eal_trace_mem_free(addr);
+
+	if (addr == NULL)
+		return;
+ if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
+ RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
+}
+
+void
+rte_free(void *addr)
+{
+ return mem_free(addr, true);
+}
+
+void
+eal_free_no_trace(void *addr)
+{
+ return mem_free(addr, false);
+}
+
+static void *
+malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg, const bool trace_ena)
+{
+ void *ptr;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ /* if there are no hugepages and if we are not allocating from an
+ * external heap, use memory from any socket available. checking for
+ * socket being external may return -1 in case of invalid socket, but
+ * that's OK - if there are no hugepages, it doesn't matter.
+ */
+ if (rte_malloc_heap_socket_is_external(socket_arg) != 1 &&
+ !rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ ptr = malloc_heap_alloc(type, size, socket_arg, 0,
+ align == 0 ? 1 : align, 0, false);
+
+ if (trace_ena)
+ rte_eal_trace_mem_malloc(type, size, align, socket_arg, ptr);
+ return ptr;
+}
+
+/*
+ * Allocate memory on specified heap.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg)
+{
+ return malloc_socket(type, size, align, socket_arg, true);
+}
+
+void *
+eal_malloc_no_trace(const char *type, size_t size, unsigned int align)
+{
+ return malloc_socket(type, size, align, SOCKET_ID_ANY, false);
+}
+
+/*
+ * Allocate memory on default heap.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align)
+{
+ return rte_malloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
+{
+ void *ptr = rte_malloc_socket(type, size, align, socket);
+
+#ifdef RTE_MALLOC_DEBUG
+ /*
+ * If DEBUG is enabled, then freed memory is marked with poison
+ * value and set to zero on allocation.
+ * If DEBUG is not enabled then memory is already zeroed.
+ */
+ if (ptr != NULL)
+ memset(ptr, 0, size);
+#endif
+
+ rte_eal_trace_mem_zmalloc(type, size, align, socket, ptr);
+ return ptr;
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align)
+{
+ return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket)
+{
+ return rte_zmalloc_socket(type, num * size, align, socket);
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align)
+{
+ return rte_zmalloc(type, num * size, align);
+}
+
+/*
+ * Resize allocated memory on specified heap.
+ */
+void *
+rte_realloc_socket(void *ptr, size_t size, unsigned int align, int socket)
+{
+ if (ptr == NULL)
+ return rte_malloc_socket(NULL, size, align, socket);
+
+ struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (elem == NULL) {
+ RTE_LOG(ERR, EAL, "Error: memory corruption detected\n");
+ return NULL;
+ }
+
+	size = RTE_CACHE_LINE_ROUNDUP(size);
+	align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ /* check requested socket id and alignment matches first, and if ok,
+ * see if we can resize block
+ */
+ if ((socket == SOCKET_ID_ANY ||
+ (unsigned int)socket == elem->heap->socket_id) &&
+ RTE_PTR_ALIGN(ptr, align) == ptr &&
+ malloc_heap_resize(elem, size) == 0) {
+ rte_eal_trace_mem_realloc(size, align, socket, ptr);
+ return ptr;
+ }
+
+ /* either requested socket id doesn't match, alignment is off
+ * or we have no room to expand,
+ * so move the data.
+ */
+ void *new_ptr = rte_malloc_socket(NULL, size, align, socket);
+ if (new_ptr == NULL)
+ return NULL;
+ /* elem: |pad|data_elem|data|trailer| */
+ const size_t old_size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+ rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+ rte_free(ptr);
+
+ rte_eal_trace_mem_realloc(size, align, socket, new_ptr);
+ return new_ptr;
+}
+
+/*
+ * Resize allocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned int align)
+{
+ return rte_realloc_socket(ptr, size, align, SOCKET_ID_ANY);
+}
+
+int
+rte_malloc_validate(const void *ptr, size_t *size)
+{
+ const struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (!malloc_elem_cookies_ok(elem))
+ return -1;
+ if (size != NULL)
+ *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+ return 0;
+}
+
+/*
+ * Function to retrieve heap statistics for a given socket
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int heap_idx;
+
+ heap_idx = malloc_socket_to_heap_id(socket);
+ if (heap_idx < 0)
+ return -1;
+
+ return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+ socket_stats);
+}
+
+/*
+ * Function to dump contents of all heaps
+ */
+void
+rte_malloc_dump_heaps(FILE *f)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ fprintf(f, "Heap id: %u\n", idx);
+ malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
+ }
+}
+
+int
+rte_malloc_heap_get_socket(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ unsigned int idx;
+ int ret;
+
+ if (name == NULL ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_read_lock();
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) {
+ heap = tmp;
+ break;
+ }
+ }
+
+ if (heap != NULL) {
+ ret = heap->socket_id;
+ } else {
+ rte_errno = ENOENT;
+ ret = -1;
+ }
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_malloc_heap_socket_is_external(int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+ int ret = -1;
+
+ if (socket_id == SOCKET_ID_ANY)
+ return 0;
+
+ rte_mcfg_mem_read_lock();
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if ((int)tmp->socket_id == socket_id) {
+			/* external memory always has large socket IDs */
+ ret = tmp->socket_id >= RTE_MAX_NUMA_NODES;
+ break;
+ }
+ }
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+/*
+ * Print memory stats. The type argument is currently unused; info on all
+ * heaps is printed.
+ */
+void
+rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int heap_id;
+ struct rte_malloc_socket_stats sock_stats;
+
+ /* Iterate through all initialised heaps */
+ for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+
+ malloc_heap_get_stats(heap, &sock_stats);
+
+ fprintf(f, "Heap id:%u\n", heap_id);
+ fprintf(f, "\tHeap name:%s\n", heap->name);
+ fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
+ fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
+ fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
+ fprintf(f, "\tGreatest_free_size:%zu,\n",
+ sock_stats.greatest_free_size);
+		fprintf(f, "\tAlloc_count:%u,\n", sock_stats.alloc_count);
+ fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
+ }
+ return;
+}
+
+/*
+ * TODO: Set limit to memory that can be allocated to memory type
+ */
+int
+rte_malloc_set_limit(__rte_unused const char *type,
+ __rte_unused size_t max)
+{
+ return 0;
+}
+
+/*
+ * Return the IO address of a virtual address obtained through rte_malloc
+ */
+rte_iova_t
+rte_malloc_virt2iova(const void *addr)
+{
+ const struct rte_memseg *ms;
+ struct malloc_elem *elem = malloc_elem_from_data(addr);
+
+ if (elem == NULL)
+ return RTE_BAD_IOVA;
+
+ if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t) addr;
+
+ ms = rte_mem_virt2memseg(addr, elem->msl);
+ if (ms == NULL)
+ return RTE_BAD_IOVA;
+
+ if (ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
+}
+
+static struct malloc_heap *
+find_named_heap(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN))
+ return heap;
+ }
+ return NULL;
+}
+
+int
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ unsigned int n;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL ||
+ page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+ RTE_ALIGN(len, page_sz) != len ||
+ !rte_is_aligned(va_addr, page_sz) ||
+ ((len / page_sz) != n_pages && iova_addrs != NULL) ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot add memory to internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ n = len / page_sz;
+
+ msl = malloc_heap_create_external_seg(va_addr, iova_addrs, n, page_sz,
+ heap_name, heap->socket_id);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_add_external_memory(heap, msl);
+ msl->heap = 1; /* mark it as heap segment */
+ rte_spinlock_unlock(&heap->lock);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
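+
+/* Illustrative flow (hypothetical sizes; addr must be page-aligned memory
+ * obtained elsewhere, e.g. via mmap): making an external region usable
+ * through rte_malloc:
+ *
+ *   rte_malloc_heap_create("ext_heap");
+ *   rte_malloc_heap_memory_add("ext_heap", addr, len, NULL, 0, 1 << 21);
+ *   int sock = rte_malloc_heap_get_socket("ext_heap");
+ *   void *obj = rte_malloc_socket("obj", 4096, 0, sock);
+ *
+ * Passing NULL for iova_addrs leaves the IOVA addresses unknown, which is
+ * fine for pure CPU-side allocations.
+ */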
+
+int
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot remove memory from internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_remove_external_memory(heap, va_addr, len);
+ rte_spinlock_unlock(&heap->lock);
+ if (ret != 0)
+ goto unlock;
+
+ ret = malloc_heap_destroy_external_seg(msl);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
+
+static int
+sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_read_lock();
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to sync to internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* find corresponding memseg list to sync to */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (attach) {
+ ret = rte_fbarray_attach(&msl->memseg_arr);
+ if (ret == 0) {
+ /* notify all subscribers that a new memory area was
+ * added.
+ */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ va_addr, len);
+ } else {
+ ret = -1;
+ goto unlock;
+ }
+ } else {
+ /* notify all subscribers that a memory area is about to
+ * be removed.
+ */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ msl->base_va, msl->len);
+ ret = rte_fbarray_detach(&msl->memseg_arr);
+ if (ret < 0) {
+ ret = -1;
+ goto unlock;
+ }
+ }
+unlock:
+ rte_mcfg_mem_read_unlock();
+ return ret;
+}
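+
+/* Memory added to a named heap is only mapped in the process that added it;
+ * any other process wanting to use allocations from that area attaches to
+ * it first (and detaches before the memory is removed), via the wrappers
+ * below.
+ */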
+
+int
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, true);
+}
+
+int
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, false);
+}
+
+int
+rte_malloc_heap_create(const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ int i, ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* check if there is space in the heap list, or if heap with this name
+ * already exists.
+ */
+ rte_mcfg_mem_write_lock();
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
+ /* existing heap */
+ if (strncmp(heap_name, tmp->name,
+ RTE_HEAP_NAME_MAX_LEN) == 0) {
+ RTE_LOG(ERR, EAL, "Heap %s already exists\n",
+ heap_name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+ /* empty heap */
+ if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) {
+ heap = tmp;
+ break;
+ }
+ }
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n");
+ rte_errno = ENOSPC;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* we're sure that we can create a new heap, so do it */
+ ret = malloc_heap_create(heap, heap_name);
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
+
+int
+rte_malloc_heap_destroy(const char *heap_name)
+{
+ struct malloc_heap *heap = NULL;
+ int ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+	/* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name);
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to destroy internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ /* sanity checks done, now we can destroy the heap */
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_destroy(heap);
+
+	/* if destroy failed, the heap lock was not released - release it here */
+ if (ret < 0)
+ rte_spinlock_unlock(&heap->lock);
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_random.c b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c
new file mode 100644
index 000000000..b7a089ac4
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Ericsson AB
+ */
+
+#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+#include <x86intrin.h>
+#endif
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_random.h>
+
+struct rte_rand_state {
+ uint64_t z1;
+ uint64_t z2;
+ uint64_t z3;
+ uint64_t z4;
+ uint64_t z5;
+} __rte_cache_aligned;
+
+static struct rte_rand_state rand_states[RTE_MAX_LCORE];
+
+static uint32_t
+__rte_rand_lcg32(uint32_t *seed)
+{
+ *seed = 1103515245U * *seed + 12345U;
+
+ return *seed;
+}
+
+static uint64_t
+__rte_rand_lcg64(uint32_t *seed)
+{
+ uint64_t low;
+ uint64_t high;
+
+ /* A 64-bit LCG would have been much cleaner, but good
+ * multiplier/increments for such seem hard to come by.
+ */
+
+ low = __rte_rand_lcg32(seed);
+ high = __rte_rand_lcg32(seed);
+
+ return low | (high << 32);
+}
+
+static uint64_t
+__rte_rand_lfsr258_gen_seed(uint32_t *seed, uint64_t min_value)
+{
+ uint64_t res;
+
+ res = __rte_rand_lcg64(seed);
+
+ if (res < min_value)
+ res += min_value;
+
+ return res;
+}
+
+static void
+__rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
+{
+ uint32_t lcg_seed;
+
+ lcg_seed = (uint32_t)(seed ^ (seed >> 32));
+
+ state->z1 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 2UL);
+ state->z2 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 512UL);
+ state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL);
+ state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL);
+ state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL);
+}
+
+void
+rte_srand(uint64_t seed)
+{
+ unsigned int lcore_id;
+
+ /* add lcore_id to seed to avoid having the same sequence */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+ __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
+}
+
+static __rte_always_inline uint64_t
+__rte_rand_lfsr258_comp(uint64_t z, uint64_t a, uint64_t b, uint64_t c,
+ uint64_t d)
+{
+ return ((z & c) << d) ^ (((z << a) ^ z) >> b);
+}
+
+/* Based on L’Ecuyer, P.: Tables of maximally equidistributed combined
+ * LFSR generators.
+ */
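+/* The five combined recurrences yield an overall period of roughly 2^258,
+ * hence the name lfsr258.
+ */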
+
+static __rte_always_inline uint64_t
+__rte_rand_lfsr258(struct rte_rand_state *state)
+{
+ state->z1 = __rte_rand_lfsr258_comp(state->z1, 1UL, 53UL,
+ 18446744073709551614UL, 10UL);
+ state->z2 = __rte_rand_lfsr258_comp(state->z2, 24UL, 50UL,
+ 18446744073709551104UL, 5UL);
+ state->z3 = __rte_rand_lfsr258_comp(state->z3, 3UL, 23UL,
+ 18446744073709547520UL, 29UL);
+ state->z4 = __rte_rand_lfsr258_comp(state->z4, 5UL, 24UL,
+ 18446744073709420544UL, 23UL);
+ state->z5 = __rte_rand_lfsr258_comp(state->z5, 3UL, 33UL,
+ 18446744073701163008UL, 8UL);
+
+ return state->z1 ^ state->z2 ^ state->z3 ^ state->z4 ^ state->z5;
+}
+
+static __rte_always_inline
+struct rte_rand_state *__rte_rand_get_state(void)
+{
+ unsigned int lcore_id;
+
+ lcore_id = rte_lcore_id();
+
+ if (unlikely(lcore_id == LCORE_ID_ANY))
+ lcore_id = rte_get_master_lcore();
+
+ return &rand_states[lcore_id];
+}
+
+uint64_t
+rte_rand(void)
+{
+ struct rte_rand_state *state;
+
+ state = __rte_rand_get_state();
+
+ return __rte_rand_lfsr258(state);
+}
+
+uint64_t
+rte_rand_max(uint64_t upper_bound)
+{
+ struct rte_rand_state *state;
+ uint8_t ones;
+ uint8_t leading_zeros;
+ uint64_t mask = ~((uint64_t)0);
+ uint64_t res;
+
+ if (unlikely(upper_bound < 2))
+ return 0;
+
+ state = __rte_rand_get_state();
+
+ ones = __builtin_popcountll(upper_bound);
+
+ /* Handle power-of-2 upper_bound as a special case, since it
+ * has no bias issues.
+ */
+ if (unlikely(ones == 1))
+ return __rte_rand_lfsr258(state) & (upper_bound - 1);
+
+ /* The approach to avoiding bias is to create a mask that
+ * stretches beyond the request value range, and up to the
+ * next power-of-2. In case the masked generated random value
+ * is equal to or greater than the upper bound, just discard
+ * the value and generate a new one.
+ */
+
+ leading_zeros = __builtin_clzll(upper_bound);
+ mask >>= leading_zeros;
+
+ do {
+ res = __rte_rand_lfsr258(state) & mask;
+ } while (unlikely(res >= upper_bound));
+
+ return res;
+}
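+
+/* Worked example (illustrative): for upper_bound = 100 (three bits set, so
+ * not a power of two), __builtin_clzll(100) = 57, giving mask = 0x7f (127).
+ * Each draw is masked into [0, 127] and redrawn if it lands in [100, 127],
+ * so on average 128/100 ~= 1.28 draws are needed and no value is biased.
+ */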
+
+static uint64_t
+__rte_random_initial_seed(void)
+{
+#ifdef RTE_LIBEAL_USE_GETENTROPY
+ int ge_rc;
+ uint64_t ge_seed;
+
+ ge_rc = getentropy(&ge_seed, sizeof(ge_seed));
+
+ if (ge_rc == 0)
+ return ge_seed;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+ unsigned int rdseed_low;
+ unsigned int rdseed_high;
+
+ /* first fallback: rdseed instruction, if available */
+ if (_rdseed32_step(&rdseed_low) == 1 &&
+ _rdseed32_step(&rdseed_high) == 1)
+ return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32);
+#endif
+ /* second fallback: seed using rdtsc */
+ return rte_get_tsc_cycles();
+}
+
+RTE_INIT(rte_rand_init)
+{
+ uint64_t seed;
+
+ seed = __rte_random_initial_seed();
+
+ rte_srand(seed);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c
new file mode 100644
index 000000000..42dfa44eb
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ * Copyright(c) Hannes Frederic Sowa
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#include "rte_reciprocal.h"
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d)
+{
+ struct rte_reciprocal R;
+ uint64_t m;
+ int l;
+
+ l = rte_fls_u32(d - 1);
+ m = ((1ULL << 32) * ((1ULL << l) - d));
+ m /= d;
+
+ ++m;
+ R.m = m;
+ R.sh1 = RTE_MIN(l, 1);
+ R.sh2 = RTE_MAX(l - 1, 0);
+
+ return R;
+}
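+
+/* Worked example (illustrative) for d = 7: rte_fls_u32(6) = 3, so
+ * m = ((1 << 32) * (8 - 7)) / 7 + 1 = 613566757, sh1 = 1, sh2 = 2.
+ * The divide helper in rte_reciprocal.h then computes, for a = 100:
+ *
+ *   t = (100 * 613566757) >> 32 = 14
+ *   (t + ((100 - t) >> 1)) >> 2 = (14 + 43) >> 2 = 14 = 100 / 7
+ */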
+
+/*
+ * Code taken from Hacker's Delight:
+ * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt
+ * License permits inclusion here per:
+ * http://www.hackersdelight.org/permissions.htm
+ */
+static uint64_t
+divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
+{
+	const uint64_t b = (1ULL << 32); /* Number base (32 bits). */
+ uint64_t un1, un0, /* Norm. dividend LSD's. */
+ vn1, vn0, /* Norm. divisor digits. */
+ q1, q0, /* Quotient digits. */
+ un64, un21, un10, /* Dividend digit pairs. */
+ rhat; /* A remainder. */
+ int s; /* Shift amount for norm. */
+
+ /* If overflow, set rem. to an impossible value. */
+ if (u1 >= v) {
+ if (r != NULL)
+ *r = (uint64_t) -1;
+ return (uint64_t) -1;
+ }
+
+ /* Count leading zeros. */
+ s = __builtin_clzll(v);
+ if (s > 0) {
+ v = v << s;
+ un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
+ un10 = u0 << s;
+	} else {
+		un64 = u1 | u0;
+ un10 = u0;
+ }
+
+ vn1 = v >> 32;
+ vn0 = v & 0xFFFFFFFF;
+
+ un1 = un10 >> 32;
+ un0 = un10 & 0xFFFFFFFF;
+
+ q1 = un64/vn1;
+ rhat = un64 - q1*vn1;
+again1:
+ if (q1 >= b || q1*vn0 > b*rhat + un1) {
+ q1 = q1 - 1;
+ rhat = rhat + vn1;
+ if (rhat < b)
+ goto again1;
+ }
+
+ un21 = un64*b + un1 - q1*v;
+
+ q0 = un21/vn1;
+ rhat = un21 - q0*vn1;
+again2:
+ if (q0 >= b || q0*vn0 > b*rhat + un0) {
+ q0 = q0 - 1;
+ rhat = rhat + vn1;
+ if (rhat < b)
+ goto again2;
+ }
+
+ if (r != NULL)
+ *r = (un21*b + un0 - q0*v) >> s;
+ return q1*b + q0;
+}
+
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d)
+{
+ struct rte_reciprocal_u64 R;
+ uint64_t m;
+ uint64_t r;
+ int l;
+
+ l = 63 - __builtin_clzll(d);
+
+ m = divide_128_div_64_to_64((1ULL << l), 0, d, &r) << 1;
+ if (r << 1 < r || r << 1 >= d)
+ m++;
+ m = (1ULL << l) - d ? m + 1 : 1;
+ R.m = m;
+
+ R.sh1 = l > 1 ? 1 : l;
+ R.sh2 = (l > 0) ? l : 0;
+ R.sh2 -= R.sh2 && (m == 1) ? 1 : 0;
+
+ return R;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_service.c b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c
new file mode 100644
index 000000000..6123a2124
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c
@@ -0,0 +1,919 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+
+#include <rte_compat.h>
+#include <rte_service.h>
+#include <rte_service_component.h>
+
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+
+#include "eal_private.h"
+
+#define RTE_SERVICE_NUM_MAX 64
+
+#define SERVICE_F_REGISTERED (1 << 0)
+#define SERVICE_F_STATS_ENABLED (1 << 1)
+#define SERVICE_F_START_CHECK (1 << 2)
+
+/* runstates for services and lcores, denoting if they are active or not */
+#define RUNSTATE_STOPPED 0
+#define RUNSTATE_RUNNING 1
+
+/* internal representation of a service */
+struct rte_service_spec_impl {
+ /* public part of the struct */
+ struct rte_service_spec spec;
+
+ /* spin lock that when set indicates a service core is currently
+ * running this service callback. When not set, a core may take the
+ * lock and then run the service callback.
+ */
+ rte_spinlock_t execute_lock;
+
+ /* API set/get-able variables */
+ int8_t app_runstate;
+ int8_t comp_runstate;
+ uint8_t internal_flags;
+
+ /* per service statistics */
+ /* Indicates how many cores the service is mapped to run on.
+ * It does not indicate the number of cores the service is running
+ * on currently.
+ */
+ uint32_t num_mapped_cores;
+ uint64_t calls;
+ uint64_t cycles_spent;
+} __rte_cache_aligned;
+
+/* the internal values of a service core */
+struct core_state {
+	/* bitmap of service IDs mapped to run on this core */
+ uint64_t service_mask;
+ uint8_t runstate; /* running or stopped */
+ uint8_t is_service_core; /* set if core is currently a service core */
+ uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+ uint64_t loops;
+ uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
+} __rte_cache_aligned;
+
+static uint32_t rte_service_count;
+static struct rte_service_spec_impl *rte_services;
+static struct core_state *lcore_states;
+static uint32_t rte_service_library_initialized;
+
+int32_t
+rte_service_init(void)
+{
+ if (rte_service_library_initialized) {
+ RTE_LOG(NOTICE, EAL,
+ "service library init() called, init flag %d\n",
+ rte_service_library_initialized);
+ return -EALREADY;
+ }
+
+ rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX,
+ sizeof(struct rte_service_spec_impl),
+ RTE_CACHE_LINE_SIZE);
+ if (!rte_services) {
+ RTE_LOG(ERR, EAL, "error allocating rte services array\n");
+ goto fail_mem;
+ }
+
+ lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
+ sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
+ if (!lcore_states) {
+ RTE_LOG(ERR, EAL, "error allocating core states array\n");
+ goto fail_mem;
+ }
+
+ int i;
+ int count = 0;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role == ROLE_SERVICE) {
+ if ((unsigned int)i == cfg->master_lcore)
+ continue;
+ rte_service_lcore_add(i);
+ count++;
+ }
+ }
+
+ rte_service_library_initialized = 1;
+ return 0;
+fail_mem:
+ rte_free(rte_services);
+ rte_free(lcore_states);
+ return -ENOMEM;
+}
+
+void
+rte_service_finalize(void)
+{
+ if (!rte_service_library_initialized)
+ return;
+
+ rte_service_lcore_reset_all();
+ rte_eal_mp_wait_lcore();
+
+ rte_free(rte_services);
+ rte_free(lcore_states);
+
+ rte_service_library_initialized = 0;
+}
+
+/* returns 1 if service is registered and has not been unregistered;
+ * returns 0 if service was never registered, or has been unregistered
+ */
+static inline int
+service_valid(uint32_t id)
+{
+ return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
+}
+
+static struct rte_service_spec_impl *
+service_get(uint32_t id)
+{
+ return &rte_services[id];
+}
+
+/* validate ID and retrieve service pointer, or return error value */
+#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \
+ return retval; \
+ service = &rte_services[id]; \
+} while (0)
+
+/* returns 1 if statistics should be collected for the service;
+ * returns 0 otherwise
+ */
+static inline int
+service_stats_enabled(struct rte_service_spec_impl *impl)
+{
+ return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED);
+}
+
+static inline int
+service_mt_safe(struct rte_service_spec_impl *s)
+{
+ return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
+}
+
+int32_t
+rte_service_set_stats_enable(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_STATS_ENABLED;
+ else
+ s->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+
+ return 0;
+}
+
+int32_t
+rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_START_CHECK;
+ else
+ s->internal_flags &= ~(SERVICE_F_START_CHECK);
+
+ return 0;
+}
+
+uint32_t
+rte_service_get_count(void)
+{
+ return rte_service_count;
+}
+
+int32_t
+rte_service_get_by_name(const char *name, uint32_t *service_id)
+{
+ if (!service_id)
+ return -EINVAL;
+
+ int i;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (service_valid(i) &&
+ strcmp(name, rte_services[i].spec.name) == 0) {
+ *service_id = i;
+ return 0;
+ }
+ }
+
+ return -ENODEV;
+}
+
+const char *
+rte_service_get_name(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+ return s->spec.name;
+}
+
+int32_t
+rte_service_probe_capability(uint32_t id, uint32_t capability)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ return !!(s->spec.capabilities & capability);
+}
+
+int32_t
+rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *id_ptr)
+{
+ uint32_t i;
+ int32_t free_slot = -1;
+
+ if (spec->callback == NULL || strlen(spec->name) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i)) {
+ free_slot = i;
+ break;
+ }
+ }
+
+ if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX))
+ return -ENOSPC;
+
+ struct rte_service_spec_impl *s = &rte_services[free_slot];
+ s->spec = *spec;
+ s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK;
+
+ rte_service_count++;
+
+ if (id_ptr)
+ *id_ptr = free_slot;
+
+ return 0;
+}
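+
+/* Illustrative registration (hypothetical callback): a component registers
+ * its service and marks it ready to run:
+ *
+ *   static int32_t my_service_run(void *args) { ...; return 0; }
+ *
+ *   struct rte_service_spec spec = {
+ *       .name = "my_service",
+ *       .callback = my_service_run,
+ *   };
+ *   uint32_t id;
+ *   if (rte_service_component_register(&spec, &id) == 0)
+ *       rte_service_component_runstate_set(id, 1);
+ */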
+
+int32_t
+rte_service_component_unregister(uint32_t id)
+{
+ uint32_t i;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ rte_service_count--;
+
+ s->internal_flags &= ~(SERVICE_F_REGISTERED);
+
+ /* clear the run-bit in all cores */
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+
+ memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
+
+ return 0;
+}
+
+int32_t
+rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+	/* comp_runstate acts as the guard variable. Use store-release
+	 * memory order. This synchronizes with load-acquire in
+	 * service_run and service_runstate_get functions.
+ */
+ if (runstate)
+ __atomic_store_n(&s->comp_runstate, RUNSTATE_RUNNING,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_store_n(&s->comp_runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
+
+int32_t
+rte_service_runstate_set(uint32_t id, uint32_t runstate)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+	/* app_runstate acts as the guard variable. Use store-release
+	 * memory order. This synchronizes with load-acquire in
+	 * service_run and service_runstate_get functions.
+ */
+ if (runstate)
+ __atomic_store_n(&s->app_runstate, RUNSTATE_RUNNING,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_store_n(&s->app_runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
+
+int32_t
+rte_service_runstate_get(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* comp_runstate and app_runstate act as the guard variables.
+ * Use load-acquire memory order. This synchronizes with
+ * store-release in service state set functions.
+ */
+ if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING &&
+ __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING) {
+ int check_disabled = !(s->internal_flags &
+ SERVICE_F_START_CHECK);
+ int lcore_mapped = (__atomic_load_n(&s->num_mapped_cores,
+ __ATOMIC_RELAXED) > 0);
+
+ return (check_disabled | lcore_mapped);
+ } else
+ return 0;
+
+}
+
+static inline void
+service_runner_do_callback(struct rte_service_spec_impl *s,
+ struct core_state *cs, uint32_t service_idx)
+{
+ void *userdata = s->spec.callback_userdata;
+
+ if (service_stats_enabled(s)) {
+ uint64_t start = rte_rdtsc();
+ s->spec.callback(userdata);
+ uint64_t end = rte_rdtsc();
+ s->cycles_spent += end - start;
+ cs->calls_per_service[service_idx]++;
+ s->calls++;
+ } else
+ s->spec.callback(userdata);
+}
+
+
+/* Expects the service 's' is valid. */
+static int32_t
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+ struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
+{
+ if (!s)
+ return -EINVAL;
+
+ /* comp_runstate and app_runstate act as the guard variables.
+ * Use load-acquire memory order. This synchronizes with
+ * store-release in service state set functions.
+ */
+ if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_RUNNING ||
+ __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_RUNNING ||
+ !(service_mask & (UINT64_C(1) << i))) {
+ cs->service_active_on_lcore[i] = 0;
+ return -ENOEXEC;
+ }
+
+ cs->service_active_on_lcore[i] = 1;
+
+ if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
+ if (!rte_spinlock_trylock(&s->execute_lock))
+ return -EBUSY;
+
+ service_runner_do_callback(s, cs, i);
+ rte_spinlock_unlock(&s->execute_lock);
+ } else
+ service_runner_do_callback(s, cs, i);
+
+ return 0;
+}
+
+int32_t
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (lcore_states[i].service_active_on_lcore[id])
+ return 1;
+ }
+
+ return 0;
+}
+
+int32_t
+rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
+{
+ struct core_state *cs = &lcore_states[rte_lcore_id()];
+ struct rte_service_spec_impl *s;
+
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* Increment num_mapped_cores to reflect that this core is
+	 * now mapped and capable of running the service.
+ */
+ __atomic_add_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED);
+
+ int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+
+ __atomic_sub_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED);
+
+ return ret;
+}
+
+static int32_t
+service_runner_func(void *arg)
+{
+ RTE_SET_USED(arg);
+ uint32_t i;
+ const int lcore = rte_lcore_id();
+ struct core_state *cs = &lcore_states[lcore];
+
+	/* runstate acts as the guard variable. Use load-acquire
+ * memory order here to synchronize with store-release
+ * in runstate update functions.
+ */
+ while (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING) {
+ const uint64_t service_mask = cs->service_mask;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ /* return value ignored as no change to code flow */
+ service_run(i, cs, service_mask, service_get(i), 1);
+ }
+
+ cs->loops++;
+ }
+
+ lcore_config[lcore].state = WAIT;
+
+ return 0;
+}
+
+int32_t
+rte_service_lcore_count(void)
+{
+ int32_t count = 0;
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ count += lcore_states[i].is_service_core;
+ return count;
+}
+
+int32_t
+rte_service_lcore_list(uint32_t array[], uint32_t n)
+{
+ uint32_t count = rte_service_lcore_count();
+ if (count > n)
+ return -ENOMEM;
+
+ if (!array)
+ return -EINVAL;
+
+ uint32_t i;
+ uint32_t idx = 0;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct core_state *cs = &lcore_states[i];
+ if (cs->is_service_core) {
+ array[idx] = i;
+ idx++;
+ }
+ }
+
+ return count;
+}
+
+int32_t
+rte_service_lcore_count_services(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ return __builtin_popcountll(cs->service_mask);
+}
+
+int32_t
+rte_service_start_with_defaults(void)
+{
+ /* create a default mapping from cores to services, then start the
+ * services to make them transparent to unaware applications.
+ */
+ uint32_t i;
+ int ret;
+ uint32_t count = rte_service_get_count();
+
+ int32_t lcore_iter = 0;
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+
+ if (lcore_count == 0)
+ return -ENOTSUP;
+
+ for (i = 0; (int)i < lcore_count; i++)
+ rte_service_lcore_start(ids[i]);
+
+ for (i = 0; i < count; i++) {
+ /* do 1:1 core mapping here, with each service getting
+		 * assigned a single core by default. Multiple services will
+		 * multiplex onto a single core, or map 1:1 if there are the
+		 * same number of services as service cores
+ */
+ ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1);
+ if (ret)
+ return -ENODEV;
+
+ lcore_iter++;
+ if (lcore_iter >= lcore_count)
+ lcore_iter = 0;
+
+ ret = rte_service_runstate_set(i, 1);
+ if (ret)
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
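+
+/* The manual alternative to the defaults above (hypothetical ids): map a
+ * service to a chosen lcore and start it explicitly:
+ *
+ *   rte_service_lcore_add(2);
+ *   rte_service_map_lcore_set(id, 2, 1);
+ *   rte_service_runstate_set(id, 1);
+ *   rte_service_lcore_start(2);
+ */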
+
+static int32_t
+service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
+{
+ /* validate ID, or return error value */
+ if (sid >= RTE_SERVICE_NUM_MAX || !service_valid(sid) ||
+ lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
+ return -EINVAL;
+
+ uint64_t sid_mask = UINT64_C(1) << sid;
+ if (set) {
+ uint64_t lcore_mapped = lcore_states[lcore].service_mask &
+ sid_mask;
+
+ if (*set && !lcore_mapped) {
+ lcore_states[lcore].service_mask |= sid_mask;
+ __atomic_add_fetch(&rte_services[sid].num_mapped_cores,
+ 1, __ATOMIC_RELAXED);
+ }
+ if (!*set && lcore_mapped) {
+ lcore_states[lcore].service_mask &= ~(sid_mask);
+ __atomic_sub_fetch(&rte_services[sid].num_mapped_cores,
+ 1, __ATOMIC_RELAXED);
+ }
+ }
+
+ if (enabled)
+ *enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+
+ return 0;
+}
+
+int32_t
+rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
+{
+ uint32_t on = enabled > 0;
+ return service_update(id, lcore, &on, NULL);
+}
+
+int32_t
+rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
+{
+ uint32_t enabled;
+ int ret = service_update(id, lcore, NULL, &enabled);
+ if (ret == 0)
+ return enabled;
+ return ret;
+}
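+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * toggling a service <-> lcore mapping and reading it back. "sid" is
+ * assumed to be a valid service id and "lcore" an added service core.
+ */
+static __rte_unused int
+example_toggle_mapping(uint32_t sid, uint32_t lcore)
+{
+ int enabled = rte_service_map_lcore_get(sid, lcore);
+ if (enabled < 0)
+ return enabled; /* -EINVAL: bad id, lcore or core role */
+
+ /* invert the current mapping state */
+ return rte_service_map_lcore_set(sid, lcore, !enabled);
+}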
+
+static void
+set_lcore_state(uint32_t lcore, int32_t state)
+{
+ /* mark core state in hugepage backed config */
+ struct rte_config *cfg = rte_eal_get_configuration();
+ cfg->lcore_role[lcore] = state;
+
+ /* mark state in process local lcore_config */
+ lcore_config[lcore].core_role = state;
+
+ /* update per-lcore optimized state tracking */
+ lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
+}
+
+int32_t
+rte_service_lcore_reset_all(void)
+{
+ /* loop over cores, reset all to mask 0 */
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_states[i].is_service_core) {
+ lcore_states[i].service_mask = 0;
+ set_lcore_state(i, ROLE_RTE);
+ /* The runstate acts as the guard variable. Use
+ * store-release memory order here to synchronize
+ * with the load-acquire in the runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[i].runstate,
+ RUNSTATE_STOPPED, __ATOMIC_RELEASE);
+ }
+ }
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
+ __atomic_store_n(&rte_services[i].num_mapped_cores, 0,
+ __ATOMIC_RELAXED);
+
+ return 0;
+}
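+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * a full teardown can simply force every service core back to its
+ * defaults; the reset clears each service mask, restores ROLE_RTE and
+ * stores RUNSTATE_STOPPED, so any runner loop exits on its next
+ * iteration.
+ */
+static __rte_unused void
+example_teardown_service_cores(void)
+{
+ rte_service_lcore_reset_all();
+}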
+
+int32_t
+rte_service_lcore_add(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+ if (lcore_states[lcore].is_service_core)
+ return -EALREADY;
+
+ set_lcore_state(lcore, ROLE_SERVICE);
+
+ /* ensure that after adding a core the mask and state are defaults */
+ lcore_states[lcore].service_mask = 0;
+ /* Use store-release memory order here to synchronize with
+ * load-acquire in runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return rte_eal_wait_lcore(lcore);
+}
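+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * the usual lifecycle for enrolling an idle worker lcore as a service
+ * core at runtime: add, map, then start.
+ */
+static __rte_unused int
+example_enroll_service_core(uint32_t lcore, uint32_t sid)
+{
+ int ret = rte_service_lcore_add(lcore);
+ if (ret < 0 && ret != -EALREADY)
+ return ret;
+
+ /* map the service before starting, so the poll loop has work */
+ ret = rte_service_map_lcore_set(sid, lcore, 1);
+ if (ret < 0)
+ return ret;
+
+ return rte_service_lcore_start(lcore);
+}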
+
+int32_t
+rte_service_lcore_del(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ /* The runstate acts as the guard variable. Use load-acquire
+ * memory order here to synchronize with the store-release
+ * in the runstate update functions.
+ */
+ if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_STOPPED)
+ return -EBUSY;
+
+ set_lcore_state(lcore, ROLE_RTE);
+
+ rte_smp_wmb();
+ return 0;
+}
+
+int32_t
+rte_service_lcore_start(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ /* The runstate acts as the guard variable. Use load-acquire
+ * memory order here to synchronize with the store-release
+ * in the runstate update functions.
+ */
+ if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING)
+ return -EALREADY;
+
+ /* Set the core to the run state first, then launch; otherwise the
+ * launched thread would return immediately, since the runstate is
+ * what keeps it in the service poll loop.
+ *
+ * Use store-release memory order here to synchronize with the
+ * load-acquire in the runstate read functions.
+ */
+ __atomic_store_n(&cs->runstate, RUNSTATE_RUNNING, __ATOMIC_RELEASE);
+
+ int ret = rte_eal_remote_launch(service_runner_func, NULL, lcore);
+ /* returns -EBUSY if the core is already launched, 0 on success */
+ return ret;
+}
+
+int32_t
+rte_service_lcore_stop(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ /* The runstate acts as the guard variable. Use load-acquire
+ * memory order here to synchronize with the store-release
+ * in the runstate update functions.
+ */
+ if (__atomic_load_n(&lcore_states[lcore].runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_STOPPED)
+ return -EALREADY;
+
+ uint32_t i;
+ uint64_t service_mask = lcore_states[lcore].service_mask;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ int32_t enabled = service_mask & (UINT64_C(1) << i);
+ int32_t service_running = rte_service_runstate_get(i);
+ int32_t only_core = (1 ==
+ __atomic_load_n(&rte_services[i].num_mapped_cores,
+ __ATOMIC_RELAXED));
+
+ /* If the core is mapped, the service is running, and this is
+ * the only core mapped to it, then stopping this core would
+ * leave the service with no runner; fail instead.
+ */
+ if (enabled && service_running && only_core)
+ return -EBUSY;
+ }
+
+ /* Use store-release memory order here to synchronize with
+ * load-acquire in runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
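+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * stopping a service core safely. -EBUSY means some running service
+ * would lose its only mapped core, so that mapping must be moved or
+ * the service stopped first.
+ */
+static __rte_unused int
+example_stop_service_core(uint32_t lcore)
+{
+ int ret = rte_service_lcore_stop(lcore);
+ if (ret == -EBUSY)
+ fprintf(stderr, "lcore %"PRIu32" is the sole runner of a service\n",
+ lcore);
+ return ret;
+}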
+
+int32_t
+rte_service_attr_get(uint32_t id, uint32_t attr_id, uint64_t *attr_value)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (!attr_value)
+ return -EINVAL;
+
+ switch (attr_id) {
+ case RTE_SERVICE_ATTR_CYCLES:
+ *attr_value = s->cycles_spent;
+ return 0;
+ case RTE_SERVICE_ATTR_CALL_COUNT:
+ *attr_value = s->calls;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
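+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * deriving an average cycle cost per call from the two attributes
+ * above; meaningful only if statistics collection is enabled for the
+ * service.
+ */
+static __rte_unused uint64_t
+example_avg_cycles_per_call(uint32_t sid)
+{
+ uint64_t cycles = 0, calls = 0;
+
+ if (rte_service_attr_get(sid, RTE_SERVICE_ATTR_CYCLES, &cycles) != 0 ||
+ rte_service_attr_get(sid, RTE_SERVICE_ATTR_CALL_COUNT, &calls) != 0)
+ return 0;
+
+ return calls ? cycles / calls : 0;
+}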
+
+int32_t
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
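+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * the loop counter doubles as a cheap liveness indicator; a runner
+ * whose count does not advance between two samples is stopped or
+ * stuck.
+ */
+static __rte_unused int
+example_lcore_is_looping(uint32_t lcore, uint64_t prev_loops)
+{
+ uint64_t loops = 0;
+
+ if (rte_service_lcore_attr_get(lcore, RTE_SERVICE_LCORE_ATTR_LOOPS,
+ &loops) != 0)
+ return 0;
+
+ return loops != prev_loops;
+}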
+
+static void
+service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint32_t reset)
+{
+ /* avoid divide by zero */
+ int calls = 1;
+ if (s->calls != 0)
+ calls = s->calls;
+
+ if (reset) {
+ s->cycles_spent = 0;
+ s->calls = 0;
+ return;
+ }
+
+ if (f == NULL)
+ return;
+
+ fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %"
+ PRIu64"\tavg: %"PRIu64"\n",
+ s->spec.name, service_stats_enabled(s), s->calls,
+ s->cycles_spent, s->cycles_spent / calls);
+}
+
+int32_t
+rte_service_attr_reset_all(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ int reset = 1;
+ service_dump_one(NULL, s, reset);
+ return 0;
+}
+
+int32_t
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
+static void
+service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
+{
+ uint32_t i;
+ struct core_state *cs = &lcore_states[lcore];
+
+ fprintf(f, "%02d\t", lcore);
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]);
+ if (reset)
+ cs->calls_per_service[i] = 0;
+ }
+ fprintf(f, "\n");
+}
+
+int32_t
+rte_service_dump(FILE *f, uint32_t id)
+{
+ uint32_t i;
+ int print_one = (id != UINT32_MAX);
+
+ /* print only the specified service */
+ if (print_one) {
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ fprintf(f, "Service %s Summary\n", s->spec.name);
+ uint32_t reset = 0;
+ service_dump_one(f, s, reset);
+ return 0;
+ }
+
+ /* print all services, as UINT32_MAX was passed as id */
+ fprintf(f, "Services Summary\n");
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ uint32_t reset = 0;
+ service_dump_one(f, &rte_services[i], reset);
+ }
+
+ fprintf(f, "Service Cores Summary\n");
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role != ROLE_SERVICE)
+ continue;
+
+ uint32_t reset = 0;
+ service_dump_calls_per_lcore(f, i, reset);
+ }
+
+ return 0;
+}
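+
+/* Illustrative sketch (not upstream code, hypothetical helper name):
+ * dumping the statistics of every service and service core; passing
+ * UINT32_MAX as the id selects the full summary, per the logic above.
+ */
+static __rte_unused void
+example_dump_all_services(void)
+{
+ /* stats are printed to the given stream, not reset */
+ rte_service_dump(stdout, UINT32_MAX);
+}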