author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 18:45:59 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 18:45:59 +0000
commit     19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree       42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/dpdk/lib/librte_eal/common
parent     Initial commit. (diff)
Adding upstream version 16.2.11+ds. (tag: upstream/16.2.11+ds, branch: upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/lib/librte_eal/common')
50 files changed, 18271 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
new file mode 100644
index 000000000..baa5b532a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2016 NXP
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_bus.h>
+#include <rte_debug.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#include "eal_private.h"
+
+static struct rte_bus_list rte_bus_list =
+	TAILQ_HEAD_INITIALIZER(rte_bus_list);
+
+void
+rte_bus_register(struct rte_bus *bus)
+{
+	RTE_VERIFY(bus);
+	RTE_VERIFY(bus->name && strlen(bus->name));
+	/* A bus should mandatorily have the scan implemented */
+	RTE_VERIFY(bus->scan);
+	RTE_VERIFY(bus->probe);
+	RTE_VERIFY(bus->find_device);
+	/* Buses supporting driver plug also require unplug. */
+	RTE_VERIFY(!bus->plug || bus->unplug);
+
+	TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
+	RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
+}
+
+void
+rte_bus_unregister(struct rte_bus *bus)
+{
+	TAILQ_REMOVE(&rte_bus_list, bus, next);
+	RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name);
+}
+
+/* Scan all the buses for registered devices */
+int
+rte_bus_scan(void)
+{
+	int ret;
+	struct rte_bus *bus = NULL;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		ret = bus->scan();
+		if (ret)
+			RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
+				bus->name);
+	}
+
+	return 0;
+}
+
+/* Probe all devices of all buses */
+int
+rte_bus_probe(void)
+{
+	int ret;
+	struct rte_bus *bus, *vbus = NULL;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		if (!strcmp(bus->name, "vdev")) {
+			vbus = bus;
+			continue;
+		}
+
+		ret = bus->probe();
+		if (ret)
+			RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+				bus->name);
+	}
+
+	if (vbus) {
+		ret = vbus->probe();
+		if (ret)
+			RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+				vbus->name);
+	}
+
+	return 0;
+}
+
+/* Dump information of a single bus */
+static int
+bus_dump_one(FILE *f, struct rte_bus *bus)
+{
+	int ret;
+
+	/* For now, dump only the bus name */
+	ret = fprintf(f, " %s\n", bus->name);
+
+	/* Error in case of inability in writing to stream */
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+void
+rte_bus_dump(FILE *f)
+{
+	int ret;
+	struct rte_bus *bus;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		ret = bus_dump_one(f, bus);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n",
+				ret);
+			break;
+		}
+	}
+}
+
+struct rte_bus *
+rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+	     const void *data)
+{
+	struct rte_bus *bus;
+
+	if (start != NULL)
+		bus = TAILQ_NEXT(start, next);
+	else
+		bus = TAILQ_FIRST(&rte_bus_list);
+	while (bus != NULL) {
+		if (cmp(bus, data) == 0)
+			break;
+		bus = TAILQ_NEXT(bus, next);
+	}
+	return bus;
+}
+
+static int
+cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
+{
+	const struct rte_device *dev2 = _dev2;
+
+	return dev1 != dev2;
+}
+
+static int
+bus_find_device(const struct rte_bus *bus, const void *_dev)
+{
+	struct rte_device *dev;
+
+	dev = bus->find_device(NULL, cmp_rte_device, _dev);
+	return dev == NULL;
+}
+
+struct rte_bus *
+rte_bus_find_by_device(const struct rte_device *dev)
+{
+	return rte_bus_find(NULL, bus_find_device, (const void *)dev);
+}
+
+static int
+cmp_bus_name(const struct rte_bus *bus, const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(bus->name, name);
+}
+
+struct rte_bus *
+rte_bus_find_by_name(const char *busname)
+{
+	return rte_bus_find(NULL, cmp_bus_name, (const void *)busname);
+}
+
+static int
+bus_can_parse(const struct rte_bus *bus, const void *_name)
+{
+	const char *name = _name;
+
+	return !(bus->parse && bus->parse(name, NULL) == 0);
+}
+
+struct rte_bus *
+rte_bus_find_by_device_name(const char *str)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	char *c;
+
+	strlcpy(name, str, sizeof(name));
+	c = strchr(name, ',');
+	if (c != NULL)
+		c[0] = '\0';
+	return rte_bus_find(NULL, bus_can_parse, name);
+}
+
+
+/*
+ * Get iommu class of devices on the bus.
+ */
+enum rte_iova_mode
+rte_bus_get_iommu_class(void)
+{
+	enum rte_iova_mode mode = RTE_IOVA_DC;
+	bool buses_want_va = false;
+	bool buses_want_pa = false;
+	struct rte_bus *bus;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		enum rte_iova_mode bus_iova_mode;
+
+		if (bus->get_iommu_class == NULL)
+			continue;
+
+		bus_iova_mode = bus->get_iommu_class();
+		RTE_LOG(DEBUG, EAL, "Bus %s wants IOVA as '%s'\n",
+			bus->name,
+			bus_iova_mode == RTE_IOVA_DC ? "DC" :
+			(bus_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
+		if (bus_iova_mode == RTE_IOVA_PA)
+			buses_want_pa = true;
+		else if (bus_iova_mode == RTE_IOVA_VA)
+			buses_want_va = true;
+	}
+	if (buses_want_va && !buses_want_pa) {
+		mode = RTE_IOVA_VA;
+	} else if (buses_want_pa && !buses_want_va) {
+		mode = RTE_IOVA_PA;
+	} else {
+		mode = RTE_IOVA_DC;
+		if (buses_want_va) {
+			RTE_LOG(WARNING, EAL, "Some buses want 'VA' but forcing 'DC' because other buses want 'PA'.\n");
+			RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all buses may be able to initialize.\n");
+		}
+	}
+
+	return mode;
+}
+
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+		  const void *failure_addr)
+{
+	int ret;
+
+	if (!bus->sigbus_handler)
+		return -1;
+
+	ret = bus->sigbus_handler(failure_addr);
+
+	/* find bus but handle failed, keep the errno be set. */
+	if (ret < 0 && rte_errno == 0)
+		rte_errno = ENOTSUP;
+
+	return ret > 0;
+}
+
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+	struct rte_bus *bus;
+
+	int ret = 0;
+	int old_errno = rte_errno;
+
+	rte_errno = 0;
+
+	bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+	/* can not find bus. */
+	if (!bus)
+		return 1;
+	/* find bus but handle failed, pass on the new errno. */
+	else if (rte_errno != 0)
+		return -1;
+
+	/* restore the old errno. */
+	rte_errno = old_errno;
+
+	return ret;
+}
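
Usage sketch (not part of this patch; the my_bus_* names are hypothetical): a bus implementation has to satisfy the invariants rte_bus_register() verifies above — scan, probe and find_device must be set, and plug implies unplug — and typically registers itself at constructor time through the RTE_REGISTER_BUS() helper from rte_bus.h:

#include <rte_bus.h>

static int my_bus_scan(void) { return 0; /* discover devices here */ }
static int my_bus_probe(void) { return 0; /* match devices to drivers */ }

static struct rte_device *
my_bus_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
		   const void *data)
{
	(void)start; (void)cmp; (void)data;
	return NULL; /* walk this bus's device list here */
}

static struct rte_bus my_bus = {
	.scan = my_bus_scan,
	.probe = my_bus_probe,
	.find_device = my_bus_find_device,
	/* setting .plug would require .unplug as well */
};
RTE_REGISTER_BUS(my_bus, my_bus);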
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 000000000..0187076af
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+static struct rte_class_list rte_class_list =
+	TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+void
+rte_class_register(struct rte_class *class)
+{
+	RTE_VERIFY(class);
+	RTE_VERIFY(class->name && strlen(class->name));
+
+	TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+	RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+void
+rte_class_unregister(struct rte_class *class)
+{
+	TAILQ_REMOVE(&rte_class_list, class, next);
+	RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+	       const void *data)
+{
+	struct rte_class *cls;
+
+	if (start != NULL)
+		cls = TAILQ_NEXT(start, next);
+	else
+		cls = TAILQ_FIRST(&rte_class_list);
+	while (cls != NULL) {
+		if (cmp(cls, data) == 0)
+			break;
+		cls = TAILQ_NEXT(cls, next);
+	}
+	return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(class->name, name);
+}
+
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+	return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
new file mode 100644
index 000000000..dc5f75d05
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+int
+rte_cpu_is_supported(void)
+{
+	/* This is generated at compile-time by the build system */
+	static const enum rte_cpu_flag_t compile_time_flags[] = {
+		RTE_COMPILE_TIME_CPUFLAGS
+	};
+	unsigned count = RTE_DIM(compile_time_flags), i;
+	int ret;
+
+	for (i = 0; i < count; i++) {
+		ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: CPU feature flag lookup failed with error %d\n",
+				ret);
+			return 0;
+		}
+		if (!ret) {
+			fprintf(stderr,
+				"ERROR: This system does not support \"%s\".\n"
+				"Please check that RTE_MACHINE is set correctly.\n",
+				rte_cpu_get_flag_name(compile_time_flags[i]));
+			return 0;
+		}
+	}
+
+	return 1;
+}
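
A minimal sketch of the intended call pattern (not part of this patch; rte_eal_init() performs this check itself, so an explicit call is only needed in code that runs before EAL initialization):

#include <stdio.h>
#include <rte_cpuflags.h>

int main(void)
{
	/* refuse to run if the CPU lacks a feature this binary was built for */
	if (!rte_cpu_is_supported()) {
		fprintf(stderr, "unsupported CPU; rebuild with a different RTE_MACHINE\n");
		return 1;
	}
	return 0;
}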
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
new file mode 100644
index 000000000..9e4f09d83
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
@@ -0,0 +1,793 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_compat.h>
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "hotplug_mp.h"
+
+/**
+ * The device event callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the device name.
+ */
+struct dev_event_callback {
+	TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
+	rte_dev_event_cb_fn cb_fn;            /**< Callback address */
+	void *cb_arg;                         /**< Callback parameter */
+	char *dev_name;	 /**< Callback device name, NULL is for all device */
+	uint32_t active;                      /**< Callback is executing */
+};
+
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
+
+/* The device event callback list for all registered callbacks. */
+static struct dev_event_cb_list dev_event_cbs;
+
+/* spinlock for device callbacks */
+static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
+
+struct dev_next_ctx {
+	struct rte_dev_iterator *it;
+	const char *bus_str;
+	const char *cls_str;
+};
+
+#define CTX(it, bus_str, cls_str) \
+	(&(const struct dev_next_ctx){ \
+		.it = it, \
+		.bus_str = bus_str, \
+		.cls_str = cls_str, \
+	})
+
+#define ITCTX(ptr) \
+	(((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+	(((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+	(((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(dev->name, name);
+}
+
+int
+rte_dev_is_probed(const struct rte_device *dev)
+{
+	/* The field driver should be set only when the probe is successful. */
+	return dev->driver != NULL;
+}
+
+/* helper function to build devargs, caller should free the memory */
+static int
+build_devargs(const char *busname, const char *devname,
+	      const char *drvargs, char **devargs)
+{
+	int length;
+
+	length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs);
+	if (length < 0)
+		return -EINVAL;
+
+	*devargs = malloc(length + 1);
+	if (*devargs == NULL)
+		return -ENOMEM;
+
+	length = snprintf(*devargs, length + 1, "%s:%s,%s",
+			busname, devname, drvargs);
+	if (length < 0) {
+		free(*devargs);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int
+rte_eal_hotplug_add(const char *busname, const char *devname,
+		    const char *drvargs)
+{
+
+	char *devargs;
+	int ret;
+
+	ret = build_devargs(busname, devname, drvargs, &devargs);
+	if (ret != 0)
+		return ret;
+
+	ret = rte_dev_probe(devargs);
+	free(devargs);
+
+	return ret;
+}
+
+/* probe device at local process. */
+int
+local_dev_probe(const char *devargs, struct rte_device **new_dev)
+{
+	struct rte_device *dev;
+	struct rte_devargs *da;
+	int ret;
+
+	*new_dev = NULL;
+	da = calloc(1, sizeof(*da));
+	if (da == NULL)
+		return -ENOMEM;
+
+	ret = rte_devargs_parse(da, devargs);
+	if (ret)
+		goto err_devarg;
+
+	if (da->bus->plug == NULL) {
+		RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+			da->bus->name);
+		ret = -ENOTSUP;
+		goto err_devarg;
+	}
+
+	ret = rte_devargs_insert(&da);
+	if (ret)
+		goto err_devarg;
+
+	/* the rte_devargs will be referenced in the matching rte_device */
+	ret = da->bus->scan();
+	if (ret)
+		goto err_devarg;
+
+	dev = da->bus->find_device(NULL, cmp_dev_name, da->name);
+	if (dev == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
+			da->name);
+		ret = -ENODEV;
+		goto err_devarg;
+	}
+	/* Since there is a matching device, it is now its responsibility
+	 * to manage the devargs we've just inserted. From this point
+	 * those devargs shouldn't be removed manually anymore.
+	 */
+
+	ret = dev->bus->plug(dev);
+	if (ret > 0)
+		ret = -ENOTSUP;
+
+	if (ret && !rte_dev_is_probed(dev)) { /* if hasn't ever succeeded */
+		RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+			dev->name);
+		return ret;
+	}
+
+	*new_dev = dev;
+	return ret;
+
+err_devarg:
+	if (rte_devargs_remove(da) != 0) {
+		free(da->args);
+		free(da);
+	}
+	return ret;
+}
+
+int
+rte_dev_probe(const char *devargs)
+{
+	struct eal_dev_mp_req req;
+	struct rte_device *dev;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.t = EAL_DEV_REQ_TYPE_ATTACH;
+	strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		/**
+		 * If in secondary process, just send IPC request to
+		 * primary process.
+		 */
+		ret = eal_dev_hotplug_request_to_primary(&req);
+		if (ret != 0) {
+			RTE_LOG(ERR, EAL,
+				"Failed to send hotplug request to primary\n");
+			return -ENOMSG;
+		}
+		if (req.result != 0)
+			RTE_LOG(ERR, EAL,
+				"Failed to hotplug add device\n");
+		return req.result;
+	}
+
+	/* attach a shared device from primary start from here: */
+
+	/* primary attach the new device itself. */
+	ret = local_dev_probe(devargs, &dev);
+
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to attach device on primary process\n");
+
+		/**
+		 * it is possible that secondary process failed to attached a
+		 * device that primary process have during initialization,
+		 * so for -EEXIST case, we still need to sync with secondary
+		 * process.
+		 */
+		if (ret != -EEXIST)
+			return ret;
+	}
+
+	/* primary send attach sync request to secondary. */
+	ret = eal_dev_hotplug_request_to_secondary(&req);
+
+	/* if any communication error, we need to rollback. */
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to send hotplug add request to secondary\n");
+		ret = -ENOMSG;
+		goto rollback;
+	}
+
+	/**
+	 * if any secondary failed to attach, we need to consider if rollback
+	 * is necessary.
+	 */
+	if (req.result != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to attach device on secondary process\n");
+		ret = req.result;
+
+		/* for -EEXIST, we don't need to rollback. */
+		if (ret == -EEXIST)
+			return ret;
+		goto rollback;
+	}
+
+	return 0;
+
+rollback:
+	req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+
+	/* primary send rollback request to secondary. */
+	if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+		RTE_LOG(WARNING, EAL,
+			"Failed to rollback device attach on secondary."
+			"Devices in secondary may not sync with primary\n");
+
+	/* primary rollback itself. */
+	if (local_dev_remove(dev) != 0)
+		RTE_LOG(WARNING, EAL,
+			"Failed to rollback device attach on primary."
+			"Devices in secondary may not sync with primary\n");
+
+	return ret;
+}
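
Usage sketch (not part of this patch; "net_null0" is just an example vdev name): from the primary process, rte_eal_hotplug_add() builds the "bus:name,args" devargs string via build_devargs() above and drives the whole probe/IPC/rollback sequence:

#include <rte_dev.h>

static int
attach_and_detach_null_port(void)
{
	int ret = rte_eal_hotplug_add("vdev", "net_null0", "");
	if (ret < 0)
		return ret;
	/* ... use the device ... */
	return rte_eal_hotplug_remove("vdev", "net_null0");
}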
+
+int
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+	struct rte_device *dev;
+	struct rte_bus *bus;
+
+	bus = rte_bus_find_by_name(busname);
+	if (bus == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+		return -ENOENT;
+	}
+
+	dev = bus->find_device(NULL, cmp_dev_name, devname);
+	if (dev == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
+		return -EINVAL;
+	}
+
+	return rte_dev_remove(dev);
+}
+
+/* remove device at local process. */
+int
+local_dev_remove(struct rte_device *dev)
+{
+	int ret;
+
+	if (dev->bus->unplug == NULL) {
+		RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+			dev->bus->name);
+		return -ENOTSUP;
+	}
+
+	ret = dev->bus->unplug(dev);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+			dev->name);
+		return (ret < 0) ? ret : -ENOENT;
+	}
+
+	return 0;
+}
+
+int
+rte_dev_remove(struct rte_device *dev)
+{
+	struct eal_dev_mp_req req;
+	char *devargs;
+	int ret;
+
+	if (!rte_dev_is_probed(dev)) {
+		RTE_LOG(ERR, EAL, "Device is not probed\n");
+		return -ENOENT;
+	}
+
+	ret = build_devargs(dev->bus->name, dev->name, "", &devargs);
+	if (ret != 0)
+		return ret;
+
+	memset(&req, 0, sizeof(req));
+	req.t = EAL_DEV_REQ_TYPE_DETACH;
+	strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+	free(devargs);
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		/**
+		 * If in secondary process, just send IPC request to
+		 * primary process.
+		 */
+		ret = eal_dev_hotplug_request_to_primary(&req);
+		if (ret != 0) {
+			RTE_LOG(ERR, EAL,
+				"Failed to send hotplug request to primary\n");
+			return -ENOMSG;
+		}
+		if (req.result != 0)
+			RTE_LOG(ERR, EAL,
+				"Failed to hotplug remove device\n");
+		return req.result;
+	}
+
+	/* detach a device from primary start from here: */
+
+	/* primary send detach sync request to secondary */
+	ret = eal_dev_hotplug_request_to_secondary(&req);
+
+	/**
+	 * if communication error, we need to rollback, because it is possible
+	 * part of the secondary processes still detached it successfully.
+	 */
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to send device detach request to secondary\n");
+		ret = -ENOMSG;
+		goto rollback;
+	}
+
+	/**
+	 * if any secondary failed to detach, we need to consider if rollback
+	 * is necessary.
+	 */
+	if (req.result != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to detach device on secondary process\n");
+		ret = req.result;
+		/**
+		 * if -ENOENT, we don't need to rollback, since devices is
+		 * already detached on secondary process.
+		 */
+		if (ret != -ENOENT)
+			goto rollback;
+	}
+
+	/* primary detach the device itself. */
+	ret = local_dev_remove(dev);
+
+	/* if primary failed, still need to consider if rollback is necessary */
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to detach device on primary process\n");
+		/* if -ENOENT, we don't need to rollback */
+		if (ret == -ENOENT)
+			return ret;
+		goto rollback;
+	}
+
+	return 0;
+
+rollback:
+	req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+
+	/* primary send rollback request to secondary. */
+	if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+		RTE_LOG(WARNING, EAL,
+			"Failed to rollback device detach on secondary."
+			"Devices in secondary may not sync with primary\n");
+
+	return ret;
+}
+
+int
+rte_dev_event_callback_register(const char *device_name,
+				rte_dev_event_cb_fn cb_fn,
+				void *cb_arg)
+{
+	struct dev_event_callback *event_cb;
+	int ret;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	rte_spinlock_lock(&dev_event_lock);
+
+	if (TAILQ_EMPTY(&dev_event_cbs))
+		TAILQ_INIT(&dev_event_cbs);
+
+	TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
+		if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) {
+			if (device_name == NULL && event_cb->dev_name == NULL)
+				break;
+			if (device_name == NULL || event_cb->dev_name == NULL)
+				continue;
+			if (!strcmp(event_cb->dev_name, device_name))
+				break;
+		}
+	}
+
+	/* create a new callback. */
+	if (event_cb == NULL) {
+		event_cb = malloc(sizeof(struct dev_event_callback));
+		if (event_cb != NULL) {
+			event_cb->cb_fn = cb_fn;
+			event_cb->cb_arg = cb_arg;
+			event_cb->active = 0;
+			if (!device_name) {
+				event_cb->dev_name = NULL;
+			} else {
+				event_cb->dev_name = strdup(device_name);
+				if (event_cb->dev_name == NULL) {
+					ret = -ENOMEM;
+					goto error;
+				}
+			}
+			TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next);
+		} else {
+			RTE_LOG(ERR, EAL,
+				"Failed to allocate memory for device "
+				"event callback.");
+			ret = -ENOMEM;
+			goto error;
+		}
+	} else {
+		RTE_LOG(ERR, EAL,
+			"The callback is already exist, no need "
+			"to register again.\n");
+		ret = -EEXIST;
+	}
+
+	rte_spinlock_unlock(&dev_event_lock);
+	return 0;
+error:
+	free(event_cb);
+	rte_spinlock_unlock(&dev_event_lock);
+	return ret;
+}
+
+int
+rte_dev_event_callback_unregister(const char *device_name,
+				  rte_dev_event_cb_fn cb_fn,
+				  void *cb_arg)
+{
+	int ret = 0;
+	struct dev_event_callback *event_cb, *next;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	rte_spinlock_lock(&dev_event_lock);
+	/*walk through the callbacks and remove all that match. */
+	for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
+	     event_cb = next) {
+
+		next = TAILQ_NEXT(event_cb, next);
+
+		if (device_name != NULL && event_cb->dev_name != NULL) {
+			if (!strcmp(event_cb->dev_name, device_name)) {
+				if (event_cb->cb_fn != cb_fn ||
+				    (cb_arg != (void *)-1 &&
+				    event_cb->cb_arg != cb_arg))
+					continue;
+			}
+		} else if (device_name != NULL) {
+			continue;
+		}
+
+		/*
+		 * if this callback is not executing right now,
+		 * then remove it.
+		 */
+		if (event_cb->active == 0) {
+			TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
+			free(event_cb);
+			ret++;
+		} else {
+			continue;
+		}
+	}
+	rte_spinlock_unlock(&dev_event_lock);
+	return ret;
+}
+
+void
+rte_dev_event_callback_process(const char *device_name,
+			       enum rte_dev_event_type event)
+{
+	struct dev_event_callback *cb_lst;
+
+	if (device_name == NULL)
+		return;
+
+	rte_spinlock_lock(&dev_event_lock);
+
+	TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
+		if (cb_lst->dev_name) {
+			if (strcmp(cb_lst->dev_name, device_name))
+				continue;
+		}
+		cb_lst->active = 1;
+		rte_spinlock_unlock(&dev_event_lock);
+		cb_lst->cb_fn(device_name, event,
+				cb_lst->cb_arg);
+		rte_spinlock_lock(&dev_event_lock);
+		cb_lst->active = 0;
+	}
+	rte_spinlock_unlock(&dev_event_lock);
+}
+
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+		      const char *dev_str)
+{
+	struct rte_devargs devargs;
+	struct rte_class *cls = NULL;
+	struct rte_bus *bus = NULL;
+
+	/* Having both bus_str and cls_str NULL is illegal,
+	 * marking this iterator as invalid unless
+	 * everything goes well.
+	 */
+	it->bus_str = NULL;
+	it->cls_str = NULL;
+
+	devargs.data = dev_str;
+	if (rte_devargs_layers_parse(&devargs, dev_str))
+		goto get_out;
+
+	bus = devargs.bus;
+	cls = devargs.cls;
+	/* The string should have at least
+	 * one layer specified.
+	 */
+	if (bus == NULL && cls == NULL) {
+		RTE_LOG(ERR, EAL,
+			"Either bus or class must be specified.\n");
+		rte_errno = EINVAL;
+		goto get_out;
+	}
+	if (bus != NULL && bus->dev_iterate == NULL) {
+		RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+		rte_errno = ENOTSUP;
+		goto get_out;
+	}
+	if (cls != NULL && cls->dev_iterate == NULL) {
+		RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+		rte_errno = ENOTSUP;
+		goto get_out;
+	}
+	it->bus_str = devargs.bus_str;
+	it->cls_str = devargs.cls_str;
+	it->dev_str = dev_str;
+	it->bus = bus;
+	it->cls = cls;
+	it->device = NULL;
+	it->class_device = NULL;
+get_out:
+	return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+	size_t end;
+	char *copy;
+
+	end = strcspn(str, ",/");
+	if (str[end] == ',') {
+		copy = strdup(&str[end + 1]);
+	} else {
+		/* '/' or '\0' */
+		copy = strdup("");
+	}
+	if (copy == NULL) {
+		rte_errno = ENOMEM;
+	} else {
+		char *slash;
+
+		slash = strchr(copy, '/');
+		if (slash != NULL)
+			slash[0] = '\0';
+	}
+	return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+		   const void *ctx)
+{
+	struct rte_dev_iterator *it;
+	const char *cls_str = NULL;
+	void *dev;
+
+	if (cls->dev_iterate == NULL)
+		return 1;
+	it = ITCTX(ctx);
+	cls_str = CLSCTX(ctx);
+	dev = it->class_device;
+	/* it->cls_str != NULL means a class
+	 * was specified in the devstr.
+	 */
+	if (it->cls_str != NULL && cls != it->cls)
+		return 1;
+	/* If an error occurred previously,
+	 * no need to test further.
+	 */
+	if (rte_errno != 0)
+		return -1;
+	dev = cls->dev_iterate(dev, cls_str, it);
+	it->class_device = dev;
+	return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+		 const void *ctx)
+{
+	struct rte_device *dev = NULL;
+	struct rte_class *cls = NULL;
+	struct rte_dev_iterator *it;
+	const char *bus_str = NULL;
+
+	if (bus->dev_iterate == NULL)
+		return 1;
+	it = ITCTX(ctx);
+	bus_str = BUSCTX(ctx);
+	dev = it->device;
+	/* it->bus_str != NULL means a bus
+	 * was specified in the devstr.
+	 */
+	if (it->bus_str != NULL && bus != it->bus)
+		return 1;
+	/* If an error occurred previously,
+	 * no need to test further.
+	 */
+	if (rte_errno != 0)
+		return -1;
+	if (it->cls_str == NULL) {
+		dev = bus->dev_iterate(dev, bus_str, it);
+		goto end;
+	}
+	/* cls_str != NULL */
+	if (dev == NULL) {
+next_dev_on_bus:
+		dev = bus->dev_iterate(dev, bus_str, it);
+		it->device = dev;
+	}
+	if (dev == NULL)
+		return 1;
+	if (it->cls != NULL)
+		cls = TAILQ_PREV(it->cls, rte_class_list, next);
+	cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+	if (cls != NULL) {
+		it->cls = cls;
+		goto end;
+	}
+	goto next_dev_on_bus;
+end:
+	it->device = dev;
+	return dev == NULL;
+}
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+	struct rte_bus *bus = NULL;
+	int old_errno = rte_errno;
+	char *bus_str = NULL;
+	char *cls_str = NULL;
+
+	rte_errno = 0;
+	if (it->bus_str == NULL && it->cls_str == NULL) {
+		/* Invalid iterator. */
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (it->bus != NULL)
+		bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+	if (it->bus_str != NULL) {
+		bus_str = dev_str_sane_copy(it->bus_str);
+		if (bus_str == NULL)
+			goto out;
+	}
+	if (it->cls_str != NULL) {
+		cls_str = dev_str_sane_copy(it->cls_str);
+		if (cls_str == NULL)
+			goto out;
+	}
+	while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+				   CTX(it, bus_str, cls_str)))) {
+		if (it->device != NULL) {
+			it->bus = bus;
+			goto out;
+		}
+		if (it->bus_str != NULL ||
+		    rte_errno != 0)
+			break;
+	}
+	if (rte_errno == 0)
+		rte_errno = old_errno;
+out:
+	free(bus_str);
+	free(cls_str);
+	return it->device;
+}
+
+int
+rte_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
+		size_t len)
+{
+	if (dev->bus->dma_map == NULL || len == 0) {
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+	/* Memory must be registered through rte_extmem_* APIs */
+	if (rte_mem_virt2memseg_list(addr) == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return dev->bus->dma_map(dev, addr, iova, len);
+}
+
+int
+rte_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
+		  size_t len)
+{
+	if (dev->bus->dma_unmap == NULL || len == 0) {
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+	/* Memory must be registered through rte_extmem_* APIs */
+	if (rte_mem_virt2memseg_list(addr) == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return dev->bus->dma_unmap(dev, addr, iova, len);
+}
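
Usage sketch (not part of this patch): the iterator above is normally driven through the RTE_DEV_FOREACH() macro from rte_dev.h, with a layered devstr such as "bus=pci" or "class=eth":

#include <stdio.h>
#include <rte_dev.h>

static void
list_pci_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	RTE_DEV_FOREACH(dev, "bus=pci", &it)
		printf("found %s\n", dev->name);
}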
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
new file mode 100644
index 000000000..2123773ef
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2014 6WIND S.A.
+ */
+
+/* This file manages the list of devices and their arguments, as given
+ * by the user at startup
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_tailq.h>
+#include "eal_private.h"
+
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
+/** Global list of user devices */
+static struct rte_devargs_list devargs_list =
+	TAILQ_HEAD_INITIALIZER(devargs_list);
+
+static size_t
+devargs_layer_count(const char *s)
+{
+	size_t i = s ? 1 : 0;
+
+	while (s != NULL && s[0] != '\0') {
+		i += s[0] == '/';
+		s++;
+	}
+	return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+			 const char *devstr)
+{
+	struct {
+		const char *key;
+		const char *str;
+		struct rte_kvargs *kvlist;
+	} layers[] = {
+		{ "bus=", NULL, NULL, },
+		{ "class=", NULL, NULL, },
+		{ "driver=", NULL, NULL, },
+	};
+	struct rte_kvargs_pair *kv = NULL;
+	struct rte_class *cls = NULL;
+	struct rte_bus *bus = NULL;
+	const char *s = devstr;
+	size_t nblayer;
+	size_t i = 0;
+	int ret = 0;
+
+	/* Split each sub-lists. */
+	nblayer = devargs_layer_count(devstr);
+	if (nblayer > RTE_DIM(layers)) {
+		RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+			nblayer);
+		ret = -E2BIG;
+		goto get_out;
+	}
+
+	/* If the devargs points the devstr
+	 * as source data, then it should not allocate
+	 * anything and keep referring only to it.
+	 */
+	if (devargs->data != devstr) {
+		devargs->data = strdup(devstr);
+		if (devargs->data == NULL) {
+			RTE_LOG(ERR, EAL, "OOM\n");
+			ret = -ENOMEM;
+			goto get_out;
+		}
+		s = devargs->data;
+	}
+
+	while (s != NULL) {
+		if (i >= RTE_DIM(layers)) {
+			RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+			ret = -EINVAL;
+			goto get_out;
+		}
+		/*
+		 * The last layer is free-form.
+		 * The "driver" key is not required (but accepted).
+		 */
+		if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+		    i != RTE_DIM(layers) - 1)
+			goto next_layer;
+		layers[i].str = s;
+		layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+		if (layers[i].kvlist == NULL) {
+			RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+			ret = -EINVAL;
+			goto get_out;
+		}
+		s = strchr(s, '/');
+		if (s != NULL)
+			s++;
+next_layer:
+		i++;
+	}
+
+	/* Parse each sub-list. */
+	for (i = 0; i < RTE_DIM(layers); i++) {
+		if (layers[i].kvlist == NULL)
+			continue;
+		kv = &layers[i].kvlist->pairs[0];
+		if (strcmp(kv->key, "bus") == 0) {
+			bus = rte_bus_find_by_name(kv->value);
+			if (bus == NULL) {
+				RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+					kv->value);
+				ret = -EFAULT;
+				goto get_out;
+			}
+		} else if (strcmp(kv->key, "class") == 0) {
+			cls = rte_class_find_by_name(kv->value);
+			if (cls == NULL) {
+				RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+					kv->value);
+				ret = -EFAULT;
+				goto get_out;
+			}
+		} else if (strcmp(kv->key, "driver") == 0) {
+			/* Ignore */
+			continue;
+		}
+	}
+
+	/* Fill devargs fields. */
+	devargs->bus_str = layers[0].str;
+	devargs->cls_str = layers[1].str;
+	devargs->drv_str = layers[2].str;
+	devargs->bus = bus;
+	devargs->cls = cls;
+
+	/* If we own the data, clean up a bit
+	 * the several layers string, to ease
+	 * their parsing afterward.
+	 */
+	if (devargs->data != devstr) {
+		char *s = (void *)(intptr_t)(devargs->data);
+
+		while ((s = strchr(s, '/'))) {
+			*s = '\0';
+			s++;
+		}
+	}
+
+get_out:
+	for (i = 0; i < RTE_DIM(layers); i++) {
+		if (layers[i].kvlist)
+			rte_kvargs_free(layers[i].kvlist);
+	}
+	if (ret != 0)
+		rte_errno = -ret;
+	return ret;
+}
+
+static int
+bus_name_cmp(const struct rte_bus *bus, const void *name)
+{
+	return strncmp(bus->name, name, strlen(bus->name));
+}
+
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
+{
+	struct rte_bus *bus = NULL;
+	const char *devname;
+	const size_t maxlen = sizeof(da->name);
+	size_t i;
+
+	if (da == NULL)
+		return -EINVAL;
+
+	/* Retrieve eventual bus info */
+	do {
+		devname = dev;
+		bus = rte_bus_find(bus, bus_name_cmp, dev);
+		if (bus == NULL)
+			break;
+		devname = dev + strlen(bus->name) + 1;
+		if (rte_bus_find_by_device_name(devname) == bus)
+			break;
+	} while (1);
+	/* Store device name */
+	i = 0;
+	while (devname[i] != '\0' && devname[i] != ',') {
+		da->name[i] = devname[i];
+		i++;
+		if (i == maxlen) {
+			RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
+				dev, maxlen);
+			da->name[i - 1] = '\0';
+			return -EINVAL;
+		}
+	}
+	da->name[i] = '\0';
+	if (bus == NULL) {
+		bus = rte_bus_find_by_device_name(da->name);
+		if (bus == NULL) {
+			RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
+				da->name);
+			return -EFAULT;
+		}
+	}
+	da->bus = bus;
+	/* Parse eventual device arguments */
+	if (devname[i] == ',')
+		da->args = strdup(&devname[i + 1]);
+	else
+		da->args = strdup("");
+	if (da->args == NULL) {
+		RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+	va_list ap;
+	size_t len;
+	char *dev;
+	int ret;
+
+	if (da == NULL)
+		return -EINVAL;
+
+	va_start(ap, format);
+	len = vsnprintf(NULL, 0, format, ap);
+	va_end(ap);
+
+	dev = calloc(1, len + 1);
+	if (dev == NULL) {
+		RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+		return -ENOMEM;
+	}
+
+	va_start(ap, format);
+	vsnprintf(dev, len + 1, format, ap);
+	va_end(ap);
+
+	ret = rte_devargs_parse(da, dev);
+
+	free(dev);
+	return ret;
+}
+
+int
+rte_devargs_insert(struct rte_devargs **da)
+{
+	struct rte_devargs *listed_da;
+	void *tmp;
+
+	if (*da == NULL || (*da)->bus == NULL)
+		return -1;
+
+	TAILQ_FOREACH_SAFE(listed_da, &devargs_list, next, tmp) {
+		if (listed_da == *da)
+			/* devargs already in the list */
+			return 0;
+		if (strcmp(listed_da->bus->name, (*da)->bus->name) == 0 &&
+		    strcmp(listed_da->name, (*da)->name) == 0) {
+			/* device already in devargs list, must be updated */
+			listed_da->type = (*da)->type;
+			listed_da->policy = (*da)->policy;
+			free(listed_da->args);
+			listed_da->args = (*da)->args;
+			listed_da->bus = (*da)->bus;
+			listed_da->cls = (*da)->cls;
+			listed_da->bus_str = (*da)->bus_str;
+			listed_da->cls_str = (*da)->cls_str;
+			listed_da->data = (*da)->data;
+			/* replace provided devargs with found one */
+			free(*da);
+			*da = listed_da;
+			return 0;
+		}
+	}
+	/* new device in the list */
+	TAILQ_INSERT_TAIL(&devargs_list, *da, next);
+	return 0;
+}
+
+/* store a whitelist parameter for later parsing */
+int
+rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+{
+	struct rte_devargs *devargs = NULL;
+	struct rte_bus *bus = NULL;
+	const char *dev = devargs_str;
+
+	/* use calloc instead of rte_zmalloc as it's called early at init */
+	devargs = calloc(1, sizeof(*devargs));
+	if (devargs == NULL)
+		goto fail;
+
+	if (rte_devargs_parse(devargs, dev))
+		goto fail;
+	devargs->type = devtype;
+	bus = devargs->bus;
+	if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+		devargs->policy = RTE_DEV_BLACKLISTED;
+	if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) {
+		if (devargs->policy == RTE_DEV_WHITELISTED)
+			bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST;
+		else if (devargs->policy == RTE_DEV_BLACKLISTED)
+			bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST;
+	}
+	TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+	return 0;
+
+fail:
+	if (devargs) {
+		free(devargs->args);
+		free(devargs);
+	}
+
+	return -1;
+}
+
+int
+rte_devargs_remove(struct rte_devargs *devargs)
+{
+	struct rte_devargs *d;
+	void *tmp;
+
+	if (devargs == NULL || devargs->bus == NULL)
+		return -1;
+
+	TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
+		if (strcmp(d->bus->name, devargs->bus->name) == 0 &&
+		    strcmp(d->name, devargs->name) == 0) {
+			TAILQ_REMOVE(&devargs_list, d, next);
+			free(d->args);
+			free(d);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/* count the number of devices of a specified type */
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype)
+{
+	struct rte_devargs *devargs;
+	unsigned int count = 0;
+
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+		if (devargs->type != devtype)
+			continue;
+		count++;
+	}
+	return count;
+}
+
+/* dump the user devices on the console */
+void
+rte_devargs_dump(FILE *f)
+{
+	struct rte_devargs *devargs;
+
+	fprintf(f, "User device list:\n");
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+		fprintf(f, "  [%s]: %s %s\n",
+			(devargs->bus ? devargs->bus->name : "??"),
+			devargs->name, devargs->args);
+	}
+}
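
A sketch of the parsing entry points above (not part of this patch; the PCI address and arguments are made up, and on success da.args is allocated and owned by the caller):

#include <stdlib.h>
#include <string.h>
#include <rte_devargs.h>

static int
parse_example(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_devargs_parsef(&da, "%s:%s,%s", "pci", "0000:01:00.0", "rxq=4"))
		return -1;
	/* da.bus, da.name and da.args are now filled in */
	free(da.args);
	return 0;
}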
+
+/* bus-aware rte_devargs iterator. */
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start)
+{
+	struct rte_devargs *da;
+
+	if (start != NULL)
+		da = TAILQ_NEXT(start, next);
+	else
+		da = TAILQ_FIRST(&devargs_list);
+	while (da != NULL) {
+		if (busname == NULL ||
+		    (strcmp(busname, da->bus->name) == 0))
+			return da;
+		da = TAILQ_NEXT(da, next);
+	}
+	return NULL;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 000000000..2a10fb823
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/* Use XSI-compliant portable version of strerror_r() */
+#undef _GNU_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+const char *
+rte_strerror(int errnum)
+{
+	/* BSD puts a colon in the "unknown error" messages, Linux doesn't */
+#ifdef RTE_EXEC_ENV_FREEBSD
+	static const char *sep = ":";
+#else
+	static const char *sep = "";
+#endif
+#define RETVAL_SZ 256
+	static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+	char *ret = RTE_PER_LCORE(retval);
+
+	/* since some implementations of strerror_r throw an error
+	 * themselves if errnum is too big, we handle that case here */
+	if (errnum >= RTE_MAX_ERRNO)
+		snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum);
+	else
+		switch (errnum){
+		case E_RTE_SECONDARY:
+			return "Invalid call in secondary process";
+		case E_RTE_NO_CONFIG:
+			return "Missing rte_config structure";
+		default:
+			if (strerror_r(errnum, ret, RETVAL_SZ) != 0)
+				snprintf(ret, RETVAL_SZ, "Unknown error%s %d",
+					sep, errnum);
+		}
+
+	return ret;
+}
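
Usage sketch: rte_strerror() extends strerror() with the DPDK-private errno values and returns a per-lcore buffer, so the pointer is only stable until the next call on the same lcore:

#include <stdio.h>
#include <errno.h>
#include <rte_errno.h>

static void
report_errors(void)
{
	printf("%s\n", rte_strerror(E_RTE_SECONDARY)); /* DPDK-specific value */
	printf("%s\n", rte_strerror(EINVAL));          /* falls through to strerror_r() */
}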
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 000000000..4f8f1af73
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,1510 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1ULL << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
+
+/*
+ * We use this to keep track of created/attached memory areas to prevent user
+ * errors in API usage.
+ */
+struct mem_area {
+	TAILQ_ENTRY(mem_area) next;
+	void *addr;
+	size_t len;
+	int fd;
+};
+TAILQ_HEAD(mem_area_head, mem_area);
+/* local per-process tailq */
+static struct mem_area_head mem_area_tailq =
+	TAILQ_HEAD_INITIALIZER(mem_area_tailq);
+static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * This is a mask that is always stored at the end of array, to provide fast
+ * way of finding free/used spots without looping through each element.
+ */
+
+struct used_mask {
+	unsigned int n_masks;
+	uint64_t data[];
+};
+
+static size_t
+calc_mask_size(unsigned int len)
+{
+	/* mask must be multiple of MASK_ALIGN, even though length of array
+	 * itself may not be aligned on that boundary.
+	 */
+	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+	return sizeof(struct used_mask) +
+			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
+{
+	size_t data_sz = elt_sz * len;
+	size_t msk_sz = calc_mask_size(len);
+	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
+{
+	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+	char path[PATH_MAX];
+	void *map_addr;
+
+	if (ftruncate(fd, len)) {
+		RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
+		/* pass errno up the chain */
+		rte_errno = errno;
+		return -1;
+	}
+
+	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, fd, 0);
+	if (map_addr != addr) {
+		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+		/* pass errno up the chain */
+		rte_errno = errno;
+		return -1;
+	}
+	return 0;
+}
+
+static int
+overlap(const struct mem_area *ma, const void *start, size_t len)
+{
+	const void *end = RTE_PTR_ADD(start, len);
+	const void *ma_start = ma->addr;
+	const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);
+
+	/* start overlap? */
+	if (start >= ma_start && start < ma_end)
+		return 1;
+	/* end overlap? */
+	if (end >= ma_start && end < ma_end)
+		return 1;
+	return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+	    bool used)
+{
+	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+			arr->len);
+	unsigned int msk_idx, lookahead_idx, first, first_mod;
+	unsigned int last, last_mod;
+	uint64_t last_msk, ignore_msk;
+
+	/*
+	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
+	 * on that boundary, so construct a special mask to exclude anything we
+	 * don't want to see to avoid confusing ctz.
+	 */
+	first = MASK_LEN_TO_IDX(start);
+	first_mod = MASK_LEN_TO_MOD(start);
+	ignore_msk = ~((1ULL << first_mod) - 1);
+
+	/* array length may not be aligned, so calculate ignore mask for last
+	 * mask index.
+	 */
+	last = MASK_LEN_TO_IDX(arr->len);
+	last_mod = MASK_LEN_TO_MOD(arr->len);
+	last_msk = ~(-1ULL << last_mod);
+
+	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+		uint64_t cur_msk, lookahead_msk;
+		unsigned int run_start, clz, left;
+		bool found = false;
+		/*
+		 * The process of getting n consecutive bits for arbitrary n is
+		 * a bit involved, but here it is in a nutshell:
+		 *
+		 * 1. let n be the number of consecutive bits we're looking for
+		 * 2. check if n can fit in one mask, and if so, do n-1
+		 *    rshift-ands to see if there is an appropriate run inside
+		 *    our current mask
+		 *    2a. if we found a run, bail out early
+		 *    2b. if we didn't find a run, proceed
+		 * 3. invert the mask and count leading zeroes (that is, count
+		 *    how many consecutive set bits we had starting from the
+		 *    end of current mask) as k
+		 *    3a. if k is 0, continue to next mask
+		 *    3b. if k is not 0, we have a potential run
+		 * 4. to satisfy our requirements, next mask must have n-k
+		 *    consecutive set bits right at the start, so we will do
+		 *    (n-k-1) rshift-ands and check if first bit is set.
+		 *
+		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+		 * we either run out of masks, lose the run, or find what we
+		 * were looking for.
+		 */
+		cur_msk = msk->data[msk_idx];
+		left = n;
+
+		/* if we're looking for free spaces, invert the mask */
+		if (!used)
+			cur_msk = ~cur_msk;
+
+		/* combine current ignore mask with last index ignore mask */
+		if (msk_idx == last)
+			ignore_msk |= last_msk;
+
+		/* if we have an ignore mask, ignore once */
+		if (ignore_msk) {
+			cur_msk &= ignore_msk;
+			ignore_msk = 0;
+		}
+
+		/* if n can fit in within a single mask, do a search */
+		if (n <= MASK_ALIGN) {
+			uint64_t tmp_msk = cur_msk;
+			unsigned int s_idx;
+			for (s_idx = 0; s_idx < n - 1; s_idx++)
+				tmp_msk &= tmp_msk >> 1ULL;
+			/* we found what we were looking for */
+			if (tmp_msk != 0) {
+				run_start = __builtin_ctzll(tmp_msk);
+				return MASK_GET_IDX(msk_idx, run_start);
+			}
+		}
+
+		/*
+		 * we didn't find our run within the mask, or n > MASK_ALIGN,
+		 * so we're going for plan B.
+		 */
+
+		/* count leading zeroes on inverted mask */
+		if (~cur_msk == 0)
+			clz = sizeof(cur_msk) * 8;
+		else
+			clz = __builtin_clzll(~cur_msk);
+
+		/* if there aren't any runs at the end either, just continue */
+		if (clz == 0)
+			continue;
+
+		/* we have a partial run at the end, so try looking ahead */
+		run_start = MASK_ALIGN - clz;
+		left -= clz;
+
+		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+				lookahead_idx++) {
+			unsigned int s_idx, need;
+			lookahead_msk = msk->data[lookahead_idx];
+
+			/* if we're looking for free space, invert the mask */
+			if (!used)
+				lookahead_msk = ~lookahead_msk;
+
+			/* figure out how many consecutive bits we need here */
+			need = RTE_MIN(left, MASK_ALIGN);
+
+			for (s_idx = 0; s_idx < need - 1; s_idx++)
+				lookahead_msk &= lookahead_msk >> 1ULL;
+
+			/* if first bit is not set, we've lost the run */
+			if ((lookahead_msk & 1) == 0) {
+				/*
+				 * we've scanned this far, so we know there are
+				 * no runs in the space we've lookahead-scanned
+				 * as well, so skip that on next iteration.
+				 */
+				ignore_msk = ~((1ULL << need) - 1);
+				msk_idx = lookahead_idx;
+				break;
+			}
+
+			left -= need;
+
+			/* check if we've found what we were looking for */
+			if (left == 0) {
+				found = true;
+				break;
+			}
+		}
+
+		/* we didn't find anything, so continue */
+		if (!found)
+			continue;
+
+		return MASK_GET_IDX(msk_idx, run_start);
+	}
+	/* we didn't find anything */
+	rte_errno = used ? ENOENT : ENOSPC;
+	return -1;
+}
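
A standalone illustration of the rshift-and trick in step 2 above (not part of this patch): after n-1 rounds of msk &= msk >> 1, bit i survives iff bits i..i+n-1 were all set, so ctz of the result is the start of the first n-bit run:

#include <stdint.h>
#include <stdio.h>

static int
find_run(uint64_t msk, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n - 1; i++)
		msk &= msk >> 1;
	return msk != 0 ? __builtin_ctzll(msk) : -1;
}

int main(void)
{
	/* 0x76 = 0b01110110: a run of 2 at bit 1 and a run of 3 at bit 4 */
	printf("%d\n", find_run(0x76, 3)); /* prints 4 */
	return 0;
}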
+ */ + last = MASK_LEN_TO_IDX(arr->len); + last_mod = MASK_LEN_TO_MOD(arr->len); + last_msk = ~(-(1ULL) << last_mod); + + for (idx = first; idx < msk->n_masks; idx++) { + uint64_t cur = msk->data[idx]; + int found; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + if (idx == last) + cur &= last_msk; + + /* ignore everything before start on first iteration */ + if (idx == first) + cur &= ignore_msk; + + /* check if we have any entries */ + if (cur == 0) + continue; + + /* + * find first set bit - that will correspond to whatever it is + * that we're looking for. + */ + found = __builtin_ctzll(cur); + return MASK_GET_IDX(idx, found); + } + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_contig(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int last, last_mod; + uint64_t last_msk; + unsigned int need_len, result = 0; + + /* array length may not be aligned, so calculate ignore mask for last + * mask index. + */ + last = MASK_LEN_TO_IDX(arr->len); + last_mod = MASK_LEN_TO_MOD(arr->len); + last_msk = ~(-(1ULL) << last_mod); + + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + for (idx = first; idx < msk->n_masks; idx++, result += need_len) { + uint64_t cur = msk->data[idx]; + unsigned int run_len; + + need_len = MASK_ALIGN; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* if this is last mask, ignore everything after last bit */ + if (idx == last) + cur &= last_msk; + + /* ignore everything before start on first iteration */ + if (idx == first) { + cur >>= first_mod; + /* at the start, we don't need the full mask len */ + need_len -= first_mod; + } + + /* we will be looking for zeroes, so invert the mask */ + cur = ~cur; + + /* if mask is zero, we have a complete run */ + if (cur == 0) + continue; + + /* + * see if current run ends before mask end. + */ + run_len = __builtin_ctzll(cur); + + /* add however many zeroes we've had in the last run and quit */ + if (run_len < need_len) { + result += run_len; + break; + } + } + return result; +} + +static int +find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int msk_idx, lookbehind_idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing ctz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + msk_idx = first; + do { + uint64_t cur_msk, lookbehind_msk; + unsigned int run_start, run_end, ctz, left; + bool found = false; + /* + * The process of getting n consecutive bits from the top for + * arbitrary n is a bit involved, but here it is in a nutshell: + * + * 1. let n be the number of consecutive bits we're looking for + * 2. check if n can fit in one mask, and if so, do n-1 + * lshift-ands to see if there is an appropriate run inside + * our current mask + * 2a. 
if we found a run, bail out early + * 2b. if we didn't find a run, proceed + * 3. invert the mask and count trailing zeroes (that is, count + * how many consecutive set bits we had starting from the + * start of current mask) as k + * 3a. if k is 0, continue to next mask + * 3b. if k is not 0, we have a potential run + * 4. to satisfy our requirements, next mask must have n-k + * consecutive set bits at the end, so we will do (n-k-1) + * lshift-ands and check if last bit is set. + * + * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until + * we either run out of masks, lose the run, or find what we + * were looking for. + */ + cur_msk = msk->data[msk_idx]; + left = n; + + /* if we're looking for free spaces, invert the mask */ + if (!used) + cur_msk = ~cur_msk; + + /* if we have an ignore mask, ignore once */ + if (ignore_msk) { + cur_msk &= ignore_msk; + ignore_msk = 0; + } + + /* if n can fit in within a single mask, do a search */ + if (n <= MASK_ALIGN) { + uint64_t tmp_msk = cur_msk; + unsigned int s_idx; + for (s_idx = 0; s_idx < n - 1; s_idx++) + tmp_msk &= tmp_msk << 1ULL; + /* we found what we were looking for */ + if (tmp_msk != 0) { + /* clz will give us offset from end of mask, and + * we only get the end of our run, not start, + * so adjust result to point to where start + * would have been. + */ + run_start = MASK_ALIGN - + __builtin_clzll(tmp_msk) - n; + return MASK_GET_IDX(msk_idx, run_start); + } + } + + /* + * we didn't find our run within the mask, or n > MASK_ALIGN, + * so we're going for plan B. + */ + + /* count trailing zeroes on inverted mask */ + if (~cur_msk == 0) + ctz = sizeof(cur_msk) * 8; + else + ctz = __builtin_ctzll(~cur_msk); + + /* if there aren't any runs at the start either, just + * continue + */ + if (ctz == 0) + continue; + + /* we have a partial run at the start, so try looking behind */ + run_end = MASK_GET_IDX(msk_idx, ctz); + left -= ctz; + + /* go backwards, include zero */ + lookbehind_idx = msk_idx - 1; + + /* we can't lookbehind as we've run out of masks, so stop */ + if (msk_idx == 0) + break; + + do { + const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1); + unsigned int s_idx, need; + + lookbehind_msk = msk->data[lookbehind_idx]; + + /* if we're looking for free space, invert the mask */ + if (!used) + lookbehind_msk = ~lookbehind_msk; + + /* figure out how many consecutive bits we need here */ + need = RTE_MIN(left, MASK_ALIGN); + + for (s_idx = 0; s_idx < need - 1; s_idx++) + lookbehind_msk &= lookbehind_msk << 1ULL; + + /* if last bit is not set, we've lost the run */ + if ((lookbehind_msk & last_bit) == 0) { + /* + * we've scanned this far, so we know there are + * no runs in the space we've lookbehind-scanned + * as well, so skip that on next iteration. + */ + ignore_msk = -1ULL << need; + msk_idx = lookbehind_idx; + break; + } + + left -= need; + + /* check if we've found what we were looking for */ + if (left == 0) { + found = true; + break; + } + } while ((lookbehind_idx--) != 0); /* decrement after check to + * include zero + */ + + /* we didn't find anything, so continue */ + if (!found) + continue; + + /* we've found what we were looking for, but we only know where + * the run ended, so calculate start position. + */ + return run_end - n; + } while (msk_idx-- != 0); /* decrement after check to include zero */ + /* we didn't find anything */ + rte_errno = used ? 
ENOENT : ENOSPC; + return -1; +} + +static int +find_prev(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing clz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + int found; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything before start on first iteration */ + if (idx == first) + cur &= ignore_msk; + + /* check if we have any entries */ + if (cur == 0) + continue; + + /* + * find last set bit - that will correspond to whatever it is + * that we're looking for. we're counting trailing zeroes, thus + * the value we get is counted from end of mask, so calculate + * position from start of mask. + */ + found = MASK_ALIGN - __builtin_clzll(cur) - 1; + + return MASK_GET_IDX(idx, found); + } while (idx-- != 0); /* decrement after check to include zero*/ + + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int need_len, result = 0; + + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + unsigned int run_len; + + need_len = MASK_ALIGN; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything after start on first iteration */ + if (idx == first) { + unsigned int end_len = MASK_ALIGN - first_mod - 1; + cur <<= end_len; + /* at the start, we don't need the full mask len */ + need_len -= end_len; + } + + /* we will be looking for zeroes, so invert the mask */ + cur = ~cur; + + /* if mask is zero, we have a complete run */ + if (cur == 0) + goto endloop; + + /* + * see where run ends, starting from the end. 
+ */ + run_len = __builtin_clzll(cur); + + /* add however many zeroes we've had in the last run and quit */ + if (run_len < need_len) { + result += run_len; + break; + } +endloop: + result += need_len; + } while (idx-- != 0); /* decrement after check to include zero */ + return result; +} + +static int +set_used(struct rte_fbarray *arr, unsigned int idx, bool used) +{ + struct used_mask *msk; + uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx); + unsigned int msk_idx = MASK_LEN_TO_IDX(idx); + bool already_used; + int ret = -1; + + if (arr == NULL || idx >= arr->len) { + rte_errno = EINVAL; + return -1; + } + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + ret = 0; + + /* prevent array from changing under us */ + rte_rwlock_write_lock(&arr->rwlock); + + already_used = (msk->data[msk_idx] & msk_bit) != 0; + + /* nothing to be done */ + if (used == already_used) + goto out; + + if (used) { + msk->data[msk_idx] |= msk_bit; + arr->count++; + } else { + msk->data[msk_idx] &= ~msk_bit; + arr->count--; + } +out: + rte_rwlock_write_unlock(&arr->rwlock); + + return ret; +} + +static int +fully_validate(const char *name, unsigned int elt_sz, unsigned int len) +{ + if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) { + rte_errno = EINVAL; + return -1; + } + + if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + return 0; +} + +int +rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, + unsigned int elt_sz) +{ + size_t page_sz, mmap_len; + char path[PATH_MAX]; + struct used_mask *msk; + struct mem_area *ma = NULL; + void *data = NULL; + int fd = -1; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + if (fully_validate(name, elt_sz, len)) + return -1; + + /* allocate mem area before doing anything */ + ma = malloc(sizeof(*ma)); + if (ma == NULL) { + rte_errno = ENOMEM; + return -1; + } + + page_sz = sysconf(_SC_PAGESIZE); + if (page_sz == (size_t)-1) { + free(ma); + return -1; + } + + /* calculate our memory limits */ + mmap_len = calc_data_size(page_sz, elt_sz, len); + + data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0); + if (data == NULL) { + free(ma); + return -1; + } + + rte_spinlock_lock(&mem_area_lock); + + fd = -1; + + if (internal_config.no_shconf) { + /* remap virtual area as writable */ + void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0); + if (new_data == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n", + __func__, strerror(errno)); + goto fail; + } + } else { + eal_get_fbarray_path(path, sizeof(path), name); + + /* + * Each fbarray is unique to process namespace, i.e. the + * filename depends on process prefix. Try to take out a lock + * and see if we succeed. If we don't, someone else is using it + * already. + */ + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = errno; + goto fail; + } else if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = EBUSY; + goto fail; + } + + /* take out a non-exclusive lock, so that other processes could + * still attach to it, but no other process could reinitialize + * it. 
+ */ + if (flock(fd, LOCK_SH | LOCK_NB)) { + rte_errno = errno; + goto fail; + } + + if (resize_and_map(fd, data, mmap_len)) + goto fail; + } + ma->addr = data; + ma->len = mmap_len; + ma->fd = fd; + + /* do not close fd - keep it until detach/destroy */ + TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); + + /* initialize the data */ + memset(data, 0, mmap_len); + + /* populate data structure */ + strlcpy(arr->name, name, sizeof(arr->name)); + arr->data = data; + arr->len = len; + arr->elt_sz = elt_sz; + arr->count = 0; + + msk = get_used_mask(data, elt_sz, len); + msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN)); + + rte_rwlock_init(&arr->rwlock); + + rte_spinlock_unlock(&mem_area_lock); + + return 0; +fail: + if (data) + munmap(data, mmap_len); + if (fd >= 0) + close(fd); + free(ma); + + rte_spinlock_unlock(&mem_area_lock); + return -1; +} + +int +rte_fbarray_attach(struct rte_fbarray *arr) +{ + struct mem_area *ma = NULL, *tmp = NULL; + size_t page_sz, mmap_len; + char path[PATH_MAX]; + void *data = NULL; + int fd = -1; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + /* + * we don't need to synchronize attach as two values we need (element + * size and array length) are constant for the duration of life of + * the array, so the parts we care about will not race. + */ + + if (fully_validate(arr->name, arr->elt_sz, arr->len)) + return -1; + + ma = malloc(sizeof(*ma)); + if (ma == NULL) { + rte_errno = ENOMEM; + return -1; + } + + page_sz = sysconf(_SC_PAGESIZE); + if (page_sz == (size_t)-1) { + free(ma); + return -1; + } + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + + /* check the tailq - maybe user has already mapped this address space */ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (overlap(tmp, arr->data, mmap_len)) { + rte_errno = EEXIST; + goto fail; + } + } + + /* we know this memory area is unique, so proceed */ + + data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0); + if (data == NULL) + goto fail; + + eal_get_fbarray_path(path, sizeof(path), arr->name); + + fd = open(path, O_RDWR); + if (fd < 0) { + rte_errno = errno; + goto fail; + } + + /* lock the file, to let others know we're using it */ + if (flock(fd, LOCK_SH | LOCK_NB)) { + rte_errno = errno; + goto fail; + } + + if (resize_and_map(fd, data, mmap_len)) + goto fail; + + /* store our new memory area */ + ma->addr = data; + ma->fd = fd; /* keep fd until detach/destroy */ + ma->len = mmap_len; + + TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); + + /* we're done */ + + rte_spinlock_unlock(&mem_area_lock); + return 0; +fail: + if (data) + munmap(data, mmap_len); + if (fd >= 0) + close(fd); + free(ma); + rte_spinlock_unlock(&mem_area_lock); + return -1; +} + +int +rte_fbarray_detach(struct rte_fbarray *arr) +{ + struct mem_area *tmp = NULL; + size_t mmap_len; + int ret = -1; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + /* + * we don't need to synchronize detach as two values we need (element + * size and total capacity) are constant for the duration of life of + * the array, so the parts we care about will not race. if the user is + * detaching while doing something else in the same process, we can't + * really do anything about it, things will blow up either way. + */ + + size_t page_sz = sysconf(_SC_PAGESIZE); + + if (page_sz == (size_t)-1) + return -1; + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + + /* does this area exist? 
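+ * i.e. was it mapped by rte_fbarray_init() or rte_fbarray_attach()
+ * in this process? note that detach only unmaps the area and closes
+ * our fd - the backing file is left in place for other processes,
+ * unlike rte_fbarray_destroy() below, which also unlinks it.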
*/ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (tmp->addr == arr->data && tmp->len == mmap_len) + break; + } + if (tmp == NULL) { + rte_errno = ENOENT; + ret = -1; + goto out; + } + + munmap(arr->data, mmap_len); + + /* area is unmapped, close fd and remove the tailq entry */ + if (tmp->fd >= 0) + close(tmp->fd); + TAILQ_REMOVE(&mem_area_tailq, tmp, next); + free(tmp); + + ret = 0; +out: + rte_spinlock_unlock(&mem_area_lock); + return ret; +} + +int +rte_fbarray_destroy(struct rte_fbarray *arr) +{ + struct mem_area *tmp = NULL; + size_t mmap_len; + int fd, ret; + char path[PATH_MAX]; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + /* + * we don't need to synchronize detach as two values we need (element + * size and total capacity) are constant for the duration of life of + * the array, so the parts we care about will not race. if the user is + * detaching while doing something else in the same process, we can't + * really do anything about it, things will blow up either way. + */ + + size_t page_sz = sysconf(_SC_PAGESIZE); + + if (page_sz == (size_t)-1) + return -1; + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + + /* does this area exist? */ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (tmp->addr == arr->data && tmp->len == mmap_len) + break; + } + if (tmp == NULL) { + rte_errno = ENOENT; + ret = -1; + goto out; + } + /* with no shconf, there were never any files to begin with */ + if (!internal_config.no_shconf) { + /* + * attempt to get an exclusive lock on the file, to ensure it + * has been detached by all other processes + */ + fd = tmp->fd; + if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); + rte_errno = EBUSY; + ret = -1; + goto out; + } + + /* we're OK to destroy the file */ + eal_get_fbarray_path(path, sizeof(path), arr->name); + if (unlink(path)) { + RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n", + strerror(errno)); + rte_errno = errno; + /* + * we're still holding an exclusive lock, so drop it to + * shared. 
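+ * this restores the invariant that every process mapping the file
+ * (ourselves included) holds a shared lock, so a later destroy
+ * attempt can again use LOCK_EX to detect remaining users.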
+ */ + flock(fd, LOCK_SH | LOCK_NB); + + ret = -1; + goto out; + } + close(fd); + } + munmap(arr->data, mmap_len); + + /* area is unmapped, remove the tailq entry */ + TAILQ_REMOVE(&mem_area_tailq, tmp, next); + free(tmp); + ret = 0; + + /* reset the fbarray structure */ + memset(arr, 0, sizeof(*arr)); +out: + rte_spinlock_unlock(&mem_area_lock); + return ret; +} + +void * +rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx) +{ + void *ret = NULL; + if (arr == NULL) { + rte_errno = EINVAL; + return NULL; + } + + if (idx >= arr->len) { + rte_errno = EINVAL; + return NULL; + } + + ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz); + + return ret; +} + +int +rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx) +{ + return set_used(arr, idx, true); +} + +int +rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx) +{ + return set_used(arr, idx, false); +} + +int +rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx) +{ + struct used_mask *msk; + int msk_idx; + uint64_t msk_bit; + int ret = -1; + + if (arr == NULL || idx >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + msk_idx = MASK_LEN_TO_IDX(idx); + msk_bit = 1ULL << MASK_LEN_TO_MOD(idx); + + ret = (msk->data[msk_idx] & msk_bit) != 0; + + rte_rwlock_read_unlock(&arr->rwlock); + + return ret; +} + +static int +fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = start; + goto out; + } + } else { + if (arr->count == 0) { + rte_errno = ENOENT; + goto out; + } + if (arr->len == arr->count) { + ret = start; + goto out; + } + } + if (next) + ret = find_next(arr, start, used); + else + ret = find_prev(arr, start, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +int +rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, true, false); +} + +int +rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, true, true); +} + +int +rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, false); +} + +int +rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, true); +} + +static int +fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool next, bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len || n > arr->len || n == 0) { + rte_errno = EINVAL; + return -1; + } + if (next && (arr->len - start) < n) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } + if (!next && start < (n - 1)) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count || arr->len - arr->count < n) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = next ? 
start : start - n + 1; + goto out; + } + } else { + if (arr->count < n) { + rte_errno = ENOENT; + goto out; + } + if (arr->count == arr->len) { + ret = next ? start : start - n + 1; + goto out; + } + } + + if (next) + ret = find_next_n(arr, start, n, used); + else + ret = find_prev_n(arr, start, n, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +int +rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, true, false); +} + +int +rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, true, true); +} + +int +rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, false); +} + +int +rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, true); +} + +static int +fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next, + bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (used) { + if (arr->count == 0) { + ret = 0; + goto out; + } + if (next && arr->count == arr->len) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == arr->len) { + ret = start + 1; + goto out; + } + } else { + if (arr->len == arr->count) { + ret = 0; + goto out; + } + if (next && arr->count == 0) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == 0) { + ret = start + 1; + goto out; + } + } + + if (next) + ret = find_contig(arr, start, used); + else + ret = find_rev_contig(arr, start, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +static int +fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used, + bool rev) +{ + int cur_idx, next_idx, cur_len, biggest_idx, biggest_len; + /* don't stack if conditions, use function pointers instead */ + int (*find_func)(struct rte_fbarray *, unsigned int); + int (*find_contig_func)(struct rte_fbarray *, unsigned int); + + if (arr == NULL || start >= arr->len) { + rte_errno = EINVAL; + return -1; + } + /* the other API calls already do their fair share of cheap checks, so + * no need to do them here. + */ + + /* the API's called are thread-safe, but something may still happen + * between the API calls, so lock the fbarray. all other API's are + * read-locking the fbarray, so read lock here is OK. + */ + rte_rwlock_read_lock(&arr->rwlock); + + /* pick out appropriate functions */ + if (used) { + if (rev) { + find_func = rte_fbarray_find_prev_used; + find_contig_func = rte_fbarray_find_rev_contig_used; + } else { + find_func = rte_fbarray_find_next_used; + find_contig_func = rte_fbarray_find_contig_used; + } + } else { + if (rev) { + find_func = rte_fbarray_find_prev_free; + find_contig_func = rte_fbarray_find_rev_contig_free; + } else { + find_func = rte_fbarray_find_next_free; + find_contig_func = rte_fbarray_find_contig_free; + } + } + + cur_idx = start; + biggest_idx = -1; /* default is error */ + biggest_len = 0; + for (;;) { + cur_idx = find_func(arr, cur_idx); + + /* block found, check its length */ + if (cur_idx >= 0) { + cur_len = find_contig_func(arr, cur_idx); + /* decide where we go next */ + next_idx = rev ? 
cur_idx - cur_len : cur_idx + cur_len; + /* move current index to start of chunk */ + cur_idx = rev ? next_idx + 1 : cur_idx; + + if (cur_len > biggest_len) { + biggest_idx = cur_idx; + biggest_len = cur_len; + } + cur_idx = next_idx; + /* in reverse mode, next_idx may be -1 if chunk started + * at array beginning. this means there's no more work + * to do. + */ + if (cur_idx < 0) + break; + } else { + /* nothing more to find, stop. however, a failed API + * call has set rte_errno, which we want to ignore, as + * reaching the end of fbarray is not an error. + */ + rte_errno = 0; + break; + } + } + /* if we didn't find anything at all, set rte_errno */ + if (biggest_idx < 0) + rte_errno = used ? ENOENT : ENOSPC; + + rte_rwlock_read_unlock(&arr->rwlock); + return biggest_idx; +} + +int +rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_biggest(arr, start, false, false); +} + +int +rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_biggest(arr, start, true, false); +} + +int +rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_biggest(arr, start, false, true); +} + +int +rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_biggest(arr, start, true, true); +} + + +int +rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, true, false); +} + +int +rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, true, true); +} + +int +rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, false); +} + +int +rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, true); +} + +int +rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt) +{ + void *end; + int ret = -1; + + /* + * no need to synchronize as it doesn't matter if underlying data + * changes - we're doing pointer arithmetic here. 
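+ * the index is recovered purely from the element's address:
+ * idx = (elt - arr->data) / arr->elt_sz. e.g. with elt_sz == 64,
+ * an element at arr->data + 640 resolves to index 10.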
+ */ + + if (arr == NULL || elt == NULL) { + rte_errno = EINVAL; + return -1; + } + end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len); + if (elt < arr->data || elt >= end) { + rte_errno = EINVAL; + return -1; + } + + ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz; + + return ret; +} + +void +rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f) +{ + struct used_mask *msk; + unsigned int i; + + if (arr == NULL || f == NULL) { + rte_errno = EINVAL; + return; + } + + if (fully_validate(arr->name, arr->elt_sz, arr->len)) { + fprintf(f, "Invalid file-backed array\n"); + goto out; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + fprintf(f, "File-backed array: %s\n", arr->name); + fprintf(f, "size: %i occupied: %i elt_sz: %i\n", + arr->len, arr->count, arr->elt_sz); + + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + + for (i = 0; i < msk->n_masks; i++) + fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]); +out: + rte_rwlock_read_unlock(&arr->rwlock); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c new file mode 100644 index 000000000..2d2179d41 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <rte_hexdump.h> +#include <rte_string_fns.h> + +#define LINE_LEN 128 + +void +rte_hexdump(FILE *f, const char *title, const void *buf, unsigned int len) +{ + unsigned int i, out, ofs; + const unsigned char *data = buf; + char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */ + + fprintf(f, "%s at [%p], len=%u\n", + title ? : " Dump data", data, len); + ofs = 0; + while (ofs < len) { + /* format the line in the buffer */ + out = snprintf(line, LINE_LEN, "%08X:", ofs); + for (i = 0; i < 16; i++) { + if (ofs + i < len) + snprintf(line + out, LINE_LEN - out, + " %02X", (data[ofs + i] & 0xff)); + else + strcpy(line + out, " "); + out += 3; + } + + + for (; i <= 16; i++) + out += snprintf(line + out, LINE_LEN - out, " | "); + + for (i = 0; ofs < len && i < 16; i++, ofs++) { + unsigned char c = data[ofs]; + + if (c < ' ' || c > '~') + c = '.'; + out += snprintf(line + out, LINE_LEN - out, "%c", c); + } + fprintf(f, "%s\n", line); + } + fflush(f); +} + +void +rte_memdump(FILE *f, const char *title, const void *buf, unsigned int len) +{ + unsigned int i, out; + const unsigned char *data = buf; + char line[LINE_LEN]; + + if (title) + fprintf(f, "%s: ", title); + + line[0] = '\0'; + for (i = 0, out = 0; i < len; i++) { + /* Make sure we do not overrun the line buffer length. */ + if (out >= LINE_LEN - 4) { + fprintf(f, "%s", line); + out = 0; + line[out] = '\0'; + } + out += snprintf(line + out, LINE_LEN - out, "%02x%s", + (data[i] & 0xff), ((i + 1) < len) ? 
":" : ""); + } + if (out > 0) + fprintf(f, "%s", line); + fprintf(f, "\n"); + + fflush(f); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c new file mode 100644 index 000000000..5388b81a5 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 Mellanox Technologies, Ltd + */ + +#include "rte_hypervisor.h" + +const char * +rte_hypervisor_get_name(enum rte_hypervisor id) +{ + switch (id) { + case RTE_HYPERVISOR_NONE: + return "none"; + case RTE_HYPERVISOR_KVM: + return "KVM"; + case RTE_HYPERVISOR_HYPERV: + return "Hyper-V"; + case RTE_HYPERVISOR_VMWARE: + return "VMware"; + default: + return "unknown"; + } +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c new file mode 100644 index 000000000..cf52d717f --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <sys/queue.h> + +#include <rte_launch.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_atomic.h> +#include <rte_pause.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> + +#include "eal_private.h" + +/* + * Wait until a lcore finished its job. + */ +int +rte_eal_wait_lcore(unsigned slave_id) +{ + if (lcore_config[slave_id].state == WAIT) + return 0; + + while (lcore_config[slave_id].state != WAIT && + lcore_config[slave_id].state != FINISHED) + rte_pause(); + + rte_rmb(); + + /* we are in finished state, go to wait state */ + lcore_config[slave_id].state = WAIT; + return lcore_config[slave_id].ret; +} + +/* + * Check that every SLAVE lcores are in WAIT state, then call + * rte_eal_remote_launch() for all of them. If call_master is true + * (set to CALL_MASTER), also call the function on the master lcore. + */ +int +rte_eal_mp_remote_launch(int (*f)(void *), void *arg, + enum rte_rmt_call_master_t call_master) +{ + int lcore_id; + int master = rte_get_master_lcore(); + + /* check state of lcores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_config[lcore_id].state != WAIT) + return -EBUSY; + } + + /* send messages to cores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(f, arg, lcore_id); + } + + if (call_master == CALL_MASTER) { + lcore_config[master].ret = f(arg); + lcore_config[master].state = FINISHED; + } + + return 0; +} + +/* + * Return the state of the lcore identified by slave_id. + */ +enum rte_lcore_state_t +rte_eal_get_lcore_state(unsigned lcore_id) +{ + return lcore_config[lcore_id].state; +} + +/* + * Do a rte_eal_wait_lcore() for every lcore. The return values are + * ignored. 
+ */ +void +rte_eal_mp_wait_lcore(void) +{ + unsigned lcore_id; + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_wait_lcore(lcore_id); + } +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c new file mode 100644 index 000000000..5404922a8 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <unistd.h> +#include <limits.h> +#include <string.h> + +#include <rte_errno.h> +#include <rte_log.h> +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_common.h> +#include <rte_debug.h> + +#include "eal_private.h" +#include "eal_thread.h" + +unsigned int rte_get_master_lcore(void) +{ + return rte_eal_get_configuration()->master_lcore; +} + +unsigned int rte_lcore_count(void) +{ + return rte_eal_get_configuration()->lcore_count; +} + +int rte_lcore_index(int lcore_id) +{ + if (unlikely(lcore_id >= RTE_MAX_LCORE)) + return -1; + + if (lcore_id < 0) + lcore_id = (int)rte_lcore_id(); + + return lcore_config[lcore_id].core_index; +} + +int rte_lcore_to_cpu_id(int lcore_id) +{ + if (unlikely(lcore_id >= RTE_MAX_LCORE)) + return -1; + + if (lcore_id < 0) + lcore_id = (int)rte_lcore_id(); + + return lcore_config[lcore_id].core_id; +} + +rte_cpuset_t rte_lcore_cpuset(unsigned int lcore_id) +{ + return lcore_config[lcore_id].cpuset; +} + +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned int lcore_id) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (lcore_id >= RTE_MAX_LCORE) + return ROLE_OFF; + return cfg->lcore_role[lcore_id]; +} + +int rte_lcore_is_enabled(unsigned int lcore_id) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (lcore_id >= RTE_MAX_LCORE) + return 0; + return cfg->lcore_role[lcore_id] == ROLE_RTE; +} + +unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap) +{ + i++; + if (wrap) + i %= RTE_MAX_LCORE; + + while (i < RTE_MAX_LCORE) { + if (!rte_lcore_is_enabled(i) || + (skip_master && (i == rte_get_master_lcore()))) { + i++; + if (wrap) + i %= RTE_MAX_LCORE; + continue; + } + break; + } + return i; +} + +unsigned int +rte_lcore_to_socket_id(unsigned int lcore_id) +{ + return lcore_config[lcore_id].socket_id; +} + +static int +socket_id_cmp(const void *a, const void *b) +{ + const int *lcore_id_a = a; + const int *lcore_id_b = b; + + if (*lcore_id_a < *lcore_id_b) + return -1; + if (*lcore_id_a > *lcore_id_b) + return 1; + return 0; +} + +/* + * Parse /sys/devices/system/cpu to get the number of physical and logical + * processors on the machine. The function will fill the cpu_info + * structure. + */ +int +rte_eal_cpu_init(void) +{ + /* pointer to global configuration */ + struct rte_config *config = rte_eal_get_configuration(); + unsigned lcore_id; + unsigned count = 0; + unsigned int socket_id, prev_socket_id; + int lcore_to_socket_id[RTE_MAX_LCORE]; + + /* + * Parse the maximum set of logical cores, detect the subset of running + * ones and enable them by default. 
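+ *
+ * Each detected core gets ROLE_RTE and a default 1:1 lcore-to-cpu
+ * cpuset; cores that are not detected get ROLE_OFF and a
+ * core_index of -1, so rte_lcore_is_enabled() and the
+ * RTE_LCORE_FOREACH iterators will skip them.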
+ */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + lcore_config[lcore_id].core_index = count; + + /* init cpuset for per lcore config */ + CPU_ZERO(&lcore_config[lcore_id].cpuset); + + /* find socket first */ + socket_id = eal_cpu_socket_id(lcore_id); + lcore_to_socket_id[lcore_id] = socket_id; + + if (eal_cpu_detected(lcore_id) == 0) { + config->lcore_role[lcore_id] = ROLE_OFF; + lcore_config[lcore_id].core_index = -1; + continue; + } + + /* By default, lcore 1:1 map to cpu id */ + CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset); + + /* By default, each detected core is enabled */ + config->lcore_role[lcore_id] = ROLE_RTE; + lcore_config[lcore_id].core_role = ROLE_RTE; + lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); + lcore_config[lcore_id].socket_id = socket_id; + RTE_LOG(DEBUG, EAL, "Detected lcore %u as " + "core %u on socket %u\n", + lcore_id, lcore_config[lcore_id].core_id, + lcore_config[lcore_id].socket_id); + count++; + } + for (; lcore_id < CPU_SETSIZE; lcore_id++) { + if (eal_cpu_detected(lcore_id) == 0) + continue; + RTE_LOG(DEBUG, EAL, "Skipped lcore %u as core %u on socket %u\n", + lcore_id, eal_cpu_core_id(lcore_id), + eal_cpu_socket_id(lcore_id)); + } + + /* Set the count of enabled logical cores of the EAL configuration */ + config->lcore_count = count; + RTE_LOG(DEBUG, EAL, + "Support maximum %u logical core(s) by configuration.\n", + RTE_MAX_LCORE); + RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count); + + /* sort all socket id's in ascending order */ + qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id), + sizeof(lcore_to_socket_id[0]), socket_id_cmp); + + prev_socket_id = -1; + config->numa_node_count = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + socket_id = lcore_to_socket_id[lcore_id]; + if (socket_id != prev_socket_id) + config->numa_nodes[config->numa_node_count++] = + socket_id; + prev_socket_id = socket_id; + } + RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count); + + return 0; +} + +unsigned int +rte_socket_count(void) +{ + const struct rte_config *config = rte_eal_get_configuration(); + return config->numa_node_count; +} + +int +rte_socket_id_by_idx(unsigned int idx) +{ + const struct rte_config *config = rte_eal_get_configuration(); + if (idx >= config->numa_node_count) { + rte_errno = EINVAL; + return -1; + } + return config->numa_nodes[idx]; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c new file mode 100644 index 000000000..8835c8fff --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c @@ -0,0 +1,481 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <regex.h> +#include <fnmatch.h> + +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_per_lcore.h> + +#include "eal_private.h" + +/* global log structure */ +struct rte_logs rte_logs = { + .type = ~0, + .level = RTE_LOG_DEBUG, + .file = NULL, +}; + +struct rte_eal_opt_loglevel { + /** Next list entry */ + TAILQ_ENTRY(rte_eal_opt_loglevel) next; + /** Compiled regular expression obtained from the option */ + regex_t re_match; + /** Globbing pattern option */ + char *pattern; + /** Log level value obtained from the option */ + uint32_t level; +}; + +TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel); + +/** List of valid EAL log level 
options */ +static struct rte_eal_opt_loglevel_list opt_loglevel_list = + TAILQ_HEAD_INITIALIZER(opt_loglevel_list); + +/* Stream to use for logging if rte_logs.file is NULL */ +static FILE *default_log_stream; + +/** + * This global structure stores some information about the message + * that is currently being processed by one lcore + */ +struct log_cur_msg { + uint32_t loglevel; /**< log level - see rte_log.h */ + uint32_t logtype; /**< log type - see rte_log.h */ +}; + +struct rte_log_dynamic_type { + const char *name; + uint32_t loglevel; +}; + + /* per core log */ +static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); + +/* default logs */ + +/* Change the stream that will be used by logging system */ +int +rte_openlog_stream(FILE *f) +{ + rte_logs.file = f; + return 0; +} + +FILE * +rte_log_get_stream(void) +{ + FILE *f = rte_logs.file; + + if (f == NULL) { + /* + * Grab the current value of stderr here, rather than + * just initializing default_log_stream to stderr. This + * ensures that we will always use the current value + * of stderr, even if the application closes and + * reopens it. + */ + return default_log_stream ? : stderr; + } + return f; +} + +/* Set global log level */ +void +rte_log_set_global_level(uint32_t level) +{ + rte_logs.level = (uint32_t)level; +} + +/* Get global log level */ +uint32_t +rte_log_get_global_level(void) +{ + return rte_logs.level; +} + +int +rte_log_get_level(uint32_t type) +{ + if (type >= rte_logs.dynamic_types_len) + return -1; + + return rte_logs.dynamic_types[type].loglevel; +} + +bool +rte_log_can_log(uint32_t logtype, uint32_t level) +{ + int log_level; + + if (level > rte_log_get_global_level()) + return false; + + log_level = rte_log_get_level(logtype); + if (log_level < 0) + return false; + + if (level > (uint32_t)log_level) + return false; + + return true; +} + +int +rte_log_set_level(uint32_t type, uint32_t level) +{ + if (type >= rte_logs.dynamic_types_len) + return -1; + if (level > RTE_LOG_DEBUG) + return -1; + + rte_logs.dynamic_types[type].loglevel = level; + + return 0; +} + +/* set log level by regular expression */ +int +rte_log_set_level_regexp(const char *regex, uint32_t level) +{ + regex_t r; + size_t i; + + if (level > RTE_LOG_DEBUG) + return -1; + + if (regcomp(&r, regex, 0) != 0) + return -1; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (regexec(&r, rte_logs.dynamic_types[i].name, 0, + NULL, 0) == 0) + rte_logs.dynamic_types[i].loglevel = level; + } + + regfree(&r); + + return 0; +} + +/* + * Save the type string and the loglevel for later dynamic + * logtypes which may register later. 
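+ *
+ * E.g. an EAL option such as --log-level=lib.*:debug is recorded
+ * here, so that a logtype registered afterwards (say, lib.hash)
+ * still picks up the requested level via
+ * rte_log_register_type_and_pick_level().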
+ */ +static int rte_log_save_level(int priority, + const char *regex, const char *pattern) +{ + struct rte_eal_opt_loglevel *opt_ll = NULL; + + opt_ll = malloc(sizeof(*opt_ll)); + if (opt_ll == NULL) + goto fail; + + opt_ll->level = priority; + + if (regex) { + opt_ll->pattern = NULL; + if (regcomp(&opt_ll->re_match, regex, 0) != 0) + goto fail; + } else if (pattern) { + opt_ll->pattern = strdup(pattern); + if (opt_ll->pattern == NULL) + goto fail; + } else + goto fail; + + TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next); + return 0; +fail: + free(opt_ll); + return -1; +} + +int rte_log_save_regexp(const char *regex, int tmp) +{ + return rte_log_save_level(tmp, regex, NULL); +} + +/* set log level based on globbing pattern */ +int +rte_log_set_level_pattern(const char *pattern, uint32_t level) +{ + size_t i; + + if (level > RTE_LOG_DEBUG) + return -1; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + + if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0) + rte_logs.dynamic_types[i].loglevel = level; + } + + return 0; +} + +int rte_log_save_pattern(const char *pattern, int priority) +{ + return rte_log_save_level(priority, NULL, pattern); +} + +/* get the current loglevel for the message being processed */ +int rte_log_cur_msg_loglevel(void) +{ + return RTE_PER_LCORE(log_cur_msg).loglevel; +} + +/* get the current logtype for the message being processed */ +int rte_log_cur_msg_logtype(void) +{ + return RTE_PER_LCORE(log_cur_msg).logtype; +} + +static int +rte_log_lookup(const char *name) +{ + size_t i; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (strcmp(name, rte_logs.dynamic_types[i].name) == 0) + return i; + } + + return -1; +} + +/* register an extended log type, assuming table is large enough, and id + * is not yet registered. 
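+ * both assumptions are guaranteed by the callers: rte_log_register()
+ * grows the table with realloc() before passing in a fresh id, and
+ * the RTE_INIT_PRIO constructor below registers the legacy types
+ * into a table allocated with RTE_LOGTYPE_FIRST_EXT_ID entries.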
+ */ +static int +__rte_log_register(const char *name, int id) +{ + char *dup_name = strdup(name); + + if (dup_name == NULL) + return -ENOMEM; + + rte_logs.dynamic_types[id].name = dup_name; + rte_logs.dynamic_types[id].loglevel = RTE_LOG_INFO; + + return id; +} + +/* register an extended log type */ +int +rte_log_register(const char *name) +{ + struct rte_log_dynamic_type *new_dynamic_types; + int id, ret; + + id = rte_log_lookup(name); + if (id >= 0) + return id; + + new_dynamic_types = realloc(rte_logs.dynamic_types, + sizeof(struct rte_log_dynamic_type) * + (rte_logs.dynamic_types_len + 1)); + if (new_dynamic_types == NULL) + return -ENOMEM; + rte_logs.dynamic_types = new_dynamic_types; + + ret = __rte_log_register(name, rte_logs.dynamic_types_len); + if (ret < 0) + return ret; + + rte_logs.dynamic_types_len++; + + return ret; +} + +/* Register an extended log type and try to pick its level from EAL options */ +int +rte_log_register_type_and_pick_level(const char *name, uint32_t level_def) +{ + struct rte_eal_opt_loglevel *opt_ll; + uint32_t level = level_def; + int type; + + type = rte_log_register(name); + if (type < 0) + return type; + + TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) { + if (opt_ll->level > RTE_LOG_DEBUG) + continue; + + if (opt_ll->pattern) { + if (fnmatch(opt_ll->pattern, name, 0) == 0) + level = opt_ll->level; + } else { + if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0) + level = opt_ll->level; + } + } + + rte_logs.dynamic_types[type].loglevel = level; + + return type; +} + +struct logtype { + uint32_t log_id; + const char *logtype; +}; + +static const struct logtype logtype_strings[] = { + {RTE_LOGTYPE_EAL, "lib.eal"}, + {RTE_LOGTYPE_MALLOC, "lib.malloc"}, + {RTE_LOGTYPE_RING, "lib.ring"}, + {RTE_LOGTYPE_MEMPOOL, "lib.mempool"}, + {RTE_LOGTYPE_TIMER, "lib.timer"}, + {RTE_LOGTYPE_PMD, "pmd"}, + {RTE_LOGTYPE_HASH, "lib.hash"}, + {RTE_LOGTYPE_LPM, "lib.lpm"}, + {RTE_LOGTYPE_KNI, "lib.kni"}, + {RTE_LOGTYPE_ACL, "lib.acl"}, + {RTE_LOGTYPE_POWER, "lib.power"}, + {RTE_LOGTYPE_METER, "lib.meter"}, + {RTE_LOGTYPE_SCHED, "lib.sched"}, + {RTE_LOGTYPE_PORT, "lib.port"}, + {RTE_LOGTYPE_TABLE, "lib.table"}, + {RTE_LOGTYPE_PIPELINE, "lib.pipeline"}, + {RTE_LOGTYPE_MBUF, "lib.mbuf"}, + {RTE_LOGTYPE_CRYPTODEV, "lib.cryptodev"}, + {RTE_LOGTYPE_EFD, "lib.efd"}, + {RTE_LOGTYPE_EVENTDEV, "lib.eventdev"}, + {RTE_LOGTYPE_GSO, "lib.gso"}, + {RTE_LOGTYPE_USER1, "user1"}, + {RTE_LOGTYPE_USER2, "user2"}, + {RTE_LOGTYPE_USER3, "user3"}, + {RTE_LOGTYPE_USER4, "user4"}, + {RTE_LOGTYPE_USER5, "user5"}, + {RTE_LOGTYPE_USER6, "user6"}, + {RTE_LOGTYPE_USER7, "user7"}, + {RTE_LOGTYPE_USER8, "user8"} +}; + +/* Logging should be first initializer (before drivers and bus) */ +RTE_INIT_PRIO(rte_log_init, LOG) +{ + uint32_t i; + + rte_log_set_global_level(RTE_LOG_DEBUG); + + rte_logs.dynamic_types = calloc(RTE_LOGTYPE_FIRST_EXT_ID, + sizeof(struct rte_log_dynamic_type)); + if (rte_logs.dynamic_types == NULL) + return; + + /* register legacy log types */ + for (i = 0; i < RTE_DIM(logtype_strings); i++) + __rte_log_register(logtype_strings[i].logtype, + logtype_strings[i].log_id); + + rte_logs.dynamic_types_len = RTE_LOGTYPE_FIRST_EXT_ID; +} + +static const char * +loglevel_to_string(uint32_t level) +{ + switch (level) { + case 0: return "disabled"; + case RTE_LOG_EMERG: return "emerg"; + case RTE_LOG_ALERT: return "alert"; + case RTE_LOG_CRIT: return "critical"; + case RTE_LOG_ERR: return "error"; + case RTE_LOG_WARNING: return "warning"; + case RTE_LOG_NOTICE: return "notice"; + case 
RTE_LOG_INFO: return "info"; + case RTE_LOG_DEBUG: return "debug"; + default: return "unknown"; + } +} + +/* dump global level and registered log types */ +void +rte_log_dump(FILE *f) +{ + size_t i; + + fprintf(f, "global log level is %s\n", + loglevel_to_string(rte_log_get_global_level())); + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + fprintf(f, "id %zu: %s, level is %s\n", + i, rte_logs.dynamic_types[i].name, + loglevel_to_string(rte_logs.dynamic_types[i].loglevel)); + } +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + */ +int +rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) +{ + FILE *f = rte_log_get_stream(); + int ret; + + if (logtype >= rte_logs.dynamic_types_len) + return -1; + if (!rte_log_can_log(logtype, level)) + return 0; + + /* save loglevel and logtype in a global per-lcore variable */ + RTE_PER_LCORE(log_cur_msg).loglevel = level; + RTE_PER_LCORE(log_cur_msg).logtype = logtype; + + ret = vfprintf(f, format, ap); + fflush(f); + return ret; +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + * No need to check level here, done by rte_vlog(). + */ +int +rte_log(uint32_t level, uint32_t logtype, const char *format, ...) +{ + va_list ap; + int ret; + + va_start(ap, format); + ret = rte_vlog(level, logtype, format, ap); + va_end(ap); + return ret; +} + +/* + * Called by environment-specific initialization functions. + */ +void +eal_log_set_default(FILE *default_log) +{ + default_log_stream = default_log; + +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, + "Debug dataplane logs available - lower performance\n"); +#endif +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c new file mode 100644 index 000000000..49d3ed0ce --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include <rte_eal_memconfig.h> +#include <rte_version.h> + +#include "eal_internal_cfg.h" +#include "eal_memcfg.h" +#include "eal_private.h" + +void +eal_mcfg_complete(void) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + struct rte_mem_config *mcfg = cfg->mem_config; + + /* ALL shared mem_config related INIT DONE */ + if (cfg->process_type == RTE_PROC_PRIMARY) + mcfg->magic = RTE_MAGIC; + + internal_config.init_complete = 1; +} + +void +eal_mcfg_wait_complete(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* wait until shared mem_config finish initialising */ + while (mcfg->magic != RTE_MAGIC) + rte_pause(); +} + +int +eal_mcfg_check_version(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* check if version from memconfig matches compiled in macro */ + if (mcfg->version != RTE_VERSION) + return -1; + + return 0; +} + +void +eal_mcfg_update_internal(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + internal_config.legacy_mem = mcfg->legacy_mem; + internal_config.single_file_segments = mcfg->single_file_segments; +} + +void +eal_mcfg_update_from_internal(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + mcfg->legacy_mem = internal_config.legacy_mem; + mcfg->single_file_segments = 
internal_config.single_file_segments; + /* record current DPDK version */ + mcfg->version = RTE_VERSION; +} + +void +rte_mcfg_mem_read_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); +} + +void +rte_mcfg_mem_read_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); +} + +void +rte_mcfg_mem_write_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); +} + +void +rte_mcfg_mem_write_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); +} + +void +rte_mcfg_tailq_read_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_lock(&mcfg->qlock); +} + +void +rte_mcfg_tailq_read_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_unlock(&mcfg->qlock); +} + +void +rte_mcfg_tailq_write_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_lock(&mcfg->qlock); +} + +void +rte_mcfg_tailq_write_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_unlock(&mcfg->qlock); +} + +void +rte_mcfg_mempool_read_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_lock(&mcfg->mplock); +} + +void +rte_mcfg_mempool_read_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_read_unlock(&mcfg->mplock); +} + +void +rte_mcfg_mempool_write_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_lock(&mcfg->mplock); +} + +void +rte_mcfg_mempool_write_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_write_unlock(&mcfg->mplock); +} + +void +rte_mcfg_timer_lock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_spinlock_lock(&mcfg->tlock); +} + +void +rte_mcfg_timer_unlock(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_spinlock_unlock(&mcfg->tlock); +} + +bool +rte_mcfg_get_single_file_segments(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + return (bool)mcfg->single_file_segments; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c new file mode 100644 index 000000000..55189d072 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include <string.h> + +#include <rte_errno.h> +#include <rte_lcore.h> +#include <rte_fbarray.h> +#include <rte_memzone.h> +#include <rte_memory.h> +#include <rte_string_fns.h> +#include <rte_rwlock.h> + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" + +struct mem_event_callback_entry { + TAILQ_ENTRY(mem_event_callback_entry) next; + char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN]; + rte_mem_event_callback_t clb; + void *arg; +}; + +struct mem_alloc_validator_entry { + TAILQ_ENTRY(mem_alloc_validator_entry) next; + char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN]; + 
rte_mem_alloc_validator_t clb; + int socket_id; + size_t limit; +}; + +/** Double linked list of actions. */ +TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry); +TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry); + +static struct mem_event_callback_entry_list mem_event_callback_list = + TAILQ_HEAD_INITIALIZER(mem_event_callback_list); +static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER; + +static struct mem_alloc_validator_entry_list mem_alloc_validator_list = + TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list); +static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER; + +static struct mem_event_callback_entry * +find_mem_event_callback(const char *name, void *arg) +{ + struct mem_event_callback_entry *r; + + TAILQ_FOREACH(r, &mem_event_callback_list, next) { + if (!strcmp(r->name, name) && r->arg == arg) + break; + } + return r; +} + +static struct mem_alloc_validator_entry * +find_mem_alloc_validator(const char *name, int socket_id) +{ + struct mem_alloc_validator_entry *r; + + TAILQ_FOREACH(r, &mem_alloc_validator_list, next) { + if (!strcmp(r->name, name) && r->socket_id == socket_id) + break; + } + return r; +} + +bool +eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, + size_t len) +{ + void *end, *aligned_start, *aligned_end; + size_t pgsz = (size_t)msl->page_sz; + const struct rte_memseg *ms; + + /* for IOVA_VA, it's always contiguous */ + if (rte_eal_iova_mode() == RTE_IOVA_VA && !msl->external) + return true; + + /* for legacy memory, it's always contiguous */ + if (internal_config.legacy_mem) + return true; + + end = RTE_PTR_ADD(start, len); + + /* for nohuge, we check pagemap, otherwise check memseg */ + if (!rte_eal_has_hugepages()) { + rte_iova_t cur, expected; + + aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz); + aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz); + + /* if start and end are on the same page, bail out early */ + if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz) + return true; + + /* skip first iteration */ + cur = rte_mem_virt2iova(aligned_start); + expected = cur + pgsz; + aligned_start = RTE_PTR_ADD(aligned_start, pgsz); + + while (aligned_start < aligned_end) { + cur = rte_mem_virt2iova(aligned_start); + if (cur != expected) + return false; + aligned_start = RTE_PTR_ADD(aligned_start, pgsz); + expected += pgsz; + } + } else { + int start_seg, end_seg, cur_seg; + rte_iova_t cur, expected; + + aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz); + aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz); + + start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) / + pgsz; + end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) / + pgsz; + + /* if start and end are on the same page, bail out early */ + if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz) + return true; + + /* skip first iteration */ + ms = rte_fbarray_get(&msl->memseg_arr, start_seg); + cur = ms->iova; + expected = cur + pgsz; + + /* if we can't access IOVA addresses, assume non-contiguous */ + if (cur == RTE_BAD_IOVA) + return false; + + for (cur_seg = start_seg + 1; cur_seg < end_seg; + cur_seg++, expected += pgsz) { + ms = rte_fbarray_get(&msl->memseg_arr, cur_seg); + + if (ms->iova != expected) + return false; + } + } + return true; +} + +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb, void *arg) +{ + struct mem_event_callback_entry *entry; + int ret, len; + if (name == NULL || clb == NULL) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, 
RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name, arg); + if (entry != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + ret = -1; + goto unlock; + } + + /* callback successfully created and is valid, add it to the list */ + entry->clb = clb; + entry->arg = arg; + strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n", + name, arg); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +int +eal_memalloc_mem_event_callback_unregister(const char *name, void *arg) +{ + struct mem_event_callback_entry *entry; + int ret, len; + + if (name == NULL) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name, arg); + if (entry == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + TAILQ_REMOVE(&mem_event_callback_list, entry, next); + free(entry); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n", + name, arg); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len) +{ + struct mem_event_callback_entry *entry; + + rte_rwlock_read_lock(&mem_event_rwlock); + + TAILQ_FOREACH(entry, &mem_event_callback_list, next) { + RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n", + entry->name, entry->arg); + entry->clb(event, start, len, entry->arg); + } + + rte_rwlock_read_unlock(&mem_event_rwlock); +} + +int +eal_memalloc_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit) +{ + struct mem_alloc_validator_entry *entry; + int ret, len; + if (name == NULL || clb == NULL || socket_id < 0) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_alloc_validator_rwlock); + + entry = find_mem_alloc_validator(name, socket_id); + if (entry != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + ret = -1; + goto unlock; + } + + /* callback successfully created and is valid, add it to the list */ + entry->clb = clb; + entry->socket_id = socket_id; + entry->limit = limit; + strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n", + name, socket_id, limit); + +unlock: + rte_rwlock_write_unlock(&mem_alloc_validator_rwlock); + return ret; +} + +int +eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id) +{ + struct mem_alloc_validator_entry *entry; + int ret, 
len; + + if (name == NULL || socket_id < 0) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_alloc_validator_rwlock); + + entry = find_mem_alloc_validator(name, socket_id); + if (entry == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + TAILQ_REMOVE(&mem_alloc_validator_list, entry, next); + free(entry); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n", + name, socket_id); + +unlock: + rte_rwlock_write_unlock(&mem_alloc_validator_rwlock); + return ret; +} + +int +eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len) +{ + struct mem_alloc_validator_entry *entry; + int ret = 0; + + rte_rwlock_read_lock(&mem_alloc_validator_rwlock); + + TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) { + if (entry->socket_id != socket_id || entry->limit > new_len) + continue; + RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n", + entry->name, entry->socket_id); + if (entry->clb(socket_id, entry->limit, new_len) < 0) + ret = -1; + } + + rte_rwlock_read_unlock(&mem_alloc_validator_rwlock); + + return ret; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c new file mode 100644 index 000000000..4c897a13f --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c @@ -0,0 +1,939 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/queue.h> + +#include <rte_fbarray.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_errno.h> +#include <rte_log.h> + +#include "eal_memalloc.h" +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_memcfg.h" +#include "malloc_heap.h" + +/* + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by hugepage_sz until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of hugepage size. 
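+ * (Despite the wording above, the current implementation reserves
+ * the area with an anonymous PROT_NONE mapping rather than by
+ * mapping /dev/zero - see eal_get_virtual_area() below.)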
+ */ + +#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" + +static void *next_baseaddr; +static uint64_t system_page_sz; + +#ifdef RTE_EXEC_ENV_LINUX +#define RTE_DONTDUMP MADV_DONTDUMP +#elif defined RTE_EXEC_ENV_FREEBSD +#define RTE_DONTDUMP MADV_NOCORE +#else +#error "madvise doesn't support this OS" +#endif + +#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 +void * +eal_get_virtual_area(void *requested_addr, size_t *size, + size_t page_sz, int flags, int mmap_flags) +{ + bool addr_is_hint, allow_shrink, unmap, no_align; + uint64_t map_sz; + void *mapped_addr, *aligned_addr; + uint8_t try = 0; + + if (system_page_sz == 0) + system_page_sz = sysconf(_SC_PAGESIZE); + + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); + + addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0; + allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0; + unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0; + + if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 && + rte_eal_process_type() == RTE_PROC_PRIMARY) + next_baseaddr = (void *) internal_config.base_virtaddr; + +#ifdef RTE_ARCH_64 + if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && + rte_eal_process_type() == RTE_PROC_PRIMARY) + next_baseaddr = (void *) eal_get_baseaddr(); +#endif + if (requested_addr == NULL && next_baseaddr != NULL) { + requested_addr = next_baseaddr; + requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz); + addr_is_hint = true; + } + + /* we don't need alignment of resulting pointer in the following cases: + * + * 1. page size is equal to system size + * 2. we have a requested address, and it is page-aligned, and we will + * be discarding the address if we get a different one. + * + * for all other cases, alignment is potentially necessary. + */ + no_align = (requested_addr != NULL && + requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) && + !addr_is_hint) || + page_sz == system_page_sz; + + do { + map_sz = no_align ? *size : *size + page_sz; + if (map_sz > SIZE_MAX) { + RTE_LOG(ERR, EAL, "Map size too big\n"); + rte_errno = E2BIG; + return NULL; + } + + mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE, + mmap_flags, -1, 0); + if (mapped_addr == MAP_FAILED && allow_shrink) + *size -= page_sz; + + if (mapped_addr != MAP_FAILED && addr_is_hint && + mapped_addr != requested_addr) { + try++; + next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz); + if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) { + /* hint was not used. Try with another offset */ + munmap(mapped_addr, map_sz); + mapped_addr = MAP_FAILED; + requested_addr = next_baseaddr; + } + } + } while ((allow_shrink || addr_is_hint) && + mapped_addr == MAP_FAILED && *size > 0); + + /* align resulting address - if map failed, we will ignore the value + * anyway, so no need to add additional checks. + */ + aligned_addr = no_align ? 
mapped_addr : + RTE_PTR_ALIGN(mapped_addr, page_sz); + + if (*size == 0) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n", + strerror(errno)); + rte_errno = errno; + return NULL; + } else if (mapped_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", + strerror(errno)); + /* pass errno up the call chain */ + rte_errno = errno; + return NULL; + } else if (requested_addr != NULL && !addr_is_hint && + aligned_addr != requested_addr) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n", + requested_addr, aligned_addr); + munmap(mapped_addr, map_sz); + rte_errno = EADDRNOTAVAIL; + return NULL; + } else if (requested_addr != NULL && addr_is_hint && + aligned_addr != requested_addr) { + RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n", + requested_addr, aligned_addr); + RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n"); + } else if (next_baseaddr != NULL) { + next_baseaddr = RTE_PTR_ADD(aligned_addr, *size); + } + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + aligned_addr, *size); + + if (unmap) { + munmap(mapped_addr, map_sz); + } else if (!no_align) { + void *map_end, *aligned_end; + size_t before_len, after_len; + + /* when we reserve space with alignment, we add alignment to + * mapping size. On 32-bit, if 1GB alignment was requested, this + * would waste 1GB of address space, which is a luxury we cannot + * afford. so, if alignment was performed, check if any unneeded + * address space can be unmapped back. + */ + + map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz); + aligned_end = RTE_PTR_ADD(aligned_addr, *size); + + /* unmap space before aligned mmap address */ + before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr); + if (before_len > 0) + munmap(mapped_addr, before_len); + + /* unmap space after aligned end mmap address */ + after_len = RTE_PTR_DIFF(map_end, aligned_end); + if (after_len > 0) + munmap(aligned_end, after_len); + } + + if (!unmap) { + /* Exclude these pages from a core dump. 
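+ * RTE_DONTDUMP is MADV_DONTDUMP on Linux and MADV_NOCORE on
+ * FreeBSD (see the #ifdef near the top of this file); reserved
+ * areas can be very large and would otherwise bloat core dumps.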
*/ + if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0) + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", + strerror(errno)); + } + + return aligned_addr; +} + +static struct rte_memseg * +virt2memseg(const void *addr, const struct rte_memseg_list *msl) +{ + const struct rte_fbarray *arr; + void *start, *end; + int ms_idx; + + if (msl == NULL) + return NULL; + + /* a memseg list was specified, check if it's the right one */ + start = msl->base_va; + end = RTE_PTR_ADD(start, msl->len); + + if (addr < start || addr >= end) + return NULL; + + /* now, calculate index */ + arr = &msl->memseg_arr; + ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz; + return rte_fbarray_get(arr, ms_idx); +} + +static struct rte_memseg_list * +virt2memseg_list(const void *addr) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + int msl_idx; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + void *start, *end; + msl = &mcfg->memsegs[msl_idx]; + + start = msl->base_va; + end = RTE_PTR_ADD(start, msl->len); + if (addr >= start && addr < end) + break; + } + /* if we didn't find our memseg list */ + if (msl_idx == RTE_MAX_MEMSEG_LISTS) + return NULL; + return msl; +} + +struct rte_memseg_list * +rte_mem_virt2memseg_list(const void *addr) +{ + return virt2memseg_list(addr); +} + +struct virtiova { + rte_iova_t iova; + void *virt; +}; +static int +find_virt(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + struct virtiova *vi = arg; + if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) { + size_t offset = vi->iova - ms->iova; + vi->virt = RTE_PTR_ADD(ms->addr, offset); + /* stop the walk */ + return 1; + } + return 0; +} +static int +find_virt_legacy(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, size_t len, void *arg) +{ + struct virtiova *vi = arg; + if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) { + size_t offset = vi->iova - ms->iova; + vi->virt = RTE_PTR_ADD(ms->addr, offset); + /* stop the walk */ + return 1; + } + return 0; +} + +void * +rte_mem_iova2virt(rte_iova_t iova) +{ + struct virtiova vi; + + memset(&vi, 0, sizeof(vi)); + + vi.iova = iova; + /* for legacy mem, we can get away with scanning VA-contiguous segments, + * as we know they are PA-contiguous as well + */ + if (internal_config.legacy_mem) + rte_memseg_contig_walk(find_virt_legacy, &vi); + else + rte_memseg_walk(find_virt, &vi); + + return vi.virt; +} + +struct rte_memseg * +rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl) +{ + return virt2memseg(addr, msl != NULL ? 
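+/* [Editor's note] Round-tripping a virtual address and its IOVA with the
+ * helpers defined in this file; an illustrative sketch, not upstream code.
+ * 'buf' stands for any address inside DPDK-managed memory.
+ *
+ *   static rte_iova_t
+ *   va_to_iova(const void *buf)
+ *   {
+ *       // A NULL memseg list means "find the right list for me".
+ *       const struct rte_memseg *ms = rte_mem_virt2memseg(buf, NULL);
+ *       if (ms == NULL)
+ *           return RTE_BAD_IOVA;
+ *       // Segment IOVA plus the offset of 'buf' within the segment.
+ *       return ms->iova + RTE_PTR_DIFF(buf, ms->addr);
+ *   }
+ *
+ *   // The reverse direction: rte_mem_iova2virt(va_to_iova(buf)) == buf
+ *   // whenever 'buf' lies inside a registered memseg.
+ */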
msl : + rte_mem_virt2memseg_list(addr)); +} + +static int +physmem_size(const struct rte_memseg_list *msl, void *arg) +{ + uint64_t *total_len = arg; + + if (msl->external) + return 0; + + *total_len += msl->memseg_arr.count * msl->page_sz; + + return 0; +} + +/* get the total size of memory */ +uint64_t +rte_eal_get_physmem_size(void) +{ + uint64_t total_len = 0; + + rte_memseg_list_walk(physmem_size, &total_len); + + return total_len; +} + +static int +dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx, ms_idx, fd; + FILE *f = arg; + + msl_idx = msl - mcfg->memsegs; + if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) + return -1; + + ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); + if (ms_idx < 0) + return -1; + + fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx); + fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, " + "virt:%p, socket_id:%"PRId32", " + "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " + "nrank:%"PRIx32" fd:%i\n", + msl_idx, ms_idx, + ms->iova, + ms->len, + ms->addr, + ms->socket_id, + ms->hugepage_sz, + ms->nchannel, + ms->nrank, + fd); + + return 0; +} + +/* + * Defining here because declared in rte_memory.h, but the actual implementation + * is in eal_common_memalloc.c, like all other memalloc internals. + */ +int +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb, + void *arg) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_register(name, clb, arg); +} + +int +rte_mem_event_callback_unregister(const char *name, void *arg) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_unregister(name, arg); +} + +int +rte_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id, + limit); +} + +int +rte_mem_alloc_validator_unregister(const char *name, int socket_id) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_alloc_validator_unregister(name, socket_id); +} + +/* Dump the physical memory layout on console */ +void +rte_dump_physmem_layout(FILE *f) +{ + rte_memseg_walk(dump_memseg, f); +} + +static int +check_iova(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + uint64_t *mask = arg; + rte_iova_t iova; + + /* higher address within segment */ + iova = (ms->iova + ms->len) - 1; + if (!(iova & *mask)) + return 0; + + RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n", + ms->iova, ms->len); + + RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask); + return 1; +} + +#define MAX_DMA_MASK_BITS 63 + +/* check memseg iovas are within the required 
range based on dma mask */ +static int +check_dma_mask(uint8_t maskbits, bool thread_unsafe) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + uint64_t mask; + int ret; + + /* Sanity check. We only check width can be managed with 64 bits + * variables. Indeed any higher value is likely wrong. */ + if (maskbits > MAX_DMA_MASK_BITS) { + RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n", + maskbits, MAX_DMA_MASK_BITS); + return -1; + } + + /* create dma mask */ + mask = ~((1ULL << maskbits) - 1); + + if (thread_unsafe) + ret = rte_memseg_walk_thread_unsafe(check_iova, &mask); + else + ret = rte_memseg_walk(check_iova, &mask); + + if (ret) + /* + * Dma mask precludes hugepage usage. + * This device can not be used and we do not need to keep + * the dma mask. + */ + return 1; + + /* + * we need to keep the more restricted maskbit for checking + * potential dynamic memory allocation in the future. + */ + mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits : + RTE_MIN(mcfg->dma_maskbits, maskbits); + + return 0; +} + +int +rte_mem_check_dma_mask(uint8_t maskbits) +{ + return check_dma_mask(maskbits, false); +} + +int +rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits) +{ + return check_dma_mask(maskbits, true); +} + +/* + * Set dma mask to use when memory initialization is done. + * + * This function should ONLY be used by code executed before the memory + * initialization. PMDs should use rte_mem_check_dma_mask if addressing + * limitations by the device. + */ +void +rte_mem_set_dma_mask(uint8_t maskbits) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits : + RTE_MIN(mcfg->dma_maskbits, maskbits); +} + +/* return the number of memory channels */ +unsigned rte_memory_get_nchannel(void) +{ + return rte_eal_get_configuration()->mem_config->nchannel; +} + +/* return the number of memory rank */ +unsigned rte_memory_get_nrank(void) +{ + return rte_eal_get_configuration()->mem_config->nrank; +} + +static int +rte_eal_memdevice_init(void) +{ + struct rte_config *config; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + config = rte_eal_get_configuration(); + config->mem_config->nchannel = internal_config.force_nchannel; + config->mem_config->nrank = internal_config.force_nrank; + + return 0; +} + +/* Lock page in physical memory and prevent from swapping. */ +int +rte_mem_lock_page(const void *virt) +{ + unsigned long virtual = (unsigned long)virt; + int page_size = getpagesize(); + unsigned long aligned = (virtual & ~(page_size - 1)); + return mlock((void *)aligned, page_size); +} + +int +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ms_idx, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + const struct rte_memseg *ms; + struct rte_fbarray *arr; + + if (msl->memseg_arr.count == 0) + continue; + + arr = &msl->memseg_arr; + + ms_idx = rte_fbarray_find_next_used(arr, 0); + while (ms_idx >= 0) { + int n_segs; + size_t len; + + ms = rte_fbarray_get(arr, ms_idx); + + /* find how many more segments there are, starting with + * this one. 
+ */ + n_segs = rte_fbarray_find_contig_used(arr, ms_idx); + len = n_segs * msl->page_sz; + + ret = func(msl, ms, len, arg); + if (ret) + return ret; + ms_idx = rte_fbarray_find_next_used(arr, + ms_idx + n_segs); + } + } + return 0; +} + +int +rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) +{ + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_mcfg_mem_read_lock(); + ret = rte_memseg_contig_walk_thread_unsafe(func, arg); + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ms_idx, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + const struct rte_memseg *ms; + struct rte_fbarray *arr; + + if (msl->memseg_arr.count == 0) + continue; + + arr = &msl->memseg_arr; + + ms_idx = rte_fbarray_find_next_used(arr, 0); + while (ms_idx >= 0) { + ms = rte_fbarray_get(arr, ms_idx); + ret = func(msl, ms, arg); + if (ret) + return ret; + ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1); + } + } + return 0; +} + +int +rte_memseg_walk(rte_memseg_walk_t func, void *arg) +{ + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_mcfg_mem_read_lock(); + ret = rte_memseg_walk_thread_unsafe(func, arg); + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + + if (msl->base_va == NULL) + continue; + + ret = func(msl, arg); + if (ret) + return ret; + } + return 0; +} + +int +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) +{ + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_mcfg_mem_read_lock(); + ret = rte_memseg_list_walk_thread_unsafe(func, arg); + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + int msl_idx, seg_idx, ret; + + if (ms == NULL) { + rte_errno = EINVAL; + return -1; + } + + msl = rte_mem_virt2memseg_list(ms->addr); + if (msl == NULL) { + rte_errno = EINVAL; + return -1; + } + arr = &msl->memseg_arr; + + msl_idx = msl - mcfg->memsegs; + seg_idx = rte_fbarray_find_idx(arr, ms); + + if (!rte_fbarray_is_used(arr, seg_idx)) { + rte_errno = ENOENT; + return -1; + } + + /* segment fd API is not supported for external segments */ + if (msl->external) { + rte_errno = ENOTSUP; + return -1; + } + + ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx); + if (ret < 0) { + rte_errno = -ret; + ret = -1; + } + return ret; +} + +int +rte_memseg_get_fd(const struct rte_memseg *ms) +{ + int ret; + + rte_mcfg_mem_read_lock(); + ret = rte_memseg_get_fd_thread_unsafe(ms); + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, + size_t *offset) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + int msl_idx, seg_idx, ret; + + if (ms == NULL || offset == NULL) { + rte_errno = EINVAL; + return -1; + } + + msl = rte_mem_virt2memseg_list(ms->addr); + if (msl == NULL) { + 
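+/* [Editor's note] Minimal use of the walk API above; an illustrative
+ * sketch, not upstream code. A callback returns 0 to continue and non-zero
+ * to stop; a positive value is passed through to the walk's caller.
+ *
+ *   static int
+ *   sum_seg_len(const struct rte_memseg_list *msl __rte_unused,
+ *               const struct rte_memseg *ms, void *arg)
+ *   {
+ *       size_t *total = arg;
+ *       *total += ms->len;          // accumulate each used segment
+ *       return 0;                   // keep walking
+ *   }
+ *
+ *   // From application code:
+ *   size_t total = 0;
+ *   rte_memseg_walk(sum_seg_len, &total);
+ */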
rte_errno = EINVAL; + return -1; + } + arr = &msl->memseg_arr; + + msl_idx = msl - mcfg->memsegs; + seg_idx = rte_fbarray_find_idx(arr, ms); + + if (!rte_fbarray_is_used(arr, seg_idx)) { + rte_errno = ENOENT; + return -1; + } + + /* segment fd API is not supported for external segments */ + if (msl->external) { + rte_errno = ENOTSUP; + return -1; + } + + ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset); + if (ret < 0) { + rte_errno = -ret; + ret = -1; + } + return ret; +} + +int +rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset) +{ + int ret; + + rte_mcfg_mem_read_lock(); + ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset); + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int socket_id, n; + int ret = 0; + + if (va_addr == NULL || page_sz == 0 || len == 0 || + !rte_is_power_of_2(page_sz) || + RTE_ALIGN(len, page_sz) != len || + ((len / page_sz) != n_pages && iova_addrs != NULL) || + !rte_is_aligned(va_addr, page_sz)) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + + /* make sure the segment doesn't already exist */ + if (malloc_heap_find_external_seg(va_addr, len) != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + /* get next available socket ID */ + socket_id = mcfg->next_socket_id; + if (socket_id > INT32_MAX) { + RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); + rte_errno = ENOSPC; + ret = -1; + goto unlock; + } + + /* we can create a new memseg */ + n = len / page_sz; + if (malloc_heap_create_external_seg(va_addr, iova_addrs, n, + page_sz, "extmem", socket_id) == NULL) { + ret = -1; + goto unlock; + } + + /* memseg list successfully created - increment next socket ID */ + mcfg->next_socket_id++; +unlock: + rte_mcfg_mem_write_unlock(); + return ret; +} + +int +rte_extmem_unregister(void *va_addr, size_t len) +{ + struct rte_memseg_list *msl; + int ret = 0; + + if (va_addr == NULL || len == 0) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + + /* find our segment */ + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + + ret = malloc_heap_destroy_external_seg(msl); +unlock: + rte_mcfg_mem_write_unlock(); + return ret; +} + +static int +sync_memory(void *va_addr, size_t len, bool attach) +{ + struct rte_memseg_list *msl; + int ret = 0; + + if (va_addr == NULL || len == 0) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + + /* find our segment */ + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (attach) + ret = rte_fbarray_attach(&msl->memseg_arr); + else + ret = rte_fbarray_detach(&msl->memseg_arr); + +unlock: + rte_mcfg_mem_write_unlock(); + return ret; +} + +int +rte_extmem_attach(void *va_addr, size_t len) +{ + return sync_memory(va_addr, len, true); +} + +int +rte_extmem_detach(void *va_addr, size_t len) +{ + return sync_memory(va_addr, len, false); +} + +/* init memory subsystem */ +int +rte_eal_memory_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int retval; + RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); + + if (!mcfg) + return -1; + + /* lock mem hotplug here, to prevent races while we init */ + rte_mcfg_mem_read_lock(); + + if 
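+/* [Editor's note] Registering externally allocated memory through the API
+ * above; a minimal sketch, not upstream code. 'ext_mem' and 'len' are
+ * assumed names: a page-aligned buffer whose length is a multiple of
+ * 'page_sz'. Error handling is omitted for brevity.
+ *
+ *   size_t page_sz = RTE_PGSIZE_2M;
+ *   unsigned int n_pages = len / page_sz;
+ *
+ *   // iova_addrs may be NULL when the memory is only ever used via VA.
+ *   rte_extmem_register(ext_mem, len, NULL, n_pages, page_sz);
+ *   // ... use the memory (e.g. attach it to a malloc heap) ...
+ *   rte_extmem_unregister(ext_mem, len);
+ */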
(rte_eal_memseg_init() < 0) + goto fail; + + if (eal_memalloc_init() < 0) + goto fail; + + retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? + rte_eal_hugepage_init() : + rte_eal_hugepage_attach(); + if (retval < 0) + goto fail; + + if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) + goto fail; + + return 0; +fail: + rte_mcfg_mem_read_unlock(); + return -1; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c new file mode 100644 index 000000000..7c21aa921 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c @@ -0,0 +1,420 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <inttypes.h> +#include <string.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_errno.h> +#include <rte_string_fns.h> +#include <rte_common.h> +#include <rte_eal_trace.h> + +#include "malloc_heap.h" +#include "malloc_elem.h" +#include "eal_private.h" +#include "eal_memcfg.h" + +static inline const struct rte_memzone * +memzone_lookup_thread_unsafe(const char *name) +{ + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + const struct rte_memzone *mz; + int i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + /* + * the algorithm is not optimal (linear), but there are few + * zones and this function should be called at init only + */ + i = rte_fbarray_find_next_used(arr, 0); + while (i >= 0) { + mz = rte_fbarray_get(arr, i); + if (mz->addr != NULL && + !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return mz; + i = rte_fbarray_find_next_used(arr, i + 1); + } + return NULL; +} + +static const struct rte_memzone * +memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, + int socket_id, unsigned int flags, unsigned int align, + unsigned int bound) +{ + struct rte_memzone *mz; + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + void *mz_addr; + size_t requested_len; + int mz_idx; + bool contig; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + /* no more room in config */ + if (arr->count >= arr->len) { + RTE_LOG(ERR, EAL, + "%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n", + __func__); + rte_errno = ENOSPC; + return NULL; + } + + if (strlen(name) > sizeof(mz->name) - 1) { + RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n", + __func__, name); + rte_errno = ENAMETOOLONG; + return NULL; + } + + /* zone already exist */ + if ((memzone_lookup_thread_unsafe(name)) != NULL) { + RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n", + __func__, name); + rte_errno = EEXIST; + return NULL; + } + + /* if alignment is not a power of two */ + if (align && !rte_is_power_of_2(align)) { + RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__, + align); + rte_errno = EINVAL; + return NULL; + } + + /* alignment less than cache size is not allowed */ + if (align < RTE_CACHE_LINE_SIZE) + align = RTE_CACHE_LINE_SIZE; + + /* align length on cache boundary. 
Check for overflow before doing so */ + if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { + rte_errno = EINVAL; /* requested size too big */ + return NULL; + } + + len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE); + + /* save minimal requested length */ + requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); + + /* check that boundary condition is valid */ + if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { + rte_errno = EINVAL; + return NULL; + } + + if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) { + rte_errno = EINVAL; + return NULL; + } + + /* only set socket to SOCKET_ID_ANY if we aren't allocating for an + * external heap. + */ + if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES) + socket_id = SOCKET_ID_ANY; + + contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0; + /* malloc only cares about size flags, remove contig flag from flags */ + flags &= ~RTE_MEMZONE_IOVA_CONTIG; + + if (len == 0 && bound == 0) { + /* no size constraints were placed, so use malloc elem len */ + requested_len = 0; + mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags, + align, contig); + } else { + if (len == 0) + requested_len = bound; + /* allocate memory on heap */ + mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, + flags, align, bound, contig); + } + if (mz_addr == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + struct malloc_elem *elem = malloc_elem_from_data(mz_addr); + + /* fill the zone in config */ + mz_idx = rte_fbarray_find_next_free(arr, 0); + + if (mz_idx < 0) { + mz = NULL; + } else { + rte_fbarray_set_used(arr, mz_idx); + mz = rte_fbarray_get(arr, mz_idx); + } + + if (mz == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__); + malloc_heap_free(elem); + rte_errno = ENOSPC; + return NULL; + } + + strlcpy(mz->name, name, sizeof(mz->name)); + mz->iova = rte_malloc_virt2iova(mz_addr); + mz->addr = mz_addr; + mz->len = requested_len == 0 ? + elem->size - elem->pad - MALLOC_ELEM_OVERHEAD : + requested_len; + mz->hugepage_sz = elem->msl->page_sz; + mz->socket_id = elem->msl->socket_id; + mz->flags = 0; + + return mz; +} + +static const struct rte_memzone * +rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, + unsigned int flags, unsigned int align, unsigned int bound) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *mz = NULL; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + mz = memzone_reserve_aligned_thread_unsafe( + name, len, socket_id, flags, align, bound); + + rte_eal_trace_memzone_reserve(name, len, socket_id, flags, align, + bound, mz); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return mz; +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment and boundary). If the allocation cannot be done, + * return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, bound); +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment). If the allocation cannot be done, return NULL. 
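+ *
+ * [Editor's note] An illustrative lifecycle, not part of the upstream
+ * comment; "rx_ring" is an arbitrary example name:
+ *
+ *   const struct rte_memzone *mz =
+ *       rte_memzone_reserve_aligned("rx_ring", 16384, rte_socket_id(),
+ *                                   0, 4096);     // 4 KiB alignment
+ *   const struct rte_memzone *same = rte_memzone_lookup("rx_ring");
+ *   rte_memzone_free(mz);                         // 'same' == 'mz'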
+ */ +const struct rte_memzone * +rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, 0); +} + +/* + * Return a pointer to a correctly filled memzone descriptor. If the + * allocation cannot be done, return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve(const char *name, size_t len, int socket_id, + unsigned flags) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, + flags, RTE_CACHE_LINE_SIZE, 0); +} + +int +rte_memzone_free(const struct rte_memzone *mz) +{ + char name[RTE_MEMZONE_NAMESIZE]; + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + struct rte_memzone *found_mz; + int ret = 0; + void *addr = NULL; + unsigned idx; + + if (mz == NULL) + return -EINVAL; + + rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE); + mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + rte_rwlock_write_lock(&mcfg->mlock); + + idx = rte_fbarray_find_idx(arr, mz); + found_mz = rte_fbarray_get(arr, idx); + + if (found_mz == NULL) { + ret = -EINVAL; + } else if (found_mz->addr == NULL) { + RTE_LOG(ERR, EAL, "Memzone is not allocated\n"); + ret = -EINVAL; + } else { + addr = found_mz->addr; + memset(found_mz, 0, sizeof(*found_mz)); + rte_fbarray_set_free(arr, idx); + } + + rte_rwlock_write_unlock(&mcfg->mlock); + + if (addr != NULL) + rte_free(addr); + + rte_eal_trace_memzone_free(name, addr, ret); + return ret; +} + +/* + * Lookup for the memzone identified by the given name + */ +const struct rte_memzone * +rte_memzone_lookup(const char *name) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *memzone = NULL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + + memzone = memzone_lookup_thread_unsafe(name); + + rte_rwlock_read_unlock(&mcfg->mlock); + + rte_eal_trace_memzone_lookup(name, memzone); + return memzone; +} + +static void +dump_memzone(const struct rte_memzone *mz, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl = NULL; + void *cur_addr, *mz_end; + struct rte_memseg *ms; + int mz_idx, ms_idx; + size_t page_sz; + FILE *f = arg; + + mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz); + + fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, " + "socket_id:%"PRId32", flags:%"PRIx32"\n", + mz_idx, + mz->name, + mz->len, + mz->addr, + mz->socket_id, + mz->flags); + + /* go through each page occupied by this memzone */ + msl = rte_mem_virt2memseg_list(mz->addr); + if (!msl) { + RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n"); + return; + } + page_sz = (size_t)mz->hugepage_sz; + cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz); + mz_end = RTE_PTR_ADD(cur_addr, mz->len); + + fprintf(f, "physical segments used:\n"); + ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz; + ms = rte_fbarray_get(&msl->memseg_arr, ms_idx); + + do { + fprintf(f, " addr: %p iova: 0x%" PRIx64 " " + "len: 0x%zx " + "pagesz: 0x%zx\n", + cur_addr, ms->iova, ms->len, page_sz); + + /* advance VA to next page */ + cur_addr = RTE_PTR_ADD(cur_addr, page_sz); + + /* memzones occupy contiguous segments */ + ++ms; + } while (cur_addr < mz_end); +} + +/* Dump all reserved memory zones on console */ +void +rte_memzone_dump(FILE *f) +{ + rte_memzone_walk(dump_memzone, f); +} + +/* + * Init the memzone subsystem + */ +int +rte_eal_memzone_init(void) +{ + struct rte_mem_config *mcfg; + int ret = 0; + + /* get pointer to global 
configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY && + rte_fbarray_init(&mcfg->memzones, "memzone", + RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) { + RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n"); + ret = -1; + } else if (rte_eal_process_type() == RTE_PROC_SECONDARY && + rte_fbarray_attach(&mcfg->memzones)) { + RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n"); + ret = -1; + } + + rte_rwlock_write_unlock(&mcfg->mlock); + + return ret; +} + +/* Walk all reserved memory zones */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), + void *arg) +{ + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + int i; + + mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + rte_rwlock_read_lock(&mcfg->mlock); + i = rte_fbarray_find_next_used(arr, 0); + while (i >= 0) { + struct rte_memzone *mz = rte_fbarray_get(arr, i); + (*func)(mz, arg); + i = rte_fbarray_find_next_used(arr, i + 1); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c new file mode 100644 index 000000000..8f2cbd1c6 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c @@ -0,0 +1,1861 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation. + * Copyright(c) 2014 6WIND S.A. + */ + +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#ifndef RTE_EXEC_ENV_WINDOWS +#include <syslog.h> +#endif +#include <ctype.h> +#include <limits.h> +#include <errno.h> +#include <getopt.h> +#ifndef RTE_EXEC_ENV_WINDOWS +#include <dlfcn.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#include <dirent.h> + +#include <rte_string_fns.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_lcore.h> +#include <rte_memory.h> +#include <rte_tailq.h> +#include <rte_version.h> +#include <rte_devargs.h> +#include <rte_memcpy.h> +#ifndef RTE_EXEC_ENV_WINDOWS +#include <rte_telemetry.h> +#endif + +#include "eal_internal_cfg.h" +#include "eal_options.h" +#include "eal_filesystem.h" +#include "eal_private.h" +#ifndef RTE_EXEC_ENV_WINDOWS +#include "eal_trace.h" +#endif + +#define BITS_PER_HEX 4 +#define LCORE_OPT_LST 1 +#define LCORE_OPT_MSK 2 +#define LCORE_OPT_MAP 3 + +const char +eal_short_options[] = + "b:" /* pci-blacklist */ + "c:" /* coremask */ + "s:" /* service coremask */ + "d:" /* driver */ + "h" /* help */ + "l:" /* corelist */ + "S:" /* service corelist */ + "m:" /* memory size */ + "n:" /* memory channels */ + "r:" /* memory ranks */ + "v" /* version */ + "w:" /* pci-whitelist */ + ; + +const struct option +eal_long_options[] = { + {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM }, + {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM }, + {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM }, + {OPT_HELP, 0, NULL, OPT_HELP_NUM }, + {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, + {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM }, + {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, + {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, + {OPT_TRACE, 1, NULL, OPT_TRACE_NUM }, + {OPT_TRACE_DIR, 1, NULL, OPT_TRACE_DIR_NUM }, + {OPT_TRACE_BUF_SIZE, 1, NULL, OPT_TRACE_BUF_SIZE_NUM }, + {OPT_TRACE_MODE, 1, NULL, OPT_TRACE_MODE_NUM }, + {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM}, + {OPT_NO_HPET, 
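+/* [Editor's note] Illustrative reading of this table, not upstream code:
+ * each entry pairs an option name with getopt_long()'s has_arg flag
+ * (1 = required argument, 0 = none), so "--no-hpet" takes no value while
+ * "--huge-dir" requires one. The mount point below is made up:
+ *
+ *   ./app -l 0-1 --no-hpet --huge-dir /mnt/huge
+ */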
0, NULL, OPT_NO_HPET_NUM }, + {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, + {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, + {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM }, + {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, + {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, + {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, + {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM }, + {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, + {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, + {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, + {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, + {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM }, + {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM}, + {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM}, + {OPT_TELEMETRY, 0, NULL, OPT_TELEMETRY_NUM }, + {OPT_NO_TELEMETRY, 0, NULL, OPT_NO_TELEMETRY_NUM }, + {0, 0, NULL, 0 } +}; + +TAILQ_HEAD(shared_driver_list, shared_driver); + +/* Definition for shared object drivers. */ +struct shared_driver { + TAILQ_ENTRY(shared_driver) next; + + char name[PATH_MAX]; + void* lib_handle; +}; + +/* List of external loadable drivers */ +static struct shared_driver_list solib_list = +TAILQ_HEAD_INITIALIZER(solib_list); + +/* Default path of external loadable drivers */ +static const char *default_solib_dir = RTE_EAL_PMD_PATH; + +/* + * Stringified version of solib path used by dpdk-pmdinfo.py + * Note: PLEASE DO NOT ALTER THIS without making a corresponding + * change to usertools/dpdk-pmdinfo.py + */ +static const char dpdk_solib_path[] __rte_used = +"DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; + +TAILQ_HEAD(device_option_list, device_option); + +struct device_option { + TAILQ_ENTRY(device_option) next; + + enum rte_devtype type; + char arg[]; +}; + +static struct device_option_list devopt_list = +TAILQ_HEAD_INITIALIZER(devopt_list); + +static int master_lcore_parsed; +static int mem_parsed; +static int core_parsed; + +#ifndef RTE_EXEC_ENV_WINDOWS +static char **eal_args; +static char **eal_app_args; + +#define EAL_PARAM_REQ "/eal/params" +#define EAL_APP_PARAM_REQ "/eal/app_params" + +/* callback handler for telemetry library to report out EAL flags */ +int +handle_eal_info_request(const char *cmd, const char *params __rte_unused, + struct rte_tel_data *d) +{ + char **args; + int used = 0; + int i = 0; + + if (strcmp(cmd, EAL_PARAM_REQ) == 0) + args = eal_args; + else + args = eal_app_args; + + rte_tel_data_start_array(d, RTE_TEL_STRING_VAL); + if (args == NULL || args[0] == NULL) + return 0; + + for ( ; args[i] != NULL; i++) + used = rte_tel_data_add_array_string(d, args[i]); + return used; +} + +int +eal_save_args(int argc, char **argv) +{ + int i, j; + + rte_telemetry_register_cmd(EAL_PARAM_REQ, handle_eal_info_request, + "Returns EAL commandline parameters used. Takes no parameters"); + rte_telemetry_register_cmd(EAL_APP_PARAM_REQ, handle_eal_info_request, + "Returns app commandline parameters used. Takes no parameters"); + + /* clone argv to report out later. 
We overprovision, but + * this does not waste huge amounts of memory + */ + eal_args = calloc(argc + 1, sizeof(*eal_args)); + if (eal_args == NULL) + return -1; + + for (i = 0; i < argc; i++) { + eal_args[i] = strdup(argv[i]); + if (strcmp(argv[i], "--") == 0) + break; + } + eal_args[i++] = NULL; /* always finish with NULL */ + + /* allow reporting of any app args we know about too */ + if (i >= argc) + return 0; + + eal_app_args = calloc(argc - i + 1, sizeof(*eal_args)); + if (eal_app_args == NULL) + return -1; + + for (j = 0; i < argc; j++, i++) + eal_app_args[j] = strdup(argv[i]); + eal_app_args[j] = NULL; + + return 0; +} +#endif + +static int +eal_option_device_add(enum rte_devtype type, const char *optarg) +{ + struct device_option *devopt; + size_t optlen; + int ret; + + optlen = strlen(optarg) + 1; + devopt = calloc(1, sizeof(*devopt) + optlen); + if (devopt == NULL) { + RTE_LOG(ERR, EAL, "Unable to allocate device option\n"); + return -ENOMEM; + } + + devopt->type = type; + ret = strlcpy(devopt->arg, optarg, optlen); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Unable to copy device option\n"); + free(devopt); + return -EINVAL; + } + TAILQ_INSERT_TAIL(&devopt_list, devopt, next); + return 0; +} + +int +eal_option_device_parse(void) +{ + struct device_option *devopt; + void *tmp; + int ret = 0; + + TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) { + if (ret == 0) { + ret = rte_devargs_add(devopt->type, devopt->arg); + if (ret) + RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n", + devopt->arg); + } + TAILQ_REMOVE(&devopt_list, devopt, next); + free(devopt); + } + return ret; +} + +const char * +eal_get_hugefile_prefix(void) +{ + if (internal_config.hugefile_prefix != NULL) + return internal_config.hugefile_prefix; + return HUGEFILE_PREFIX_DEFAULT; +} + +void +eal_reset_internal_config(struct internal_config *internal_cfg) +{ + int i; + + internal_cfg->memory = 0; + internal_cfg->force_nrank = 0; + internal_cfg->force_nchannel = 0; + internal_cfg->hugefile_prefix = NULL; + internal_cfg->hugepage_dir = NULL; + internal_cfg->force_sockets = 0; + /* zero out the NUMA config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_mem[i] = 0; + internal_cfg->force_socket_limits = 0; + /* zero out the NUMA limits config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_limit[i] = 0; + /* zero out hugedir descriptors */ + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) { + memset(&internal_cfg->hugepage_info[i], 0, + sizeof(internal_cfg->hugepage_info[0])); + internal_cfg->hugepage_info[i].lock_descriptor = -1; + } + internal_cfg->base_virtaddr = 0; + +#ifdef LOG_DAEMON + internal_cfg->syslog_facility = LOG_DAEMON; +#endif + + /* if set to NONE, interrupt mode is determined automatically */ + internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; + +#ifdef RTE_LIBEAL_USE_HPET + internal_cfg->no_hpet = 0; +#else + internal_cfg->no_hpet = 1; +#endif + internal_cfg->vmware_tsc_map = 0; + internal_cfg->create_uio_dev = 0; + internal_cfg->iova_mode = RTE_IOVA_DC; + internal_cfg->user_mbuf_pool_ops_name = NULL; + CPU_ZERO(&internal_cfg->ctrl_cpuset); + internal_cfg->init_complete = 0; +} + +static int +eal_plugin_add(const char *path) +{ + struct shared_driver *solib; + + solib = malloc(sizeof(*solib)); + if (solib == NULL) { + RTE_LOG(ERR, EAL, "malloc(solib) failed\n"); + return -1; + } + memset(solib, 0, sizeof(*solib)); + strlcpy(solib->name, path, PATH_MAX-1); + solib->name[PATH_MAX-1] = 0; + TAILQ_INSERT_TAIL(&solib_list, solib, next); + + return 0; +} + +static int 
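+/* [Editor's note] How the argv split in eal_save_args() above plays out;
+ * an illustrative example (paths and flags are made up), not upstream
+ * code. Everything before "--" is kept in eal_args and served via the
+ * /eal/params telemetry command; everything after it goes to eal_app_args
+ * (/eal/app_params):
+ *
+ *   ./app -l 0-3 -n 4 -d /path/to/plugins -- --app-flag=1
+ *         (EAL arguments)                    (application arguments)
+ */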
+eal_plugindir_init(const char *path) +{ + DIR *d = NULL; + struct dirent *dent = NULL; + char sopath[PATH_MAX]; + + if (path == NULL || *path == '\0') + return 0; + + d = opendir(path); + if (d == NULL) { + RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n", + path, strerror(errno)); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + struct stat sb; + + snprintf(sopath, sizeof(sopath), "%s/%s", path, dent->d_name); + + if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode))) + continue; + + if (eal_plugin_add(sopath) == -1) + break; + } + + closedir(d); + /* XXX this ignores failures from readdir() itself */ + return (dent == NULL) ? 0 : -1; +} + +int +eal_plugins_init(void) +{ +#ifndef RTE_EXEC_ENV_WINDOWS + struct shared_driver *solib = NULL; + struct stat sb; + + if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 && + S_ISDIR(sb.st_mode)) + eal_plugin_add(default_solib_dir); + + TAILQ_FOREACH(solib, &solib_list, next) { + + if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (eal_plugindir_init(solib->name) == -1) { + RTE_LOG(ERR, EAL, + "Cannot init plugin directory %s\n", + solib->name); + return -1; + } + } else { + RTE_LOG(DEBUG, EAL, "open shared lib %s\n", + solib->name); + solib->lib_handle = dlopen(solib->name, RTLD_NOW); + if (solib->lib_handle == NULL) { + RTE_LOG(ERR, EAL, "%s\n", dlerror()); + return -1; + } + } + + } + return 0; +#endif +} + +/* + * Parse the coremask given as argument (hexadecimal string) and fill + * the global configuration (core role and core count) with the parsed + * value. + */ +static int xdigit2val(unsigned char c) +{ + int val; + + if (isdigit(c)) + val = c - '0'; + else if (isupper(c)) + val = c - 'A' + 10; + else + val = c - 'a' + 10; + return val; +} + +static int +eal_parse_service_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + uint32_t taken_lcore_count = 0; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. + */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; + j++, idx++) { + if ((1 << j) & val) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (master_lcore_parsed && + cfg->master_lcore == lcore) { + RTE_LOG(ERR, EAL, + "lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + + if (eal_cpu_detected(idx) == 0) { + RTE_LOG(ERR, EAL, + "lcore %u unavailable\n", idx); + return -1; + } + + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + + lcore_config[idx].core_role = ROLE_SERVICE; + count++; + } + } + } + + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + + for (; idx < RTE_MAX_LCORE; idx++) + lcore_config[idx].core_index = -1; + + if (count == 0) + return -1; + + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores are in the coremask. 
" + "Please ensure -c or -l includes service cores\n"); + } + + cfg->service_lcore_count = count; + return 0; +} + +static int +eal_service_cores_parsed(void) +{ + int idx; + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (lcore_config[idx].core_role == ROLE_SERVICE) + return 1; + } + return 0; +} + +static int +update_lcore_config(int *cores) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + unsigned int count = 0; + unsigned int i; + int ret = 0; + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (cores[i] != -1) { + if (eal_cpu_detected(i) == 0) { + RTE_LOG(ERR, EAL, "lcore %u unavailable\n", i); + ret = -1; + continue; + } + cfg->lcore_role[i] = ROLE_RTE; + count++; + } else { + cfg->lcore_role[i] = ROLE_OFF; + } + lcore_config[i].core_index = cores[i]; + } + if (!ret) + cfg->lcore_count = count; + return ret; +} + +static int +eal_parse_coremask(const char *coremask, int *cores) +{ + unsigned count = 0; + int i, j, idx; + int val; + char c; + + for (idx = 0; idx < RTE_MAX_LCORE; idx++) + cores[idx] = -1; + idx = 0; + + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. + */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) + { + if ((1 << j) & val) { + cores[idx] = count; + count++; + } + } + } + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + if (count == 0) + return -1; + return 0; +} + +static int +eal_parse_service_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + uint32_t taken_lcore_count = 0; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_SERVICE) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (cfg->master_lcore == lcore && + master_lcore_parsed) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + + lcore_config[idx].core_role = + ROLE_SERVICE; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores were in the coremask. 
" + "Please ensure -c or -l includes service cores\n"); + } + + return 0; +} + +static int +eal_parse_corelist(const char *corelist, int *cores) +{ + unsigned count = 0; + char *end = NULL; + int min, max; + int idx; + + for (idx = 0; idx < RTE_MAX_LCORE; idx++) + cores[idx] = -1; + + /* Remove all blank characters ahead */ + while (isblank(*corelist)) + corelist++; + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtol(corelist, &end, 10); + if (errno || end == NULL) + return -1; + if (idx < 0 || idx >= RTE_MAX_LCORE) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cores[idx] == -1) { + cores[idx] = count; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + return 0; +} + +/* Changes the lcore id of the master thread */ +static int +eal_parse_master_lcore(const char *arg) +{ + char *parsing_end; + struct rte_config *cfg = rte_eal_get_configuration(); + + errno = 0; + cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0); + if (errno || parsing_end[0] != 0) + return -1; + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; + master_lcore_parsed = 1; + + /* ensure master core is not used as service core */ + if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { + RTE_LOG(ERR, EAL, + "Error: Master lcore is used as a service core\n"); + return -1; + } + + return 0; +} + +/* + * Parse elem, the elem could be single number/range or '(' ')' group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6) + * Within group elem, '-' used for a range separator; + * ',' used for a single number. 
+ */ +static int +eal_parse_set(const char *input, rte_cpuset_t *set) +{ + unsigned idx; + const char *str = input; + char *end = NULL; + unsigned min, max; + + CPU_ZERO(set); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if ((!isdigit(*str) && *str != '(') || *str == '\0') + return -1; + + /* process single number or single range of number */ + if (*str != '(') { + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= CPU_SETSIZE) + return -1; + else { + while (isblank(*end)) + end++; + + min = idx; + max = idx; + if (*end == '-') { + /* process single <number>-<number> */ + end++; + while (isblank(*end)) + end++; + if (!isdigit(*end)) + return -1; + + errno = 0; + idx = strtoul(end, &end, 10); + if (errno || end == NULL || idx >= CPU_SETSIZE) + return -1; + max = idx; + while (isblank(*end)) + end++; + if (*end != ',' && *end != '\0') + return -1; + } + + if (*end != ',' && *end != '\0' && + *end != '@') + return -1; + + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + CPU_SET(idx, set); + + return end - input; + } + } + + /* process set within bracket */ + str++; + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = RTE_MAX_LCORE; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= CPU_SETSIZE) + return -1; + + /* go ahead to separator '-',',' and ')' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == RTE_MAX_LCORE) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == ')')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + CPU_SET(idx, set); + + min = RTE_MAX_LCORE; + } else + return -1; + + str = end + 1; + } while (*end != '\0' && *end != ')'); + + /* + * to avoid failure that tail blank makes end character check fail + * in eal_parse_lcores( ) + */ + while (isblank(*str)) + str++; + + return str - input; +} + +static int +check_cpuset(rte_cpuset_t *set) +{ + unsigned int idx; + + for (idx = 0; idx < CPU_SETSIZE; idx++) { + if (!CPU_ISSET(idx, set)) + continue; + + if (eal_cpu_detected(idx) == 0) { + RTE_LOG(ERR, EAL, "core %u " + "unavailable\n", idx); + return -1; + } + } + return 0; +} + +/* + * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]' + * lcores, cpus could be a single digit/range or a group. + * '(' and ')' are necessary if it's a group. + * If not supply '@cpus', the value of cpus uses the same as lcores. + * e.g. 
'1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below + * lcore 0 runs on cpuset 0x41 (cpu 0,6) + * lcore 1 runs on cpuset 0x2 (cpu 1) + * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7) + * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2) + * lcore 6 runs on cpuset 0x41 (cpu 0,6) + * lcore 7 runs on cpuset 0x80 (cpu 7) + * lcore 8 runs on cpuset 0x100 (cpu 8) + */ +static int +eal_parse_lcores(const char *lcores) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + rte_cpuset_t lcore_set; + unsigned int set_count; + unsigned idx = 0; + unsigned count = 0; + const char *lcore_start = NULL; + const char *end = NULL; + int offset; + rte_cpuset_t cpuset; + int lflags; + int ret = -1; + + if (lcores == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*lcores)) + lcores++; + + CPU_ZERO(&cpuset); + + /* Reset lcore config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + CPU_ZERO(&lcore_config[idx].cpuset); + } + + /* Get list of cores */ + do { + while (isblank(*lcores)) + lcores++; + if (*lcores == '\0') + goto err; + + lflags = 0; + + /* record lcore_set start point */ + lcore_start = lcores; + + /* go across a complete bracket */ + if (*lcore_start == '(') { + lcores += strcspn(lcores, ")"); + if (*lcores++ == '\0') + goto err; + } + + /* scan the separator '@', ','(next) or '\0'(finish) */ + lcores += strcspn(lcores, "@,"); + + if (*lcores == '@') { + /* explicit assign cpuset and update the end cursor */ + offset = eal_parse_set(lcores + 1, &cpuset); + if (offset < 0) + goto err; + end = lcores + 1 + offset; + } else { /* ',' or '\0' */ + /* haven't given cpuset, current loop done */ + end = lcores; + + /* go back to check <number>-<number> */ + offset = strcspn(lcore_start, "(-"); + if (offset < (end - lcore_start) && + *(lcore_start + offset) != '(') + lflags = 1; + } + + if (*end != ',' && *end != '\0') + goto err; + + /* parse lcore_set from start point */ + if (eal_parse_set(lcore_start, &lcore_set) < 0) + goto err; + + /* without '@', by default using lcore_set as cpuset */ + if (*lcores != '@') + rte_memcpy(&cpuset, &lcore_set, sizeof(cpuset)); + + set_count = CPU_COUNT(&lcore_set); + /* start to update lcore_set */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (!CPU_ISSET(idx, &lcore_set)) + continue; + set_count--; + + if (cfg->lcore_role[idx] != ROLE_RTE) { + lcore_config[idx].core_index = count; + cfg->lcore_role[idx] = ROLE_RTE; + count++; + } + + if (lflags) { + CPU_ZERO(&cpuset); + CPU_SET(idx, &cpuset); + } + + if (check_cpuset(&cpuset) < 0) + goto err; + rte_memcpy(&lcore_config[idx].cpuset, &cpuset, + sizeof(rte_cpuset_t)); + } + + /* some cores from the lcore_set can't be handled by EAL */ + if (set_count != 0) + goto err; + + lcores = end + 1; + } while (*end != '\0'); + + if (count == 0) + goto err; + + cfg->lcore_count = count; + ret = 0; + +err: + + return ret; +} + +#ifndef RTE_EXEC_ENV_WINDOWS +static int +eal_parse_syslog(const char *facility, struct internal_config *conf) +{ + int i; + static const struct { + const char *name; + int value; + } map[] = { + { "auth", LOG_AUTH }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "news", LOG_NEWS }, + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER }, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", 
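+/* [Editor's note] Illustrative logging options, not upstream code. This
+ * table backs "--syslog" (e.g. "--syslog local4" tags EAL syslog output
+ * with LOG_LOCAL4), and eal_parse_log_level() further below accepts:
+ *
+ *   --log-level=8               // numeric: RTE_LOG_DEBUG, set globally
+ *   --log-level=debug           // the same level, by name
+ *   --log-level=lib.eal:info    // "pattern:level" (glob-style match)
+ *   --log-level=pmd.*,notice    // "regex,level" (POSIX regex match)
+ */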
LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { NULL, 0 } + }; + + for (i = 0; map[i].name; i++) { + if (!strcmp(facility, map[i].name)) { + conf->syslog_facility = map[i].value; + return 0; + } + } + return -1; +} +#endif + +static int +eal_parse_log_priority(const char *level) +{ + static const char * const levels[] = { + [RTE_LOG_EMERG] = "emergency", + [RTE_LOG_ALERT] = "alert", + [RTE_LOG_CRIT] = "critical", + [RTE_LOG_ERR] = "error", + [RTE_LOG_WARNING] = "warning", + [RTE_LOG_NOTICE] = "notice", + [RTE_LOG_INFO] = "info", + [RTE_LOG_DEBUG] = "debug", + }; + size_t len = strlen(level); + unsigned long tmp; + char *end; + unsigned int i; + + if (len == 0) + return -1; + + /* look for named values, skip 0 which is not a valid level */ + for (i = 1; i < RTE_DIM(levels); i++) { + if (strncmp(levels[i], level, len) == 0) + return i; + } + + /* not a string, maybe it is numeric */ + errno = 0; + tmp = strtoul(level, &end, 0); + + /* check for errors */ + if (errno != 0 || end == NULL || *end != '\0' || + tmp >= UINT32_MAX) + return -1; + + return tmp; +} + +static int +eal_parse_log_level(const char *arg) +{ + const char *pattern = NULL; + const char *regex = NULL; + char *str, *level; + int priority; + + str = strdup(arg); + if (str == NULL) + return -1; + + if ((level = strchr(str, ','))) { + regex = str; + *level++ = '\0'; + } else if ((level = strchr(str, ':'))) { + pattern = str; + *level++ = '\0'; + } else { + level = str; + } + + priority = eal_parse_log_priority(level); + if (priority < 0) { + fprintf(stderr, "invalid log priority: %s\n", level); + goto fail; + } + + if (regex) { + if (rte_log_set_level_regexp(regex, priority) < 0) { + fprintf(stderr, "cannot set log level %s,%d\n", + regex, priority); + goto fail; + } + if (rte_log_save_regexp(regex, priority) < 0) + goto fail; + } else if (pattern) { + if (rte_log_set_level_pattern(pattern, priority) < 0) { + fprintf(stderr, "cannot set log level %s:%d\n", + pattern, priority); + goto fail; + } + if (rte_log_save_pattern(pattern, priority) < 0) + goto fail; + } else { + rte_log_set_global_level(priority); + } + + free(str); + return 0; + +fail: + free(str); + return -1; +} + +static enum rte_proc_type_t +eal_parse_proc_type(const char *arg) +{ + if (strncasecmp(arg, "primary", sizeof("primary")) == 0) + return RTE_PROC_PRIMARY; + if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0) + return RTE_PROC_SECONDARY; + if (strncasecmp(arg, "auto", sizeof("auto")) == 0) + return RTE_PROC_AUTO; + + return RTE_PROC_INVALID; +} + +static int +eal_parse_iova_mode(const char *name) +{ + int mode; + + if (name == NULL) + return -1; + + if (!strcmp("pa", name)) + mode = RTE_IOVA_PA; + else if (!strcmp("va", name)) + mode = RTE_IOVA_VA; + else + return -1; + + internal_config.iova_mode = mode; + return 0; +} + +static int +eal_parse_base_virtaddr(const char *arg) +{ + char *end; + uint64_t addr; + + errno = 0; + addr = strtoull(arg, &end, 16); + + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; + + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif + + /* align the addr on 16M boundary, 16MB is the minimum huge page + * size on IBM Power architecture. If the addr is aligned to 16MB, + * it can align to 2MB for x86. So this alignment can also be used + * on x86 and other architectures. 
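+ *
+ * [Editor's note] Illustrative values, not part of the upstream comment:
+ * "--base-virtaddr=0x4200000000" is kept as-is (already 16M-aligned),
+ * while "--base-virtaddr=0x4200100000" is rounded up to 0x4201000000.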
+ */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; +} + +/* caller is responsible for freeing the returned string */ +static char * +available_cores(void) +{ + char *str = NULL; + int previous; + int sequence; + char *tmp; + int idx; + + /* find the first available cpu */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (eal_cpu_detected(idx) == 0) + continue; + break; + } + if (idx >= RTE_MAX_LCORE) + return NULL; + + /* first sequence */ + if (asprintf(&str, "%d", idx) < 0) + return NULL; + previous = idx; + sequence = 0; + + for (idx++ ; idx < RTE_MAX_LCORE; idx++) { + if (eal_cpu_detected(idx) == 0) + continue; + + if (idx == previous + 1) { + previous = idx; + sequence = 1; + continue; + } + + /* finish current sequence */ + if (sequence) { + if (asprintf(&tmp, "%s-%d", str, previous) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + } + + /* new sequence */ + if (asprintf(&tmp, "%s,%d", str, idx) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + previous = idx; + sequence = 0; + } + + /* finish last sequence */ + if (sequence) { + if (asprintf(&tmp, "%s-%d", str, previous) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + } + + return str; +} + +int +eal_parse_common_option(int opt, const char *optarg, + struct internal_config *conf) +{ + static int b_used; + static int w_used; + + switch (opt) { + /* blacklist */ + case 'b': + if (w_used) + goto bw_used; + if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI, + optarg) < 0) { + return -1; + } + b_used = 1; + break; + /* whitelist */ + case 'w': + if (b_used) + goto bw_used; + if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI, + optarg) < 0) { + return -1; + } + w_used = 1; + break; + /* coremask */ + case 'c': { + int lcore_indexes[RTE_MAX_LCORE]; + + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. Please ensure -c is before -s or -S\n"); + if (eal_parse_coremask(optarg, lcore_indexes) < 0) { + RTE_LOG(ERR, EAL, "invalid coremask syntax\n"); + return -1; + } + if (update_lcore_config(lcore_indexes) < 0) { + char *available = available_cores(); + + RTE_LOG(ERR, EAL, + "invalid coremask, please check specified cores are part of %s\n", + available); + free(available); + return -1; + } + + if (core_parsed) { + RTE_LOG(ERR, EAL, "Option -c is ignored, because (%s) is set!\n", + (core_parsed == LCORE_OPT_LST) ? "-l" : + (core_parsed == LCORE_OPT_MAP) ? "--lcore" : + "-c"); + return -1; + } + + core_parsed = LCORE_OPT_MSK; + break; + } + /* corelist */ + case 'l': { + int lcore_indexes[RTE_MAX_LCORE]; + + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. Please ensure -l is before -s or -S\n"); + + if (eal_parse_corelist(optarg, lcore_indexes) < 0) { + RTE_LOG(ERR, EAL, "invalid core list syntax\n"); + return -1; + } + if (update_lcore_config(lcore_indexes) < 0) { + char *available = available_cores(); + + RTE_LOG(ERR, EAL, + "invalid core list, please check specified cores are part of %s\n", + available); + free(available); + return -1; + } + + if (core_parsed) { + RTE_LOG(ERR, EAL, "Option -l is ignored, because (%s) is set!\n", + (core_parsed == LCORE_OPT_MSK) ? "-c" : + (core_parsed == LCORE_OPT_MAP) ? 
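+/* [Editor's note] Equivalent selections under the -c and -l parsers above;
+ * illustrative only, not upstream code:
+ *
+ *   -c 0x2c     // coremask: hex bits 2, 3 and 5 set -> lcores 2, 3, 5
+ *   -l 2-3,5    // corelist: the same three lcores, by index
+ *
+ * Both assign core_index 0, 1, 2 in ascending lcore order, and -c, -l and
+ * --lcores are mutually exclusive: the later option is rejected.
+ */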
"--lcore" : + "-l"); + return -1; + } + + core_parsed = LCORE_OPT_LST; + break; + } + /* service coremask */ + case 's': + if (eal_parse_service_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service coremask\n"); + return -1; + } + break; + /* service corelist */ + case 'S': + if (eal_parse_service_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service core list\n"); + return -1; + } + break; + /* size of memory */ + case 'm': + conf->memory = atoi(optarg); + conf->memory *= 1024ULL; + conf->memory *= 1024ULL; + mem_parsed = 1; + break; + /* force number of channels */ + case 'n': + conf->force_nchannel = atoi(optarg); + if (conf->force_nchannel == 0) { + RTE_LOG(ERR, EAL, "invalid channel number\n"); + return -1; + } + break; + /* force number of ranks */ + case 'r': + conf->force_nrank = atoi(optarg); + if (conf->force_nrank == 0 || + conf->force_nrank > 16) { + RTE_LOG(ERR, EAL, "invalid rank number\n"); + return -1; + } + break; + /* force loading of external driver */ + case 'd': + if (eal_plugin_add(optarg) == -1) + return -1; + break; + case 'v': + /* since message is explicitly requested by user, we + * write message at highest log level so it can always + * be seen + * even if info or warning messages are disabled */ + RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version()); + break; + + /* long options */ + case OPT_HUGE_UNLINK_NUM: + conf->hugepage_unlink = 1; + break; + + case OPT_NO_HUGE_NUM: + conf->no_hugetlbfs = 1; + /* no-huge is legacy mem */ + conf->legacy_mem = 1; + break; + + case OPT_NO_PCI_NUM: + conf->no_pci = 1; + break; + + case OPT_NO_HPET_NUM: + conf->no_hpet = 1; + break; + + case OPT_VMWARE_TSC_MAP_NUM: + conf->vmware_tsc_map = 1; + break; + + case OPT_NO_SHCONF_NUM: + conf->no_shconf = 1; + break; + + case OPT_IN_MEMORY_NUM: + conf->in_memory = 1; + /* in-memory is a superset of noshconf and huge-unlink */ + conf->no_shconf = 1; + conf->hugepage_unlink = 1; + break; + + case OPT_PROC_TYPE_NUM: + conf->process_type = eal_parse_proc_type(optarg); + break; + + case OPT_MASTER_LCORE_NUM: + if (eal_parse_master_lcore(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_MASTER_LCORE "\n"); + return -1; + } + break; + + case OPT_VDEV_NUM: + if (eal_option_device_add(RTE_DEVTYPE_VIRTUAL, + optarg) < 0) { + return -1; + } + break; + +#ifndef RTE_EXEC_ENV_WINDOWS + case OPT_SYSLOG_NUM: + if (eal_parse_syslog(optarg, conf) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SYSLOG "\n"); + return -1; + } + break; +#endif + + case OPT_LOG_LEVEL_NUM: { + if (eal_parse_log_level(optarg) < 0) { + RTE_LOG(ERR, EAL, + "invalid parameters for --" + OPT_LOG_LEVEL "\n"); + return -1; + } + break; + } + +#ifndef RTE_EXEC_ENV_WINDOWS + case OPT_TRACE_NUM: { + if (eal_trace_args_save(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_TRACE "\n"); + return -1; + } + break; + } + + case OPT_TRACE_DIR_NUM: { + if (eal_trace_dir_args_save(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_TRACE_DIR "\n"); + return -1; + } + break; + } + + case OPT_TRACE_BUF_SIZE_NUM: { + if (eal_trace_bufsz_args_save(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_TRACE_BUF_SIZE "\n"); + return -1; + } + break; + } + + case OPT_TRACE_MODE_NUM: { + if (eal_trace_mode_args_save(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_TRACE_MODE "\n"); + return -1; + } + break; + } +#endif /* !RTE_EXEC_ENV_WINDOWS */ + + case OPT_LCORES_NUM: + if (eal_parse_lcores(optarg) < 0) { + RTE_LOG(ERR, EAL, 
"invalid parameter for --" + OPT_LCORES "\n"); + return -1; + } + + if (core_parsed) { + RTE_LOG(ERR, EAL, "Option --lcore is ignored, because (%s) is set!\n", + (core_parsed == LCORE_OPT_LST) ? "-l" : + (core_parsed == LCORE_OPT_MSK) ? "-c" : + "--lcore"); + return -1; + } + + core_parsed = LCORE_OPT_MAP; + break; + case OPT_LEGACY_MEM_NUM: + conf->legacy_mem = 1; + break; + case OPT_SINGLE_FILE_SEGMENTS_NUM: + conf->single_file_segments = 1; + break; + case OPT_IOVA_MODE_NUM: + if (eal_parse_iova_mode(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_IOVA_MODE "\n"); + return -1; + } + break; + case OPT_BASE_VIRTADDR_NUM: + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + return -1; + } + break; + case OPT_TELEMETRY_NUM: + break; + case OPT_NO_TELEMETRY_NUM: + conf->no_telemetry = 1; + break; + + /* don't know what to do, leave this to caller */ + default: + return 1; + + } + + return 0; +bw_used: + RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " + "cannot be used at the same time\n"); + return -1; +} + +static void +eal_auto_detect_cores(struct rte_config *cfg) +{ + unsigned int lcore_id; + unsigned int removed = 0; + rte_cpuset_t affinity_set; + + if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t), + &affinity_set)) + CPU_ZERO(&affinity_set); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (cfg->lcore_role[lcore_id] == ROLE_RTE && + !CPU_ISSET(lcore_id, &affinity_set)) { + cfg->lcore_role[lcore_id] = ROLE_OFF; + removed++; + } + } + + cfg->lcore_count -= removed; +} + +static void +compute_ctrl_threads_cpuset(struct internal_config *internal_cfg) +{ + rte_cpuset_t *cpuset = &internal_cfg->ctrl_cpuset; + rte_cpuset_t default_set; + unsigned int lcore_id; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_has_role(lcore_id, ROLE_OFF)) + continue; + RTE_CPU_OR(cpuset, cpuset, &lcore_config[lcore_id].cpuset); + } + RTE_CPU_NOT(cpuset, cpuset); + + if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t), + &default_set)) + CPU_ZERO(&default_set); + + RTE_CPU_AND(cpuset, cpuset, &default_set); + + /* if no remaining cpu, use master lcore cpu affinity */ + if (!CPU_COUNT(cpuset)) { + memcpy(cpuset, &lcore_config[rte_get_master_lcore()].cpuset, + sizeof(*cpuset)); + } +} + +int +eal_cleanup_config(struct internal_config *internal_cfg) +{ + if (internal_cfg->hugefile_prefix != NULL) + free(internal_cfg->hugefile_prefix); + if (internal_cfg->hugepage_dir != NULL) + free(internal_cfg->hugepage_dir); + if (internal_cfg->user_mbuf_pool_ops_name != NULL) + free(internal_cfg->user_mbuf_pool_ops_name); + + return 0; +} + +int +eal_adjust_config(struct internal_config *internal_cfg) +{ + int i; + struct rte_config *cfg = rte_eal_get_configuration(); + + if (!core_parsed) + eal_auto_detect_cores(cfg); + + if (internal_config.process_type == RTE_PROC_AUTO) + internal_config.process_type = eal_proc_type_detect(); + + /* default master lcore is the first one */ + if (!master_lcore_parsed) { + cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; + lcore_config[cfg->master_lcore].core_role = ROLE_RTE; + } + + compute_ctrl_threads_cpuset(internal_cfg); + + /* if no memory amounts were requested, this will result in 0 and + * will be overridden later, right after eal_hugepage_info_init() */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->memory += internal_cfg->socket_mem[i]; + 
+ return 0; +} + +int +eal_check_common_options(struct internal_config *internal_cfg) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { + RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n"); + return -1; + } + + if (internal_cfg->process_type == RTE_PROC_INVALID) { + RTE_LOG(ERR, EAL, "Invalid process type specified\n"); + return -1; + } + if (internal_cfg->hugefile_prefix != NULL && + strlen(internal_cfg->hugefile_prefix) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n"); + return -1; + } + if (internal_cfg->hugepage_dir != NULL && + strlen(internal_cfg->hugepage_dir) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n"); + return -1; + } + if (internal_cfg->user_mbuf_pool_ops_name != NULL && + strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n"); + return -1; + } + if (index(eal_get_hugefile_prefix(), '%') != NULL) { + RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" " + "option\n"); + return -1; + } + if (mem_parsed && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot " + "be specified at the same time\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink && + !internal_cfg->in_memory) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + if (internal_config.force_socket_limits && internal_config.legacy_mem) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT + " is only supported in non-legacy memory mode\n"); + } + if (internal_cfg->single_file_segments && + internal_cfg->hugepage_unlink && + !internal_cfg->in_memory) { + RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is " + "not compatible with --"OPT_HUGE_UNLINK"\n"); + return -1; + } + if (internal_cfg->legacy_mem && + internal_cfg->in_memory) { + RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible " + "with --"OPT_IN_MEMORY"\n"); + return -1; + } + if (internal_cfg->legacy_mem && internal_cfg->match_allocations) { + RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible " + "with --"OPT_MATCH_ALLOCATIONS"\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->match_allocations) { + RTE_LOG(ERR, EAL, "Option --"OPT_NO_HUGE" is not compatible " + "with --"OPT_MATCH_ALLOCATIONS"\n"); + return -1; + } + if (internal_cfg->legacy_mem && internal_cfg->memory == 0) { + RTE_LOG(NOTICE, EAL, "Static memory layout is selected, " + "amount of reserved memory can be adjusted with " + "-m or --"OPT_SOCKET_MEM"\n"); + } + + return 0; +} + +void +eal_common_usage(void) +{ + printf("[options]\n\n" + "EAL common options:\n" + " -c COREMASK Hexadecimal bitmask of cores to run on\n" + " -l CORELIST List of cores to run on\n" + " The argument format is <c1>[-c2][,c3[-c4],...]\n" + " where c1, c2, etc are core indexes between 0 and %d\n" + " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n" + " The argument format is\n" + " '<lcores[@cpus]>[<,lcores[@cpus]>...]'\n" + " lcores and cpus list are grouped by '(' and ')'\n" + " Within the group, '-' is used for range separator,\n" + " ',' is used for single number separator.\n" + " '( )' can be 
omitted for single element group,\n" + " '@' can be omitted if cpus and lcores have the same value\n" + " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n" + " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n" + " -n CHANNELS Number of memory channels\n" + " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" + " -r RANKS Force number of memory ranks (don't detect)\n" + " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" + " Prevent EAL from using this PCI device. The argument\n" + " format is <domain:bus:devid.func>.\n" + " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n" + " Only use the specified PCI devices. The argument format\n" + " is <[domain:]bus:devid.func>. This option can be present\n" + " several times (once per device).\n" + " [NOTE: PCI whitelist cannot be used with -b option]\n" + " --"OPT_VDEV" Add a virtual device.\n" + " The argument format is <driver><id>[,key=val,...]\n" + " (ex: --vdev=net_pcap0,iface=eth2).\n" + " --"OPT_IOVA_MODE" Set IOVA mode. 'pa' for IOVA_PA\n" + " 'va' for IOVA_VA\n" + " -d LIB.so|DIR Add a driver or driver directory\n" + " (can be used multiple times)\n" + " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" + " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" +#ifndef RTE_EXEC_ENV_WINDOWS + " --"OPT_SYSLOG" Set syslog facility\n" +#endif + " --"OPT_LOG_LEVEL"=<int> Set global log level\n" + " --"OPT_LOG_LEVEL"=<type-match>:<int>\n" + " Set specific log level\n" +#ifndef RTE_EXEC_ENV_WINDOWS + " --"OPT_TRACE"=<regex-match>\n" + " Enable trace based on regular expression trace name.\n" + " By default, the trace is disabled.\n" + " User must specify this option to enable trace.\n" + " --"OPT_TRACE_DIR"=<directory path>\n" + " Specify trace directory for trace output.\n" + " By default, trace output will created at\n" + " $HOME directory and parameter must be\n" + " specified once only.\n" + " --"OPT_TRACE_BUF_SIZE"=<int>\n" + " Specify maximum size of allocated memory\n" + " for trace output for each thread. Valid\n" + " unit can be either 'B|K|M' for 'Bytes',\n" + " 'KBytes' and 'MBytes' respectively.\n" + " Default is 1MB and parameter must be\n" + " specified once only.\n" + " --"OPT_TRACE_MODE"=<o[verwrite] | d[iscard]>\n" + " Specify the mode of update of trace\n" + " output file. Either update on a file can\n" + " be wrapped or discarded when file size\n" + " reaches its maximum limit.\n" + " Default mode is 'overwrite' and parameter\n" + " must be specified once only.\n" +#endif /* !RTE_EXEC_ENV_WINDOWS */ + " -v Display version information on startup\n" + " -h, --help This help\n" + " --"OPT_IN_MEMORY" Operate entirely in memory. 
This will\n" + " disable secondary process support\n" + " --"OPT_BASE_VIRTADDR" Base virtual address\n" + " --"OPT_TELEMETRY" Enable telemetry support (on by default)\n" + " --"OPT_NO_TELEMETRY" Disable telemetry support\n" + "\nEAL options for DEBUG use only:\n" + " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" + " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" + " --"OPT_NO_PCI" Disable PCI\n" + " --"OPT_NO_HPET" Disable HPET\n" + " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n" + "\n", RTE_MAX_LCORE); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c new file mode 100644 index 000000000..935e8fefe --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c @@ -0,0 +1,1217 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Intel Corporation + */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <inttypes.h> +#include <libgen.h> +#include <limits.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/file.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> + +#include <rte_alarm.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_eal.h> +#include <rte_errno.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_tailq.h> + +#include "eal_private.h" +#include "eal_filesystem.h" +#include "eal_internal_cfg.h" + +static int mp_fd = -1; +static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */ +static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */ +static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER; +static char peer_name[PATH_MAX]; + +struct action_entry { + TAILQ_ENTRY(action_entry) next; + char action_name[RTE_MP_MAX_NAME_LEN]; + rte_mp_t action; +}; + +/** Double linked list of actions. 
*/ +TAILQ_HEAD(action_entry_list, action_entry); + +static struct action_entry_list action_entry_list = + TAILQ_HEAD_INITIALIZER(action_entry_list); + +enum mp_type { + MP_MSG, /* Share message with peers, will not block */ + MP_REQ, /* Request for information, Will block for a reply */ + MP_REP, /* Response to previously-received request */ + MP_IGN, /* Response telling requester to ignore this response */ +}; + +struct mp_msg_internal { + int type; + struct rte_mp_msg msg; +}; + +struct async_request_param { + rte_mp_async_reply_t clb; + struct rte_mp_reply user_reply; + struct timespec end; + int n_responses_processed; +}; + +struct pending_request { + TAILQ_ENTRY(pending_request) next; + enum { + REQUEST_TYPE_SYNC, + REQUEST_TYPE_ASYNC + } type; + char dst[PATH_MAX]; + struct rte_mp_msg *request; + struct rte_mp_msg *reply; + int reply_received; + RTE_STD_C11 + union { + struct { + struct async_request_param *param; + } async; + struct { + pthread_cond_t cond; + } sync; + }; +}; + +TAILQ_HEAD(pending_request_list, pending_request); + +static struct { + struct pending_request_list requests; + pthread_mutex_t lock; +} pending_requests = { + .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests), + .lock = PTHREAD_MUTEX_INITIALIZER, + /**< used in async requests only */ +}; + +/* forward declarations */ +static int +mp_send(struct rte_mp_msg *msg, const char *peer, int type); + +/* for use with alarm callback */ +static void +async_reply_handle(void *arg); + +/* for use with process_msg */ +static struct pending_request * +async_reply_handle_thread_unsafe(void *arg); + +static void +trigger_async_action(struct pending_request *req); + +static struct pending_request * +find_pending_request(const char *dst, const char *act_name) +{ + struct pending_request *r; + + TAILQ_FOREACH(r, &pending_requests.requests, next) { + if (!strcmp(r->dst, dst) && + !strcmp(r->request->name, act_name)) + break; + } + + return r; +} + +static void +create_socket_path(const char *name, char *buf, int len) +{ + const char *prefix = eal_mp_socket_path(); + + if (strlen(name) > 0) + snprintf(buf, len, "%s_%s", prefix, name); + else + strlcpy(buf, prefix, len); +} + +int +rte_eal_primary_proc_alive(const char *config_file_path) +{ + int config_fd; + + if (config_file_path) + config_fd = open(config_file_path, O_RDONLY); + else { + const char *path; + + path = eal_runtime_config_path(); + config_fd = open(path, O_RDONLY); + } + if (config_fd < 0) + return 0; + + int ret = lockf(config_fd, F_TEST, 0); + close(config_fd); + + return !!ret; +} + +static struct action_entry * +find_action_entry_by_name(const char *name) +{ + struct action_entry *entry; + + TAILQ_FOREACH(entry, &action_entry_list, next) { + if (strncmp(entry->action_name, name, RTE_MP_MAX_NAME_LEN) == 0) + break; + } + + return entry; +} + +static int +validate_action_name(const char *name) +{ + if (name == NULL) { + RTE_LOG(ERR, EAL, "Action name cannot be NULL\n"); + rte_errno = EINVAL; + return -1; + } + if (strnlen(name, RTE_MP_MAX_NAME_LEN) == 0) { + RTE_LOG(ERR, EAL, "Length of action name is zero\n"); + rte_errno = EINVAL; + return -1; + } + if (strnlen(name, RTE_MP_MAX_NAME_LEN) == RTE_MP_MAX_NAME_LEN) { + rte_errno = E2BIG; + return -1; + } + return 0; +} + +int +rte_mp_action_register(const char *name, rte_mp_t action) +{ + struct action_entry *entry; + + if (validate_action_name(name) != 0) + return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + rte_errno = ENOTSUP; + 
return -1; + } + + entry = malloc(sizeof(struct action_entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + return -1; + } + strlcpy(entry->action_name, name, sizeof(entry->action_name)); + entry->action = action; + + pthread_mutex_lock(&mp_mutex_action); + if (find_action_entry_by_name(name) != NULL) { + pthread_mutex_unlock(&mp_mutex_action); + rte_errno = EEXIST; + free(entry); + return -1; + } + TAILQ_INSERT_TAIL(&action_entry_list, entry, next); + pthread_mutex_unlock(&mp_mutex_action); + return 0; +} + +void +rte_mp_action_unregister(const char *name) +{ + struct action_entry *entry; + + if (validate_action_name(name) != 0) + return; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return; + } + + pthread_mutex_lock(&mp_mutex_action); + entry = find_action_entry_by_name(name); + if (entry == NULL) { + pthread_mutex_unlock(&mp_mutex_action); + return; + } + TAILQ_REMOVE(&action_entry_list, entry, next); + pthread_mutex_unlock(&mp_mutex_action); + free(entry); +} + +static int +read_msg(struct mp_msg_internal *m, struct sockaddr_un *s) +{ + int msglen; + struct iovec iov; + struct msghdr msgh; + char control[CMSG_SPACE(sizeof(m->msg.fds))]; + struct cmsghdr *cmsg; + int buflen = sizeof(*m) - sizeof(m->msg.fds); + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = m; + iov.iov_len = buflen; + + msgh.msg_name = s; + msgh.msg_namelen = sizeof(*s); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + msglen = recvmsg(mp_fd, &msgh, 0); + if (msglen < 0) { + RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno)); + return -1; + } + + if (msglen != buflen || (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + RTE_LOG(ERR, EAL, "truncated msg\n"); + return -1; + } + + /* read auxiliary FDs if any */ + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(m->msg.fds, CMSG_DATA(cmsg), sizeof(m->msg.fds)); + break; + } + } + /* sanity-check the response */ + if (m->msg.num_fds < 0 || m->msg.num_fds > RTE_MP_MAX_FD_NUM) { + RTE_LOG(ERR, EAL, "invalid number of fd's received\n"); + return -1; + } + if (m->msg.len_param < 0 || m->msg.len_param > RTE_MP_MAX_PARAM_LEN) { + RTE_LOG(ERR, EAL, "invalid received data length\n"); + return -1; + } + return 0; +} + +static void +process_msg(struct mp_msg_internal *m, struct sockaddr_un *s) +{ + struct pending_request *pending_req; + struct action_entry *entry; + struct rte_mp_msg *msg = &m->msg; + rte_mp_t action = NULL; + + RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name); + + if (m->type == MP_REP || m->type == MP_IGN) { + struct pending_request *req = NULL; + + pthread_mutex_lock(&pending_requests.lock); + pending_req = find_pending_request(s->sun_path, msg->name); + if (pending_req) { + memcpy(pending_req->reply, msg, sizeof(*msg)); + /* -1 indicates that we've been asked to ignore */ + pending_req->reply_received = + m->type == MP_REP ? 
1 : -1; + + if (pending_req->type == REQUEST_TYPE_SYNC) + pthread_cond_signal(&pending_req->sync.cond); + else if (pending_req->type == REQUEST_TYPE_ASYNC) + req = async_reply_handle_thread_unsafe( + pending_req); + } else + RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name); + pthread_mutex_unlock(&pending_requests.lock); + + if (req != NULL) + trigger_async_action(req); + return; + } + + pthread_mutex_lock(&mp_mutex_action); + entry = find_action_entry_by_name(msg->name); + if (entry != NULL) + action = entry->action; + pthread_mutex_unlock(&mp_mutex_action); + + if (!action) { + if (m->type == MP_REQ && !internal_config.init_complete) { + /* if this is a request, and init is not yet complete, + * and callback wasn't registered, we should tell the + * requester to ignore our existence because we're not + * yet ready to process this request. + */ + struct rte_mp_msg dummy; + + memset(&dummy, 0, sizeof(dummy)); + strlcpy(dummy.name, msg->name, sizeof(dummy.name)); + mp_send(&dummy, s->sun_path, MP_IGN); + } else { + RTE_LOG(ERR, EAL, "Cannot find action: %s\n", + msg->name); + } + } else if (action(msg, s->sun_path) < 0) { + RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name); + } +} + +static void * +mp_handle(void *arg __rte_unused) +{ + struct mp_msg_internal msg; + struct sockaddr_un sa; + + while (1) { + if (read_msg(&msg, &sa) == 0) + process_msg(&msg, &sa); + } + + return NULL; +} + +static int +timespec_cmp(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec < b->tv_sec) + return -1; + if (a->tv_sec > b->tv_sec) + return 1; + if (a->tv_nsec < b->tv_nsec) + return -1; + if (a->tv_nsec > b->tv_nsec) + return 1; + return 0; +} + +enum async_action { + ACTION_FREE, /**< free the action entry, but don't trigger callback */ + ACTION_TRIGGER /**< trigger callback, then free action entry */ +}; + +static enum async_action +process_async_request(struct pending_request *sr, const struct timespec *now) +{ + struct async_request_param *param; + struct rte_mp_reply *reply; + bool timeout, last_msg; + + param = sr->async.param; + reply = ¶m->user_reply; + + /* did we timeout? */ + timeout = timespec_cmp(¶m->end, now) <= 0; + + /* if we received a response, adjust relevant data and copy mesasge. */ + if (sr->reply_received == 1 && sr->reply) { + struct rte_mp_msg *msg, *user_msgs, *tmp; + + msg = sr->reply; + user_msgs = reply->msgs; + + tmp = realloc(user_msgs, sizeof(*msg) * + (reply->nb_received + 1)); + if (!tmp) { + RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n", + sr->dst, sr->request->name); + /* this entry is going to be removed and its message + * dropped, but we don't want to leak memory, so + * continue. + */ + } else { + user_msgs = tmp; + reply->msgs = user_msgs; + memcpy(&user_msgs[reply->nb_received], + msg, sizeof(*msg)); + reply->nb_received++; + } + + /* mark this request as processed */ + param->n_responses_processed++; + } else if (sr->reply_received == -1) { + /* we were asked to ignore this process */ + reply->nb_sent--; + } else if (timeout) { + /* count it as processed response, but don't increment + * nb_received. + */ + param->n_responses_processed++; + } + + free(sr->reply); + + last_msg = param->n_responses_processed == reply->nb_sent; + + return last_msg ? 
ACTION_TRIGGER : ACTION_FREE; +} + +static void +trigger_async_action(struct pending_request *sr) +{ + struct async_request_param *param; + struct rte_mp_reply *reply; + + param = sr->async.param; + reply = ¶m->user_reply; + + param->clb(sr->request, reply); + + /* clean up */ + free(sr->async.param->user_reply.msgs); + free(sr->async.param); + free(sr->request); + free(sr); +} + +static struct pending_request * +async_reply_handle_thread_unsafe(void *arg) +{ + struct pending_request *req = (struct pending_request *)arg; + enum async_action action; + struct timespec ts_now; + struct timeval now; + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto no_trigger; + } + ts_now.tv_nsec = now.tv_usec * 1000; + ts_now.tv_sec = now.tv_sec; + + action = process_async_request(req, &ts_now); + + TAILQ_REMOVE(&pending_requests.requests, req, next); + + if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) { + /* if we failed to cancel the alarm because it's already in + * progress, don't proceed because otherwise we will end up + * handling the same message twice. + */ + if (rte_errno == EINPROGRESS) { + RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n"); + goto no_trigger; + } + RTE_LOG(ERR, EAL, "Failed to cancel alarm\n"); + } + + if (action == ACTION_TRIGGER) + return req; +no_trigger: + free(req); + return NULL; +} + +static void +async_reply_handle(void *arg) +{ + struct pending_request *req; + + pthread_mutex_lock(&pending_requests.lock); + req = async_reply_handle_thread_unsafe(arg); + pthread_mutex_unlock(&pending_requests.lock); + + if (req != NULL) + trigger_async_action(req); +} + +static int +open_socket_fd(void) +{ + struct sockaddr_un un; + + peer_name[0] = '\0'; + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + snprintf(peer_name, sizeof(peer_name), + "%d_%"PRIx64, getpid(), rte_rdtsc()); + + mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (mp_fd < 0) { + RTE_LOG(ERR, EAL, "failed to create unix socket\n"); + return -1; + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + + create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path)); + + unlink(un.sun_path); /* May still exist since last run */ + + if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + RTE_LOG(ERR, EAL, "failed to bind %s: %s\n", + un.sun_path, strerror(errno)); + close(mp_fd); + return -1; + } + + RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path); + return mp_fd; +} + +static void +close_socket_fd(void) +{ + char path[PATH_MAX]; + + if (mp_fd < 0) + return; + + close(mp_fd); + create_socket_path(peer_name, path, sizeof(path)); + unlink(path); +} + +int +rte_mp_channel_init(void) +{ + char path[PATH_MAX]; + int dir_fd; + pthread_t mp_handle_tid; + + /* in no shared files mode, we do not have secondary processes support, + * so no need to initialize IPC. 
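+ * For reference: the primary binds the bare socket path while each
+ * secondary appends "_<pid>_<rdtsc>" (see open_socket_fd() above), so
+ * the "*" filter built below matches every peer socket in the runtime
+ * directory, e.g. (hypothetical paths) .../dpdk/rte/mp_socket and
+ * .../dpdk/rte/mp_socket_42_1a2b3c.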
+ */ + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n"); + rte_errno = ENOTSUP; + return -1; + } + + /* create filter path */ + create_socket_path("*", path, sizeof(path)); + strlcpy(mp_filter, basename(path), sizeof(mp_filter)); + + /* path may have been modified, so recreate it */ + create_socket_path("*", path, sizeof(path)); + strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path)); + + /* lock the directory */ + dir_fd = open(mp_dir_path, O_RDONLY); + if (dir_fd < 0) { + RTE_LOG(ERR, EAL, "failed to open %s: %s\n", + mp_dir_path, strerror(errno)); + return -1; + } + + if (flock(dir_fd, LOCK_EX)) { + RTE_LOG(ERR, EAL, "failed to lock %s: %s\n", + mp_dir_path, strerror(errno)); + close(dir_fd); + return -1; + } + + if (open_socket_fd() < 0) { + close(dir_fd); + return -1; + } + + if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle", + NULL, mp_handle, NULL) < 0) { + RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n", + strerror(errno)); + close(mp_fd); + close(dir_fd); + mp_fd = -1; + return -1; + } + + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + close(dir_fd); + + return 0; +} + +void +rte_mp_channel_cleanup(void) +{ + close_socket_fd(); +} + +/** + * Return -1, as fail to send message and it's caused by the local side. + * Return 0, as fail to send message and it's caused by the remote side. + * Return 1, as succeed to send message. + * + */ +static int +send_msg(const char *dst_path, struct rte_mp_msg *msg, int type) +{ + int snd; + struct iovec iov; + struct msghdr msgh; + struct cmsghdr *cmsg; + struct sockaddr_un dst; + struct mp_msg_internal m; + int fd_size = msg->num_fds * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + + m.type = type; + memcpy(&m.msg, msg, sizeof(*msg)); + + memset(&dst, 0, sizeof(dst)); + dst.sun_family = AF_UNIX; + strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path)); + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = &m; + iov.iov_len = sizeof(m) - sizeof(msg->fds); + + msgh.msg_name = &dst; + msgh.msg_namelen = sizeof(dst); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), msg->fds, fd_size); + + do { + snd = sendmsg(mp_fd, &msgh, 0); + } while (snd < 0 && errno == EINTR); + + if (snd < 0) { + rte_errno = errno; + /* Check if it caused by peer process exits */ + if (errno == ECONNREFUSED && + rte_eal_process_type() == RTE_PROC_PRIMARY) { + unlink(dst_path); + return 0; + } + RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n", + dst_path, strerror(errno)); + return -1; + } + + return 1; +} + +static int +mp_send(struct rte_mp_msg *msg, const char *peer, int type) +{ + int dir_fd, ret = 0; + DIR *mp_dir; + struct dirent *ent; + + if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY)) + peer = eal_mp_socket_path(); + + if (peer) { + if (send_msg(peer, msg, type) < 0) + return -1; + else + return 0; + } + + /* broadcast to all secondary processes */ + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", + mp_dir_path); + rte_errno = errno; + return -1; + } + + dir_fd = dirfd(mp_dir); + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + 
mp_dir_path); + rte_errno = errno; + closedir(mp_dir); + return -1; + } + + while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + + if (fnmatch(mp_filter, ent->d_name, 0) != 0) + continue; + + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + if (send_msg(path, msg, type) < 0) + ret = -1; + } + /* unlock the dir */ + flock(dir_fd, LOCK_UN); + + /* dir_fd automatically closed on closedir */ + closedir(mp_dir); + return ret; +} + +static int +check_input(const struct rte_mp_msg *msg) +{ + if (msg == NULL) { + RTE_LOG(ERR, EAL, "Msg cannot be NULL\n"); + rte_errno = EINVAL; + return -1; + } + + if (validate_action_name(msg->name) != 0) + return -1; + + if (msg->len_param < 0) { + RTE_LOG(ERR, EAL, "Message data length is negative\n"); + rte_errno = EINVAL; + return -1; + } + + if (msg->num_fds < 0) { + RTE_LOG(ERR, EAL, "Number of fd's is negative\n"); + rte_errno = EINVAL; + return -1; + } + + if (msg->len_param > RTE_MP_MAX_PARAM_LEN) { + RTE_LOG(ERR, EAL, "Message data is too long\n"); + rte_errno = E2BIG; + return -1; + } + + if (msg->num_fds > RTE_MP_MAX_FD_NUM) { + RTE_LOG(ERR, EAL, "Cannot send more than %d FDs\n", + RTE_MP_MAX_FD_NUM); + rte_errno = E2BIG; + return -1; + } + + return 0; +} + +int +rte_mp_sendmsg(struct rte_mp_msg *msg) +{ + if (check_input(msg) != 0) + return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + rte_errno = ENOTSUP; + return -1; + } + + RTE_LOG(DEBUG, EAL, "sendmsg: %s\n", msg->name); + return mp_send(msg, NULL, MP_MSG); +} + +static int +mp_request_async(const char *dst, struct rte_mp_msg *req, + struct async_request_param *param, const struct timespec *ts) +{ + struct rte_mp_msg *reply_msg; + struct pending_request *pending_req, *exist; + int ret = -1; + + pending_req = calloc(1, sizeof(*pending_req)); + reply_msg = calloc(1, sizeof(*reply_msg)); + if (pending_req == NULL || reply_msg == NULL) { + RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n"); + rte_errno = ENOMEM; + ret = -1; + goto fail; + } + + pending_req->type = REQUEST_TYPE_ASYNC; + strlcpy(pending_req->dst, dst, sizeof(pending_req->dst)); + pending_req->request = req; + pending_req->reply = reply_msg; + pending_req->async.param = param; + + /* queue already locked by caller */ + + exist = find_pending_request(dst, req->name); + if (exist) { + RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name); + rte_errno = EEXIST; + ret = -1; + goto fail; + } + + ret = send_msg(dst, req, MP_REQ); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n", + dst, req->name); + ret = -1; + goto fail; + } else if (ret == 0) { + ret = 0; + goto fail; + } + param->user_reply.nb_sent++; + + /* if alarm set fails, we simply ignore the reply */ + if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000, + async_reply_handle, pending_req) < 0) { + RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n", + dst, req->name); + ret = -1; + goto fail; + } + TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next); + + return 0; +fail: + free(pending_req); + free(reply_msg); + return ret; +} + +static int +mp_request_sync(const char *dst, struct rte_mp_msg *req, + struct rte_mp_reply *reply, const struct timespec *ts) +{ + int ret; + struct rte_mp_msg msg, *tmp; + struct pending_request pending_req, *exist; + + pending_req.type = REQUEST_TYPE_SYNC; + pending_req.reply_received = 0; + strlcpy(pending_req.dst, dst, sizeof(pending_req.dst)); + pending_req.request = req; + 
pending_req.reply = &msg; + pthread_cond_init(&pending_req.sync.cond, NULL); + + exist = find_pending_request(dst, req->name); + if (exist) { + RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name); + rte_errno = EEXIST; + return -1; + } + + ret = send_msg(dst, req, MP_REQ); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n", + dst, req->name); + return -1; + } else if (ret == 0) + return 0; + + TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next); + + reply->nb_sent++; + + do { + ret = pthread_cond_timedwait(&pending_req.sync.cond, + &pending_requests.lock, ts); + } while (ret != 0 && ret != ETIMEDOUT); + + TAILQ_REMOVE(&pending_requests.requests, &pending_req, next); + + if (pending_req.reply_received == 0) { + RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n", + dst, req->name); + rte_errno = ETIMEDOUT; + return -1; + } + if (pending_req.reply_received == -1) { + RTE_LOG(DEBUG, EAL, "Asked to ignore response\n"); + /* not receiving this message is not an error, so decrement + * number of sent messages + */ + reply->nb_sent--; + return 0; + } + + tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1)); + if (!tmp) { + RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n", + dst, req->name); + rte_errno = ENOMEM; + return -1; + } + memcpy(&tmp[reply->nb_received], &msg, sizeof(msg)); + reply->msgs = tmp; + reply->nb_received++; + return 0; +} + +int +rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, + const struct timespec *ts) +{ + int dir_fd, ret = -1; + DIR *mp_dir; + struct dirent *ent; + struct timeval now; + struct timespec end; + + RTE_LOG(DEBUG, EAL, "request: %s\n", req->name); + + reply->nb_sent = 0; + reply->nb_received = 0; + reply->msgs = NULL; + + if (check_input(req) != 0) + goto end; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + rte_errno = ENOTSUP; + return -1; + } + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Failed to get current time\n"); + rte_errno = errno; + goto end; + } + + end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; + end.tv_sec = now.tv_sec + ts->tv_sec + + (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; + + /* for secondary process, send request to the primary process only */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + pthread_mutex_lock(&pending_requests.lock); + ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end); + pthread_mutex_unlock(&pending_requests.lock); + goto end; + } + + /* for primary process, broadcast request, and collect reply 1 by 1 */ + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); + rte_errno = errno; + goto end; + } + + dir_fd = dirfd(mp_dir); + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + mp_dir_path); + rte_errno = errno; + goto close_end; + } + + pthread_mutex_lock(&pending_requests.lock); + while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + + if (fnmatch(mp_filter, ent->d_name, 0) != 0) + continue; + + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + + /* unlocks the mutex while waiting for response, + * locks on receive + */ + if (mp_request_sync(path, req, reply, &end)) + goto unlock_end; + } + ret = 0; + +unlock_end: + pthread_mutex_unlock(&pending_requests.lock); + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + 
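+ /* Cleanup note on the label chain here: unlock_end drops the
+  * request-queue mutex and the directory lock, close_end closes the
+  * directory stream, and end frees any partially collected replies
+  * when ret is nonzero, so each exit path unwinds exactly what it
+  * acquired.
+  */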
+close_end: + /* dir_fd automatically closed on closedir */ + closedir(mp_dir); + +end: + if (ret) { + free(reply->msgs); + reply->nb_received = 0; + reply->msgs = NULL; + } + return ret; +} + +int +rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, + rte_mp_async_reply_t clb) +{ + struct rte_mp_msg *copy; + struct pending_request *dummy; + struct async_request_param *param; + struct rte_mp_reply *reply; + int dir_fd, ret = 0; + DIR *mp_dir; + struct dirent *ent; + struct timeval now; + struct timespec *end; + bool dummy_used = false; + + RTE_LOG(DEBUG, EAL, "request: %s\n", req->name); + + if (check_input(req) != 0) + return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + rte_errno = ENOTSUP; + return -1; + } + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Failed to get current time\n"); + rte_errno = errno; + return -1; + } + copy = calloc(1, sizeof(*copy)); + dummy = calloc(1, sizeof(*dummy)); + param = calloc(1, sizeof(*param)); + if (copy == NULL || dummy == NULL || param == NULL) { + RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n"); + rte_errno = ENOMEM; + goto fail; + } + + /* copy message */ + memcpy(copy, req, sizeof(*copy)); + + param->n_responses_processed = 0; + param->clb = clb; + end = ¶m->end; + reply = ¶m->user_reply; + + end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; + end->tv_sec = now.tv_sec + ts->tv_sec + + (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; + reply->nb_sent = 0; + reply->nb_received = 0; + reply->msgs = NULL; + + /* we have to lock the request queue here, as we will be adding a bunch + * of requests to the queue at once, and some of the replies may arrive + * before we add all of the requests to the queue. + */ + pthread_mutex_lock(&pending_requests.lock); + + /* we have to ensure that callback gets triggered even if we don't send + * anything, therefore earlier we have allocated a dummy request. fill + * it, and put it on the queue if we don't send any requests. 
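+ * Rationale sketch: reply_received is pre-set to 1 below so the dummy
+ * counts as already answered; with nb_sent == 0, the completion check
+ * in process_async_request() (n_responses_processed == nb_sent) holds
+ * immediately, so the user callback still fires even though nothing
+ * was actually sent.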
+ */ + dummy->type = REQUEST_TYPE_ASYNC; + dummy->request = copy; + dummy->reply = NULL; + dummy->async.param = param; + dummy->reply_received = 1; /* short-circuit the timeout */ + + /* for secondary process, send request to the primary process only */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + ret = mp_request_async(eal_mp_socket_path(), copy, param, ts); + + /* if we didn't send anything, put dummy request on the queue */ + if (ret == 0 && reply->nb_sent == 0) { + TAILQ_INSERT_TAIL(&pending_requests.requests, dummy, + next); + dummy_used = true; + } + + pthread_mutex_unlock(&pending_requests.lock); + + /* if we couldn't send anything, clean up */ + if (ret != 0) + goto fail; + return 0; + } + + /* for primary process, broadcast request */ + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); + rte_errno = errno; + goto unlock_fail; + } + dir_fd = dirfd(mp_dir); + + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + mp_dir_path); + rte_errno = errno; + goto closedir_fail; + } + + while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + + if (fnmatch(mp_filter, ent->d_name, 0) != 0) + continue; + + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + + if (mp_request_async(path, copy, param, ts)) + ret = -1; + } + /* if we didn't send anything, put dummy request on the queue */ + if (ret == 0 && reply->nb_sent == 0) { + TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next); + dummy_used = true; + } + + /* finally, unlock the queue */ + pthread_mutex_unlock(&pending_requests.lock); + + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + + /* dir_fd automatically closed on closedir */ + closedir(mp_dir); + + /* if dummy was unused, free it */ + if (!dummy_used) + free(dummy); + + return ret; +closedir_fail: + closedir(mp_dir); +unlock_fail: + pthread_mutex_unlock(&pending_requests.lock); +fail: + free(dummy); + free(param); + free(copy); + return -1; +} + +int +rte_mp_reply(struct rte_mp_msg *msg, const char *peer) +{ + RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name); + + if (check_input(msg) != 0) + return -1; + + if (peer == NULL) { + RTE_LOG(ERR, EAL, "peer is not specified\n"); + rte_errno = EINVAL; + return -1; + } + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + + return mp_send(msg, peer, MP_REP); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c new file mode 100644 index 000000000..60c5dd66f --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> + +#include <rte_string_fns.h> + +/* split string into tokens */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim) +{ + int i, tok = 0; + int tokstart = 1; /* first token is right at start of string */ + + if (string == NULL || tokens == NULL) + goto einval_error; + + for (i = 0; i < stringlen; i++) { + if (string[i] == '\0' || tok >= maxtokens) + break; + if (tokstart) { + tokstart = 0; + tokens[tok++] = &string[i]; + } + if (string[i] == delim) { + string[i] = '\0'; + tokstart = 1; + } + } + return tok; + 
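+ /* Worked example: rte_strsplit("ab,cd,ef", 9, toks, 3, ',') rewrites
+  * the buffer in place to "ab\0cd\0ef", returns 3, and leaves toks[0..2]
+  * pointing at "ab", "cd" and "ef".
+  */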
+einval_error: + errno = EINVAL; + return -1; +} + +/* Copy src string into dst. + * + * Return negative value and NUL-terminate if dst is too short, + * Otherwise return number of bytes copied. + */ +ssize_t +rte_strscpy(char *dst, const char *src, size_t dsize) +{ + size_t nleft = dsize; + size_t res = 0; + + /* Copy as many bytes as will fit. */ + while (nleft != 0) { + dst[res] = src[res]; + if (src[res] == '\0') + return res; + res++; + nleft--; + } + + /* Not enough room in dst, set NUL and return error. */ + if (res != 0) + dst[res - 1] = '\0'; + return -E2BIG; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c new file mode 100644 index 000000000..ead06897b --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <sys/queue.h> +#include <stdint.h> +#include <errno.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <inttypes.h> + +#include <rte_memory.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_log.h> +#include <rte_string_fns.h> +#include <rte_debug.h> + +#include "eal_private.h" +#include "eal_memcfg.h" + +TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem); +/* local tailq list */ +static struct rte_tailq_elem_head rte_tailq_elem_head = + TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head); + +/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */ +static int rte_tailqs_count = -1; + +struct rte_tailq_head * +rte_eal_tailq_lookup(const char *name) +{ + unsigned i; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (name == NULL) + return NULL; + + for (i = 0; i < RTE_MAX_TAILQ; i++) { + if (!strncmp(name, mcfg->tailq_head[i].name, + RTE_TAILQ_NAMESIZE-1)) + return &mcfg->tailq_head[i]; + } + + return NULL; +} + +void +rte_dump_tailq(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_mcfg_tailq_read_lock(); + for (i = 0; i < RTE_MAX_TAILQ; i++) { + const struct rte_tailq_head *tailq = &mcfg->tailq_head[i]; + const struct rte_tailq_entry_head *head = &tailq->tailq_head; + + fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n", + i, tailq->name, head->tqh_first, head->tqh_last); + } + rte_mcfg_tailq_read_unlock(); +} + +static struct rte_tailq_head * +rte_eal_tailq_create(const char *name) +{ + struct rte_tailq_head *head = NULL; + + if (!rte_eal_tailq_lookup(name) && + (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) { + struct rte_mem_config *mcfg; + + mcfg = rte_eal_get_configuration()->mem_config; + head = &mcfg->tailq_head[rte_tailqs_count]; + strlcpy(head->name, name, sizeof(head->name) - 1); + TAILQ_INIT(&head->tailq_head); + rte_tailqs_count++; + } + + return head; +} + +/* local register, used to store "early" tailqs before rte_eal_init() and to + * ensure secondary process only registers tailqs once. 
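+ * Registration normally goes through the EAL_REGISTER_TAILQ()
+ * constructor macro; a minimal sketch (my_elem is a hypothetical
+ * element, not defined here):
+ *
+ *   static struct rte_tailq_elem my_elem = {
+ *           .name = "MY_TAILQ",
+ *   };
+ *   EAL_REGISTER_TAILQ(my_elem)
+ *
+ * Constructors run before rte_eal_init(), so entries wait on this
+ * local list until rte_eal_tailqs_init() attaches them to the shared
+ * rte_mem_config tailq heads.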
*/ +static int +rte_eal_tailq_local_register(struct rte_tailq_elem *t) +{ + struct rte_tailq_elem *temp; + + TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) { + if (!strncmp(t->name, temp->name, sizeof(temp->name))) + return -1; + } + + TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next); + return 0; +} + +static void +rte_eal_tailq_update(struct rte_tailq_elem *t) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* primary process is the only one that creates */ + t->head = rte_eal_tailq_create(t->name); + } else { + t->head = rte_eal_tailq_lookup(t->name); + } +} + +int +rte_eal_tailq_register(struct rte_tailq_elem *t) +{ + if (rte_eal_tailq_local_register(t) < 0) { + RTE_LOG(ERR, EAL, + "%s tailq is already registered\n", t->name); + goto error; + } + + /* if a register happens after rte_eal_tailqs_init(), then we can update + * tailq head */ + if (rte_tailqs_count >= 0) { + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + TAILQ_REMOVE(&rte_tailq_elem_head, t, next); + goto error; + } + } + + return 0; + +error: + t->head = NULL; + return -1; +} + +int +rte_eal_tailqs_init(void) +{ + struct rte_tailq_elem *t; + + rte_tailqs_count = 0; + + TAILQ_FOREACH(t, &rte_tailq_elem_head, next) { + /* second part of register job for "early" tailqs, see + * rte_eal_tailq_register and EAL_REGISTER_TAILQ */ + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + /* TAILQ_REMOVE not needed, error is already fatal */ + goto fail; + } + } + + return 0; + +fail: + rte_dump_tailq(stderr); + return -1; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c new file mode 100644 index 000000000..f9f588c17 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <pthread.h> +#include <signal.h> +#include <sched.h> +#include <assert.h> +#include <string.h> + +#include <rte_lcore.h> +#include <rte_memory.h> +#include <rte_log.h> +#ifndef RTE_EXEC_ENV_WINDOWS +#include <rte_trace_point.h> +#endif + +#include "eal_internal_cfg.h" +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DECLARE_PER_LCORE(unsigned , _socket_id); + +unsigned rte_socket_id(void) +{ + return RTE_PER_LCORE(_socket_id); +} + +int +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + return cfg->lcore_role[lcore_id] == role; +} + +static int +eal_cpuset_socket_id(rte_cpuset_t *cpusetp) +{ + unsigned cpu = 0; + int socket_id = SOCKET_ID_ANY; + int sid; + + if (cpusetp == NULL) + return SOCKET_ID_ANY; + + do { + if (!CPU_ISSET(cpu, cpusetp)) + continue; + + if (socket_id == SOCKET_ID_ANY) + socket_id = eal_cpu_socket_id(cpu); + + sid = eal_cpu_socket_id(cpu); + if (socket_id != sid) { + socket_id = SOCKET_ID_ANY; + break; + } + + } while (++cpu < CPU_SETSIZE); + + return socket_id; +} + +int +rte_thread_set_affinity(rte_cpuset_t *cpusetp) +{ + int s; + unsigned lcore_id; + pthread_t tid; + + tid = pthread_self(); + + s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp); + if (s != 0) { + RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n"); + return -1; + } + + /* store 
socket_id in TLS for quick access */ + RTE_PER_LCORE(_socket_id) = + eal_cpuset_socket_id(cpusetp); + + /* store cpuset in TLS for quick access */ + memmove(&RTE_PER_LCORE(_cpuset), cpusetp, + sizeof(rte_cpuset_t)); + + lcore_id = rte_lcore_id(); + if (lcore_id != (unsigned)LCORE_ID_ANY) { + /* EAL thread will update lcore_config */ + lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id); + memmove(&lcore_config[lcore_id].cpuset, cpusetp, + sizeof(rte_cpuset_t)); + } + + return 0; +} + +void +rte_thread_get_affinity(rte_cpuset_t *cpusetp) +{ + assert(cpusetp); + memmove(cpusetp, &RTE_PER_LCORE(_cpuset), + sizeof(rte_cpuset_t)); +} + +int +eal_thread_dump_affinity(char *str, unsigned size) +{ + rte_cpuset_t cpuset; + unsigned cpu; + int ret; + unsigned int out = 0; + + rte_thread_get_affinity(&cpuset); + + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (!CPU_ISSET(cpu, &cpuset)) + continue; + + ret = snprintf(str + out, + size - out, "%u,", cpu); + if (ret < 0 || (unsigned)ret >= size - out) { + /* string will be truncated */ + ret = -1; + goto exit; + } + + out += ret; + } + + ret = 0; +exit: + /* remove the last separator */ + if (out > 0) + str[out - 1] = '\0'; + + return ret; +} + + +struct rte_thread_ctrl_params { + void *(*start_routine)(void *); + void *arg; + pthread_barrier_t configured; +}; + +static void *rte_thread_init(void *arg) +{ + int ret; + rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset; + struct rte_thread_ctrl_params *params = arg; + void *(*start_routine)(void *) = params->start_routine; + void *routine_arg = params->arg; + + /* Store cpuset in TLS for quick access */ + memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t)); + + ret = pthread_barrier_wait(¶ms->configured); + if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + +#ifndef RTE_EXEC_ENV_WINDOWS + __rte_trace_mem_per_thread_alloc(); +#endif + return start_routine(routine_arg); +} + +int +rte_ctrl_thread_create(pthread_t *thread, const char *name, + const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset; + struct rte_thread_ctrl_params *params; + int ret; + + params = malloc(sizeof(*params)); + if (!params) + return -ENOMEM; + + params->start_routine = start_routine; + params->arg = arg; + + pthread_barrier_init(¶ms->configured, NULL, 2); + + ret = pthread_create(thread, attr, rte_thread_init, (void *)params); + if (ret != 0) { + free(params); + return -ret; + } + + if (name != NULL) { + ret = rte_thread_setname(*thread, name); + if (ret < 0) + RTE_LOG(DEBUG, EAL, + "Cannot set name for ctrl thread\n"); + } + + ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); + if (ret) + goto fail; + + ret = pthread_barrier_wait(¶ms->configured); + if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + + return 0; + +fail: + if (PTHREAD_BARRIER_SERIAL_THREAD == + pthread_barrier_wait(¶ms->configured)) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + pthread_cancel(*thread); + pthread_join(*thread, NULL); + return -ret; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c new file mode 100644 index 000000000..fa9ee1b22 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include 
<string.h> +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/types.h> +#include <time.h> +#include <errno.h> + +#include <rte_common.h> +#include <rte_compat.h> +#include <rte_log.h> +#include <rte_cycles.h> +#include <rte_pause.h> +#include <rte_eal.h> + +#include "eal_private.h" +#include "eal_memcfg.h" + +/* The frequency of the RDTSC timer resolution */ +static uint64_t eal_tsc_resolution_hz; + +/* Pointer to user delay function */ +void (*rte_delay_us)(unsigned int) = NULL; + +void +rte_delay_us_block(unsigned int us) +{ + const uint64_t start = rte_get_timer_cycles(); + const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; + while ((rte_get_timer_cycles() - start) < ticks) + rte_pause(); +} + +void +rte_delay_us_sleep(unsigned int us) +{ + struct timespec wait[2]; + int ind = 0; + + wait[0].tv_sec = 0; + if (us >= US_PER_S) { + wait[0].tv_sec = us / US_PER_S; + us -= wait[0].tv_sec * US_PER_S; + } + wait[0].tv_nsec = 1000 * us; + + while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) { + /* + * Sleep was interrupted. Flip the index, so the 'remainder' + * will become the 'request' for a next call. + */ + ind = 1 - ind; + } +} + +uint64_t +rte_get_tsc_hz(void) +{ + return eal_tsc_resolution_hz; +} + +static uint64_t +estimate_tsc_freq(void) +{ +#define CYC_PER_10MHZ 1E7 + RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly" + " - clock timings may be less accurate.\n"); + /* assume that the sleep(1) will sleep for 1 second */ + uint64_t start = rte_rdtsc(); + sleep(1); + /* Round up to 10Mhz. 1E7 ~ 10Mhz */ + return RTE_ALIGN_MUL_NEAR(rte_rdtsc() - start, CYC_PER_10MHZ); +} + +void +set_tsc_freq(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + uint64_t freq; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + /* + * Just use the primary process calculated TSC rate in any + * secondary process. It avoids any unnecessary overhead on + * systems where arch-specific frequency detection is not + * available. + */ + eal_tsc_resolution_hz = mcfg->tsc_hz; + return; + } + + freq = get_tsc_freq_arch(); + if (!freq) + freq = get_tsc_freq(); + if (!freq) + freq = estimate_tsc_freq(); + + RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); + eal_tsc_resolution_hz = freq; + mcfg->tsc_hz = freq; +} + +void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) +{ + rte_delay_us = userfunc; +} + +RTE_INIT(rte_timer_init) +{ + /* set rte_delay_us_block as a delay function */ + rte_delay_us_callback_register(rte_delay_us_block); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c new file mode 100644 index 000000000..875553d7e --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c @@ -0,0 +1,498 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */ + +#include <fnmatch.h> +#include <inttypes.h> +#include <sys/queue.h> +#include <regex.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_string_fns.h> + +#include "eal_trace.h" + +RTE_DEFINE_PER_LCORE(volatile int, trace_point_sz); +RTE_DEFINE_PER_LCORE(void *, trace_mem); +static RTE_DEFINE_PER_LCORE(char, ctf_field[TRACE_CTF_FIELD_SIZE]); +static RTE_DEFINE_PER_LCORE(int, ctf_count); + +static struct trace_point_head tp_list = STAILQ_HEAD_INITIALIZER(tp_list); +static struct trace trace = { .args = STAILQ_HEAD_INITIALIZER(trace.args), }; + +struct trace * +trace_obj_get(void) +{ + return &trace; +} + +struct trace_point_head * +trace_list_head_get(void) +{ + return &tp_list; +} + +int +eal_trace_init(void) +{ + struct trace_arg *arg; + + /* Trace memory should start with 8B aligned for natural alignment */ + RTE_BUILD_BUG_ON((offsetof(struct __rte_trace_header, mem) % 8) != 0); + + /* One of the trace point registration failed */ + if (trace.register_errno) { + rte_errno = trace.register_errno; + goto fail; + } + + if (!STAILQ_EMPTY(&trace.args)) + trace.status = true; + + if (!rte_trace_is_enabled()) + return 0; + + rte_spinlock_init(&trace.lock); + + /* Is duplicate trace name registered */ + if (trace_has_duplicate_entry()) + goto fail; + + /* Generate UUID ver 4 with total size of events and number of + * events + */ + trace_uuid_generate(); + + /* Apply buffer size configuration for trace output */ + trace_bufsz_args_apply(); + + /* Generate CTF TDSL metadata */ + if (trace_metadata_create() < 0) + goto fail; + + /* Create trace directory */ + if (trace_mkdir()) + goto free_meta; + + /* Save current epoch timestamp for future use */ + if (trace_epoch_time_save() < 0) + goto fail; + + /* Apply global configurations */ + STAILQ_FOREACH(arg, &trace.args, next) + trace_args_apply(arg->val); + + rte_trace_mode_set(trace.mode); + + return 0; + +free_meta: + trace_metadata_destroy(); +fail: + trace_err("failed to initialize trace [%s]", rte_strerror(rte_errno)); + return -rte_errno; +} + +void +eal_trace_fini(void) +{ + if (!rte_trace_is_enabled()) + return; + trace_mem_per_thread_free(); + trace_metadata_destroy(); + eal_trace_args_free(); +} + +bool +rte_trace_is_enabled(void) +{ + return trace.status; +} + +static void +trace_mode_set(rte_trace_point_t *trace, enum rte_trace_mode mode) +{ + if (mode == RTE_TRACE_MODE_OVERWRITE) + __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_DISCARD, + __ATOMIC_RELEASE); + else + __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_DISCARD, + __ATOMIC_RELEASE); +} + +void +rte_trace_mode_set(enum rte_trace_mode mode) +{ + struct trace_point *tp; + + if (!rte_trace_is_enabled()) + return; + + STAILQ_FOREACH(tp, &tp_list, next) + trace_mode_set(tp->handle, mode); + + trace.mode = mode; +} + +enum +rte_trace_mode rte_trace_mode_get(void) +{ + return trace.mode; +} + +static bool +trace_point_is_invalid(rte_trace_point_t *t) +{ + return (t == NULL) || (trace_id_get(t) >= trace.nb_trace_points); +} + +bool +rte_trace_point_is_enabled(rte_trace_point_t *trace) +{ + uint64_t val; + + if (trace_point_is_invalid(trace)) + return false; + + val = __atomic_load_n(trace, __ATOMIC_ACQUIRE); + return (val & __RTE_TRACE_FIELD_ENABLE_MASK) != 0; +} + +int +rte_trace_point_enable(rte_trace_point_t *trace) +{ + if (trace_point_is_invalid(trace)) + return -ERANGE; + + __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_MASK, + __ATOMIC_RELEASE); + return 0; +} + +int 
+rte_trace_point_disable(rte_trace_point_t *trace) +{ + if (trace_point_is_invalid(trace)) + return -ERANGE; + + __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_MASK, + __ATOMIC_RELEASE); + return 0; +} + +int +rte_trace_pattern(const char *pattern, bool enable) +{ + struct trace_point *tp; + int rc = 0, found = 0; + + STAILQ_FOREACH(tp, &tp_list, next) { + if (fnmatch(pattern, tp->name, 0) == 0) { + if (enable) + rc = rte_trace_point_enable(tp->handle); + else + rc = rte_trace_point_disable(tp->handle); + found = 1; + } + if (rc < 0) + return rc; + } + + return rc | found; +} + +int +rte_trace_regexp(const char *regex, bool enable) +{ + struct trace_point *tp; + int rc = 0, found = 0; + regex_t r; + + if (regcomp(&r, regex, 0) != 0) + return -EINVAL; + + STAILQ_FOREACH(tp, &tp_list, next) { + if (regexec(&r, tp->name, 0, NULL, 0) == 0) { + if (enable) + rc = rte_trace_point_enable(tp->handle); + else + rc = rte_trace_point_disable(tp->handle); + found = 1; + } + if (rc < 0) + return rc; + } + regfree(&r); + + return rc | found; +} + +rte_trace_point_t * +rte_trace_point_lookup(const char *name) +{ + struct trace_point *tp; + + if (name == NULL) + return NULL; + + STAILQ_FOREACH(tp, &tp_list, next) + if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0) + return tp->handle; + + return NULL; +} + +static void +trace_point_dump(FILE *f, struct trace_point *tp) +{ + rte_trace_point_t *handle = tp->handle; + + fprintf(f, "\tid %d, %s, size is %d, %s\n", + trace_id_get(handle), tp->name, + (uint16_t)(*handle & __RTE_TRACE_FIELD_SIZE_MASK), + rte_trace_point_is_enabled(handle) ? "enabled" : "disabled"); +} + +static void +trace_lcore_mem_dump(FILE *f) +{ + struct trace *trace = trace_obj_get(); + struct __rte_trace_header *header; + uint32_t count; + + if (trace->nb_trace_mem_list == 0) + return; + + rte_spinlock_lock(&trace->lock); + fprintf(f, "nb_trace_mem_list = %d\n", trace->nb_trace_mem_list); + fprintf(f, "\nTrace mem info\n--------------\n"); + for (count = 0; count < trace->nb_trace_mem_list; count++) { + header = trace->lcore_meta[count].mem; + fprintf(f, "\tid %d, mem=%p, area=%s, lcore_id=%d, name=%s\n", + count, header, + trace_area_to_string(trace->lcore_meta[count].area), + header->stream_header.lcore_id, + header->stream_header.thread_name); + } + rte_spinlock_unlock(&trace->lock); +} + +void +rte_trace_dump(FILE *f) +{ + struct trace_point_head *tp_list = trace_list_head_get(); + struct trace *trace = trace_obj_get(); + struct trace_point *tp; + + fprintf(f, "\nGlobal info\n-----------\n"); + fprintf(f, "status = %s\n", + rte_trace_is_enabled() ? 
"enabled" : "disabled"); + fprintf(f, "mode = %s\n", + trace_mode_to_string(rte_trace_mode_get())); + fprintf(f, "dir = %s\n", trace->dir); + fprintf(f, "buffer len = %d\n", trace->buff_len); + fprintf(f, "number of trace points = %d\n", trace->nb_trace_points); + + trace_lcore_mem_dump(f); + fprintf(f, "\nTrace point info\n----------------\n"); + STAILQ_FOREACH(tp, tp_list, next) + trace_point_dump(f, tp); +} + +void +__rte_trace_mem_per_thread_alloc(void) +{ + struct trace *trace = trace_obj_get(); + struct __rte_trace_header *header; + uint32_t count; + + if (!rte_trace_is_enabled()) + return; + + if (RTE_PER_LCORE(trace_mem)) + return; + + rte_spinlock_lock(&trace->lock); + + count = trace->nb_trace_mem_list; + + /* Allocate room for storing the thread trace mem meta */ + trace->lcore_meta = realloc(trace->lcore_meta, + sizeof(trace->lcore_meta[0]) * (count + 1)); + + /* Provide dummy space for fast path to consume */ + if (trace->lcore_meta == NULL) { + trace_crit("trace mem meta memory realloc failed"); + header = NULL; + goto fail; + } + + /* First attempt from huge page */ + header = eal_malloc_no_trace(NULL, trace_mem_sz(trace->buff_len), 8); + if (header) { + trace->lcore_meta[count].area = TRACE_AREA_HUGEPAGE; + goto found; + } + + /* Second attempt from heap */ + header = malloc(trace_mem_sz(trace->buff_len)); + if (header == NULL) { + trace_crit("trace mem malloc attempt failed"); + header = NULL; + goto fail; + + } + + /* Second attempt from heap is success */ + trace->lcore_meta[count].area = TRACE_AREA_HEAP; + + /* Initialize the trace header */ +found: + header->offset = 0; + header->len = trace->buff_len; + header->stream_header.magic = TRACE_CTF_MAGIC; + rte_uuid_copy(header->stream_header.uuid, trace->uuid); + header->stream_header.lcore_id = rte_lcore_id(); + + /* Store the thread name */ + char *name = header->stream_header.thread_name; + memset(name, 0, __RTE_TRACE_EMIT_STRING_LEN_MAX); + rte_thread_getname(pthread_self(), name, + __RTE_TRACE_EMIT_STRING_LEN_MAX); + + trace->lcore_meta[count].mem = header; + trace->nb_trace_mem_list++; +fail: + RTE_PER_LCORE(trace_mem) = header; + rte_spinlock_unlock(&trace->lock); +} + +void +trace_mem_per_thread_free(void) +{ + struct trace *trace = trace_obj_get(); + uint32_t count; + void *mem; + + if (!rte_trace_is_enabled()) + return; + + rte_spinlock_lock(&trace->lock); + for (count = 0; count < trace->nb_trace_mem_list; count++) { + mem = trace->lcore_meta[count].mem; + if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE) + eal_free_no_trace(mem); + else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP) + free(mem); + } + rte_spinlock_unlock(&trace->lock); +} + +void +__rte_trace_point_emit_field(size_t sz, const char *in, const char *datatype) +{ + char *field = RTE_PER_LCORE(ctf_field); + int count = RTE_PER_LCORE(ctf_count); + size_t size; + int rc; + + size = RTE_MAX(0, TRACE_CTF_FIELD_SIZE - 1 - count); + RTE_PER_LCORE(trace_point_sz) += sz; + rc = snprintf(RTE_PTR_ADD(field, count), size, "%s %s;", datatype, in); + if (rc <= 0 || (size_t)rc >= size) { + RTE_PER_LCORE(trace_point_sz) = 0; + trace_crit("CTF field is too long"); + return; + } + RTE_PER_LCORE(ctf_count) += rc; +} + +int +__rte_trace_point_register(rte_trace_point_t *handle, const char *name, + void (*register_fn)(void)) +{ + char *field = RTE_PER_LCORE(ctf_field); + struct trace_point *tp; + uint16_t sz; + + /* Sanity checks of arguments */ + if (name == NULL || register_fn == NULL || handle == NULL) { + trace_err("invalid arguments"); + rte_errno 
= EINVAL; + goto fail; + } + + /* Check the size of the trace point object */ + RTE_PER_LCORE(trace_point_sz) = 0; + RTE_PER_LCORE(ctf_count) = 0; + register_fn(); + if (RTE_PER_LCORE(trace_point_sz) == 0) { + trace_err("missing rte_trace_emit_header() in register fn"); + rte_errno = EBADF; + goto fail; + } + + /* Is size overflowed */ + if (RTE_PER_LCORE(trace_point_sz) > UINT16_MAX) { + trace_err("trace point size overflowed"); + rte_errno = ENOSPC; + goto fail; + } + + /* Are we running out of space to store trace points? */ + if (trace.nb_trace_points > UINT16_MAX) { + trace_err("trace point exceeds the max count"); + rte_errno = ENOSPC; + goto fail; + } + + /* Get the size of the trace point */ + sz = RTE_PER_LCORE(trace_point_sz); + tp = calloc(1, sizeof(struct trace_point)); + if (tp == NULL) { + trace_err("fail to allocate trace point memory"); + rte_errno = ENOMEM; + goto fail; + } + + /* Initialize the trace point */ + if (rte_strscpy(tp->name, name, TRACE_POINT_NAME_SIZE) < 0) { + trace_err("name is too long"); + rte_errno = E2BIG; + goto free; + } + + /* Copy the field data for future use */ + if (rte_strscpy(tp->ctf_field, field, TRACE_CTF_FIELD_SIZE) < 0) { + trace_err("CTF field size is too long"); + rte_errno = E2BIG; + goto free; + } + + /* Clear field memory for the next event */ + memset(field, 0, TRACE_CTF_FIELD_SIZE); + + /* Form the trace handle */ + *handle = sz; + *handle |= trace.nb_trace_points << __RTE_TRACE_FIELD_ID_SHIFT; + + trace.nb_trace_points++; + tp->handle = handle; + + /* Add the trace point at tail */ + STAILQ_INSERT_TAIL(&tp_list, tp, next); + __atomic_thread_fence(__ATOMIC_RELEASE); + + /* All Good !!! */ + return 0; +free: + free(tp); +fail: + if (trace.register_errno == 0) + trace.register_errno = rte_errno; + + return -rte_errno; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c new file mode 100644 index 000000000..302e2bb74 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#include <inttypes.h> +#include <time.h> + +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_time.h> +#include <rte_trace.h> +#include <rte_version.h> + +#include "eal_trace.h" + +__rte_format_printf(2, 0) +static int +metadata_printf(char **str, const char *fmt, ...) 
+{ + va_list ap; + int rc; + + *str = NULL; + va_start(ap, fmt); + rc = vasprintf(str, fmt, ap); + va_end(ap); + + return rc; +} + +static int +meta_copy(char **meta, int *offset, char *str, int rc) +{ + int count = *offset; + char *ptr = *meta; + + if (rc < 0) + return rc; + + ptr = realloc(ptr, count + rc); + if (ptr == NULL) + goto free_str; + + memcpy(RTE_PTR_ADD(ptr, count), str, rc); + count += rc; + free(str); + + *meta = ptr; + *offset = count; + + return rc; + +free_str: + if (str) + free(str); + return -ENOMEM; +} + +static int +meta_data_type_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "/* CTF 1.8 */\n" + "typealias integer {size = 8; base = x;}:= uint8_t;\n" + "typealias integer {size = 16; base = x;} := uint16_t;\n" + "typealias integer {size = 32; base = x;} := uint32_t;\n" + "typealias integer {size = 64; base = x;} := uint64_t;\n" + "typealias integer {size = 8; signed = true;} := int8_t;\n" + "typealias integer {size = 16; signed = true;} := int16_t;\n" + "typealias integer {size = 32; signed = true;} := int32_t;\n" + "typealias integer {size = 64; signed = true;} := int64_t;\n" +#ifdef RTE_ARCH_64 + "typealias integer {size = 64; base = x;} := uintptr_t;\n" +#else + "typealias integer {size = 32; base = x;} := uintptr_t;\n" +#endif +#ifdef RTE_ARCH_64 + "typealias integer {size = 64; base = x;} := long;\n" +#else + "typealias integer {size = 32; base = x;} := long;\n" +#endif + "typealias integer {size = 8; signed = false; encoding = ASCII; } := string_bounded_t;\n\n" + "typealias floating_point {\n" + " exp_dig = 8;\n" + " mant_dig = 24;\n" + "} := float;\n\n" + "typealias floating_point {\n" + " exp_dig = 11;\n" + " mant_dig = 53;\n" + "} := double;\n\n"); + + return meta_copy(meta, offset, str, rc); +} + +static int +is_be(void) +{ +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + return 1; +#else + return 0; +#endif +} + +static int +meta_header_emit(char **meta, int *offset) +{ + struct trace *trace = trace_obj_get(); + char uustr[RTE_UUID_STRLEN]; + char *str = NULL; + int rc; + + rte_uuid_unparse(trace->uuid, uustr, RTE_UUID_STRLEN); + rc = metadata_printf(&str, + "trace {\n" + " major = 1;\n" + " minor = 8;\n" + " uuid = \"%s\";\n" + " byte_order = %s;\n" + " packet.header := struct {\n" + " uint32_t magic;\n" + " uint8_t uuid[16];\n" + " };\n" + "};\n\n", uustr, is_be() ? 
"be" : "le"); + return meta_copy(meta, offset, str, rc); +} + +static int +meta_env_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "env {\n" + " dpdk_version = \"%s\";\n" + " tracer_name = \"dpdk\";\n" + "};\n\n", rte_version()); + return meta_copy(meta, offset, str, rc); +} + +static int +meta_clock_pass1_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "clock {\n" + " name = \"dpdk\";\n" + " freq = "); + return meta_copy(meta, offset, str, rc); +} + +static int +meta_clock_pass2_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "%20"PRIu64";\n" + " offset_s =", 0); + return meta_copy(meta, offset, str, rc); +} + +static int +meta_clock_pass3_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "%20"PRIu64";\n" + " offset =", 0); + return meta_copy(meta, offset, str, rc); +} + +static int +meta_clock_pass4_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "%20"PRIu64";\n};\n\n" + "typealias integer {\n" + " size = 48; align = 1; signed = false;\n" + " map = clock.dpdk.value;\n" + "} := uint48_clock_dpdk_t;\n\n", 0); + + return meta_copy(meta, offset, str, rc); +} + +static int +meta_stream_emit(char **meta, int *offset) +{ + char *str = NULL; + int rc; + + rc = metadata_printf(&str, + "stream {\n" + " packet.context := struct {\n" + " uint32_t cpu_id;\n" + " string_bounded_t name[32];\n" + " };\n" + " event.header := struct {\n" + " uint48_clock_dpdk_t timestamp;\n" + " uint16_t id;\n" + " } align(64);\n" + "};\n\n"); + return meta_copy(meta, offset, str, rc); +} + +static void +string_fixed_replace(char *input, const char *search, const char *replace) +{ + char *found; + size_t len; + + found = strstr(input, search); + if (found == NULL) + return; + + if (strlen(found) != strlen(search)) + return; + + len = strlen(replace); + memcpy(found, replace, len); + found[len] = '\0'; +} + +static void +ctf_fixup_align(char *str) +{ + string_fixed_replace(str, "align", "_align"); +} + +static void +ctf_fixup_arrow_deref(char *str) +{ + const char *replace = "_"; + const char *search = "->"; + char *found; + size_t len; + + found = strstr(str, search); + if (found == NULL) + return; + + do { + memcpy(found, replace, strlen(replace)); + len = strlen(found + 2); + memcpy(found + 1, found + 2, len); + found[len + 1] = '\0'; + found = strstr(str, search); + } while (found != NULL); +} + +static void +ctf_fixup_dot_deref(char *str) +{ + const char *replace = "_"; + const char *search = "."; + char *found; + size_t len; + + found = strstr(str, search); + if (found == NULL) + return; + + len = strlen(replace); + do { + memcpy(found, replace, len); + found = strstr(str, search); + } while (found != NULL); +} + +static void +ctf_fixup_event(char *str) +{ + string_fixed_replace(str, "event", "_event"); +} + +static int +ctf_fixup_keyword(char *str) +{ + char dup_str[TRACE_CTF_FIELD_SIZE]; + char input[TRACE_CTF_FIELD_SIZE]; + const char *delim = ";"; + char *from; + int len; + + if (str == NULL) + return 0; + + len = strlen(str); + if (len >= TRACE_CTF_FIELD_SIZE) { + trace_err("ctf_field reached its maximum limit"); + return -EMSGSIZE; + } + + /* Create duplicate string */ + strcpy(dup_str, str); + + len = 0; + from = strtok(dup_str, delim); + while (from != NULL) { + strcpy(input, from); + ctf_fixup_align(input); + ctf_fixup_dot_deref(input); + ctf_fixup_arrow_deref(input); 
+ ctf_fixup_event(input); + + strcpy(&input[strlen(input)], delim); + if ((len + strlen(input)) >= TRACE_CTF_FIELD_SIZE) { + trace_err("ctf_field reached its maximum limit"); + return -EMSGSIZE; + } + + strcpy(str + len, input); + len += strlen(input); + from = strtok(NULL, delim); + } + + return 0; +} + +static int +meta_event_emit(char **meta, int *offset, struct trace_point *tp) +{ + char *str = NULL; + int rc; + + /* Fix up the CTF field string in case it is using reserved CTF keywords */ + rc = ctf_fixup_keyword(tp->ctf_field); + if (rc) + return rc; + + rc = metadata_printf(&str, + "event {\n" + " id = %d;\n" + " name = \"%s\";\n" + " fields := struct {\n" + " %s\n" + " };\n" + "};\n\n", trace_id_get(tp->handle), tp->name, tp->ctf_field); + return meta_copy(meta, offset, str, rc); +} + +int +trace_metadata_create(void) +{ + struct trace_point_head *tp_list = trace_list_head_get(); + struct trace *trace = trace_obj_get(); + struct trace_point *tp; + int rc, offset = 0; + char *meta = NULL; + + rc = meta_data_type_emit(&meta, &offset); + if (rc < 0) + goto fail; + + rc = meta_header_emit(&meta, &offset); + if (rc < 0) + goto fail; + + rc = meta_env_emit(&meta, &offset); + if (rc < 0) + goto fail; + + rc = meta_clock_pass1_emit(&meta, &offset); + if (rc < 0) + goto fail; + trace->ctf_meta_offset_freq = offset; + + rc = meta_clock_pass2_emit(&meta, &offset); + if (rc < 0) + goto fail; + trace->ctf_meta_offset_freq_off_s = offset; + + rc = meta_clock_pass3_emit(&meta, &offset); + if (rc < 0) + goto fail; + trace->ctf_meta_offset_freq_off = offset; + + rc = meta_clock_pass4_emit(&meta, &offset); + if (rc < 0) + goto fail; + + rc = meta_stream_emit(&meta, &offset); + if (rc < 0) + goto fail; + + STAILQ_FOREACH(tp, tp_list, next) + if (meta_event_emit(&meta, &offset, tp) < 0) + goto fail; + + trace->ctf_meta = meta; + return 0; + +fail: + if (meta) + free(meta); + return -EBADF; +} + +void +trace_metadata_destroy(void) +{ + struct trace *trace = trace_obj_get(); + + if (trace->ctf_meta) { + free(trace->ctf_meta); + trace->ctf_meta = NULL; + } +} + +static void +meta_fix_freq(struct trace *trace, char *meta) +{ + char *str; + int rc; + + str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq); + rc = sprintf(str, "%20"PRIu64"", rte_get_timer_hz()); + str[rc] = ';'; +} + +static void +meta_fix_freq_offset(struct trace *trace, char *meta) +{ + uint64_t uptime_ticks_floor, uptime_ticks, freq, uptime_sec; + uint64_t offset, offset_s; + char *str; + int rc; + + uptime_ticks = trace->uptime_ticks & + ((1ULL << __RTE_TRACE_EVENT_HEADER_ID_SHIFT) - 1); + freq = rte_get_tsc_hz(); + uptime_ticks_floor = RTE_ALIGN_MUL_FLOOR(uptime_ticks, freq); + + uptime_sec = uptime_ticks_floor / freq; + offset_s = trace->epoch_sec - uptime_sec; + + offset = uptime_ticks - uptime_ticks_floor; + offset += trace->epoch_nsec * (freq / NSEC_PER_SEC); + + str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off_s); + rc = sprintf(str, "%20"PRIu64"", offset_s); + str[rc] = ';'; + str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off); + rc = sprintf(str, "%20"PRIu64"", offset); + str[rc] = ';'; +} + +static void +meta_fixup(struct trace *trace, char *meta) +{ + meta_fix_freq(trace, meta); + meta_fix_freq_offset(trace, meta); +} + +int +rte_trace_metadata_dump(FILE *f) +{ + struct trace *trace = trace_obj_get(); + char *ctf_meta = trace->ctf_meta; + int rc; + + if (!rte_trace_is_enabled()) + return 0; + + if (ctf_meta == NULL) + return -EINVAL; + + if (!__atomic_load_n(&trace->ctf_fixup_done, __ATOMIC_SEQ_CST) && +
rte_get_timer_hz()) { + meta_fixup(trace, ctf_meta); + __atomic_store_n(&trace->ctf_fixup_done, 1, __ATOMIC_SEQ_CST); + } + + rc = fprintf(f, "%s", ctf_meta); + return rc < 0 ? rc : 0; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c new file mode 100644 index 000000000..4a8ce9088 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#include <rte_trace_point_register.h> + +#include <rte_eal_trace.h> + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_void); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u64); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u32); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u16); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u8); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i64); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i32); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i16); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i8); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_int); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_long); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_float); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_double); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_ptr); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_str); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_func); + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_set); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_cancel); + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_zmalloc); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_malloc); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_realloc); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_free); + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_reserve); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_lookup); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_free); + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_remote_launch); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_lcore_ready); + +RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_register); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_unregister); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_enable); +RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_disable); + +RTE_INIT(eal_trace_init) +{ + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_void, + lib.eal.generic.void); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u64, + lib.eal.generic.u64); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u32, + lib.eal.generic.u32); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u16, + lib.eal.generic.u16); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u8, + lib.eal.generic.u8); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i64, + lib.eal.generic.i64); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i32, + lib.eal.generic.i32); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i16, + lib.eal.generic.i16); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i8, + lib.eal.generic.i8); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_int, + lib.eal.generic.int); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_long, + lib.eal.generic.long); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_float, + lib.eal.generic.float); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_double, + lib.eal.generic.double); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_ptr, + lib.eal.generic.ptr); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_str, + lib.eal.generic.string); + 
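+ /*
+  * Each RTE_TRACE_POINT_DEFINE above is paired with exactly one
+  * RTE_TRACE_POINT_REGISTER here; the dotted second argument becomes
+  * the event name that rte_trace_pattern()/rte_trace_regexp() match
+  * against, e.g. an EAL --trace=lib.eal.generic.* argument.
+  */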
RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_func, + lib.eal.generic.func); + + RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_set, + lib.eal.alarm.set); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_cancel, + lib.eal.alarm.cancel); + + RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_zmalloc, + lib.eal.mem.zmalloc); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_malloc, + lib.eal.mem.malloc); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_realloc, + lib.eal.mem.realloc); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_free, + lib.eal.mem.free); + + RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_reserve, + lib.eal.memzone.reserve); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_lookup, + lib.eal.memzone.lookup); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_free, + lib.eal.memzone.free); + + RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_remote_launch, + lib.eal.thread.remote.launch); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_ready, + lib.eal.thread.lcore.ready); + + RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_register, + lib.eal.intr.register); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_unregister, + lib.eal.intr.unregister); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable, + lib.eal.intr.enable); + RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable, + lib.eal.intr.disable); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c new file mode 100644 index 000000000..64f58fb66 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c @@ -0,0 +1,448 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#include <fnmatch.h> +#include <pwd.h> +#include <sys/stat.h> +#include <time.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_string_fns.h> + +#include "eal_filesystem.h" +#include "eal_trace.h" + +const char * +trace_mode_to_string(enum rte_trace_mode mode) +{ + switch (mode) { + case RTE_TRACE_MODE_OVERWRITE: return "overwrite"; + case RTE_TRACE_MODE_DISCARD: return "discard"; + default: return "unknown"; + } +} + +const char * +trace_area_to_string(enum trace_area_e area) +{ + switch (area) { + case TRACE_AREA_HEAP: return "heap"; + case TRACE_AREA_HUGEPAGE: return "hugepage"; + default: return "unknown"; + } +} + +static bool +trace_entry_compare(const char *name) +{ + struct trace_point_head *tp_list = trace_list_head_get(); + struct trace_point *tp; + int count = 0; + + STAILQ_FOREACH(tp, tp_list, next) { + if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0) + count++; + if (count > 1) { + trace_err("found duplicate entry %s", name); + rte_errno = EEXIST; + return true; + } + } + return false; +} + +bool +trace_has_duplicate_entry(void) +{ + struct trace_point_head *tp_list = trace_list_head_get(); + struct trace_point *tp; + + /* Is duplicate trace name registered */ + STAILQ_FOREACH(tp, tp_list, next) + if (trace_entry_compare(tp->name)) + return true; + + return false; +} + +void +trace_uuid_generate(void) +{ + struct trace_point_head *tp_list = trace_list_head_get(); + struct trace *trace = trace_obj_get(); + struct trace_point *tp; + uint64_t sz_total = 0; + + /* Go over the registered trace points to get total size of events */ + STAILQ_FOREACH(tp, tp_list, next) { + const uint16_t sz = *tp->handle & __RTE_TRACE_FIELD_SIZE_MASK; + sz_total += sz; + } + + rte_uuid_t uuid = RTE_UUID_INIT(sz_total, trace->nb_trace_points, + 0x4370, 0x8f50, 0x222ddd514176ULL); + 
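+ /*
+  * The first two UUID fields carry sz_total and nb_trace_points, so
+  * builds with a different set of trace points get a distinct trace
+  * identity; the remaining fields are fixed constants.
+  */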
rte_uuid_copy(trace->uuid, uuid); +} + +static int +trace_session_name_generate(char *trace_dir) +{ + struct tm *tm_result; + time_t tm; + int rc; + + tm = time(NULL); + if ((int)tm == -1) + goto fail; + + tm_result = localtime(&tm); + if (tm_result == NULL) + goto fail; + + rc = rte_strscpy(trace_dir, eal_get_hugefile_prefix(), + TRACE_PREFIX_LEN); + if (rc == -E2BIG) + rc = TRACE_PREFIX_LEN; + trace_dir[rc++] = '-'; + + rc = strftime(trace_dir + rc, TRACE_DIR_STR_LEN - rc, + "%Y-%m-%d-%p-%I-%M-%S", tm_result); + if (rc == 0) + goto fail; + + return rc; +fail: + rte_errno = errno; + return -rte_errno; +} + +static int +trace_dir_update(const char *str) +{ + struct trace *trace = trace_obj_get(); + int rc, remaining; + + remaining = sizeof(trace->dir) - trace->dir_offset; + rc = rte_strscpy(&trace->dir[0] + trace->dir_offset, str, remaining); + if (rc < 0) + goto fail; + + trace->dir_offset += rc; +fail: + return rc; +} + +int +eal_trace_args_save(const char *val) +{ + struct trace *trace = trace_obj_get(); + struct trace_arg *arg = malloc(sizeof(*arg)); + + if (arg == NULL) { + trace_err("failed to allocate memory for %s", val); + return -ENOMEM; + } + + arg->val = strdup(val); + if (arg->val == NULL) { + trace_err("failed to allocate memory for %s", val); + free(arg); + return -ENOMEM; + } + + STAILQ_INSERT_TAIL(&trace->args, arg, next); + return 0; +} + +void +eal_trace_args_free(void) +{ + struct trace *trace = trace_obj_get(); + struct trace_arg *arg; + + while (!STAILQ_EMPTY(&trace->args)) { + arg = STAILQ_FIRST(&trace->args); + STAILQ_REMOVE_HEAD(&trace->args, next); + free(arg->val); + free(arg); + } +} + +int +trace_args_apply(const char *arg) +{ + if (rte_trace_regexp(arg, true) < 0) { + trace_err("cannot enable trace for %s", arg); + return -1; + } + + return 0; +} + +int +eal_trace_bufsz_args_save(char const *val) +{ + struct trace *trace = trace_obj_get(); + uint64_t bufsz; + + bufsz = rte_str_to_size(val); + if (bufsz == 0) { + trace_err("buffer size cannot be zero"); + return -EINVAL; + } + + trace->buff_len = bufsz; + return 0; +} + +void +trace_bufsz_args_apply(void) +{ + struct trace *trace = trace_obj_get(); + + if (trace->buff_len == 0) + trace->buff_len = 1024 * 1024; /* 1MB */ +} + +int +eal_trace_mode_args_save(const char *val) +{ + struct trace *trace = trace_obj_get(); + size_t len = strlen(val); + unsigned long tmp; + char *pattern; + + if (len == 0) { + trace_err("value is not provided with option"); + return -EINVAL; + } + + pattern = (char *)calloc(1, len + 2); + if (pattern == NULL) { + trace_err("fail to allocate memory"); + return -ENOMEM; + } + + sprintf(pattern, "%s*", val); + + if (fnmatch(pattern, "overwrite", 0) == 0) + tmp = RTE_TRACE_MODE_OVERWRITE; + else if (fnmatch(pattern, "discard", 0) == 0) + tmp = RTE_TRACE_MODE_DISCARD; + else { + free(pattern); + return -EINVAL; + } + + trace->mode = tmp; + free(pattern); + return 0; +} + +int +eal_trace_dir_args_save(char const *val) +{ + struct trace *trace = trace_obj_get(); + char *dir_path; + int rc; + + if (strlen(val) >= sizeof(trace->dir) - 1) { + trace_err("input string is too big"); + return -ENAMETOOLONG; + } + + if (asprintf(&dir_path, "%s/", val) == -1) { + trace_err("failed to copy directory: %s", strerror(errno)); + return -ENOMEM; + } + + rc = trace_dir_update(dir_path); + + free(dir_path); + return rc; +} + +int +trace_epoch_time_save(void) +{ + struct trace *trace = trace_obj_get(); + struct timespec epoch = { 0, 0 }; + uint64_t avg, start, end; + + start = rte_get_tsc_cycles(); + if 
(clock_gettime(CLOCK_REALTIME, &epoch) < 0) { + trace_err("failed to get the epoch time"); + return -1; + } + end = rte_get_tsc_cycles(); + avg = (start + end) >> 1; + + trace->epoch_sec = (uint64_t) epoch.tv_sec; + trace->epoch_nsec = (uint64_t) epoch.tv_nsec; + trace->uptime_ticks = avg; + + return 0; +} + +static int +trace_dir_default_path_get(char *dir_path) +{ + struct trace *trace = trace_obj_get(); + uint32_t size = sizeof(trace->dir); + struct passwd *pwd; + char *home_dir; + + /* First check for shell environment variable */ + home_dir = getenv("HOME"); + if (home_dir == NULL) { + /* Fallback to password file entry */ + pwd = getpwuid(getuid()); + if (pwd == NULL) + return -EINVAL; + + home_dir = pwd->pw_dir; + } + + /* Append dpdk-traces to directory */ + if (snprintf(dir_path, size, "%s/dpdk-traces/", home_dir) < 0) + return -ENAMETOOLONG; + + return 0; +} + +int +trace_mkdir(void) +{ + struct trace *trace = trace_obj_get(); + char session[TRACE_DIR_STR_LEN]; + char *dir_path; + int rc; + + if (!trace->dir_offset) { + dir_path = calloc(1, sizeof(trace->dir)); + if (dir_path == NULL) { + trace_err("failed to allocate memory"); + return -ENOMEM; + } + + rc = trace_dir_default_path_get(dir_path); + if (rc < 0) { + trace_err("failed to get default path"); + free(dir_path); + return rc; + } + + rc = trace_dir_update(dir_path); + free(dir_path); + if (rc < 0) + return rc; + } + + /* Create the path if it does not exist; no "mkdir -p" available here */ + rc = mkdir(trace->dir, 0700); + if (rc < 0 && errno != EEXIST) { + trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno)); + rte_errno = errno; + return -rte_errno; + } + + rc = trace_session_name_generate(session); + if (rc < 0) + return rc; + rc = trace_dir_update(session); + if (rc < 0) + return rc; + + rc = mkdir(trace->dir, 0700); + if (rc < 0) { + trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno)); + rte_errno = errno; + return -rte_errno; + } + + RTE_LOG(INFO, EAL, "Trace dir: %s\n", trace->dir); + return 0; +} + +static int +trace_meta_save(struct trace *trace) +{ + char file_name[PATH_MAX]; + FILE *f; + int rc; + + rc = snprintf(file_name, PATH_MAX, "%s/metadata", trace->dir); + if (rc < 0) + return rc; + + f = fopen(file_name, "w"); + if (f == NULL) + return -errno; + + rc = rte_trace_metadata_dump(f); + + if (fclose(f)) + rc = -errno; + + return rc; +} + +static inline int +trace_file_sz(struct __rte_trace_header *hdr) +{ + return sizeof(struct __rte_trace_stream_header) + hdr->offset; +} + +static int +trace_mem_save(struct trace *trace, struct __rte_trace_header *hdr, + uint32_t cnt) +{ + char file_name[PATH_MAX]; + FILE *f; + int rc; + + rc = snprintf(file_name, PATH_MAX, "%s/channel0_%d", trace->dir, cnt); + if (rc < 0) + return rc; + + f = fopen(file_name, "w"); + if (f == NULL) + return -errno; + + rc = fwrite(&hdr->stream_header, trace_file_sz(hdr), 1, f); + rc = (rc == 1) ?
0 : -EACCES; + + if (fclose(f)) + rc = -errno; + + return rc; +} + +int +rte_trace_save(void) +{ + struct trace *trace = trace_obj_get(); + struct __rte_trace_header *header; + uint32_t count; + int rc = 0; + + if (trace->nb_trace_mem_list == 0) + return rc; + + rc = trace_meta_save(trace); + if (rc) + return rc; + + rte_spinlock_lock(&trace->lock); + for (count = 0; count < trace->nb_trace_mem_list; count++) { + header = trace->lcore_meta[count].mem; + rc = trace_mem_save(trace, header, count); + if (rc) + break; + } + rte_spinlock_unlock(&trace->lock); + return rc; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c new file mode 100644 index 000000000..0a80bfbb3 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 1996, 1997 Theodore Ts'o. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> + +#include <rte_uuid.h> + +/* UUID packed form */ +struct uuid { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_version; + uint16_t clock_seq; + uint8_t node[6]; +}; + +static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr) +{ + uint32_t tmp; + uint8_t *out = ptr; + + tmp = uu->time_low; + out[3] = (uint8_t) tmp; + tmp >>= 8; + out[2] = (uint8_t) tmp; + tmp >>= 8; + out[1] = (uint8_t) tmp; + tmp >>= 8; + out[0] = (uint8_t) tmp; + + tmp = uu->time_mid; + out[5] = (uint8_t) tmp; + tmp >>= 8; + out[4] = (uint8_t) tmp; + + tmp = uu->time_hi_and_version; + out[7] = (uint8_t) tmp; + tmp >>= 8; + out[6] = (uint8_t) tmp; + + tmp = uu->clock_seq; + out[9] = (uint8_t) tmp; + tmp >>= 8; + out[8] = (uint8_t) tmp; + + memcpy(out+10, uu->node, 6); +} + +static void uuid_unpack(const rte_uuid_t in, struct uuid *uu) +{ + const uint8_t *ptr = in; + uint32_t tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_low = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_mid = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_hi_and_version = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->clock_seq = tmp; + + memcpy(uu->node, ptr, 6); +} + +bool rte_uuid_is_null(const rte_uuid_t uu) +{ + const uint8_t *cp = uu; + int i; + + for (i = 0; i < 16; i++) + if (*cp++) + return false; + return true; +} + +/* + * rte_uuid_compare() - compare two UUIDs. + */ +int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2) +{ + struct uuid uuid1, uuid2; + + uuid_unpack(uu1, &uuid1); + uuid_unpack(uu2, &uuid2); + +#define UUCMP(u1, u2) \ + do { if (u1 != u2) return (u1 < u2) ? 
-1 : 1; } while (0) + + UUCMP(uuid1.time_low, uuid2.time_low); + UUCMP(uuid1.time_mid, uuid2.time_mid); + UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version); + UUCMP(uuid1.clock_seq, uuid2.clock_seq); +#undef UUCMP + + return memcmp(uuid1.node, uuid2.node, 6); +} + +int rte_uuid_parse(const char *in, rte_uuid_t uu) +{ + struct uuid uuid; + int i; + const char *cp; + char buf[3]; + + if (strlen(in) != 36) + return -1; + + for (i = 0, cp = in; i <= 36; i++, cp++) { + if ((i == 8) || (i == 13) || (i == 18) || + (i == 23)) { + if (*cp == '-') + continue; + else + return -1; + } + if (i == 36) + if (*cp == 0) + continue; + if (!isxdigit(*cp)) + return -1; + } + + uuid.time_low = strtoul(in, NULL, 16); + uuid.time_mid = strtoul(in+9, NULL, 16); + uuid.time_hi_and_version = strtoul(in+14, NULL, 16); + uuid.clock_seq = strtoul(in+19, NULL, 16); + cp = in+24; + buf[2] = 0; + + for (i = 0; i < 6; i++) { + buf[0] = *cp++; + buf[1] = *cp++; + uuid.node[i] = strtoul(buf, NULL, 16); + } + + uuid_pack(&uuid, uu); + return 0; +} + +void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len) +{ + struct uuid uuid; + + uuid_unpack(uu, &uuid); + + snprintf(out, len, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, + uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, + uuid.node[0], uuid.node[1], uuid.node[2], + uuid.node[3], uuid.node[4], uuid.node[5]); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h new file mode 100644 index 000000000..5d21f07c2 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +/** + * @file + * Stores functions and path defines for files and directories + * on the filesystem for Linux, that are used by the Linux EAL. + */ + +#ifndef EAL_FILESYSTEM_H +#define EAL_FILESYSTEM_H + +/** Path of rte config file. */ + +#include <stdint.h> +#include <limits.h> +#include <unistd.h> +#include <stdlib.h> + +#include <rte_string_fns.h> +#include "eal_internal_cfg.h" + +/* sets up platform-specific runtime data dir */ +int +eal_create_runtime_dir(void); + +int +eal_clean_runtime_dir(void); + +/** Function to return hugefile prefix that's currently set up */ +const char * +eal_get_hugefile_prefix(void); + +#define RUNTIME_CONFIG_FNAME "config" +static inline const char * +eal_runtime_config_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + + snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), + RUNTIME_CONFIG_FNAME); + return buffer; +} + +/** Path of primary/secondary communication unix socket file. */ +#define MP_SOCKET_FNAME "mp_socket" +static inline const char * +eal_mp_socket_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + + snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), + MP_SOCKET_FNAME); + return buffer; +} + +#define FBARRAY_NAME_FMT "%s/fbarray_%s" +static inline const char * +eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) { + snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(), + name); + return buffer; +} + +/** Path of hugepage info file. 
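+ * Like the helpers above, the accessor below returns a pointer to a
+ * static buffer: the result must not be freed, and concurrent callers
+ * would race. Minimal usage sketch (error handling elided):
+ *   FILE *f = fopen(eal_hugepage_info_path(), "rb");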
*/ +#define HUGEPAGE_INFO_FNAME "hugepage_info" +static inline const char * +eal_hugepage_info_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + + snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), + HUGEPAGE_INFO_FNAME); + return buffer; +} + +/** Path of hugepage data file. */ +#define HUGEPAGE_DATA_FNAME "hugepage_data" +static inline const char * +eal_hugepage_data_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + + snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), + HUGEPAGE_DATA_FNAME); + return buffer; +} + +/** String format for hugepage map files. */ +#define HUGEFILE_FMT "%s/%smap_%d" +static inline const char * +eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + snprintf(buffer, buflen, HUGEFILE_FMT, hugedir, + eal_get_hugefile_prefix(), f_id); + return buffer; +} + +/** define the default filename prefix for the %s values above */ +#define HUGEFILE_PREFIX_DEFAULT "rte" + +/** Function to read a single numeric value from a file on the filesystem. + * Used to read information from files on /sys */ +int eal_parse_sysfs_value(const char *filename, unsigned long *val); + +#endif /* EAL_FILESYSTEM_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h new file mode 100644 index 000000000..1b560d337 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef EAL_HUGEPAGES_H +#define EAL_HUGEPAGES_H + +#include <stddef.h> +#include <stdint.h> +#include <limits.h> + +#define MAX_HUGEPAGE_PATH PATH_MAX + +/** + * Structure used to store information about hugepages that we mapped + * through the files in hugetlbfs. + */ +struct hugepage_file { + void *orig_va; /**< virtual addr of first mmap() */ + void *final_va; /**< virtual addr of 2nd mmap() */ + uint64_t physaddr; /**< physical addr */ + size_t size; /**< the page size */ + int socket_id; /**< NUMA socket ID */ + int file_id; /**< the '%d' in HUGEFILE_FMT */ + char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ +}; + +/** + * Read the information on what hugepages are available for the EAL to use, + * clearing out any unused ones. + */ +int eal_hugepage_info_init(void); + +/** + * Read whatever information primary process has shared about hugepages into + * secondary process. 
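+ * (Presumably via the hugepage info file in the runtime directory;
+ * see eal_hugepage_info_path() in eal_filesystem.h above.)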
+ */ +int eal_hugepage_info_read(void); + +#endif /* EAL_HUGEPAGES_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h new file mode 100644 index 000000000..c650bc081 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +/** + * @file + * Holds the structures for the eal internal configuration + */ + +#ifndef EAL_INTERNAL_CFG_H +#define EAL_INTERNAL_CFG_H + +#include <rte_eal.h> +#include <rte_pci_dev_feature_defs.h> + +#include "eal_thread.h" + +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +#define MAX_HUGEPAGE_SIZES 4 /**< support up to 4 page sizes */ +#else +#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */ +#endif + +/* + * internal configuration structure for the number, size and + * mount points of hugepages + */ +struct hugepage_info { + uint64_t hugepage_sz; /**< size of a huge page */ + char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */ + uint32_t num_pages[RTE_MAX_NUMA_NODES]; + /**< number of hugepages of that size on each socket */ + int lock_descriptor; /**< file descriptor for hugepage dir */ +}; + +/** + * internal configuration + */ +struct internal_config { + volatile size_t memory; /**< amount of asked memory */ + volatile unsigned force_nchannel; /**< force number of channels */ + volatile unsigned force_nrank; /**< force number of ranks */ + volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + unsigned hugepage_unlink; /**< true to unlink backing files */ + volatile unsigned no_pci; /**< true to disable PCI */ + volatile unsigned no_hpet; /**< true to disable HPET */ + volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping + * instead of native TSC */ + volatile unsigned no_shconf; /**< true if there is no shared config */ + volatile unsigned in_memory; + /**< true if DPDK should operate entirely in-memory and not create any + * shared files or runtime data. + */ + volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */ + volatile enum rte_proc_type_t process_type; /**< multi-process proc type */ + /** true to try allocating memory on specific sockets */ + volatile unsigned force_sockets; + volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + volatile unsigned force_socket_limits; + volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */ + uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ + volatile unsigned legacy_mem; + /**< true to enable legacy memory behavior (no dynamic allocation, + * IOVA-contiguous segments). + */ + volatile unsigned match_allocations; + /**< true to free hugepages exactly as allocated */ + volatile unsigned single_file_segments; + /**< true if storing all pages within single files (per-page-size, + * per-node) non-legacy mode only. 
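+ * That is, one backing file per page size and per NUMA node, rather
+ * than one file per page.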
+ */ + volatile int syslog_facility; /**< facility passed to openlog() */ + /** default interrupt mode for VFIO */ + volatile enum rte_intr_mode vfio_intr_mode; + char *hugefile_prefix; /**< the base filename of hugetlbfs files */ + char *hugepage_dir; /**< specific hugetlbfs directory to use */ + char *user_mbuf_pool_ops_name; + /**< user defined mbuf pool ops name */ + unsigned num_hugepage_sizes; /**< how many sizes on this system */ + struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; + enum rte_iova_mode iova_mode ; /**< Set IOVA mode on this system */ + rte_cpuset_t ctrl_cpuset; /**< cpuset for ctrl threads */ + volatile unsigned int init_complete; + /**< indicates whether EAL has completed initialization */ + unsigned int no_telemetry; /**< true to disable Telemetry */ +}; +extern struct internal_config internal_config; /**< Global EAL configuration. */ + +void eal_reset_internal_config(struct internal_config *internal_cfg); + +#endif /* EAL_INTERNAL_CFG_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h new file mode 100644 index 000000000..e953cd84e --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef EAL_MEMALLOC_H +#define EAL_MEMALLOC_H + +#include <stdbool.h> + +#include <rte_memory.h> + +/* + * Allocate segment of specified page size. + */ +struct rte_memseg * +eal_memalloc_alloc_seg(size_t page_sz, int socket); + +/* + * Allocate `n_segs` segments. + * + * Note: `ms` can be NULL. + * + * Note: it is possible to request best-effort allocation by setting `exact` to + * `false`, in which case allocator will return however many pages it managed to + * allocate successfully. + */ +int +eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz, + int socket, bool exact); + +/* + * Deallocate segment + */ +int +eal_memalloc_free_seg(struct rte_memseg *ms); + +/* + * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all + * segments, returns -1 on error. Any segments that could have been deallocated, + * will be deallocated even in case of error. + */ +int +eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs); + +/* + * Check if memory pointed to by `start` and of `length` that resides in + * memseg list `msl` is IOVA-contiguous. 
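+ * Minimal usage sketch (names as declared below, error handling
+ * elided):
+ *   if (eal_memalloc_is_contig(msl, start, len))
+ *       ; // region can be treated as one IOVA-contiguous block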
+ */ +bool +eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, + size_t len); + +/* synchronize local memory map to primary process */ +int +eal_memalloc_sync_with_primary(void); + +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb, void *arg); + +int +eal_memalloc_mem_event_callback_unregister(const char *name, void *arg); + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len); + +int +eal_memalloc_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit); + +int +eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id); + +int +eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len); + +/* returns fd or -errno */ +int +eal_memalloc_get_seg_fd(int list_idx, int seg_idx); + +/* returns 0 or -errno */ +int +eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd); + +/* returns 0 or -errno */ +int +eal_memalloc_set_seg_list_fd(int list_idx, int fd); + +int +eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset); + +int +eal_memalloc_init(void); + +#endif /* EAL_MEMALLOC_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h new file mode 100644 index 000000000..583fcb595 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef EAL_MEMCFG_H +#define EAL_MEMCFG_H + +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_pause.h> +#include <rte_spinlock.h> +#include <rte_rwlock.h> +#include <rte_tailq.h> + +#include "malloc_heap.h" + +/** + * Memory configuration shared across multiple processes. + */ +struct rte_mem_config { + volatile uint32_t magic; /**< Magic number - sanity check. */ + uint32_t version; + /**< Prevent secondary processes using different DPDK versions. */ + + /* memory topology */ + uint32_t nchannel; /**< Number of channels (0 if unknown). */ + uint32_t nrank; /**< Number of ranks (0 if unknown). */ + + /** + * current lock nest order + * - qlock->mlock (ring/hash/lpm) + * - mplock->qlock->mlock (mempool) + * Notice: + * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock + */ + rte_rwlock_t mlock; /**< used by memzones for thread safety. */ + rte_rwlock_t qlock; /**< used by tailqs for thread safety. */ + rte_rwlock_t mplock; /**< used by mempool library for thread safety. */ + rte_spinlock_t tlock; /**< used by timer library for thread safety. */ + + rte_rwlock_t memory_hotplug_lock; + /**< Indicates whether memory hotplug request is in progress. */ + + /* memory segments and zones */ + struct rte_fbarray memzones; /**< Memzone descriptors. */ + + struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS]; + /**< List of dynamic arrays holding memsegs */ + + struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; + /**< Tailqs for objects */ + + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS]; + /**< DPDK malloc heaps */ + + int next_socket_id; /**< Next socket ID for external malloc heap */ + + /* rte_mem_config has to be mapped at the exact same address in all + * processes, so we need to store it. + */ + uint64_t mem_cfg_addr; /**< Address of this structure in memory. 
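+ * Secondary processes read this field so they can map the shared
+ * configuration at the same virtual address.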
*/ + + /* Primary and secondary processes cannot run with different legacy or + * single file segments options, so to avoid having to specify these + * options to all processes, store them in shared config and update the + * internal config at init time. + */ + uint32_t legacy_mem; /**< stored legacy mem parameter. */ + uint32_t single_file_segments; + /**< stored single file segments parameter. */ + + uint64_t tsc_hz; + /**< TSC rate */ + + uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */ +}; + +/* update internal config from shared mem config */ +void +eal_mcfg_update_internal(void); + +/* update shared mem config from internal config */ +void +eal_mcfg_update_from_internal(void); + +/* wait until primary process initialization is complete */ +void +eal_mcfg_wait_complete(void); + +/* check if DPDK version of current process matches one stored in the config */ +int +eal_mcfg_check_version(void); + +/* set mem config as complete */ +void +eal_mcfg_complete(void); + +#endif /* EAL_MEMCFG_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_options.h b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h new file mode 100644 index 000000000..18e6da9ab --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2014 6WIND S.A. + */ + +#ifndef EAL_OPTIONS_H +#define EAL_OPTIONS_H + +#include "getopt.h" + +struct rte_tel_data; + +enum { + /* long options mapped to a short option */ +#define OPT_HELP "help" + OPT_HELP_NUM = 'h', +#define OPT_PCI_BLACKLIST "pci-blacklist" + OPT_PCI_BLACKLIST_NUM = 'b', +#define OPT_PCI_WHITELIST "pci-whitelist" + OPT_PCI_WHITELIST_NUM = 'w', + + /* first long only option value must be >= 256, so that we won't + * conflict with short options */ + OPT_LONG_MIN_NUM = 256, +#define OPT_BASE_VIRTADDR "base-virtaddr" + OPT_BASE_VIRTADDR_NUM, +#define OPT_CREATE_UIO_DEV "create-uio-dev" + OPT_CREATE_UIO_DEV_NUM, +#define OPT_FILE_PREFIX "file-prefix" + OPT_FILE_PREFIX_NUM, +#define OPT_HUGE_DIR "huge-dir" + OPT_HUGE_DIR_NUM, +#define OPT_HUGE_UNLINK "huge-unlink" + OPT_HUGE_UNLINK_NUM, +#define OPT_LCORES "lcores" + OPT_LCORES_NUM, +#define OPT_LOG_LEVEL "log-level" + OPT_LOG_LEVEL_NUM, +#define OPT_TRACE "trace" + OPT_TRACE_NUM, +#define OPT_TRACE_DIR "trace-dir" + OPT_TRACE_DIR_NUM, +#define OPT_TRACE_BUF_SIZE "trace-bufsz" + OPT_TRACE_BUF_SIZE_NUM, +#define OPT_TRACE_MODE "trace-mode" + OPT_TRACE_MODE_NUM, +#define OPT_MASTER_LCORE "master-lcore" + OPT_MASTER_LCORE_NUM, +#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name" + OPT_MBUF_POOL_OPS_NAME_NUM, +#define OPT_PROC_TYPE "proc-type" + OPT_PROC_TYPE_NUM, +#define OPT_NO_HPET "no-hpet" + OPT_NO_HPET_NUM, +#define OPT_NO_HUGE "no-huge" + OPT_NO_HUGE_NUM, +#define OPT_NO_PCI "no-pci" + OPT_NO_PCI_NUM, +#define OPT_NO_SHCONF "no-shconf" + OPT_NO_SHCONF_NUM, +#define OPT_IN_MEMORY "in-memory" + OPT_IN_MEMORY_NUM, +#define OPT_SOCKET_MEM "socket-mem" + OPT_SOCKET_MEM_NUM, +#define OPT_SOCKET_LIMIT "socket-limit" + OPT_SOCKET_LIMIT_NUM, +#define OPT_SYSLOG "syslog" + OPT_SYSLOG_NUM, +#define OPT_VDEV "vdev" + OPT_VDEV_NUM, +#define OPT_VFIO_INTR "vfio-intr" + OPT_VFIO_INTR_NUM, +#define OPT_VMWARE_TSC_MAP "vmware-tsc-map" + OPT_VMWARE_TSC_MAP_NUM, +#define OPT_LEGACY_MEM "legacy-mem" + OPT_LEGACY_MEM_NUM, +#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments" + OPT_SINGLE_FILE_SEGMENTS_NUM, +#define OPT_IOVA_MODE "iova-mode" + OPT_IOVA_MODE_NUM, +#define OPT_MATCH_ALLOCATIONS "match-allocations" + 
OPT_MATCH_ALLOCATIONS_NUM, +#define OPT_TELEMETRY "telemetry" + OPT_TELEMETRY_NUM, +#define OPT_NO_TELEMETRY "no-telemetry" + OPT_NO_TELEMETRY_NUM, + OPT_LONG_MAX_NUM +}; + +extern const char eal_short_options[]; +extern const struct option eal_long_options[]; + +int eal_parse_common_option(int opt, const char *argv, + struct internal_config *conf); +int eal_option_device_parse(void); +int eal_adjust_config(struct internal_config *internal_cfg); +int eal_cleanup_config(struct internal_config *internal_cfg); +int eal_check_common_options(struct internal_config *internal_cfg); +void eal_common_usage(void); +enum rte_proc_type_t eal_proc_type_detect(void); +int eal_plugins_init(void); +int eal_save_args(int argc, char **argv); +int handle_eal_info_request(const char *cmd, const char *params __rte_unused, + struct rte_tel_data *d); + +#endif /* EAL_OPTIONS_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_private.h b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h new file mode 100644 index 000000000..869ce183a --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h @@ -0,0 +1,423 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +#ifndef _EAL_PRIVATE_H_ +#define _EAL_PRIVATE_H_ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#include <rte_dev.h> +#include <rte_lcore.h> + +/** + * Structure storing internal configuration (per-lcore) + */ +struct lcore_config { + pthread_t thread_id; /**< pthread identifier */ + int pipe_master2slave[2]; /**< communication pipe with master */ + int pipe_slave2master[2]; /**< communication pipe with master */ + + lcore_function_t * volatile f; /**< function to call */ + void * volatile arg; /**< argument of function */ + volatile int ret; /**< return value of function */ + + volatile enum rte_lcore_state_t state; /**< lcore state */ + unsigned int socket_id; /**< physical socket id for this lcore */ + unsigned int core_id; /**< core number on socket for this lcore */ + int core_index; /**< relative index, starting from 0 */ + uint8_t core_role; /**< role of core eg: OFF, RTE, SERVICE */ + + rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */ +}; + +extern struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/** + * The global RTE configuration structure. + */ +struct rte_config { + uint32_t master_lcore; /**< Id of the master lcore */ + uint32_t lcore_count; /**< Number of available logical cores. */ + uint32_t numa_node_count; /**< Number of detected NUMA nodes. */ + uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */ + uint32_t service_lcore_count;/**< Number of available service cores. */ + enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ + + /** Primary or secondary configuration */ + enum rte_proc_type_t process_type; + + /** PA or VA mapping mode */ + enum rte_iova_mode iova_mode; + + /** + * Pointer to memory configuration, which may be shared across multiple + * DPDK instances + */ + struct rte_mem_config *mem_config; +} __rte_packed; + +/** + * Get the global configuration structure. + * + * @return + * A pointer to the global configuration structure. + */ +struct rte_config *rte_eal_get_configuration(void); + +/** + * Initialize the memzone subsystem (private to eal). + * + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_memzone_init(void); + +/** + * Common log initialization function (private to eal). 
Determines + * where log data is written when no call to rte_openlog_stream is + * in effect. + * + * @param default_log + * The default log stream to be used. + * @return + * - 0 on success + * - Negative on error + */ +void eal_log_set_default(FILE *default_log); + +/** + * Fill configuration with number of physical and logical processors + * + * This function is private to EAL. + * + * Parse /proc/cpuinfo to get the number of physical and logical + * processors on the machine. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_cpu_init(void); + +/** + * Create memseg lists + * + * This function is private to EAL. + * + * Preallocate virtual memory. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memseg_init(void); + +/** + * Map memory + * + * This function is private to EAL. + * + * Fill configuration structure with these infos, and return 0 on success. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memory_init(void); + +/** + * Configure timers + * + * This function is private to EAL. + * + * Mmap memory areas used by HPET (high precision event timer) that will + * provide our time reference, and configure the TSC frequency also for it + * to be used as a reference. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_timer_init(void); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_init(const char *id, int facility); + +/** + * Save the log regexp for later + */ +int rte_log_save_regexp(const char *type, int priority); +int rte_log_save_pattern(const char *pattern, int priority); + +/** + * Init tail queues for non-EAL library structures. This is to allow + * the rings, mempools, etc. lists to be shared among multiple processes + * + * This function is private to EAL + * + * @return + * 0 on success, negative on error + */ +int rte_eal_tailqs_init(void); + +/** + * Init interrupt handling. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_intr_init(void); + +/** + * Init alarm mechanism. This is to allow a callback be called after + * specific time. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_alarm_init(void); + +/** + * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, + * etc.) loaded. + * + * @param module_name + * The module's name which need to be checked + * + * @return + * -1 means some error happens(NULL pointer or open failure) + * 0 means the module not loaded + * 1 means the module loaded + */ +int rte_eal_check_module(const char *module_name); + +/** + * Get virtual area of specified size from the OS. + * + * This function is private to the EAL. + * + * @param requested_addr + * Address where to request address space. + * @param size + * Size of requested area. + * @param page_sz + * Page size on which to align requested virtual area. + * @param flags + * EAL_VIRTUAL_AREA_* flags. + * @param mmap_flags + * Extra flags passed directly to mmap(). + * + * @return + * Virtual area address if successful. + * NULL if unsuccessful. + */ + +#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0) +/**< don't fail if cannot get exact requested address. */ +#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1) +/**< try getting smaller sized (decrement by page size) virtual areas if cannot + * get area of requested size. 
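+ * An illustrative call using this flag (a sketch, not part of the
+ * original header; sizes are hypothetical):
+ *
+ *   size_t sz = 4 * page_sz;
+ *   void *va = eal_get_virtual_area(NULL, &sz, page_sz,
+ *           EAL_VIRTUAL_AREA_ALLOW_SHRINK, 0);
+ *   // on success, sz holds the size actually reserved, which may
+ *   // be smaller than originally requested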
+ */ +#define EAL_VIRTUAL_AREA_UNMAP (1 << 2) +/**< immediately unmap reserved virtual area. */ +void * +eal_get_virtual_area(void *requested_addr, size_t *size, + size_t page_sz, int flags, int mmap_flags); + +/** + * Get cpu core_id. + * + * This function is private to the EAL. + */ +unsigned eal_cpu_core_id(unsigned lcore_id); + +/** + * Check if cpu is present. + * + * This function is private to the EAL. + */ +int eal_cpu_detected(unsigned lcore_id); + +/** + * Set TSC frequency from precise value or estimation + * + * This function is private to the EAL. + */ +void set_tsc_freq(void); + +/** + * Get precise TSC frequency from system + * + * This function is private to the EAL. + */ +uint64_t get_tsc_freq(void); + +/** + * Get TSC frequency if the architecture supports. + * + * This function is private to the EAL. + * + * @return + * The number of TSC cycles in one second. + * Returns zero if the architecture support is not available. + */ +uint64_t get_tsc_freq_arch(void); + +/** + * Prepare physical memory mapping + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_init(void); + +/** + * Creates memory mapping in secondary process + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_attach(void); + +/** + * Find a bus capable of identifying a device. + * + * @param str + * A device identifier (PCI address, virtual PMD name, ...). + * + * @return + * A valid bus handle if found. + * NULL if no bus is able to parse this device. + */ +struct rte_bus *rte_bus_find_by_device_name(const char *str); + +/** + * Create the unix channel for primary/secondary communication. + * + * @return + * 0 on success; + * (<0) on failure. + */ +int rte_mp_channel_init(void); + +/** + * Primary/secondary communication cleanup. + */ +void rte_mp_channel_cleanup(void); + +/** + * @internal + * Parse a device string and store its information in an + * rte_devargs structure. + * + * A device description is split by layers of abstraction of the device: + * bus, class and driver. Each layer will offer a set of properties that + * can be applied either to configure or recognize a device. + * + * This function will parse those properties and prepare the rte_devargs + * to be given to each layers for processing. + * + * Note: if the "data" field of the devargs points to devstr, + * then no dynamic allocation is performed and the rte_devargs + * can be safely discarded. + * + * Otherwise ``data`` will hold a workable copy of devstr, that will be + * used by layers descriptors within rte_devargs. In this case, + * any rte_devargs should be cleaned-up before being freed. + * + * @param da + * rte_devargs structure to fill. + * + * @param devstr + * Device string. + * + * @return + * 0 on success. + * Negative errno values on error (rte_errno is set). + */ +int +rte_devargs_layers_parse(struct rte_devargs *devargs, + const char *devstr); + +/* + * probe a device at local process. + * + * @param devargs + * Device arguments including bus, class and driver properties. + * @param new_dev + * new device be probed as output. + * @return + * 0 on success, negative on error. + */ +int local_dev_probe(const char *devargs, struct rte_device **new_dev); + +/** + * Hotplug remove a given device from a specific bus at local process. + * + * @param dev + * Data structure of the device to remove. + * @return + * 0 on success, negative on error. 
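+ *
+ * Hedged usage sketch pairing probe and remove (the device string
+ * below is made up for illustration):
+ *
+ *   struct rte_device *dev;
+ *   if (local_dev_probe("net_null0", &dev) == 0)
+ *           local_dev_remove(dev);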
+ */ +int local_dev_remove(struct rte_device *dev); + +/** + * Iterate over all buses to find the corresponding bus to handle the sigbus + * error. + * @param failure_addr + * Pointer of the fault address of the sigbus error. + * + * @return + * 0 success to handle the sigbus. + * -1 failed to handle the sigbus + * 1 no bus can handler the sigbus + */ +int rte_bus_sigbus_handler(const void *failure_addr); + +/** + * @internal + * Register the sigbus handler. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +dev_sigbus_handler_register(void); + +/** + * @internal + * Unregister the sigbus handler. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +dev_sigbus_handler_unregister(void); + +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + +void * +eal_malloc_no_trace(const char *type, size_t size, unsigned int align); + +void eal_free_no_trace(void *addr); + +#endif /* _EAL_PRIVATE_H_ */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h new file mode 100644 index 000000000..b40ed249e --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef EAL_THREAD_H +#define EAL_THREAD_H + +#include <rte_lcore.h> + +/** + * basic loop of thread, called for each thread by eal_init(). + * + * @param arg + * opaque pointer + */ +__rte_noreturn void *eal_thread_loop(void *arg); + +/** + * Init per-lcore info for master thread + * + * @param lcore_id + * identifier of master lcore + */ +void eal_thread_init_master(unsigned lcore_id); + +/** + * Get the NUMA socket id from cpu id. + * This function is private to EAL. + * + * @param cpu_id + * The logical process id. + * @return + * socket_id or SOCKET_ID_ANY + */ +unsigned eal_cpu_socket_id(unsigned cpu_id); + +/** + * Default buffer size to use with eal_thread_dump_affinity() + */ +#define RTE_CPU_AFFINITY_STR_LEN 256 + +/** + * Dump the current pthread cpuset. + * This function is private to EAL. + * + * Note: + * If the dump size is greater than the size of given buffer, + * the string will be truncated and with '\0' at the end. + * + * @param str + * The string buffer the cpuset will dump to. + * @param size + * The string buffer size. + * @return + * 0 for success, -1 if truncation happens. + */ +int +eal_thread_dump_affinity(char *str, unsigned size); + +#endif /* EAL_THREAD_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h new file mode 100644 index 000000000..8f6061615 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#ifndef __EAL_TRACE_H +#define __EAL_TRACE_H + +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_spinlock.h> +#include <rte_trace.h> +#include <rte_trace_point.h> +#include <rte_uuid.h> + +#include "eal_private.h" +#include "eal_thread.h" + +#define trace_err(fmt, args...) \ + RTE_LOG(ERR, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args) + +#define trace_crit(fmt, args...) 
\ + RTE_LOG(CRIT, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args) + +#define TRACE_PREFIX_LEN 12 +#define TRACE_DIR_STR_LEN (sizeof("YYYY-mm-dd-AM-HH-MM-SS") + TRACE_PREFIX_LEN) +#define TRACE_CTF_FIELD_SIZE 384 +#define TRACE_POINT_NAME_SIZE 64 +#define TRACE_CTF_MAGIC 0xC1FC1FC1 +#define TRACE_MAX_ARGS 32 + +struct trace_point { + STAILQ_ENTRY(trace_point) next; + rte_trace_point_t *handle; + char name[TRACE_POINT_NAME_SIZE]; + char ctf_field[TRACE_CTF_FIELD_SIZE]; +}; + +enum trace_area_e { + TRACE_AREA_HEAP, + TRACE_AREA_HUGEPAGE, +}; + +struct thread_mem_meta { + void *mem; + enum trace_area_e area; +}; + +struct trace_arg { + STAILQ_ENTRY(trace_arg) next; + char *val; +}; + +struct trace { + char dir[PATH_MAX]; + int dir_offset; + int register_errno; + bool status; + enum rte_trace_mode mode; + rte_uuid_t uuid; + uint32_t buff_len; + STAILQ_HEAD(, trace_arg) args; + uint32_t nb_trace_points; + uint32_t nb_trace_mem_list; + struct thread_mem_meta *lcore_meta; + uint64_t epoch_sec; + uint64_t epoch_nsec; + uint64_t uptime_ticks; + char *ctf_meta; + uint32_t ctf_meta_offset_freq; + uint32_t ctf_meta_offset_freq_off_s; + uint32_t ctf_meta_offset_freq_off; + uint16_t ctf_fixup_done; + rte_spinlock_t lock; +}; + +/* Helper functions */ +static inline uint16_t +trace_id_get(rte_trace_point_t *trace) +{ + return (*trace & __RTE_TRACE_FIELD_ID_MASK) >> + __RTE_TRACE_FIELD_ID_SHIFT; +} + +static inline size_t +trace_mem_sz(uint32_t len) +{ + return len + sizeof(struct __rte_trace_header); +} + +/* Trace object functions */ +struct trace *trace_obj_get(void); + +/* Trace point list functions */ +STAILQ_HEAD(trace_point_head, trace_point); +struct trace_point_head *trace_list_head_get(void); + +/* Util functions */ +const char *trace_mode_to_string(enum rte_trace_mode mode); +const char *trace_area_to_string(enum trace_area_e area); +int trace_args_apply(const char *arg); +void trace_bufsz_args_apply(void); +bool trace_has_duplicate_entry(void); +void trace_uuid_generate(void); +int trace_metadata_create(void); +void trace_metadata_destroy(void); +int trace_mkdir(void); +int trace_epoch_time_save(void); +void trace_mem_per_thread_free(void); + +/* EAL interface */ +int eal_trace_init(void); +void eal_trace_fini(void); +int eal_trace_args_save(const char *val); +void eal_trace_args_free(void); +int eal_trace_dir_args_save(const char *val); +int eal_trace_mode_args_save(const char *val); +int eal_trace_bufsz_args_save(const char *val); + +#endif /* __EAL_TRACE_H */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c new file mode 100644 index 000000000..ee791903b --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c @@ -0,0 +1,465 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ +#include <string.h> + +#include <rte_eal.h> +#include <rte_errno.h> +#include <rte_alarm.h> +#include <rte_string_fns.h> +#include <rte_devargs.h> + +#include "hotplug_mp.h" +#include "eal_private.h" + +#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */ + +struct mp_reply_bundle { + struct rte_mp_msg msg; + void *peer; +}; + +static int cmp_dev_name(const struct rte_device *dev, const void *_name) +{ + const char *name = _name; + + return strcmp(dev->name, name); +} + +/** + * Secondary to primary request. + * start from function eal_dev_hotplug_request_to_primary. + * + * device attach on secondary: + * a) secondary send sync request to the primary. 
+ * b) primary receive the request and attach the new device if + * failed goto i). + * c) primary forward attach sync request to all secondary. + * d) secondary receive the request and attach the device and send a reply. + * e) primary check the reply if all success goes to j). + * f) primary send attach rollback sync request to all secondary. + * g) secondary receive the request and detach the device and send a reply. + * h) primary receive the reply and detach device as rollback action. + * i) send attach fail to secondary as a reply of step a), goto k). + * j) send attach success to secondary as a reply of step a). + * k) secondary receive reply and return. + * + * device detach on secondary: + * a) secondary send sync request to the primary. + * b) primary send detach sync request to all secondary. + * c) secondary detach the device and send a reply. + * d) primary check the reply if all success goes to g). + * e) primary send detach rollback sync request to all secondary. + * f) secondary receive the request and attach back device. goto h). + * g) primary detach the device if success goto i), else goto e). + * h) primary send detach fail to secondary as a reply of step a), goto j). + * i) primary send detach success to secondary as a reply of step a). + * j) secondary receive reply and return. + */ + +static int +send_response_to_secondary(const struct eal_dev_mp_req *req, + int result, + const void *peer) +{ + struct rte_mp_msg mp_resp; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + int ret; + + memset(&mp_resp, 0, sizeof(mp_resp)); + mp_resp.len_param = sizeof(*resp); + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + memcpy(resp, req, sizeof(*req)); + resp->result = result; + + ret = rte_mp_reply(&mp_resp, peer); + if (ret != 0) + RTE_LOG(ERR, EAL, "failed to send response to secondary\n"); + + return ret; +} + +static void +__handle_secondary_request(void *param) +{ + struct mp_reply_bundle *bundle = param; + const struct rte_mp_msg *msg = &bundle->msg; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct eal_dev_mp_req tmp_req; + struct rte_devargs da; + struct rte_device *dev; + struct rte_bus *bus; + int ret = 0; + + tmp_req = *req; + + if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { + ret = local_dev_probe(req->devargs, &dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n"); + if (ret != -EEXIST) + goto finish; + } + ret = eal_dev_hotplug_request_to_secondary(&tmp_req); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + if (tmp_req.result != 0) { + ret = tmp_req.result; + RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n"); + if (ret != -EEXIST) + goto rollback; + } + } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) { + ret = rte_devargs_parse(&da, req->devargs); + if (ret != 0) + goto finish; + free(da.args); /* we don't need those */ + da.args = NULL; + + ret = eal_dev_hotplug_request_to_secondary(&tmp_req); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + + bus = rte_bus_find_by_name(da.bus->name); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da.bus->name); + ret = -ENOENT; + goto finish; + } + + dev = bus->find_device(NULL, cmp_dev_name, da.name); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da.name); + ret = -ENOENT; + goto finish; + } 
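+ /* Secondaries were asked to detach first (steps b/c in the protocol
+  * comment above); only when they all succeeded, or reported -ENOENT,
+  * does the primary remove the device locally. Any other failure
+  * triggers the rollback that asks them to re-attach.
+  */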
+ + if (tmp_req.result != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n"); + ret = tmp_req.result; + if (ret != -ENOENT) + goto rollback; + } + + ret = local_dev_remove(dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n"); + if (ret != -ENOENT) + goto rollback; + } + } else { + RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n"); + ret = -ENOTSUP; + } + goto finish; + +rollback: + if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { + tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; + eal_dev_hotplug_request_to_secondary(&tmp_req); + local_dev_remove(dev); + } else { + tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK; + eal_dev_hotplug_request_to_secondary(&tmp_req); + } + +finish: + ret = send_response_to_secondary(&tmp_req, ret, bundle->peer); + if (ret) + RTE_LOG(ERR, EAL, "failed to send response to secondary\n"); + + free(bundle->peer); + free(bundle); +} + +static int +handle_secondary_request(const struct rte_mp_msg *msg, const void *peer) +{ + struct mp_reply_bundle *bundle; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + int ret = 0; + + bundle = malloc(sizeof(*bundle)); + if (bundle == NULL) { + RTE_LOG(ERR, EAL, "not enough memory\n"); + return send_response_to_secondary(req, -ENOMEM, peer); + } + + bundle->msg = *msg; + /** + * We need to send reply on interrupt thread, but peer can't be + * parsed directly, so this is a temporal hack, need to be fixed + * when it is ready. + */ + bundle->peer = strdup(peer); + if (bundle->peer == NULL) { + free(bundle); + RTE_LOG(ERR, EAL, "not enough memory\n"); + return send_response_to_secondary(req, -ENOMEM, peer); + } + + /** + * We are at IPC callback thread, sync IPC is not allowed due to + * dead lock, so we delegate the task to interrupt thread. + */ + ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle); + if (ret != 0) { + RTE_LOG(ERR, EAL, "failed to add mp task\n"); + free(bundle->peer); + free(bundle); + return send_response_to_secondary(req, ret, peer); + } + return 0; +} + +static void __handle_primary_request(void *param) +{ + struct mp_reply_bundle *bundle = param; + struct rte_mp_msg *msg = &bundle->msg; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct rte_mp_msg mp_resp; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + struct rte_devargs *da; + struct rte_device *dev; + struct rte_bus *bus; + int ret = 0; + + memset(&mp_resp, 0, sizeof(mp_resp)); + + switch (req->t) { + case EAL_DEV_REQ_TYPE_ATTACH: + case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK: + ret = local_dev_probe(req->devargs, &dev); + break; + case EAL_DEV_REQ_TYPE_DETACH: + case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK: + da = calloc(1, sizeof(*da)); + if (da == NULL) { + ret = -ENOMEM; + break; + } + + ret = rte_devargs_parse(da, req->devargs); + if (ret != 0) + goto quit; + + bus = rte_bus_find_by_name(da->bus->name); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name); + ret = -ENOENT; + goto quit; + } + + dev = bus->find_device(NULL, cmp_dev_name, da->name); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name); + ret = -ENOENT; + goto quit; + } + + if (!rte_dev_is_probed(dev)) { + if (req->t == EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK) { + /** + * Don't fail the rollback just because there's + * nothing to do. 
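+ * (e.g. the earlier attach may have failed in this secondary,
+ * leaving nothing to detach here)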
+ */ + ret = 0; + } else + ret = -ENODEV; + + goto quit; + } + + ret = local_dev_remove(dev); +quit: + free(da->args); + free(da); + break; + default: + ret = -EINVAL; + } + + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + mp_resp.len_param = sizeof(*req); + memcpy(resp, req, sizeof(*resp)); + resp->result = ret; + if (rte_mp_reply(&mp_resp, bundle->peer) < 0) + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + + free(bundle->peer); + free(bundle); +} + +static int +handle_primary_request(const struct rte_mp_msg *msg, const void *peer) +{ + struct rte_mp_msg mp_resp; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + struct mp_reply_bundle *bundle; + int ret = 0; + + memset(&mp_resp, 0, sizeof(mp_resp)); + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + mp_resp.len_param = sizeof(*req); + memcpy(resp, req, sizeof(*resp)); + + bundle = calloc(1, sizeof(*bundle)); + if (bundle == NULL) { + RTE_LOG(ERR, EAL, "not enough memory\n"); + resp->result = -ENOMEM; + ret = rte_mp_reply(&mp_resp, peer); + if (ret) + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + return ret; + } + + bundle->msg = *msg; + /** + * We need to send reply on interrupt thread, but peer can't be + * parsed directly, so this is a temporal hack, need to be fixed + * when it is ready. + */ + bundle->peer = (void *)strdup(peer); + if (bundle->peer == NULL) { + RTE_LOG(ERR, EAL, "not enough memory\n"); + free(bundle); + resp->result = -ENOMEM; + ret = rte_mp_reply(&mp_resp, peer); + if (ret) + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + return ret; + } + + /** + * We are at IPC callback thread, sync IPC is not allowed due to + * dead lock, so we delegate the task to interrupt thread. 
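+ * rte_eal_alarm_set(1, ...) below runs the handler from the
+ * interrupt thread after a one-microsecond delay, which is what
+ * performs the hand-off.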
+ */ + ret = rte_eal_alarm_set(1, __handle_primary_request, bundle); + if (ret != 0) { + free(bundle->peer); + free(bundle); + resp->result = ret; + ret = rte_mp_reply(&mp_resp, peer); + if (ret != 0) { + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + return ret; + } + } + return 0; +} + +int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req) +{ + struct rte_mp_msg mp_req; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0}; + struct eal_dev_mp_req *resp; + int ret; + + memset(&mp_req, 0, sizeof(mp_req)); + memcpy(mp_req.param, req, sizeof(*req)); + mp_req.len_param = sizeof(*req); + strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name)); + + ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts); + if (ret || mp_reply.nb_received != 1) { + RTE_LOG(ERR, EAL, "Cannot send request to primary\n"); + if (!ret) + return -1; + return ret; + } + + resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param; + req->result = resp->result; + + free(mp_reply.msgs); + return ret; +} + +int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) +{ + struct rte_mp_msg mp_req; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0}; + int ret; + int i; + + memset(&mp_req, 0, sizeof(mp_req)); + memcpy(mp_req.param, req, sizeof(*req)); + mp_req.len_param = sizeof(*req); + strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name)); + + ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts); + if (ret != 0) { + /* if IPC is not supported, behave as if the call succeeded */ + if (rte_errno != ENOTSUP) + RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n"); + else + ret = 0; + return ret; + } + + if (mp_reply.nb_sent != mp_reply.nb_received) { + RTE_LOG(ERR, EAL, "not all secondary reply\n"); + free(mp_reply.msgs); + return -1; + } + + req->result = 0; + for (i = 0; i < mp_reply.nb_received; i++) { + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_reply.msgs[i].param; + if (resp->result != 0) { + if (req->t == EAL_DEV_REQ_TYPE_ATTACH && + resp->result == -EEXIST) + continue; + if (req->t == EAL_DEV_REQ_TYPE_DETACH && + resp->result == -ENOENT) + continue; + req->result = resp->result; + } + } + + free(mp_reply.msgs); + return 0; +} + +int eal_mp_dev_hotplug_init(void) +{ + int ret; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST, + handle_secondary_request); + /* primary is allowed to not support IPC */ + if (ret != 0 && rte_errno != ENOTSUP) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + EAL_DEV_MP_ACTION_REQUEST); + return ret; + } + } else { + ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST, + handle_primary_request); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + EAL_DEV_MP_ACTION_REQUEST); + return ret; + } + } + + return 0; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h new file mode 100644 index 000000000..8fcf9b52e --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _HOTPLUG_MP_H_ +#define _HOTPLUG_MP_H_ + +#include "rte_dev.h" +#include "rte_bus.h" + +#define EAL_DEV_MP_ACTION_REQUEST "eal_dev_mp_request" +#define EAL_DEV_MP_ACTION_RESPONSE "eal_dev_mp_response" + +#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN +#define EAL_DEV_MP_BUS_NAME_MAX_LEN 
32 +#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128 + +enum eal_dev_req_type { + EAL_DEV_REQ_TYPE_ATTACH, + EAL_DEV_REQ_TYPE_DETACH, + EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK, + EAL_DEV_REQ_TYPE_DETACH_ROLLBACK, +}; + +struct eal_dev_mp_req { + enum eal_dev_req_type t; + char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN]; + int result; +}; + +/** + * Register all mp action callbacks for hotplug. + * + * @return + * 0 on success, negative on error. + */ +int +eal_mp_dev_hotplug_init(void); + +/** + * This is a synchronous wrapper for secondary process send + * request to primary process, this is invoked when an attach + * or detach request is issued from primary process. + */ +int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req); + +/** + * this is a synchronous wrapper for primary process send + * request to secondary process, this is invoked when an attach + * or detach request issued from secondary process. + */ +int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req); + + +#endif /* _HOTPLUG_MP_H_ */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c new file mode 100644 index 000000000..51cdfc5d5 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c @@ -0,0 +1,682 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ +#include <inttypes.h> +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/queue.h> + +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_launch.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_common.h> +#include <rte_spinlock.h> + +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" +#include "malloc_elem.h" +#include "malloc_heap.h" + +/* + * If debugging is enabled, freed memory is set to poison value + * to catch buggy programs. Otherwise, freed memory is set to zero + * to avoid having to zero in zmalloc + */ +#ifdef RTE_MALLOC_DEBUG +#define MALLOC_POISON 0x6b +#else +#define MALLOC_POISON 0 +#endif + +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) +{ + void *cur_page, *contig_seg_start, *page_end, *cur_seg_end; + void *data_start, *data_end; + rte_iova_t expected_iova; + struct rte_memseg *ms; + size_t page_sz, cur, max; + + page_sz = (size_t)elem->msl->page_sz; + data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); + data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN); + /* segment must start after header and with specified alignment */ + contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align); + + /* return if aligned address is already out of malloc element */ + if (contig_seg_start > data_end) + return 0; + + /* if we're in IOVA as VA mode, or if we're in legacy mode with + * hugepages, all elements are IOVA-contiguous. however, we can only + * make these assumptions about internal memory - externally allocated + * segments have to be checked. 
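+ * For such internal memory the fast path below simply returns
+ * data_end - contig_seg_start without inspecting individual pages.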
+ */ + if (!elem->msl->external && + (rte_eal_iova_mode() == RTE_IOVA_VA || + (internal_config.legacy_mem && + rte_eal_has_hugepages()))) + return RTE_PTR_DIFF(data_end, contig_seg_start); + + cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz); + ms = rte_mem_virt2memseg(cur_page, elem->msl); + + /* do first iteration outside the loop */ + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) - + MALLOC_ELEM_TRAILER_LEN; + max = cur; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + + cur_page = RTE_PTR_ADD(cur_page, page_sz); + + while (cur_page < data_end) { + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + + /* reset start of contiguous segment if unexpected iova */ + if (ms->iova != expected_iova) { + /* next contiguous segment must start at specified + * alignment. + */ + contig_seg_start = RTE_PTR_ALIGN(cur_page, align); + /* new segment start may be on a different page, so find + * the page and skip to next iteration to make sure + * we're not blowing past data end. + */ + ms = rte_mem_virt2memseg(contig_seg_start, elem->msl); + cur_page = ms->addr; + /* don't trigger another recalculation */ + expected_iova = ms->iova; + continue; + } + /* cur_seg_end ends on a page boundary or on data end. if we're + * looking at data end, then malloc trailer is already included + * in the calculations. if we're looking at page end, then we + * know there's more data past this page and thus there's space + * for malloc element trailer, so don't count it here. + */ + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start); + /* update max if cur value is bigger */ + if (cur > max) + max = cur; + + /* move to next page */ + cur_page = page_end; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + } + + return max; +} + +/* + * Initialize a general malloc_elem header structure + */ +void +malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, + struct rte_memseg_list *msl, size_t size, + struct malloc_elem *orig_elem, size_t orig_size) +{ + elem->heap = heap; + elem->msl = msl; + elem->prev = NULL; + elem->next = NULL; + memset(&elem->free_list, 0, sizeof(elem->free_list)); + elem->state = ELEM_FREE; + elem->size = size; + elem->pad = 0; + elem->orig_elem = orig_elem; + elem->orig_size = orig_size; + set_header(elem); + set_trailer(elem); +} + +void +malloc_elem_insert(struct malloc_elem *elem) +{ + struct malloc_elem *prev_elem, *next_elem; + struct malloc_heap *heap = elem->heap; + + /* first and last elements must be both NULL or both non-NULL */ + if ((heap->first == NULL) != (heap->last == NULL)) { + RTE_LOG(ERR, EAL, "Heap is probably corrupt\n"); + return; + } + + if (heap->first == NULL && heap->last == NULL) { + /* if empty heap */ + heap->first = elem; + heap->last = elem; + prev_elem = NULL; + next_elem = NULL; + } else if (elem < heap->first) { + /* if lower than start */ + prev_elem = NULL; + next_elem = heap->first; + heap->first = elem; + } else if (elem > heap->last) { + /* if higher than end */ + prev_elem = heap->last; + next_elem = NULL; + heap->last = elem; + } else { + /* the new memory is somewhere between start and end */ + uint64_t dist_from_start, dist_from_end; + + dist_from_end = RTE_PTR_DIFF(heap->last, elem); + dist_from_start = RTE_PTR_DIFF(elem, heap->first); + + /* check which is closer, and find closest list entries */ + if (dist_from_start < 
dist_from_end) { + prev_elem = heap->first; + while (prev_elem->next < elem) + prev_elem = prev_elem->next; + next_elem = prev_elem->next; + } else { + next_elem = heap->last; + while (next_elem->prev > elem) + next_elem = next_elem->prev; + prev_elem = next_elem->prev; + } + } + + /* insert new element */ + elem->prev = prev_elem; + elem->next = next_elem; + if (prev_elem) + prev_elem->next = elem; + if (next_elem) + next_elem->prev = elem; +} + +/* + * Attempt to find enough physically contiguous memory in this block to store + * our data. Assume that element has at least enough space to fit in the data, + * so we just check the page addresses. + */ +static bool +elem_check_phys_contig(const struct rte_memseg_list *msl, + void *start, size_t size) +{ + return eal_memalloc_is_contig(msl, start, size); +} + +/* + * calculate the starting point of where data of the requested size + * and alignment would fit in the current element. If the data doesn't + * fit, return NULL. + */ +static void * +elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound, bool contig) +{ + size_t elem_size = elem->size; + + /* + * we're allocating from the end, so adjust the size of element by + * alignment size. + */ + while (elem_size >= size) { + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem_size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + end_pt = new_data_start + size; + + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + + /* if the new start point is before the exist start, + * it won't fit + */ + if (new_elem_start < (uintptr_t)elem) + return NULL; + + if (contig) { + size_t new_data_size = end_pt - new_data_start; + + /* + * if physical contiguousness was requested and we + * couldn't fit all data into one physically contiguous + * block, try again with lower addresses. + */ + if (!elem_check_phys_contig(elem->msl, + (void *)new_data_start, + new_data_size)) { + elem_size -= align; + continue; + } + } + return (void *)new_elem_start; + } + return NULL; +} + +/* + * use elem_start_pt to determine if we get meet the size and + * alignment request from the current element + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound, bool contig) +{ + return elem_start_pt(elem, size, align, bound, contig) != NULL; +} + +/* + * split an existing element into two smaller elements at the given + * split_pt parameter. + */ +static void +split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) +{ + struct malloc_elem *next_elem = elem->next; + const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; + const size_t new_elem_size = elem->size - old_elem_size; + + malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size, + elem->orig_elem, elem->orig_size); + split_pt->prev = elem; + split_pt->next = next_elem; + if (next_elem) + next_elem->prev = split_pt; + else + elem->heap->last = split_pt; + elem->next = split_pt; + elem->size = old_elem_size; + set_trailer(elem); + if (elem->pad) { + /* Update inner padding inner element size. 
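+ * (the element is preceded by a pad header, so step past the pad
+ * and keep the inner element's size consistent with the new outer
+ * size)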
*/ + elem = RTE_PTR_ADD(elem, elem->pad); + elem->size = old_elem_size - elem->pad; + } +} + +/* + * our malloc heap is a doubly linked list, so doubly remove our element. + */ +static void __rte_unused +remove_elem(struct malloc_elem *elem) +{ + struct malloc_elem *next, *prev; + next = elem->next; + prev = elem->prev; + + if (next) + next->prev = prev; + else + elem->heap->last = prev; + if (prev) + prev->next = next; + else + elem->heap->first = next; + + elem->prev = NULL; + elem->next = NULL; +} + +static int +next_elem_is_adjacent(struct malloc_elem *elem) +{ + return elem->next == RTE_PTR_ADD(elem, elem->size) && + elem->next->msl == elem->msl && + (!internal_config.match_allocations || + elem->orig_elem == elem->next->orig_elem); +} + +static int +prev_elem_is_adjacent(struct malloc_elem *elem) +{ + return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) && + elem->prev->msl == elem->msl && + (!internal_config.match_allocations || + elem->orig_elem == elem->prev->orig_elem); +} + +/* + * Given an element size, compute its freelist index. + * We free an element into the freelist containing similarly-sized elements. + * We try to allocate elements starting with the freelist containing + * similarly-sized elements, and if necessary, we search freelists + * containing larger elements. + * + * Example element size ranges for a heap with five free lists: + * heap->free_head[0] - (0 , 2^8] + * heap->free_head[1] - (2^8 , 2^10] + * heap->free_head[2] - (2^10 ,2^12] + * heap->free_head[3] - (2^12, 2^14] + * heap->free_head[4] - (2^14, MAX_SIZE] + */ +size_t +malloc_elem_free_list_index(size_t size) +{ +#define MALLOC_MINSIZE_LOG2 8 +#define MALLOC_LOG2_INCREMENT 2 + + size_t log2; + size_t index; + + if (size <= (1UL << MALLOC_MINSIZE_LOG2)) + return 0; + + /* Find next power of 2 >= size. */ + log2 = sizeof(size) * 8 - __builtin_clzl(size-1); + + /* Compute freelist index, based on log2(size). */ + index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) / + MALLOC_LOG2_INCREMENT; + + return index <= RTE_HEAP_NUM_FREELISTS-1? + index: RTE_HEAP_NUM_FREELISTS-1; +} + +/* + * Add the specified element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem) +{ + size_t idx; + + idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); + elem->state = ELEM_FREE; + LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list); +} + +/* + * Remove the specified element from its heap's free list. + */ +void +malloc_elem_free_list_remove(struct malloc_elem *elem) +{ + LIST_REMOVE(elem, free_list); +} + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + * This function is only called from malloc_heap_alloc so parameter checking + * is not done here, as it's done there previously. 
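+ *
+ * Illustrative example (sizes are made up): carving 64 B out of a
+ * 1 MiB free element allocates from the element's end, so the large
+ * remainder in front is split off and stays free; had that remainder
+ * been smaller than MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE, it would
+ * instead have been kept as padding of the allocated element.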
+ */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound, bool contig) +{ + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound, + contig); + const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; + const size_t trailer_size = elem->size - old_elem_size - size - + MALLOC_ELEM_OVERHEAD; + + malloc_elem_free_list_remove(elem); + + if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split it, too much free space after elem */ + struct malloc_elem *new_free_elem = + RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD); + + split_elem(elem, new_free_elem); + malloc_elem_free_list_insert(new_free_elem); + + if (elem == elem->heap->last) + elem->heap->last = new_free_elem; + } + + if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* don't split it, pad the element instead */ + elem->state = ELEM_BUSY; + elem->pad = old_elem_size; + + /* put a dummy header in padding, to point to real element header */ + if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything + * is cache-line aligned */ + new_elem->pad = elem->pad; + new_elem->state = ELEM_PAD; + new_elem->size = elem->size - elem->pad; + set_header(new_elem); + } + + return new_elem; + } + + /* we are going to split the element in two. The original element + * remains free, and the new element is the one allocated. + * Re-insert original element, in case its new size makes it + * belong on a different list. + */ + split_elem(elem, new_elem); + new_elem->state = ELEM_BUSY; + malloc_elem_free_list_insert(elem); + + return new_elem; +} + +/* + * join two struct malloc_elem together. elem1 and elem2 must + * be contiguous in memory. + */ +static inline void +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) +{ + struct malloc_elem *next = elem2->next; + elem1->size += elem2->size; + if (next) + next->prev = elem1; + else + elem1->heap->last = elem1; + elem1->next = next; + if (elem1->pad) { + struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad); + inner->size = elem1->size - elem1->pad; + } +} + +struct malloc_elem * +malloc_elem_join_adjacent_free(struct malloc_elem *elem) +{ + /* + * check if next element exists, is adjacent and is free, if so join + * with it, need to remove from free list. + */ + if (elem->next != NULL && elem->next->state == ELEM_FREE && + next_elem_is_adjacent(elem)) { + void *erase; + size_t erase_len; + + /* we will want to erase the trailer and header */ + erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad; + + /* remove from free list, join to this one */ + malloc_elem_free_list_remove(elem->next); + join_elem(elem, elem->next); + + /* erase header, trailer and pad */ + memset(erase, MALLOC_POISON, erase_len); + } + + /* + * check if prev element exists, is adjacent and is free, if so join + * with it, need to remove from free list. 
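+ * This mirrors the next-element case above, except that here the
+ * previous element's header survives and becomes the element that
+ * is returned.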
+ */ + if (elem->prev != NULL && elem->prev->state == ELEM_FREE && + prev_elem_is_adjacent(elem)) { + struct malloc_elem *new_elem; + void *erase; + size_t erase_len; + + /* we will want to erase trailer and header */ + erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->pad; + + /* remove from free list, join to this one */ + malloc_elem_free_list_remove(elem->prev); + + new_elem = elem->prev; + join_elem(new_elem, elem); + + /* erase header, trailer and pad */ + memset(erase, MALLOC_POISON, erase_len); + + elem = new_elem; + } + + return elem; +} + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +struct malloc_elem * +malloc_elem_free(struct malloc_elem *elem) +{ + void *ptr; + size_t data_len; + + ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); + data_len = elem->size - MALLOC_ELEM_OVERHEAD; + + elem = malloc_elem_join_adjacent_free(elem); + + malloc_elem_free_list_insert(elem); + + elem->pad = 0; + + /* decrease heap's count of allocated elements */ + elem->heap->alloc_count--; + + /* poison memory */ + memset(ptr, MALLOC_POISON, data_len); + + return elem; +} + +/* assume all checks were already done */ +void +malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len) +{ + struct malloc_elem *hide_start, *hide_end, *prev, *next; + size_t len_before, len_after; + + hide_start = start; + hide_end = RTE_PTR_ADD(start, len); + + prev = elem->prev; + next = elem->next; + + /* we cannot do anything with non-adjacent elements */ + if (next && next_elem_is_adjacent(elem)) { + len_after = RTE_PTR_DIFF(next, hide_end); + if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split after */ + split_elem(elem, hide_end); + + malloc_elem_free_list_insert(hide_end); + } else if (len_after > 0) { + RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n"); + return; + } + } + + /* we cannot do anything with non-adjacent elements */ + if (prev && prev_elem_is_adjacent(elem)) { + len_before = RTE_PTR_DIFF(hide_start, elem); + if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split before */ + split_elem(elem, hide_start); + + prev = elem; + elem = hide_start; + + malloc_elem_free_list_insert(prev); + } else if (len_before > 0) { + RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n"); + return; + } + } + + remove_elem(elem); +} + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size) +{ + const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD; + + /* if we request a smaller size, then always return ok */ + if (elem->size >= new_size) + return 0; + + /* check if there is a next element, it's free and adjacent */ + if (!elem->next || elem->next->state != ELEM_FREE || + !next_elem_is_adjacent(elem)) + return -1; + if (elem->size + elem->next->size < new_size) + return -1; + + /* we now know the element fits, so remove from free list, + * join the two + */ + malloc_elem_free_list_remove(elem->next); + join_elem(elem, elem->next); + + if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) { + /* now we have a big block together. 
Lets cut it down a bit, by splitting */ + struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size); + split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE); + split_elem(elem, split_pt); + malloc_elem_free_list_insert(split_pt); + } + return 0; +} + +static inline const char * +elem_state_to_str(enum elem_state state) +{ + switch (state) { + case ELEM_PAD: + return "PAD"; + case ELEM_BUSY: + return "BUSY"; + case ELEM_FREE: + return "FREE"; + } + return "ERROR"; +} + +void +malloc_elem_dump(const struct malloc_elem *elem, FILE *f) +{ + fprintf(f, "Malloc element at %p (%s)\n", elem, + elem_state_to_str(elem->state)); + fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad); + fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h new file mode 100644 index 000000000..a1e5f7f02 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef MALLOC_ELEM_H_ +#define MALLOC_ELEM_H_ + +#include <stdbool.h> + +#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) + +/* dummy definition of struct so we can use pointers to it in malloc_elem struct */ +struct malloc_heap; + +enum elem_state { + ELEM_FREE = 0, + ELEM_BUSY, + ELEM_PAD /* element is a padding-only header */ +}; + +struct malloc_elem { + struct malloc_heap *heap; + struct malloc_elem *volatile prev; + /**< points to prev elem in memseg */ + struct malloc_elem *volatile next; + /**< points to next elem in memseg */ + LIST_ENTRY(malloc_elem) free_list; + /**< list of free elements in heap */ + struct rte_memseg_list *msl; + volatile enum elem_state state; + uint32_t pad; + size_t size; + struct malloc_elem *orig_elem; + size_t orig_size; +#ifdef RTE_MALLOC_DEBUG + uint64_t header_cookie; /* Cookie marking start of data */ + /* trailer cookie at start + size */ +#endif +} __rte_cache_aligned; + +#ifndef RTE_MALLOC_DEBUG +static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; + +/* dummy function - just check if pointer is non-null */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; } + +/* dummy function - no header if malloc_debug is not enabled */ +static inline void +set_header(struct malloc_elem *elem __rte_unused){ } + +/* dummy function - no trailer if malloc_debug is not enabled */ +static inline void +set_trailer(struct malloc_elem *elem __rte_unused){ } + + +#else +static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE; + +#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. 
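+ * Written into the element by set_header() when RTE_MALLOC_DEBUG
+ * is enabled.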
*/ +#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +/* define macros to make referencing the header and trailer cookies easier */ +#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \ + elem->size - MALLOC_ELEM_TRAILER_LEN))) +#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie) + +static inline void +set_header(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE; +} + +static inline void +set_trailer(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE; +} + +/* check that the header and trailer cookies are set correctly */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem) +{ + return elem != NULL && + MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE && + MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE; +} + +#endif + +static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem); +#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN) + +/* + * Given a pointer to the start of a memory block returned by malloc, get + * the actual malloc_elem header for that block. + */ +static inline struct malloc_elem * +malloc_elem_from_data(const void *data) +{ + if (data == NULL) + return NULL; + + struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN); + if (!malloc_elem_cookies_ok(elem)) + return NULL; + return elem->state != ELEM_PAD ? elem: RTE_PTR_SUB(elem, elem->pad); +} + +/* + * initialise a malloc_elem header + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, + struct rte_memseg_list *msl, + size_t size, + struct malloc_elem *orig_elem, + size_t orig_size); + +void +malloc_elem_insert(struct malloc_elem *elem); + +/* + * return true if the current malloc_elem can hold a block of data + * of the requested size and with the requested alignment + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, + unsigned int align, size_t bound, bool contig); + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, + unsigned int align, size_t bound, bool contig); + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +struct malloc_elem * +malloc_elem_free(struct malloc_elem *elem); + +struct malloc_elem * +malloc_elem_join_adjacent_free(struct malloc_elem *elem); + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size); + +void +malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len); + +void +malloc_elem_free_list_remove(struct malloc_elem *elem); + +/* + * dump contents of malloc elem to a file. + */ +void +malloc_elem_dump(const struct malloc_elem *elem, FILE *f); + +/* + * Given an element size, compute its freelist index. + */ +size_t +malloc_elem_free_list_index(size_t size); + +/* + * Add element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem); + +/* + * Find biggest IOVA-contiguous zone within an element with specified alignment. 
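+ * Returns the zone length in bytes, or 0 when the aligned start
+ * already lies past the element's usable data.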
+ */ +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align); + +#endif /* MALLOC_ELEM_H_ */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c new file mode 100644 index 000000000..bd5065698 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c @@ -0,0 +1,1367 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_memory.h> +#include <rte_errno.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_launch.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_common.h> +#include <rte_string_fns.h> +#include <rte_spinlock.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_atomic.h> +#include <rte_fbarray.h> + +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" +#include "eal_memcfg.h" +#include "eal_private.h" +#include "malloc_elem.h" +#include "malloc_heap.h" +#include "malloc_mp.h" + +/* start external socket ID's at a very high number */ +#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */ +#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES)) + +static unsigned +check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) +{ + unsigned check_flag = 0; + + if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) + return 1; + + switch (hugepage_sz) { + case RTE_PGSIZE_256K: + check_flag = RTE_MEMZONE_256KB; + break; + case RTE_PGSIZE_2M: + check_flag = RTE_MEMZONE_2MB; + break; + case RTE_PGSIZE_16M: + check_flag = RTE_MEMZONE_16MB; + break; + case RTE_PGSIZE_256M: + check_flag = RTE_MEMZONE_256MB; + break; + case RTE_PGSIZE_512M: + check_flag = RTE_MEMZONE_512MB; + break; + case RTE_PGSIZE_1G: + check_flag = RTE_MEMZONE_1GB; + break; + case RTE_PGSIZE_4G: + check_flag = RTE_MEMZONE_4GB; + break; + case RTE_PGSIZE_16G: + check_flag = RTE_MEMZONE_16GB; + } + + return check_flag & flags; +} + +int +malloc_socket_to_heap_id(unsigned int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i; + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + + if (heap->socket_id == socket_id) + return i; + } + return -1; +} + +/* + * Expand the heap with a memory area. 
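+ * The area becomes one free element that is inserted into the heap's
+ * element list, merged with adjacent free neighbours and then placed
+ * on the appropriate free list.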
+ */ +static struct malloc_elem * +malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, + void *start, size_t len) +{ + struct malloc_elem *elem = start; + + malloc_elem_init(elem, heap, msl, len, elem, len); + + malloc_elem_insert(elem); + + elem = malloc_elem_join_adjacent_free(elem); + + malloc_elem_free_list_insert(elem); + + return elem; +} + +static int +malloc_add_seg(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, size_t len, void *arg __rte_unused) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *found_msl; + struct malloc_heap *heap; + int msl_idx, heap_idx; + + if (msl->external) + return 0; + + heap_idx = malloc_socket_to_heap_id(msl->socket_id); + if (heap_idx < 0) { + RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n"); + return -1; + } + heap = &mcfg->malloc_heaps[heap_idx]; + + /* msl is const, so find it */ + msl_idx = msl - mcfg->memsegs; + + if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) + return -1; + + found_msl = &mcfg->memsegs[msl_idx]; + + malloc_heap_add_memory(heap, found_msl, ms->addr, len); + + heap->total_size += len; + + RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20, + msl->socket_id); + return 0; +} + +/* + * Iterates through the freelist for a heap to find a free element + * which can store data of the required size and with the requested alignment. + * If size is 0, find the biggest available elem. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_suitable_element(struct malloc_heap *heap, size_t size, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + size_t idx; + struct malloc_elem *elem, *alt_elem = NULL; + + for (idx = malloc_elem_free_list_index(size); + idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + if (malloc_elem_can_hold(elem, size, align, bound, + contig)) { + if (check_hugepage_sz(flags, + elem->msl->page_sz)) + return elem; + if (alt_elem == NULL) + alt_elem = elem; + } + } + } + + if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) + return alt_elem; + + return NULL; +} + +/* + * Iterates through the freelist for a heap to find a free element with the + * biggest size and requested alignment. Will also set size to whatever element + * size that was found. + * Returns null on failure, or pointer to element on success. 
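+ *
+ * Sketch of the intended use (hypothetical arguments):
+ *
+ *   size_t sz;
+ *   struct malloc_elem *e = find_biggest_element(heap, &sz, 0,
+ *           RTE_CACHE_LINE_SIZE, false);
+ *   // e is the largest free element, usable for sz bytes,
+ *   // or NULL when every free list is empty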
+ */ +static struct malloc_elem * +find_biggest_element(struct malloc_heap *heap, size_t *size, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem, *max_elem = NULL; + size_t idx, max_size = 0; + + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + size_t cur_size; + if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 && + !check_hugepage_sz(flags, + elem->msl->page_sz)) + continue; + if (contig) { + cur_size = + malloc_elem_find_max_iova_contig(elem, + align); + } else { + void *data_start = RTE_PTR_ADD(elem, + MALLOC_ELEM_HEADER_LEN); + void *data_end = RTE_PTR_ADD(elem, elem->size - + MALLOC_ELEM_TRAILER_LEN); + void *aligned = RTE_PTR_ALIGN_CEIL(data_start, + align); + /* check if aligned data start is beyond end */ + if (aligned >= data_end) + continue; + cur_size = RTE_PTR_DIFF(data_end, aligned); + } + if (cur_size > max_size) { + max_size = cur_size; + max_elem = elem; + } + } + } + + *size = max_size; + return max_elem; +} + +/* + * Main function to allocate a block of memory from the heap. + * It locks the free list, scans it, and adds a new memseg if the + * scan fails. Once the new memseg is added, it re-scans and should return + * the new element after releasing the lock. + */ +static void * +heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + struct malloc_elem *elem; + + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + + /* roundup might cause an overflow */ + if (size == 0) + return NULL; + elem = find_suitable_element(heap, size, flags, align, bound, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound, contig); + + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + + return elem == NULL ? NULL : (void *)(&elem[1]); +} + +static void * +heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem; + size_t size; + + align = RTE_CACHE_LINE_ROUNDUP(align); + + elem = find_biggest_element(heap, &size, flags, align, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, 0, contig); + + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + + return elem == NULL ? 
NULL : (void *)(&elem[1]);
+}
+
+/* this function is exposed in malloc_mp.h */
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+		struct malloc_elem *elem, void *map_addr, size_t map_len)
+{
+	if (elem != NULL) {
+		malloc_elem_free_list_remove(elem);
+		malloc_elem_hide_region(elem, map_addr, map_len);
+	}
+
+	eal_memalloc_free_seg_bulk(ms, n_segs);
+}
+
+/* this function is exposed in malloc_mp.h */
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+		int socket, unsigned int flags, size_t align, size_t bound,
+		bool contig, struct rte_memseg **ms, int n_segs)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl;
+	struct malloc_elem *elem = NULL;
+	size_t alloc_sz;
+	int allocd_pages;
+	void *ret, *map_addr;
+
+	alloc_sz = (size_t)pg_sz * n_segs;
+
+	/* first, check if we're allowed to allocate this memory */
+	if (eal_memalloc_mem_alloc_validate(socket,
+			heap->total_size + alloc_sz) < 0) {
+		RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n");
+		return NULL;
+	}
+
+	allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
+			socket, true);
+
+	/* make sure we've allocated our pages... */
+	if (allocd_pages < 0)
+		return NULL;
+
+	map_addr = ms[0]->addr;
+	msl = rte_mem_virt2memseg_list(map_addr);
+
+	/* check if we wanted contiguous memory but didn't get it */
+	if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
+		RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+				__func__);
+		goto fail;
+	}
+
+	/*
+	 * Once we have all the memseg lists configured, if there is a DMA mask
+	 * set, check that the IOVA addresses are not out of range. Otherwise
+	 * a device using that DMA mask could have problems with the mapped
+	 * memory.
+	 *
+	 * There are two situations when this can happen:
+	 * 1) memory initialization
+	 * 2) dynamic memory allocation
+	 *
+	 * For 1), an error when checking the DMA mask means the app cannot be
+	 * executed. For 2), it means the new memory cannot be added.
+	 */
+	if (mcfg->dma_maskbits &&
+	    rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+		/*
+		 * Currently this can only happen if IOMMU is enabled
+		 * and the address width supported by the IOMMU hw is
+		 * not enough for using the memory mapped IOVAs.
+		 *
+		 * If IOVA is VA, advise trying '--iova-mode pa', which
+		 * could resolve some situations where IOVA VA is not
+		 * really needed.
+		 */
+		RTE_LOG(ERR, EAL,
+			"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n",
+			__func__);
+
+		/*
+		 * If IOVA is VA and it is possible to run with IOVA PA
+		 * because the user is root, give advice for solving the
+		 * problem.
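+		 *
+		 * An illustrative invocation (the application binary name
+		 * here is hypothetical):
+		 *
+		 *   ./dpdk-app -l 0-1 --iova-mode pa ...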
+ */ + if ((rte_eal_iova_mode() == RTE_IOVA_VA) && + rte_eal_using_phys_addrs()) + RTE_LOG(ERR, EAL, + "%s(): Please try initializing EAL with --iova-mode=pa parameter\n", + __func__); + goto fail; + } + + /* add newly minted memsegs to malloc heap */ + elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); + + /* try once more, as now we have allocated new memory */ + ret = find_suitable_element(heap, elt_size, flags, align, bound, + contig); + + if (ret == NULL) + goto fail; + + return elem; + +fail: + rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); + return NULL; +} + +static int +try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, + size_t elt_size, int socket, unsigned int flags, size_t align, + size_t bound, bool contig) +{ + struct malloc_elem *elem; + struct rte_memseg **ms; + void *map_addr; + size_t alloc_sz; + int n_segs; + bool callback_triggered = false; + + alloc_sz = RTE_ALIGN_CEIL(align + elt_size + + MALLOC_ELEM_TRAILER_LEN, pg_sz); + n_segs = alloc_sz / pg_sz; + + /* we can't know in advance how many pages we'll need, so we malloc */ + ms = malloc(sizeof(*ms) * n_segs); + if (ms == NULL) + return -1; + memset(ms, 0, sizeof(*ms) * n_segs); + + elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align, + bound, contig, ms, n_segs); + + if (elem == NULL) + goto free_ms; + + map_addr = ms[0]->addr; + + /* notify user about changes in memory map */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); + + /* notify other processes that this has happened */ + if (request_sync()) { + /* we couldn't ensure all processes have mapped memory, + * so free it back and notify everyone that it's been + * freed back. + * + * technically, we could've avoided adding memory addresses to + * the map, but that would've led to inconsistent behavior + * between primary and secondary processes, as those get + * callbacks during sync. therefore, force primary process to + * do alloc-and-rollback syncs as well. 
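+		 *
+		 * resulting flow (sketch): map the pages, notify subscribers
+		 * with RTE_MEM_EVENT_ALLOC, then request_sync(); on failure,
+		 * notify RTE_MEM_EVENT_FREE, roll the pages back and sync
+		 * once more.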
+ */ + callback_triggered = true; + goto free_elem; + } + heap->total_size += alloc_sz; + + RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n", + socket, alloc_sz >> 20ULL); + + free(ms); + + return 0; + +free_elem: + if (callback_triggered) + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + map_addr, alloc_sz); + + rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); + + request_sync(); +free_ms: + free(ms); + + return -1; +} + +static int +try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz, + size_t elt_size, int socket, unsigned int flags, size_t align, + size_t bound, bool contig) +{ + struct malloc_mp_req req; + int req_result; + + memset(&req, 0, sizeof(req)); + + req.t = REQ_TYPE_ALLOC; + req.alloc_req.align = align; + req.alloc_req.bound = bound; + req.alloc_req.contig = contig; + req.alloc_req.flags = flags; + req.alloc_req.elt_size = elt_size; + req.alloc_req.page_sz = pg_sz; + req.alloc_req.socket = socket; + req.alloc_req.heap = heap; /* it's in shared memory */ + + req_result = request_to_primary(&req); + + if (req_result != 0) + return -1; + + if (req.result != REQ_RESULT_SUCCESS) + return -1; + + return 0; +} + +static int +try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, + int socket, unsigned int flags, size_t align, size_t bound, + bool contig) +{ + int ret; + + rte_mcfg_mem_write_lock(); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket, + flags, align, bound, contig); + } else { + ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket, + flags, align, bound, contig); + } + + rte_mcfg_mem_write_unlock(); + return ret; +} + +static int +compare_pagesz(const void *a, const void *b) +{ + const struct rte_memseg_list * const*mpa = a; + const struct rte_memseg_list * const*mpb = b; + const struct rte_memseg_list *msla = *mpa; + const struct rte_memseg_list *mslb = *mpb; + uint64_t pg_sz_a = msla->page_sz; + uint64_t pg_sz_b = mslb->page_sz; + + if (pg_sz_a < pg_sz_b) + return -1; + if (pg_sz_a > pg_sz_b) + return 1; + return 0; +} + +static int +alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS]; + struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS]; + uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS]; + uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS]; + uint64_t prev_pg_sz; + int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz; + bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0; + unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + void *ret; + + memset(requested_msls, 0, sizeof(requested_msls)); + memset(other_msls, 0, sizeof(other_msls)); + memset(requested_pg_sz, 0, sizeof(requested_pg_sz)); + memset(other_pg_sz, 0, sizeof(other_pg_sz)); + + /* + * go through memseg list and take note of all the page sizes available, + * and if any of them were specifically requested by the user. 
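+	 *
+	 * e.g. with 2M and 1G memseg lists present and RTE_MEMZONE_2MB
+	 * requested, the 2M lists land in requested_msls[] while the 1G
+	 * lists go to other_msls[], which are only tried when
+	 * RTE_MEMZONE_SIZE_HINT_ONLY is also set.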
+ */ + n_requested_msls = 0; + n_other_msls = 0; + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + + if (msl->socket_id != socket) + continue; + + if (msl->base_va == NULL) + continue; + + /* if pages of specific size were requested */ + if (size_flags != 0 && check_hugepage_sz(size_flags, + msl->page_sz)) + requested_msls[n_requested_msls++] = msl; + else if (size_flags == 0 || size_hint) + other_msls[n_other_msls++] = msl; + } + + /* sort the lists, smallest first */ + qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]), + compare_pagesz); + qsort(other_msls, n_other_msls, sizeof(other_msls[0]), + compare_pagesz); + + /* now, extract page sizes we are supposed to try */ + prev_pg_sz = 0; + n_requested_pg_sz = 0; + for (i = 0; i < n_requested_msls; i++) { + uint64_t pg_sz = requested_msls[i]->page_sz; + + if (prev_pg_sz != pg_sz) { + requested_pg_sz[n_requested_pg_sz++] = pg_sz; + prev_pg_sz = pg_sz; + } + } + prev_pg_sz = 0; + n_other_pg_sz = 0; + for (i = 0; i < n_other_msls; i++) { + uint64_t pg_sz = other_msls[i]->page_sz; + + if (prev_pg_sz != pg_sz) { + other_pg_sz[n_other_pg_sz++] = pg_sz; + prev_pg_sz = pg_sz; + } + } + + /* finally, try allocating memory of specified page sizes, starting from + * the smallest sizes + */ + for (i = 0; i < n_requested_pg_sz; i++) { + uint64_t pg_sz = requested_pg_sz[i]; + + /* + * do not pass the size hint here, as user expects other page + * sizes first, before resorting to best effort allocation. + */ + if (!try_expand_heap(heap, pg_sz, size, socket, size_flags, + align, bound, contig)) + return 0; + } + if (n_other_pg_sz == 0) + return -1; + + /* now, check if we can reserve anything with size hint */ + ret = find_suitable_element(heap, size, flags, align, bound, contig); + if (ret != NULL) + return 0; + + /* + * we still couldn't reserve memory, so try expanding heap with other + * page sizes, if there are any + */ + for (i = 0; i < n_other_pg_sz; i++) { + uint64_t pg_sz = other_pg_sz[i]; + + if (!try_expand_heap(heap, pg_sz, size, socket, flags, + align, bound, contig)) + return 0; + } + return -1; +} + +/* this will try lower page sizes first */ +static void * +malloc_heap_alloc_on_heap_id(const char *type, size_t size, + unsigned int heap_id, unsigned int flags, size_t align, + size_t bound, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; + unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + int socket_id; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 1 : align; + + /* for legacy mode, try once and with all flags */ + if (internal_config.legacy_mem) { + ret = heap_alloc(heap, type, size, flags, align, bound, contig); + goto alloc_unlock; + } + + /* + * we do not pass the size hint here, because even if allocation fails, + * we may still be able to allocate memory from appropriate page sizes, + * we just need to request more memory first. + */ + + socket_id = rte_socket_id_by_idx(heap_id); + /* + * if socket ID is negative, we cannot find a socket ID for this heap - + * which means it's an external heap. those can have unexpected page + * sizes, so if the user asked to allocate from there - assume user + * knows what they're doing, and allow allocating from there with any + * page size flags. 
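+	 *
+	 * (concretely, RTE_MEMZONE_SIZE_HINT_ONLY is OR'd into size_flags
+	 * just below, demoting any specific page-size request to a hint)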
+ */ + if (socket_id < 0) + size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY; + + ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); + if (ret != NULL) + goto alloc_unlock; + + /* if socket ID is invalid, this is an external heap */ + if (socket_id < 0) + goto alloc_unlock; + + if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align, + bound, contig)) { + ret = heap_alloc(heap, type, size, flags, align, bound, contig); + + /* this should have succeeded */ + if (ret == NULL) + RTE_LOG(ERR, EAL, "Error allocating from heap\n"); + } +alloc_unlock: + rte_spinlock_unlock(&(heap->lock)); + return ret; +} + +void * +malloc_heap_alloc(const char *type, size_t size, int socket_arg, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + int socket, heap_id, i; + void *ret; + + /* return NULL if size is 0 or alignment is not power-of-2 */ + if (size == 0 || (align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) + return NULL; + + ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align, + bound, contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps. we are only iterating through native DPDK sockets, + * so external heaps won't be included. + */ + for (i = 0; i < (int) rte_socket_count(); i++) { + if (i == heap_id) + continue; + ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align, + bound, contig); + if (ret != NULL) + return ret; + } + return NULL; +} + +static void * +heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id, + unsigned int flags, size_t align, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 
1 : align; + + ret = heap_alloc_biggest(heap, type, flags, align, contig); + + rte_spinlock_unlock(&(heap->lock)); + + return ret; +} + +void * +malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, + size_t align, bool contig) +{ + int socket, i, cur_socket, heap_id; + void *ret; + + /* return NULL if align is not power-of-2 */ + if ((align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) + return NULL; + + ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align, + contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < (int) rte_socket_count(); i++) { + cur_socket = rte_socket_id_by_idx(i); + if (cur_socket == socket) + continue; + ret = heap_alloc_biggest_on_heap_id(type, i, flags, align, + contig); + if (ret != NULL) + return ret; + } + return NULL; +} + +/* this function is exposed in malloc_mp.h */ +int +malloc_heap_free_pages(void *aligned_start, size_t aligned_len) +{ + int n_segs, seg_idx, max_seg_idx; + struct rte_memseg_list *msl; + size_t page_sz; + + msl = rte_mem_virt2memseg_list(aligned_start); + if (msl == NULL) + return -1; + + page_sz = (size_t)msl->page_sz; + n_segs = aligned_len / page_sz; + seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz; + max_seg_idx = seg_idx + n_segs; + + for (; seg_idx < max_seg_idx; seg_idx++) { + struct rte_memseg *ms; + + ms = rte_fbarray_get(&msl->memseg_arr, seg_idx); + eal_memalloc_free_seg(ms); + } + return 0; +} + +int +malloc_heap_free(struct malloc_elem *elem) +{ + struct malloc_heap *heap; + void *start, *aligned_start, *end, *aligned_end; + size_t len, aligned_len, page_sz; + struct rte_memseg_list *msl; + unsigned int i, n_segs, before_space, after_space; + int ret; + + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + /* elem may be merged with previous element, so keep heap address */ + heap = elem->heap; + msl = elem->msl; + page_sz = (size_t)msl->page_sz; + + rte_spinlock_lock(&(heap->lock)); + + /* mark element as free */ + elem->state = ELEM_FREE; + + elem = malloc_elem_free(elem); + + /* anything after this is a bonus */ + ret = 0; + + /* ...of which we can't avail if we are in legacy mode, or if this is an + * externally allocated segment. + */ + if (internal_config.legacy_mem || (msl->external > 0)) + goto free_unlock; + + /* check if we can free any memory back to the system */ + if (elem->size < page_sz) + goto free_unlock; + + /* if user requested to match allocations, the sizes must match - if not, + * we will defer freeing these hugepages until the entire original allocation + * can be freed + */ + if (internal_config.match_allocations && elem->size != elem->orig_size) + goto free_unlock; + + /* probably, but let's make sure, as we may not be using up full page */ + start = elem; + len = elem->size; + aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz); + end = RTE_PTR_ADD(elem, len); + aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz); + + aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); + + /* can't free anything */ + if (aligned_len < page_sz) + goto free_unlock; + + /* we can free something. 
however, some of these pages may be marked as + * unfreeable, so also check that as well + */ + n_segs = aligned_len / page_sz; + for (i = 0; i < n_segs; i++) { + const struct rte_memseg *tmp = + rte_mem_virt2memseg(aligned_start, msl); + + if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) { + /* this is an unfreeable segment, so move start */ + aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len); + } + } + + /* recalculate length and number of segments */ + aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); + n_segs = aligned_len / page_sz; + + /* check if we can still free some pages */ + if (n_segs == 0) + goto free_unlock; + + /* We're not done yet. We also have to check if by freeing space we will + * be leaving free elements that are too small to store new elements. + * Check if we have enough space in the beginning and at the end, or if + * start/end are exactly page aligned. + */ + before_space = RTE_PTR_DIFF(aligned_start, elem); + after_space = RTE_PTR_DIFF(end, aligned_end); + if (before_space != 0 && + before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* There is not enough space before start, but we may be able to + * move the start forward by one page. + */ + if (n_segs == 1) + goto free_unlock; + + /* move start */ + aligned_start = RTE_PTR_ADD(aligned_start, page_sz); + aligned_len -= page_sz; + n_segs--; + } + if (after_space != 0 && after_space < + MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* There is not enough space after end, but we may be able to + * move the end backwards by one page. + */ + if (n_segs == 1) + goto free_unlock; + + /* move end */ + aligned_end = RTE_PTR_SUB(aligned_end, page_sz); + aligned_len -= page_sz; + n_segs--; + } + + /* now we can finally free us some pages */ + + rte_mcfg_mem_write_lock(); + + /* + * we allow secondary processes to clear the heap of this allocated + * memory because it is safe to do so, as even if notifications about + * unmapped pages don't make it to other processes, heap is shared + * across all processes, and will become empty of this memory anyway, + * and nothing can allocate it back unless primary process will be able + * to deliver allocation message to every single running process. + */ + + malloc_elem_free_list_remove(elem); + + malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len); + + heap->total_size -= aligned_len; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* notify user about changes in memory map */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + aligned_start, aligned_len); + + /* don't care if any of this fails */ + malloc_heap_free_pages(aligned_start, aligned_len); + + request_sync(); + } else { + struct malloc_mp_req req; + + memset(&req, 0, sizeof(req)); + + req.t = REQ_TYPE_FREE; + req.free_req.addr = aligned_start; + req.free_req.len = aligned_len; + + /* + * we request primary to deallocate pages, but we don't do it + * in this thread. instead, we notify primary that we would like + * to deallocate pages, and this process will receive another + * request (in parallel) that will do it for us on another + * thread. + * + * we also don't really care if this succeeds - the data is + * already removed from the heap, so it is, for all intents and + * purposes, hidden from the rest of DPDK even if some other + * process (including this one) may have these pages mapped. + * + * notifications about deallocated memory happen during sync. 
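+		 *
+		 * (request_to_primary() does still block for up to
+		 * MP_TIMEOUT_S waiting for the MP_ACTION_RESPONSE, but its
+		 * return value is deliberately ignored here)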
+ */ + request_to_primary(&req); + } + + RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n", + msl->socket_id, aligned_len >> 20ULL); + + rte_mcfg_mem_write_unlock(); +free_unlock: + rte_spinlock_unlock(&(heap->lock)); + return ret; +} + +int +malloc_heap_resize(struct malloc_elem *elem, size_t size) +{ + int ret; + + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + rte_spinlock_lock(&(elem->heap->lock)); + + ret = malloc_elem_resize(elem, size); + + rte_spinlock_unlock(&(elem->heap->lock)); + + return ret; +} + +/* + * Function to retrieve data for a given heap + */ +int +malloc_heap_get_stats(struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats) +{ + size_t idx; + struct malloc_elem *elem; + + rte_spinlock_lock(&heap->lock); + + /* Initialise variables for heap */ + socket_stats->free_count = 0; + socket_stats->heap_freesz_bytes = 0; + socket_stats->greatest_free_size = 0; + + /* Iterate through free list */ + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + socket_stats->free_count++; + socket_stats->heap_freesz_bytes += elem->size; + if (elem->size > socket_stats->greatest_free_size) + socket_stats->greatest_free_size = elem->size; + } + } + /* Get stats on overall heap and allocated memory on this heap */ + socket_stats->heap_totalsz_bytes = heap->total_size; + socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - + socket_stats->heap_freesz_bytes); + socket_stats->alloc_count = heap->alloc_count; + + rte_spinlock_unlock(&heap->lock); + return 0; +} + +/* + * Function to retrieve data for a given heap + */ +void +malloc_heap_dump(struct malloc_heap *heap, FILE *f) +{ + struct malloc_elem *elem; + + rte_spinlock_lock(&heap->lock); + + fprintf(f, "Heap size: 0x%zx\n", heap->total_size); + fprintf(f, "Heap alloc count: %u\n", heap->alloc_count); + + elem = heap->first; + while (elem) { + malloc_elem_dump(elem, f); + elem = elem->next; + } + + rte_spinlock_unlock(&heap->lock); +} + +static int +destroy_elem(struct malloc_elem *elem, size_t len) +{ + struct malloc_heap *heap = elem->heap; + + /* notify all subscribers that a memory area is going to be removed */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len); + + /* this element can be removed */ + malloc_elem_free_list_remove(elem); + malloc_elem_hide_region(elem, elem, len); + + heap->total_size -= len; + + memset(elem, 0, sizeof(*elem)); + + return 0; +} + +struct rte_memseg_list * +malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz, const char *seg_name, + unsigned int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + char fbarray_name[RTE_FBARRAY_NAME_LEN]; + struct rte_memseg_list *msl = NULL; + struct rte_fbarray *arr; + size_t seg_len = n_pages * page_sz; + unsigned int i; + + /* first, find a free memseg list */ + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *tmp = &mcfg->memsegs[i]; + if (tmp->base_va == NULL) { + msl = tmp; + break; + } + } + if (msl == NULL) { + RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n"); + rte_errno = ENOSPC; + return NULL; + } + + snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p", + seg_name, va_addr); + + /* create the backing fbarray */ + if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages, + sizeof(struct rte_memseg)) < 0) { + RTE_LOG(ERR, EAL, "Couldn't create 
fbarray backing the memseg list\n"); + return NULL; + } + arr = &msl->memseg_arr; + + /* fbarray created, fill it up */ + for (i = 0; i < n_pages; i++) { + struct rte_memseg *ms; + + rte_fbarray_set_used(arr, i); + ms = rte_fbarray_get(arr, i); + ms->addr = RTE_PTR_ADD(va_addr, i * page_sz); + ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i]; + ms->hugepage_sz = page_sz; + ms->len = page_sz; + ms->nchannel = rte_memory_get_nchannel(); + ms->nrank = rte_memory_get_nrank(); + ms->socket_id = socket_id; + } + + /* set up the memseg list */ + msl->base_va = va_addr; + msl->page_sz = page_sz; + msl->socket_id = socket_id; + msl->len = seg_len; + msl->version = 0; + msl->external = 1; + + return msl; +} + +struct extseg_walk_arg { + void *va_addr; + size_t len; + struct rte_memseg_list *msl; +}; + +static int +extseg_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct extseg_walk_arg *wa = arg; + + if (msl->base_va == wa->va_addr && msl->len == wa->len) { + unsigned int found_idx; + + /* msl is const */ + found_idx = msl - mcfg->memsegs; + wa->msl = &mcfg->memsegs[found_idx]; + return 1; + } + return 0; +} + +struct rte_memseg_list * +malloc_heap_find_external_seg(void *va_addr, size_t len) +{ + struct extseg_walk_arg wa; + int res; + + wa.va_addr = va_addr; + wa.len = len; + + res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa); + + if (res != 1) { + /* 0 means nothing was found, -1 shouldn't happen */ + if (res == 0) + rte_errno = ENOENT; + return NULL; + } + return wa.msl; +} + +int +malloc_heap_destroy_external_seg(struct rte_memseg_list *msl) +{ + /* destroy the fbarray backing this memory */ + if (rte_fbarray_destroy(&msl->memseg_arr) < 0) + return -1; + + /* reset the memseg list */ + memset(msl, 0, sizeof(*msl)); + + return 0; +} + +int +malloc_heap_add_external_memory(struct malloc_heap *heap, + struct rte_memseg_list *msl) +{ + /* erase contents of new memory */ + memset(msl->base_va, 0, msl->len); + + /* now, add newly minted memory to the malloc heap */ + malloc_heap_add_memory(heap, msl, msl->base_va, msl->len); + + heap->total_size += msl->len; + + /* all done! */ + RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n", + heap->name, msl->base_va); + + /* notify all subscribers that a new memory area has been added */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, + msl->base_va, msl->len); + + return 0; +} + +int +malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, + size_t len) +{ + struct malloc_elem *elem = heap->first; + + /* find element with specified va address */ + while (elem != NULL && elem != va_addr) { + elem = elem->next; + /* stop if we've blown past our VA */ + if (elem > (struct malloc_elem *)va_addr) { + rte_errno = ENOENT; + return -1; + } + } + /* check if element was found */ + if (elem == NULL || elem->msl->len != len) { + rte_errno = ENOENT; + return -1; + } + /* if element's size is not equal to segment len, segment is busy */ + if (elem->state == ELEM_BUSY || elem->size != len) { + rte_errno = EBUSY; + return -1; + } + return destroy_elem(elem, len); +} + +int +malloc_heap_create(struct malloc_heap *heap, const char *heap_name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + uint32_t next_socket_id = mcfg->next_socket_id; + + /* prevent overflow. did you really create 2 billion heaps??? 
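+	 * (external heaps get synthetic socket IDs counting up from
+	 * EXTERNAL_HEAP_MIN_SOCKET_ID - see rte_eal_malloc_heap_init() - so
+	 * the ID space is finite)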
*/ + if (next_socket_id > INT32_MAX) { + RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); + rte_errno = ENOSPC; + return -1; + } + + /* initialize empty heap */ + heap->alloc_count = 0; + heap->first = NULL; + heap->last = NULL; + LIST_INIT(heap->free_head); + rte_spinlock_init(&heap->lock); + heap->total_size = 0; + heap->socket_id = next_socket_id; + + /* we hold a global mem hotplug writelock, so it's safe to increment */ + mcfg->next_socket_id++; + + /* set up name */ + strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); + return 0; +} + +int +malloc_heap_destroy(struct malloc_heap *heap) +{ + if (heap->alloc_count != 0) { + RTE_LOG(ERR, EAL, "Heap is still in use\n"); + rte_errno = EBUSY; + return -1; + } + if (heap->first != NULL || heap->last != NULL) { + RTE_LOG(ERR, EAL, "Heap still contains memory segments\n"); + rte_errno = EBUSY; + return -1; + } + if (heap->total_size != 0) + RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n"); + + /* after this, the lock will be dropped */ + memset(heap, 0, sizeof(*heap)); + + return 0; +} + +int +rte_eal_malloc_heap_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int i; + + if (internal_config.match_allocations) { + RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n"); + } + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* assign min socket ID to external heaps */ + mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID; + + /* assign names to default DPDK heaps */ + for (i = 0; i < rte_socket_count(); i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + char heap_name[RTE_HEAP_NAME_MAX_LEN]; + int socket_id = rte_socket_id_by_idx(i); + + snprintf(heap_name, sizeof(heap_name), + "socket_%i", socket_id); + strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); + heap->socket_id = socket_id; + } + } + + + if (register_mp_requests()) { + RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n"); + rte_mcfg_mem_read_unlock(); + return -1; + } + + /* unlock mem hotplug here. it's safe for primary as no requests can + * even come before primary itself is fully initialized, and secondaries + * do not need to initialize the heap. + */ + rte_mcfg_mem_read_unlock(); + + /* secondary process does not need to initialize anything */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + /* add all IOVA-contiguous areas to the heap */ + return rte_memseg_contig_walk(malloc_add_seg, NULL); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h new file mode 100644 index 000000000..772736b53 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef MALLOC_HEAP_H_ +#define MALLOC_HEAP_H_ + +#include <stdbool.h> +#include <sys/queue.h> + +#include <rte_malloc.h> +#include <rte_spinlock.h> + +/* Number of free lists per heap, grouped by size. 
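+ * (free elements are binned by size - see malloc_elem_free_list_index() in
+ * malloc_elem.c - and lookups start from the bin matching the requested
+ * size and walk upward)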
*/ +#define RTE_HEAP_NUM_FREELISTS 13 +#define RTE_HEAP_NAME_MAX_LEN 32 + +/* dummy definition, for pointers */ +struct malloc_elem; + +/** + * Structure to hold malloc heap + */ +struct malloc_heap { + rte_spinlock_t lock; + LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; + struct malloc_elem *volatile first; + struct malloc_elem *volatile last; + + unsigned int alloc_count; + unsigned int socket_id; + size_t total_size; + char name[RTE_HEAP_NAME_MAX_LEN]; +} __rte_cache_aligned; + +#ifdef __cplusplus +extern "C" { +#endif + +static inline unsigned +malloc_get_numa_socket(void) +{ + unsigned socket_id = rte_socket_id(); + + if (socket_id == (unsigned)SOCKET_ID_ANY) + return 0; + + return socket_id; +} + +void * +malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, + size_t align, size_t bound, bool contig); + +void * +malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags, + size_t align, bool contig); + +int +malloc_heap_create(struct malloc_heap *heap, const char *heap_name); + +int +malloc_heap_destroy(struct malloc_heap *heap); + +struct rte_memseg_list * +malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz, const char *seg_name, + unsigned int socket_id); + +struct rte_memseg_list * +malloc_heap_find_external_seg(void *va_addr, size_t len); + +int +malloc_heap_destroy_external_seg(struct rte_memseg_list *msl); + +int +malloc_heap_add_external_memory(struct malloc_heap *heap, + struct rte_memseg_list *msl); + +int +malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, + size_t len); + +int +malloc_heap_free(struct malloc_elem *elem); + +int +malloc_heap_resize(struct malloc_elem *elem, size_t size); + +int +malloc_heap_get_stats(struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats); + +void +malloc_heap_dump(struct malloc_heap *heap, FILE *f); + +int +malloc_socket_to_heap_id(unsigned int socket_id); + +int +rte_eal_malloc_heap_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* MALLOC_HEAP_H_ */ diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c new file mode 100644 index 000000000..1f212f834 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c @@ -0,0 +1,751 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <sys/time.h> + +#include <rte_alarm.h> +#include <rte_errno.h> +#include <rte_string_fns.h> + +#include "eal_memalloc.h" +#include "eal_memcfg.h" + +#include "malloc_elem.h" +#include "malloc_mp.h" + +#define MP_ACTION_SYNC "mp_malloc_sync" +/**< request sent by primary process to notify of changes in memory map */ +#define MP_ACTION_ROLLBACK "mp_malloc_rollback" +/**< request sent by primary process to notify of changes in memory map. this is + * essentially a regular sync request, but we cannot send sync requests while + * another one is in progress, and we might have to - therefore, we do this as + * a separate callback. 
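+ * on the secondary side the rollback action is served by the same
+ * handle_sync() handler - see register_mp_requests() - only the registered
+ * action name differs.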
+ */ +#define MP_ACTION_REQUEST "mp_malloc_request" +/**< request sent by secondary process to ask for allocation/deallocation */ +#define MP_ACTION_RESPONSE "mp_malloc_response" +/**< response sent to secondary process to indicate result of request */ + +/* forward declarations */ +static int +handle_sync_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply); +static int +handle_rollback_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply); + +#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */ + +/* when we're allocating, we need to store some state to ensure that we can + * roll back later + */ +struct primary_alloc_req_state { + struct malloc_heap *heap; + struct rte_memseg **ms; + int ms_len; + struct malloc_elem *elem; + void *map_addr; + size_t map_len; +}; + +enum req_state { + REQ_STATE_INACTIVE = 0, + REQ_STATE_ACTIVE, + REQ_STATE_COMPLETE +}; + +struct mp_request { + TAILQ_ENTRY(mp_request) next; + struct malloc_mp_req user_req; /**< contents of request */ + pthread_cond_t cond; /**< variable we use to time out on this request */ + enum req_state state; /**< indicate status of this request */ + struct primary_alloc_req_state alloc_state; +}; + +/* + * We could've used just a single request, but it may be possible for + * secondaries to timeout earlier than the primary, and send a new request while + * primary is still expecting replies to the old one. Therefore, each new + * request will get assigned a new ID, which is how we will distinguish between + * expected and unexpected messages. + */ +TAILQ_HEAD(mp_request_list, mp_request); +static struct { + struct mp_request_list list; + pthread_mutex_t lock; +} mp_request_list = { + .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list), + .lock = PTHREAD_MUTEX_INITIALIZER +}; + +/** + * General workflow is the following: + * + * Allocation: + * S: send request to primary + * P: attempt to allocate memory + * if failed, sendmsg failure + * if success, send sync request + * S: if received msg of failure, quit + * if received sync request, synchronize memory map and reply with result + * P: if received sync request result + * if success, sendmsg success + * if failure, roll back allocation and send a rollback request + * S: if received msg of success, quit + * if received rollback request, synchronize memory map and reply with result + * P: if received sync request result + * sendmsg sync request result + * S: if received msg, quit + * + * Aside from timeouts, there are three points where we can quit: + * - if allocation failed straight away + * - if allocation and sync request succeeded + * - if allocation succeeded, sync request failed, allocation rolled back and + * rollback request received (irrespective of whether it succeeded or failed) + * + * Deallocation: + * S: send request to primary + * P: attempt to deallocate memory + * if failed, sendmsg failure + * if success, send sync request + * S: if received msg of failure, quit + * if received sync request, synchronize memory map and reply with result + * P: if received sync request result + * sendmsg sync request result + * S: if received msg, quit + * + * There is no "rollback" from deallocation, as it's safe to have some memory + * mapped in some processes - it's absent from the heap, so it won't get used. 
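+ *
+ * In terms of the actions defined above (sketch): secondary-to-primary
+ * requests travel as MP_ACTION_REQUEST carrying REQ_TYPE_ALLOC or
+ * REQ_TYPE_FREE, primary-to-secondary sync and rollback rounds as
+ * MP_ACTION_SYNC and MP_ACTION_ROLLBACK (both carrying REQ_TYPE_SYNC),
+ * and final results come back via MP_ACTION_RESPONSE.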
+ */ + +static struct mp_request * +find_request_by_id(uint64_t id) +{ + struct mp_request *req; + TAILQ_FOREACH(req, &mp_request_list.list, next) { + if (req->user_req.id == id) + break; + } + return req; +} + +/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */ +static uint64_t +get_unique_id(void) +{ + uint64_t id; + do { + id = rte_rand(); + } while (find_request_by_id(id) != NULL); + return id; +} + +/* secondary will respond to sync requests thusly */ +static int +handle_sync(const struct rte_mp_msg *msg, const void *peer) +{ + struct rte_mp_msg reply; + const struct malloc_mp_req *req = + (const struct malloc_mp_req *)msg->param; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply.param; + int ret; + + if (req->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected request from primary\n"); + return -1; + } + + memset(&reply, 0, sizeof(reply)); + + reply.num_fds = 0; + strlcpy(reply.name, msg->name, sizeof(reply.name)); + reply.len_param = sizeof(*resp); + + ret = eal_memalloc_sync_with_primary(); + + resp->t = REQ_TYPE_SYNC; + resp->id = req->id; + resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL; + + rte_mp_reply(&reply, peer); + + return 0; +} + +static int +handle_alloc_request(const struct malloc_mp_req *m, + struct mp_request *req) +{ + const struct malloc_req_alloc *ar = &m->alloc_req; + struct malloc_heap *heap; + struct malloc_elem *elem; + struct rte_memseg **ms; + size_t alloc_sz; + int n_segs; + void *map_addr; + + alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size + + MALLOC_ELEM_TRAILER_LEN, ar->page_sz); + n_segs = alloc_sz / ar->page_sz; + + heap = ar->heap; + + /* we can't know in advance how many pages we'll need, so we malloc */ + ms = malloc(sizeof(*ms) * n_segs); + if (ms == NULL) { + RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n"); + goto fail; + } + memset(ms, 0, sizeof(*ms) * n_segs); + + elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket, + ar->flags, ar->align, ar->bound, ar->contig, ms, + n_segs); + + if (elem == NULL) + goto fail; + + map_addr = ms[0]->addr; + + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); + + /* we have succeeded in allocating memory, but we still need to sync + * with other processes. however, since DPDK IPC is single-threaded, we + * send an asynchronous request and exit this callback. 
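+	 *
+	 * (the async reply is handled by handle_sync_response(), which either
+	 * sends the final MP_ACTION_RESPONSE or starts a rollback round)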
+ */ + + req->alloc_state.ms = ms; + req->alloc_state.ms_len = n_segs; + req->alloc_state.map_addr = map_addr; + req->alloc_state.map_len = alloc_sz; + req->alloc_state.elem = elem; + req->alloc_state.heap = heap; + + return 0; +fail: + free(ms); + return -1; +} + +/* first stage of primary handling requests from secondary */ +static int +handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused) +{ + const struct malloc_mp_req *m = + (const struct malloc_mp_req *)msg->param; + struct mp_request *entry; + int ret; + + /* lock access to request */ + pthread_mutex_lock(&mp_request_list.lock); + + /* make sure it's not a dupe */ + entry = find_request_by_id(m->id); + if (entry != NULL) { + RTE_LOG(ERR, EAL, "Duplicate request id\n"); + goto fail; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n"); + goto fail; + } + + /* erase all data */ + memset(entry, 0, sizeof(*entry)); + + if (m->t == REQ_TYPE_ALLOC) { + ret = handle_alloc_request(m, entry); + } else if (m->t == REQ_TYPE_FREE) { + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + m->free_req.addr, m->free_req.len); + + ret = malloc_heap_free_pages(m->free_req.addr, + m->free_req.len); + } else { + RTE_LOG(ERR, EAL, "Unexpected request from secondary\n"); + goto fail; + } + + if (ret != 0) { + struct rte_mp_msg resp_msg; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)resp_msg.param; + + /* send failure message straight away */ + resp_msg.num_fds = 0; + resp_msg.len_param = sizeof(*resp); + strlcpy(resp_msg.name, MP_ACTION_RESPONSE, + sizeof(resp_msg.name)); + + resp->t = m->t; + resp->result = REQ_RESULT_FAIL; + resp->id = m->id; + + if (rte_mp_sendmsg(&resp_msg)) { + RTE_LOG(ERR, EAL, "Couldn't send response\n"); + goto fail; + } + /* we did not modify the request */ + free(entry); + } else { + struct rte_mp_msg sr_msg; + struct malloc_mp_req *sr = + (struct malloc_mp_req *)sr_msg.param; + struct timespec ts; + + memset(&sr_msg, 0, sizeof(sr_msg)); + + /* we can do something, so send sync request asynchronously */ + sr_msg.num_fds = 0; + sr_msg.len_param = sizeof(*sr); + strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name)); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* sync requests carry no data */ + sr->t = REQ_TYPE_SYNC; + sr->id = m->id; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_async(&sr_msg, &ts, + handle_sync_response); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Couldn't send sync request\n"); + if (m->t == REQ_TYPE_ALLOC) + free(entry->alloc_state.ms); + goto fail; + } + + /* mark request as in progress */ + memcpy(&entry->user_req, m, sizeof(*m)); + entry->state = REQ_STATE_ACTIVE; + + TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next); + } + pthread_mutex_unlock(&mp_request_list.lock); + return 0; +fail: + pthread_mutex_unlock(&mp_request_list.lock); + free(entry); + return -1; +} + +/* callback for asynchronous sync requests for primary. this will either do a + * sendmsg with results, or trigger rollback request. 
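+ * outcome summary: FREE requests always get their result sent back;
+ * successful ALLOCs additionally bump heap->total_size; failed ALLOCs are
+ * rolled back locally and an MP_ACTION_ROLLBACK round is issued first.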
+ */ +static int +handle_sync_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply) +{ + enum malloc_req_result result; + struct mp_request *entry; + const struct malloc_mp_req *mpreq = + (const struct malloc_mp_req *)request->param; + int i; + + /* lock the request */ + pthread_mutex_lock(&mp_request_list.lock); + + entry = find_request_by_id(mpreq->id); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Wrong request ID\n"); + goto fail; + } + + result = REQ_RESULT_SUCCESS; + + if (reply->nb_received != reply->nb_sent) + result = REQ_RESULT_FAIL; + + for (i = 0; i < reply->nb_received; i++) { + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply->msgs[i].param; + + if (resp->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected response to sync request\n"); + result = REQ_RESULT_FAIL; + break; + } + if (resp->id != entry->user_req.id) { + RTE_LOG(ERR, EAL, "Response to wrong sync request\n"); + result = REQ_RESULT_FAIL; + break; + } + if (resp->result == REQ_RESULT_FAIL) { + result = REQ_RESULT_FAIL; + break; + } + } + + if (entry->user_req.t == REQ_TYPE_FREE) { + struct rte_mp_msg msg; + struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param; + + memset(&msg, 0, sizeof(msg)); + + /* this is a free request, just sendmsg result */ + resp->t = REQ_TYPE_FREE; + resp->result = result; + resp->id = entry->user_req.id; + msg.num_fds = 0; + msg.len_param = sizeof(*resp); + strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name)); + + if (rte_mp_sendmsg(&msg)) + RTE_LOG(ERR, EAL, "Could not send message to secondary process\n"); + + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(entry); + } else if (entry->user_req.t == REQ_TYPE_ALLOC && + result == REQ_RESULT_SUCCESS) { + struct malloc_heap *heap = entry->alloc_state.heap; + struct rte_mp_msg msg; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)msg.param; + + memset(&msg, 0, sizeof(msg)); + + heap->total_size += entry->alloc_state.map_len; + + /* result is success, so just notify secondary about this */ + resp->t = REQ_TYPE_ALLOC; + resp->result = result; + resp->id = entry->user_req.id; + msg.num_fds = 0; + msg.len_param = sizeof(*resp); + strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name)); + + if (rte_mp_sendmsg(&msg)) + RTE_LOG(ERR, EAL, "Could not send message to secondary process\n"); + + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(entry->alloc_state.ms); + free(entry); + } else if (entry->user_req.t == REQ_TYPE_ALLOC && + result == REQ_RESULT_FAIL) { + struct rte_mp_msg rb_msg; + struct malloc_mp_req *rb = + (struct malloc_mp_req *)rb_msg.param; + struct timespec ts; + struct primary_alloc_req_state *state = + &entry->alloc_state; + int ret; + + memset(&rb_msg, 0, sizeof(rb_msg)); + + /* we've failed to sync, so do a rollback */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + state->map_addr, state->map_len); + + rollback_expand_heap(state->ms, state->ms_len, state->elem, + state->map_addr, state->map_len); + + /* send rollback request */ + rb_msg.num_fds = 0; + rb_msg.len_param = sizeof(*rb); + strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name)); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* sync requests carry no data */ + rb->t = REQ_TYPE_SYNC; + rb->id = entry->user_req.id; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_async(&rb_msg, &ts, + handle_rollback_response); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n"); 
+
+			/* we couldn't send rollback request, but that's OK -
+			 * secondary will time out, and memory has been removed
+			 * from heap anyway.
+			 */
+			TAILQ_REMOVE(&mp_request_list.list, entry, next);
+			free(state->ms);
+			free(entry);
+			goto fail;
+		}
+	} else {
+		RTE_LOG(ERR, EAL, "Unexpected response to sync request of unknown type\n");
+		goto fail;
+	}
+
+	pthread_mutex_unlock(&mp_request_list.lock);
+	return 0;
+fail:
+	pthread_mutex_unlock(&mp_request_list.lock);
+	return -1;
+}
+
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+		const struct rte_mp_reply *reply __rte_unused)
+{
+	struct rte_mp_msg msg;
+	struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+	const struct malloc_mp_req *mpreq =
+			(const struct malloc_mp_req *)request->param;
+	struct mp_request *entry;
+
+	/* lock the request */
+	pthread_mutex_lock(&mp_request_list.lock);
+
+	memset(&msg, 0, sizeof(msg));
+
+	entry = find_request_by_id(mpreq->id);
+	if (entry == NULL) {
+		RTE_LOG(ERR, EAL, "Wrong request ID\n");
+		goto fail;
+	}
+
+	if (entry->user_req.t != REQ_TYPE_ALLOC) {
+		RTE_LOG(ERR, EAL, "Unexpected active request\n");
+		goto fail;
+	}
+
+	/* we don't care if rollback succeeded, request still failed */
+	resp->t = REQ_TYPE_ALLOC;
+	resp->result = REQ_RESULT_FAIL;
+	resp->id = mpreq->id;
+	msg.num_fds = 0;
+	msg.len_param = sizeof(*resp);
+	strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+	if (rte_mp_sendmsg(&msg))
+		RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+	/* clean up */
+	TAILQ_REMOVE(&mp_request_list.list, entry, next);
+	free(entry->alloc_state.ms);
+	free(entry);
+
+	pthread_mutex_unlock(&mp_request_list.lock);
+	return 0;
+fail:
+	pthread_mutex_unlock(&mp_request_list.lock);
+	return -1;
+}
+
+/* final stage of the request from secondary */
+static int
+handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+	const struct malloc_mp_req *m =
+			(const struct malloc_mp_req *)msg->param;
+	struct mp_request *entry;
+
+	pthread_mutex_lock(&mp_request_list.lock);
+
+	entry = find_request_by_id(m->id);
+	if (entry != NULL) {
+		/* update request status */
+		entry->user_req.result = m->result;
+
+		entry->state = REQ_STATE_COMPLETE;
+
+		/* trigger thread wakeup */
+		pthread_cond_signal(&entry->cond);
+	}
+
+	pthread_mutex_unlock(&mp_request_list.lock);
+
+	return 0;
+}
+
+/* synchronously request memory map sync; this is called whenever the primary
+ * process initiates an allocation or free that changes the memory map.
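+ * (if IPC is unsupported - rte_errno == ENOTSUP - the sync is treated as
+ * successful, so single-process deployments keep working)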
+ */ +int +request_sync(void) +{ + struct rte_mp_msg msg; + struct rte_mp_reply reply; + struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param; + struct timespec ts; + int i, ret = -1; + + memset(&msg, 0, sizeof(msg)); + memset(&reply, 0, sizeof(reply)); + + /* no need to create tailq entries as this is entirely synchronous */ + + msg.num_fds = 0; + msg.len_param = sizeof(*req); + strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name)); + + /* sync request carries no data */ + req->t = REQ_TYPE_SYNC; + req->id = get_unique_id(); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_sync(&msg, &reply, &ts); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + /* if IPC is unsupported, behave as if the call succeeded */ + if (rte_errno != ENOTSUP) + RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n"); + else + ret = 0; + goto out; + } + + if (reply.nb_received != reply.nb_sent) { + RTE_LOG(ERR, EAL, "Not all secondaries have responded\n"); + goto out; + } + + for (i = 0; i < reply.nb_received; i++) { + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply.msgs[i].param; + if (resp->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected response from secondary\n"); + goto out; + } + if (resp->id != req->id) { + RTE_LOG(ERR, EAL, "Wrong request ID\n"); + goto out; + } + if (resp->result != REQ_RESULT_SUCCESS) { + RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n"); + goto out; + } + } + + ret = 0; +out: + free(reply.msgs); + return ret; +} + +/* this is a synchronous wrapper around a bunch of asynchronous requests to + * primary process. this will initiate a request and wait until responses come. + */ +int +request_to_primary(struct malloc_mp_req *user_req) +{ + struct rte_mp_msg msg; + struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param; + struct mp_request *entry; + struct timespec ts; + struct timeval now; + int ret; + + memset(&msg, 0, sizeof(msg)); + memset(&ts, 0, sizeof(ts)); + + pthread_mutex_lock(&mp_request_list.lock); + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n"); + goto fail; + } + + memset(entry, 0, sizeof(*entry)); + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto fail; + } + + ts.tv_nsec = (now.tv_usec * 1000) % 1000000000; + ts.tv_sec = now.tv_sec + MP_TIMEOUT_S + + (now.tv_usec * 1000) / 1000000000; + + /* initialize the request */ + pthread_cond_init(&entry->cond, NULL); + + msg.num_fds = 0; + msg.len_param = sizeof(*msg_req); + strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name)); + + /* (attempt to) get a unique id */ + user_req->id = get_unique_id(); + + /* copy contents of user request into the message */ + memcpy(msg_req, user_req, sizeof(*msg_req)); + + if (rte_mp_sendmsg(&msg)) { + RTE_LOG(ERR, EAL, "Cannot send message to primary\n"); + goto fail; + } + + /* copy contents of user request into active request */ + memcpy(&entry->user_req, user_req, sizeof(*user_req)); + + /* mark request as in progress */ + entry->state = REQ_STATE_ACTIVE; + + TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next); + + /* finally, wait on timeout */ + do { + ret = pthread_cond_timedwait(&entry->cond, + &mp_request_list.lock, &ts); + } while (ret != 0 && ret != ETIMEDOUT); + + if (entry->state != REQ_STATE_COMPLETE) { + RTE_LOG(ERR, EAL, "Request timed out\n"); + ret = -1; + } else { + ret = 0; + user_req->result = 
entry->user_req.result;
+	}
+	TAILQ_REMOVE(&mp_request_list.list, entry, next);
+	free(entry);
+
+	pthread_mutex_unlock(&mp_request_list.lock);
+	return ret;
+fail:
+	pthread_mutex_unlock(&mp_request_list.lock);
+	free(entry);
+	return -1;
+}
+
+int
+register_mp_requests(void)
+{
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* it's OK for primary to not support IPC */
+		if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request) &&
+				rte_errno != ENOTSUP) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				MP_ACTION_REQUEST);
+			return -1;
+		}
+	} else {
+		if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				MP_ACTION_SYNC);
+			return -1;
+		}
+		if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				MP_ACTION_ROLLBACK);
+			return -1;
+		}
+		if (rte_mp_action_register(MP_ACTION_RESPONSE,
+				handle_response)) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				MP_ACTION_RESPONSE);
+			return -1;
+		}
+	}
+	return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
new file mode 100644
index 000000000..2b86b76f6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef MALLOC_MP_H
+#define MALLOC_MP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+/* forward declarations */
+struct malloc_heap;
+struct rte_memseg;
+
+/* multiprocess synchronization structures for malloc */
+enum malloc_req_type {
+	REQ_TYPE_ALLOC,	/**< ask primary to allocate */
+	REQ_TYPE_FREE,	/**< ask primary to free */
+	REQ_TYPE_SYNC	/**< ask secondary to synchronize its memory map */
+};
+
+enum malloc_req_result {
+	REQ_RESULT_SUCCESS,
+	REQ_RESULT_FAIL
+};
+
+struct malloc_req_alloc {
+	struct malloc_heap *heap;
+	uint64_t page_sz;
+	size_t elt_size;
+	int socket;
+	unsigned int flags;
+	size_t align;
+	size_t bound;
+	bool contig;
+};
+
+struct malloc_req_free {
+	RTE_STD_C11
+	union {
+		void *addr;
+		uint64_t addr_64;
+	};
+	uint64_t len;
+};
+
+struct malloc_mp_req {
+	enum malloc_req_type t;
+	RTE_STD_C11
+	union {
+		struct malloc_req_alloc alloc_req;
+		struct malloc_req_free free_req;
+	};
+	uint64_t id; /**< not to be populated by caller */
+	enum malloc_req_result result;
+};
+
+int
+register_mp_requests(void);
+
+int
+request_to_primary(struct malloc_mp_req *req);
+
+/* synchronous memory map sync request */
+int
+request_sync(void);
+
+/* functions from malloc_heap exposed here */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len);
+
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+		int socket, unsigned int flags, size_t align, size_t bound,
+		bool contig, struct rte_memseg **ms, int n_segs);
+
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+		struct malloc_elem *elem, void *map_addr, size_t map_len);
+
+#endif /* MALLOC_MP_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/meson.build b/src/spdk/dpdk/lib/librte_eal/common/meson.build
new file mode 100644
index 000000000..55aaeb18e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/meson.build
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+includes += include_directories('.')
+
+if 
is_windows + sources += files( + 'eal_common_bus.c', + 'eal_common_class.c', + 'eal_common_devargs.c', + 'eal_common_errno.c', + 'eal_common_launch.c', + 'eal_common_lcore.c', + 'eal_common_log.c', + 'eal_common_options.c', + 'eal_common_thread.c', + ) + subdir_done() +endif + +sources += files( + 'eal_common_bus.c', + 'eal_common_cpuflags.c', + 'eal_common_class.c', + 'eal_common_devargs.c', + 'eal_common_dev.c', + 'eal_common_errno.c', + 'eal_common_fbarray.c', + 'eal_common_hexdump.c', + 'eal_common_hypervisor.c', + 'eal_common_launch.c', + 'eal_common_lcore.c', + 'eal_common_log.c', + 'eal_common_mcfg.c', + 'eal_common_memalloc.c', + 'eal_common_memory.c', + 'eal_common_memzone.c', + 'eal_common_options.c', + 'eal_common_proc.c', + 'eal_common_string_fns.c', + 'eal_common_tailqs.c', + 'eal_common_thread.c', + 'eal_common_timer.c', + 'eal_common_trace.c', + 'eal_common_trace_ctf.c', + 'eal_common_trace_points.c', + 'eal_common_trace_utils.c', + 'eal_common_uuid.c', + 'hotplug_mp.c', + 'malloc_elem.c', + 'malloc_heap.c', + 'malloc_mp.c', + 'rte_keepalive.c', + 'rte_malloc.c', + 'rte_random.c', + 'rte_reciprocal.c', + 'rte_service.c', +) diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c new file mode 100644 index 000000000..e0494b201 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015-2016 Intel Corporation + */ + +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_keepalive.h> +#include <rte_malloc.h> + +struct rte_keepalive { + /** Core Liveness. */ + struct { + /* + * Each element must be cache aligned to prevent false sharing. + */ + enum rte_keepalive_state core_state __rte_cache_aligned; + } live_data[RTE_KEEPALIVE_MAXCORES]; + + /** Last-seen-alive timestamps */ + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; + + /** + * Cores to check. + * Indexed by core id, non-zero if the core should be checked. + */ + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES]; + + /** Dead core handler. */ + rte_keepalive_failure_callback_t callback; + + /** + * Dead core handler app data. + * Pointer is passed to dead core handler. + */ + void *callback_data; + uint64_t tsc_initial; + uint64_t tsc_mhz; + + /** Core state relay handler. */ + rte_keepalive_relay_callback_t relay_callback; + + /** + * Core state relay handler app data. + * Pointer is passed to live core handler. + */ + void *relay_callback_data; +}; + +static void +print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core) +{ + RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n", + msg, + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000) + / rte_get_tsc_hz() + ); +} + +void +rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, + void *ptr_data) +{ + struct rte_keepalive *keepcfg = ptr_data; + int idx_core; + + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) { + if (keepcfg->active_cores[idx_core] == 0) + continue; + + switch (keepcfg->live_data[idx_core].core_state) { + case RTE_KA_STATE_UNUSED: + break; + case RTE_KA_STATE_ALIVE: /* Alive */ + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_MISSING; + keepcfg->last_alive[idx_core] = rte_rdtsc(); + break; + case RTE_KA_STATE_MISSING: /* MIA */ + print_trace("Core MIA. 
", keepcfg, idx_core); + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_DEAD; + break; + case RTE_KA_STATE_DEAD: /* Dead */ + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_GONE; + print_trace("Core died. ", keepcfg, idx_core); + if (keepcfg->callback) + keepcfg->callback( + keepcfg->callback_data, + idx_core + ); + break; + case RTE_KA_STATE_GONE: /* Buried */ + break; + case RTE_KA_STATE_DOZING: /* Core going idle */ + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_SLEEP; + keepcfg->last_alive[idx_core] = rte_rdtsc(); + break; + case RTE_KA_STATE_SLEEP: /* Idled core */ + break; + } + if (keepcfg->relay_callback) + keepcfg->relay_callback( + keepcfg->relay_callback_data, + idx_core, + keepcfg->live_data[idx_core].core_state, + keepcfg->last_alive[idx_core] + ); + } +} + +struct rte_keepalive * +rte_keepalive_create(rte_keepalive_failure_callback_t callback, + void *data) +{ + struct rte_keepalive *keepcfg; + + keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE", + sizeof(struct rte_keepalive), + RTE_CACHE_LINE_SIZE); + if (keepcfg != NULL) { + keepcfg->callback = callback; + keepcfg->callback_data = data; + keepcfg->tsc_initial = rte_rdtsc(); + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000; + } + return keepcfg; +} + +void rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg, + rte_keepalive_relay_callback_t callback, + void *data) +{ + keepcfg->relay_callback = callback; + keepcfg->relay_callback_data = data; +} + +void +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) +{ + if (id_core < RTE_KEEPALIVE_MAXCORES) { + keepcfg->active_cores[id_core] = RTE_KA_STATE_ALIVE; + keepcfg->last_alive[id_core] = rte_rdtsc(); + } +} + +void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) +{ + keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_ALIVE; +} + +void +rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg) +{ + keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_DOZING; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c new file mode 100644 index 000000000..f1b73168b --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c @@ -0,0 +1,668 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2019 Intel Corporation + */ + +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_errno.h> +#include <rte_memcpy.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_branch_prediction.h> +#include <rte_debug.h> +#include <rte_launch.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_common.h> +#include <rte_spinlock.h> +#include <rte_eal_trace.h> + +#include <rte_malloc.h> +#include "malloc_elem.h" +#include "malloc_heap.h" +#include "eal_memalloc.h" +#include "eal_memcfg.h" +#include "eal_private.h" + + +/* Free the memory space back to heap */ +static void +mem_free(void *addr, const bool trace_ena) +{ + if (trace_ena) + rte_eal_trace_mem_free(addr); + + if (addr == NULL) return; + if (malloc_heap_free(malloc_elem_from_data(addr)) < 0) + RTE_LOG(ERR, EAL, "Error: Invalid memory\n"); +} + +void +rte_free(void *addr) +{ + return mem_free(addr, true); +} + +void +eal_free_no_trace(void *addr) +{ + return mem_free(addr, false); +} + +static void * +malloc_socket(const char *type, size_t size, unsigned int align, + int socket_arg, const bool trace_ena) +{ + void *ptr; + + /* return NULL if size is 0 
or alignment is not power-of-2 */ + if (size == 0 || (align && !rte_is_power_of_2(align))) + return NULL; + + /* if there are no hugepages and if we are not allocating from an + * external heap, use memory from any socket available. checking for + * socket being external may return -1 in case of invalid socket, but + * that's OK - if there are no hugepages, it doesn't matter. + */ + if (rte_malloc_heap_socket_is_external(socket_arg) != 1 && + !rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + ptr = malloc_heap_alloc(type, size, socket_arg, 0, + align == 0 ? 1 : align, 0, false); + + if (trace_ena) + rte_eal_trace_mem_malloc(type, size, align, socket_arg, ptr); + return ptr; +} + +/* + * Allocate memory on specified heap. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned int align, + int socket_arg) +{ + return malloc_socket(type, size, align, socket_arg, true); +} + +void * +eal_malloc_no_trace(const char *type, size_t size, unsigned int align) +{ + return malloc_socket(type, size, align, SOCKET_ID_ANY, false); +} + +/* + * Allocate memory on default heap. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align) +{ + return rte_malloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket) +{ + void *ptr = rte_malloc_socket(type, size, align, socket); + +#ifdef RTE_MALLOC_DEBUG + /* + * If DEBUG is enabled, then freed memory is marked with poison + * value and set to zero on allocation. + * If DEBUG is not enabled then memory is already zeroed. + */ + if (ptr != NULL) + memset(ptr, 0, size); +#endif + + rte_eal_trace_mem_zmalloc(type, size, align, socket, ptr); + return ptr; +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align) +{ + return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket) +{ + return rte_zmalloc_socket(type, num * size, align, socket); +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align) +{ + return rte_zmalloc(type, num * size, align); +} + +/* + * Resize allocated memory on specified heap. + */ +void * +rte_realloc_socket(void *ptr, size_t size, unsigned int align, int socket) +{ + if (ptr == NULL) + return rte_malloc_socket(NULL, size, align, socket); + + struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (elem == NULL) { + RTE_LOG(ERR, EAL, "Error: memory corruption detected\n"); + return NULL; + } + + size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align); + + /* check requested socket id and alignment matches first, and if ok, + * see if we can resize block + */ + if ((socket == SOCKET_ID_ANY || + (unsigned int)socket == elem->heap->socket_id) && + RTE_PTR_ALIGN(ptr, align) == ptr && + malloc_heap_resize(elem, size) == 0) { + rte_eal_trace_mem_realloc(size, align, socket, ptr); + return ptr; + } + + /* either requested socket id doesn't match, alignment is off + * or we have no room to expand, + * so move the data. 
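+ * A minimal usage sketch (sizes illustrative; assumes EAL is
+ * initialized, and that rte_realloc may return a new pointer):
+ *   char *p = rte_malloc(NULL, 64, 0);
+ *   p = rte_realloc(p, 256, 0);   contents survive a possible move
+ *   rte_free(p);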
+ */ + void *new_ptr = rte_malloc_socket(NULL, size, align, socket); + if (new_ptr == NULL) + return NULL; + /* elem: |pad|data_elem|data|trailer| */ + const size_t old_size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD; + rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size); + rte_free(ptr); + + rte_eal_trace_mem_realloc(size, align, socket, new_ptr); + return new_ptr; +} + +/* + * Resize allocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned int align) +{ + return rte_realloc_socket(ptr, size, align, SOCKET_ID_ANY); +} + +int +rte_malloc_validate(const void *ptr, size_t *size) +{ + const struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (!malloc_elem_cookies_ok(elem)) + return -1; + if (size != NULL) + *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD; + return 0; +} + +/* + * Function to retrieve data for heap on given socket + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int heap_idx; + + heap_idx = malloc_socket_to_heap_id(socket); + if (heap_idx < 0) + return -1; + + return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], + socket_stats); +} + +/* + * Function to dump contents of all heaps + */ +void +rte_malloc_dump_heaps(FILE *f) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int idx; + + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + fprintf(f, "Heap id: %u\n", idx); + malloc_heap_dump(&mcfg->malloc_heaps[idx], f); + } +} + +int +rte_malloc_heap_get_socket(const char *name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + unsigned int idx; + int ret; + + if (name == NULL || + strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_read_lock(); + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[idx]; + + if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) { + heap = tmp; + break; + } + } + + if (heap != NULL) { + ret = heap->socket_id; + } else { + rte_errno = ENOENT; + ret = -1; + } + rte_mcfg_mem_read_unlock(); + + return ret; +} + +int +rte_malloc_heap_socket_is_external(int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int idx; + int ret = -1; + + if (socket_id == SOCKET_ID_ANY) + return 0; + + rte_mcfg_mem_read_lock(); + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[idx]; + + if ((int)tmp->socket_id == socket_id) { + /* external memory always has large socket ID's */ + ret = tmp->socket_id >= RTE_MAX_NUMA_NODES; + break; + } + } + rte_mcfg_mem_read_unlock(); + + return ret; +} + +/* + * Print stats on memory type. 
If type is NULL, info on all types is printed + */ +void +rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int heap_id; + struct rte_malloc_socket_stats sock_stats; + + /* Iterate through all initialised heaps */ + for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; + + malloc_heap_get_stats(heap, &sock_stats); + + fprintf(f, "Heap id:%u\n", heap_id); + fprintf(f, "\tHeap name:%s\n", heap->name); + fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); + fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes); + fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); + fprintf(f, "\tGreatest_free_size:%zu,\n", + sock_stats.greatest_free_size); + fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); + fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); + } + return; +} + +/* + * TODO: Set limit to memory that can be allocated to memory type + */ +int +rte_malloc_set_limit(__rte_unused const char *type, + __rte_unused size_t max) +{ + return 0; +} + +/* + * Return the IO address of a virtual address obtained through rte_malloc + */ +rte_iova_t +rte_malloc_virt2iova(const void *addr) +{ + const struct rte_memseg *ms; + struct malloc_elem *elem = malloc_elem_from_data(addr); + + if (elem == NULL) + return RTE_BAD_IOVA; + + if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA) + return (uintptr_t) addr; + + ms = rte_mem_virt2memseg(addr, elem->msl); + if (ms == NULL) + return RTE_BAD_IOVA; + + if (ms->iova == RTE_BAD_IOVA) + return RTE_BAD_IOVA; + + return ms->iova + RTE_PTR_DIFF(addr, ms->addr); +} + +static struct malloc_heap * +find_named_heap(const char *name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int i; + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + + if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN)) + return heap; + } + return NULL; +} + +int +rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, + rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz) +{ + struct malloc_heap *heap = NULL; + struct rte_memseg_list *msl; + unsigned int n; + int ret; + + if (heap_name == NULL || va_addr == NULL || + page_sz == 0 || !rte_is_power_of_2(page_sz) || + RTE_ALIGN(len, page_sz) != len || + !rte_is_aligned(va_addr, page_sz) || + ((len / page_sz) != n_pages && iova_addrs != NULL) || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + /* cannot add memory to internal heaps */ + rte_errno = EPERM; + ret = -1; + goto unlock; + } + n = len / page_sz; + + msl = malloc_heap_create_external_seg(va_addr, iova_addrs, n, page_sz, + heap_name, heap->socket_id); + if (msl == NULL) { + ret = -1; + goto unlock; + } + + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_add_external_memory(heap, msl); + msl->heap = 1; /* mark it as heap segment */ + rte_spinlock_unlock(&heap->lock); + +unlock: + rte_mcfg_mem_write_unlock(); + + return ret; +} + +int +rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len) +{ + struct malloc_heap 
*heap = NULL; + struct rte_memseg_list *msl; + int ret; + + if (heap_name == NULL || va_addr == NULL || len == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + /* cannot remove memory from internal heaps */ + rte_errno = EPERM; + ret = -1; + goto unlock; + } + + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + ret = -1; + goto unlock; + } + + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_remove_external_memory(heap, va_addr, len); + rte_spinlock_unlock(&heap->lock); + if (ret != 0) + goto unlock; + + ret = malloc_heap_destroy_external_seg(msl); + +unlock: + rte_mcfg_mem_write_unlock(); + + return ret; +} + +static int +sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach) +{ + struct malloc_heap *heap = NULL; + struct rte_memseg_list *msl; + int ret; + + if (heap_name == NULL || va_addr == NULL || len == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_read_lock(); + + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + /* we shouldn't be able to sync to internal heaps */ + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + rte_errno = EPERM; + ret = -1; + goto unlock; + } + + /* find corresponding memseg list to sync to */ + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + ret = -1; + goto unlock; + } + + if (attach) { + ret = rte_fbarray_attach(&msl->memseg_arr); + if (ret == 0) { + /* notify all subscribers that a new memory area was + * added. + */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, + va_addr, len); + } else { + ret = -1; + goto unlock; + } + } else { + /* notify all subscribers that a memory area is about to + * be removed. + */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + msl->base_va, msl->len); + ret = rte_fbarray_detach(&msl->memseg_arr); + if (ret < 0) { + ret = -1; + goto unlock; + } + } +unlock: + rte_mcfg_mem_read_unlock(); + return ret; +} + +int +rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len) +{ + return sync_memory(heap_name, va_addr, len, true); +} + +int +rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len) +{ + return sync_memory(heap_name, va_addr, len, false); +} + +int +rte_malloc_heap_create(const char *heap_name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + int i, ret; + + if (heap_name == NULL || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + /* check if there is space in the heap list, or if heap with this name + * already exists. 
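+ * The typical external-heap sequence built on this function, as a
+ * sketch (va, len and pgsz are caller-supplied examples; the memory
+ * must be page-aligned and a multiple of pgsz; a NULL iova table is
+ * accepted, per the parameter checks in rte_malloc_heap_memory_add):
+ *   rte_malloc_heap_create("user_heap");
+ *   rte_malloc_heap_memory_add("user_heap", va, len, NULL,
+ *           len / pgsz, pgsz);
+ *   int sock = rte_malloc_heap_get_socket("user_heap");
+ *   void *obj = rte_malloc_socket(NULL, 4096, 0, sock);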
+ */ + rte_mcfg_mem_write_lock(); + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[i]; + /* existing heap */ + if (strncmp(heap_name, tmp->name, + RTE_HEAP_NAME_MAX_LEN) == 0) { + RTE_LOG(ERR, EAL, "Heap %s already exists\n", + heap_name); + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + /* empty heap */ + if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) { + heap = tmp; + break; + } + } + if (heap == NULL) { + RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n"); + rte_errno = ENOSPC; + ret = -1; + goto unlock; + } + + /* we're sure that we can create a new heap, so do it */ + ret = malloc_heap_create(heap, heap_name); +unlock: + rte_mcfg_mem_write_unlock(); + + return ret; +} + +int +rte_malloc_heap_destroy(const char *heap_name) +{ + struct malloc_heap *heap = NULL; + int ret; + + if (heap_name == NULL || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_mcfg_mem_write_lock(); + + /* start from non-socket heaps */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name); + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + /* we shouldn't be able to destroy internal heaps */ + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + rte_errno = EPERM; + ret = -1; + goto unlock; + } + /* sanity checks done, now we can destroy the heap */ + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_destroy(heap); + + /* if we failed, lock is still active */ + if (ret < 0) + rte_spinlock_unlock(&heap->lock); +unlock: + rte_mcfg_mem_write_unlock(); + + return ret; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_random.c b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c new file mode 100644 index 000000000..b7a089ac4 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Ericsson AB + */ + +#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#include <x86intrin.h> +#endif +#include <stdlib.h> +#include <unistd.h> + +#include <rte_branch_prediction.h> +#include <rte_cycles.h> +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_memory.h> +#include <rte_random.h> + +struct rte_rand_state { + uint64_t z1; + uint64_t z2; + uint64_t z3; + uint64_t z4; + uint64_t z5; +} __rte_cache_aligned; + +static struct rte_rand_state rand_states[RTE_MAX_LCORE]; + +static uint32_t +__rte_rand_lcg32(uint32_t *seed) +{ + *seed = 1103515245U * *seed + 12345U; + + return *seed; +} + +static uint64_t +__rte_rand_lcg64(uint32_t *seed) +{ + uint64_t low; + uint64_t high; + + /* A 64-bit LCG would have been much cleaner, but good + * multiplier/increments for such seem hard to come by. 
+ */ + + low = __rte_rand_lcg32(seed); + high = __rte_rand_lcg32(seed); + + return low | (high << 32); +} + +static uint64_t +__rte_rand_lfsr258_gen_seed(uint32_t *seed, uint64_t min_value) +{ + uint64_t res; + + res = __rte_rand_lcg64(seed); + + if (res < min_value) + res += min_value; + + return res; +} + +static void +__rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state) +{ + uint32_t lcg_seed; + + lcg_seed = (uint32_t)(seed ^ (seed >> 32)); + + state->z1 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 2UL); + state->z2 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 512UL); + state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL); + state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL); + state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL); +} + +void +rte_srand(uint64_t seed) +{ + unsigned int lcore_id; + + /* add lcore_id to seed to avoid having the same sequence */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]); +} + +static __rte_always_inline uint64_t +__rte_rand_lfsr258_comp(uint64_t z, uint64_t a, uint64_t b, uint64_t c, + uint64_t d) +{ + return ((z & c) << d) ^ (((z << a) ^ z) >> b); +} + +/* Based on L’Ecuyer, P.: Tables of maximally equidistributed combined + * LFSR generators. + */ + +static __rte_always_inline uint64_t +__rte_rand_lfsr258(struct rte_rand_state *state) +{ + state->z1 = __rte_rand_lfsr258_comp(state->z1, 1UL, 53UL, + 18446744073709551614UL, 10UL); + state->z2 = __rte_rand_lfsr258_comp(state->z2, 24UL, 50UL, + 18446744073709551104UL, 5UL); + state->z3 = __rte_rand_lfsr258_comp(state->z3, 3UL, 23UL, + 18446744073709547520UL, 29UL); + state->z4 = __rte_rand_lfsr258_comp(state->z4, 5UL, 24UL, + 18446744073709420544UL, 23UL); + state->z5 = __rte_rand_lfsr258_comp(state->z5, 3UL, 33UL, + 18446744073701163008UL, 8UL); + + return state->z1 ^ state->z2 ^ state->z3 ^ state->z4 ^ state->z5; +} + +static __rte_always_inline +struct rte_rand_state *__rte_rand_get_state(void) +{ + unsigned int lcore_id; + + lcore_id = rte_lcore_id(); + + if (unlikely(lcore_id == LCORE_ID_ANY)) + lcore_id = rte_get_master_lcore(); + + return &rand_states[lcore_id]; +} + +uint64_t +rte_rand(void) +{ + struct rte_rand_state *state; + + state = __rte_rand_get_state(); + + return __rte_rand_lfsr258(state); +} + +uint64_t +rte_rand_max(uint64_t upper_bound) +{ + struct rte_rand_state *state; + uint8_t ones; + uint8_t leading_zeros; + uint64_t mask = ~((uint64_t)0); + uint64_t res; + + if (unlikely(upper_bound < 2)) + return 0; + + state = __rte_rand_get_state(); + + ones = __builtin_popcountll(upper_bound); + + /* Handle power-of-2 upper_bound as a special case, since it + * has no bias issues. + */ + if (unlikely(ones == 1)) + return __rte_rand_lfsr258(state) & (upper_bound - 1); + + /* The approach to avoiding bias is to create a mask that + * stretches beyond the request value range, and up to the + * next power-of-2. In case the masked generated random value + * is equal to or greater than the upper bound, just discard + * the value and generate a new one. 
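+ * Worked example: upper_bound = 100 has seven significant bits, so
+ * the mask below becomes 127; a draw is kept with probability
+ * 100/128, i.e. on average fewer than two LFSR iterations per call.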
+ */ + + leading_zeros = __builtin_clzll(upper_bound); + mask >>= leading_zeros; + + do { + res = __rte_rand_lfsr258(state) & mask; + } while (unlikely(res >= upper_bound)); + + return res; +} + +static uint64_t +__rte_random_initial_seed(void) +{ +#ifdef RTE_LIBEAL_USE_GETENTROPY + int ge_rc; + uint64_t ge_seed; + + ge_rc = getentropy(&ge_seed, sizeof(ge_seed)); + + if (ge_rc == 0) + return ge_seed; +#endif +#ifdef RTE_MACHINE_CPUFLAG_RDSEED + unsigned int rdseed_low; + unsigned int rdseed_high; + + /* first fallback: rdseed instruction, if available */ + if (_rdseed32_step(&rdseed_low) == 1 && + _rdseed32_step(&rdseed_high) == 1) + return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32); +#endif + /* second fallback: seed using rdtsc */ + return rte_get_tsc_cycles(); +} + +RTE_INIT(rte_rand_init) +{ + uint64_t seed; + + seed = __rte_random_initial_seed(); + + rte_srand(seed); +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c new file mode 100644 index 000000000..42dfa44eb --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + * Copyright(c) Hannes Frederic Sowa + * All rights reserved. + */ + +#include <stdio.h> +#include <stdint.h> + +#include <rte_common.h> + +#include "rte_reciprocal.h" + +struct rte_reciprocal rte_reciprocal_value(uint32_t d) +{ + struct rte_reciprocal R; + uint64_t m; + int l; + + l = rte_fls_u32(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + m /= d; + + ++m; + R.m = m; + R.sh1 = RTE_MIN(l, 1); + R.sh2 = RTE_MAX(l - 1, 0); + + return R; +} + +/* + * Code taken from Hacker's Delight: + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt + * License permits inclusion here per: + * http://www.hackersdelight.org/permissions.htm + */ +static uint64_t +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) +{ + const uint64_t b = (1ULL << 32); /* Number base (32 bits). */ + uint64_t un1, un0, /* Norm. dividend LSD's. */ + vn1, vn0, /* Norm. divisor digits. */ + q1, q0, /* Quotient digits. */ + un64, un21, un10, /* Dividend digit pairs. */ + rhat; /* A remainder. */ + int s; /* Shift amount for norm. */ + + /* If overflow, set rem. to an impossible value. */ + if (u1 >= v) { + if (r != NULL) + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + /* Count leading zeros. */ + s = __builtin_clzll(v); + if (s > 0) { + v = v << s; + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); + un10 = u0 << s; + } else { + + un64 = u1 | u0; + un10 = u0; + } + + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un64/vn1; + rhat = un64 - q1*vn1; +again1: + if (q1 >= b || q1*vn0 > b*rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again1; + } + + un21 = un64*b + un1 - q1*v; + + q0 = un21/vn1; + rhat = un21 - q0*vn1; +again2: + if (q0 >= b || q0*vn0 > b*rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again2; + } + + if (r != NULL) + *r = (un21*b + un0 - q0*v) >> s; + return q1*b + q0; +} + +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d) +{ + struct rte_reciprocal_u64 R; + uint64_t m; + uint64_t r; + int l; + + l = 63 - __builtin_clzll(d); + + m = divide_128_div_64_to_64((1ULL << l), 0, d, &r) << 1; + if (r << 1 < r || r << 1 >= d) + m++; + m = (1ULL << l) - d ? m + 1 : 1; + R.m = m; + + R.sh1 = l > 1 ? 1 : l; + R.sh2 = (l > 0) ? l : 0; + R.sh2 -= R.sh2 && (m == 1) ? 
1 : 0; + + return R; +} diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_service.c b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c new file mode 100644 index 000000000..6123a2124 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c @@ -0,0 +1,919 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> +#include <limits.h> +#include <string.h> + +#include <rte_compat.h> +#include <rte_service.h> +#include <rte_service_component.h> + +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_cycles.h> +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_spinlock.h> + +#include "eal_private.h" + +#define RTE_SERVICE_NUM_MAX 64 + +#define SERVICE_F_REGISTERED (1 << 0) +#define SERVICE_F_STATS_ENABLED (1 << 1) +#define SERVICE_F_START_CHECK (1 << 2) + +/* runstates for services and lcores, denoting if they are active or not */ +#define RUNSTATE_STOPPED 0 +#define RUNSTATE_RUNNING 1 + +/* internal representation of a service */ +struct rte_service_spec_impl { + /* public part of the struct */ + struct rte_service_spec spec; + + /* spin lock that when set indicates a service core is currently + * running this service callback. When not set, a core may take the + * lock and then run the service callback. + */ + rte_spinlock_t execute_lock; + + /* API set/get-able variables */ + int8_t app_runstate; + int8_t comp_runstate; + uint8_t internal_flags; + + /* per service statistics */ + /* Indicates how many cores the service is mapped to run on. + * It does not indicate the number of cores the service is running + * on currently. + */ + uint32_t num_mapped_cores; + uint64_t calls; + uint64_t cycles_spent; +} __rte_cache_aligned; + +/* the internal values of a service core */ +struct core_state { + /* map of services IDs are run on this core */ + uint64_t service_mask; + uint8_t runstate; /* running or stopped */ + uint8_t is_service_core; /* set if core is currently a service core */ + uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX]; + uint64_t loops; + uint64_t calls_per_service[RTE_SERVICE_NUM_MAX]; +} __rte_cache_aligned; + +static uint32_t rte_service_count; +static struct rte_service_spec_impl *rte_services; +static struct core_state *lcore_states; +static uint32_t rte_service_library_initialized; + +int32_t +rte_service_init(void) +{ + if (rte_service_library_initialized) { + RTE_LOG(NOTICE, EAL, + "service library init() called, init flag %d\n", + rte_service_library_initialized); + return -EALREADY; + } + + rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX, + sizeof(struct rte_service_spec_impl), + RTE_CACHE_LINE_SIZE); + if (!rte_services) { + RTE_LOG(ERR, EAL, "error allocating rte services array\n"); + goto fail_mem; + } + + lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, + sizeof(struct core_state), RTE_CACHE_LINE_SIZE); + if (!lcore_states) { + RTE_LOG(ERR, EAL, "error allocating core states array\n"); + goto fail_mem; + } + + int i; + int count = 0; + struct rte_config *cfg = rte_eal_get_configuration(); + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (lcore_config[i].core_role == ROLE_SERVICE) { + if ((unsigned int)i == cfg->master_lcore) + continue; + rte_service_lcore_add(i); + count++; + } + } + + rte_service_library_initialized = 1; + return 0; +fail_mem: + rte_free(rte_services); + rte_free(lcore_states); + return -ENOMEM; +} + +void 
+rte_service_finalize(void) +{ + if (!rte_service_library_initialized) + return; + + rte_service_lcore_reset_all(); + rte_eal_mp_wait_lcore(); + + rte_free(rte_services); + rte_free(lcore_states); + + rte_service_library_initialized = 0; +} + +/* returns 1 if service is registered and has not been unregistered + * Returns 0 if service never registered, or has been unregistered + */ +static inline int +service_valid(uint32_t id) +{ + return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED); +} + +static struct rte_service_spec_impl * +service_get(uint32_t id) +{ + return &rte_services[id]; +} + +/* validate ID and retrieve service pointer, or return error value */ +#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \ + if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \ + return retval; \ + service = &rte_services[id]; \ +} while (0) + +/* returns 1 if statistics should be collected for service + * Returns 0 if statistics should not be collected for service + */ +static inline int +service_stats_enabled(struct rte_service_spec_impl *impl) +{ + return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED); +} + +static inline int +service_mt_safe(struct rte_service_spec_impl *s) +{ + return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE); +} + +int32_t +rte_service_set_stats_enable(uint32_t id, int32_t enabled) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + + if (enabled) + s->internal_flags |= SERVICE_F_STATS_ENABLED; + else + s->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + + return 0; +} + +int32_t +rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + + if (enabled) + s->internal_flags |= SERVICE_F_START_CHECK; + else + s->internal_flags &= ~(SERVICE_F_START_CHECK); + + return 0; +} + +uint32_t +rte_service_get_count(void) +{ + return rte_service_count; +} + +int32_t +rte_service_get_by_name(const char *name, uint32_t *service_id) +{ + if (!service_id) + return -EINVAL; + + int i; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (service_valid(i) && + strcmp(name, rte_services[i].spec.name) == 0) { + *service_id = i; + return 0; + } + } + + return -ENODEV; +} + +const char * +rte_service_get_name(uint32_t id) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + return s->spec.name; +} + +int32_t +rte_service_probe_capability(uint32_t id, uint32_t capability) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + return !!(s->spec.capabilities & capability); +} + +int32_t +rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *id_ptr) +{ + uint32_t i; + int32_t free_slot = -1; + + if (spec->callback == NULL || strlen(spec->name) == 0) + return -EINVAL; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) { + free_slot = i; + break; + } + } + + if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX)) + return -ENOSPC; + + struct rte_service_spec_impl *s = &rte_services[free_slot]; + s->spec = *spec; + s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK; + + rte_service_count++; + + if (id_ptr) + *id_ptr = free_slot; + + return 0; +} + +int32_t +rte_service_component_unregister(uint32_t id) +{ + uint32_t i; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + rte_service_count--; + + s->internal_flags &= ~(SERVICE_F_REGISTERED); + + /* clear the run-bit in all cores */ + for (i = 0; i < 
RTE_MAX_LCORE; i++) + lcore_states[i].service_mask &= ~(UINT64_C(1) << id); + + memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl)); + + return 0; +} + +int32_t +rte_service_component_runstate_set(uint32_t id, uint32_t runstate) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + /* comp_runstate act as the guard variable. Use store-release + * memory order. This synchronizes with load-acquire in + * service_run and service_runstate_get function. + */ + if (runstate) + __atomic_store_n(&s->comp_runstate, RUNSTATE_RUNNING, + __ATOMIC_RELEASE); + else + __atomic_store_n(&s->comp_runstate, RUNSTATE_STOPPED, + __ATOMIC_RELEASE); + + return 0; +} + +int32_t +rte_service_runstate_set(uint32_t id, uint32_t runstate) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + /* app_runstate act as the guard variable. Use store-release + * memory order. This synchronizes with load-acquire in + * service_run runstate_get function. + */ + if (runstate) + __atomic_store_n(&s->app_runstate, RUNSTATE_RUNNING, + __ATOMIC_RELEASE); + else + __atomic_store_n(&s->app_runstate, RUNSTATE_STOPPED, + __ATOMIC_RELEASE); + + return 0; +} + +int32_t +rte_service_runstate_get(uint32_t id) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + /* comp_runstate and app_runstate act as the guard variables. + * Use load-acquire memory order. This synchronizes with + * store-release in service state set functions. + */ + if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) == + RUNSTATE_RUNNING && + __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) == + RUNSTATE_RUNNING) { + int check_disabled = !(s->internal_flags & + SERVICE_F_START_CHECK); + int lcore_mapped = (__atomic_load_n(&s->num_mapped_cores, + __ATOMIC_RELAXED) > 0); + + return (check_disabled | lcore_mapped); + } else + return 0; + +} + +static inline void +service_runner_do_callback(struct rte_service_spec_impl *s, + struct core_state *cs, uint32_t service_idx) +{ + void *userdata = s->spec.callback_userdata; + + if (service_stats_enabled(s)) { + uint64_t start = rte_rdtsc(); + s->spec.callback(userdata); + uint64_t end = rte_rdtsc(); + s->cycles_spent += end - start; + cs->calls_per_service[service_idx]++; + s->calls++; + } else + s->spec.callback(userdata); +} + + +/* Expects the service 's' is valid. */ +static int32_t +service_run(uint32_t i, struct core_state *cs, uint64_t service_mask, + struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe) +{ + if (!s) + return -EINVAL; + + /* comp_runstate and app_runstate act as the guard variables. + * Use load-acquire memory order. This synchronizes with + * store-release in service state set functions. 
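+ * The pairing, as a sketch:
+ *   writer: __atomic_store_n(&s->app_runstate, RUNSTATE_RUNNING,
+ *           __ATOMIC_RELEASE);
+ *   reader: __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE);
+ * so anything written before the release store is visible once the
+ * acquire load observes RUNSTATE_RUNNING.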
+ */ + if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) != + RUNSTATE_RUNNING || + __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) != + RUNSTATE_RUNNING || + !(service_mask & (UINT64_C(1) << i))) { + cs->service_active_on_lcore[i] = 0; + return -ENOEXEC; + } + + cs->service_active_on_lcore[i] = 1; + + if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) { + if (!rte_spinlock_trylock(&s->execute_lock)) + return -EBUSY; + + service_runner_do_callback(s, cs, i); + rte_spinlock_unlock(&s->execute_lock); + } else + service_runner_do_callback(s, cs, i); + + return 0; +} + +int32_t +rte_service_may_be_active(uint32_t id) +{ + uint32_t ids[RTE_MAX_LCORE] = {0}; + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + int i; + + if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) + return -EINVAL; + + for (i = 0; i < lcore_count; i++) { + if (lcore_states[i].service_active_on_lcore[id]) + return 1; + } + + return 0; +} + +int32_t +rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) +{ + struct core_state *cs = &lcore_states[rte_lcore_id()]; + struct rte_service_spec_impl *s; + + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + /* Increment num_mapped_cores to reflect that this core is + * now mapped capable of running the service. + */ + __atomic_add_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED); + + int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe); + + __atomic_sub_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED); + + return ret; +} + +static int32_t +service_runner_func(void *arg) +{ + RTE_SET_USED(arg); + uint32_t i; + const int lcore = rte_lcore_id(); + struct core_state *cs = &lcore_states[lcore]; + + /* runstate act as the guard variable. Use load-acquire + * memory order here to synchronize with store-release + * in runstate update functions. + */ + while (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) == + RUNSTATE_RUNNING) { + const uint64_t service_mask = cs->service_mask; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; + /* return value ignored as no change to code flow */ + service_run(i, cs, service_mask, service_get(i), 1); + } + + cs->loops++; + } + + lcore_config[lcore].state = WAIT; + + return 0; +} + +int32_t +rte_service_lcore_count(void) +{ + int32_t count = 0; + uint32_t i; + for (i = 0; i < RTE_MAX_LCORE; i++) + count += lcore_states[i].is_service_core; + return count; +} + +int32_t +rte_service_lcore_list(uint32_t array[], uint32_t n) +{ + uint32_t count = rte_service_lcore_count(); + if (count > n) + return -ENOMEM; + + if (!array) + return -EINVAL; + + uint32_t i; + uint32_t idx = 0; + for (i = 0; i < RTE_MAX_LCORE; i++) { + struct core_state *cs = &lcore_states[i]; + if (cs->is_service_core) { + array[idx] = i; + idx++; + } + } + + return count; +} + +int32_t +rte_service_lcore_count_services(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + return __builtin_popcountll(cs->service_mask); +} + +int32_t +rte_service_start_with_defaults(void) +{ + /* create a default mapping from cores to services, then start the + * services to make them transparent to unaware applications. 
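+ * A registration sketch ("my_svc" and cb are application-provided
+ * examples, not part of this file):
+ *   struct rte_service_spec spec = { .name = "my_svc",
+ *           .callback = cb };
+ *   uint32_t id;
+ *   rte_service_component_register(&spec, &id);
+ *   rte_service_component_runstate_set(id, 1);
+ *   rte_service_start_with_defaults();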
+ */ + uint32_t i; + int ret; + uint32_t count = rte_service_get_count(); + + int32_t lcore_iter = 0; + uint32_t ids[RTE_MAX_LCORE] = {0}; + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + + if (lcore_count == 0) + return -ENOTSUP; + + for (i = 0; (int)i < lcore_count; i++) + rte_service_lcore_start(ids[i]); + + for (i = 0; i < count; i++) { + /* do 1:1 core mapping here, with each service getting + * assigned a single core by default. Adding multiple services + * should multiplex to a single core, or 1:1 if there are the + * same amount of services as service-cores + */ + ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1); + if (ret) + return -ENODEV; + + lcore_iter++; + if (lcore_iter >= lcore_count) + lcore_iter = 0; + + ret = rte_service_runstate_set(i, 1); + if (ret) + return -ENOEXEC; + } + + return 0; +} + +static int32_t +service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled) +{ + /* validate ID, or return error value */ + if (sid >= RTE_SERVICE_NUM_MAX || !service_valid(sid) || + lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core) + return -EINVAL; + + uint64_t sid_mask = UINT64_C(1) << sid; + if (set) { + uint64_t lcore_mapped = lcore_states[lcore].service_mask & + sid_mask; + + if (*set && !lcore_mapped) { + lcore_states[lcore].service_mask |= sid_mask; + __atomic_add_fetch(&rte_services[sid].num_mapped_cores, + 1, __ATOMIC_RELAXED); + } + if (!*set && lcore_mapped) { + lcore_states[lcore].service_mask &= ~(sid_mask); + __atomic_sub_fetch(&rte_services[sid].num_mapped_cores, + 1, __ATOMIC_RELAXED); + } + } + + if (enabled) + *enabled = !!(lcore_states[lcore].service_mask & (sid_mask)); + + return 0; +} + +int32_t +rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled) +{ + uint32_t on = enabled > 0; + return service_update(id, lcore, &on, 0); +} + +int32_t +rte_service_map_lcore_get(uint32_t id, uint32_t lcore) +{ + uint32_t enabled; + int ret = service_update(id, lcore, 0, &enabled); + if (ret == 0) + return enabled; + return ret; +} + +static void +set_lcore_state(uint32_t lcore, int32_t state) +{ + /* mark core state in hugepage backed config */ + struct rte_config *cfg = rte_eal_get_configuration(); + cfg->lcore_role[lcore] = state; + + /* mark state in process local lcore_config */ + lcore_config[lcore].core_role = state; + + /* update per-lcore optimized state tracking */ + lcore_states[lcore].is_service_core = (state == ROLE_SERVICE); +} + +int32_t +rte_service_lcore_reset_all(void) +{ + /* loop over cores, reset all to mask 0 */ + uint32_t i; + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (lcore_states[i].is_service_core) { + lcore_states[i].service_mask = 0; + set_lcore_state(i, ROLE_RTE); + /* runstate act as guard variable Use + * store-release memory order here to synchronize + * with load-acquire in runstate read functions. + */ + __atomic_store_n(&lcore_states[i].runstate, + RUNSTATE_STOPPED, __ATOMIC_RELEASE); + } + } + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) + __atomic_store_n(&rte_services[i].num_mapped_cores, 0, + __ATOMIC_RELAXED); + + return 0; +} + +int32_t +rte_service_lcore_add(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + if (lcore_states[lcore].is_service_core) + return -EALREADY; + + set_lcore_state(lcore, ROLE_SERVICE); + + /* ensure that after adding a core the mask and state are defaults */ + lcore_states[lcore].service_mask = 0; + /* Use store-release memory order here to synchronize with + * load-acquire in runstate read functions. 
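+ * Manual alternative to rte_service_start_with_defaults(), as a
+ * sketch (id and lcore are application-chosen):
+ *   rte_service_lcore_add(lcore);
+ *   rte_service_map_lcore_set(id, lcore, 1);
+ *   rte_service_lcore_start(lcore);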
+ */ + __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED, + __ATOMIC_RELEASE); + + return rte_eal_wait_lcore(lcore); +} + +int32_t +rte_service_lcore_del(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -EINVAL; + + /* runstate act as the guard variable. Use load-acquire + * memory order here to synchronize with store-release + * in runstate update functions. + */ + if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) != + RUNSTATE_STOPPED) + return -EBUSY; + + set_lcore_state(lcore, ROLE_RTE); + + rte_smp_wmb(); + return 0; +} + +int32_t +rte_service_lcore_start(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -EINVAL; + + /* runstate act as the guard variable. Use load-acquire + * memory order here to synchronize with store-release + * in runstate update functions. + */ + if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) == + RUNSTATE_RUNNING) + return -EALREADY; + + /* set core to run state first, and then launch otherwise it will + * return immediately as runstate keeps it in the service poll loop + */ + /* Use load-acquire memory order here to synchronize with + * store-release in runstate update functions. + */ + __atomic_store_n(&cs->runstate, RUNSTATE_RUNNING, __ATOMIC_RELEASE); + + int ret = rte_eal_remote_launch(service_runner_func, 0, lcore); + /* returns -EBUSY if the core is already launched, 0 on success */ + return ret; +} + +int32_t +rte_service_lcore_stop(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + /* runstate act as the guard variable. Use load-acquire + * memory order here to synchronize with store-release + * in runstate update functions. + */ + if (__atomic_load_n(&lcore_states[lcore].runstate, __ATOMIC_ACQUIRE) == + RUNSTATE_STOPPED) + return -EALREADY; + + uint32_t i; + uint64_t service_mask = lcore_states[lcore].service_mask; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + int32_t enabled = service_mask & (UINT64_C(1) << i); + int32_t service_running = rte_service_runstate_get(i); + int32_t only_core = (1 == + __atomic_load_n(&rte_services[i].num_mapped_cores, + __ATOMIC_RELAXED)); + + /* if the core is mapped, and the service is running, and this + * is the only core that is mapped, the service would cease to + * run if this core stopped, so fail instead. + */ + if (enabled && service_running && only_core) + return -EBUSY; + } + + /* Use store-release memory order here to synchronize with + * load-acquire in runstate read functions. 
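+ * To avoid the -EBUSY case above, unmap the service first (sketch;
+ * id and lcore as in the mapping calls elsewhere in this file):
+ *   rte_service_map_lcore_set(id, lcore, 0);
+ *   rte_service_lcore_stop(lcore);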
+ */ + __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED, + __ATOMIC_RELEASE); + + return 0; +} + +int32_t +rte_service_attr_get(uint32_t id, uint32_t attr_id, uint64_t *attr_value) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + if (!attr_value) + return -EINVAL; + + switch (attr_id) { + case RTE_SERVICE_ATTR_CYCLES: + *attr_value = s->cycles_spent; + return 0; + case RTE_SERVICE_ATTR_CALL_COUNT: + *attr_value = s->calls; + return 0; + default: + return -EINVAL; + } +} + +int32_t +rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + uint64_t *attr_value) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE || !attr_value) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + switch (attr_id) { + case RTE_SERVICE_LCORE_ATTR_LOOPS: + *attr_value = cs->loops; + return 0; + default: + return -EINVAL; + } +} + +static void +service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint32_t reset) +{ + /* avoid divide by zero */ + int calls = 1; + if (s->calls != 0) + calls = s->calls; + + if (reset) { + s->cycles_spent = 0; + s->calls = 0; + return; + } + + if (f == NULL) + return; + + fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %" + PRIu64"\tavg: %"PRIu64"\n", + s->spec.name, service_stats_enabled(s), s->calls, + s->cycles_spent, s->cycles_spent / calls); +} + +int32_t +rte_service_attr_reset_all(uint32_t id) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + int reset = 1; + service_dump_one(NULL, s, reset); + return 0; +} + +int32_t +rte_service_lcore_attr_reset_all(uint32_t lcore) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + cs->loops = 0; + + return 0; +} + +static void +service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) +{ + uint32_t i; + struct core_state *cs = &lcore_states[lcore]; + + fprintf(f, "%02d\t", lcore); + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; + fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]); + if (reset) + cs->calls_per_service[i] = 0; + } + fprintf(f, "\n"); +} + +int32_t +rte_service_dump(FILE *f, uint32_t id) +{ + uint32_t i; + int print_one = (id != UINT32_MAX); + + /* print only the specified service */ + if (print_one) { + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + fprintf(f, "Service %s Summary\n", s->spec.name); + uint32_t reset = 0; + service_dump_one(f, s, reset); + return 0; + } + + /* print all services, as UINT32_MAX was passed as id */ + fprintf(f, "Services Summary\n"); + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; + uint32_t reset = 0; + service_dump_one(f, &rte_services[i], reset); + } + + fprintf(f, "Service Cores Summary\n"); + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (lcore_config[i].core_role != ROLE_SERVICE) + continue; + + uint32_t reset = 0; + service_dump_calls_per_lcore(f, i, reset); + } + + return 0; +} |