Adding upstream version 18.2.2.upstream/18.2.2

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit: e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree: 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/idxd
parent: Initial commit. (diff)
download: ceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
5 files changed, 2057 insertions, 0 deletions
diff --git a/src/spdk/lib/idxd/Makefile b/src/spdk/lib/idxd/Makefile
new file mode 100644
index 000000000..ed66aeb15
--- /dev/null
+++ b/src/spdk/lib/idxd/Makefile
@@ -0,0 +1,45 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+SO_VER := 2
+SO_MINOR := 0
+
+C_SRCS = idxd.c
+LIBNAME = idxd
+
+SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_idxd.map)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/idxd/idxd.c b/src/spdk/lib/idxd/idxd.c
new file mode 100644
index 000000000..992d96211
--- /dev/null
+++ b/src/spdk/lib/idxd/idxd.c
@@ -0,0 +1,1292 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/env.h"
+#include "spdk/util.h"
+#include "spdk/memory.h"
+
+#include "spdk_internal/log.h"
+#include "spdk_internal/idxd.h"
+
+#include "idxd.h"
+
+#define ALIGN_4K 0x1000
+
+pthread_mutex_t	g_driver_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * g_dev_cfg gives us 2 pre-set configurations of DSA to choose from
+ * via RPC.
+ */
+struct device_config *g_dev_cfg = NULL;
+
+/*
+ * Pre-built configurations. Variations depend on various factors
+ * including how many different types of target latency profiles there
+ * are, how many different QOS requirements there might be, etc.
+ */
+struct device_config g_dev_cfg0 = {
+	.config_num = 0,
+	.num_groups = 4,
+	.num_wqs_per_group = 1,
+	.num_engines_per_group = 1,
+	.total_wqs = 4,
+	.total_engines = 4,
+};
+
+struct device_config g_dev_cfg1 = {
+	.config_num = 1,
+	.num_groups = 2,
+	.num_wqs_per_group = 2,
+	.num_engines_per_group = 2,
+	.total_wqs = 4,
+	.total_engines = 4,
+};
+
+static uint32_t
+_idxd_read_4(struct spdk_idxd_device *idxd, uint32_t offset)
+{
+	return spdk_mmio_read_4((uint32_t *)(idxd->reg_base + offset));
+}
+
+static void
+_idxd_write_4(struct spdk_idxd_device *idxd, uint32_t offset, uint32_t value)
+{
+	spdk_mmio_write_4((uint32_t *)(idxd->reg_base + offset), value);
+}
+
+static uint64_t
+_idxd_read_8(struct spdk_idxd_device *idxd, uint32_t offset)
+{
+	return spdk_mmio_read_8((uint64_t *)(idxd->reg_base + offset));
+}
+
+static void
+_idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value)
+{
+	spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value);
+}
+
+struct spdk_idxd_io_channel *
+spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
+{
+	struct spdk_idxd_io_channel *chan;
+	struct idxd_batch *batch;
+	int i;
+
+	chan = calloc(1, sizeof(struct spdk_idxd_io_channel));
+	if (chan == NULL) {
+		SPDK_ERRLOG("Failed to allocate idxd chan\n");
+		return NULL;
+	}
+	chan->idxd = idxd;
+
+	TAILQ_INIT(&chan->batches);
+
+	TAILQ_INIT(&chan->batch_pool);
+	for (i = 0 ; i < NUM_BATCHES ; i++) {
+		batch = calloc(1, sizeof(struct idxd_batch));
+		if (batch == NULL) {
+			SPDK_ERRLOG("Failed to allocate batch\n");
+			while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
+				TAILQ_REMOVE(&chan->batch_pool, batch, link);
+				free(batch);
+			}
+			return NULL;
+		}
+		TAILQ_INSERT_TAIL(&chan->batch_pool, batch, link);
+	}
+
+	return chan;
+}
+
+void
+spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
+{
+	free(chan);
+}
+
+int
+spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
+{
+	uint32_t num_ring_slots;
+	int rc;
+
+	/* Round robin the WQ selection for the chan on this IDXD device. */
+	chan->idxd->wq_id++;
+	if (chan->idxd->wq_id == g_dev_cfg->total_wqs) {
+		chan->idxd->wq_id = 0;
+	}
+
+	num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size;
+
+	chan->ring_ctrl.ring_slots = spdk_bit_array_create(num_ring_slots);
+	if (chan->ring_ctrl.ring_slots == NULL) {
+		SPDK_ERRLOG("Failed to allocate bit array for ring\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * max ring slots can change as channels come and go but we
+	 * start off getting all of the slots for this work queue.
+	 */
+	chan->ring_ctrl.max_ring_slots = num_ring_slots;
+
+	/* Store the original size of the ring. */
+	chan->ring_ctrl.ring_size = num_ring_slots;
+
+	chan->ring_ctrl.desc = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_hw_desc),
+					    0x40, NULL,
+					    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (chan->ring_ctrl.desc == NULL) {
+		SPDK_ERRLOG("Failed to allocate descriptor memory\n");
+		rc = -ENOMEM;
+		goto err_desc;
+	}
+
+	chan->ring_ctrl.completions = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_comp),
+				      0x40, NULL,
+				      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (chan->ring_ctrl.completions == NULL) {
+		SPDK_ERRLOG("Failed to allocate completion memory\n");
+		rc = -ENOMEM;
+		goto err_comp;
+	}
+
+	chan->ring_ctrl.user_desc = spdk_zmalloc(TOTAL_USER_DESC * sizeof(struct idxd_hw_desc),
+				    0x40, NULL,
+				    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (chan->ring_ctrl.user_desc == NULL) {
+		SPDK_ERRLOG("Failed to allocate batch descriptor memory\n");
+		rc = -ENOMEM;
+		goto err_user_desc;
+	}
+
+	/* Each slot on the ring reserves DESC_PER_BATCH elemnts in user_desc. */
+	chan->ring_ctrl.user_ring_slots = spdk_bit_array_create(NUM_BATCHES);
+	if (chan->ring_ctrl.user_ring_slots == NULL) {
+		SPDK_ERRLOG("Failed to allocate bit array for user ring\n");
+		rc = -ENOMEM;
+		goto err_user_ring;
+	}
+
+	chan->ring_ctrl.user_completions = spdk_zmalloc(TOTAL_USER_DESC * sizeof(struct idxd_comp),
+					   0x40, NULL,
+					   SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (chan->ring_ctrl.user_completions == NULL) {
+		SPDK_ERRLOG("Failed to allocate user completion memory\n");
+		rc = -ENOMEM;
+		goto err_user_comp;
+	}
+
+	chan->ring_ctrl.portal = (char *)chan->idxd->portals + chan->idxd->wq_id * PORTAL_SIZE;
+
+	return 0;
+
+err_user_comp:
+	spdk_bit_array_free(&chan->ring_ctrl.user_ring_slots);
+err_user_ring:
+	spdk_free(chan->ring_ctrl.user_desc);
+err_user_desc:
+	spdk_free(chan->ring_ctrl.completions);
+err_comp:
+	spdk_free(chan->ring_ctrl.desc);
+err_desc:
+	spdk_bit_array_free(&chan->ring_ctrl.ring_slots);
+
+	return rc;
+}
+
+/* Used for control commands, not for descriptor submission. */
+static int
+idxd_wait_cmd(struct spdk_idxd_device *idxd, int _timeout)
+{
+	uint32_t timeout = _timeout;
+	union idxd_cmdsts_reg cmd_status = {};
+
+	cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET);
+	while (cmd_status.active && --timeout) {
+		usleep(1);
+		cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET);
+	}
+
+	/* Check for timeout */
+	if (timeout == 0 && cmd_status.active) {
+		SPDK_ERRLOG("Command timeout, waited %u\n", _timeout);
+		return -EBUSY;
+	}
+
+	/* Check for error */
+	if (cmd_status.err) {
+		SPDK_ERRLOG("Command status reg reports error 0x%x\n", cmd_status.err);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+_idxd_drain(struct spdk_idxd_io_channel *chan)
+{
+	uint32_t index;
+	int set = 0;
+
+	do {
+		spdk_idxd_process_events(chan);
+		set = 0;
+		for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) {
+			set |= spdk_bit_array_get(chan->ring_ctrl.ring_slots, index);
+		}
+	} while (set);
+}
+
+int
+spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels)
+{
+	uint32_t num_ring_slots;
+	int rc;
+	struct idxd_batch *batch;
+
+	_idxd_drain(chan);
+
+	assert(spdk_bit_array_count_set(chan->ring_ctrl.ring_slots) == 0);
+
+	if (num_channels == 0) {
+		spdk_free(chan->ring_ctrl.completions);
+		spdk_free(chan->ring_ctrl.desc);
+		spdk_bit_array_free(&chan->ring_ctrl.ring_slots);
+		spdk_free(chan->ring_ctrl.user_completions);
+		spdk_free(chan->ring_ctrl.user_desc);
+		spdk_bit_array_free(&chan->ring_ctrl.user_ring_slots);
+		while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
+			TAILQ_REMOVE(&chan->batch_pool, batch, link);
+			free(batch);
+		}
+		return 0;
+	}
+
+	num_ring_slots = chan->ring_ctrl.ring_size / num_channels;
+
+	/* re-allocate our descriptor ring for hw flow control. */
+	rc = spdk_bit_array_resize(&chan->ring_ctrl.ring_slots, num_ring_slots);
+	if (rc < 0) {
+		SPDK_ERRLOG("Unable to resize channel bit array\n");
+		return -ENOMEM;
+	}
+
+	chan->ring_ctrl.max_ring_slots = num_ring_slots;
+
+	/*
+	 * Note: The batch descriptor ring does not change with the
+	 * number of channels as descriptors on this ring do not
+	 * "count" for flow control.
+	 */
+
+	return rc;
+}
+
+/* Called via RPC to select a pre-defined configuration. */
+void
+spdk_idxd_set_config(uint32_t config_num)
+{
+	switch (config_num) {
+	case 0:
+		g_dev_cfg = &g_dev_cfg0;
+		break;
+	case 1:
+		g_dev_cfg = &g_dev_cfg1;
+		break;
+	default:
+		g_dev_cfg = &g_dev_cfg0;
+		SPDK_ERRLOG("Invalid config, using default\n");
+		break;
+	}
+}
+
+static int
+idxd_unmap_pci_bar(struct spdk_idxd_device *idxd, int bar)
+{
+	int rc = 0;
+	void *addr = NULL;
+
+	if (bar == IDXD_MMIO_BAR) {
+		addr = (void *)idxd->reg_base;
+	} else if (bar == IDXD_WQ_BAR) {
+		addr = (void *)idxd->portals;
+	}
+
+	if (addr) {
+		rc = spdk_pci_device_unmap_bar(idxd->device, 0, addr);
+	}
+	return rc;
+}
+
+static int
+idxd_map_pci_bars(struct spdk_idxd_device *idxd)
+{
+	int rc;
+	void *addr;
+	uint64_t phys_addr, size;
+
+	rc = spdk_pci_device_map_bar(idxd->device, IDXD_MMIO_BAR, &addr, &phys_addr, &size);
+	if (rc != 0 || addr == NULL) {
+		SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc);
+		return -1;
+	}
+	idxd->reg_base = addr;
+
+	rc = spdk_pci_device_map_bar(idxd->device, IDXD_WQ_BAR, &addr, &phys_addr, &size);
+	if (rc != 0 || addr == NULL) {
+		SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc);
+		rc = idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
+		if (rc) {
+			SPDK_ERRLOG("unable to unmap MMIO bar\n");
+		}
+		return -EINVAL;
+	}
+	idxd->portals = addr;
+
+	return 0;
+}
+
+static int
+idxd_reset_dev(struct spdk_idxd_device *idxd)
+{
+	int rc;
+
+	_idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_RESET_DEVICE << IDXD_CMD_SHIFT);
+	rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
+	if (rc < 0) {
+		SPDK_ERRLOG("Error resetting device %u\n", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Build group config based on getting info from the device combined
+ * with the defined configuration. Once built, it is written to the
+ * device.
+ */
+static int
+idxd_group_config(struct spdk_idxd_device *idxd)
+{
+	int i;
+	uint64_t base_offset;
+
+	assert(g_dev_cfg->num_groups <= idxd->registers.groupcap.num_groups);
+	idxd->groups = calloc(idxd->registers.groupcap.num_groups, sizeof(struct idxd_group));
+	if (idxd->groups == NULL) {
+		SPDK_ERRLOG("Failed to allocate group memory\n");
+		return -ENOMEM;
+	}
+
+	assert(g_dev_cfg->total_engines <= idxd->registers.enginecap.num_engines);
+	for (i = 0; i < g_dev_cfg->total_engines; i++) {
+		idxd->groups[i % g_dev_cfg->num_groups].grpcfg.engines |= (1 << i);
+	}
+
+	assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs);
+	for (i = 0; i < g_dev_cfg->total_wqs; i++) {
+		idxd->groups[i % g_dev_cfg->num_groups].grpcfg.wqs[0] |= (1 << i);
+	}
+
+	for (i = 0; i < g_dev_cfg->num_groups; i++) {
+		idxd->groups[i].idxd = idxd;
+		idxd->groups[i].id = i;
+
+		/* Divide BW tokens evenly */
+		idxd->groups[i].grpcfg.flags.tokens_allowed =
+			idxd->registers.groupcap.total_tokens / g_dev_cfg->num_groups;
+	}
+
+	/*
+	 * Now write the group config to the device for all groups. We write
+	 * to the max number of groups in order to 0 out the ones we didn't
+	 * configure.
+	 */
+	for (i = 0 ; i < idxd->registers.groupcap.num_groups; i++) {
+
+		base_offset = idxd->grpcfg_offset + i * 64;
+
+		/* GRPWQCFG, work queues config */
+		_idxd_write_8(idxd, base_offset, idxd->groups[i].grpcfg.wqs[0]);
+
+		/* GRPENGCFG, engine config */
+		_idxd_write_8(idxd, base_offset + CFG_ENGINE_OFFSET, idxd->groups[i].grpcfg.engines);
+
+		/* GRPFLAGS, flags config */
+		_idxd_write_8(idxd, base_offset + CFG_FLAG_OFFSET, idxd->groups[i].grpcfg.flags.raw);
+	}
+
+	return 0;
+}
+
+/*
+ * Build work queue (WQ) config based on getting info from the device combined
+ * with the defined configuration. Once built, it is written to the device.
+ */
+static int
+idxd_wq_config(struct spdk_idxd_device *idxd)
+{
+	int i, j;
+	struct idxd_wq *queue;
+	u_int32_t wq_size = idxd->registers.wqcap.total_wq_size / g_dev_cfg->total_wqs;
+
+	SPDK_NOTICELOG("Total ring slots available space 0x%x, so per work queue is 0x%x\n",
+		       idxd->registers.wqcap.total_wq_size, wq_size);
+	assert(g_dev_cfg->total_wqs <= IDXD_MAX_QUEUES);
+	assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs);
+	assert(LOG2_WQ_MAX_BATCH <= idxd->registers.gencap.max_batch_shift);
+	assert(LOG2_WQ_MAX_XFER <= idxd->registers.gencap.max_xfer_shift);
+
+	idxd->queues = calloc(1, idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq));
+	if (idxd->queues == NULL) {
+		SPDK_ERRLOG("Failed to allocate queue memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < g_dev_cfg->total_wqs; i++) {
+		queue = &idxd->queues[i];
+		queue->wqcfg.wq_size = wq_size;
+		queue->wqcfg.mode = WQ_MODE_DEDICATED;
+		queue->wqcfg.max_batch_shift = LOG2_WQ_MAX_BATCH;
+		queue->wqcfg.max_xfer_shift = LOG2_WQ_MAX_XFER;
+		queue->wqcfg.wq_state = WQ_ENABLED;
+		queue->wqcfg.priority = WQ_PRIORITY_1;
+
+		/* Not part of the config struct */
+		queue->idxd = idxd;
+		queue->group = &idxd->groups[i % g_dev_cfg->num_groups];
+	}
+
+	/*
+	 * Now write the work queue config to the device for all wq space
+	 */
+	for (i = 0 ; i < idxd->registers.wqcap.num_wqs; i++) {
+		queue = &idxd->queues[i];
+		for (j = 0 ; j < WQCFG_NUM_DWORDS; j++) {
+			_idxd_write_4(idxd, idxd->wqcfg_offset + i * 32 + j * 4,
+				      queue->wqcfg.raw[j]);
+		}
+	}
+
+	return 0;
+}
+
+static int
+idxd_device_configure(struct spdk_idxd_device *idxd)
+{
+	int i, rc = 0;
+	union idxd_offsets_register offsets_reg;
+	union idxd_genstatus_register genstatus_reg;
+
+	/*
+	 * Map BAR0 and BAR2
+	 */
+	rc = idxd_map_pci_bars(idxd);
+	if (rc) {
+		return rc;
+	}
+
+	/*
+	 * Reset the device
+	 */
+	rc = idxd_reset_dev(idxd);
+	if (rc) {
+		goto err_reset;
+	}
+
+	/*
+	 * Read in config registers
+	 */
+	idxd->registers.version = _idxd_read_4(idxd, IDXD_VERSION_OFFSET);
+	idxd->registers.gencap.raw = _idxd_read_8(idxd, IDXD_GENCAP_OFFSET);
+	idxd->registers.wqcap.raw = _idxd_read_8(idxd, IDXD_WQCAP_OFFSET);
+	idxd->registers.groupcap.raw = _idxd_read_8(idxd, IDXD_GRPCAP_OFFSET);
+	idxd->registers.enginecap.raw = _idxd_read_8(idxd, IDXD_ENGCAP_OFFSET);
+	for (i = 0; i < IDXD_OPCAP_WORDS; i++) {
+		idxd->registers.opcap.raw[i] =
+			_idxd_read_8(idxd, i * sizeof(uint64_t) + IDXD_OPCAP_OFFSET);
+	}
+	offsets_reg.raw[0] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET);
+	offsets_reg.raw[1] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET + sizeof(uint64_t));
+	idxd->grpcfg_offset = offsets_reg.grpcfg * IDXD_TABLE_OFFSET_MULT;
+	idxd->wqcfg_offset = offsets_reg.wqcfg * IDXD_TABLE_OFFSET_MULT;
+	idxd->ims_offset = offsets_reg.ims * IDXD_TABLE_OFFSET_MULT;
+	idxd->msix_perm_offset = offsets_reg.msix_perm  * IDXD_TABLE_OFFSET_MULT;
+	idxd->perfmon_offset = offsets_reg.perfmon * IDXD_TABLE_OFFSET_MULT;
+
+	/*
+	 * Configure groups and work queues.
+	 */
+	rc = idxd_group_config(idxd);
+	if (rc) {
+		goto err_group_cfg;
+	}
+
+	rc = idxd_wq_config(idxd);
+	if (rc) {
+		goto err_wq_cfg;
+	}
+
+	/*
+	 * Enable the device
+	 */
+	genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET);
+	assert(genstatus_reg.state == IDXD_DEVICE_STATE_DISABLED);
+
+	_idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_ENABLE_DEV << IDXD_CMD_SHIFT);
+	rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
+	genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET);
+	if ((rc < 0) || (genstatus_reg.state != IDXD_DEVICE_STATE_ENABLED)) {
+		rc = -EINVAL;
+		SPDK_ERRLOG("Error enabling device %u\n", rc);
+		goto err_device_enable;
+	}
+
+	genstatus_reg.raw = spdk_mmio_read_4((uint32_t *)(idxd->reg_base + IDXD_GENSTATUS_OFFSET));
+	assert(genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED);
+
+	/*
+	 * Enable the work queues that we've configured
+	 */
+	for (i = 0; i < g_dev_cfg->total_wqs; i++) {
+		_idxd_write_4(idxd, IDXD_CMD_OFFSET,
+			      (IDXD_ENABLE_WQ << IDXD_CMD_SHIFT) | i);
+		rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
+		if (rc < 0) {
+			SPDK_ERRLOG("Error enabling work queues 0x%x\n", rc);
+			goto err_wq_enable;
+		}
+	}
+
+	if ((rc == 0) && (genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED)) {
+		SPDK_NOTICELOG("Device enabled, version 0x%x gencap: 0x%lx\n",
+			       idxd->registers.version,
+			       idxd->registers.gencap.raw);
+
+	}
+
+	return rc;
+err_wq_enable:
+err_device_enable:
+	free(idxd->queues);
+err_wq_cfg:
+	free(idxd->groups);
+err_group_cfg:
+err_reset:
+	idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
+	idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
+
+	return rc;
+}
+
+static void
+idxd_device_destruct(struct spdk_idxd_device *idxd)
+{
+	idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
+	idxd_unmap_pci_bar(idxd, IDXD_WQ_BAR);
+	free(idxd->groups);
+	free(idxd->queues);
+	free(idxd);
+}
+
+/* Caller must hold g_driver_lock */
+static struct spdk_idxd_device *
+idxd_attach(struct spdk_pci_device *device)
+{
+	struct spdk_idxd_device *idxd;
+	uint32_t cmd_reg;
+	int rc;
+
+	idxd = calloc(1, sizeof(struct spdk_idxd_device));
+	if (idxd == NULL) {
+		SPDK_ERRLOG("Failed to allocate memory for idxd device.\n");
+		return NULL;
+	}
+
+	idxd->device = device;
+
+	/* Enable PCI busmaster. */
+	spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
+	cmd_reg |= 0x4;
+	spdk_pci_device_cfg_write32(device, cmd_reg, 4);
+
+	rc = idxd_device_configure(idxd);
+	if (rc) {
+		goto err;
+	}
+
+	return idxd;
+err:
+	idxd_device_destruct(idxd);
+	return NULL;
+}
+
+struct idxd_enum_ctx {
+	spdk_idxd_probe_cb probe_cb;
+	spdk_idxd_attach_cb attach_cb;
+	void *cb_ctx;
+};
+
+/* This function must only be called while holding g_driver_lock */
+static int
+idxd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
+{
+	struct idxd_enum_ctx *enum_ctx = ctx;
+	struct spdk_idxd_device *idxd;
+
+	if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
+		idxd = idxd_attach(pci_dev);
+		if (idxd == NULL) {
+			SPDK_ERRLOG("idxd_attach() failed\n");
+			return -EINVAL;
+		}
+
+		enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, idxd);
+	}
+
+	return 0;
+}
+
+int
+spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb)
+{
+	int rc;
+	struct idxd_enum_ctx enum_ctx;
+
+	enum_ctx.probe_cb = probe_cb;
+	enum_ctx.attach_cb = attach_cb;
+	enum_ctx.cb_ctx = cb_ctx;
+
+	pthread_mutex_lock(&g_driver_lock);
+	rc = spdk_pci_enumerate(spdk_pci_idxd_get_driver(), idxd_enum_cb, &enum_ctx);
+	pthread_mutex_unlock(&g_driver_lock);
+
+	return rc;
+}
+
+void
+spdk_idxd_detach(struct spdk_idxd_device *idxd)
+{
+	idxd_device_destruct(idxd);
+}
+
+static struct idxd_hw_desc *
+_idxd_prep_command(struct spdk_idxd_io_channel *chan, spdk_idxd_req_cb cb_fn,
+		   void *cb_arg, struct idxd_batch *batch)
+{
+	uint32_t index;
+	struct idxd_hw_desc *desc;
+	struct idxd_comp *comp;
+
+	index = spdk_bit_array_find_first_clear(chan->ring_ctrl.ring_slots, 0);
+	if (index == UINT32_MAX) {
+		/* ran out of ring slots */
+		return NULL;
+	}
+
+	spdk_bit_array_set(chan->ring_ctrl.ring_slots, index);
+
+	desc = &chan->ring_ctrl.desc[index];
+	comp = &chan->ring_ctrl.completions[index];
+
+	desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION;
+	desc->completion_addr = (uintptr_t)&comp->hw;
+	comp->cb_arg = cb_arg;
+	comp->cb_fn = cb_fn;
+	if (batch) {
+		comp->batch = batch;
+		batch->batch_desc_index = index;
+	}
+
+	return desc;
+}
+
+int
+spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, void *dst, const void *src,
+		      uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, NULL);
+	if (desc == NULL) {
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_MEMMOVE;
+	desc->src_addr = (uintptr_t)src;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->xfer_size = nbytes;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+/* Dual-cast copies the same source to two separate destination buffers. */
+int
+spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *dst2,
+			  const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
+		SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
+		return -EINVAL;
+	}
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, NULL);
+	if (desc == NULL) {
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_DUALCAST;
+	desc->src_addr = (uintptr_t)src;
+	desc->dst_addr = (uintptr_t)dst1;
+	desc->dest2 = (uintptr_t)dst2;
+	desc->xfer_size = nbytes;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+int
+spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan, void *src1, const void *src2,
+			 uint64_t nbytes,
+			 spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, NULL);
+	if (desc == NULL) {
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_COMPARE;
+	desc->src_addr = (uintptr_t)src1;
+	desc->src2_addr = (uintptr_t)src2;
+	desc->xfer_size = nbytes;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+int
+spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, void *dst, uint64_t fill_pattern,
+		      uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, NULL);
+	if (desc == NULL) {
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_MEMFILL;
+	desc->pattern = fill_pattern;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->xfer_size = nbytes;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+int
+spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan, uint32_t *dst, void *src,
+			uint32_t seed, uint64_t nbytes,
+			spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, NULL);
+	if (desc == NULL) {
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_CRC32C_GEN;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->src_addr = (uintptr_t)src;
+	desc->flags &= IDXD_CLEAR_CRC_FLAGS;
+	desc->crc32c.seed = seed;
+	desc->xfer_size = nbytes;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+uint32_t
+spdk_idxd_batch_get_max(void)
+{
+	return DESC_PER_BATCH; /* TODO maybe add startup RPC to set this */
+}
+
+struct idxd_batch *
+spdk_idxd_batch_create(struct spdk_idxd_io_channel *chan)
+{
+	struct idxd_batch *batch = NULL;
+
+	if (!TAILQ_EMPTY(&chan->batch_pool)) {
+		batch = TAILQ_FIRST(&chan->batch_pool);
+		TAILQ_REMOVE(&chan->batch_pool, batch, link);
+	} else {
+		/* The application needs to handle this. */
+		return NULL;
+	}
+
+	batch->batch_num = spdk_bit_array_find_first_clear(chan->ring_ctrl.user_ring_slots, 0);
+	if (batch->batch_num == UINT32_MAX) {
+		/* ran out of ring slots, the application needs to handle this. */
+		TAILQ_INSERT_TAIL(&chan->batch_pool, batch, link);
+		return NULL;
+	}
+
+	spdk_bit_array_set(chan->ring_ctrl.user_ring_slots, batch->batch_num);
+
+	/*
+	 * Find the first descriptor address for the given batch. The
+	 * descriptor ring used for user desctipors is allocated in
+	 * units of DESC_PER_BATCH.  The actual index is in units of
+	 * one descriptor.
+	 */
+	batch->start_index = batch->cur_index = batch->batch_num * DESC_PER_BATCH;
+
+	TAILQ_INSERT_TAIL(&chan->batches, batch, link);
+	SPDK_DEBUGLOG(SPDK_LOG_IDXD, "New batch %p num %u\n", batch, batch->batch_num);
+
+	return batch;
+}
+
+static bool
+_does_batch_exist(struct idxd_batch *batch, struct spdk_idxd_io_channel *chan)
+{
+	bool found = false;
+	struct idxd_batch *cur_batch;
+
+	TAILQ_FOREACH(cur_batch, &chan->batches, link) {
+		if (cur_batch == batch) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+int
+spdk_idxd_batch_cancel(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch)
+{
+	if (_does_batch_exist(batch, chan) == false) {
+		SPDK_ERRLOG("Attempt to cancel a batch that doesn't exist\n.");
+		return -EINVAL;
+	}
+
+	if (batch->remaining > 0) {
+		SPDK_ERRLOG("Cannot cancel batch, already submitted to HW\n.");
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&chan->batches, batch, link);
+	spdk_bit_array_clear(chan->ring_ctrl.user_ring_slots, batch->batch_num);
+	TAILQ_INSERT_TAIL(&chan->batch_pool, batch, link);
+
+	return 0;
+}
+
+int
+spdk_idxd_batch_submit(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+		       spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	if (_does_batch_exist(batch, chan) == false) {
+		SPDK_ERRLOG("Attempt to submit a batch that doesn't exist\n.");
+		return -EINVAL;
+	}
+
+	/* Common prep. */
+	desc = _idxd_prep_command(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		SPDK_DEBUGLOG(SPDK_LOG_IDXD, "Can't submit batch %p busy batch num %u\n", batch, batch->batch_num);
+		return -EBUSY;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_BATCH;
+	desc->desc_list_addr = (uintptr_t)&chan->ring_ctrl.user_desc[batch->start_index];
+	desc->desc_count = batch->cur_index - batch->start_index;
+	assert(desc->desc_count <= DESC_PER_BATCH);
+
+	if (desc->desc_count < MIN_USER_DESC_COUNT) {
+		SPDK_ERRLOG("Attempt to submit a batch without at least %u operations.\n",
+			    MIN_USER_DESC_COUNT);
+		return -EINVAL;
+	}
+
+	/* Total completions for the batch = num desc plus 1 for the batch desc itself. */
+	batch->remaining = desc->desc_count + 1;
+
+	/* Submit operation. */
+	movdir64b(chan->ring_ctrl.portal, desc);
+
+	return 0;
+}
+
+static struct idxd_hw_desc *
+_idxd_prep_batch_cmd(struct spdk_idxd_io_channel *chan, spdk_idxd_req_cb cb_fn,
+		     void *cb_arg, struct idxd_batch *batch)
+{
+	struct idxd_hw_desc *desc;
+	struct idxd_comp *comp;
+
+	if (_does_batch_exist(batch, chan) == false) {
+		SPDK_ERRLOG("Attempt to add to a batch that doesn't exist\n.");
+		return NULL;
+	}
+
+	if ((batch->cur_index - batch->start_index) == DESC_PER_BATCH) {
+		SPDK_ERRLOG("Attempt to add to a batch that is already full\n.");
+		return NULL;
+	}
+
+	desc = &chan->ring_ctrl.user_desc[batch->cur_index];
+	comp = &chan->ring_ctrl.user_completions[batch->cur_index];
+	SPDK_DEBUGLOG(SPDK_LOG_IDXD, "Prep batch %p index %u\n", batch, batch->cur_index);
+
+	batch->cur_index++;
+	assert(batch->cur_index > batch->start_index);
+
+	desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION;
+	desc->completion_addr = (uintptr_t)&comp->hw;
+	comp->cb_arg = cb_arg;
+	comp->cb_fn = cb_fn;
+	comp->batch = batch;
+
+	return desc;
+}
+
+int
+spdk_idxd_batch_prep_copy(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+			  void *dst, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_batch_cmd(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		return -EINVAL;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_MEMMOVE;
+	desc->src_addr = (uintptr_t)src;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->xfer_size = nbytes;
+
+	return 0;
+}
+
+int
+spdk_idxd_batch_prep_fill(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+			  void *dst, uint64_t fill_pattern, uint64_t nbytes,
+			  spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_batch_cmd(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		return -EINVAL;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_MEMFILL;
+	desc->pattern = fill_pattern;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->xfer_size = nbytes;
+
+	return 0;
+}
+
+int
+spdk_idxd_batch_prep_dualcast(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+			      void *dst1, void *dst2, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
+		SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
+		return -EINVAL;
+	}
+
+	/* Common prep. */
+	desc = _idxd_prep_batch_cmd(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		return -EINVAL;
+	}
+	desc->opcode = IDXD_OPCODE_DUALCAST;
+	desc->src_addr = (uintptr_t)src;
+	desc->dst_addr = (uintptr_t)dst1;
+	desc->dest2 = (uintptr_t)dst2;
+	desc->xfer_size = nbytes;
+
+	return 0;
+}
+
+int
+spdk_idxd_batch_prep_crc32c(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+			    uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+			    spdk_idxd_req_cb cb_fn, void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_batch_cmd(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		return -EINVAL;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_CRC32C_GEN;
+	desc->dst_addr = (uintptr_t)dst;
+	desc->src_addr = (uintptr_t)src;
+	desc->flags &= IDXD_CLEAR_CRC_FLAGS;
+	desc->crc32c.seed = seed;
+	desc->xfer_size = nbytes;
+
+	return 0;
+}
+
+int
+spdk_idxd_batch_prep_compare(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+			     void *src1, void *src2, uint64_t nbytes, spdk_idxd_req_cb cb_fn,
+			     void *cb_arg)
+{
+	struct idxd_hw_desc *desc;
+
+	/* Common prep. */
+	desc = _idxd_prep_batch_cmd(chan, cb_fn, cb_arg, batch);
+	if (desc == NULL) {
+		return -EINVAL;
+	}
+
+	/* Command specific. */
+	desc->opcode = IDXD_OPCODE_COMPARE;
+	desc->src_addr = (uintptr_t)src1;
+	desc->src2_addr = (uintptr_t)src2;
+	desc->xfer_size = nbytes;
+
+	return 0;
+}
+
+static void
+_dump_error_reg(struct spdk_idxd_io_channel *chan)
+{
+	uint64_t sw_error_0;
+	uint16_t i;
+
+	sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET);
+
+	SPDK_NOTICELOG("SW Error bits set:");
+	for (i = 0; i < CHAR_BIT; i++) {
+		if ((1ULL << i) & sw_error_0) {
+			SPDK_NOTICELOG("    %d\n", i);
+		}
+	}
+	SPDK_NOTICELOG("SW Error error code: %#x\n", (uint8_t)(sw_error_0 >> 8));
+	SPDK_NOTICELOG("SW Error WQ index: %u\n", (uint8_t)(sw_error_0 >> 16));
+	SPDK_NOTICELOG("SW Error Operation: %u\n", (uint8_t)(sw_error_0 >> 32));
+}
+
+static void
+_free_batch(struct idxd_batch *batch, struct spdk_idxd_io_channel *chan,
+	    struct idxd_comp *comp)
+{
+	TAILQ_REMOVE(&chan->batches, batch, link);
+	TAILQ_INSERT_TAIL(&chan->batch_pool, batch, link);
+	comp->batch = NULL;
+	spdk_bit_array_clear(chan->ring_ctrl.user_ring_slots, batch->batch_num);
+	spdk_bit_array_clear(chan->ring_ctrl.ring_slots, batch->batch_desc_index);
+}
+
+static void
+_spdk_idxd_process_batch_events(struct spdk_idxd_io_channel *chan)
+{
+	uint16_t index;
+	struct idxd_comp *comp;
+	uint64_t sw_error_0;
+	int status = 0;
+	struct idxd_batch *batch;
+
+	/*
+	 * We don't check the bit array for user completions as there's only
+	 * one bit per per batch.
+	 */
+	for (index = 0; index < TOTAL_USER_DESC; index++) {
+		comp = &chan->ring_ctrl.user_completions[index];
+		if (comp->hw.status == 1) {
+			struct idxd_hw_desc *desc;
+
+			sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET);
+			if (sw_error_0 & 0x1) {
+				_dump_error_reg(chan);
+				status = -EINVAL;
+			}
+
+			desc = &chan->ring_ctrl.user_desc[index];
+			switch (desc->opcode) {
+			case IDXD_OPCODE_CRC32C_GEN:
+				*(uint32_t *)desc->dst_addr = comp->hw.crc32c_val;
+				*(uint32_t *)desc->dst_addr ^= ~0;
+				break;
+			case IDXD_OPCODE_COMPARE:
+				if (status == 0) {
+					status = comp->hw.result;
+				}
+				break;
+			case IDXD_OPCODE_MEMFILL:
+			case IDXD_OPCODE_DUALCAST:
+			case IDXD_OPCODE_MEMMOVE:
+				break;
+			default:
+				assert(false);
+				break;
+			}
+
+			/* The hw will complete all user desc first before the batch
+			 * desc (see spec for configuration exceptions) however
+			 * because of the order that we check for comps in the poller
+			 * we may "see" them in a different order than they actually
+			 * completed in.
+			 */
+			batch = comp->batch;
+			assert(batch->remaining > 0);
+			if (--batch->remaining == 0) {
+				_free_batch(batch, chan, comp);
+			}
+
+			comp->cb_fn((void *)comp->cb_arg, status);
+			comp->hw.status = status = 0;
+		}
+	}
+}
+
+/*
+ * TODO: Experiment with different methods of reaping completions for performance
+ * once we have real silicon.
+ */
+void
+spdk_idxd_process_events(struct spdk_idxd_io_channel *chan)
+{
+	uint16_t index;
+	struct idxd_comp *comp;
+	uint64_t sw_error_0;
+	int status = 0;
+	struct idxd_batch *batch;
+
+	if (!TAILQ_EMPTY(&chan->batches)) {
+		_spdk_idxd_process_batch_events(chan);
+	}
+
+	for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) {
+		if (spdk_bit_array_get(chan->ring_ctrl.ring_slots, index)) {
+			comp = &chan->ring_ctrl.completions[index];
+			if (comp->hw.status == 1) {
+				struct idxd_hw_desc *desc;
+
+				sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET);
+				if (sw_error_0 & 0x1) {
+					_dump_error_reg(chan);
+					status = -EINVAL;
+				}
+
+				desc = &chan->ring_ctrl.desc[index];
+				switch (desc->opcode) {
+				case IDXD_OPCODE_BATCH:
+					/* The hw will complete all user desc first before the batch
+					 * desc (see spec for configuration exceptions) however
+					 * because of the order that we check for comps in the poller
+					 * we may "see" them in a different order than they actually
+					 * completed in.
+					 */
+					batch = comp->batch;
+					assert(batch->remaining > 0);
+					if (--batch->remaining == 0) {
+						_free_batch(batch, chan, comp);
+					}
+					break;
+				case IDXD_OPCODE_CRC32C_GEN:
+					*(uint32_t *)desc->dst_addr = comp->hw.crc32c_val;
+					*(uint32_t *)desc->dst_addr ^= ~0;
+					break;
+				case IDXD_OPCODE_COMPARE:
+					if (status == 0) {
+						status = comp->hw.result;
+					}
+					break;
+				}
+
+				comp->cb_fn(comp->cb_arg, status);
+				comp->hw.status = status = 0;
+				if (desc->opcode != IDXD_OPCODE_BATCH) {
+					spdk_bit_array_clear(chan->ring_ctrl.ring_slots, index);
+				}
+			}
+		}
+	}
+}
+
+SPDK_LOG_REGISTER_COMPONENT("idxd", SPDK_LOG_IDXD)
diff --git a/src/spdk/lib/idxd/idxd.h b/src/spdk/lib/idxd/idxd.h
new file mode 100644
index 000000000..09d021152
--- /dev/null
+++ b/src/spdk/lib/idxd/idxd.h
@@ -0,0 +1,188 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IDXD_H__
+#define __IDXD_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/idxd.h"
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+#include "spdk/bit_array.h"
+
+#include "idxd_spec.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* TODO: get the gcc intrinsic to work. */
+#define nop() asm volatile ("nop")
+static inline void movdir64b(void *dst, const void *src)
+{
+	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+		     : "=m"(*(char *)dst)
+		     : "d"(src), "a"(dst));
+}
+
+#define IDXD_REGISTER_TIMEOUT_US		50
+#define IDXD_DRAIN_TIMEOUT_US			500000
+
+/* TODO: make some of these RPC selectable */
+#define WQ_MODE_DEDICATED	1
+#define LOG2_WQ_MAX_BATCH	8  /* 2^8 = 256 */
+#define LOG2_WQ_MAX_XFER	30 /* 2^30 = 1073741824 */
+#define WQCFG_NUM_DWORDS	8
+#define WQ_PRIORITY_1		1
+#define IDXD_MAX_QUEUES		64
+
+#define TOTAL_USER_DESC		(1 << LOG2_WQ_MAX_BATCH)
+#define DESC_PER_BATCH		16 /* TODO maybe make this a startup RPC */
+#define NUM_BATCHES		(TOTAL_USER_DESC / DESC_PER_BATCH)
+#define MIN_USER_DESC_COUNT	2
+
+struct idxd_batch {
+	uint32_t			batch_desc_index;
+	uint32_t			batch_num;
+	uint32_t			cur_index;
+	uint32_t			start_index;
+	uint32_t			remaining;
+	TAILQ_ENTRY(idxd_batch)		link;
+};
+
+struct device_config {
+	uint8_t		config_num;
+	uint8_t		num_wqs_per_group;
+	uint8_t		num_engines_per_group;
+	uint8_t		num_groups;
+	uint16_t	total_wqs;
+	uint16_t	total_engines;
+};
+
+struct idxd_ring_control {
+	void				*portal;
+
+	uint16_t			ring_size;
+
+	/*
+	 * Rings for this channel, one for descriptors and one
+	 * for completions, share the same index. Batch descriptors
+	 * are managed independently from data descriptors.
+	 */
+	struct idxd_hw_desc		*desc;
+	struct idxd_comp		*completions;
+	struct idxd_hw_desc		*user_desc;
+	struct idxd_comp		*user_completions;
+
+	/*
+	 * We use one bit array to track ring slots for both
+	 * desc and completions.
+	 */
+	struct spdk_bit_array		*ring_slots;
+	uint32_t			max_ring_slots;
+
+	/*
+	 * We use a separate bit array to track ring slots for
+	 * descriptors submitted via the user in a batch.
+	 */
+	struct spdk_bit_array		*user_ring_slots;
+};
+
+struct spdk_idxd_io_channel {
+	struct spdk_idxd_device		*idxd;
+	struct idxd_ring_control	ring_ctrl;
+	TAILQ_HEAD(, idxd_batch)	batch_pool; /* free batches */
+	TAILQ_HEAD(, idxd_batch)	batches; /* in use batches */
+};
+
+struct pci_dev_id {
+	int vendor_id;
+	int device_id;
+};
+
+struct idxd_group {
+	struct spdk_idxd_device	*idxd;
+	struct idxd_grpcfg	grpcfg;
+	struct pci_dev_id	pcidev;
+	int			num_engines;
+	int			num_wqs;
+	int			id;
+	uint8_t			tokens_allowed;
+	bool			use_token_limit;
+	uint8_t			tokens_reserved;
+	int			tc_a;
+	int			tc_b;
+};
+
+/*
+ * This struct wraps the hardware completion record which is 32 bytes in
+ * size and must be 32 byte aligned.
+ */
+struct idxd_comp {
+	struct idxd_hw_comp_record	hw;
+	void				*cb_arg;
+	spdk_idxd_req_cb		cb_fn;
+	struct idxd_batch		*batch;
+	uint64_t			pad2;
+} __attribute__((packed));
+SPDK_STATIC_ASSERT(sizeof(struct idxd_comp) == 64, "size mismatch");
+
+struct idxd_wq {
+	struct spdk_idxd_device		*idxd;
+	struct idxd_group		*group;
+	union idxd_wqcfg		wqcfg;
+};
+
+struct spdk_idxd_device {
+	struct spdk_pci_device		*device;
+	void				*reg_base;
+	void				*portals;
+	int				socket_id;
+	int				wq_id;
+
+	struct idxd_registers		registers;
+	uint32_t			ims_offset;
+	uint32_t			msix_perm_offset;
+	uint32_t			wqcfg_offset;
+	uint32_t			grpcfg_offset;
+	uint32_t			perfmon_offset;
+	struct idxd_group		*groups;
+	struct idxd_wq			*queues;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __IDXD_H__ */
diff --git a/src/spdk/lib/idxd/idxd_spec.h b/src/spdk/lib/idxd/idxd_spec.h
new file mode 100644
index 000000000..51d52cdcc
--- /dev/null
+++ b/src/spdk/lib/idxd/idxd_spec.h
@@ -0,0 +1,503 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * IDXD specification definitions
+ */
+
+#ifndef SPDK_IDXD_SPEC_H
+#define SPDK_IDXD_SPEC_H
+
+#include "spdk/stdinc.h"
+#include "spdk/assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IDXD_MMIO_BAR			0
+#define IDXD_WQ_BAR			2
+#define PORTAL_SIZE			(4096 * 4)
+
+#define CFG_ENGINE_OFFSET		0x20
+#define CFG_FLAG_OFFSET			0x28
+
+#define IDXD_CMD_SHIFT			20
+
+#define IDXD_VERSION_OFFSET		0x00
+#define IDXD_GENCAP_OFFSET		0x10
+#define IDXD_WQCAP_OFFSET		0x20
+#define IDXD_GRPCAP_OFFSET		0x30
+#define IDXD_OPCAP_OFFSET		0x40
+#define IDXD_ENGCAP_OFFSET		0x38
+#define IDXD_OPCAP_OFFSET		0x40
+#define IDXD_TABLE_OFFSET		0x60
+#define IDXD_GENCFG_OFFSET		0x80
+#define IDXD_GENCTRL_OFFSET		0x88
+#define IDXD_GENSTATUS_OFFSET		0x90
+#define IDXD_INTCAUSE_OFFSET		0x98
+#define IDXD_CMD_OFFSET			0xa0
+#define IDXD_CMDSTS_OFFSET		0xa8
+#define IDXD_SWERR_OFFSET		0xc0
+#define IDXD_TABLE_OFFSET_MULT		0x100
+
+#define IDXD_OPCAP_WORDS		0x4
+
+#define IDXD_CLEAR_CRC_FLAGS		0xFFFFu
+
+#define IDXD_FLAG_FENCE                 (1 << 0)
+#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
+#define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
+#define IDXD_FLAG_CACHE_CONTROL         (1 << 8)
+
+/*
+ * IDXD is a family of devices, DSA is the only currently
+ * supported one.
+ */
+enum dsa_completion_status {
+	IDXD_COMP_NONE			= 0,
+	IDXD_COMP_SUCCESS		= 1,
+	IDXD_COMP_SUCCESS_PRED		= 2,
+	IDXD_COMP_PAGE_FAULT_NOBOF	= 3,
+	IDXD_COMP_PAGE_FAULT_IR		= 4,
+	IDXD_COMP_BATCH_FAIL		= 5,
+	IDXD_COMP_BATCH_PAGE_FAULT	= 6,
+	IDXD_COMP_DR_OFFSET_NOINC	= 7,
+	IDXD_COMP_DR_OFFSET_ERANGE	= 8,
+	IDXD_COMP_DIF_ERR		= 9,
+	IDXD_COMP_BAD_OPCODE		= 16,
+	IDXD_COMP_INVALID_FLAGS		= 17,
+	IDXD_COMP_NOZERO_RESERVE	= 18,
+	IDXD_COMP_XFER_ERANGE		= 19,
+	IDXD_COMP_DESC_CNT_ERANGE	= 20,
+	IDXD_COMP_DR_ERANGE		= 21,
+	IDXD_COMP_OVERLAP_BUFFERS	= 22,
+	IDXD_COMP_DCAST_ERR		= 23,
+	IDXD_COMP_DESCLIST_ALIGN	= 24,
+	IDXD_COMP_INT_HANDLE_INVAL	= 25,
+	IDXD_COMP_CRA_XLAT		= 26,
+	IDXD_COMP_CRA_ALIGN		= 27,
+	IDXD_COMP_ADDR_ALIGN		= 28,
+	IDXD_COMP_PRIV_BAD		= 29,
+	IDXD_COMP_TRAFFIC_CLASS_CONF	= 30,
+	IDXD_COMP_PFAULT_RDBA		= 31,
+	IDXD_COMP_HW_ERR1		= 32,
+	IDXD_COMP_HW_ERR_DRB		= 33,
+	IDXD_COMP_TRANSLATION_FAIL	= 34,
+};
+
+enum idxd_wq_state {
+	WQ_DISABLED	= 0,
+	WQ_ENABLED	= 1,
+};
+
+enum idxd_wq_flag {
+	WQ_FLAG_DEDICATED	= 0,
+	WQ_FLAG_BOF		= 1,
+};
+
+enum idxd_wq_type {
+	WQT_NONE	= 0,
+	WQT_KERNEL	= 1,
+	WQT_USER	= 2,
+	WQT_MDEV	= 3,
+};
+
+enum idxd_dev_state {
+	IDXD_DEVICE_STATE_DISABLED	= 0,
+	IDXD_DEVICE_STATE_ENABLED	= 1,
+	IDXD_DEVICE_STATE_DRAIN		= 2,
+	IDXD_DEVICE_STATE_HALT		= 3,
+};
+
+enum idxd_device_reset_type {
+	IDXD_DEVICE_RESET_SOFTWARE	= 0,
+	IDXD_DEVICE_RESET_FLR		= 1,
+	IDXD_DEVICE_RESET_WARM		= 2,
+	IDXD_DEVICE_RESET_COLD		= 3,
+};
+
+enum idxd_cmds {
+	IDXD_ENABLE_DEV		= 1,
+	IDXD_DISABLE_DEV	= 2,
+	IDXD_DRAIN_ALL		= 3,
+	IDXD_ABORT_ALL		= 4,
+	IDXD_RESET_DEVICE	= 5,
+	IDXD_ENABLE_WQ		= 6,
+	IDXD_DISABLE_WQ		= 7,
+	IDXD_DRAIN_WQ		= 8,
+	IDXD_ABORT_WQ		= 9,
+	IDXD_RESET_WQ		= 10,
+};
+
+enum idxd_cmdsts_err {
+	IDXD_CMDSTS_SUCCESS		= 0,
+	IDXD_CMDSTS_INVAL_CMD		= 1,
+	IDXD_CMDSTS_INVAL_WQIDX		= 2,
+	IDXD_CMDSTS_HW_ERR		= 3,
+	IDXD_CMDSTS_ERR_DEV_ENABLED	= 16,
+	IDXD_CMDSTS_ERR_CONFIG		= 17,
+	IDXD_CMDSTS_ERR_BUSMASTER_EN	= 18,
+	IDXD_CMDSTS_ERR_PASID_INVAL	= 19,
+	IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE	= 20,
+	IDXD_CMDSTS_ERR_GRP_CONFIG	= 21,
+	IDXD_CMDSTS_ERR_GRP_CONFIG2	= 22,
+	IDXD_CMDSTS_ERR_GRP_CONFIG3	= 23,
+	IDXD_CMDSTS_ERR_GRP_CONFIG4	= 24,
+	IDXD_CMDSTS_ERR_DEV_NOTEN	= 32,
+	IDXD_CMDSTS_ERR_WQ_ENABLED	= 33,
+	IDXD_CMDSTS_ERR_WQ_SIZE		= 34,
+	IDXD_CMDSTS_ERR_WQ_PRIOR	= 35,
+	IDXD_CMDSTS_ERR_WQ_MODE		= 36,
+	IDXD_CMDSTS_ERR_BOF_EN		= 37,
+	IDXD_CMDSTS_ERR_PASID_EN	= 38,
+	IDXD_CMDSTS_ERR_MAX_BATCH_SIZE	= 39,
+	IDXD_CMDSTS_ERR_MAX_XFER_SIZE	= 40,
+	IDXD_CMDSTS_ERR_DIS_DEV_EN	= 49,
+	IDXD_CMDSTS_ERR_DEV_NOT_EN	= 50,
+	IDXD_CMDSTS_ERR_INVAL_INT_IDX	= 65,
+	IDXD_CMDSTS_ERR_NO_HANDLE	= 66,
+};
+
+enum idxd_wq_hw_state {
+	IDXD_WQ_DEV_DISABLED	= 0,
+	IDXD_WQ_DEV_ENABLED	= 1,
+	IDXD_WQ_DEV_BUSY	= 2,
+};
+
+struct idxd_hw_desc {
+	uint32_t	pasid: 20;
+	uint32_t	rsvd: 11;
+	uint32_t	priv: 1;
+	uint32_t	flags: 24;
+	uint32_t	opcode: 8;
+	uint64_t	completion_addr;
+	union {
+		uint64_t	src_addr;
+		uint64_t	readback_addr;
+		uint64_t	pattern;
+		uint64_t	desc_list_addr;
+	};
+	union {
+		uint64_t	dst_addr;
+		uint64_t	readback_addr2;
+		uint64_t	src2_addr;
+		uint64_t	comp_pattern;
+	};
+	union {
+		uint32_t	xfer_size;
+		uint32_t	desc_count;
+	};
+	uint16_t	int_handle;
+	uint16_t	rsvd1;
+	union {
+		uint8_t		expected_res;
+		struct delta {
+			uint64_t	addr;
+			uint32_t	max_size;
+		} delta;
+		uint32_t	delta_rec_size;
+		uint64_t	dest2;
+		struct crc32c {
+			uint32_t	seed;
+			uint32_t	rsvd;
+			uint64_t	addr;
+		} crc32c;
+		struct dif_chk {
+			uint8_t		src_flags;
+			uint8_t		rsvd1;
+			uint8_t		flags;
+			uint8_t		rsvd2[5];
+			uint32_t	ref_tag_seed;
+			uint16_t	app_tag_mask;
+			uint16_t	app_tag_seed;
+		} dif_chk;
+		struct dif_ins {
+			uint8_t		rsvd1;
+			uint8_t		dest_flag;
+			uint8_t		flags;
+			uint8_t		rsvd2[13];
+			uint32_t	ref_tag_seed;
+			uint16_t	app_tag_mask;
+			uint16_t	app_tag_seed;
+		} dif_ins;
+		struct dif_upd {
+			uint8_t		src_flags;
+			uint8_t		dest_flags;
+			uint8_t		flags;
+			uint8_t		rsvd[5];
+			uint32_t	src_ref_tag_seed;
+			uint16_t	src_app_tag_mask;
+			uint16_t	src_app_tag_seed;
+			uint32_t	dest_ref_tag_seed;
+			uint16_t	dest_app_tag_mask;
+			uint16_t	dest_app_tag_seed;
+		} dif_upd;
+		uint8_t		op_specific[24];
+	};
+} __attribute__((packed));
+SPDK_STATIC_ASSERT(sizeof(struct idxd_hw_desc) == 64, "size mismatch");
+
+struct idxd_hw_comp_record {
+	volatile uint8_t	status;
+	union {
+		uint8_t		result;
+		uint8_t		dif_status;
+	};
+	uint16_t		rsvd;
+	uint32_t		bytes_completed;
+	uint64_t		fault_addr;
+	union {
+		uint32_t	delta_rec_size;
+		uint32_t	crc32c_val;
+		struct {
+			uint32_t	dif_chk_ref_tag;
+			uint16_t	dif_chk_app_tag_mask;
+			uint16_t	dif_chk_app_tag;
+		};
+		struct dif_ins_comp {
+			uint64_t	rsvd;
+			uint32_t	ref_tag;
+			uint16_t	app_tag_mask;
+			uint16_t	app_tag;
+		} dif_ins_comp;
+		struct dif_upd_comp {
+			uint32_t	src_ref_tag;
+			uint16_t	src_app_tag_mask;
+			uint16_t	src_app_tag;
+			uint32_t	dest_ref_tag;
+			uint16_t	dest_app_tag_mask;
+			uint16_t	dest_app_tag;
+		} dif_upd_comp;
+		uint8_t		op_specific[16];
+	};
+} __attribute__((packed));
+SPDK_STATIC_ASSERT(sizeof(struct idxd_hw_comp_record) == 32, "size mismatch");
+
+union idxd_gencap_register {
+	struct {
+		uint64_t block_on_fault: 1;
+		uint64_t overlap_copy: 1;
+		uint64_t cache_control_mem: 1;
+		uint64_t cache_control_cache: 1;
+		uint64_t rsvd: 3;
+		uint64_t int_handle_req: 1;
+		uint64_t dest_readback: 1;
+		uint64_t drain_readback: 1;
+		uint64_t rsvd2: 6;
+		uint64_t max_xfer_shift: 5;
+		uint64_t max_batch_shift: 4;
+		uint64_t max_ims_mult: 6;
+		uint64_t config_en: 1;
+		uint64_t max_descs_per_engine: 8;
+		uint64_t rsvd3: 24;
+	} __attribute__((packed));
+	uint64_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_gencap_register) == 8, "size mismatch");
+
+union idxd_wqcap_register {
+	struct {
+		uint64_t total_wq_size: 16;
+		uint64_t num_wqs: 8;
+		uint64_t rsvd: 24;
+		uint64_t shared_mode: 1;
+		uint64_t dedicated_mode: 1;
+		uint64_t rsvd2: 1;
+		uint64_t priority: 1;
+		uint64_t occupancy: 1;
+		uint64_t occupancy_int: 1;
+		uint64_t rsvd3: 10;
+	} __attribute__((packed));
+	uint64_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_wqcap_register) == 8, "size mismatch");
+
+union idxd_groupcap_register {
+	struct {
+		uint64_t num_groups: 8;
+		uint64_t total_tokens: 8;
+		uint64_t token_en: 1;
+		uint64_t token_limit: 1;
+		uint64_t rsvd: 46;
+	} __attribute__((packed));
+	uint64_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_groupcap_register) == 8, "size mismatch");
+
+union idxd_enginecap_register {
+	struct {
+		uint64_t num_engines: 8;
+		uint64_t rsvd: 56;
+	} __attribute__((packed));
+	uint64_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_enginecap_register) == 8, "size mismatch");
+
+struct idxd_opcap_register {
+	uint64_t raw[4];
+};
+SPDK_STATIC_ASSERT(sizeof(struct idxd_opcap_register) == 32, "size mismatch");
+
+struct idxd_registers {
+	uint32_t version;
+	union idxd_gencap_register gencap;
+	union idxd_wqcap_register wqcap;
+	union idxd_groupcap_register groupcap;
+	union idxd_enginecap_register enginecap;
+	struct idxd_opcap_register opcap;
+};
+SPDK_STATIC_ASSERT(sizeof(struct idxd_registers) == 72, "size mismatch");
+
+union idxd_offsets_register {
+	struct {
+		uint64_t grpcfg: 16;
+		uint64_t wqcfg: 16;
+		uint64_t msix_perm: 16;
+		uint64_t ims: 16;
+		uint64_t perfmon: 16;
+		uint64_t rsvd: 48;
+	} __attribute__((packed));
+	uint64_t raw[2];
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_offsets_register) == 16, "size mismatch");
+
+union idxd_genstatus_register {
+	struct {
+		uint32_t state: 2;
+		uint32_t reset_type: 2;
+		uint32_t rsvd: 28;
+	} __attribute__((packed));
+	uint32_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_genstatus_register) == 4, "size mismatch");
+
+union idxd_cmdsts_reg {
+	struct {
+		uint8_t err;
+		uint16_t result;
+		uint8_t rsvd: 7;
+		uint8_t active: 1;
+	} __attribute__((packed));
+	uint32_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_cmdsts_reg) == 4, "size mismatch");
+
+union idxd_swerr_register {
+	struct {
+		uint64_t valid: 1;
+		uint64_t overflow: 1;
+		uint64_t desc_valid: 1;
+		uint64_t wq_idx_valid: 1;
+		uint64_t batch: 1;
+		uint64_t fault_rw: 1;
+		uint64_t priv: 1;
+		uint64_t rsvd: 1;
+		uint64_t error: 8;
+		uint64_t wq_idx: 8;
+		uint64_t rsvd2: 8;
+		uint64_t operation: 8;
+		uint64_t pasid: 20;
+		uint64_t rsvd3: 4;
+		uint64_t batch_idx: 16;
+		uint64_t rsvd4: 16;
+		uint64_t invalid_flags: 32;
+		uint64_t fault_addr;
+		uint64_t rsvd5;
+	} __attribute__((packed));
+	uint64_t raw[4];
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_swerr_register) == 32, "size mismatch");
+
+union idxd_group_flags {
+	struct {
+		uint32_t tc_a: 3;
+		uint32_t tc_b: 3;
+		uint32_t rsvd: 1;
+		uint32_t use_token_limit: 1;
+		uint32_t tokens_reserved: 8;
+		uint32_t rsvd2: 4;
+		uint32_t tokens_allowed: 8;
+		uint32_t rsvd3: 4;
+	} __attribute__((packed));
+	uint32_t raw;
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_group_flags) == 4, "size mismatch");
+
+struct idxd_grpcfg {
+	uint64_t wqs[4];
+	uint64_t engines;
+	union idxd_group_flags flags;
+};
+SPDK_STATIC_ASSERT(sizeof(struct idxd_grpcfg) == 48, "size mismatch");
+
+union idxd_wqcfg {
+	struct {
+		uint16_t wq_size;
+		uint16_t rsvd;
+		uint16_t wq_thresh;
+		uint16_t rsvd1;
+		uint32_t mode: 1;
+		uint32_t bof: 1;
+		uint32_t rsvd2: 2;
+		uint32_t priority: 4;
+		uint32_t pasid: 20;
+		uint32_t pasid_en: 1;
+		uint32_t priv: 1;
+		uint32_t rsvd3: 2;
+		uint32_t max_xfer_shift: 5;
+		uint32_t max_batch_shift: 4;
+		uint32_t rsvd4: 23;
+		uint16_t occupancy_inth;
+		uint16_t occupancy_table_sel: 1;
+		uint16_t rsvd5: 15;
+		uint16_t occupancy_limit;
+		uint16_t occupancy_int_en: 1;
+		uint16_t rsvd6: 15;
+		uint16_t occupancy;
+		uint16_t occupancy_int: 1;
+		uint16_t rsvd7: 12;
+		uint16_t mode_support: 1;
+		uint16_t wq_state: 2;
+		uint32_t rsvd8;
+	} __attribute__((packed));
+	uint32_t raw[8];
+};
+SPDK_STATIC_ASSERT(sizeof(union idxd_wqcfg) == 32, "size mismatch");
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_IDXD_SPEC_H */
diff --git a/src/spdk/lib/idxd/spdk_idxd.map b/src/spdk/lib/idxd/spdk_idxd.map
new file mode 100644
index 000000000..4bffdf209
--- /dev/null
+++ b/src/spdk/lib/idxd/spdk_idxd.map
@@ -0,0 +1,29 @@
+{
+	global:
+
+	# public functions
+	spdk_idxd_configure_chan;
+	spdk_idxd_reconfigure_chan;
+	spdk_idxd_probe;
+	spdk_idxd_detach;
+	spdk_idxd_batch_prep_copy;
+	spdk_idxd_batch_prep_dualcast;
+	spdk_idxd_batch_prep_fill;
+	spdk_idxd_batch_prep_crc32c;
+	spdk_idxd_batch_prep_compare;
+	spdk_idxd_batch_submit;
+	spdk_idxd_batch_create;
+	spdk_idxd_batch_cancel;
+	spdk_idxd_batch_get_max;
+	spdk_idxd_set_config;
+	spdk_idxd_submit_compare;
+	spdk_idxd_submit_crc32c;
+	spdk_idxd_submit_copy;
+	spdk_idxd_submit_dualcast;
+	spdk_idxd_submit_fill;
+	spdk_idxd_process_events;
+	spdk_idxd_get_channel;
+	spdk_idxd_put_channel;
+
+	local: *;
+};
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-21 11:54:28 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-21 11:54:28 +0000
commit	e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree	64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/idxd
parent	Initial commit. (diff)
download	ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip