summaryrefslogtreecommitdiffstats
path: root/src/spdk/lib/ioat
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit: 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree: 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/lib/ioat
parent: Initial commit. (diff)
download: ceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 16.2.11+ds. (refs: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/ioat')
-rw-r--r-- src/spdk/lib/ioat/Makefile 45
-rw-r--r-- src/spdk/lib/ioat/ioat.c 775
-rw-r--r-- src/spdk/lib/ioat/ioat_internal.h 100
-rw-r--r-- src/spdk/lib/ioat/spdk_ioat.map 17
4 files changed, 937 insertions, 0 deletions
diff --git a/src/spdk/lib/ioat/Makefile b/src/spdk/lib/ioat/Makefile
new file mode 100644
index 000000000..4cada5685
--- /dev/null
+++ b/src/spdk/lib/ioat/Makefile
@@ -0,0 +1,45 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Build description for the ioat library.
+
+# The SPDK tree root is two directories above this one.
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# NOTE(review): presumably the shared-object major/minor version consumed by
+# mk/spdk.lib.mk -- confirm there.
+SO_VER := 3
+SO_MINOR := 0
+
+# The library is built from a single source file and is named "ioat".
+C_SRCS = ioat.c
+LIBNAME = ioat
+
+# Symbol map file that lives next to this Makefile.
+SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_ioat.map)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/ioat/ioat.c b/src/spdk/lib/ioat/ioat.c
new file mode 100644
index 000000000..516fa545c
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat.c
@@ -0,0 +1,775 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "ioat_internal.h"
+
+#include "spdk/env.h"
+#include "spdk/util.h"
+#include "spdk/memory.h"
+
+#include "spdk_internal/log.h"
+
+/* Driver-wide state shared by every attached I/OAT channel. */
+struct ioat_driver {
+ /* Held around enumeration in spdk_ioat_probe() and list removal in spdk_ioat_detach(). */
+ pthread_mutex_t lock;
+ /* Channels that have been attached, linked through spdk_ioat_chan.tailq. */
+ TAILQ_HEAD(, spdk_ioat_chan) attached_chans;
+};
+
+/* The single driver instance: a statically initialized mutex and an empty channel list. */
+static struct ioat_driver g_ioat_driver = {
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ .attached_chans = TAILQ_HEAD_INITIALIZER(g_ioat_driver.attached_chans),
+};
+
+/* Read the channel status (chansts) register with one 8-byte MMIO access. */
+static uint64_t
+ioat_get_chansts(struct spdk_ioat_chan *ioat)
+{
+	uint64_t chansts = spdk_mmio_read_8(&ioat->regs->chansts);
+
+	return chansts;
+}
+
+/* Store addr into the chancmp register with one 8-byte MMIO access. */
+static void
+ioat_write_chancmp(struct spdk_ioat_chan *ioat, uint64_t addr)
+{
+	volatile struct spdk_ioat_registers *regs = ioat->regs;
+
+	spdk_mmio_write_8(&regs->chancmp, addr);
+}
+
+/* Store addr into the chainaddr register with one 8-byte MMIO access. */
+static void
+ioat_write_chainaddr(struct spdk_ioat_chan *ioat, uint64_t addr)
+{
+	volatile struct spdk_ioat_registers *regs = ioat->regs;
+
+	spdk_mmio_write_8(&regs->chainaddr, addr);
+}
+
+/* Issue the SUSPEND command by writing it to the channel command register. */
+static inline void
+ioat_suspend(struct spdk_ioat_chan *ioat)
+{
+	volatile struct spdk_ioat_registers *regs = ioat->regs;
+
+	regs->chancmd = SPDK_IOAT_CHANCMD_SUSPEND;
+}
+
+/* Issue the RESET command by writing it to the channel command register. */
+static inline void
+ioat_reset(struct spdk_ioat_chan *ioat)
+{
+	volatile struct spdk_ioat_registers *regs = ioat->regs;
+
+	regs->chancmd = SPDK_IOAT_CHANCMD_RESET;
+}
+
+/*
+ * Report whether the RESET bit(s) are still set in the channel command
+ * register.  Returns 1 while they are, 0 otherwise.
+ */
+static inline uint32_t
+ioat_reset_pending(struct spdk_ioat_chan *ioat)
+{
+	/* Sample the register exactly once. */
+	uint8_t chancmd = ioat->regs->chancmd;
+
+	if ((chancmd & SPDK_IOAT_CHANCMD_RESET) == SPDK_IOAT_CHANCMD_RESET) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map PCI BAR 0 of the channel's device and record it as the register block.
+ *
+ * Returns 0 on success.  Returns -1, after logging, when the mapping call
+ * reports an error or hands back a NULL address; ioat->regs is left
+ * untouched in that case.
+ */
+static int
+ioat_map_pci_bar(struct spdk_ioat_chan *ioat)
+{
+	const int regs_bar = 0;
+	void *bar_vaddr;
+	uint64_t bar_paddr, bar_size;
+	int rc;
+
+	rc = spdk_pci_device_map_bar(ioat->device, regs_bar, &bar_vaddr, &bar_paddr, &bar_size);
+	if (rc == 0 && bar_vaddr != NULL) {
+		ioat->regs = (volatile struct spdk_ioat_registers *)bar_vaddr;
+		return 0;
+	}
+
+	SPDK_ERRLOG("pci_device_map_range failed with error code %d\n",
+		    rc);
+	return -1;
+}
+
+/*
+ * Unmap BAR 0 if a register mapping was recorded.
+ *
+ * Returns 0 when nothing was mapped, otherwise whatever
+ * spdk_pci_device_unmap_bar() returns.  ioat->regs itself is not modified.
+ */
+static int
+ioat_unmap_pci_bar(struct spdk_ioat_chan *ioat)
+{
+	void *bar_vaddr = (void *)ioat->regs;
+
+	if (bar_vaddr == NULL) {
+		return 0;
+	}
+
+	return spdk_pci_device_unmap_bar(ioat->device, 0, bar_vaddr);
+}
+
+
+/* Distance from tail to head, reduced modulo the ring size. */
+static inline uint32_t
+ioat_get_active(struct spdk_ioat_chan *ioat)
+{
+	uint32_t ring_mask = (1 << ioat->ring_size_order) - 1;
+	uint32_t outstanding = ioat->head - ioat->tail;
+
+	return outstanding & ring_mask;
+}
+
+/*
+ * Number of ring slots still available.  The result is the ring size minus
+ * the active count minus one, so one slot is never handed out.
+ */
+static inline uint32_t
+ioat_get_ring_space(struct spdk_ioat_chan *ioat)
+{
+	uint32_t ring_entries = 1 << ioat->ring_size_order;
+	uint32_t in_use = ioat_get_active(ioat);
+
+	return ring_entries - in_use - 1;
+}
+
+/* Reduce a free-running index to a slot number inside the ring. */
+static uint32_t
+ioat_get_ring_index(struct spdk_ioat_chan *ioat, uint32_t index)
+{
+	uint32_t ring_mask = (1 << ioat->ring_size_order) - 1;
+
+	return index & ring_mask;
+}
+
+/*
+ * Look up the software and hardware descriptors that share the ring slot
+ * selected by index (wrapped to the ring size) and return both through the
+ * output pointers.
+ */
+static void
+ioat_get_ring_entry(struct spdk_ioat_chan *ioat, uint32_t index,
+		    struct ioat_descriptor **desc,
+		    union spdk_ioat_hw_desc **hw_desc)
+{
+	uint32_t slot = ioat_get_ring_index(ioat, index);
+
+	*desc = ioat->ring + slot;
+	*hw_desc = ioat->hw_ring + slot;
+}
+
+/* Account for one newly prepared descriptor by advancing the head index. */
+static void
+ioat_submit_single(struct spdk_ioat_chan *ioat)
+{
+	ioat->head += 1;
+}
+
+/*
+ * Hand queued descriptors to the hardware.
+ *
+ * Sets the completion_update control bit on the descriptor just behind head
+ * (the most recently queued one) and then writes the low 16 bits of head to
+ * the dmacount register.
+ */
+void
+spdk_ioat_flush(struct spdk_ioat_chan *ioat)
+{
+	uint32_t last_slot = ioat_get_ring_index(ioat, ioat->head - 1);
+	union spdk_ioat_hw_desc *last = &ioat->hw_ring[last_slot];
+
+	last->dma.u.control.completion_update = 1;
+	ioat->regs->dmacount = (uint16_t)ioat->head;
+}
+
+/*
+ * Queue a null descriptor at the head of the ring.
+ *
+ * The descriptor is a copy operation with the null control bit set, a size
+ * of 8 and zero source/destination addresses.  No completion callback is
+ * attached.  Returns the software descriptor, or NULL when the ring has no
+ * free slot.
+ */
+static struct ioat_descriptor *
+ioat_prep_null(struct spdk_ioat_chan *ioat)
+{
+	struct ioat_descriptor *sw;
+	union spdk_ioat_hw_desc *hw;
+
+	if (ioat_get_ring_space(ioat) == 0) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &sw, &hw);
+
+	sw->callback_fn = NULL;
+	sw->callback_arg = NULL;
+
+	/* Wipe every control bit first, then set only the ones needed. */
+	hw->dma.u.control_raw = 0;
+	hw->dma.u.control.op = SPDK_IOAT_OP_COPY;
+	hw->dma.u.control.null = 1;
+
+	hw->dma.src_addr = 0;
+	hw->dma.dest_addr = 0;
+	hw->dma.size = 8;
+
+	ioat_submit_single(ioat);
+
+	return sw;
+}
+
+/*
+ * Queue one copy descriptor of len bytes from src to dst at the head of the
+ * ring.  dst and src are written to the hardware descriptor as given.
+ *
+ * len must not exceed the channel's max_xfer_size (asserted).  No
+ * completion callback is attached.  Returns the software descriptor, or
+ * NULL when the ring has no free slot.
+ */
+static struct ioat_descriptor *
+ioat_prep_copy(struct spdk_ioat_chan *ioat, uint64_t dst,
+	       uint64_t src, uint32_t len)
+{
+	struct ioat_descriptor *sw;
+	union spdk_ioat_hw_desc *hw;
+
+	assert(len <= ioat->max_xfer_size);
+
+	if (ioat_get_ring_space(ioat) == 0) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &sw, &hw);
+
+	sw->callback_fn = NULL;
+	sw->callback_arg = NULL;
+
+	/* Wipe every control bit first, then select the operation. */
+	hw->dma.u.control_raw = 0;
+	hw->dma.u.control.op = SPDK_IOAT_OP_COPY;
+
+	hw->dma.src_addr = src;
+	hw->dma.dest_addr = dst;
+	hw->dma.size = len;
+
+	ioat_submit_single(ioat);
+
+	return sw;
+}
+
+/*
+ * Queue one fill descriptor that writes fill_pattern over len bytes at dst.
+ * dst is written to the hardware descriptor as given.
+ *
+ * len must not exceed the channel's max_xfer_size (asserted).  No
+ * completion callback is attached.  Returns the software descriptor, or
+ * NULL when the ring has no free slot.
+ */
+static struct ioat_descriptor *
+ioat_prep_fill(struct spdk_ioat_chan *ioat, uint64_t dst,
+	       uint64_t fill_pattern, uint32_t len)
+{
+	struct ioat_descriptor *sw;
+	union spdk_ioat_hw_desc *hw;
+
+	assert(len <= ioat->max_xfer_size);
+
+	if (ioat_get_ring_space(ioat) == 0) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &sw, &hw);
+
+	sw->callback_fn = NULL;
+	sw->callback_arg = NULL;
+
+	/* Wipe every control bit first, then select the operation. */
+	hw->fill.u.control_raw = 0;
+	hw->fill.u.control.op = SPDK_IOAT_OP_FILL;
+
+	hw->fill.src_data = fill_pattern;
+	hw->fill.dest_addr = dst;
+	hw->fill.size = len;
+
+	ioat_submit_single(ioat);
+
+	return sw;
+}
+
+/*
+ * Suspend the channel if needed, clear its error state and reset it.
+ *
+ * Steps, in order:
+ *  1. If the channel is active or idle, issue a suspend and poll until it
+ *     leaves both states.
+ *  2. Write the current CHANERR value back to CHANERR.
+ *  3. For hardware versions below SPDK_IOAT_VER_3_3, read the PCI config
+ *     register at SPDK_IOAT_PCI_CHANERR_INT_OFFSET and write the same value
+ *     back.
+ *  4. Issue a reset and poll until the reset command bit clears.
+ *
+ * Each poll loop sleeps 1000 us per iteration and gives up after 20
+ * iterations.
+ *
+ * Returns 0 on success, -1 on either timeout or if the PCI config read fails.
+ */
+static int ioat_reset_hw(struct spdk_ioat_chan *ioat)
+{
+ int timeout;
+ uint64_t status;
+ uint32_t chanerr;
+ int rc;
+
+ status = ioat_get_chansts(ioat);
+ if (is_ioat_active(status) || is_ioat_idle(status)) {
+ ioat_suspend(ioat);
+ }
+
+ timeout = 20; /* in milliseconds */
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ spdk_delay_us(1000);
+ timeout--;
+ /* The timeout is checked before the status is sampled again. */
+ if (timeout == 0) {
+ SPDK_ERRLOG("timed out waiting for suspend\n");
+ return -1;
+ }
+ status = ioat_get_chansts(ioat);
+ }
+
+ /*
+ * Clear any outstanding errors.
+ * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything.
+ */
+ chanerr = ioat->regs->chanerr;
+ ioat->regs->chanerr = chanerr;
+
+ if (ioat->regs->cbver < SPDK_IOAT_VER_3_3) {
+ /* chanerr is reused here to hold the PCI config-space copy of the error bits. */
+ rc = spdk_pci_device_cfg_read32(ioat->device, &chanerr,
+ SPDK_IOAT_PCI_CHANERR_INT_OFFSET);
+ if (rc) {
+ SPDK_ERRLOG("failed to read the internal channel error register\n");
+ return -1;
+ }
+
+ /* The result of this write is not checked. */
+ spdk_pci_device_cfg_write32(ioat->device, chanerr,
+ SPDK_IOAT_PCI_CHANERR_INT_OFFSET);
+ }
+
+ ioat_reset(ioat);
+
+ timeout = 20;
+ while (ioat_reset_pending(ioat)) {
+ spdk_delay_us(1000);
+ timeout--;
+ if (timeout == 0) {
+ SPDK_ERRLOG("timed out waiting for reset\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Retire completed descriptors and run their callbacks.
+ *
+ * Reads the completion-update word once, then walks the ring from tail,
+ * invoking each descriptor's callback (if any) and advancing tail, until it
+ * has processed the descriptor whose physical address matches the
+ * completed-descriptor address reported in that word.
+ *
+ * Returns the number of descriptors retired, 0 when nothing is outstanding
+ * or nothing new has completed, and -1 when the status reports a halted
+ * channel.
+ */
+static int
+ioat_process_channel_events(struct spdk_ioat_chan *ioat)
+{
+ struct ioat_descriptor *desc;
+ uint64_t status, completed_descriptor, hw_desc_phys_addr, events_count = 0;
+ uint32_t tail;
+
+ /* Nothing has been queued since the last retired descriptor. */
+ if (ioat->head == ioat->tail) {
+ return 0;
+ }
+
+ /* Single read of the volatile completion word; both fields below come from this snapshot. */
+ status = *ioat->comp_update;
+ completed_descriptor = status & SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
+
+ if (is_ioat_halted(status)) {
+ SPDK_ERRLOG("Channel halted (%x)\n", ioat->regs->chanerr);
+ return -1;
+ }
+
+ /* No progress since the previous call. */
+ if (completed_descriptor == ioat->last_seen) {
+ return 0;
+ }
+
+ /*
+ * NOTE(review): this loop terminates only when a ring entry's phys_addr
+ * equals the reported address; it is not bounded by head.
+ */
+ do {
+ tail = ioat_get_ring_index(ioat, ioat->tail);
+ desc = &ioat->ring[tail];
+
+ /* The callback runs before tail is advanced past this descriptor. */
+ if (desc->callback_fn) {
+ desc->callback_fn(desc->callback_arg);
+ }
+
+ hw_desc_phys_addr = desc->phys_addr;
+ ioat->tail++;
+ events_count++;
+ } while (hw_desc_phys_addr != completed_descriptor);
+
+ ioat->last_seen = hw_desc_phys_addr;
+
+ return events_count;
+}
+
+/*
+ * Release what ioat_channel_start() set up: the BAR mapping, the software
+ * ring, the hardware ring and the completion-update word.  Members that are
+ * still NULL are skipped.  The channel structure itself is not freed.
+ */
+static void
+ioat_channel_destruct(struct spdk_ioat_chan *ioat)
+{
+	ioat_unmap_pci_bar(ioat);
+
+	/* free() accepts NULL, so the software ring needs no guard. */
+	free(ioat->ring);
+
+	if (ioat->hw_ring != NULL) {
+		spdk_free(ioat->hw_ring);
+	}
+
+	if (ioat->comp_update != NULL) {
+		spdk_free((void *)ioat->comp_update);
+		ioat->comp_update = NULL;
+	}
+}
+
+/* Return the number of slots in the channel's descriptor ring. */
+uint32_t
+spdk_ioat_get_max_descriptors(struct spdk_ioat_chan *ioat)
+{
+	uint32_t ring_entries = 1 << ioat->ring_size_order;
+
+	return ring_entries;
+}
+
+/*
+ * Bring a freshly allocated channel to a running state.
+ *
+ * Maps the register BAR, validates the hardware version and XFERCAP,
+ * allocates the completion-update word and both descriptor rings, links the
+ * hardware descriptors into a circular chain, resets the channel and then
+ * pushes one null descriptor through it to confirm the engine goes idle.
+ *
+ * Returns 0 on success and -1 on failure.  Failure paths do not unwind
+ * here: whatever was mapped or allocated stays recorded in *ioat so that the
+ * caller can release it with ioat_channel_destruct().
+ */
+static int
+ioat_channel_start(struct spdk_ioat_chan *ioat)
+{
+	uint8_t xfercap, version;
+	uint64_t status;
+	int i, num_descriptors;
+	uint64_t comp_update_bus_addr = 0;
+	uint64_t phys_addr;
+
+	if (ioat_map_pci_bar(ioat) != 0) {
+		SPDK_ERRLOG("ioat_map_pci_bar() failed\n");
+		return -1;
+	}
+
+	version = ioat->regs->cbver;
+	if (version < SPDK_IOAT_VER_3_0) {
+		SPDK_ERRLOG(" unsupported IOAT version %u.%u\n",
+			    version >> 4, version & 0xF);
+		return -1;
+	}
+
+	/* Always support DMA copy */
+	ioat->dma_capabilities = SPDK_IOAT_ENGINE_COPY_SUPPORTED;
+	if (ioat->regs->dmacapability & SPDK_IOAT_DMACAP_BFILL) {
+		ioat->dma_capabilities |= SPDK_IOAT_ENGINE_FILL_SUPPORTED;
+	}
+	xfercap = ioat->regs->xfercap;
+
+	/* Only bits [4:0] are valid. */
+	xfercap &= 0x1f;
+	if (xfercap == 0) {
+		/* 0 means 4 GB max transfer size. */
+		ioat->max_xfer_size = 1ULL << 32;
+	} else if (xfercap < 12) {
+		/* XFERCAP must be at least 12 (4 KB) according to the spec. */
+		SPDK_ERRLOG("invalid XFERCAP value %u\n", xfercap);
+		return -1;
+	} else {
+		ioat->max_xfer_size = 1U << xfercap;
+	}
+
+	ioat->comp_update = spdk_zmalloc(sizeof(*ioat->comp_update), SPDK_IOAT_CHANCMP_ALIGN,
+					 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (ioat->comp_update == NULL) {
+		return -1;
+	}
+
+	comp_update_bus_addr = spdk_vtophys((void *)ioat->comp_update, NULL);
+	if (comp_update_bus_addr == SPDK_VTOPHYS_ERROR) {
+		spdk_free((void *)ioat->comp_update);
+		/*
+		 * Clear the pointer: the caller runs ioat_channel_destruct() on
+		 * failure, which frees comp_update whenever it is non-NULL and
+		 * would otherwise free this buffer a second time.
+		 */
+		ioat->comp_update = NULL;
+		return -1;
+	}
+
+	ioat->ring_size_order = IOAT_DEFAULT_ORDER;
+
+	num_descriptors = 1 << ioat->ring_size_order;
+
+	ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor));
+	if (!ioat->ring) {
+		return -1;
+	}
+
+	ioat->hw_ring = spdk_zmalloc(num_descriptors * sizeof(union spdk_ioat_hw_desc), 64,
+				     NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (!ioat->hw_ring) {
+		return -1;
+	}
+
+	for (i = 0; i < num_descriptors; i++) {
+		phys_addr = spdk_vtophys(&ioat->hw_ring[i], NULL);
+		if (phys_addr == SPDK_VTOPHYS_ERROR) {
+			SPDK_ERRLOG("Failed to translate descriptor %u to physical address\n", i);
+			return -1;
+		}
+
+		ioat->ring[i].phys_addr = phys_addr;
+		/*
+		 * Point the previous hardware descriptor at this one.  For i == 0
+		 * the index i - 1 wraps to the last slot, which closes the chain
+		 * into a ring.
+		 */
+		ioat->hw_ring[ioat_get_ring_index(ioat, i - 1)].generic.next = phys_addr;
+	}
+
+	ioat->head = 0;
+	ioat->tail = 0;
+	ioat->last_seen = 0;
+
+	/*
+	 * The reset result is not checked here.
+	 * NOTE(review): presumably a channel that failed to reset is caught by
+	 * the idle check below -- confirm.
+	 */
+	ioat_reset_hw(ioat);
+
+	ioat->regs->chanctrl = SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+	ioat_write_chancmp(ioat, comp_update_bus_addr);
+	ioat_write_chainaddr(ioat, ioat->ring[0].phys_addr);
+
+	/* Push a single null descriptor through the engine as a smoke test. */
+	ioat_prep_null(ioat);
+	spdk_ioat_flush(ioat);
+
+	/* Poll up to 100 times, 100 us apart, for the channel to report idle. */
+	i = 100;
+	while (i-- > 0) {
+		spdk_delay_us(100);
+		status = ioat_get_chansts(ioat);
+		if (is_ioat_idle(status)) {
+			break;
+		}
+	}
+
+	if (is_ioat_idle(status)) {
+		/* Retire the null descriptor so head and tail match again. */
+		ioat_process_channel_events(ioat);
+	} else {
+		SPDK_ERRLOG("could not start channel: status = %p\n error = %#x\n",
+			    (void *)status, ioat->regs->chanerr);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a channel object for a PCI device, enable bus mastering on the
+ * device and start the channel.
+ *
+ * Caller must hold g_ioat_driver.lock.
+ *
+ * Returns the new channel, or NULL if allocation, the PCI config-space read
+ * or ioat_channel_start() fails.  Nothing is leaked on failure.
+ */
+static struct spdk_ioat_chan *
+ioat_attach(struct spdk_pci_device *device)
+{
+	struct spdk_ioat_chan *ioat;
+	uint32_t cmd_reg;
+
+	ioat = calloc(1, sizeof(struct spdk_ioat_chan));
+	if (ioat == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Enable PCI busmaster (bit 2 of the dword at config offset 4).  Bail
+	 * out if the read fails rather than OR-ing into an uninitialized value
+	 * and writing that back to the device.
+	 */
+	if (spdk_pci_device_cfg_read32(device, &cmd_reg, 4) != 0) {
+		SPDK_ERRLOG("failed to read the PCI command register\n");
+		free(ioat);
+		return NULL;
+	}
+	cmd_reg |= 0x4;
+	spdk_pci_device_cfg_write32(device, cmd_reg, 4);
+
+	ioat->device = device;
+
+	if (ioat_channel_start(ioat) != 0) {
+		/* ioat_channel_start() leaves partial state behind for us to release. */
+		ioat_channel_destruct(ioat);
+		free(ioat);
+		return NULL;
+	}
+
+	return ioat;
+}
+
+/* Caller-supplied callbacks and context carried through PCI enumeration to ioat_enum_cb(). */
+struct ioat_enum_ctx {
+ /* Called per device; a non-zero return makes ioat_enum_cb() attach the device. */
+ spdk_ioat_probe_cb probe_cb;
+ /* Called after a device has been attached and added to the channel list. */
+ spdk_ioat_attach_cb attach_cb;
+ /* Passed as the first argument to both callbacks. */
+ void *cb_ctx;
+};
+
+/*
+ * Per-device enumeration callback.
+ *
+ * This function must only be called while holding g_ioat_driver.lock.
+ *
+ * Devices already present in the attached list are skipped.  Otherwise the
+ * user's probe callback decides whether to attach; on acceptance the channel
+ * is attached, appended to the list and reported through the attach
+ * callback.  Returns 0, or -1 when ioat_attach() fails.
+ */
+static int
+ioat_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
+{
+	struct ioat_enum_ctx *enum_ctx = ctx;
+	struct spdk_ioat_chan *attached;
+	struct spdk_ioat_chan *ioat;
+
+	/*
+	 * Skip devices that are already attached.
+	 *
+	 * NOTE: This assumes that the PCI abstraction layer will use the same device handle
+	 * across enumerations; we could compare by BDF instead if this is not true.
+	 */
+	TAILQ_FOREACH(attached, &g_ioat_driver.attached_chans, tailq) {
+		if (attached->device == pci_dev) {
+			return 0;
+		}
+	}
+
+	if (!enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
+		/* The user declined this device. */
+		return 0;
+	}
+
+	/*
+	 * Since I/OAT init is relatively quick, just perform the full init during probing.
+	 * If this turns out to be a bottleneck later, this can be changed to work like
+	 * NVMe with a list of devices to initialize in parallel.
+	 */
+	ioat = ioat_attach(pci_dev);
+	if (ioat == NULL) {
+		SPDK_ERRLOG("ioat_attach() failed\n");
+		return -1;
+	}
+
+	TAILQ_INSERT_TAIL(&g_ioat_driver.attached_chans, ioat, tailq);
+	enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, ioat);
+
+	return 0;
+}
+
+/*
+ * Enumerate I/OAT PCI devices, offering each one to probe_cb and reporting
+ * each attached channel through attach_cb.  The driver lock is held for the
+ * whole enumeration.  Returns the result of spdk_pci_enumerate().
+ */
+int
+spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb)
+{
+	struct ioat_enum_ctx enum_ctx = {
+		.probe_cb = probe_cb,
+		.attach_cb = attach_cb,
+		.cb_ctx = cb_ctx,
+	};
+	int rc;
+
+	pthread_mutex_lock(&g_ioat_driver.lock);
+	rc = spdk_pci_enumerate(spdk_pci_ioat_get_driver(), ioat_enum_cb, &enum_ctx);
+	pthread_mutex_unlock(&g_ioat_driver.lock);
+
+	return rc;
+}
+
+/*
+ * Remove a channel from the attached list, release its resources and free
+ * it.  The pointer must not be used afterwards.
+ *
+ * The channel should no longer be in use (not registered to a thread) when
+ * this is called.
+ */
+void
+spdk_ioat_detach(struct spdk_ioat_chan *ioat)
+{
+	/* Only the list manipulation needs the driver lock. */
+	pthread_mutex_lock(&g_ioat_driver.lock);
+	TAILQ_REMOVE(&g_ioat_driver.attached_chans, ioat, tailq);
+	pthread_mutex_unlock(&g_ioat_driver.lock);
+
+	ioat_channel_destruct(ioat);
+	free(ioat);
+}
+
+/*
+ * Queue the descriptors for a memory copy without submitting them.
+ *
+ * The request is split so that no descriptor crosses a 2 MB boundary on the
+ * source or the destination side or exceeds the channel's max_xfer_size.
+ * cb_fn/cb_arg are attached to the last descriptor only.  A request of
+ * nbytes == 0 queues a single null descriptor carrying the callback.
+ * Nothing is handed to the hardware until spdk_ioat_flush() is called.
+ *
+ * Returns 0 on success, -EINVAL if ioat is NULL or a source/destination
+ * address cannot be translated to a physical address, and -ENOMEM if the
+ * ring does not have room for the whole request.  On any failure head is
+ * restored, so no partial request stays queued.
+ */
+int
+spdk_ioat_build_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
+		     void *dst, const void *src, uint64_t nbytes)
+{
+	struct ioat_descriptor *last_desc = NULL;
+	uint64_t remaining, op_size;
+	uint64_t vdst, vsrc;
+	uint64_t vdst_page, vsrc_page;
+	uint64_t pdst_page, psrc_page;
+	uint32_t orig_head;
+
+	if (!ioat) {
+		return -EINVAL;
+	}
+
+	orig_head = ioat->head;
+
+	vdst = (uint64_t)dst;
+	vsrc = (uint64_t)src;
+	vdst_page = vsrc_page = 0;
+	pdst_page = psrc_page = SPDK_VTOPHYS_ERROR;
+
+	remaining = nbytes;
+	while (remaining) {
+		/* Translate only when the address moves into a different 2 MB page. */
+		if (_2MB_PAGE(vsrc) != vsrc_page) {
+			vsrc_page = _2MB_PAGE(vsrc);
+			psrc_page = spdk_vtophys((void *)vsrc_page, NULL);
+		}
+
+		if (_2MB_PAGE(vdst) != vdst_page) {
+			vdst_page = _2MB_PAGE(vdst);
+			pdst_page = spdk_vtophys((void *)vdst_page, NULL);
+		}
+
+		/*
+		 * Never program the engine with an untranslated address.  This
+		 * also covers an address in page 0, for which the cached
+		 * translation keeps its initial error value.
+		 */
+		if (psrc_page == SPDK_VTOPHYS_ERROR || pdst_page == SPDK_VTOPHYS_ERROR) {
+			ioat->head = orig_head;
+			return -EINVAL;
+		}
+
+		op_size = remaining;
+		op_size = spdk_min(op_size, (VALUE_2MB - _2MB_OFFSET(vsrc)));
+		op_size = spdk_min(op_size, (VALUE_2MB - _2MB_OFFSET(vdst)));
+		op_size = spdk_min(op_size, ioat->max_xfer_size);
+		remaining -= op_size;
+
+		last_desc = ioat_prep_copy(ioat,
+					   pdst_page + _2MB_OFFSET(vdst),
+					   psrc_page + _2MB_OFFSET(vsrc),
+					   op_size);
+
+		if (remaining == 0 || last_desc == NULL) {
+			break;
+		}
+
+		vsrc += op_size;
+		vdst += op_size;
+
+	}
+	/* Issue null descriptor for null transfer */
+	if (nbytes == 0) {
+		last_desc = ioat_prep_null(ioat);
+	}
+
+	if (last_desc) {
+		last_desc->callback_fn = cb_fn;
+		last_desc->callback_arg = cb_arg;
+	} else {
+		/*
+		 * Ran out of descriptors in the ring - reset head to leave things as they were
+		 * in case we managed to fill out any descriptors.
+		 */
+		ioat->head = orig_head;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue a copy with spdk_ioat_build_copy() and, if that succeeded, flush it
+ * to the hardware.  Returns 0 on success or the build error unchanged.
+ */
+int
+spdk_ioat_submit_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
+		      void *dst, const void *src, uint64_t nbytes)
+{
+	int rc = spdk_ioat_build_copy(ioat, cb_arg, cb_fn, dst, src, nbytes);
+
+	if (rc == 0) {
+		spdk_ioat_flush(ioat);
+	}
+
+	return rc;
+}
+
+/*
+ * Queue the descriptors for a memory fill without submitting them.
+ *
+ * The request is split so that no descriptor crosses a 2 MB boundary of the
+ * destination or exceeds the channel's max_xfer_size.  cb_fn/cb_arg are
+ * attached to the last descriptor only.  Nothing is handed to the hardware
+ * until spdk_ioat_flush() is called.
+ *
+ * Returns 0 on success, -EINVAL if ioat is NULL or a destination address
+ * cannot be translated to a physical address, -1 if the channel lacks fill
+ * support, and -ENOMEM if no descriptor was queued (the ring is full, or
+ * nbytes is 0 so there was nothing to queue).  On failure head is restored,
+ * so no partial request stays queued.
+ */
+int
+spdk_ioat_build_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
+		     void *dst, uint64_t fill_pattern, uint64_t nbytes)
+{
+	struct ioat_descriptor *last_desc = NULL;
+	uint64_t remaining, op_size;
+	uint64_t vdst, pdst;
+	uint32_t orig_head;
+
+	if (!ioat) {
+		return -EINVAL;
+	}
+
+	if (!(ioat->dma_capabilities & SPDK_IOAT_ENGINE_FILL_SUPPORTED)) {
+		SPDK_ERRLOG("Channel does not support memory fill\n");
+		return -1;
+	}
+
+	orig_head = ioat->head;
+
+	vdst = (uint64_t)dst;
+	remaining = nbytes;
+
+	while (remaining) {
+		op_size = remaining;
+		op_size = spdk_min(op_size, (VALUE_2MB - _2MB_OFFSET(vdst)));
+		op_size = spdk_min(op_size, ioat->max_xfer_size);
+		remaining -= op_size;
+
+		/* Never program the engine with an untranslated address. */
+		pdst = spdk_vtophys((void *)vdst, NULL);
+		if (pdst == SPDK_VTOPHYS_ERROR) {
+			ioat->head = orig_head;
+			return -EINVAL;
+		}
+
+		last_desc = ioat_prep_fill(ioat,
+					   pdst,
+					   fill_pattern,
+					   op_size);
+
+		if (remaining == 0 || last_desc == NULL) {
+			break;
+		}
+
+		vdst += op_size;
+	}
+
+	if (last_desc) {
+		last_desc->callback_fn = cb_fn;
+		last_desc->callback_arg = cb_arg;
+	} else {
+		/*
+		 * Ran out of descriptors in the ring - reset head to leave things as they were
+		 * in case we managed to fill out any descriptors.
+		 */
+		ioat->head = orig_head;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue a fill with spdk_ioat_build_fill() and, if that succeeded, flush it
+ * to the hardware.  Returns 0 on success or the build error unchanged.
+ */
+int
+spdk_ioat_submit_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
+		      void *dst, uint64_t fill_pattern, uint64_t nbytes)
+{
+	int rc = spdk_ioat_build_fill(ioat, cb_arg, cb_fn, dst, fill_pattern, nbytes);
+
+	if (rc == 0) {
+		spdk_ioat_flush(ioat);
+	}
+
+	return rc;
+}
+
+/* Return the channel's capability flags, or 0 when ioat is NULL. */
+uint32_t
+spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *ioat)
+{
+	return ioat ? ioat->dma_capabilities : 0;
+}
+
+/*
+ * Public entry point for completion processing; forwards to
+ * ioat_process_channel_events() and returns its result unchanged.
+ */
+int
+spdk_ioat_process_events(struct spdk_ioat_chan *ioat)
+{
+	int rc = ioat_process_channel_events(ioat);
+
+	return rc;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("ioat", SPDK_LOG_IOAT)
diff --git a/src/spdk/lib/ioat/ioat_internal.h b/src/spdk/lib/ioat/ioat_internal.h
new file mode 100644
index 000000000..19593bb00
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat_internal.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IOAT_INTERNAL_H__
+#define __IOAT_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/ioat.h"
+#include "spdk/ioat_spec.h"
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+
+/* Allocate 1 << 15 (32K) descriptors per channel by default. */
+#define IOAT_DEFAULT_ORDER 15
+
+/* Software-side bookkeeping for one ring slot. */
+struct ioat_descriptor {
+ /* Physical address associated with this slot's hardware descriptor. */
+ uint64_t phys_addr;
+ /* Completion callback; may be NULL. */
+ spdk_ioat_req_cb callback_fn;
+ /* Argument to pass to callback_fn. */
+ void *callback_arg;
+};
+
+/* One of these per allocated PCI device. */
+struct spdk_ioat_chan {
+ /* Opaque handle to upper layer */
+ struct spdk_pci_device *device;
+ /* Largest transfer size, in bytes, for a single descriptor. */
+ uint64_t max_xfer_size;
+ /* Memory-mapped channel registers. */
+ volatile struct spdk_ioat_registers *regs;
+
+ /* 64-bit completion-update word; volatile because it is read as a status snapshot. */
+ volatile uint64_t *comp_update;
+
+ /* Free-running ring indices; presumably head is the producer side and tail the consumer side. */
+ uint32_t head;
+ uint32_t tail;
+
+ /* log2 of the number of ring entries. */
+ uint32_t ring_size_order;
+ /* NOTE(review): presumably the last completed-descriptor address already processed -- confirm in ioat.c. */
+ uint64_t last_seen;
+
+ /* Software descriptors and hardware descriptors; presumably parallel arrays indexed by ring slot. */
+ struct ioat_descriptor *ring;
+ union spdk_ioat_hw_desc *hw_ring;
+ /* Capability flags for this channel. */
+ uint32_t dma_capabilities;
+
+ /* tailq entry for attached_chans */
+ TAILQ_ENTRY(spdk_ioat_chan) tailq;
+};
+
+/* Non-zero when the status field of a CHANSTS value equals ACTIVE. */
+static inline uint32_t
+is_ioat_active(uint64_t status)
+{
+	uint64_t state = status & SPDK_IOAT_CHANSTS_STATUS;
+
+	return state == SPDK_IOAT_CHANSTS_ACTIVE;
+}
+
+/* Non-zero when the status field of a CHANSTS value equals IDLE. */
+static inline uint32_t
+is_ioat_idle(uint64_t status)
+{
+	uint64_t state = status & SPDK_IOAT_CHANSTS_STATUS;
+
+	return state == SPDK_IOAT_CHANSTS_IDLE;
+}
+
+/* Non-zero when the status field of a CHANSTS value equals HALTED. */
+static inline uint32_t
+is_ioat_halted(uint64_t status)
+{
+	uint64_t state = status & SPDK_IOAT_CHANSTS_STATUS;
+
+	return state == SPDK_IOAT_CHANSTS_HALTED;
+}
+
+/* Non-zero when the status field of a CHANSTS value equals SUSPENDED. */
+static inline uint32_t
+is_ioat_suspended(uint64_t status)
+{
+	uint64_t state = status & SPDK_IOAT_CHANSTS_STATUS;
+
+	return state == SPDK_IOAT_CHANSTS_SUSPENDED;
+}
+
+#endif /* __IOAT_INTERNAL_H__ */
diff --git a/src/spdk/lib/ioat/spdk_ioat.map b/src/spdk/lib/ioat/spdk_ioat.map
new file mode 100644
index 000000000..f467da817
--- /dev/null
+++ b/src/spdk/lib/ioat/spdk_ioat.map
@@ -0,0 +1,17 @@
+{
+ global:
+
+ # public functions
+ spdk_ioat_probe;
+ spdk_ioat_detach;
+ spdk_ioat_build_copy;
+ spdk_ioat_submit_copy;
+ spdk_ioat_build_fill;
+ spdk_ioat_submit_fill;
+ spdk_ioat_flush;
+ spdk_ioat_process_events;
+ spdk_ioat_get_dma_capabilities;
+ spdk_ioat_get_max_descriptors;
+
+ # every symbol not listed under global stays local
+ local: *;
+};