summaryrefslogtreecommitdiffstats
path: root/src/spdk/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/spdk/include')
-rw-r--r--src/spdk/include/Makefile59
-rw-r--r--src/spdk/include/linux/virtio_blk.h201
-rw-r--r--src/spdk/include/linux/virtio_config.h91
-rw-r--r--src/spdk/include/linux/virtio_pci.h199
-rw-r--r--src/spdk/include/linux/virtio_ring.h218
-rw-r--r--src/spdk/include/linux/virtio_scsi.h172
-rw-r--r--src/spdk/include/linux/virtio_types.h46
-rw-r--r--src/spdk/include/spdk/accel_engine.h361
-rw-r--r--src/spdk/include/spdk/assert.h65
-rw-r--r--src/spdk/include/spdk/barrier.h116
-rw-r--r--src/spdk/include/spdk/base64.h144
-rw-r--r--src/spdk/include/spdk/bdev.h1705
-rw-r--r--src/spdk/include/spdk/bdev_module.h1219
-rw-r--r--src/spdk/include/spdk/bdev_zone.h259
-rw-r--r--src/spdk/include/spdk/bit_array.h203
-rw-r--r--src/spdk/include/spdk/blob.h897
-rw-r--r--src/spdk/include/spdk/blob_bdev.h88
-rw-r--r--src/spdk/include/spdk/blobfs.h599
-rw-r--r--src/spdk/include/spdk/blobfs_bdev.h98
-rw-r--r--src/spdk/include/spdk/conf.h215
-rw-r--r--src/spdk/include/spdk/cpuset.h182
-rw-r--r--src/spdk/include/spdk/crc16.h78
-rw-r--r--src/spdk/include/spdk/crc32.h73
-rw-r--r--src/spdk/include/spdk/dif.h457
-rw-r--r--src/spdk/include/spdk/endian.h178
-rw-r--r--src/spdk/include/spdk/env.h1301
-rw-r--r--src/spdk/include/spdk/env_dpdk.h86
-rw-r--r--src/spdk/include/spdk/event.h318
-rw-r--r--src/spdk/include/spdk/fd.h69
-rw-r--r--src/spdk/include/spdk/file.h61
-rw-r--r--src/spdk/include/spdk/ftl.h251
-rw-r--r--src/spdk/include/spdk/gpt_spec.h144
-rw-r--r--src/spdk/include/spdk/histogram_data.h264
-rw-r--r--src/spdk/include/spdk/idxd.h418
-rw-r--r--src/spdk/include/spdk/ioat.h244
-rw-r--r--src/spdk/include/spdk/ioat_spec.h330
-rw-r--r--src/spdk/include/spdk/iscsi_spec.h567
-rw-r--r--src/spdk/include/spdk/json.h337
-rw-r--r--src/spdk/include/spdk/jsonrpc.h352
-rw-r--r--src/spdk/include/spdk/likely.h46
-rw-r--r--src/spdk/include/spdk/log.h224
-rw-r--r--src/spdk/include/spdk/lvol.h299
-rw-r--r--src/spdk/include/spdk/memory.h60
-rw-r--r--src/spdk/include/spdk/mmio.h139
-rw-r--r--src/spdk/include/spdk/nbd.h102
-rw-r--r--src/spdk/include/spdk/net.h120
-rw-r--r--src/spdk/include/spdk/notify.h126
-rw-r--r--src/spdk/include/spdk/nvme.h3236
-rw-r--r--src/spdk/include/spdk/nvme_intel.h218
-rw-r--r--src/spdk/include/spdk/nvme_ocssd.h227
-rw-r--r--src/spdk/include/spdk/nvme_ocssd_spec.h414
-rw-r--r--src/spdk/include/spdk/nvme_spec.h2945
-rw-r--r--src/spdk/include/spdk/nvmf.h1048
-rw-r--r--src/spdk/include/spdk/nvmf_cmd.h226
-rw-r--r--src/spdk/include/spdk/nvmf_fc_spec.h411
-rw-r--r--src/spdk/include/spdk/nvmf_spec.h733
-rw-r--r--src/spdk/include/spdk/nvmf_transport.h495
-rw-r--r--src/spdk/include/spdk/opal.h145
-rw-r--r--src/spdk/include/spdk/opal_spec.h379
-rw-r--r--src/spdk/include/spdk/pci_ids.h139
-rw-r--r--src/spdk/include/spdk/pipe.h149
-rw-r--r--src/spdk/include/spdk/queue.h79
-rw-r--r--src/spdk/include/spdk/queue_extras.h343
-rw-r--r--src/spdk/include/spdk/reduce.h253
-rw-r--r--src/spdk/include/spdk/rpc.h155
-rw-r--r--src/spdk/include/spdk/scsi.h571
-rw-r--r--src/spdk/include/spdk/scsi_spec.h742
-rw-r--r--src/spdk/include/spdk/sock.h475
-rw-r--r--src/spdk/include/spdk/stdinc.h98
-rw-r--r--src/spdk/include/spdk/string.h271
-rw-r--r--src/spdk/include/spdk/thread.h736
-rw-r--r--src/spdk/include/spdk/trace.h404
-rw-r--r--src/spdk/include/spdk/util.h190
-rw-r--r--src/spdk/include/spdk/uuid.h108
-rw-r--r--src/spdk/include/spdk/version.h119
-rw-r--r--src/spdk/include/spdk/vhost.h337
-rw-r--r--src/spdk/include/spdk/vmd.h116
-rw-r--r--src/spdk/include/spdk_internal/accel_engine.h130
-rw-r--r--src/spdk/include/spdk_internal/assert.h55
-rw-r--r--src/spdk/include/spdk_internal/event.h197
-rw-r--r--src/spdk/include/spdk_internal/idxd.h74
-rw-r--r--src/spdk/include/spdk_internal/log.h108
-rw-r--r--src/spdk/include/spdk_internal/lvolstore.h128
-rw-r--r--src/spdk/include/spdk_internal/mock.h135
-rw-r--r--src/spdk/include/spdk_internal/nvme_tcp.h633
-rw-r--r--src/spdk/include/spdk_internal/rdma.h117
-rw-r--r--src/spdk/include/spdk_internal/sock.h227
-rw-r--r--src/spdk/include/spdk_internal/thread.h136
-rw-r--r--src/spdk/include/spdk_internal/uring.h51
-rw-r--r--src/spdk/include/spdk_internal/utf.h325
-rw-r--r--src/spdk/include/spdk_internal/vhost_user.h140
-rw-r--r--src/spdk/include/spdk_internal/virtio.h486
92 files changed, 32485 insertions, 0 deletions
diff --git a/src/spdk/include/Makefile b/src/spdk/include/Makefile
new file mode 100644
index 000000000..5bf07bd0a
--- /dev/null
+++ b/src/spdk/include/Makefile
@@ -0,0 +1,59 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+# Map each public header in include/spdk/ to its destination under $(DESTDIR)$(includedir)/spdk/.
+HEADERS := $(wildcard $(SPDK_ROOT_DIR)/include/spdk/*.h)
+INSTALLED_HEADERS := $(patsubst $(SPDK_ROOT_DIR)/include%,$(DESTDIR)$(includedir)%,$(HEADERS))
+# One rule per destination file: the recipe is INSTALL_HEADER only when the goal is exactly "install", otherwise UNINSTALL_HEADER.
+$(INSTALLED_HEADERS):
+ifeq ($(MAKECMDGOALS),install)
+ $(INSTALL_HEADER)
+else
+ $(UNINSTALL_HEADER)
+endif
+# Declared phony so the recipe runs even when the destination file already exists.
+.PHONY: $(INSTALLED_HEADERS)
+# Copy the public headers into the build tree. NOTE(review): nothing in this file creates build/include/spdk/ - confirm it exists before this runs.
+all:
+ $(Q)cp $(SPDK_ROOT_DIR)/include/spdk/*.h $(SPDK_ROOT_DIR)/build/include/spdk/
+
+clean:
+ @:
+
+install: $(INSTALLED_HEADERS)
+
+uninstall: $(INSTALLED_HEADERS)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/src/spdk/include/linux/virtio_blk.h b/src/spdk/include/linux/virtio_blk.h
new file mode 100644
index 000000000..95c438312
--- /dev/null
+++ b/src/spdk/include/linux/virtio_blk.h
@@ -0,0 +1,201 @@
+#ifndef _LINUX_VIRTIO_BLK_H
+#define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */
+#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
+#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH 9 /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
+
+struct virtio_blk_config {
+ /* The capacity (in 512-byte sectors). */
+ __u64 capacity;
+ /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */
+ __u32 size_max;
+ /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
+ __u32 seg_max;
+ /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
+ struct virtio_blk_geometry {
+ __u16 cylinders;
+ __u8 heads;
+ __u8 sectors;
+ } geometry;
+
+ /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+ __u32 blk_size;
+
+ /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */
+ /* exponent for physical block per logical block. */
+ __u8 physical_block_exp;
+ /* alignment offset in logical blocks. */
+ __u8 alignment_offset;
+ /* minimum I/O size without performance penalty in logical blocks. */
+ __u16 min_io_size;
+ /* optimal sustained I/O size in logical blocks. */
+ __u32 opt_io_size;
+
+ /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
+ __u8 wce;
+ __u8 unused;
+
+ /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+ __u16 num_queues;
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+ /*
+ * The maximum discard sectors (in 512-byte sectors) for
+ * one segment.
+ */
+ __u32 max_discard_sectors;
+ /*
+ * The maximum number of discard segments in a
+ * discard command.
+ */
+ __u32 max_discard_seg;
+ /* Discard commands must be aligned to this number of sectors. */
+ __u32 discard_sector_alignment;
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+ /*
+ * The maximum number of write zeroes sectors (in 512-byte sectors) in
+ * one segment.
+ */
+ __u32 max_write_zeroes_sectors;
+ /*
+ * The maximum number of segments in a write zeroes
+ * command.
+ */
+ __u32 max_write_zeroes_seg;
+ /*
+ * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+ * deallocation of one or more of the sectors.
+ */
+ __u8 write_zeroes_may_unmap;
+
+ __u8 unused1[3];
+} __attribute__((packed));
+
+/*
+ * Command types
+ *
+ * Usage is a bit tricky as some bits are used as flags and some are not.
+ *
+ * Rules:
+ * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or
+ * VIRTIO_BLK_T_BARRIER. VIRTIO_BLK_T_FLUSH is a command of its own
+ * and may not be combined with any of the other flags.
+ */
+
+/* These two define direction. */
+#define VIRTIO_BLK_T_IN 0
+#define VIRTIO_BLK_T_OUT 1
+
+#ifndef VIRTIO_BLK_NO_LEGACY
+/* This bit says it's a scsi command, not an actual read or write. */
+#define VIRTIO_BLK_T_SCSI_CMD 2
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+/* Cache flush command */
+#define VIRTIO_BLK_T_FLUSH 4
+
+/* Get device ID command */
+#define VIRTIO_BLK_T_GET_ID 8
+
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD 11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
+
+#ifndef VIRTIO_BLK_NO_LEGACY
+/* Barrier before this op. */
+#define VIRTIO_BLK_T_BARRIER 0x80000000
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+/*
+ * Request header: this comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
+struct virtio_blk_outhdr {
+ /* One of the VIRTIO_BLK_T_* values; see the combination rules under "Command types" above. */
+ __virtio32 type;
+ /* I/O priority. */
+ __virtio32 ioprio;
+ /* Sector, i.e. the offset expressed in 512-byte units. */
+ __virtio64 sector;
+};
+
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001
+
+/* One discard/write zeroes range of a request; every field is a fixed little-endian (__le*) type. */
+struct virtio_blk_discard_write_zeroes {
+ /* discard/write zeroes start sector */
+ __le64 sector;
+ /* number of discard/write zeroes sectors */
+ __le32 num_sectors;
+ /* flags for this range; VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP is only valid for the write zeroes command */
+ __le32 flags;
+};
+/* Legacy-only header, compiled out when VIRTIO_BLK_NO_LEGACY is defined. NOTE(review): presumably the response header for VIRTIO_BLK_T_SCSI_CMD requests - confirm against the virtio spec. */
+#ifndef VIRTIO_BLK_NO_LEGACY
+struct virtio_scsi_inhdr {
+ __virtio32 errors;
+ __virtio32 data_len;
+ __virtio32 sense_len;
+ __virtio32 residual;
+};
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+/* And this is the final byte of the write scatter-gather list. */
+#define VIRTIO_BLK_S_OK 0
+#define VIRTIO_BLK_S_IOERR 1
+#define VIRTIO_BLK_S_UNSUPP 2
+#endif /* _LINUX_VIRTIO_BLK_H */
diff --git a/src/spdk/include/linux/virtio_config.h b/src/spdk/include/linux/virtio_config.h
new file mode 100644
index 000000000..6c8e43a70
--- /dev/null
+++ b/src/spdk/include/linux/virtio_config.h
@@ -0,0 +1,91 @@
+#ifndef _LINUX_VIRTIO_CONFIG_H
+#define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+/* Virtio devices use a standardized configuration space to define their
+ * features and pass configuration information, but each implementation can
+ * store and access that space differently. */
+#include <linux/types.h>
+
+/* Status byte for guest to report progress, and synchronize features. */
+/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
+/* We have found a driver for the device. */
+#define VIRTIO_CONFIG_S_DRIVER 2
+/* Driver has used its parts of the config, and is happy */
+#define VIRTIO_CONFIG_S_DRIVER_OK 4
+/* Driver has finished configuring features */
+#define VIRTIO_CONFIG_S_FEATURES_OK 8
+/* Device entered invalid state, driver must reset it */
+#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40
+/* We've given up on this device. */
+#define VIRTIO_CONFIG_S_FAILED 0x80
+
+/*
+ * Virtio feature bits VIRTIO_TRANSPORT_F_START through
+ * VIRTIO_TRANSPORT_F_END are reserved for the transport
+ * being used (e.g. virtio_ring, virtio_pci etc.), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_TRANSPORT_F_END 38
+
+#ifndef VIRTIO_CONFIG_NO_LEGACY
+/* Do we get callbacks when the ring is completely used, even if we've
+ * suppressed them? */
+#define VIRTIO_F_NOTIFY_ON_EMPTY 24
+
+/* Can the device handle any descriptor layout? */
+#define VIRTIO_F_ANY_LAYOUT 27
+#endif /* VIRTIO_CONFIG_NO_LEGACY */
+
+/* v1.0 compliant. */
+#define VIRTIO_F_VERSION_1 32
+
+/*
+ * If clear - device has the IOMMU bypass quirk feature.
+ * If set - use platform tools to detect the IOMMU.
+ *
+ * Note the reverse polarity (compared to most other features),
+ * this is for compatibility with legacy systems.
+ */
+#define VIRTIO_F_IOMMU_PLATFORM 33
+
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED 34
+
+/*
+ * This feature indicates that memory accesses by the driver and the
+ * device are ordered in a way described by the platform.
+ */
+#define VIRTIO_F_ORDER_PLATFORM 36
+
+/*
+ * Does the device support Single Root I/O Virtualization?
+ */
+#define VIRTIO_F_SR_IOV 37
+#endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/src/spdk/include/linux/virtio_pci.h b/src/spdk/include/linux/virtio_pci.h
new file mode 100644
index 000000000..90007a1ab
--- /dev/null
+++ b/src/spdk/include/linux/virtio_pci.h
@@ -0,0 +1,199 @@
+/*
+ * Virtio PCI driver
+ *
+ * This module allows virtio devices to be used over a virtual PCI device.
+ * This can be used with QEMU based VMMs like KVM or Xen.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_VIRTIO_PCI_H
+#define _LINUX_VIRTIO_PCI_H
+
+#include <linux/types.h>
+
+#ifndef VIRTIO_PCI_NO_LEGACY
+
+/* A 32-bit r/o bitmask of the features supported by the host */
+#define VIRTIO_PCI_HOST_FEATURES 0
+
+/* A 32-bit r/w bitmask of features activated by the guest */
+#define VIRTIO_PCI_GUEST_FEATURES 4
+
+/* A 32-bit r/w PFN for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_PFN 8
+
+/* A 16-bit r/o queue size for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_NUM 12
+
+/* A 16-bit r/w queue selector */
+#define VIRTIO_PCI_QUEUE_SEL 14
+
+/* A 16-bit r/w queue notifier */
+#define VIRTIO_PCI_QUEUE_NOTIFY 16
+
+/* An 8-bit device status register. */
+#define VIRTIO_PCI_STATUS 18
+
+/* An 8-bit r/o interrupt status register. Reading the value will return the
+ * current contents of the ISR and will also clear it. This is effectively
+ * a read-and-acknowledge. */
+#define VIRTIO_PCI_ISR 19
+
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR 20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR 22
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20)
+/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
+#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
+
+/* Virtio ABI version, this must match exactly */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN 4096
+
+#endif /* VIRTIO_PCI_NO_LEGACY */
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR 0xffff
+
+#ifndef VIRTIO_PCI_NO_MODERN
+
+/* IDs for different capabilities. Must all exist. */
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG 1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG 3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG 4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG 5
+
+/* This is the PCI capability header; its field byte offsets are mirrored by the VIRTIO_PCI_CAP_VNDR (0) .. VIRTIO_PCI_CAP_LENGTH (12) macros below. */
+struct virtio_pci_cap {
+ __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
+ __u8 cap_next; /* Generic PCI field: next ptr. */
+ __u8 cap_len; /* Generic PCI field: capability length */
+ __u8 cfg_type; /* Identifies the structure; presumably one of the VIRTIO_PCI_CAP_*_CFG IDs above - confirm. */
+ __u8 bar; /* Where to find it. */
+ __u8 padding[3]; /* Pad to full dword. */
+ __le32 offset; /* Offset within bar. */
+ __le32 length; /* Length of the structure, in bytes. */
+};
+/* Capability header followed by the notify multiplier; VIRTIO_PCI_NOTIFY_CAP_MULT (16) below is the byte offset of that trailing field. */
+struct virtio_pci_notify_cap {
+ struct virtio_pci_cap cap;
+ __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG; their byte offsets are mirrored by the VIRTIO_PCI_COMMON_* macros below (DFSELECT = 0 ... Q_USEDHI = 52). */
+struct virtio_pci_common_cfg {
+ /* About the whole device. */
+ __le32 device_feature_select; /* read-write */
+ __le32 device_feature; /* read-only */
+ __le32 guest_feature_select; /* read-write */
+ __le32 guest_feature; /* read-write */
+ __le16 msix_config; /* read-write */
+ __le16 num_queues; /* read-only */
+ __u8 device_status; /* read-write */
+ __u8 config_generation; /* read-only */
+
+ /* About a specific virtqueue. */
+ __le16 queue_select; /* read-write */
+ __le16 queue_size; /* read-write, power of 2. */
+ __le16 queue_msix_vector; /* read-write */
+ __le16 queue_enable; /* read-write */
+ __le16 queue_notify_off; /* read-only */
+ __le32 queue_desc_lo; /* read-write */
+ __le32 queue_desc_hi; /* read-write */
+ __le32 queue_avail_lo; /* read-write */
+ __le32 queue_avail_hi; /* read-write */
+ __le32 queue_used_lo; /* read-write */
+ __le32 queue_used_hi; /* read-write */
+};
+
+/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
+struct virtio_pci_cfg_cap {
+ struct virtio_pci_cap cap;
+ __u8 pci_cfg_data[4]; /* Data for BAR access. */
+};
+
+/* Macro versions of offsets for the Old Timers! */
+#define VIRTIO_PCI_CAP_VNDR 0
+#define VIRTIO_PCI_CAP_NEXT 1
+#define VIRTIO_PCI_CAP_LEN 2
+#define VIRTIO_PCI_CAP_CFG_TYPE 3
+#define VIRTIO_PCI_CAP_BAR 4
+#define VIRTIO_PCI_CAP_OFFSET 8
+#define VIRTIO_PCI_CAP_LENGTH 12
+
+#define VIRTIO_PCI_NOTIFY_CAP_MULT 16
+
+#define VIRTIO_PCI_COMMON_DFSELECT 0
+#define VIRTIO_PCI_COMMON_DF 4
+#define VIRTIO_PCI_COMMON_GFSELECT 8
+#define VIRTIO_PCI_COMMON_GF 12
+#define VIRTIO_PCI_COMMON_MSIX 16
+#define VIRTIO_PCI_COMMON_NUMQ 18
+#define VIRTIO_PCI_COMMON_STATUS 20
+#define VIRTIO_PCI_COMMON_CFGGENERATION 21
+#define VIRTIO_PCI_COMMON_Q_SELECT 22
+#define VIRTIO_PCI_COMMON_Q_SIZE 24
+#define VIRTIO_PCI_COMMON_Q_MSIX 26
+#define VIRTIO_PCI_COMMON_Q_ENABLE 28
+#define VIRTIO_PCI_COMMON_Q_NOFF 30
+#define VIRTIO_PCI_COMMON_Q_DESCLO 32
+#define VIRTIO_PCI_COMMON_Q_DESCHI 36
+#define VIRTIO_PCI_COMMON_Q_AVAILLO 40
+#define VIRTIO_PCI_COMMON_Q_AVAILHI 44
+#define VIRTIO_PCI_COMMON_Q_USEDLO 48
+#define VIRTIO_PCI_COMMON_Q_USEDHI 52
+
+#endif /* VIRTIO_PCI_NO_MODERN */
+
+#endif /* _LINUX_VIRTIO_PCI_H */
diff --git a/src/spdk/include/linux/virtio_ring.h b/src/spdk/include/linux/virtio_ring.h
new file mode 100644
index 000000000..660138ffb
--- /dev/null
+++ b/src/spdk/include/linux/virtio_ring.h
@@ -0,0 +1,218 @@
+#ifndef _LINUX_VIRTIO_RING_H
+#define _LINUX_VIRTIO_RING_H
+/* An interface for efficient virtio implementation, currently for use by KVM,
+ * but hopefully others soon. Do NOT change this since it will
+ * break existing servers and clients.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright Rusty Russell IBM Corporation 2007. */
+#ifndef __KERNEL__
+#include <stdint.h>
+#endif
+#include <linux/types.h>
+#include <linux/virtio_types.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT 1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE 2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT 4
+
+/*
+ * Mark a descriptor as available or used in packed ring.
+ * Notice: they are defined as shifts instead of shifted values.
+ */
+#define VRING_PACKED_DESC_F_AVAIL 7
+#define VRING_PACKED_DESC_F_USED 15
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me when
+ * you add a buffer. It's unreliable, so it's simply an optimization. Guest
+ * will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
+ * when you consume a buffer. It's unreliable, so it's simply an
+ * optimization. */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+/* Enable events in packed ring. */
+#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0
+/* Disable events in packed ring. */
+#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1
+/*
+ * Enable events for a specific descriptor in packed ring.
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_PACKED_EVENT_FLAG_DESC 0x2
+
+/*
+ * Wrap counter bit shift in event suppression structure
+ * of packed ring.
+ */
+#define VRING_PACKED_EVENT_F_WRAP_CTR 15
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
+struct vring_desc {
+ /* Address (guest-physical). */
+ __virtio64 addr;
+ /* Length. */
+ __virtio32 len;
+ /* Flags: VRING_DESC_F_NEXT, VRING_DESC_F_WRITE and VRING_DESC_F_INDIRECT, as defined above. */
+ __virtio16 flags;
+ /* Continuation link, meaningful when VRING_DESC_F_NEXT is set. We chain unused descriptors via this, too */
+ __virtio16 next;
+};
+/* Available ring (see the layout comment below): flags, free-running idx, then ring[num] followed by one extra used_event slot. */
+struct vring_avail {
+ __virtio16 flags; /* e.g. VRING_AVAIL_F_NO_INTERRUPT */
+ __virtio16 idx;
+ __virtio16 ring[]; /* element ring[num] is the slot accessed by vring_used_event() */
+};
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+ /* Index of start of used descriptor chain. */
+ __virtio32 id;
+ /* Total length of the descriptor chain which was used (written to) */
+ __virtio32 len;
+};
+/* Used ring (see the layout comment below): flags, free-running idx, then ring[num] followed by a __virtio16 avail_event slot. */
+struct vring_used {
+ __virtio16 flags; /* e.g. VRING_USED_F_NO_NOTIFY */
+ __virtio16 idx;
+ struct vring_used_elem ring[]; /* vring_avail_event() reads a __virtio16 at the address of ring[num] */
+};
+/* Handle to the three areas of one ring laid out in a single memory chunk; the fields are filled in by vring_init(). */
+struct vring {
+ unsigned int num; /* entries per area; the layout comment below assumes a power of 2 */
+
+ struct vring_desc *desc; /* descriptor table, desc[num] */
+
+ struct vring_avail *avail; /* available ring */
+
+ struct vring_used *used; /* used ring, starts on an align boundary (see vring_init()) */
+};
+
+/* Alignment requirements for vring elements.
+ * When using pre-virtio 1.0 layout, these fall out naturally.
+ */
+#define VRING_AVAIL_ALIGN_SIZE 2
+#define VRING_USED_ALIGN_SIZE 4
+#define VRING_DESC_ALIGN_SIZE 16
+
+/* The standard layout for the ring is a continuous chunk of memory which looks
+ * like this. We assume num is a power of 2.
+ *
+ * struct vring
+ * {
+ * The actual descriptors (16 bytes each)
+ * struct vring_desc desc[num];
+ *
+ * A ring of available descriptor heads with free-running index.
+ * __virtio16 avail_flags;
+ * __virtio16 avail_idx;
+ * __virtio16 available[num];
+ * __virtio16 used_event_idx;
+ *
+ * Padding to the next align boundary.
+ * char pad[];
+ *
+ * A ring of used descriptor heads with free-running index.
+ * __virtio16 used_flags;
+ * __virtio16 used_idx;
+ * struct vring_used_elem used[num];
+ * __virtio16 avail_event_idx;
+ * };
+ */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
+/* Point vr at the desc, avail and used areas of the contiguous region starting at p. The mask-based rounding assumes align is a power of 2. */
+static inline void vring_init(struct vring *vr, unsigned int num, void *p,
+ unsigned long align)
+{
+ vr->num = num;
+ vr->desc = p; /* descriptor table sits at the start of the region */
+ vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc
+ )); /* avail ring follows the num descriptors */
+ vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16)
+ + align - 1) & ~(align - 1)); /* skip ring[num] plus the used_event slot, then round up to align */
+}
+/* Bytes needed for a ring of num entries, following the layout comment above. The mask-based rounding assumes align is a power of 2. */
+static inline unsigned vring_size(unsigned int num, unsigned long align)
+{
+ return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) /* desc[num], then avail flags + idx + ring[num] + used_event */
+ + align - 1) & ~(align - 1)) /* pad up to the next align boundary */
+ + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; /* used flags + idx + avail_event, plus used[num] */
+}
+
+/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
+/* Assuming a given event_idx value from the other side, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? Returns non-zero exactly when event_idx lies in [old, new_idx), with all arithmetic taken modulo 2^16. */
+static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
+{
+ /* Note: Xen has similar logic for notification hold-off
+ * in include/xen/interface/io/ring.h with req_event and req_prod
+ * corresponding to event_idx + 1 and new_idx respectively.
+ * Note also that req_event and req_prod in Xen start at 1,
+ * event indexes in virtio start at 0. */
+ return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
+}
+/* Event suppression structure of the packed ring; see the VRING_PACKED_EVENT_* definitions above. */
+struct vring_packed_desc_event {
+ /* Descriptor Ring Change Event Offset/Wrap Counter; the wrap counter bit shift is VRING_PACKED_EVENT_F_WRAP_CTR (15). */
+ __le16 off_wrap;
+ /* Descriptor Ring Change Event Flags; presumably one of the VRING_PACKED_EVENT_FLAG_* values - confirm. */
+ __le16 flags;
+};
+/* Descriptor of the packed ring; every field is a fixed little-endian (__le*) type. */
+struct vring_packed_desc {
+ /* Buffer Address. */
+ __le64 addr;
+ /* Buffer Length. */
+ __le32 len;
+ /* Buffer ID. */
+ __le16 id;
+ /* The flags depending on descriptor type; presumably VRING_PACKED_DESC_F_AVAIL (7) and VRING_PACKED_DESC_F_USED (15) are bit shifts into this field - confirm. */
+ __le16 flags;
+};
+
+#endif /* _LINUX_VIRTIO_RING_H */
diff --git a/src/spdk/include/linux/virtio_scsi.h b/src/spdk/include/linux/virtio_scsi.h
new file mode 100644
index 000000000..cc18ef882
--- /dev/null
+++ b/src/spdk/include/linux/virtio_scsi.h
@@ -0,0 +1,172 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_VIRTIO_SCSI_H
+#define _LINUX_VIRTIO_SCSI_H
+
+#include <linux/virtio_types.h>
+
+/* Default values of the CDB and sense data size configuration fields */
+#define VIRTIO_SCSI_CDB_DEFAULT_SIZE 32
+#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96
+
+#ifndef VIRTIO_SCSI_CDB_SIZE
+#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE
+#endif
+#ifndef VIRTIO_SCSI_SENSE_SIZE
+#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE
+#endif
+
+/* SCSI command request, followed by data-out */
+struct virtio_scsi_cmd_req {
+ __u8 lun[8]; /* Logical Unit Number */
+ __virtio64 tag; /* Command identifier */
+ __u8 task_attr; /* Task attribute */
+ __u8 prio; /* SAM command priority field */
+ __u8 crn;
+ __u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __attribute__((packed));
+
+/* SCSI command request, followed by protection information */
+struct virtio_scsi_cmd_req_pi {
+ __u8 lun[8]; /* Logical Unit Number */
+ __virtio64 tag; /* Command identifier */
+ __u8 task_attr; /* Task attribute */
+ __u8 prio; /* SAM command priority field */
+ __u8 crn;
+ __virtio32 pi_bytesout; /* DataOUT PI Number of bytes */
+ __virtio32 pi_bytesin; /* DataIN PI Number of bytes */
+ __u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __attribute__((packed));
+
+/* Response, followed by sense data and data-in */
+struct virtio_scsi_cmd_resp {
+ __virtio32 sense_len; /* Sense data length */
+ __virtio32 resid; /* Residual bytes in data buffer */
+ __virtio16 status_qualifier; /* Status qualifier */
+ __u8 status; /* Command completion status */
+ __u8 response; /* Response values */
+ __u8 sense[VIRTIO_SCSI_SENSE_SIZE];
+} __attribute__((packed));
+
+/* Task Management Request */
+struct virtio_scsi_ctrl_tmf_req {
+ __virtio32 type;
+ __virtio32 subtype;
+ __u8 lun[8];
+ __virtio64 tag;
+} __attribute__((packed));
+
+struct virtio_scsi_ctrl_tmf_resp {
+ __u8 response;
+} __attribute__((packed));
+
+/* Asynchronous notification query/subscription */
+struct virtio_scsi_ctrl_an_req {
+ __virtio32 type;
+ __u8 lun[8];
+ __virtio32 event_requested;
+} __attribute__((packed));
+
+struct virtio_scsi_ctrl_an_resp {
+ __virtio32 event_actual;
+ __u8 response;
+} __attribute__((packed));
+
+struct virtio_scsi_event {
+ __virtio32 event;
+ __u8 lun[8];
+ __virtio32 reason;
+} __attribute__((packed));
+
+/*
+ * virtio-scsi configuration fields (packed, no padding between members).
+ * sense_size and cdb_size are the sense data / CDB size configuration fields
+ * whose defaults are VIRTIO_SCSI_SENSE_DEFAULT_SIZE and
+ * VIRTIO_SCSI_CDB_DEFAULT_SIZE.
+ * NOTE(review): unlike the request/response structs in this header, the
+ * members are declared with plain __u16/__u32 rather than __virtio16/32;
+ * confirm the intended endianness handling with the users of this struct.
+ */
+struct virtio_scsi_config {
+ __u32 num_queues;
+ __u32 seg_max;
+ __u32 max_sectors;
+ __u32 cmd_per_lun;
+ __u32 event_info_size;
+ __u32 sense_size;
+ __u32 cdb_size;
+ __u16 max_channel;
+ __u16 max_target;
+ __u32 max_lun;
+} __attribute__((packed));
+
+/* Feature Bits */
+#define VIRTIO_SCSI_F_INOUT 0
+#define VIRTIO_SCSI_F_HOTPLUG 1
+#define VIRTIO_SCSI_F_CHANGE 2
+#define VIRTIO_SCSI_F_T10_PI 3
+
+/* Response codes */
+#define VIRTIO_SCSI_S_OK 0
+#define VIRTIO_SCSI_S_OVERRUN 1
+#define VIRTIO_SCSI_S_ABORTED 2
+#define VIRTIO_SCSI_S_BAD_TARGET 3
+#define VIRTIO_SCSI_S_RESET 4
+#define VIRTIO_SCSI_S_BUSY 5
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
+#define VIRTIO_SCSI_S_FAILURE 9
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
+
+/* Controlq type codes. */
+#define VIRTIO_SCSI_T_TMF 0
+#define VIRTIO_SCSI_T_AN_QUERY 1
+#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
+
+/* Valid TMF subtypes. */
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
+
+/* Events. */
+#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000
+#define VIRTIO_SCSI_T_NO_EVENT 0
+#define VIRTIO_SCSI_T_TRANSPORT_RESET 1
+#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
+#define VIRTIO_SCSI_T_PARAM_CHANGE 3
+
+/* Reasons of transport reset event */
+#define VIRTIO_SCSI_EVT_RESET_HARD 0
+#define VIRTIO_SCSI_EVT_RESET_RESCAN 1
+#define VIRTIO_SCSI_EVT_RESET_REMOVED 2
+
+#define VIRTIO_SCSI_S_SIMPLE 0
+#define VIRTIO_SCSI_S_ORDERED 1
+#define VIRTIO_SCSI_S_HEAD 2
+#define VIRTIO_SCSI_S_ACA 3
+
+
+#endif /* _LINUX_VIRTIO_SCSI_H */
diff --git a/src/spdk/include/linux/virtio_types.h b/src/spdk/include/linux/virtio_types.h
new file mode 100644
index 000000000..6162bdf03
--- /dev/null
+++ b/src/spdk/include/linux/virtio_types.h
@@ -0,0 +1,46 @@
+#ifndef _LINUX_VIRTIO_TYPES_H
+#define _LINUX_VIRTIO_TYPES_H
+/* Type definitions for virtio implementations.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ */
+#include <linux/types.h>
+
+/*
+ * __virtio{16,32,64} have the following meaning:
+ * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
+ * - __le{16,32,64} for standard-compliant virtio devices
+ */
+
+typedef __u16 __bitwise__ __virtio16;
+typedef __u32 __bitwise__ __virtio32;
+typedef __u64 __bitwise__ __virtio64;
+
+#endif /* _LINUX_VIRTIO_TYPES_H */
diff --git a/src/spdk/include/spdk/accel_engine.h b/src/spdk/include/spdk/accel_engine.h
new file mode 100644
index 000000000..be48e2ce3
--- /dev/null
+++ b/src/spdk/include/spdk/accel_engine.h
@@ -0,0 +1,361 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Acceleration engine abstraction layer
+ */
+
+#ifndef SPDK_ACCEL_ENGINE_H
+#define SPDK_ACCEL_ENGINE_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Acceleration capability flags.
+ *
+ * Each enumerator is a distinct single bit, so several of them can be OR-ed
+ * into one bitmap such as the one spdk_accel_get_capabilities() is documented
+ * to return.
+ */
+enum accel_capability {
+ ACCEL_COPY = 1 << 0,
+ ACCEL_FILL = 1 << 1,
+ ACCEL_DUALCAST = 1 << 2,
+ ACCEL_COMPARE = 1 << 3,
+ ACCEL_BATCH = 1 << 4,
+ ACCEL_CRC32C = 1 << 5,
+ ACCEL_DIF = 1 << 6,
+};
+
+/**
+ * Acceleration operation callback.
+ *
+ * \param ref 'accel_req' passed to the corresponding spdk_accel_submit* call.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_accel_completion_cb)(void *ref, int status);
+
+/**
+ * Acceleration engine finish callback.
+ *
+ * \param cb_arg Callback argument.
+ */
+typedef void (*spdk_accel_fini_cb)(void *cb_arg);
+
+struct spdk_io_channel;
+
+struct spdk_accel_batch;
+
+/**
+ * Initialize the acceleration engine.
+ *
+ * \return 0 on success.
+ */
+int spdk_accel_engine_initialize(void);
+
+/**
+ * Close the acceleration engine.
+ *
+ * \param cb_fn Called when the close operation completes.
+ * \param cb_arg Argument passed to the callback function.
+ */
+void spdk_accel_engine_finish(spdk_accel_fini_cb cb_fn, void *cb_arg);
+
+/**
+ * Get the configuration for the acceleration engine.
+ *
+ * \param fp Pointer to the file that the configuration will be written to.
+ */
+void spdk_accel_engine_config_text(FILE *fp);
+
+/**
+ * Close the acceleration engine module and perform any necessary cleanup.
+ */
+void spdk_accel_engine_module_finish(void);
+
+/**
+ * Get the I/O channel registered on the acceleration engine.
+ *
+ * This I/O channel is used to submit copy request.
+ *
+ * \return a pointer to the I/O channel on success, or NULL on failure.
+ */
+struct spdk_io_channel *spdk_accel_engine_get_io_channel(void);
+
+/**
+ * Retrieve accel engine capabilities.
+ *
+ * \param ch I/O channel associated with this call.
+ *
+ * \return bitmap of capabilities defined by enum accel_capability.
+ */
+uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch);
+
+/**
+ * Submit a copy request.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param dst Destination to copy to.
+ * \param src Source to copy from.
+ * \param nbytes Length in bytes to copy.
+ * \param cb_fn Called when this copy operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to get batch size. This is the maximum number of
+ * descriptors that a batch can contain. Once this limit is reached the batch
+ * should be processed with spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ *
+ * \return max number of descriptors per batch.
+ */
+uint32_t spdk_accel_batch_get_max(struct spdk_io_channel *ch);
+
+/**
+ * Synchronous call to create a batch sequence.
+ *
+ * \param ch I/O channel associated with this call.
+ *
+ * \return handle to use for subsequent batch requests, NULL on failure.
+ */
+struct spdk_accel_batch *spdk_accel_batch_create(struct spdk_io_channel *ch);
+
+/**
+ * Asynchronous call to submit a batch sequence.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to cancel a batch sequence. In some cases prepared commands will be
+ * processed if they cannot be cancelled.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
+
+/**
+ * Synchronous call to prepare a copy request into a previously initialized batch
+ * created with spdk_accel_batch_create(). The callback will be called when the copy
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param dst Destination to copy to.
+ * \param src Source to copy from.
+ * \param nbytes Length in bytes to copy.
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Synchronous call to prepare a dualcast request into a previously initialized batch
+ * created with spdk_accel_batch_create(). The callback will be called when the dualcast
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param dst1 First destination to copy to (must be 4K aligned).
+ * \param dst2 Second destination to copy to (must be 4K aligned).
+ * \param src Source to copy from.
+ * \param nbytes Length in bytes to copy.
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst1, void *dst2, void *src, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a dual cast copy request.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param dst1 First destination to copy to (must be 4K aligned).
+ * \param dst2 Second destination to copy to (must be 4K aligned).
+ * \param src Source to copy from.
+ * \param nbytes Length in bytes to copy.
+ * \param cb_fn Called when this copy operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a compare request into a previously initialized batch
+ * created with spdk_accel_batch_create(). The callback will be called when the compare
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param src1 First location to perform compare on.
+ * \param src2 Second location to perform compare on.
+ * \param nbytes Length in bytes to compare.
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Submit a compare request.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param src1 First location to perform compare on.
+ * \param src2 Second location to perform compare on.
+ * \param nbytes Length in bytes to compare.
+ * \param cb_fn Called when this compare operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, any other value means there was a miscompare.
+ */
+int spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a fill request into a previously initialized batch
+ * created with spdk_accel_batch_create(). The callback will be called when the fill
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param dst Destination to fill.
+ * \param fill Constant byte to fill to the destination.
+ * \param nbytes Length in bytes to fill.
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, uint8_t fill, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a fill request.
+ *
+ * This operation will fill the destination buffer with the specified value.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param dst Destination to fill.
+ * \param fill Constant byte to fill to the destination.
+ * \param nbytes Length in bytes to fill.
+ * \param cb_fn Called when this fill operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a crc32c request into a previously initialized batch
+ * created with spdk_accel_batch_create(). The callback will be called when the crc32c
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_accel_batch_submit().
+ *
+ * \param ch I/O channel associated with this call.
+ * \param batch Handle provided when the batch was started with spdk_accel_batch_create().
+ * \param dst Destination to write the CRC-32C to.
+ * \param src The source address for the data.
+ * \param seed Four byte seed value.
+ * \param nbytes Length in bytes.
+ * \param cb_fn Called when this operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a CRC-32C calculation request.
+ *
+ * This operation will calculate the 4 byte CRC32-C for the given data.
+ *
+ * \param ch I/O channel associated with this call.
+ * \param dst Destination to write the CRC-32C to.
+ * \param src The source address for the data.
+ * \param seed Four byte seed value.
+ * \param nbytes Length in bytes.
+ * \param cb_fn Called when this CRC-32C operation completes.
+ * \param cb_arg Callback argument.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, uint32_t seed,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+
+struct spdk_json_write_ctx;
+
+/**
+ * Write Acceleration subsystem configuration into provided JSON context.
+ *
+ * \param w JSON write context
+ */
+void spdk_accel_write_config_json(struct spdk_json_write_ctx *w);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/assert.h b/src/spdk/include/spdk/assert.h
new file mode 100644
index 000000000..67e674aac
--- /dev/null
+++ b/src/spdk/include/spdk/assert.h
@@ -0,0 +1,65 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Runtime and compile-time assert macros
+ */
+
+#ifndef SPDK_ASSERT_H
+#define SPDK_ASSERT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * SPDK_STATIC_ASSERT(cond, msg): compile-time assertion wrapper.
+ *
+ * static_assert is a macro from <assert.h> in C11/C17, but a keyword in C++11
+ * and in C23, where "#ifdef static_assert" alone does not see it. Detect
+ * those cases by language version so the assertion is really enforced.
+ */
+#if defined(static_assert) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L) || \
+ (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
+#define SPDK_STATIC_ASSERT(cond, msg) static_assert(cond, msg)
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+/* C11/C17 without the <assert.h> macro in scope: use the keyword directly. */
+#define SPDK_STATIC_ASSERT(cond, msg) _Static_assert(cond, msg)
+#else
+/**
+ * Compatibility wrapper for static_assert.
+ *
+ * This won't actually enforce the condition when compiled with an environment that doesn't support
+ * C11 static_assert; it is only intended to allow end users with old compilers to build the package.
+ *
+ * Developers should use a recent compiler that provides static_assert.
+ */
+#define SPDK_STATIC_ASSERT(cond, msg)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_ASSERT_H */
diff --git a/src/spdk/include/spdk/barrier.h b/src/spdk/include/spdk/barrier.h
new file mode 100644
index 000000000..acae360c7
--- /dev/null
+++ b/src/spdk/include/spdk/barrier.h
@@ -0,0 +1,116 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * Copyright (c) 2017, IBM Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Memory barriers
+ */
+
+#ifndef SPDK_BARRIER_H
+#define SPDK_BARRIER_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Compiler memory barrier */
+#define spdk_compiler_barrier() __asm volatile("" ::: "memory")
+
+/** Read memory barrier */
+#define spdk_rmb() _spdk_rmb()
+
+/** Write memory barrier */
+#define spdk_wmb() _spdk_wmb()
+
+/** Full read/write memory barrier */
+#define spdk_mb() _spdk_mb()
+
+/** SMP read memory barrier. */
+#define spdk_smp_rmb() _spdk_smp_rmb()
+
+/** SMP write memory barrier. */
+#define spdk_smp_wmb() _spdk_smp_wmb()
+
+/** SMP read/write memory barrier. */
+#define spdk_smp_mb() _spdk_smp_mb()
+
+#ifdef __PPC64__
+
+#define _spdk_rmb() __asm volatile("sync" ::: "memory")
+#define _spdk_wmb() __asm volatile("sync" ::: "memory")
+#define _spdk_mb() __asm volatile("sync" ::: "memory")
+#define _spdk_smp_rmb() __asm volatile("lwsync" ::: "memory")
+#define _spdk_smp_wmb() __asm volatile("lwsync" ::: "memory")
+#define _spdk_smp_mb() spdk_mb()
+
+#elif defined(__aarch64__)
+
+#define _spdk_rmb() __asm volatile("dsb ld" ::: "memory")
+#define _spdk_wmb() __asm volatile("dsb st" ::: "memory")
+#define _spdk_mb() __asm volatile("dsb sy" ::: "memory")
+#define _spdk_smp_rmb() __asm volatile("dmb ishld" ::: "memory")
+#define _spdk_smp_wmb() __asm volatile("dmb ishst" ::: "memory")
+#define _spdk_smp_mb() __asm volatile("dmb ish" ::: "memory")
+
+#elif defined(__i386__) || defined(__x86_64__)
+
+#define _spdk_rmb() __asm volatile("lfence" ::: "memory")
+#define _spdk_wmb() __asm volatile("sfence" ::: "memory")
+#define _spdk_mb() __asm volatile("mfence" ::: "memory")
+#define _spdk_smp_rmb() spdk_compiler_barrier()
+#define _spdk_smp_wmb() spdk_compiler_barrier()
+/*
+ * Full SMP barrier on x86: a locked add of zero to a stack location below the
+ * stack pointer, with a "memory" clobber so the compiler does not reorder
+ * across it.
+ *
+ * The expansion deliberately has no trailing semicolon (the caller writes it),
+ * like every other barrier macro in this file, so that spdk_smp_mb() expands
+ * to a single statement and stays usable in an unbraced if/else.
+ */
+#if defined(__x86_64__)
+#define _spdk_smp_mb() __asm volatile("lock addl $0, -128(%%rsp); " ::: "memory")
+#elif defined(__i386__)
+#define _spdk_smp_mb() __asm volatile("lock addl $0, -128(%%esp); " ::: "memory")
+#endif
+
+#else
+
+#define _spdk_rmb()
+#define _spdk_wmb()
+#define _spdk_mb()
+#define _spdk_smp_rmb()
+#define _spdk_smp_wmb()
+#define _spdk_smp_mb()
+#error Unknown architecture
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/base64.h b/src/spdk/include/spdk/base64.h
new file mode 100644
index 000000000..86f41bba6
--- /dev/null
+++ b/src/spdk/include/spdk/base64.h
@@ -0,0 +1,144 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Base64 utility functions
+ */
+
+#ifndef SPDK_BASE64_H
+#define SPDK_BASE64_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Following the Base64 part in RFC4648:
+ * https://tools.ietf.org/html/rfc4648.html
+ */
+
+/**
+ * Compute the strlen of the Base64 text produced by encoding a raw buffer.
+ *
+ * \param raw_len Length of raw buffer.
+ * \return Encoded Base64 string length, excluding the terminating null byte ('\0').
+ */
+static inline size_t spdk_base64_get_encoded_strlen(size_t raw_len)
+{
+ /* Every started group of 3 raw bytes becomes 4 encoded characters. */
+ size_t groups = (raw_len + 2) / 3;
+
+ return groups * 4;
+}
+
+/**
+ * Compute the raw buffer length that corresponds to an encoded Base64 strlen.
+ *
+ * The result is the maximum possible decoded length. The exact decoded length
+ * may be shorter, depending on whether the Base64 string contained padding.
+ *
+ * \param encoded_strlen Length of encoded Base64 string, excluding terminating null
+ * byte ('\0').
+ * \return Length of raw buffer.
+ */
+static inline size_t spdk_base64_get_decoded_len(size_t encoded_strlen)
+{
+ size_t quads = encoded_strlen / 4;
+ size_t tail = encoded_strlen % 4;
+
+ /* (encoded_strlen, raw length) pairs are (4n, 3n), (4n+2, 3n+1) or (4n+3, 3n+2). */
+ return quads * 3 + (tail + 1) / 2;
+}
+
+/**
+ * Base 64 Encoding with Standard Base64 Alphabet defined in RFC4648.
+ *
+ * \param dst Buffer address of encoded Base64 string. Its length should be enough
+ * to contain Base64 string and the terminating null byte ('\0'), so it needs to be at
+ * least as long as 1 + spdk_base64_get_encoded_strlen(src_len).
+ * \param src Raw data buffer to be encoded.
+ * \param src_len Length of raw data buffer.
+ *
+ * \return 0 on success.
+ * \return -EINVAL if dst or src is NULL, or src_len is 0.
+ */
+int spdk_base64_encode(char *dst, const void *src, size_t src_len);
+
+/**
+ * Base 64 Encoding with URL and Filename Safe Alphabet.
+ *
+ * \param dst Buffer address of encoded Base64 string. Its length should be enough
+ * to contain Base64 string and the terminating null byte ('\0'), so it needs to be at
+ * least as long as 1 + spdk_base64_get_encoded_strlen(src_len).
+ * \param src Raw data buffer to be encoded.
+ * \param src_len Length of raw data buffer.
+ *
+ * \return 0 on success.
+ * \return -EINVAL if dst or src is NULL, or src_len is 0.
+ */
+int spdk_base64_urlsafe_encode(char *dst, const void *src, size_t src_len);
+
+/**
+ * Base 64 Decoding with Standard Base64 Alphabet defined in RFC4648.
+ *
+ * \param dst Buffer address of decoded raw data. Its length should be enough
+ * to contain decoded raw data, so it needs to be at least as long as
+ * spdk_base64_get_decoded_len(encoded_strlen). If NULL, only dst_len will be populated
+ * indicating the exact decoded length.
+ * \param dst_len Output parameter for the length of actual decoded raw data.
+ * If NULL, the actual decoded length won't be returned.
+ * \param src Data buffer for base64 string to be decoded.
+ *
+ * \return 0 on success.
+ * \return -EINVAL if src is NULL, or content of src is illegal.
+ */
+int spdk_base64_decode(void *dst, size_t *dst_len, const char *src);
+
+/**
+ * Base 64 Decoding with URL and Filename Safe Alphabet.
+ *
+ * \param dst Buffer address of decoded raw data. Its length should be enough
+ * to contain decoded raw data, so it needs to be at least as long as
+ * spdk_base64_get_decoded_len(encoded_strlen). If NULL, only dst_len will be populated
+ * indicating the exact decoded length.
+ * \param dst_len Output parameter for the length of actual decoded raw data.
+ * If NULL, the actual decoded length won't be returned.
+ * \param src Data buffer for base64 string to be decoded.
+ *
+ * \return 0 on success.
+ * \return -EINVAL if src is NULL, or content of src is illegal.
+ */
+int spdk_base64_urlsafe_decode(void *dst, size_t *dst_len, const char *src);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_BASE64_H */
diff --git a/src/spdk/include/spdk/bdev.h b/src/spdk/include/spdk/bdev.h
new file mode 100644
index 000000000..0bb39c410
--- /dev/null
+++ b/src/spdk/include/spdk/bdev.h
@@ -0,0 +1,1705 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Block device abstraction layer
+ */
+
+#ifndef SPDK_BDEV_H_
+#define SPDK_BDEV_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/scsi_spec.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/json.h"
+#include "spdk/queue.h"
+#include "spdk/histogram_data.h"
+#include "spdk/dif.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_BDEV_SMALL_BUF_MAX_SIZE 8192
+#define SPDK_BDEV_LARGE_BUF_MAX_SIZE (64 * 1024)
+
+/* Increase the buffer size to store interleaved metadata. Increment is the
+ * amount necessary to store metadata per data block. 16 byte metadata per
+ * 512 byte data block is the current maximum ratio of metadata per block.
+ */
+#define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16))
+
+/** Asynchronous event type */
+enum spdk_bdev_event_type {
+ SPDK_BDEV_EVENT_REMOVE,
+ SPDK_BDEV_EVENT_RESIZE,
+ SPDK_BDEV_EVENT_MEDIA_MANAGEMENT,
+};
+
+/** Media management event details */
+struct spdk_bdev_media_event {
+ uint64_t offset;
+ uint64_t num_blocks;
+};
+
+/**
+ * \brief SPDK block device.
+ *
+ * This is a virtual representation of a block device that is exported by the backend.
+ */
+struct spdk_bdev;
+
+/**
+ * Block device remove callback.
+ *
+ * \param remove_ctx Context for the removed block device.
+ */
+typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx);
+
+/**
+ * Block device event callback.
+ *
+ * \param type Event type.
+ * \param bdev Block device that triggered event.
+ * \param event_ctx Context for the block device event.
+ */
+typedef void (*spdk_bdev_event_cb_t)(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
+ void *event_ctx);
+
+/**
+ * Block device I/O
+ *
+ * This is an I/O that is passed to an spdk_bdev.
+ */
+struct spdk_bdev_io;
+
+struct spdk_bdev_fn_table;
+struct spdk_io_channel;
+struct spdk_json_write_ctx;
+struct spdk_uuid;
+
+/** bdev status */
+enum spdk_bdev_status {
+ SPDK_BDEV_STATUS_INVALID,
+ SPDK_BDEV_STATUS_READY,
+ SPDK_BDEV_STATUS_REMOVING,
+};
+
+/**
+ * \brief Handle to an opened SPDK block device.
+ */
+struct spdk_bdev_desc;
+
+/** bdev I/O type */
+enum spdk_bdev_io_type {
+ SPDK_BDEV_IO_TYPE_INVALID = 0,
+ SPDK_BDEV_IO_TYPE_READ,
+ SPDK_BDEV_IO_TYPE_WRITE,
+ SPDK_BDEV_IO_TYPE_UNMAP,
+ SPDK_BDEV_IO_TYPE_FLUSH,
+ SPDK_BDEV_IO_TYPE_RESET,
+ SPDK_BDEV_IO_TYPE_NVME_ADMIN,
+ SPDK_BDEV_IO_TYPE_NVME_IO,
+ SPDK_BDEV_IO_TYPE_NVME_IO_MD,
+ SPDK_BDEV_IO_TYPE_WRITE_ZEROES,
+ SPDK_BDEV_IO_TYPE_ZCOPY,
+ SPDK_BDEV_IO_TYPE_GET_ZONE_INFO,
+ SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT,
+ SPDK_BDEV_IO_TYPE_ZONE_APPEND,
+ SPDK_BDEV_IO_TYPE_COMPARE,
+ SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE,
+ SPDK_BDEV_IO_TYPE_ABORT,
+ SPDK_BDEV_NUM_IO_TYPES /* Keep last */
+};
+
+/** bdev QoS rate limit type */
+enum spdk_bdev_qos_rate_limit_type {
+ /** IOPS rate limit for both read and write */
+ SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT = 0,
+ /** Byte per second rate limit for both read and write */
+ SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT,
+ /** Byte per second rate limit for read only */
+ SPDK_BDEV_QOS_R_BPS_RATE_LIMIT,
+ /** Byte per second rate limit for write only */
+ SPDK_BDEV_QOS_W_BPS_RATE_LIMIT,
+ /** Keep last */
+ SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES
+};
+
+/**
+ * Block device completion callback.
+ *
+ * \param bdev_io Block device I/O that has completed.
+ * \param success True if I/O completed successfully or false if it failed;
+ * additional error information may be retrieved from bdev_io by calling
+ * spdk_bdev_io_get_nvme_status() or spdk_bdev_io_get_scsi_status().
+ * \param cb_arg Callback argument specified when bdev_io was submitted.
+ */
+typedef void (*spdk_bdev_io_completion_cb)(struct spdk_bdev_io *bdev_io,
+ bool success,
+ void *cb_arg);
+
+/**
+ * I/O statistics for a block device, as passed to spdk_bdev_get_device_stat_cb.
+ *
+ * NOTE(review): the counters appear to be grouped per operation kind (read, write,
+ * unmap) as byte totals, operation counts and latencies in ticks, with ticks_rate
+ * presumably giving the tick frequency needed to convert the latencies to time —
+ * confirm against the implementation.
+ */
+struct spdk_bdev_io_stat {
+ uint64_t bytes_read;
+ uint64_t num_read_ops;
+ uint64_t bytes_written;
+ uint64_t num_write_ops;
+ uint64_t bytes_unmapped;
+ uint64_t num_unmap_ops;
+ uint64_t read_latency_ticks;
+ uint64_t write_latency_ticks;
+ uint64_t unmap_latency_ticks;
+ uint64_t ticks_rate;
+};
+
+/**
+ * Options of the bdev layer, read with spdk_bdev_get_opts() and applied with
+ * spdk_bdev_set_opts().
+ *
+ * NOTE(review): judging by the names, the two sizes configure the spdk_bdev_io
+ * pool and its cache, and bdev_auto_examine toggles automatic examination of
+ * bdevs — units and valid ranges are not visible here; confirm.
+ */
+struct spdk_bdev_opts {
+ uint32_t bdev_io_pool_size;
+ uint32_t bdev_io_cache_size;
+ bool bdev_auto_examine;
+};
+
+/**
+ * Get the bdev layer options.
+ *
+ * \param opts Output structure to be filled in (presumably with the options
+ * currently in effect — confirm against the implementation).
+ */
+void spdk_bdev_get_opts(struct spdk_bdev_opts *opts);
+
+/**
+ * Set the bdev layer options.
+ *
+ * \param opts Options to apply.
+ *
+ * \return status code; presumably 0 on success and non-zero on failure, as for
+ * the other int-returning functions in this header — confirm.
+ */
+int spdk_bdev_set_opts(struct spdk_bdev_opts *opts);
+
+/**
+ * Block device initialization callback.
+ *
+ * \param cb_arg Callback argument.
+ * \param rc 0 if block device initialized successfully or negative errno if it failed.
+ */
+typedef void (*spdk_bdev_init_cb)(void *cb_arg, int rc);
+
+/**
+ * Block device finish callback.
+ *
+ * \param cb_arg Callback argument.
+ */
+typedef void (*spdk_bdev_fini_cb)(void *cb_arg);
+
+/**
+ * Block device statistics callback.
+ *
+ * \param bdev Block device the statistics belong to.
+ * \param stat I/O statistics structure.
+ * \param cb_arg Callback argument.
+ * \param rc Result code (presumably 0 on success or negative errno on failure,
+ * matching spdk_bdev_init_cb — confirm).
+ */
+typedef void (*spdk_bdev_get_device_stat_cb)(struct spdk_bdev *bdev,
+ struct spdk_bdev_io_stat *stat, void *cb_arg, int rc);
+
+/**
+ * Block device channel IO timeout callback
+ *
+ * \param cb_arg Callback argument
+ * \param bdev_io The IO that caused the timeout
+ */
+typedef void (*spdk_bdev_io_timeout_cb)(void *cb_arg, struct spdk_bdev_io *bdev_io);
+
+/**
+ * Initialize block device modules.
+ *
+ * \param cb_fn Called when the initialization is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg);
+
+/**
+ * Perform cleanup work to remove the registered block device modules.
+ *
+ * \param cb_fn Called when the removal is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg);
+
+/**
+ * Get the configuration options for the registered block device modules.
+ *
+ * \param fp Pointer to the file to which the configuration options will be written.
+ */
+void spdk_bdev_config_text(FILE *fp);
+
+/**
+ * Get the full configuration options for the registered block device modules and created bdevs.
+ *
+ * \param w pointer to a JSON write context where the configuration will be written.
+ */
+void spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w);
+
+/**
+ * Get block device by the block device name.
+ *
+ * \param bdev_name The name of the block device.
+ * \return Block device associated with the name or NULL if no block device with
+ * bdev_name is currently registered.
+ */
+struct spdk_bdev *spdk_bdev_get_by_name(const char *bdev_name);
+
+/**
+ * Get the first registered block device.
+ *
+ * \return The first registered block device.
+ */
+struct spdk_bdev *spdk_bdev_first(void);
+
+/**
+ * Get the next registered block device.
+ *
+ * \param prev The current block device.
+ * \return The next registered block device.
+ */
+struct spdk_bdev *spdk_bdev_next(struct spdk_bdev *prev);
+
+/**
+ * Get the first block device without virtual block devices on top.
+ *
+ * This function only traverses over block devices which have no virtual block
+ * devices on top of them, then get the first one.
+ *
+ * \return The first block device without virtual block devices on top.
+ */
+struct spdk_bdev *spdk_bdev_first_leaf(void);
+
+/**
+ * Get the next block device without virtual block devices on top.
+ *
+ * This function only traverses over block devices which have no virtual block
+ * devices on top of them, then get the next one.
+ *
+ * \param prev The current block device.
+ * \return The next block device without virtual block devices on top.
+ */
+struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev);
+
+/**
+ * Open a block device for I/O operations (deprecated, please use spdk_bdev_open_ext).
+ *
+ * \param bdev Block device to open.
+ * \param write true if read/write access requested, false if read-only
+ * \param remove_cb notification callback to be called when the bdev gets
+ * hotremoved. This will always be called on the same thread that
+ * spdk_bdev_open() was called on. It can be NULL, in which case the upper
+ * layer won't be notified about the bdev hotremoval. The descriptor will
+ * have to be manually closed to make the bdev unregister proceed.
+ * \param remove_ctx param for remove_cb.
+ * \param desc output parameter for the descriptor when operation is successful
+ * \return 0 if operation is successful, suitable errno value otherwise
+ */
+int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
+ void *remove_ctx, struct spdk_bdev_desc **desc);
+
+/**
+ * Open a block device for I/O operations.
+ *
+ * \param bdev_name Block device name to open.
+ * \param write true if read/write access requested, false if read-only
+ * \param event_cb notification callback to be called when the bdev triggers
+ * asynchronous event such as bdev removal. This will always be called on the
+ * same thread that spdk_bdev_open_ext() was called on. In case of removal event
+ * the descriptor will have to be manually closed to make the bdev unregister
+ * proceed.
+ * \param event_ctx param for event_cb.
+ * \param desc output parameter for the descriptor when operation is successful
+ * \return 0 if operation is successful, suitable errno value otherwise
+ */
+int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
+ void *event_ctx, struct spdk_bdev_desc **desc);
+
+/**
+ * Close a previously opened block device.
+ *
+ * Must be called on the same thread that spdk_bdev_open() or
+ * spdk_bdev_open_ext() was performed on.
+ *
+ * \param desc Block device descriptor to close.
+ */
+void spdk_bdev_close(struct spdk_bdev_desc *desc);
+
+/**
+ * Get the bdev associated with a bdev descriptor.
+ *
+ * \param desc Open block device descriptor
+ * \return bdev associated with the descriptor
+ */
+struct spdk_bdev *spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc);
+
+/**
+ * Set the I/O timeout for the bdev and the callback invoked on timeout.
+ * This function can be used to enable or disable the timeout handler. If
+ * timeout_in_sec > 0, timeout handling is enabled or its time limit is
+ * changed. If timeout_in_sec == 0, timeout handling is disabled. To enable
+ * timeout handling or change its time limit, an spdk_bdev_io_timeout_cb must
+ * be specified; the caller decides in that callback how to handle an I/O
+ * that timed out, for example by resetting the device or aborting the I/O.
+ * Note: This function must run in the desc's thread.
+ *
+ * \param desc Block device descriptor.
+ * \param timeout_in_sec Timeout value
+ * \param cb_fn Bdev IO timeout callback
+ * \param cb_arg Callback argument
+ *
+ * \return 0 on success, negated errno on failure.
+ */
+int spdk_bdev_set_timeout(struct spdk_bdev_desc *desc, uint64_t timeout_in_sec,
+ spdk_bdev_io_timeout_cb cb_fn, void *cb_arg);
+
+/**
+ * Check whether the block device supports the I/O type.
+ *
+ * \param bdev Block device to check.
+ * \param io_type The specific I/O type like read, write, flush, unmap.
+ * \return true if supported, false otherwise.
+ */
+bool spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type);
+
+/**
+ * Output driver-specific information to a JSON stream.
+ *
+ * The JSON write context will be initialized with an open object, so the bdev
+ * driver should write a name(based on the driver name) followed by a JSON value
+ * (most likely another nested object).
+ *
+ * \param bdev Block device to query.
+ * \param w JSON write context. It will store the driver-specific configuration context.
+ * \return 0 on success, negated errno on failure.
+ */
+int spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w);
+
+/**
+ * Get block device name.
+ *
+ * \param bdev Block device to query.
+ * \return Name of bdev as a null-terminated string.
+ */
+const char *spdk_bdev_get_name(const struct spdk_bdev *bdev);
+
+/**
+ * Get block device product name.
+ *
+ * \param bdev Block device to query.
+ * \return Product name of bdev as a null-terminated string.
+ */
+const char *spdk_bdev_get_product_name(const struct spdk_bdev *bdev);
+
+/**
+ * Get block device logical block size.
+ *
+ * \param bdev Block device to query.
+ * \return Size of logical block for this bdev in bytes.
+ */
+uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev);
+
+/**
+ * Get the write unit size for this bdev.
+ *
+ * Write unit size is required number of logical blocks to perform write
+ * operation on block device.
+ *
+ * Unit of write unit size is logical block and the minimum of write unit
+ * size is one. Write operations must be multiple of write unit size.
+ *
+ * \param bdev Block device to query.
+ *
+ * \return The write unit size in logical blocks.
+ */
+uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev);
+
+/**
+ * Get size of block device in logical blocks.
+ *
+ * \param bdev Block device to query.
+ * \return Size of bdev in logical blocks.
+ *
+ * Logical blocks are numbered from 0 to spdk_bdev_get_num_blocks(bdev) - 1, inclusive.
+ */
+uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev);
+
+/**
+ * Get the string of quality of service rate limit.
+ *
+ * \param type Type of rate limit to query.
+ * \return String of QoS type.
+ */
+const char *spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type);
+
+/**
+ * Get the quality of service rate limits on a bdev.
+ *
+ * \param bdev Block device to query.
+ * \param limits Pointer to the QoS rate limits array which holds the limits.
+ *
+ * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum.
+ */
+void spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits);
+
+/**
+ * Set the quality of service rate limits on a bdev.
+ *
+ * \param bdev Block device.
+ * \param limits Pointer to the QoS rate limits array which holds the limits.
+ * \param cb_fn Callback function to be called when the QoS limit has been updated.
+ * \param cb_arg Argument to pass to cb_fn.
+ *
+ * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum.
+ */
+void spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
+ void (*cb_fn)(void *cb_arg, int status), void *cb_arg);
+
+/**
+ * Get minimum I/O buffer address alignment for a bdev.
+ *
+ * \param bdev Block device to query.
+ * \return Required alignment of I/O buffers in bytes.
+ */
+size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev);
+
+/**
+ * Get optimal I/O boundary for a bdev.
+ *
+ * \param bdev Block device to query.
+ * \return Optimal I/O boundary in blocks that should not be crossed for best performance, or 0 if
+ * no optimal boundary is reported.
+ */
+uint32_t spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev);
+
+/**
+ * Query whether block device has an enabled write cache.
+ *
+ * \param bdev Block device to query.
+ * \return true if block device has a volatile write cache enabled.
+ *
+ * If this function returns true, written data may not be persistent until a flush command
+ * is issued.
+ */
+bool spdk_bdev_has_write_cache(const struct spdk_bdev *bdev);
+
+/**
+ * Get a bdev's UUID.
+ *
+ * \param bdev Block device to query.
+ * \return Pointer to UUID.
+ *
+ * All bdevs will have a UUID, but not all UUIDs will be persistent across
+ * application runs.
+ */
+const struct spdk_uuid *spdk_bdev_get_uuid(const struct spdk_bdev *bdev);
+
+/**
+ * Get block device atomic compare and write unit.
+ *
+ * \param bdev Block device to query.
+ * \return Atomic compare and write unit for this bdev in blocks.
+ */
+uint16_t spdk_bdev_get_acwu(const struct spdk_bdev *bdev);
+
+/**
+ * Get block device metadata size.
+ *
+ * \param bdev Block device to query.
+ * \return Size of metadata for this bdev in bytes.
+ */
+uint32_t spdk_bdev_get_md_size(const struct spdk_bdev *bdev);
+
+/**
+ * Query whether metadata is interleaved with block data or separated
+ * from block data.
+ *
+ * \param bdev Block device to query.
+ * \return true if metadata is interleaved with block data or false
+ * if metadata is separated from block data.
+ *
+ * Note this function is valid only if there is metadata.
+ */
+bool spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev);
+
+/**
+ * Query whether metadata is interleaved with block data or separated
+ * from block data.
+ *
+ * \param bdev Block device to query.
+ * \return true if metadata is separated from block data, false
+ * otherwise.
+ *
+ * Note this function is valid only if there is metadata.
+ */
+bool spdk_bdev_is_md_separate(const struct spdk_bdev *bdev);
+
+/**
+ * Checks if bdev supports zoned namespace semantics.
+ *
+ * \param bdev Block device to query.
+ * \return true if device supports zoned namespace semantics.
+ */
+bool spdk_bdev_is_zoned(const struct spdk_bdev *bdev);
+
+/**
+ * Get block device data block size.
+ *
+ * Data block size is equal to block size if there is no metadata or
+ * metadata is separated from block data, or equal to block size minus
+ * metadata size if there is metadata and it is interleaved with
+ * block data.
+ *
+ * \param bdev Block device to query.
+ * \return Size of data block for this bdev in bytes.
+ */
+uint32_t spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev);
+
+/**
+ * Get DIF type of the block device.
+ *
+ * \param bdev Block device to query.
+ * \return DIF type of the block device.
+ */
+enum spdk_dif_type spdk_bdev_get_dif_type(const struct spdk_bdev *bdev);
+
+/**
+ * Check whether DIF is set in the first 8 bytes or the last 8 bytes of metadata.
+ *
+ * \param bdev Block device to query.
+ * \return true if DIF is set in the first 8 bytes of metadata, or false
+ * if DIF is set in the last 8 bytes of metadata.
+ *
+ * Note that this function is valid only if DIF type is not SPDK_DIF_DISABLE.
+ */
+bool spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev);
+
+/**
+ * Check whether the DIF check type is enabled.
+ *
+ * \param bdev Block device to query.
+ * \param check_type The specific DIF check type.
+ * \return true if enabled, false otherwise.
+ */
+bool spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
+ enum spdk_dif_check_type check_type);
+
+/**
+ * Get the most recently measured queue depth from a bdev.
+ *
+ * The reported queue depth is the aggregate of outstanding I/O
+ * across all open channels associated with this bdev.
+ *
+ * \param bdev Block device to query.
+ *
+ * \return The most recent queue depth measurement for the bdev.
+ * If tracking is not enabled, the function will return UINT64_MAX.
+ * It is also possible to receive UINT64_MAX after enabling tracking
+ * but before the first period has expired.
+ */
+uint64_t
+spdk_bdev_get_qd(const struct spdk_bdev *bdev);
+
+/**
+ * Get the queue depth polling period.
+ *
+ * The return value of this function is only valid if the bdev's
+ * queue depth tracking status is set to true.
+ *
+ * \param bdev Block device to query.
+ *
+ * \return The period at which this bdev's queue depth is being refreshed.
+ */
+uint64_t
+spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev);
+
+/**
+ * Enable or disable queue depth sampling for this bdev.
+ *
+ * Enables queue depth sampling when period is greater than 0. Disables it when the period
+ * is equal to zero. The resulting queue depth is stored in the spdk_bdev object as
+ * measured_queue_depth.
+ *
+ * \param bdev Block device on which to enable queue depth tracking.
+ * \param period The period at which to poll this bdev's queue depth. If this is set
+ * to zero, polling will be disabled.
+ */
+void spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period);
+
+/**
+ * Get the time spent processing IO for this device.
+ *
+ * This value is dependent upon the queue depth sampling period and is
+ * incremented at sampling time by the sampling period only if the measured
+ * queue depth is greater than 0.
+ *
+ * The disk utilization can be calculated by the following formula:
+ * disk_util = (io_time_2 - io_time_1) / elapsed_time.
+ * The user is responsible for tracking the elapsed time between two measurements.
+ *
+ * \param bdev Block device to query.
+ *
+ * \return The io time for this device in microseconds.
+ */
+uint64_t spdk_bdev_get_io_time(const struct spdk_bdev *bdev);
+
+/**
+ * Get the weighted IO processing time for this bdev.
+ *
+ * This value is dependent upon the queue depth sampling period and is
+ * equal to the time spent reading from or writing to a device times
+ * the measured queue depth during each sampling period.
+ *
+ * The average queue depth can be calculated by the following formula:
+ * queue_depth = (weighted_io_time_2 - weighted_io_time_1) / elapsed_time.
+ * The user is responsible for tracking the elapsed time between two measurements.
+ *
+ * \param bdev Block device to query.
+ *
+ * \return The weighted io time for this device in microseconds.
+ */
+uint64_t spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev);
+
+/**
+ * Obtain an I/O channel for the block device opened by the specified
+ * descriptor. I/O channels are bound to threads, so the resulting I/O
+ * channel may only be used from the thread it was originally obtained
+ * from.
+ *
+ * \param desc Block device descriptor.
+ *
+ * \return A handle to the I/O channel or NULL on failure.
+ */
+struct spdk_io_channel *spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc);
+
+/**
+ * \defgroup bdev_io_submit_functions bdev I/O Submit Functions
+ *
+ * These functions submit a new I/O request to a bdev. The I/O request will
+ * be represented by an spdk_bdev_io structure allocated from a global pool.
+ * These functions will return -ENOMEM if the spdk_bdev_io pool is empty.
+ */
+
+/**
+ * Submit a read request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to read into.
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param nbytes The number of bytes to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or nbytes are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a read request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to read into.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a read request to the bdev on the given channel. This function uses
+ * separate buffer for metadata transfer (valid only if bdev supports this
+ * mode).
+ *
+ * NOTE(review): offset_blocks is declared int64_t here, whereas the other
+ * block-based read functions in this header (spdk_bdev_read_blocks,
+ * spdk_bdev_readv_blocks, spdk_bdev_readv_blocks_with_md) take uint64_t.
+ * Confirm whether the signed type is intended before relying on offsets
+ * above INT64_MAX; any change must be made together with the definition.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to read into.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, void *md, int64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a read request to the bdev on the given channel. This differs from
+ * spdk_bdev_read by allowing the data buffer to be described in a scatter
+ * gather list. Some physical devices place memory alignment requirements on
+ * data and may not be able to directly transfer into the buffers provided. In
+ * this case, the request may fail.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be read into.
+ * \param iovcnt The number of elements in iov.
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param nbytes The number of bytes to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or nbytes are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a read request to the bdev on the given channel. This differs from
+ * spdk_bdev_read by allowing the data buffer to be described in a scatter
+ * gather list. Some physical devices place memory alignment requirements on
+ * data and may not be able to directly transfer into the buffers provided. In
+ * this case, the request may fail.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be read into.
+ * \param iovcnt The number of elements in iov.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a read request to the bdev on the given channel. This differs from
+ * spdk_bdev_read by allowing the data buffer to be described in a scatter
+ * gather list. Some physical devices place memory alignment requirements on
+ * data or metadata and may not be able to directly transfer into the buffers
+ * provided. In this case, the request may fail. This function uses separate
+ * buffer for metadata transfer (valid only if bdev supports this mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be read into.
+ * \param iovcnt The number of elements in iov.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to read.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt, void *md,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to write from.
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param nbytes The number of bytes to write. buf must be greater than or equal to this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or nbytes are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to be written from.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to write. buf must be large enough to hold this many blocks.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel. This function uses
+ * a separate buffer for metadata transfer (valid only if the bdev supports this
+ * mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to be written from.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to write. buf must be large enough to hold this many blocks.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel. This differs from
+ * spdk_bdev_write() by allowing the data buffer to be described in a scatter
+ * gather list. Some physical devices place memory alignment requirements on
+ * data and may not be able to directly transfer out of the buffers provided. In
+ * this case, the request may fail.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iov.
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param len The size of data to write.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or len are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset, uint64_t len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel. This differs from
+ * spdk_bdev_write_blocks() by allowing the data buffer to be described in a
+ * scatter gather list. Some physical devices place memory alignment requirements
+ * on data and may not be able to directly transfer out of the buffers provided.
+ * In this case, the request may fail.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iov.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to write.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write request to the bdev on the given channel. This differs from
+ * spdk_bdev_write_blocks_with_md() by allowing the data buffer to be described
+ * in a scatter gather list. Some physical devices place memory alignment
+ * requirements on data or metadata and may not be able to directly transfer out
+ * of the buffers provided. In this case, the request may fail. This function uses
+ * a separate buffer for metadata transfer (valid only if the bdev supports this mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iov.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to write.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt, void *md,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a compare request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to compare to.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to compare. buf must be large enough to hold this many blocks.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_compare_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a compare request to the bdev on the given channel. This function uses
+ * a separate buffer for metadata transfer (valid only if the bdev supports this
+ * mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to compare to.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to compare. buf must be large enough to hold this many blocks.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a compare request to the bdev on the given channel. This differs from
+ * spdk_bdev_compare_blocks() by allowing the data buffer to be described in a
+ * scatter gather list. Some physical devices place memory alignment requirements
+ * on data and may not be able to directly transfer out of the buffers provided.
+ * In this case, the request may fail.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be compared to.
+ * \param iovcnt The number of elements in iov.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to compare.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_comparev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a compare request to the bdev on the given channel. This differs from
+ * spdk_bdev_compare_blocks_with_md() by allowing the data buffer to be described
+ * in a scatter gather list. Some physical devices place memory alignment
+ * requirements on data or metadata and may not be able to directly transfer out
+ * of the buffers provided. In this case, the request may fail. This function uses
+ * a separate buffer for metadata transfer (valid only if the bdev supports this mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be compared to.
+ * \param iovcnt The number of elements in iov.
+ * \param md Metadata buffer.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to compare.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
+ * metadata is not supported
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt, void *md,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an atomic compare-and-write request to the bdev on the given channel.
+ * For bdevs that do not natively support atomic compare-and-write, the bdev layer
+ * will quiesce I/O to the specified LBA range before performing the read,
+ * compare and write operations.
+ *
+ * Currently this supports compare-and-write of only one block.
+ *
+ * The data buffers for both the compare and write operations are described in
+ * scatter gather lists. Some physical devices place memory alignment requirements on
+ * data and may not be able to directly transfer out of the buffers provided. In
+ * this case, the request may fail.
+ *
+ * spdk_bdev_io_get_nvme_fused_status() should be called in the completion callback
+ * to get the status of each individual operation.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param compare_iov A scatter gather list of buffers to be compared.
+ * \param compare_iovcnt The number of elements in compare_iov.
+ * \param write_iov A scatter gather list of buffers to be written if the compare is
+ * successful.
+ * \param write_iovcnt The number of elements in write_iov.
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to compare-and-write.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *compare_iov, int compare_iovcnt,
+ struct iovec *write_iov, int write_iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a request to acquire a data buffer that represents the given
+ * range of blocks. The data buffer is placed in the spdk_bdev_io structure
+ * and can be obtained by calling spdk_bdev_io_get_iovec(). Release the buffer
+ * with spdk_bdev_zcopy_end().
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks.
+ * \param populate Whether the data buffer should be populated with the
+ * data at the given blocks. Populating the data buffer can
+ * be skipped if the user writes new data to the entire buffer.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ */
+int spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ bool populate,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+
+/**
+ * Submit a request to release a data buffer representing a range of blocks.
+ *
+ * \param bdev_io I/O request returned in the completion callback of spdk_bdev_zcopy_start().
+ * \param commit Whether to commit the data in the buffers to the blocks before releasing.
+ * The data does not need to be committed if it was not modified.
+ * \param cb Called when the release request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ */
+int spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write zeroes request to the bdev on the given channel. This command
+ * ensures that all bytes in the specified range are set to 00h.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param len The size of data to zero.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or len are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a write zeroes request to the bdev on the given channel. This command
+ * ensures that all bytes in the specified range are set to 00h.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to zero.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an unmap request to the block device. Unmap is sometimes also called trim or
+ * deallocate. This notifies the device that the data in the blocks described is no
+ * longer valid. Reading blocks that have been unmapped results in indeterminate data.
+ *
+ * The range is given in bytes; see spdk_bdev_unmap_blocks() for the variant that
+ * takes the range in blocks.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param nbytes The number of bytes to unmap. Must be a multiple of the block size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or nbytes are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an unmap request to the block device. Unmap is sometimes also called trim or
+ * deallocate. This notifies the device that the data in the blocks described is no
+ * longer valid. Reading blocks that have been unmapped results in indeterminate data.
+ *
+ * The range is given in blocks; see spdk_bdev_unmap() for the variant that takes
+ * the range in bytes.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to unmap.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a flush request to the bdev on the given channel. For devices with volatile
+ * caches, data is not guaranteed to be persistent until the completion of a flush
+ * request. Call spdk_bdev_has_write_cache() to check if the bdev has a volatile cache.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset The offset, in bytes, from the start of the block device.
+ * \param length The number of bytes.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset and/or length are not aligned or out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t length,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a flush request to the bdev on the given channel. For devices with volatile
+ * caches, data is not guaranteed to be persistent until the completion of a flush
+ * request. Call spdk_bdev_has_write_cache() to check if the bdev has a volatile cache.
+ *
+ * The range is given in blocks; see spdk_bdev_flush() for the variant that takes
+ * the range in bytes.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset_blocks The offset, in blocks, from the start of the block device.
+ * \param num_blocks The number of blocks to flush.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a reset request to the bdev on the given channel.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param cb Called when the reset request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit abort requests to the bdev on the given channel to abort all I/Os which
+ * have bio_cb_arg as their callback context.
+ *
+ * This goes all the way down to the bdev driver module and attempts to abort all
+ * I/Os which have bio_cb_arg as their callback context if they exist. This is a best
+ * effort command. Upon completion of this, the status SPDK_BDEV_IO_STATUS_SUCCESS
+ * indicates that all the I/Os were successfully aborted, while the status
+ * SPDK_BDEV_IO_STATUS_FAILED indicates that at least one I/O failed to be aborted
+ * for any reason, or that no I/O which has bio_cb_arg as its callback context was found.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch The I/O channel which the I/Os to be aborted are associated with.
+ * \param bio_cb_arg Callback argument for the outstanding requests which this
+ * function attempts to abort.
+ * \param cb Called when the abort request is completed.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always be called (even if the
+ * request ultimately failed). Return negated errno on failure, in which case the
+ * callback will not be called.
+ * * -EINVAL - bio_cb_arg was not specified.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated.
+ * * -ENOTSUP - the bdev does not support abort.
+ */
+int spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *bio_cb_arg,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an NVMe Admin command to the bdev. This passes directly through
+ * the block layer to the device. Support for NVMe passthru is optional,
+ * indicated by calling spdk_bdev_io_type_supported().
+ *
+ * The SGL/PRP will be automatically generated based on the given buffer,
+ * so that portion of the command may be left empty.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param cmd The raw NVMe command. Must be an admin command.
+ * \param buf Data buffer to be written from.
+ * \param nbytes The number of bytes to transfer. buf must be at least this many bytes in size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd,
+ void *buf, size_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an NVMe I/O command to the bdev. This passes directly through
+ * the block layer to the device. Support for NVMe passthru is optional,
+ * indicated by calling spdk_bdev_io_type_supported().
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * The SGL/PRP will be automatically generated based on the given buffer,
+ * so that portion of the command may be left empty. Also, the namespace
+ * id (nsid) will be populated automatically.
+ *
+ * \param bdev_desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param cmd The raw NVMe command. Must be in the NVM command set.
+ * \param buf Data buffer to be written from.
+ * \param nbytes The number of bytes to transfer. buf must be at least this many bytes in size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *bdev_desc,
+ struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd,
+ void *buf, size_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit an NVMe I/O command to the bdev, with a separate metadata buffer. This
+ * passes directly through the block layer to the device. Support for NVMe
+ * passthru is optional, indicated by calling spdk_bdev_io_type_supported().
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * The SGL/PRP will be automatically generated based on the given buffer,
+ * so that portion of the command may be left empty. Also, the namespace
+ * id (nsid) will be populated automatically.
+ *
+ * \param bdev_desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param cmd The raw NVMe command. Must be in the NVM command set.
+ * \param buf Data buffer to be written from.
+ * \param nbytes The number of bytes to transfer. buf must be at least this many bytes in size.
+ * \param md_buf Metadata buffer to be written from.
+ * \param md_len The number of metadata bytes to transfer. md_buf must be at least this many bytes in size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ * * -EBADF - desc not open for writing
+ */
+int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc,
+ struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd,
+ void *buf, size_t nbytes, void *md_buf, size_t md_len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Free an I/O request. This should only be called after the completion callback
+ * for the I/O has been called. It notifies the bdev layer that the memory
+ * associated with the request may now be released.
+ *
+ * \param bdev_io I/O request.
+ */
+void spdk_bdev_free_io(struct spdk_bdev_io *bdev_io);
+
+/**
+ * Block device I/O wait callback
+ *
+ * Callback function to notify when an spdk_bdev_io structure is available
+ * to satisfy a call to one of the @ref bdev_io_submit_functions.
+ *
+ * \param cb_arg Callback argument; presumably the cb_arg member of the
+ * struct spdk_bdev_io_wait_entry that registered this callback.
+ */
+typedef void (*spdk_bdev_io_wait_cb)(void *cb_arg);
+
+/**
+ * Structure to register a callback when an spdk_bdev_io becomes available.
+ *
+ * The caller allocates this structure, fills it in, and passes it to
+ * spdk_bdev_queue_io_wait(). Members:
+ * - bdev: the block device the caller will submit an I/O to when the callback
+ * is invoked; must match the bdev argument of spdk_bdev_queue_io_wait().
+ * - cb_fn: the callback function to invoke.
+ * - cb_arg: the argument passed to cb_fn.
+ * - link: tail queue linkage; presumably used by the bdev layer to keep the
+ * entry on its wait queue.
+ */
+struct spdk_bdev_io_wait_entry {
+ struct spdk_bdev *bdev;
+ spdk_bdev_io_wait_cb cb_fn;
+ void *cb_arg;
+ TAILQ_ENTRY(spdk_bdev_io_wait_entry) link;
+};
+
+/**
+ * Add an entry into the calling thread's queue to be notified when an
+ * spdk_bdev_io becomes available.
+ *
+ * When one of the @ref bdev_io_submit_functions returns -ENOMEM, it means
+ * the spdk_bdev_io buffer pool has no available buffers. This function may
+ * be called to register a callback to be notified when a buffer becomes
+ * available on the calling thread.
+ *
+ * The callback function will always be called on the same thread on which
+ * this function was called.
+ *
+ * This function must only be called immediately after one of the
+ * @ref bdev_io_submit_functions returns -ENOMEM.
+ *
+ * \param bdev Block device. The block device that the caller will submit
+ * an I/O to when the callback is invoked. Must match the bdev
+ * member in the entry parameter.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param entry Data structure allocated by the caller specifying the callback
+ * function and argument.
+ *
+ * \return 0 on success, negated errno on failure.
+ * * -EINVAL - bdev parameter does not match bdev member in entry
+ * * -EINVAL - an spdk_bdev_io structure was available on this thread
+ */
+int spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
+ struct spdk_bdev_io_wait_entry *entry);
+
+/**
+ * Return I/O statistics for this channel.
+ *
+ * \param bdev Block device.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param stat The per-channel statistics (output).
+ *
+ */
+void spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
+ struct spdk_bdev_io_stat *stat);
+
+
+/**
+ * Return I/O statistics for this bdev. All the required information will be passed
+ * via the callback function.
+ *
+ * \param bdev Block device to query.
+ * \param stat Structure for aggregating collected statistics. Passed as argument to cb.
+ * \param cb Called when this operation completes.
+ * \param cb_arg Argument passed to callback function.
+ */
+void spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
+ spdk_bdev_get_device_stat_cb cb, void *cb_arg);
+
+/**
+ * Get the status of bdev_io as an NVMe status code and command specific
+ * completion queue value.
+ *
+ * \param bdev_io I/O to get the status from.
+ * \param cdw0 Command specific completion queue value
+ * \param sct Status Code Type return value, as defined by the NVMe specification.
+ * \param sc Status Code return value, as defined by the NVMe specification.
+ */
+void spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0, int *sct,
+ int *sc);
+
+/**
+ * Get the status of bdev_io as NVMe status codes and a command specific
+ * completion queue value for fused operations such as compare-and-write.
+ *
+ * \param bdev_io I/O to get the status from.
+ * \param cdw0 Command specific completion queue value
+ * \param first_sct Status Code Type return value for the first operation, as defined by the NVMe specification.
+ * \param first_sc Status Code return value for the first operation, as defined by the NVMe specification.
+ * \param second_sct Status Code Type return value for the second operation, as defined by the NVMe specification.
+ * \param second_sc Status Code return value for the second operation, as defined by the NVMe specification.
+ */
+void spdk_bdev_io_get_nvme_fused_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0,
+ int *first_sct, int *first_sc, int *second_sct, int *second_sc);
+
+/**
+ * Get the status of bdev_io as a SCSI status code.
+ *
+ * \param bdev_io I/O to get the status from.
+ * \param sc SCSI Status Code.
+ * \param sk SCSI Sense Key.
+ * \param asc SCSI Additional Sense Code.
+ * \param ascq SCSI Additional Sense Code Qualifier.
+ */
+void spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
+ int *sc, int *sk, int *asc, int *ascq);
+
+/**
+ * Get the iovec describing the data buffer of a bdev_io.
+ *
+ * \param bdev_io I/O to describe with iovec.
+ * \param iovp Pointer to be filled with iovec.
+ * \param iovcntp Pointer to be filled with number of iovec entries.
+ */
+void spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp);
+
+/**
+ * Get metadata buffer. Only makes sense if the IO uses separate buffer for
+ * metadata transfer.
+ *
+ * \param bdev_io I/O to retrieve the buffer from.
+ * \return Pointer to metadata buffer, NULL if the IO doesn't use separate
+ * buffer for metadata transfer.
+ */
+void *spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io);
+
+/**
+ * Get the callback argument of bdev_io to abort it by spdk_bdev_abort.
+ *
+ * \param bdev_io I/O to get the callback argument from.
+ * \return Callback argument of bdev_io.
+ */
+void *spdk_bdev_io_get_cb_arg(struct spdk_bdev_io *bdev_io);
+
+typedef void (*spdk_bdev_histogram_status_cb)(void *cb_arg, int status);
+typedef void (*spdk_bdev_histogram_data_cb)(void *cb_arg, int status,
+ struct spdk_histogram_data *histogram);
+
+/**
+ * Enable or disable collecting histogram data on a bdev.
+ *
+ * \param bdev Block device.
+ * \param cb_fn Callback function to be called when histograms are enabled.
+ * \param cb_arg Argument to pass to cb_fn.
+ * \param enable Enable/disable flag
+ */
+void spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
+ void *cb_arg, bool enable);
+
+/**
+ * Get aggregated histogram data from a bdev. Callback provides merged histogram
+ * for specified bdev.
+ *
+ * \param bdev Block device.
+ * \param histogram Histogram for aggregated data
+ * \param cb_fn Callback function to be called with data collected on bdev.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
+ spdk_bdev_histogram_data_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Retrieves media events. Can only be called from the context of
+ * SPDK_BDEV_EVENT_MEDIA_MANAGEMENT event callback. These events are sent by
+ * devices exposing raw access to the physical medium (e.g. Open Channel SSD).
+ *
+ * \param bdev_desc Block device descriptor
+ * \param events Array of media management event descriptors
+ * \param max_events Size of the events array
+ *
+ * \return number of events retrieved
+ */
+size_t spdk_bdev_get_media_events(struct spdk_bdev_desc *bdev_desc,
+ struct spdk_bdev_media_event *events, size_t max_events);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_BDEV_H_ */
diff --git a/src/spdk/include/spdk/bdev_module.h b/src/spdk/include/spdk/bdev_module.h
new file mode 100644
index 000000000..edf967897
--- /dev/null
+++ b/src/spdk/include/spdk/bdev_module.h
@@ -0,0 +1,1219 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Block Device Module Interface
+ *
+ * For information on how to write a bdev module, see @ref bdev_module.
+ */
+
+#ifndef SPDK_BDEV_MODULE_H
+#define SPDK_BDEV_MODULE_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/bdev_zone.h"
+#include "spdk/queue.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+#include "spdk/uuid.h"
+
+/** Block device module */
+struct spdk_bdev_module {
+ /**
+ * Initialization function for the module. Called by the spdk
+ * application during startup.
+ *
+ * Modules are required to define this function.
+ */
+ int (*module_init)(void);
+
+ /**
+ * Optional callback for modules that require notification of when
+ * the bdev subsystem has completed initialization.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*init_complete)(void);
+
+ /**
+ * Optional callback for modules that require notification of when
+ * the bdev subsystem is starting the fini process.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*fini_start)(void);
+
+ /**
+ * Finish function for the module. Called by the spdk application
+ * after all bdevs for all modules have been unregistered. This allows
+ * the module to do any final cleanup before the SPDK application exits.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*module_fini)(void);
+
+ /**
+ * Function called to return a text string representing the
+ * module's configuration options for inclusion in a configuration file.
+ */
+ void (*config_text)(FILE *fp);
+
+ /**
+ * Function called to return a text string representing the module-level
+ * JSON RPCs required to regenerate the current configuration. This will
+ * include module-level configuration options, or methods to construct
+ * bdevs when one RPC may generate multiple bdevs (for example, an NVMe
+ * controller with multiple namespaces).
+ *
+ * Per-bdev JSON RPCs (where one "construct" RPC always creates one bdev)
+ * may be implemented here, or by the bdev's write_config_json function -
+ * but not both. Bdev module implementers may choose which mechanism to
+ * use based on the module's design.
+ *
+ * \return 0 on success or Bdev specific negative error code.
+ */
+ int (*config_json)(struct spdk_json_write_ctx *w);
+
+ /** Name for the modules being defined. */
+ const char *name;
+
+ /**
+ * Returns the allocation size required for the backend for uses such as local
+ * command structs, local SGL, iovecs, or other user context.
+ */
+ int (*get_ctx_size)(void);
+
+ /**
+ * First notification that a bdev should be examined by a virtual bdev module.
+ * Virtual bdev modules may use this to examine newly-added bdevs and automatically
+ * create their own vbdevs, but no I/O to device can be sent to bdev at this point.
+ * Only vbdevs based on config files can be created here. This callback must make
+ * its decision to claim the bdev synchronously.
+ * It must also call spdk_bdev_module_examine_done() before returning. If the module
+ * needs to perform asynchronous operations such as I/O after claiming the bdev,
+ * it may define an examine_disk callback. The examine_disk callback will then
+ * be called immediately after the examine_config callback returns.
+ */
+ void (*examine_config)(struct spdk_bdev *bdev);
+
+ /**
+ * Second notification that a bdev should be examined by a virtual bdev module.
+ * Virtual bdev modules may use this to examine newly-added bdevs and automatically
+ * create their own vbdevs. This callback may use I/O operations and finish asynchronously.
+ */
+ void (*examine_disk)(struct spdk_bdev *bdev);
+
+ /**
+ * Denotes if the module_init function may complete asynchronously. If set to true,
+ * the module initialization has to be explicitly completed by calling
+ * spdk_bdev_module_init_done().
+ */
+ bool async_init;
+
+ /**
+ * Denotes if the module_fini function may complete asynchronously.
+ * If set to true finishing has to be explicitly completed by calling
+ * spdk_bdev_module_finish_done().
+ */
+ bool async_fini;
+
+ /**
+ * Fields that are used by the internal bdev subsystem. Bdev modules
+ * must not read or write to these fields.
+ */
+ struct __bdev_module_internal_fields {
+ /**
+ * Count of bdev inits/examinations in progress. Used by generic bdev
+ * layer and must not be modified by bdev modules.
+ *
+ * \note Used internally by bdev subsystem, don't change this value in bdev module.
+ */
+ uint32_t action_in_progress;
+
+ TAILQ_ENTRY(spdk_bdev_module) tailq;
+ } internal;
+};
+
+typedef void (*spdk_bdev_unregister_cb)(void *cb_arg, int rc);
+
+/**
+ * Function table for a block device backend.
+ *
+ * The backend block device function table provides a set of APIs to allow
+ * communication with a backend. The main commands are read/write API
+ * calls for I/O via submit_request.
+ */
+struct spdk_bdev_fn_table {
+ /** Destroy the backend block device object */
+ int (*destruct)(void *ctx);
+
+ /** Process the IO. */
+ void (*submit_request)(struct spdk_io_channel *ch, struct spdk_bdev_io *);
+
+ /** Check if the block device supports a specific I/O type. */
+ bool (*io_type_supported)(void *ctx, enum spdk_bdev_io_type);
+
+ /** Get an I/O channel for the specific bdev for the calling thread. */
+ struct spdk_io_channel *(*get_io_channel)(void *ctx);
+
+ /**
+ * Output driver-specific information to a JSON stream. Optional - may be NULL.
+ *
+ * The JSON write context will be initialized with an open object, so the bdev
+ * driver should write a name (based on the driver name) followed by a JSON value
+ * (most likely another nested object).
+ */
+ int (*dump_info_json)(void *ctx, struct spdk_json_write_ctx *w);
+
+ /**
+ * Output bdev-specific RPC configuration to a JSON stream. Optional - may be NULL.
+ *
+ * This function should only be implemented for bdevs which can be configured
+ * independently of other bdevs. For example, RPCs to create a bdev for an NVMe
+ * namespace may not be generated by this function, since enumerating an NVMe
+ * namespace requires attaching to an NVMe controller, and that controller may
+ * contain multiple namespaces. The spdk_bdev_module's config_json function should
+ * be used instead for these cases.
+ *
+ * The JSON write context will be initialized with an open object, so the bdev
+ * driver should write all data necessary to recreate this bdev by invoking
+ * constructor method. No other data should be written.
+ */
+ void (*write_config_json)(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w);
+
+ /** Get spin-time per I/O channel in microseconds.
+ * Optional - may be NULL.
+ */
+ uint64_t (*get_spin_time)(struct spdk_io_channel *ch);
+};
+
+/** bdev I/O completion status */
+enum spdk_bdev_io_status {
+ SPDK_BDEV_IO_STATUS_ABORTED = -7,
+ SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED = -6,
+ SPDK_BDEV_IO_STATUS_MISCOMPARE = -5,
+ /*
+ * NOMEM should be returned when a bdev module cannot start an I/O because of
+ * some lack of resources. It may not be returned for RESET I/O. I/O completed
+ * with NOMEM status will be retried after some I/O from the same channel have
+ * completed.
+ */
+ SPDK_BDEV_IO_STATUS_NOMEM = -4,
+ SPDK_BDEV_IO_STATUS_SCSI_ERROR = -3,
+ SPDK_BDEV_IO_STATUS_NVME_ERROR = -2,
+ SPDK_BDEV_IO_STATUS_FAILED = -1,
+ SPDK_BDEV_IO_STATUS_PENDING = 0,
+ SPDK_BDEV_IO_STATUS_SUCCESS = 1,
+};
+
+struct spdk_bdev_alias {
+ char *alias;
+ TAILQ_ENTRY(spdk_bdev_alias) tailq;
+};
+
+typedef TAILQ_HEAD(, spdk_bdev_io) bdev_io_tailq_t;
+typedef STAILQ_HEAD(, spdk_bdev_io) bdev_io_stailq_t;
+typedef TAILQ_HEAD(, lba_range) lba_range_tailq_t;
+
+struct spdk_bdev {
+ /** User context passed in by the backend */
+ void *ctxt;
+
+ /** Unique name for this block device. */
+ char *name;
+
+ /** Unique aliases for this block device. */
+ TAILQ_HEAD(spdk_bdev_aliases_list, spdk_bdev_alias) aliases;
+
+ /** Unique product name for this kind of block device. */
+ char *product_name;
+
+ /** write cache enabled, not used at the moment */
+ int write_cache;
+
+ /** Size in bytes of a logical block for the backend */
+ uint32_t blocklen;
+
+ /** Number of blocks */
+ uint64_t blockcnt;
+
+ /** Number of blocks required for write */
+ uint32_t write_unit_size;
+
+ /** Atomic compare & write unit */
+ uint16_t acwu;
+
+ /**
+ * Specifies an alignment requirement for data buffers associated with an spdk_bdev_io.
+ * 0 = no alignment requirement
+ * >0 = alignment requirement is 2 ^ required_alignment.
+ * bdev layer will automatically double buffer any spdk_bdev_io that violates this
+ * alignment, before the spdk_bdev_io is submitted to the bdev module.
+ */
+ uint8_t required_alignment;
+
+ /**
+ * Specifies whether the optimal_io_boundary is mandatory or
+ * only advisory. If set to true, the bdev layer will split
+ * READ and WRITE I/O that span the optimal_io_boundary before
+ * submitting them to the bdev module.
+ *
+ * Note that this field cannot be used to force splitting of
+ * UNMAP, WRITE_ZEROES or FLUSH I/O.
+ */
+ bool split_on_optimal_io_boundary;
+
+ /**
+ * Optimal I/O boundary in blocks, or 0 for no value reported.
+ */
+ uint32_t optimal_io_boundary;
+
+ /**
+ * UUID for this bdev.
+ *
+ * Fill with zeroes if no uuid is available. The bdev layer
+ * will automatically populate this if necessary.
+ */
+ struct spdk_uuid uuid;
+
+ /** Size in bytes of the metadata for the backend */
+ uint32_t md_len;
+
+ /**
+ * Specify metadata location and set to true if metadata is interleaved
+ * with block data or false if metadata is separated from block data.
+ *
+ * Note that this field is valid only if there is metadata.
+ */
+ bool md_interleave;
+
+ /**
+ * DIF type for this bdev.
+ *
+ * Note that this field is valid only if there is metadata.
+ */
+ enum spdk_dif_type dif_type;
+
+ /**
+ * DIF location.
+ *
+ * Set to true if DIF is set in the first 8 bytes of metadata or false
+ * if DIF is set in the last 8 bytes of metadata.
+ *
+ * Note that this field is valid only if DIF is enabled.
+ */
+ bool dif_is_head_of_md;
+
+ /**
+ * Specify whether each DIF check type is enabled.
+ */
+ uint32_t dif_check_flags;
+
+ /**
+ * Specify whether bdev is zoned device.
+ */
+ bool zoned;
+
+ /**
+ * Default size of each zone (in blocks).
+ */
+ uint64_t zone_size;
+
+ /**
+ * Maximum number of open zones.
+ */
+ uint32_t max_open_zones;
+
+ /**
+ * Optimal number of open zones.
+ */
+ uint32_t optimal_open_zones;
+
+ /**
+ * Specifies whether bdev supports media management events.
+ */
+ bool media_events;
+
+ /**
+ * Pointer to the bdev module that registered this bdev.
+ */
+ struct spdk_bdev_module *module;
+
+ /** function table for all LUN ops */
+ const struct spdk_bdev_fn_table *fn_table;
+
+ /** Fields that are used internally by the bdev subsystem. Bdev modules
+ * must not read or write to these fields.
+ */
+ struct __bdev_internal_fields {
+ /** Quality of service parameters */
+ struct spdk_bdev_qos *qos;
+
+ /** True if the state of the QoS is being modified */
+ bool qos_mod_in_progress;
+
+ /** Mutex protecting claimed */
+ pthread_mutex_t mutex;
+
+ /** The bdev status */
+ enum spdk_bdev_status status;
+
+ /**
+ * Pointer to the module that has claimed this bdev for purposes of creating virtual
+ * bdevs on top of it. Set to NULL if the bdev has not been claimed.
+ */
+ struct spdk_bdev_module *claim_module;
+
+ /** Callback function that will be called after bdev destruct is completed. */
+ spdk_bdev_unregister_cb unregister_cb;
+
+ /** Unregister call context */
+ void *unregister_ctx;
+
+ /** List of open descriptors for this block device. */
+ TAILQ_HEAD(, spdk_bdev_desc) open_descs;
+
+ TAILQ_ENTRY(spdk_bdev) link;
+
+ /** points to a reset bdev_io if one is in progress. */
+ struct spdk_bdev_io *reset_in_progress;
+
+ /** poller for tracking the queue_depth of a device, NULL if not tracking */
+ struct spdk_poller *qd_poller;
+
+ /** period at which we poll for queue depth information */
+ uint64_t period;
+
+ /** used to aggregate queue depth while iterating across the bdev's open channels */
+ uint64_t temporary_queue_depth;
+
+ /** queue depth as calculated the last time the telemetry poller checked. */
+ uint64_t measured_queue_depth;
+
+ /** most recent value of ticks spent performing I/O. Used to calculate the weighted time doing I/O */
+ uint64_t io_time;
+
+ /** weighted time performing I/O. Equal to measured_queue_depth * period */
+ uint64_t weighted_io_time;
+
+ /** accumulated I/O statistics for previously deleted channels of this bdev */
+ struct spdk_bdev_io_stat stat;
+
+ /** histogram enabled on this bdev */
+ bool histogram_enabled;
+ bool histogram_in_progress;
+
+ /** Currently locked ranges for this bdev. Used to populate new channels. */
+ lba_range_tailq_t locked_ranges;
+
+ /** Pending locked ranges for this bdev. These ranges are not currently
+ * locked due to overlapping with another locked range.
+ */
+ lba_range_tailq_t pending_locked_ranges;
+ } internal;
+};
+
+/**
+ * Callback when buffer is allocated for the bdev I/O.
+ *
+ * \param ch The I/O channel the bdev I/O was handled on.
+ * \param bdev_io The bdev I/O
+ * \param success True if buffer is allocated successfully or the bdev I/O has an SGL
+ * assigned already, or false if it failed. The possible reason of failure is the size
+ * of the buffer to allocate is greater than the permitted maximum.
+ */
+typedef void (*spdk_bdev_io_get_buf_cb)(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
+ bool success);
+
+/**
+ * Callback when an auxiliary buffer is allocated for the bdev I/O.
+ *
+ * \param ch The I/O channel the bdev I/O was handled on.
+ * \param bdev_io The bdev I/O
+ * \param aux_buf Pointer to the allocated buffer. NULL if there was a failure such as
+ * the size of the buffer to allocate is greater than the permitted maximum.
+ */
+typedef void (*spdk_bdev_io_get_aux_buf_cb)(struct spdk_io_channel *ch,
+ struct spdk_bdev_io *bdev_io, void *aux_buf);
+
+#define BDEV_IO_NUM_CHILD_IOV 32
+
+struct spdk_bdev_io {
+ /** The block device that this I/O belongs to. */
+ struct spdk_bdev *bdev;
+
+ /** Enumerated value representing the I/O type. */
+ uint8_t type;
+
+ /** Number of IO submission retries */
+ uint16_t num_retries;
+
+ /** A single iovec element for use by this bdev_io. */
+ struct iovec iov;
+
+ /** Array of iovecs used for I/O splitting. */
+ struct iovec child_iov[BDEV_IO_NUM_CHILD_IOV];
+
+ union {
+ struct {
+ /** For SG buffer cases, array of iovecs to transfer. */
+ struct iovec *iovs;
+
+ /** For SG buffer cases, number of iovecs in iovec array. */
+ int iovcnt;
+
+ /** For fused operations such as COMPARE_AND_WRITE, array of iovecs
+ * for the second operation.
+ */
+ struct iovec *fused_iovs;
+
+ /** Number of iovecs in fused_iovs. */
+ int fused_iovcnt;
+
+ /* Metadata buffer */
+ void *md_buf;
+
+ /** Total size of data to be transferred. */
+ uint64_t num_blocks;
+
+ /** Starting offset (in blocks) of the bdev for this I/O. */
+ uint64_t offset_blocks;
+
+ /** stored user callback in case we split the I/O and use a temporary callback */
+ spdk_bdev_io_completion_cb stored_user_cb;
+
+ /** number of blocks remaining in a split i/o */
+ uint64_t split_remaining_num_blocks;
+
+ /** current offset of the split I/O in the bdev */
+ uint64_t split_current_offset_blocks;
+
+ /** count of outstanding batched split I/Os */
+ uint32_t split_outstanding;
+
+ struct {
+ /** Whether the buffer should be populated with the real data */
+ uint8_t populate : 1;
+
+ /** Whether the buffer should be committed back to disk */
+ uint8_t commit : 1;
+
+ /** True if this request is in the 'start' phase of zcopy. False if in 'end'. */
+ uint8_t start : 1;
+ } zcopy;
+
+ struct {
+ /** The callback argument for the outstanding request which this abort
+ * attempts to cancel.
+ */
+ void *bio_cb_arg;
+ } abort;
+ } bdev;
+ struct {
+ /** Channel reference held while messages for this reset are in progress. */
+ struct spdk_io_channel *ch_ref;
+ } reset;
+ struct {
+ /** The outstanding request matching bio_cb_arg which this abort attempts to cancel. */
+ struct spdk_bdev_io *bio_to_abort;
+ } abort;
+ struct {
+ /* The NVMe command to execute */
+ struct spdk_nvme_cmd cmd;
+
+ /* The data buffer to transfer */
+ void *buf;
+
+ /* The number of bytes to transfer */
+ size_t nbytes;
+
+ /* The meta data buffer to transfer */
+ void *md_buf;
+
+ /* meta data buffer size to transfer */
+ size_t md_len;
+ } nvme_passthru;
+ struct {
+ /* First logical block of a zone */
+ uint64_t zone_id;
+
+ /* Number of zones */
+ uint32_t num_zones;
+
+ /* Used to change zoned device zone state */
+ enum spdk_bdev_zone_action zone_action;
+
+ /* The data buffer */
+ void *buf;
+ } zone_mgmt;
+ } u;
+
+ /** It may be used by modules to put the bdev_io into its own list. */
+ TAILQ_ENTRY(spdk_bdev_io) module_link;
+
+ /**
+ * Fields that are used internally by the bdev subsystem. Bdev modules
+ * must not read or write to these fields.
+ */
+ struct __bdev_io_internal_fields {
+ /** The bdev I/O channel that this was handled on. */
+ struct spdk_bdev_channel *ch;
+
+ /** The bdev I/O channel that this was submitted on. */
+ struct spdk_bdev_channel *io_submit_ch;
+
+ /** The bdev descriptor that was used when submitting this I/O. */
+ struct spdk_bdev_desc *desc;
+
+ /** User function that will be called when this completes */
+ spdk_bdev_io_completion_cb cb;
+
+ /** Context that will be passed to the completion callback */
+ void *caller_ctx;
+
+ /** Current tsc at submit time. Used to calculate latency at completion. */
+ uint64_t submit_tsc;
+
+ /** Error information from a device */
+ union {
+ struct {
+ /** NVMe completion queue entry DW0 */
+ uint32_t cdw0;
+ /** NVMe status code type */
+ uint8_t sct;
+ /** NVMe status code */
+ uint8_t sc;
+ } nvme;
+ /** Only valid when status is SPDK_BDEV_IO_STATUS_SCSI_ERROR */
+ struct {
+ /** SCSI status code */
+ uint8_t sc;
+ /** SCSI sense key */
+ uint8_t sk;
+ /** SCSI additional sense code */
+ uint8_t asc;
+ /** SCSI additional sense code qualifier */
+ uint8_t ascq;
+ } scsi;
+ } error;
+
+ /**
+ * Set to true while the bdev module submit_request function is in progress.
+ *
+ * This is used to decide whether spdk_bdev_io_complete() can complete the I/O directly
+ * or if completion must be deferred via an event.
+ */
+ bool in_submit_request;
+
+ /** Status for the IO */
+ int8_t status;
+
+ /** bdev allocated memory associated with this request */
+ void *buf;
+
+ /** requested size of the buffer associated with this I/O */
+ uint64_t buf_len;
+
+ /** if the request is double buffered, store original request iovs here */
+ struct iovec bounce_iov;
+ struct iovec *orig_iovs;
+ int orig_iovcnt;
+ void *orig_md_buf;
+
+ /** Callback for when the aux buf is allocated */
+ spdk_bdev_io_get_aux_buf_cb get_aux_buf_cb;
+
+ /** Callback for when buf is allocated */
+ spdk_bdev_io_get_buf_cb get_buf_cb;
+
+ /** Member used for linking child I/Os together. */
+ TAILQ_ENTRY(spdk_bdev_io) link;
+
+ /** Entry to the list need_buf of struct spdk_bdev. */
+ STAILQ_ENTRY(spdk_bdev_io) buf_link;
+
+ /** Entry to the list io_submitted of struct spdk_bdev_channel */
+ TAILQ_ENTRY(spdk_bdev_io) ch_link;
+
+ /** Enables queuing parent I/O when no bdev_ios available for split children. */
+ struct spdk_bdev_io_wait_entry waitq_entry;
+ } internal;
+
+ /**
+ * Per I/O context for use by the bdev module.
+ */
+ uint8_t driver_ctx[0];
+
+ /* No members may be added after driver_ctx! */
+};
+
+/**
+ * Register a new bdev.
+ *
+ * \param bdev Block device to register.
+ *
+ * \return 0 on success.
+ * \return -EINVAL if the bdev name is NULL.
+ * \return -EEXIST if a bdev or bdev alias with the same name already exists.
+ */
+int spdk_bdev_register(struct spdk_bdev *bdev);
+
+/**
+ * Start unregistering a bdev. This will notify each currently open descriptor
+ * on this bdev about the hotremoval in hopes that the upper layers will stop
+ * using this bdev and manually close all the descriptors with spdk_bdev_close().
+ * The actual bdev unregistration may be deferred until all descriptors are closed.
+ *
+ * \param bdev Block device to unregister.
+ * \param cb_fn Callback function to be called when the unregister is complete.
+ * \param cb_arg Argument to be supplied to cb_fn
+ */
+void spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg);
+
+/**
+ * Invokes the unregister callback of a bdev backing a virtual bdev.
+ *
+ * A Bdev with an asynchronous destruct path should return 1 from its
+ * destruct function and call this function at the conclusion of that path.
+ * Bdevs with synchronous destruct paths should return 0 from their destruct
+ * path.
+ *
+ * \param bdev Block device that was destroyed.
+ * \param bdeverrno Error code returned from bdev's destruct callback.
+ */
+void spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno);
+
+/**
+ * Register a virtual bdev.
+ *
+ * This function is deprecated. Users should call spdk_bdev_register instead.
+ * The bdev layer currently makes no use of the base_bdevs array, so switching
+ * to spdk_bdev_register results in no loss of functionality.
+ *
+ * \param vbdev Virtual bdev to register.
+ * \param base_bdevs Array of bdevs upon which this vbdev is based.
+ * \param base_bdev_count Number of bdevs in base_bdevs.
+ *
+ * \return 0 on success
+ * \return -EINVAL if the bdev name is NULL.
+ * \return -EEXIST if the bdev already exists.
+ * \return -ENOMEM if allocation of the base_bdevs array or the base bdevs vbdevs array fails.
+ */
+int spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs,
+ int base_bdev_count);
+
+/**
+ * Indicate to the bdev layer that the module is done examining a bdev.
+ *
+ * To be called synchronously or asynchronously in response to the
+ * module's examine function being called.
+ *
+ * \param module Pointer to the module completing the examination.
+ */
+void spdk_bdev_module_examine_done(struct spdk_bdev_module *module);
+
+/**
+ * Indicate to the bdev layer that the module is done initializing.
+ *
+ * To be called synchronously or asynchronously in response to the
+ * module_init function being called.
+ *
+ * \param module Pointer to the module completing the initialization.
+ */
+void spdk_bdev_module_init_done(struct spdk_bdev_module *module);
+
+/**
+ * Indicate to the bdev layer that the module is done cleaning up.
+ *
+ * To be called either synchronously or asynchronously
+ * in response to the module_fini function being called.
+ *
+ */
+void spdk_bdev_module_finish_done(void);
+
+/**
+ * Called by a bdev module to lay exclusive write claim to a bdev.
+ *
+ * Also upgrades that bdev's descriptor to have write access.
+ *
+ * \param bdev Block device to be claimed.
+ * \param desc Descriptor for the above block device.
+ * \param module Bdev module attempting to claim bdev.
+ *
+ * \return 0 on success
+ * \return -EPERM if the bdev is already claimed by another module.
+ */
+int spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_bdev_module *module);
+
+/**
+ * Called to release a write claim on a block device.
+ *
+ * \param bdev Block device to be released.
+ */
+void spdk_bdev_module_release_bdev(struct spdk_bdev *bdev);
+
+/**
+ * Add alias to block device names list.
+ * Aliases can be added only to a registered bdev.
+ *
+ * \param bdev Block device to query.
+ * \param alias Alias to be added to list.
+ *
+ * \return 0 on success
+ * \return -EEXIST if alias already exists as name or alias on any bdev
+ * \return -ENOMEM if memory cannot be allocated to store alias
+ * \return -EINVAL if passed alias is empty
+ */
+int spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias);
+
+/**
+ * Removes name from block device names list.
+ *
+ * \param bdev Block device to query.
+ * \param alias Alias to be deleted from list.
+ * \return 0 on success
+ * \return -ENOENT if alias does not exist
+ */
+int spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias);
+
+/**
+ * Removes all aliases from block device alias list.
+ *
+ * \param bdev Block device to operate.
+ */
+void spdk_bdev_alias_del_all(struct spdk_bdev *bdev);
+
+/**
+ * Get pointer to block device aliases list.
+ *
+ * \param bdev Block device to query.
+ * \return Pointer to bdev aliases list.
+ */
+const struct spdk_bdev_aliases_list *spdk_bdev_get_aliases(const struct spdk_bdev *bdev);
+
+/**
+ * Allocate a buffer for given bdev_io. Allocation will happen
+ * only if the bdev_io has no assigned SGL yet or SGL is not
+ * aligned to \c bdev->required_alignment. If SGL is not aligned,
+ * this call will cause copy from SGL to bounce buffer on write
+ * path or copy from bounce buffer to SGL before completion
+ * callback on read path. The buffer will be freed automatically
+ * on \c spdk_bdev_free_io() call. This call will never fail.
+ * In case of lack of memory given callback \c cb will be deferred
+ * until enough memory is freed.
+ *
+ * \param bdev_io I/O to allocate buffer for.
+ * \param cb callback to be called when the buffer is allocated
+ * or the bdev_io has an SGL assigned already.
+ * \param len size of the buffer to allocate. In case the bdev_io
+ * doesn't have an SGL assigned this field must be no bigger than
+ * \c SPDK_BDEV_LARGE_BUF_MAX_SIZE.
+ */
+void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len);
+
+/**
+ * Allocate an auxiliary buffer for given bdev_io. The length of the
+ * buffer will be the same size as the bdev_io primary buffer. The buffer
+ * must be freed using \c spdk_bdev_io_put_aux_buf() before completing
+ * the associated bdev_io. This call will never fail. In case of lack of
+ * memory given callback \c cb will be deferred until enough memory is freed.
+ *
+ * \param bdev_io I/O to allocate buffer for.
+ * \param cb callback to be called when the buffer is allocated
+ */
+void spdk_bdev_io_get_aux_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_aux_buf_cb cb);
+
+/**
+ * Free an auxiliary buffer previously allocated by \c spdk_bdev_io_get_aux_buf().
+ *
+ * \param bdev_io bdev_io specified when the aux_buf was allocated.
+ * \param aux_buf auxiliary buffer to free
+ */
+void spdk_bdev_io_put_aux_buf(struct spdk_bdev_io *bdev_io, void *aux_buf);
+
+/**
+ * Set the given buffer as the data buffer described by this bdev_io.
+ *
+ * The portion of the buffer used may be adjusted for memory alignment
+ * purposes.
+ *
+ * \param bdev_io I/O to set the buffer on.
+ * \param buf The buffer to set as the active data buffer.
+ * \param len The length of the buffer.
+ *
+ */
+void spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len);
+
+/**
+ * Set the given buffer as metadata buffer described by this bdev_io.
+ *
+ * \param bdev_io I/O to set the buffer on.
+ * \param md_buf The buffer to set as the active metadata buffer.
+ * \param len The length of the metadata buffer.
+ */
+void spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len);
+
+/**
+ * Complete a bdev_io
+ *
+ * \param bdev_io I/O to complete.
+ * \param status The I/O completion status.
+ */
+void spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io,
+ enum spdk_bdev_io_status status);
+
+/**
+ * Complete a bdev_io with an NVMe status code and DW0 completion queue entry
+ *
+ * \param bdev_io I/O to complete.
+ * \param cdw0 NVMe Completion Queue DW0 value (set to 0 if not applicable)
+ * \param sct NVMe Status Code Type.
+ * \param sc NVMe Status Code.
+ */
+void spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, uint32_t cdw0, int sct,
+ int sc);
+
+/**
+ * Complete a bdev_io with a SCSI status code.
+ *
+ * \param bdev_io I/O to complete.
+ * \param sc SCSI Status Code.
+ * \param sk SCSI Sense Key.
+ * \param asc SCSI Additional Sense Code.
+ * \param ascq SCSI Additional Sense Code Qualifier.
+ */
+void spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
+ enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq);
+
+/**
+ * Get a thread that given bdev_io was submitted on.
+ *
+ * \param bdev_io I/O
+ * \return thread that submitted the I/O
+ */
+struct spdk_thread *spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io);
+
+/**
+ * Get the bdev module's I/O channel that the given bdev_io was submitted on.
+ *
+ * \param bdev_io I/O
+ * \return the bdev module's I/O channel that the given bdev_io was submitted on.
+ */
+struct spdk_io_channel *spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io);
+
+/**
+ * Resize a bdev.
+ *
+ * Change number of blocks for provided block device.
+ * It can only be called on a registered bdev.
+ *
+ * \param bdev Block device to change.
+ * \param size New size of bdev, as a number of blocks.
+ * \return 0 on success, negated errno on failure.
+ */
+int spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size);
+
+/**
+ * Translates NVMe status codes to SCSI status information.
+ *
+ * The codes are stored in the user supplied integers.
+ *
+ * \param bdev_io I/O containing status codes to translate.
+ * \param sc SCSI Status Code will be stored here.
+ * \param sk SCSI Sense Key will be stored here.
+ * \param asc SCSI Additional Sense Code will be stored here.
+ * \param ascq SCSI Additional Sense Code Qualifier will be stored here.
+ */
+void spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io,
+ int *sc, int *sk, int *asc, int *ascq);
+
+/**
+ * Add the given module to the list of registered modules.
+ * This function should be invoked by referencing the macro
+ * SPDK_BDEV_MODULE_REGISTER in the module c file.
+ *
+ * \param bdev_module Module to be added.
+ */
+void spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module);
+
+/**
+ * Find registered module with the name pointed to by \c name.
+ *
+ * \param name name of module to be searched for.
+ * \return pointer to module or NULL if no module with \c name exists
+ */
+struct spdk_bdev_module *spdk_bdev_module_list_find(const char *name);
+
+static inline struct spdk_bdev_io *
+spdk_bdev_io_from_ctx(void *ctx)
+{ /* ctx is expected to point at the driver_ctx member of a struct spdk_bdev_io */
+ return SPDK_CONTAINEROF(ctx, struct spdk_bdev_io, driver_ctx);
+}
+
+struct spdk_bdev_part_base;
+
+/**
+ * Returns a pointer to the spdk_bdev associated with an spdk_bdev_part_base
+ *
+ * \param part_base A pointer to an spdk_bdev_part_base object.
+ *
+ * \return A pointer to the base's spdk_bdev struct.
+ */
+struct spdk_bdev *spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base);
+
+/**
+ * Returns a spdk_bdev name of the corresponding spdk_bdev_part_base
+ *
+ * \param part_base A pointer to an spdk_bdev_part_base object.
+ *
+ * \return A text string representing the name of the base bdev.
+ */
+const char *spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base);
+
+/**
+ * Returns a pointer to the spdk_bdev_desc associated with an spdk_bdev_part_base
+ *
+ * \param part_base A pointer to an spdk_bdev_part_base object.
+ *
+ * \return A pointer to the base's spdk_bdev_desc struct.
+ */
+struct spdk_bdev_desc *spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base);
+
+/**
+ * Returns a pointer to the tailq associated with an spdk_bdev_part_base
+ *
+ * \param part_base A pointer to an spdk_bdev_part_base object.
+ *
+ * \return The head of a tailq of spdk_bdev_part structs registered to the base's module.
+ */
+struct bdev_part_tailq *spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base);
+
+/**
+ * Returns a pointer to the module level context associated with an spdk_bdev_part_base
+ *
+ * \param part_base A pointer to an spdk_bdev_part_base object.
+ *
+ * \return A pointer to the module level context registered with the base in spdk_bdev_part_base_construct.
+ */
+void *spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base);
+
+typedef void (*spdk_bdev_part_base_free_fn)(void *ctx);
+
+struct spdk_bdev_part {
+ /* Entry into the module's global list of bdev parts */
+ TAILQ_ENTRY(spdk_bdev_part) tailq;
+
+ /**
+ * Fields that are used internally by part.c. Modules should only access
+ * them through the pertinent accessors, e.g. spdk_bdev_part_get_bdev().
+ */
+ struct bdev_part_internal_fields {
+
+ /* This part's corresponding bdev object. Not to be confused with the base bdev */
+ struct spdk_bdev bdev;
+
+ /* The base to which this part belongs */
+ struct spdk_bdev_part_base *base;
+
+ /* Number of blocks from the start of the base bdev to the start of this part */
+ uint64_t offset_blocks;
+ } internal;
+};
+
+struct spdk_bdev_part_channel {
+ struct spdk_bdev_part *part; /* NOTE(review): presumably the part this channel belongs to - confirm */
+ struct spdk_io_channel *base_ch; /* NOTE(review): presumably the I/O channel of the base bdev - confirm */
+};
+
+typedef TAILQ_HEAD(bdev_part_tailq, spdk_bdev_part) SPDK_BDEV_PART_TAILQ;
+
+/**
+ * Free the base corresponding to one or more spdk_bdev_part.
+ *
+ * \param base The base to free.
+ */
+void spdk_bdev_part_base_free(struct spdk_bdev_part_base *base);
+
+/**
+ * Free an spdk_bdev_part context.
+ *
+ * \param part The part to free.
+ *
+ * \return 1 always, to indicate that the operation is asynchronous.
+ */
+int spdk_bdev_part_free(struct spdk_bdev_part *part);
+
+/**
+ * Calls spdk_bdev_unregister on the bdev for each part associated with base_bdev.
+ *
+ * \param part_base The part base object built on top of an spdk_bdev
+ * \param tailq The list of spdk_bdev_part bdevs associated with this base bdev.
+ */
+void spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base,
+ struct bdev_part_tailq *tailq);
+
+/**
+ * Construct a new spdk_bdev_part_base on top of the provided bdev.
+ *
+ * \param bdev The spdk_bdev upon which this base will be built.
+ * \param remove_cb Function to be called upon hotremove of the bdev.
+ * \param module The module to which this bdev base belongs.
+ * \param fn_table Function table for communicating with the bdev backend.
+ * \param tailq The head of the list of all spdk_bdev_part structures registered to this base's module.
+ * \param free_fn User provided function to free base related context upon bdev removal or shutdown.
+ * \param ctx Module specific context for this bdev part base.
+ * \param channel_size Channel size in bytes.
+ * \param ch_create_cb Called after a new channel is allocated.
+ * \param ch_destroy_cb Called upon channel deletion.
+ *
+ * \return Pointer to the newly constructed spdk_bdev_part_base on success.
+ * \return NULL on failure, e.g. if the underlying bdev cannot be opened.
+ */
+struct spdk_bdev_part_base *spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
+ spdk_bdev_remove_cb_t remove_cb,
+ struct spdk_bdev_module *module,
+ struct spdk_bdev_fn_table *fn_table,
+ struct bdev_part_tailq *tailq,
+ spdk_bdev_part_base_free_fn free_fn,
+ void *ctx,
+ uint32_t channel_size,
+ spdk_io_channel_create_cb ch_create_cb,
+ spdk_io_channel_destroy_cb ch_destroy_cb);
+
+/**
+ * Create a logical spdk_bdev_part on top of a base.
+ *
+ * \param part The part object allocated by the user.
+ * \param base The base from which to create the part.
+ * \param name The name of the new spdk_bdev_part.
+ * \param offset_blocks The offset into the base bdev at which this part begins.
+ * \param num_blocks The number of blocks that this part will span.
+ * \param product_name Unique name for this type of block device.
+ *
+ * \return 0 on success.
+ * \return -1 if the base's underlying bdev cannot be claimed by the current module.
+ */
+int spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
+ char *name, uint64_t offset_blocks, uint64_t num_blocks,
+ char *product_name);
+
+/**
+ * Forwards I/O from an spdk_bdev_part to the underlying base bdev.
+ *
+ * This function will apply the offset_blocks the user provided to
+ * spdk_bdev_part_construct to the I/O. The user should not manually
+ * apply this offset before submitting any I/O through this function.
+ *
+ * \param ch The I/O channel associated with the spdk_bdev_part.
+ * \param bdev_io The I/O to be submitted to the underlying bdev.
+ * \return 0 on success or non-zero if submit request failed.
+ */
+int spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io);
+
+/**
+ * Return a pointer to this part's spdk_bdev.
+ *
+ * \param part An spdk_bdev_part object.
+ *
+ * \return A pointer to this part's spdk_bdev object.
+ */
+struct spdk_bdev *spdk_bdev_part_get_bdev(struct spdk_bdev_part *part);
+
+/**
+ * Return a pointer to this part's base.
+ *
+ * \param part An spdk_bdev_part object.
+ *
+ * \return A pointer to this part's spdk_bdev_part_base object.
+ */
+struct spdk_bdev_part_base *spdk_bdev_part_get_base(struct spdk_bdev_part *part);
+
+/**
+ * Return a pointer to this part's base bdev.
+ *
+ * The return value of this function is equivalent to calling
+ * spdk_bdev_part_base_get_bdev on this part's base.
+ *
+ * \param part An spdk_bdev_part object.
+ *
+ * \return A pointer to the bdev belonging to this part's base.
+ */
+struct spdk_bdev *spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part);
+
+/**
+ * Return this part's offset from the beginning of the base bdev.
+ *
+ * This function should not be called in the I/O path. Any block
+ * translations to I/O will be handled in spdk_bdev_part_submit_request.
+ *
+ * \param part An spdk_bdev_part object.
+ *
+ * \return the block offset of this part from its underlying bdev.
+ */
+uint64_t spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part);
+
+/**
+ * Push media management events. To send the notification that new events are
+ * available, spdk_bdev_notify_media_management needs to be called.
+ *
+ * \param bdev Block device
+ * \param events Array of media events
+ * \param num_events Size of the events array
+ *
+ * \return number of events pushed or negative errno in case of failure
+ */
+int spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_media_event *events,
+ size_t num_events);
+
+/**
+ * Send SPDK_BDEV_EVENT_MEDIA_MANAGEMENT to all open descriptors that have
+ * pending media events.
+ *
+ * \param bdev Block device
+ */
+void spdk_bdev_notify_media_management(struct spdk_bdev *bdev);
+
+/*
+ * Register module for later initialization from a constructor via spdk_bdev_module_list_add().
+ */
+#define SPDK_BDEV_MODULE_REGISTER(name, module) \
+static void __attribute__((constructor)) _spdk_bdev_module_register_##name(void) \
+{ \
+ spdk_bdev_module_list_add(module); \
+}
+
+#endif /* SPDK_BDEV_MODULE_H */
diff --git a/src/spdk/include/spdk/bdev_zone.h b/src/spdk/include/spdk/bdev_zone.h
new file mode 100644
index 000000000..9306256b8
--- /dev/null
+++ b/src/spdk/include/spdk/bdev_zone.h
@@ -0,0 +1,259 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Zoned device public interface
+ */
+
+#ifndef SPDK_BDEV_ZONE_H
+#define SPDK_BDEV_ZONE_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+
+/**
+ * \brief SPDK block device.
+ *
+ * This is a virtual representation of a block device that is exported by the backend.
+ */
+
+struct spdk_bdev;
+
+enum spdk_bdev_zone_action { /* Actions passed to spdk_bdev_zone_management() */
+ SPDK_BDEV_ZONE_CLOSE,
+ SPDK_BDEV_ZONE_FINISH,
+ SPDK_BDEV_ZONE_OPEN,
+ SPDK_BDEV_ZONE_RESET
+};
+
+enum spdk_bdev_zone_state { /* Stored in the state field of struct spdk_bdev_zone_info */
+ SPDK_BDEV_ZONE_STATE_EMPTY,
+ SPDK_BDEV_ZONE_STATE_OPEN,
+ SPDK_BDEV_ZONE_STATE_FULL,
+ SPDK_BDEV_ZONE_STATE_CLOSED,
+ SPDK_BDEV_ZONE_STATE_READ_ONLY,
+ SPDK_BDEV_ZONE_STATE_OFFLINE
+};
+
+struct spdk_bdev_zone_info {
+ uint64_t zone_id; /* Identifies the zone; the submit APIs take the zone's first logical block as zone_id */
+ uint64_t write_pointer; /* NOTE(review): units not visible here, presumably logical blocks - confirm */
+ uint64_t capacity; /* NOTE(review): units not visible here, presumably logical blocks - confirm */
+ enum spdk_bdev_zone_state state; /* One of the spdk_bdev_zone_state values */
+};
+
+/**
+ * Get device zone size in logical blocks.
+ *
+ * \param bdev Block device to query.
+ * \return Size of zone for this zoned device in logical blocks.
+ */
+uint64_t spdk_bdev_get_zone_size(const struct spdk_bdev *bdev);
+
+/**
+ * Get device maximum number of open zones.
+ *
+ * If this value is 0, there is no limit.
+ *
+ * \param bdev Block device to query.
+ * \return Maximum number of open zones for this zoned device.
+ */
+uint32_t spdk_bdev_get_max_open_zones(const struct spdk_bdev *bdev);
+
+/**
+ * Get device optimal number of open zones.
+ *
+ * \param bdev Block device to query.
+ * \return Optimal number of open zones for this zoned device.
+ */
+uint32_t spdk_bdev_get_optimal_open_zones(const struct spdk_bdev *bdev);
+
+/**
+ * Submit a get_zone_info request to the bdev.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param zone_id First logical block of a zone.
+ * \param num_zones Number of consecutive zones info to retrieve.
+ * \param info Pointer to array capable of storing num_zones elements.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_get_zone_info(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t zone_id, size_t num_zones, struct spdk_bdev_zone_info *info,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+
+/**
+ * Submit a zone_management request to the bdev.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param zone_id First logical block of a zone.
+ * \param action Action to perform on a zone (open, close, reset, finish).
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_zone_management(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t zone_id, enum spdk_bdev_zone_action action,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a zone_append request to the bdev.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to be written from.
+ * \param zone_id First logical block of a zone.
+ * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed).
+ * Appended logical block address can be obtained with spdk_bdev_io_get_append_location().
+ * Return negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_zone_append(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t zone_id, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a zone_append request to the bdev. This differs from
+ * spdk_bdev_zone_append by allowing the data buffer to be described in a scatter
+ * gather list.
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iov.
+ * \param zone_id First logical block of a zone.
+ * \param num_blocks The number of blocks to write. The iov buffers must total at least this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed).
+ * Appended logical block address can be obtained with spdk_bdev_io_get_append_location().
+ * Return negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_zone_appendv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt, uint64_t zone_id, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a zone_append request with metadata to the bdev.
+ *
+ * This function uses separate buffer for metadata transfer (valid only if bdev supports this
+ * mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param buf Data buffer to be written from.
+ * \param md Metadata buffer.
+ * \param zone_id First logical block of a zone.
+ * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed).
+ * Appended logical block address can be obtained with spdk_bdev_io_get_append_location().
+ * Return negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_zone_append_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, void *md, uint64_t zone_id, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+/**
+ * Submit a zone_append request with metadata to the bdev. This differs from
+ * spdk_bdev_zone_append_with_md by allowing the data buffer to be described in a scatter
+ * gather list.
+ *
+ * This function uses separate buffer for metadata transfer (valid only if bdev supports this
+ * mode).
+ *
+ * \ingroup bdev_io_submit_functions
+ *
+ * \param desc Block device descriptor.
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param iov A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iov.
+ * \param md Metadata buffer.
+ * \param zone_id First logical block of a zone.
+ * \param num_blocks The number of blocks to write. The iov buffers must total at least this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed).
+ * Appended logical block address can be obtained with spdk_bdev_io_get_append_location().
+ * Return negated errno on failure, in which case the callback will not be called.
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated
+ */
+int spdk_bdev_zone_appendv_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt, void *md, uint64_t zone_id,
+ uint64_t num_blocks, spdk_bdev_io_completion_cb cb,
+ void *cb_arg);
+
+/**
+ * Get append location (offset in blocks of the bdev) for this I/O.
+ *
+ * \param bdev_io I/O to get append location from.
+ */
+uint64_t spdk_bdev_io_get_append_location(struct spdk_bdev_io *bdev_io);
+
+#endif /* SPDK_BDEV_ZONE_H */
diff --git a/src/spdk/include/spdk/bit_array.h b/src/spdk/include/spdk/bit_array.h
new file mode 100644
index 000000000..3019f9f17
--- /dev/null
+++ b/src/spdk/include/spdk/bit_array.h
@@ -0,0 +1,203 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Bit array data structure
+ */
+
+#ifndef SPDK_BIT_ARRAY_H
+#define SPDK_BIT_ARRAY_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Variable-length bit array.
+ */
+struct spdk_bit_array;
+
+/**
+ * Return the number of bits that a bit array is currently sized to hold.
+ *
+ * \param ba Bit array to query.
+ *
+ * \return the number of bits.
+ */
+uint32_t spdk_bit_array_capacity(const struct spdk_bit_array *ba);
+
+/**
+ * Create a bit array.
+ *
+ * \param num_bits Number of bits that the bit array is sized to hold.
+ *
+ * All bits in the array will be cleared.
+ *
+ * \return a pointer to the new bit array.
+ */
+struct spdk_bit_array *spdk_bit_array_create(uint32_t num_bits);
+
+/**
+ * Free a bit array and set the pointer to NULL.
+ *
+ * \param bap Bit array to free.
+ */
+void spdk_bit_array_free(struct spdk_bit_array **bap);
+
+/**
+ * Create or resize a bit array.
+ *
+ * To create a new bit array, pass a pointer to a spdk_bit_array pointer that is
+ * NULL for bap.
+ *
+ * The bit array will be sized to hold at least num_bits.
+ *
+ * If num_bits is smaller than the previous size of the bit array,
+ * any data beyond the new num_bits size will be cleared.
+ *
+ * If num_bits is larger than the previous size of the bit array,
+ * any data beyond the old num_bits size will be cleared.
+ *
+ * \param bap Bit array to create/resize.
+ * \param num_bits Number of bits that the bit array is sized to hold.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_bit_array_resize(struct spdk_bit_array **bap, uint32_t num_bits);
+
+/**
+ * Get the value of a bit from the bit array.
+ *
+ * If bit_index is beyond the end of the current size of the bit array, this
+ * function will return false (i.e. bits beyond the end of the array are implicitly 0).
+ *
+ * \param ba Bit array to query.
+ * \param bit_index The index of a bit to query.
+ *
+ * \return the value of a bit from the bit array on success, or false on failure.
+ */
+bool spdk_bit_array_get(const struct spdk_bit_array *ba, uint32_t bit_index);
+
+/**
+ * Set (to 1) a bit in the bit array.
+ *
+ * If bit_index is beyond the end of the bit array, this function will return -EINVAL.
+ *
+ * \param ba Bit array to set a bit.
+ * \param bit_index The index of a bit to set.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_bit_array_set(struct spdk_bit_array *ba, uint32_t bit_index);
+
+/**
+ * Clear (to 0) a bit in the bit array.
+ *
+ * If bit_index is beyond the end of the bit array, no action is taken. Bits
+ * beyond the end of the bit array are implicitly 0.
+ *
+ * \param ba Bit array to clear a bit.
+ * \param bit_index The index of a bit to clear.
+ */
+void spdk_bit_array_clear(struct spdk_bit_array *ba, uint32_t bit_index);
+
+/**
+ * Find the index of the first set bit in the array.
+ *
+ * \param ba The bit array to search.
+ * \param start_bit_index The bit index from which to start searching (0 to start
+ * from the beginning of the array).
+ *
+ * \return the index of the first set bit. If no bits are set, returns UINT32_MAX.
+ */
+uint32_t spdk_bit_array_find_first_set(const struct spdk_bit_array *ba, uint32_t start_bit_index);
+
+/**
+ * Find the index of the first cleared bit in the array.
+ *
+ * \param ba The bit array to search.
+ * \param start_bit_index The bit index from which to start searching (0 to start
+ * from the beginning of the array).
+ *
+ * \return the index of the first cleared bit. If no bits are cleared, returns UINT32_MAX.
+ */
+uint32_t spdk_bit_array_find_first_clear(const struct spdk_bit_array *ba, uint32_t start_bit_index);
+
+/**
+ * Count the number of set bits in the array.
+ *
+ * \param ba The bit array to search.
+ *
+ * \return the number of bits set in the array.
+ */
+uint32_t spdk_bit_array_count_set(const struct spdk_bit_array *ba);
+
+/**
+ * Count the number of cleared bits in the array.
+ *
+ * \param ba The bit array to search.
+ *
+ * \return the number of bits cleared in the array.
+ */
+uint32_t spdk_bit_array_count_clear(const struct spdk_bit_array *ba);
+
+/**
+ * Store bitmask from bit array.
+ *
+ * \param ba Bit array.
+ * \param mask Destination mask. Mask and bit array capacity must be equal.
+ */
+void spdk_bit_array_store_mask(const struct spdk_bit_array *ba, void *mask);
+
+/**
+ * Load bitmask to bit array.
+ *
+ * \param ba Bit array.
+ * \param mask Source mask. Mask and bit array capacity must be equal.
+ */
+void spdk_bit_array_load_mask(struct spdk_bit_array *ba, const void *mask);
+
+/**
+ * Clear (to 0) bit array bitmask.
+ *
+ * \param ba Bit array.
+ */
+void spdk_bit_array_clear_mask(struct spdk_bit_array *ba);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/blob.h b/src/spdk/include/spdk/blob.h
new file mode 100644
index 000000000..fbc2728ee
--- /dev/null
+++ b/src/spdk/include/spdk/blob.h
@@ -0,0 +1,897 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Blob Storage System
+ *
+ * The blob storage system, or the blobstore for short, is a low level
+ * library for placing opaque blobs of data onto a storage device such
+ * that scattered physical blocks on the storage device appear as a
+ * single, contiguous storage region. These blobs are also persistent,
+ * which means they are rediscoverable after reboot or power loss.
+ *
+ * The blobstore is designed to be very high performance, and thus has
+ * a few general rules regarding thread safety to avoid taking locks
+ * in the I/O path. This is primarily done by only allowing most
+ * functions to be called on the metadata thread. The metadata thread is
+ * the thread which called spdk_bs_init() or spdk_bs_load().
+ *
+ * Functions starting with the prefix "spdk_blob_io" are passed a channel
+ * as an argument, and channels may only be used from the thread they were
+ * created on. See \ref spdk_bs_alloc_io_channel. These are the only
+ * functions that may be called from a thread other than the metadata
+ * thread.
+ *
+ * The blobstore returns errors using negated POSIX errno values, either
+ * returned in the callback or as a return value. An errno value of 0 means
+ * success.
+ */
+
+#ifndef SPDK_BLOB_H
+#define SPDK_BLOB_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t spdk_blob_id;
+#define SPDK_BLOBID_INVALID ((uint64_t)-1) /* invalid-id sentinel; parenthesized to expand as one expression */
+#define SPDK_BLOBSTORE_TYPE_LENGTH 16
+
+enum blob_clear_method {
+ BLOB_CLEAR_WITH_DEFAULT,
+ BLOB_CLEAR_WITH_NONE,
+ BLOB_CLEAR_WITH_UNMAP,
+ BLOB_CLEAR_WITH_WRITE_ZEROES,
+};
+
+enum bs_clear_method {
+ BS_CLEAR_WITH_UNMAP,
+ BS_CLEAR_WITH_WRITE_ZEROES,
+ BS_CLEAR_WITH_NONE,
+};
+
+struct spdk_blob_store;
+struct spdk_io_channel;
+struct spdk_blob;
+struct spdk_xattr_names;
+
+/**
+ * Blobstore operation completion callback.
+ *
+ * \param cb_arg Callback argument.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_bs_op_complete)(void *cb_arg, int bserrno);
+
+/**
+ * Blobstore operation completion callback with handle.
+ *
+ * \param cb_arg Callback argument.
+ * \param bs Handle to a blobstore.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_bs_op_with_handle_complete)(void *cb_arg, struct spdk_blob_store *bs,
+ int bserrno);
+
+/**
+ * Blob operation completion callback.
+ *
+ * \param cb_arg Callback argument.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_blob_op_complete)(void *cb_arg, int bserrno);
+
+/**
+ * Blob operation completion callback with blob ID.
+ *
+ * \param cb_arg Callback argument.
+ * \param blobid Blob ID.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_blob_op_with_id_complete)(void *cb_arg, spdk_blob_id blobid, int bserrno);
+
+/**
+ * Blob operation completion callback with handle.
+ *
+ * \param cb_arg Callback argument.
+ * \param blb Handle to a blob.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_blob_op_with_handle_complete)(void *cb_arg, struct spdk_blob *blb, int bserrno);
+
+/**
+ * Blobstore device completion callback.
+ *
+ * \param channel I/O channel the operation was initiated on.
+ * \param cb_arg Callback argument.
+ * \param bserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_bs_dev_cpl)(struct spdk_io_channel *channel,
+ void *cb_arg, int bserrno);
+
+struct spdk_bs_dev_cb_args {
+ spdk_bs_dev_cpl cb_fn;
+ struct spdk_io_channel *channel;
+ void *cb_arg;
+};
+
+struct spdk_bs_dev {
+ /* Create a new channel which is a software construct that is used
+ * to submit I/O. */
+ struct spdk_io_channel *(*create_channel)(struct spdk_bs_dev *dev);
+
+ /* Destroy a previously created channel */
+ void (*destroy_channel)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel);
+
+ /* Destroy this blobstore device. Applications must not destroy the blobstore device,
+ * rather the blobstore will destroy it using this function pointer once all
+ * references to it during unload callback context have been completed.
+ */
+ void (*destroy)(struct spdk_bs_dev *dev);
+
+ void (*read)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*write)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*readv)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
+ struct iovec *iov, int iovcnt,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*writev)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
+ struct iovec *iov, int iovcnt,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*flush)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*write_zeroes)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
+ uint64_t lba, uint32_t lba_count,
+ struct spdk_bs_dev_cb_args *cb_args);
+
+ uint64_t blockcnt;
+ uint32_t blocklen; /* In bytes */
+};
+
+struct spdk_bs_type {
+ char bstype[SPDK_BLOBSTORE_TYPE_LENGTH];
+};
+
+struct spdk_bs_opts {
+ /** Size of cluster in bytes. Must be multiple of 4KiB page size. */
+ uint32_t cluster_sz;
+
+ /** Count of the number of pages reserved for metadata */
+ uint32_t num_md_pages;
+
+ /** Maximum simultaneous metadata operations */
+ uint32_t max_md_ops;
+
+ /** Maximum simultaneous operations per channel */
+ uint32_t max_channel_ops;
+
+ /** Clear method */
+ enum bs_clear_method clear_method;
+
+ /** Blobstore type */
+ struct spdk_bs_type bstype;
+
+ /** Callback function to invoke for each blob. */
+ spdk_blob_op_with_handle_complete iter_cb_fn;
+
+ /** Argument passed to iter_cb_fn for each blob. */
+ void *iter_cb_arg;
+};
+
+/**
+ * Initialize a spdk_bs_opts structure to the default blobstore option values.
+ *
+ * \param opts The spdk_bs_opts structure to be initialized.
+ */
+void spdk_bs_opts_init(struct spdk_bs_opts *opts);
+
+/**
+ * Load a blobstore from the given device.
+ *
+ * \param dev Blobstore block device.
+ * \param opts The structure which contains the option values for the blobstore.
+ * \param cb_fn Called when the loading is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts,
+ spdk_bs_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Initialize a blobstore on the given device.
+ *
+ * \param dev Blobstore block device.
+ * \param opts The structure which contains the option values for the blobstore.
+ * \param cb_fn Called when the initialization is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts,
+ spdk_bs_op_with_handle_complete cb_fn, void *cb_arg);
+
+typedef void (*spdk_bs_dump_print_xattr)(FILE *fp, const char *bstype, const char *name,
+ const void *value, size_t value_length);
+
+/**
+ * Dump a blobstore's metadata to a given FILE in human-readable format.
+ *
+ * \param dev Blobstore block device.
+ * \param fp FILE pointer to dump the metadata contents.
+ * \param print_xattr_fn Callback function to interpret external xattrs.
+ * \param cb_fn Called when the dump is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
+ spdk_bs_op_complete cb_fn, void *cb_arg);
+/**
+ * Destroy the blobstore.
+ *
+ * It will destroy the blobstore by zeroing the super block.
+ *
+ * \param bs blobstore to destroy.
+ * \param cb_fn Called when the destruction is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Unload the blobstore.
+ *
+ * It will flush all volatile data to disk.
+ *
+ * \param bs blobstore to unload.
+ * \param cb_fn Called when the unloading is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Set a super blob on the given blobstore.
+ *
+ * This will be retrievable immediately after spdk_bs_load() on the next initialization.
+ *
+ * \param bs blobstore.
+ * \param blobid The id of the blob which will be set as the super blob.
+ * \param cb_fn Called when the setting is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ spdk_bs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Get the super blob. The obtained blob id will be passed to the callback function.
+ *
+ * \param bs blobstore.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_get_super(struct spdk_blob_store *bs,
+ spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
+
+/**
+ * Get the cluster size in bytes.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return cluster size.
+ */
+uint64_t spdk_bs_get_cluster_size(struct spdk_blob_store *bs);
+
+/**
+ * Get the page size in bytes. This is the write and read granularity of blobs.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return page size.
+ */
+uint64_t spdk_bs_get_page_size(struct spdk_blob_store *bs);
+
+/**
+ * Get the io unit size in bytes.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return io unit size.
+ */
+uint64_t spdk_bs_get_io_unit_size(struct spdk_blob_store *bs);
+
+/**
+ * Get the number of free clusters.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return the number of free clusters.
+ */
+uint64_t spdk_bs_free_cluster_count(struct spdk_blob_store *bs);
+
+/**
+ * Get the total number of clusters accessible by user.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return the total number of clusters accessible by user.
+ */
+uint64_t spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs);
+
+/**
+ * Get the blob id.
+ *
+ * \param blob Blob struct to query.
+ *
+ * \return blob id.
+ */
+spdk_blob_id spdk_blob_get_id(struct spdk_blob *blob);
+
+/**
+ * Get the number of pages allocated to the blob.
+ *
+ * \param blob Blob struct to query.
+ *
+ * \return the number of pages.
+ */
+uint64_t spdk_blob_get_num_pages(struct spdk_blob *blob);
+
+/**
+ * Get the number of io_units allocated to the blob.
+ *
+ * \param blob Blob struct to query.
+ *
+ * \return the number of io_units.
+ */
+uint64_t spdk_blob_get_num_io_units(struct spdk_blob *blob);
+
+/**
+ * Get the number of clusters allocated to the blob.
+ *
+ * \param blob Blob struct to query.
+ *
+ * \return the number of clusters.
+ */
+uint64_t spdk_blob_get_num_clusters(struct spdk_blob *blob);
+
+struct spdk_blob_xattr_opts {
+ /* Number of attributes */
+ size_t count;
+ /* Array of attribute names. Caller should free this array after use. */
+ char **names;
+ /* User context passed to the get_value callback */
+ void *ctx;
+ /* Callback that will return value for each attribute name. */
+ void (*get_value)(void *xattr_ctx, const char *name,
+ const void **value, size_t *value_len);
+};
+
+struct spdk_blob_opts {
+ uint64_t num_clusters;
+ bool thin_provision;
+ enum blob_clear_method clear_method;
+ struct spdk_blob_xattr_opts xattrs;
+
+ /** Enable separate extent pages in metadata */
+ bool use_extent_table;
+};
+
+/**
+ * Initialize a spdk_blob_opts structure to the default blob option values.
+ *
+ * \param opts spdk_blob_opts structure to initialize.
+ */
+void spdk_blob_opts_init(struct spdk_blob_opts *opts);
+
+/**
+ * Create a new blob with options on the given blobstore. The new blob id will
+ * be passed to the callback function.
+ *
+ * \param bs blobstore.
+ * \param opts The structure which contains the option values for the new blob.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
+ spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
+
+/**
+ * Create a new blob with default option values on the given blobstore.
+ * The new blob id will be passed to the callback function.
+ *
+ * \param bs blobstore.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_create_blob(struct spdk_blob_store *bs,
+ spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
+
+/**
+ * Create a read-only snapshot of specified blob with provided options.
+ * This will automatically sync specified blob.
+ *
+ * When operation is done, original blob is converted to the thin-provisioned
+ * blob with a newly created read-only snapshot set as a backing blob.
+ * Structure snapshot_xattrs as well as anything it references (like e.g. names
+ * array) must be valid until the completion is called.
+ *
+ * \param bs blobstore.
+ * \param blobid Id of the source blob used to create a snapshot.
+ * \param snapshot_xattrs xattrs specified for snapshot.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ const struct spdk_blob_xattr_opts *snapshot_xattrs,
+ spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
+
+/**
+ * Create a clone of specified read-only blob.
+ *
+ * Structure clone_xattrs as well as anything it references (like e.g. names
+ * array) must be valid until the completion is called.
+ *
+ * \param bs blobstore.
+ * \param blobid Id of the read only blob used as a snapshot for new clone.
+ * \param clone_xattrs xattrs specified for clone.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ const struct spdk_blob_xattr_opts *clone_xattrs,
+ spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
+
+/**
+ * Provide a table with the blob ids of the clones that depend on the specified snapshot.
+ *
+ * Ids array should be allocated and the count parameter set to the number of
+ * id's it can store, before calling this function.
+ *
+ * If ids is NULL or count parameter is not sufficient to handle ids of all
+ * clones, -ENOMEM error is returned and count parameter is updated to the
+ * total number of clones.
+ *
+ * \param bs blobstore.
+ * \param blobid Blob id of the snapshot.
+ * \param ids Array of the clone ids or NULL to get required size in count.
+ * \param count Size of ids. After call it is updated to the number of clones.
+ *
+ * \return -ENOMEM if count is not sufficient to store all clones.
+ */
+int spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
+ size_t *count);
+
+/**
+ * Get the blob id for the parent snapshot of this blob.
+ *
+ * \param bs blobstore.
+ * \param blobid Blob id.
+ *
+ * \return blob id of the parent snapshot, or SPDK_BLOBID_INVALID if the blob has no parent.
+ */
+spdk_blob_id spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid);
+
+/**
+ * Check if blob is read only.
+ *
+ * \param blob Blob.
+ *
+ * \return true if blob is read only.
+ */
+bool spdk_blob_is_read_only(struct spdk_blob *blob);
+
+/**
+ * Check if blob is a snapshot.
+ *
+ * \param blob Blob.
+ *
+ * \return true if blob is a snapshot.
+ */
+bool spdk_blob_is_snapshot(struct spdk_blob *blob);
+
+/**
+ * Check if blob is a clone.
+ *
+ * \param blob Blob.
+ *
+ * \return true if blob is a clone.
+ */
+bool spdk_blob_is_clone(struct spdk_blob *blob);
+
+/**
+ * Check if blob is thin-provisioned.
+ *
+ * \param blob Blob.
+ *
+ * \return true if blob is thin-provisioned.
+ */
+bool spdk_blob_is_thin_provisioned(struct spdk_blob *blob);
+
+/**
+ * Delete an existing blob from the given blobstore.
+ *
+ * \param bs blobstore.
+ * \param blobid The id of the blob to delete.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Allocate all clusters in this blob. Data for allocated clusters is copied
+ * from backing blob(s) if they exist.
+ *
+ * This call removes all dependencies on any backing blobs.
+ *
+ * \param bs blobstore.
+ * \param channel IO channel used to inflate blob.
+ * \param blobid The id of the blob to inflate.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
+ spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Remove dependency on parent blob.
+ *
+ * This call allocates and copies data for any clusters that are allocated in
+ * the parent blob, and decouples parent updating dependencies of blob to
+ * its ancestor.
+ *
+ * If the blob has no parent, an -EINVAL error is reported.
+ *
+ * \param bs blobstore.
+ * \param channel IO channel used to inflate blob.
+ * \param blobid The id of the blob.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
+ spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+struct spdk_blob_open_opts {
+ enum blob_clear_method clear_method;
+};
+
+/**
+ * Initialize a spdk_blob_open_opts structure to the default blob option values.
+ *
+ * \param opts spdk_blob_open_opts structure to initialize.
+ */
+void spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts);
+
+/**
+ * Open a blob from the given blobstore.
+ *
+ * \param bs blobstore.
+ * \param blobid The id of the blob to open.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Open a blob from the given blobstore with additional options.
+ *
+ * \param bs blobstore.
+ * \param blobid The id of the blob to open.
+ * \param opts The structure which contains the option values for the blob.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
+ struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Resize a blob to 'sz' clusters. These changes are not persisted to disk until
+ * spdk_blob_sync_md() is called.
+ * If called before the previous resize finishes, it will fail with errno -EBUSY.
+ *
+ * \param blob Blob to resize.
+ * \param sz The new number of clusters.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ *
+ */
+void spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Set blob as read only.
+ *
+ * These changes do not take effect until spdk_blob_sync_md() is called.
+ *
+ * \param blob Blob to set.
+ */
+int spdk_blob_set_read_only(struct spdk_blob *blob);
+
+/**
+ * Sync a blob.
+ *
+ * Make a blob persistent. This applies to open, resize, set xattr, and remove
+ * xattr. These operations will not be persistent until the blob has been synced.
+ *
+ * \param blob Blob to sync.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Close a blob. This will automatically sync.
+ *
+ * \param blob Blob to close.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Allocate an I/O channel for the given blobstore.
+ *
+ * \param bs blobstore.
+ * \return a pointer to the allocated I/O channel.
+ */
+struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs);
+
+/**
+ * Free the I/O channel.
+ *
+ * \param channel I/O channel to free.
+ */
+void spdk_bs_free_io_channel(struct spdk_io_channel *channel);
+
+/**
+ * Write data to a blob.
+ *
+ * \param blob Blob to write.
+ * \param channel The I/O channel used to submit requests.
+ * \param payload The specified buffer which should contain the data to be written.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of data in io units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ void *payload, uint64_t offset, uint64_t length,
+ spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Read data from a blob.
+ *
+ * \param blob Blob to read.
+ * \param channel The I/O channel used to submit requests.
+ * \param payload The specified buffer which will store the obtained data.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of data in io units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ void *payload, uint64_t offset, uint64_t length,
+ spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Write the data described by 'iov' to 'length' io_units beginning at 'offset' io_units
+ * into the blob.
+ *
+ * \param blob Blob to write.
+ * \param channel I/O channel used to submit requests.
+ * \param iov The pointer points to an array of iovec structures.
+ * \param iovcnt The number of buffers.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of data in io units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
+ spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Read 'length' io_units starting at 'offset' io_units into the blob into the memory
+ * described by 'iov'.
+ *
+ * \param blob Blob to read.
+ * \param channel I/O channel used to submit requests.
+ * \param iov The pointer points to an array of iovec structures.
+ * \param iovcnt The number of buffers.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of data in io units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
+ spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Unmap 'length' io_units beginning at 'offset' io_units on the blob as unused. Unmapped
+ * io_units may allow the underlying storage media to behave more efficiently.
+ *
+ * \param blob Blob to unmap.
+ * \param channel I/O channel used to submit requests.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of unmap area in io_units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Write zeros into area of a blob.
+ *
+ * \param blob Blob to write.
+ * \param channel I/O channel used to submit requests.
+ * \param offset Offset is in io units from the beginning of the blob.
+ * \param length Size of data in io units.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
+ uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Get the first blob of the blobstore. The obtained blob will be passed to
+ * the callback function.
+ *
+ * \param bs blobstore to traverse.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_iter_first(struct spdk_blob_store *bs,
+ spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Get the next blob by using the current blob. The obtained blob will be passed
+ * to the callback function.
+ *
+ * \param bs blobstore to traverse.
+ * \param blob The current blob.
+ * \param cb_fn Called when the operation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
+ spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Set an extended attribute for the given blob.
+ *
+ * \param blob Blob to set attribute.
+ * \param name Name of the extended attribute.
+ * \param value Value of the extended attribute.
+ * \param value_len Length of the value.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
+ uint16_t value_len);
+
+/**
+ * Remove the extended attribute from the given blob.
+ *
+ * \param blob Blob to remove attribute.
+ * \param name Name of the extended attribute.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name);
+
+/**
+ * Get the value of the specified extended attribute. The obtained value and its
+ * size will be stored in value and value_len.
+ *
+ * \param blob Blob to query.
+ * \param name Name of the extended attribute.
+ * \param value Parameter as output.
+ * \param value_len Parameter as output.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
+ const void **value, size_t *value_len);
+
+/**
+ * Iterate through all extended attributes of the blob. Get the names of all extended
+ * attributes that will be stored in names.
+ *
+ * \param blob Blob to query.
+ * \param names Parameter as output.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names);
+
+/**
+ * Get the number of extended attributes.
+ *
+ * \param names Names of total extended attributes of the blob.
+ *
+ * \return the number of extended attributes.
+ */
+uint32_t spdk_xattr_names_get_count(struct spdk_xattr_names *names);
+
+/**
+ * Get the attribute name specified by the index.
+ *
+ * \param names Names of total extended attributes of the blob.
+ * \param index Index position of the specified attribute.
+ *
+ * \return attribute name.
+ */
+const char *spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index);
+
+/**
+ * Free the attribute names.
+ *
+ * \param names Names of total extended attributes of the blob.
+ */
+void spdk_xattr_names_free(struct spdk_xattr_names *names);
+
+/**
+ * Get blobstore type of the given device.
+ *
+ * \param bs blobstore to query.
+ *
+ * \return blobstore type.
+ */
+struct spdk_bs_type spdk_bs_get_bstype(struct spdk_blob_store *bs);
+
+/**
+ * Set blobstore type to the given device.
+ *
+ * \param bs blobstore to set to.
+ * \param bstype Type label to set.
+ */
+void spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_BLOB_H */
diff --git a/src/spdk/include/spdk/blob_bdev.h b/src/spdk/include/spdk/blob_bdev.h
new file mode 100644
index 000000000..1867c464c
--- /dev/null
+++ b/src/spdk/include/spdk/blob_bdev.h
@@ -0,0 +1,88 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Helper library to use spdk_bdev as the backing device for a blobstore
+ */
+
+#ifndef SPDK_BLOB_BDEV_H
+#define SPDK_BLOB_BDEV_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_bs_dev;
+struct spdk_bdev;
+struct spdk_bdev_module;
+
+/**
+ * Create a blobstore block device from a bdev. (deprecated, please use spdk_bdev_create_bs_dev_from_desc,
+ * together with spdk_bdev_open_ext).
+ *
+ * \param bdev Bdev to use.
+ * \param remove_cb Called when the block device is removed.
+ * \param remove_ctx Argument passed to function remove_cb.
+ *
+ * \return a pointer to the blobstore block device on success or NULL otherwise.
+ */
+struct spdk_bs_dev *spdk_bdev_create_bs_dev(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb,
+ void *remove_ctx);
+
+/**
+ * Create a blobstore block device from the descriptor of a bdev.
+ *
+ * \param desc Descriptor of a bdev. spdk_bdev_open_ext() is recommended to get the desc.
+ *
+ * \return a pointer to the blobstore block device on success or NULL otherwise.
+ */
+struct spdk_bs_dev *spdk_bdev_create_bs_dev_from_desc(struct spdk_bdev_desc *desc);
+
+/**
+ * Claim the bdev module for the given blobstore.
+ *
+ * \param bs_dev Blobstore block device.
+ * \param module Bdev module to claim.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_bs_bdev_claim(struct spdk_bs_dev *bs_dev, struct spdk_bdev_module *module);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/blobfs.h b/src/spdk/include/spdk/blobfs.h
new file mode 100644
index 000000000..2a4342ded
--- /dev/null
+++ b/src/spdk/include/spdk/blobfs.h
@@ -0,0 +1,599 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * SPDK Filesystem
+ */
+
+#ifndef SPDK_FS_H
+#define SPDK_FS_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/blob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_FILE_NAME_MAX 255
+
+struct spdk_file;
+struct spdk_filesystem;
+
+typedef struct spdk_file *spdk_fs_iter;
+
+struct spdk_blobfs_opts {
+ uint32_t cluster_sz;
+};
+
+struct spdk_file_stat {
+ spdk_blob_id blobid;
+ uint64_t size;
+};
+
+/**
+ * Filesystem operation completion callback with handle.
+ *
+ * \param ctx Context for the operation.
+ * \param fs Handle to a blobfs.
+ * \param fserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_fs_op_with_handle_complete)(void *ctx, struct spdk_filesystem *fs,
+ int fserrno);
+
+/**
+ * File operation completion callback with handle.
+ *
+ * \param ctx Context for the operation.
+ * \param f Handle to a file.
+ * \param fserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_file_op_with_handle_complete)(void *ctx, struct spdk_file *f, int fserrno);
+typedef spdk_bs_op_complete spdk_fs_op_complete;
+
+/**
+ * File operation completion callback.
+ *
+ * \param ctx Context for the operation.
+ * \param fserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_file_op_complete)(void *ctx, int fserrno);
+
+/**
+ * File stat operation completion callback.
+ *
+ * \param ctx Context for the operation.
+ * \param stat Handle to the stat about the file.
+ * \param fserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_file_stat_op_complete)(void *ctx, struct spdk_file_stat *stat, int fserrno);
+
+/**
+ * Function for a request of file system.
+ *
+ * \param arg Argument to the request function.
+ */
+typedef void (*fs_request_fn)(void *arg);
+
+/**
+ * Function for sending request.
+ *
+ * This function will be invoked any time when the filesystem wants to pass a
+ * message to the main dispatch thread.
+ *
+ * \param fs_request_fn A pointer to the request function.
+ * \param arg Argument to the request function.
+ */
+typedef void (*fs_send_request_fn)(fs_request_fn, void *arg);
+
+/**
+ * Initialize a spdk_blobfs_opts structure to the default option values.
+ *
+ * \param opts spdk_blobfs_opts structure to initialize.
+ */
+void spdk_fs_opts_init(struct spdk_blobfs_opts *opts);
+
+/**
+ * Initialize blobstore filesystem.
+ *
+ * Initialize the blobstore filesystem on the blobstore block device which has
+ * been created by the function spdk_bdev_create_bs_dev() in the blob_bdev.h.
+ * The obtained blobstore filesystem will be passed to the callback function.
+ *
+ * \param dev Blobstore block device used by this blobstore filesystem.
+ * \param opt Initialization options used for this blobstore filesystem.
+ * \param send_request_fn The function for sending request. This function will
+ * be invoked any time when the blobstore filesystem wants to pass a message to
+ * the main dispatch thread.
+ * \param cb_fn Called when the initialization is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_fs_init(struct spdk_bs_dev *dev, struct spdk_blobfs_opts *opt,
+ fs_send_request_fn send_request_fn,
+ spdk_fs_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Load blobstore filesystem from the given blobstore block device.
+ *
+ * The obtained blobstore filesystem will be passed to the callback function.
+ *
+ * \param dev Blobstore block device used by this blobstore filesystem.
+ * \param send_request_fn The function for sending request. This function will
+ * be invoked any time when the blobstore filesystem wants to pass a message to
+ * the main dispatch thread.
+ * \param cb_fn Called when the loading is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn,
+ spdk_fs_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Unload blobstore filesystem.
+ *
+ * \param fs Blobstore filesystem to unload.
+ * \param cb_fn Called when the unloading is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Allocate an I/O channel for asynchronous operations.
+ *
+ * \param fs Blobstore filesystem to allocate I/O channel.
+ *
+ * \return a pointer to the I/O channel on success or NULL otherwise.
+ */
+struct spdk_io_channel *spdk_fs_alloc_io_channel(struct spdk_filesystem *fs);
+
+/**
+ * Free I/O channel.
+ *
+ * This function will decrease the references of this I/O channel. If the reference
+ * is reduced to 0, the I/O channel will be freed.
+ *
+ * \param channel I/O channel to free.
+ */
+void spdk_fs_free_io_channel(struct spdk_io_channel *channel);
+
+/**
+ * Allocate a context for synchronous operations.
+ *
+ * \param fs Blobstore filesystem for this context.
+ *
+ * \return a pointer to the context on success or NULL otherwise.
+ */
+struct spdk_fs_thread_ctx *spdk_fs_alloc_thread_ctx(struct spdk_filesystem *fs);
+
+/**
+ * Free thread context.
+ *
+ * \param ctx Thread context to free.
+ */
+void spdk_fs_free_thread_ctx(struct spdk_fs_thread_ctx *ctx);
+
+/**
+ * Get statistics about the file including the underlying blob id and the file size.
+ *
+ * \param fs Blobstore filesystem.
+ * \param ctx The thread context for this operation
+ * \param name The file name used to look up the matched file in the blobstore filesystem.
+ * \param stat Caller allocated structure to store the obtained information about
+ * this file.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
+ const char *name, struct spdk_file_stat *stat);
+
+#define SPDK_BLOBFS_OPEN_CREATE (1ULL << 0)
+
+/**
+ * Create a new file on the given blobstore filesystem.
+ *
+ * \param fs Blobstore filesystem.
+ * \param ctx The thread context for this operation
+ * \param name The file name for this new file.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
+ const char *name);
+
+/**
+ * Open the file.
+ *
+ * \param fs Blobstore filesystem.
+ * \param ctx The thread context for this operation
+ * \param name The file name used to look up the matched file in the blobstore filesystem.
+ * \param flags These flags will be used to control the open mode.
+ * \param file It will point to the open file if successful or NULL otherwise.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
+ const char *name, uint32_t flags, struct spdk_file **file);
+
+/**
+ * Close the file.
+ *
+ * \param file File to close.
+ * \param ctx The thread context for this operation
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_file_close(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx);
+
+/**
+ * Change the file name.
+ *
+ * This operation will overwrite an existing file if there is a file with the
+ * same name.
+ *
+ * \param fs Blobstore filesystem.
+ * \param ctx The thread context for this operation
+ * \param old_name Old name of the file.
+ * \param new_name New name of the file.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
+ const char *old_name, const char *new_name);
+
+/**
+ * Delete the file.
+ *
+ * \param fs Blobstore filesystem.
+ * \param ctx The thread context for this operation
+ * \param name The name of the file to be deleted.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
+ const char *name);
+
+/**
+ * Get the first file in the blobstore filesystem.
+ *
+ * \param fs Blobstore filesystem to traverse.
+ *
+ * \return an iterator which points to the first file in the blobstore filesystem.
+ */
+spdk_fs_iter spdk_fs_iter_first(struct spdk_filesystem *fs);
+
+/**
+ * Get the next file in the blobstore filesystem by using the input iterator.
+ *
+ * \param iter The iterator which points to the current file struct.
+ *
+ * \return an iterator which points to the next file in the blobstore filesystem.
+ */
+spdk_fs_iter spdk_fs_iter_next(spdk_fs_iter iter);
+
+#define spdk_fs_iter_get_file(iter) ((struct spdk_file *)(iter))
+
+/**
+ * Truncate the file.
+ *
+ * \param file File to truncate.
+ * \param ctx The thread context for this operation
+ * \param length New size in bytes of the file.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_file_truncate(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
+ uint64_t length);
+
+/**
+ * Get file name.
+ *
+ * \param file File to query.
+ *
+ * \return the name of the file.
+ */
+const char *spdk_file_get_name(struct spdk_file *file);
+
+/**
+ * Obtain the size of the file.
+ *
+ * \param file File to query.
+ *
+ * \return the size in bytes of the file.
+ */
+uint64_t spdk_file_get_length(struct spdk_file *file);
+
+/**
+ * Write data to the given file.
+ *
+ * \param file File to write.
+ * \param ctx The thread context for this operation
+ * \param payload The specified buffer which should contain the data to be transmitted.
+ * \param offset The beginning position to write data.
+ * \param length The size in bytes of data to write.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_file_write(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
+ void *payload, uint64_t offset, uint64_t length);
+
+/**
+ * Read data to user buffer from the given file.
+ *
+ * \param file File to read.
+ * \param ctx The thread context for this operation
+ * \param payload The specified buffer which will store the obtained data.
+ * \param offset The beginning position to read.
+ * \param length The size in bytes of data to read.
+ *
+ * \return the end position of this read operation on success, negated errno on failure.
+ */
+int64_t spdk_file_read(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
+ void *payload, uint64_t offset, uint64_t length);
+
+/**
+ * Set cache size for the blobstore filesystem.
+ *
+ * \param size_in_mb Cache size in megabytes.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_fs_set_cache_size(uint64_t size_in_mb);
+
+/**
+ * Obtain the cache size.
+ *
+ * \return cache size in megabytes.
+ */
+uint64_t spdk_fs_get_cache_size(void);
+
+#define SPDK_FILE_PRIORITY_LOW 0 /* default */
+#define SPDK_FILE_PRIORITY_HIGH 1
+
+/**
+ * Set priority for the file.
+ *
+ * \param file File to set priority.
+ * \param priority Priority level (SPDK_FILE_PRIORITY_LOW or SPDK_FILE_PRIORITY_HIGH).
+ */
+void spdk_file_set_priority(struct spdk_file *file, uint32_t priority);
+
+/**
+ * Synchronize the data from the cache to the disk.
+ *
+ * \param file File to sync.
+ * \param ctx The thread context for this operation
+ *
+ * \return 0 on success.
+ */
+int spdk_file_sync(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx);
+
+/**
+ * Get the unique ID for the file.
+ *
+ * \param file File to get the ID.
+ * \param id ID buffer.
+ * \param size Size of the ID buffer.
+ *
+ * \return the length of ID on success.
+ */
+int spdk_file_get_id(struct spdk_file *file, void *id, size_t size);
+
+/**
+ * Read data to user buffer from the given file.
+ *
+ * \param file File to read.
+ * \param channel I/O channel for asynchronous operations.
+ * \param iovs A scatter gather list of buffers to be read into.
+ * \param iovcnt The number of elements in iovs.
+ * \param offset The beginning position to read.
+ * \param length The size in bytes of data to read.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_readv_async(struct spdk_file *file, struct spdk_io_channel *channel,
+ struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Write data to the given file.
+ *
+ * \param file File to write.
+ * \param channel I/O channel for asynchronous operations.
+ * \param iovs A scatter gather list of buffers to be written from.
+ * \param iovcnt The number of elements in iovs.
+ * \param offset The beginning position to write.
+ * \param length The size in bytes of data to write.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_writev_async(struct spdk_file *file, struct spdk_io_channel *channel,
+ struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Get statistics about the file including the underlying blob id and the file size.
+ *
+ * \param fs Blobstore filesystem.
+ * \param name The file name used to look up the matched file in the blobstore filesystem.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name,
+ spdk_file_stat_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Create a new file on the given blobstore filesystem.
+ *
+ * \param fs Blobstore filesystem.
+ * \param name The file name for this new file.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Open the file.
+ *
+ * \param fs Blobstore filesystem.
+ * \param name The file name used to look up the matched file in the blobstore filesystem.
+ * \param flags These flags will be used to control the open mode.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags,
+ spdk_file_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Close the file.
+ *
+ * \param file File to close.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg);
+
+
+/**
+ * Change the file name.
+ *
+ * This operation will overwrite an existing file if there is a file with the
+ * same name.
+ *
+ * \param fs Blobstore filesystem.
+ * \param old_name Old name of the file.
+ * \param new_name New name of the file.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_fs_rename_file_async(struct spdk_filesystem *fs, const char *old_name,
+ const char *new_name, spdk_fs_op_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Delete the file.
+ *
+ * \param fs Blobstore filesystem.
+ * \param name The name of the file to be deleted.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ *
+ */
+void spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Truncate the file.
+ *
+ * \param file File to truncate.
+ * \param length New size in bytes of the file.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_truncate_async(struct spdk_file *file, uint64_t length,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Write data to the given file.
+ *
+ * \param file File to write.
+ * \param channel I/O channel for asynchronous operations.
+ * \param payload The specified buffer which should contain the data to be transmitted.
+ * \param offset The beginning position to write data.
+ * \param length The size in bytes of data to write.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel,
+ void *payload, uint64_t offset, uint64_t length,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Read data to user buffer from the given file.
+ *
+ * \param file File to read.
+ * \param channel I/O channel for asynchronous operations.
+ * \param payload The specified buffer which will store the obtained data.
+ * \param offset The beginning position to read.
+ * \param length The size in bytes of data to read.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel,
+ void *payload, uint64_t offset, uint64_t length,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Sync all dirty cache buffers to the backing block device. For async
+ * usage models, completion of the sync indicates only that data written
+ * when the sync command was issued have been flushed to disk - it does
+ * not guarantee any writes submitted after the sync have been flushed,
+ * even if those writes are completed before the sync.
+ *
+ * \param file File to sync.
+ * \param channel I/O channel for asynchronous operations.
+ * \param cb_fn Called when the request is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return None.
+ */
+void spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *channel,
+ spdk_file_op_complete cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_FS_H */
diff --git a/src/spdk/include/spdk/blobfs_bdev.h b/src/spdk/include/spdk/blobfs_bdev.h
new file mode 100644
index 000000000..e915b18f8
--- /dev/null
+++ b/src/spdk/include/spdk/blobfs_bdev.h
@@ -0,0 +1,98 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Operations on blobfs whose backing device is spdk_bdev
+ */
+
+#ifndef SPDK_BLOBFS_BDEV_H
+#define SPDK_BLOBFS_BDEV_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+#include "spdk/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * blobfs on bdev operation completion callback.
+ *
+ * \param cb_arg Callback argument.
+ * \param fserrno 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_blobfs_bdev_op_complete)(void *cb_arg, int fserrno);
+
+/**
+ * Detect whether blobfs exists on the given device.
+ *
+ * \param bdev_name Name of block device.
+ * \param cb_fn Called when the detection is complete. fserrno is -EILSEQ if no blobfs exists.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blobfs_bdev_detect(const char *bdev_name,
+ spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Create a blobfs on the given device.
+ *
+ * \param bdev_name Name of block device.
+ * \param cluster_sz Size of cluster in bytes. Must be multiple of 4KiB page size.
+ * \param cb_fn Called when the creation is complete.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blobfs_bdev_create(const char *bdev_name, uint32_t cluster_sz,
+ spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg);
+
+#ifdef SPDK_CONFIG_FUSE
+/**
+ * Mount a blobfs on given device to a host path by FUSE
+ *
+ * A dedicated thread is created for each mountpoint to handle FUSE requests
+ * through the blobfs API.
+ *
+ * \param bdev_name Name of block device.
+ * \param mountpoint Host path to mount blobfs.
+ * \param cb_fn Called when mount operation is complete. fserrno is -EILSEQ if no blobfs exists.
+ * \param cb_arg Argument passed to function cb_fn.
+ */
+void spdk_blobfs_bdev_mount(const char *bdev_name, const char *mountpoint,
+ spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_BLOBFS_BDEV_H */
diff --git a/src/spdk/include/spdk/conf.h b/src/spdk/include/spdk/conf.h
new file mode 100644
index 000000000..4a5292d32
--- /dev/null
+++ b/src/spdk/include/spdk/conf.h
@@ -0,0 +1,215 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Configuration file parser
+ */
+
+#ifndef SPDK_CONF_H
+#define SPDK_CONF_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_conf_value;
+struct spdk_conf_item;
+struct spdk_conf_section;
+struct spdk_conf;
+
+/**
+ * Allocate a configuration struct used for the initialization of SPDK app.
+ *
+ * \return a pointer to the allocated configuration struct.
+ */
+struct spdk_conf *spdk_conf_allocate(void);
+
+/**
+ * Free the configuration struct.
+ *
+ * \param cp Configuration struct to free.
+ */
+void spdk_conf_free(struct spdk_conf *cp);
+
+/**
+ * Read configuration file for spdk_conf struct.
+ *
+ * \param cp Configuration struct used for the initialization of SPDK app.
+ * \param file File to read that is created by user to configure SPDK app.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_conf_read(struct spdk_conf *cp, const char *file);
+
+/**
+ * Find the specified section of the configuration.
+ *
+ * \param cp Configuration struct used for the initialization of SPDK app.
+ * \param name Name of section to find.
+ *
+ * \return a pointer to the requested section on success or NULL otherwise.
+ */
+struct spdk_conf_section *spdk_conf_find_section(struct spdk_conf *cp, const char *name);
+
+/**
+ * Get the first section of the configuration.
+ *
+ * \param cp Configuration struct used for the initialization of SPDK app.
+ *
+ * \return a pointer to the requested section on success or NULL otherwise.
+ */
+struct spdk_conf_section *spdk_conf_first_section(struct spdk_conf *cp);
+
+/**
+ * Get the next section of the configuration.
+ *
+ * \param sp The current section of the configuration.
+ *
+ * \return a pointer to the requested section on success or NULL otherwise.
+ */
+struct spdk_conf_section *spdk_conf_next_section(struct spdk_conf_section *sp);
+
+/**
+ * Match prefix of the name of section.
+ *
+ * \param sp The section of the configuration.
+ * \param name_prefix Prefix name to match.
+ *
+ * \return true on success, false on failure.
+ */
+bool spdk_conf_section_match_prefix(const struct spdk_conf_section *sp, const char *name_prefix);
+
+/**
+ * Get the name of the section.
+ *
+ * \param sp The section of the configuration.
+ *
+ * \return the name of the section.
+ */
+const char *spdk_conf_section_get_name(const struct spdk_conf_section *sp);
+
+/**
+ * Get the number of the section.
+ *
+ * \param sp The section of the configuration.
+ *
+ * \return the number of the section.
+ */
+int spdk_conf_section_get_num(const struct spdk_conf_section *sp);
+
+/**
+ * Get the value of the item with name 'key' in the section.
+ *
+ * If key appears multiple times, idx1 will control which version to retrieve.
+ * Indices will start from the top of the configuration file at 0 and increment
+ * by one for each new appearance. If the configuration key contains multiple
+ * whitespace delimited values, idx2 controls which value is returned. The index
+ * begins at 0.
+ *
+ *
+ * \param sp The section of the configuration.
+ * \param key Name of item.
+ * \param idx1 The index into the item list for the key.
+ * \param idx2 The index into the value list for the item.
+ *
+ * \return the requested value on success or NULL otherwise.
+ */
+char *spdk_conf_section_get_nmval(struct spdk_conf_section *sp, const char *key,
+ int idx1, int idx2);
+
+/**
+ * Get the first value of the item with name 'key' in the section.
+ *
+ * \param sp The section of the configuration.
+ * \param key Name of item.
+ * \param idx The index into the value list for the item.
+ *
+ * \return the requested value on success or NULL otherwise.
+ */
+char *spdk_conf_section_get_nval(struct spdk_conf_section *sp, const char *key, int idx);
+
+/**
+ * Get the first value of the first item with name 'key' in the section.
+ *
+ * \param sp The section of the configuration.
+ * \param key Name of item.
+ *
+ * \return the requested value on success or NULL otherwise.
+ */
+char *spdk_conf_section_get_val(struct spdk_conf_section *sp, const char *key);
+
+/**
+ * Get the first value of the first item with name 'key' in the section.
+ *
+ * \param sp The section of the configuration.
+ * \param key Name of item.
+ *
+ * \return the requested value as an integer on success or a negative value otherwise.
+ */
+int spdk_conf_section_get_intval(struct spdk_conf_section *sp, const char *key);
+
+/**
+ * Get the bool value of the item with name 'key' in the section.
+ *
+ * This is used to check whether the service is enabled.
+ *
+ * \param sp The section of the configuration.
+ * \param key Name of item.
+ * \param default_val Default value.
+ *
+ * \return true if matching 'Yes/Y/True', false if matching 'No/N/False', default value otherwise.
+ */
+bool spdk_conf_section_get_boolval(struct spdk_conf_section *sp, const char *key, bool default_val);
+
+/**
+ * Set the configuration as the default.
+ *
+ * \param cp Configuration to set.
+ */
+void spdk_conf_set_as_default(struct spdk_conf *cp);
+
+/**
+ * Disable sections merging during 'spdk_conf_read()'
+ *
+ * \param cp Configuration to be read
+ */
+void spdk_conf_disable_sections_merge(struct spdk_conf *cp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/cpuset.h b/src/spdk/include/spdk/cpuset.h
new file mode 100644
index 000000000..43f2e7343
--- /dev/null
+++ b/src/spdk/include/spdk/cpuset.h
@@ -0,0 +1,182 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * CPU set management functions
+ */
+
+#ifndef SPDK_CPUSET_H
+#define SPDK_CPUSET_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_CPUSET_SIZE 1024
+
+/**
+ * List of CPUs.
+ */
+struct spdk_cpuset {
+ char str[SPDK_CPUSET_SIZE / 4 + 1];
+ uint8_t cpus[SPDK_CPUSET_SIZE / 8];
+};
+
+/**
+ * Allocate CPU set object.
+ *
+ * \return a pointer to the allocated zeroed cpuset on success, or NULL on failure.
+ */
+struct spdk_cpuset *spdk_cpuset_alloc(void);
+
+/**
+ * Free allocated CPU set.
+ *
+ * \param set CPU set to be freed.
+ */
+void spdk_cpuset_free(struct spdk_cpuset *set);
+
+/**
+ * Compare two CPU sets.
+ *
+ * \param set1 CPU set1.
+ * \param set2 CPU set2.
+ *
+ * \return true if both CPU sets are equal.
+ */
+bool spdk_cpuset_equal(const struct spdk_cpuset *set1, const struct spdk_cpuset *set2);
+
+/**
+ * Copy the content of CPU set to another.
+ *
+ * \param dst Destination CPU set
+ * \param src Source CPU set
+ */
+void spdk_cpuset_copy(struct spdk_cpuset *dst, const struct spdk_cpuset *src);
+
+/**
+ * Perform AND operation on two CPU sets. The result is stored in dst.
+ *
+ * \param dst First argument of operation. This value also stores the result of operation.
+ * \param src Second argument of operation.
+ */
+void spdk_cpuset_and(struct spdk_cpuset *dst, const struct spdk_cpuset *src);
+
+/**
+ * Perform OR operation on two CPU sets. The result is stored in dst.
+ *
+ * \param dst First argument of operation. This value also stores the result of operation.
+ * \param src Second argument of operation.
+ */
+void spdk_cpuset_or(struct spdk_cpuset *dst, const struct spdk_cpuset *src);
+
+/**
+ * Perform XOR operation on two CPU sets. The result is stored in dst.
+ *
+ * \param dst First argument of operation. This value also stores the result of operation.
+ * \param src Second argument of operation.
+ */
+void spdk_cpuset_xor(struct spdk_cpuset *dst, const struct spdk_cpuset *src);
+
+/**
+ * Negate all CPUs in CPU set.
+ *
+ * \param set CPU set to be negated. This value also stores the result of operation.
+ */
+void spdk_cpuset_negate(struct spdk_cpuset *set);
+
+/**
+ * Clear all CPUs in CPU set.
+ *
+ * \param set CPU set to be cleared.
+ */
+void spdk_cpuset_zero(struct spdk_cpuset *set);
+
+/**
+ * Set or clear CPU state in CPU set.
+ *
+ * \param set CPU set object.
+ * \param cpu CPU index to be set or cleared.
+ * \param state *true* to set cpu, *false* to clear.
+ */
+void spdk_cpuset_set_cpu(struct spdk_cpuset *set, uint32_t cpu, bool state);
+
+/**
+ * Get the state of CPU in CPU set.
+ *
+ * \param set CPU set object.
+ * \param cpu CPU index.
+ *
+ * \return the state of selected CPU.
+ */
+bool spdk_cpuset_get_cpu(const struct spdk_cpuset *set, uint32_t cpu);
+
+/**
+ * Get the number of CPUs that are set in CPU set.
+ *
+ * \param set CPU set object.
+ *
+ * \return the number of CPUs.
+ */
+uint32_t spdk_cpuset_count(const struct spdk_cpuset *set);
+
+/**
+ * Convert a CPU set to hex string.
+ *
+ * \param set CPU set.
+ *
+ * \return a pointer to hexadecimal representation of CPU set. Buffer to store a
+ * string is dynamically allocated internally and freed with CPU set object.
+ * Memory returned by this function might be changed after subsequent calls to
+ * this function so string should be copied by user.
+ */
+const char *spdk_cpuset_fmt(struct spdk_cpuset *set);
+
+/**
+ * Convert a string containing a CPU core mask into a CPU set.
+ *
+ * \param set CPU set.
+ * \param mask String defining CPU set. By default hexadecimal value is used or
+ * as CPU list enclosed in square brackets defined as: 'c1[-c2][,c3[-c4],...]'.
+ *
+ * \return zero if success, non zero if fails.
+ */
+int spdk_cpuset_parse(struct spdk_cpuset *set, const char *mask);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SPDK_CPUSET_H */
diff --git a/src/spdk/include/spdk/crc16.h b/src/spdk/include/spdk/crc16.h
new file mode 100644
index 000000000..053fbd5e4
--- /dev/null
+++ b/src/spdk/include/spdk/crc16.h
@@ -0,0 +1,78 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * CRC-16 utility functions
+ */
+
+#ifndef SPDK_CRC16_H
+#define SPDK_CRC16_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * T10-DIF CRC-16 polynomial
+ */
+#define SPDK_T10DIF_CRC16_POLYNOMIAL 0x8bb7u
+
+/**
+ * Calculate T10-DIF CRC-16 checksum.
+ *
+ * \param init_crc Initial CRC-16 value.
+ * \param buf Data buffer to checksum.
+ * \param len Length of buf in bytes.
+ * \return CRC-16 value.
+ */
+uint16_t spdk_crc16_t10dif(uint16_t init_crc, const void *buf, size_t len);
+
+/**
+ * Calculate T10-DIF CRC-16 checksum and copy data.
+ *
+ * \param init_crc Initial CRC-16 value.
+ * \param dst Destination data buffer for copy.
+ * \param src Source data buffer for CRC calculation and copy.
+ * \param len Length of buffer in bytes.
+ * \return CRC-16 value.
+ */
+uint16_t spdk_crc16_t10dif_copy(uint16_t init_crc, uint8_t *dst, uint8_t *src,
+ size_t len);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_CRC16_H */
diff --git a/src/spdk/include/spdk/crc32.h b/src/spdk/include/spdk/crc32.h
new file mode 100644
index 000000000..a2032a25e
--- /dev/null
+++ b/src/spdk/include/spdk/crc32.h
@@ -0,0 +1,73 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * CRC-32 utility functions
+ */
+
+#ifndef SPDK_CRC32_H
+#define SPDK_CRC32_H
+
+#include "spdk/stdinc.h"
+#include "spdk/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Calculate a partial CRC-32 IEEE checksum.
+ *
+ * \param buf Data buffer to checksum.
+ * \param len Length of buf in bytes.
+ * \param crc Previous CRC-32 value.
+ * \return Updated CRC-32 value.
+ */
+uint32_t spdk_crc32_ieee_update(const void *buf, size_t len, uint32_t crc);
+
+/**
+ * Calculate a partial CRC-32C checksum.
+ *
+ * \param buf Data buffer to checksum.
+ * \param len Length of buf in bytes.
+ * \param crc Previous CRC-32C value.
+ * \return Updated CRC-32C value.
+ */
+uint32_t spdk_crc32c_update(const void *buf, size_t len, uint32_t crc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_CRC32_H */
diff --git a/src/spdk/include/spdk/dif.h b/src/spdk/include/spdk/dif.h
new file mode 100644
index 000000000..7d4006dab
--- /dev/null
+++ b/src/spdk/include/spdk/dif.h
@@ -0,0 +1,457 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_DIF_H
+#define SPDK_DIF_H
+
+#include "spdk/stdinc.h"
+#include "spdk/assert.h"
+
+#define SPDK_DIF_FLAGS_REFTAG_CHECK (1U << 26)
+#define SPDK_DIF_FLAGS_APPTAG_CHECK (1U << 27)
+#define SPDK_DIF_FLAGS_GUARD_CHECK (1U << 28)
+
+#define SPDK_DIF_REFTAG_ERROR 0x1
+#define SPDK_DIF_APPTAG_ERROR 0x2
+#define SPDK_DIF_GUARD_ERROR 0x4
+#define SPDK_DIF_DATA_ERROR 0x8
+
+enum spdk_dif_type {
+ SPDK_DIF_DISABLE = 0,
+ SPDK_DIF_TYPE1 = 1,
+ SPDK_DIF_TYPE2 = 2,
+ SPDK_DIF_TYPE3 = 3,
+};
+
+enum spdk_dif_check_type {
+ SPDK_DIF_CHECK_TYPE_REFTAG = 1,
+ SPDK_DIF_CHECK_TYPE_APPTAG = 2,
+ SPDK_DIF_CHECK_TYPE_GUARD = 3,
+};
+
+struct spdk_dif {
+ uint16_t guard;
+ uint16_t app_tag;
+ uint32_t ref_tag;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_dif) == 8, "Incorrect size");
+
+/** DIF context information */
+struct spdk_dif_ctx {
+ /** Block size */
+ uint32_t block_size;
+
+ /** Metadata size */
+ uint32_t md_size;
+
+ /** Metadata location: true if metadata is interleaved with block data, false if separated */
+ bool md_interleave;
+
+ /** Interval for guard computation for DIF */
+ uint32_t guard_interval;
+
+ /** DIF type */
+ enum spdk_dif_type dif_type;
+
+ /** Flags to specify the DIF action */
+ uint32_t dif_flags;
+
+ /** Initial reference tag */
+ uint32_t init_ref_tag;
+
+ /** Application tag */
+ uint16_t app_tag;
+
+ /** Application tag mask */
+ uint16_t apptag_mask;
+
+ /** Byte offset from the start of the whole data buffer. */
+ uint32_t data_offset;
+
+ /** Offset to initial reference tag */
+ uint32_t ref_tag_offset;
+
+ /** Guard value of the last data block.
+ *
+ * Interim guard value is set if the last data block is partial, or
+ * seed value is set otherwise.
+ */
+ uint16_t last_guard;
+
+ /** Seed value for guard computation */
+ uint16_t guard_seed;
+
+ /** Remapped initial reference tag. */
+ uint32_t remapped_init_ref_tag;
+};
+
+/** DIF error information */
+struct spdk_dif_error {
+ /** Error type */
+ uint8_t err_type;
+
+ /** Expected value */
+ uint32_t expected;
+
+ /** Actual value */
+ uint32_t actual;
+
+ /** Offset the error occurred at, block based */
+ uint32_t err_offset;
+};
+
+/**
+ * Initialize DIF context.
+ *
+ * \param ctx DIF context.
+ * \param block_size Block size in a block.
+ * \param md_size Metadata size in a block.
+ * \param md_interleave If true, metadata is interleaved with block data.
+ * If false, metadata is separated with block data.
+ * \param dif_loc DIF location. If true, DIF is set in the first 8 bytes of metadata.
+ * If false, DIF is in the last 8 bytes of metadata.
+ * \param dif_type Type of DIF.
+ * \param dif_flags Flag to specify the DIF action.
+ * \param init_ref_tag Initial reference tag. For type 1, this is the
+ * starting block address.
+ * \param apptag_mask Application tag mask.
+ * \param app_tag Application tag.
+ * \param data_offset Byte offset from the start of the whole data buffer.
+ * \param guard_seed Seed value for guard computation.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_ctx_init(struct spdk_dif_ctx *ctx, uint32_t block_size, uint32_t md_size,
+ bool md_interleave, bool dif_loc, enum spdk_dif_type dif_type, uint32_t dif_flags,
+ uint32_t init_ref_tag, uint16_t apptag_mask, uint16_t app_tag,
+ uint32_t data_offset, uint16_t guard_seed);
+
+/**
+ * Update data offset of DIF context.
+ *
+ * \param ctx DIF context.
+ * \param data_offset Byte offset from the start of the whole data buffer.
+ */
+void spdk_dif_ctx_set_data_offset(struct spdk_dif_ctx *ctx, uint32_t data_offset);
+
+/**
+ * Set remapped initial reference tag of DIF context.
+ *
+ * \param ctx DIF context.
+ * \param remapped_init_ref_tag Remapped initial reference tag. For type 1, this is the
+ * starting block address.
+ */
+void spdk_dif_ctx_set_remapped_init_ref_tag(struct spdk_dif_ctx *ctx,
+ uint32_t remapped_init_ref_tag);
+
+/**
+ * Generate DIF for extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param num_blocks Number of blocks of the payload.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_generate(struct iovec *iovs, int iovcnt, uint32_t num_blocks,
+ const struct spdk_dif_ctx *ctx);
+
+/**
+ * Verify DIF for extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param num_blocks Number of blocks of the payload.
+ * \param ctx DIF context.
+ * \param err_blk Error information of the block in which DIF error is found.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_verify(struct iovec *iovs, int iovcnt, uint32_t num_blocks,
+ const struct spdk_dif_ctx *ctx, struct spdk_dif_error *err_blk);
+
+/**
+ * Calculate CRC-32C checksum for extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param num_blocks Number of blocks of the payload.
+ * \param crc32c Initial and updated CRC-32C value.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_update_crc32c(struct iovec *iovs, int iovcnt, uint32_t num_blocks,
+ uint32_t *crc32c, const struct spdk_dif_ctx *ctx);
+
+/**
+ * Copy data and generate DIF for extended LBA payload.
+ *
+ * \param iovs iovec array describing the LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param bounce_iov A contiguous buffer forming extended LBA payload.
+ * \param num_blocks Number of blocks of the LBA payload.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_generate_copy(struct iovec *iovs, int iovcnt, struct iovec *bounce_iov,
+ uint32_t num_blocks, const struct spdk_dif_ctx *ctx);
+
+/**
+ * Verify DIF and copy data for extended LBA payload.
+ *
+ * \param iovs iovec array describing the LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param bounce_iov A contiguous buffer forming extended LBA payload.
+ * \param num_blocks Number of blocks of the LBA payload.
+ * \param ctx DIF context.
+ * \param err_blk Error information of the block in which DIF error is found.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_verify_copy(struct iovec *iovs, int iovcnt, struct iovec *bounce_iov,
+ uint32_t num_blocks, const struct spdk_dif_ctx *ctx,
+ struct spdk_dif_error *err_blk);
+
+/**
+ * Inject bit flip error to extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param num_blocks Number of blocks of the payload.
+ * \param ctx DIF context.
+ * \param inject_flags Flags to specify the action of error injection.
+ * \param inject_offset Offset, in blocks, to which error is injected.
+ * If multiple errors are injected, only the last injection is stored.
+ *
+ * \return 0 on success and negated errno otherwise including no metadata.
+ */
+int spdk_dif_inject_error(struct iovec *iovs, int iovcnt, uint32_t num_blocks,
+ const struct spdk_dif_ctx *ctx, uint32_t inject_flags,
+ uint32_t *inject_offset);
+
+/**
+ * Generate DIF for separate metadata payload.
+ *
+ * \param iovs iovec array describing the LBA payload.
+ * \param iovcnt Number of elements in iovs.
+ * \param md_iov A contiguous buffer for metadata.
+ * \param num_blocks Number of blocks of the separate metadata payload.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dix_generate(struct iovec *iovs, int iovcnt, struct iovec *md_iov,
+ uint32_t num_blocks, const struct spdk_dif_ctx *ctx);
+
+/**
+ * Verify DIF for separate metadata payload.
+ *
+ * \param iovs iovec array describing the LBA payload.
+ * \param iovcnt Number of elements in iovs.
+ * \param md_iov A contiguous buffer for metadata.
+ * \param num_blocks Number of blocks of the separate metadata payload.
+ * \param ctx DIF context.
+ * \param err_blk Error information of the block in which DIF error is found.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dix_verify(struct iovec *iovs, int iovcnt, struct iovec *md_iov,
+ uint32_t num_blocks, const struct spdk_dif_ctx *ctx,
+ struct spdk_dif_error *err_blk);
+
+/**
+ * Inject bit flip error to separate metadata payload.
+ *
+ * \param iovs iovec array describing the LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param md_iov A contiguous buffer for metadata.
+ * \param num_blocks Number of blocks of the payload.
+ * \param ctx DIF context.
+ * \param inject_flags Flag to specify the action of error injection.
+ * \param inject_offset Offset, in blocks, to which error is injected.
+ * If multiple errors are injected, only the last injection is stored.
+ *
+ * \return 0 on success and negated errno otherwise including no metadata.
+ */
+int spdk_dix_inject_error(struct iovec *iovs, int iovcnt, struct iovec *md_iov,
+ uint32_t num_blocks, const struct spdk_dif_ctx *ctx,
+ uint32_t inject_flags, uint32_t *inject_offset);
+
+/**
+ * Setup iovec array to leave a space for metadata for each block.
+ *
+ * This function is used to leave a space for metadata for each block when
+ * the network socket reads data, or to make the network socket ignore a
+ * space for metadata for each block when the network socket writes data.
+ * This function removes the necessity of data copy in the SPDK application
+ * during DIF insertion and strip.
+ *
+ * When the extended LBA payload is split into multiple data segments,
+ * start of each data segment is passed through the DIF context. data_offset
+ * and data_len are within a data segment.
+ *
+ * \param iovs iovec array set by this function.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param buf_iovs SGL for the buffer to create extended LBA payload.
+ * \param buf_iovcnt Size of the SGL for the buffer to create extended LBA payload.
+ * \param data_offset Offset to store the next incoming data in the current data segment.
+ * \param data_len Expected length of the newly read data in the current data segment of
+ * the extended LBA payload.
+ * \param mapped_len Output parameter that will contain data length mapped by
+ * the iovec array.
+ * \param ctx DIF context.
+ *
+ * \return Number of used elements in the iovec array on success or negated
+ * errno otherwise.
+ */
+int spdk_dif_set_md_interleave_iovs(struct iovec *iovs, int iovcnt,
+ struct iovec *buf_iovs, int buf_iovcnt,
+ uint32_t data_offset, uint32_t data_len,
+ uint32_t *mapped_len,
+ const struct spdk_dif_ctx *ctx);
+
+/**
+ * Generate and insert DIF into metadata space for newly read data block.
+ *
+ * When the extended LBA payload is split into multiple data segments,
+ * start of each data segment is passed through the DIF context. data_offset
+ * and data_len are within a data segment.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param data_offset Offset to the newly read data in the current data segment of
+ * the extended LBA payload.
+ * \param data_len Length of the newly read data in the current data segment of
+ * the extended LBA payload.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_generate_stream(struct iovec *iovs, int iovcnt,
+ uint32_t data_offset, uint32_t data_len,
+ struct spdk_dif_ctx *ctx);
+
+/**
+ * Verify DIF for the to-be-written block of the extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param data_offset Offset to the to-be-written data in the extended LBA payload.
+ * \param data_len Length of the to-be-written data in the extended LBA payload.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_verify_stream(struct iovec *iovs, int iovcnt,
+ uint32_t data_offset, uint32_t data_len,
+ struct spdk_dif_ctx *ctx,
+ struct spdk_dif_error *err_blk);
+
+/**
+ * Calculate CRC-32C checksum of the specified range in the extended LBA payload.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param data_offset Offset to the range
+ * \param data_len Length of the range
+ * \param crc32c Initial and updated CRC-32C value.
+ * \param ctx DIF context.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_update_crc32c_stream(struct iovec *iovs, int iovcnt,
+ uint32_t data_offset, uint32_t data_len,
+ uint32_t *crc32c, const struct spdk_dif_ctx *ctx);
+/**
+ * Convert offset and size from LBA based to extended LBA based.
+ *
+ * \param data_offset Data offset
+ * \param data_len Data length
+ * \param buf_offset Buffer offset converted from data offset.
+ * \param buf_len Buffer length converted from data length
+ * \param ctx DIF context.
+ */
+void spdk_dif_get_range_with_md(uint32_t data_offset, uint32_t data_len,
+ uint32_t *buf_offset, uint32_t *buf_len,
+ const struct spdk_dif_ctx *ctx);
+
+/**
+ * Convert length from LBA based to extended LBA based.
+ *
+ * \param data_len Data length
+ * \param ctx DIF context.
+ *
+ * \return Extended LBA based data length.
+ */
+uint32_t spdk_dif_get_length_with_md(uint32_t data_len, const struct spdk_dif_ctx *ctx);
+
+/**
+ * Remap reference tag for extended LBA payload.
+ *
+ * When using stacked virtual bdev (e.g. split virtual bdev), block address space for I/O
+ * will be remapped during I/O processing and so reference tag will have to be remapped
+ * accordingly. This function is for that case.
+ *
+ * \param iovs iovec array describing the extended LBA payload.
+ * \param iovcnt Number of elements in the iovec array.
+ * \param num_blocks Number of blocks of the payload.
+ * \param dif_ctx DIF context.
+ * \param err_blk Error information of the block in which DIF error is found.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dif_remap_ref_tag(struct iovec *iovs, int iovcnt, uint32_t num_blocks,
+ const struct spdk_dif_ctx *dif_ctx,
+ struct spdk_dif_error *err_blk);
+
+/**
+ * Remap reference tag for separate metadata payload.
+ *
+ * When using stacked virtual bdev (e.g. split virtual bdev), block address space for I/O
+ * will be remapped during I/O processing and so reference tag will have to be remapped
+ * accordingly. This function is for that case.
+ *
+ * \param md_iov A contiguous buffer for metadata.
+ * \param num_blocks Number of blocks of the payload.
+ * \param dif_ctx DIF context.
+ * \param err_blk Error information of the block in which DIF error is found.
+ *
+ * \return 0 on success and negated errno otherwise.
+ */
+int spdk_dix_remap_ref_tag(struct iovec *md_iov, uint32_t num_blocks,
+ const struct spdk_dif_ctx *dif_ctx,
+ struct spdk_dif_error *err_blk);
+#endif /* SPDK_DIF_H */
diff --git a/src/spdk/include/spdk/endian.h b/src/spdk/include/spdk/endian.h
new file mode 100644
index 000000000..116b7fb9c
--- /dev/null
+++ b/src/spdk/include/spdk/endian.h
@@ -0,0 +1,178 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Endian conversion functions
+ */
+
+#ifndef SPDK_ENDIAN_H
+#define SPDK_ENDIAN_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline uint16_t
+from_be16(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint16_t)tmp[0] << 8) | tmp[1]);
+}
+
+static inline void
+to_be16(void *out, uint16_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[0] = (in >> 8) & 0xFF;
+ tmp[1] = in & 0xFF;
+}
+
+static inline uint32_t
+from_be32(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint32_t)tmp[0] << 24) |
+ ((uint32_t)tmp[1] << 16) |
+ ((uint32_t)tmp[2] << 8) |
+ ((uint32_t)tmp[3]));
+}
+
+static inline void
+to_be32(void *out, uint32_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[0] = (in >> 24) & 0xFF;
+ tmp[1] = (in >> 16) & 0xFF;
+ tmp[2] = (in >> 8) & 0xFF;
+ tmp[3] = in & 0xFF;
+}
+
+static inline uint64_t
+from_be64(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint64_t)tmp[0] << 56) |
+ ((uint64_t)tmp[1] << 48) |
+ ((uint64_t)tmp[2] << 40) |
+ ((uint64_t)tmp[3] << 32) |
+ ((uint64_t)tmp[4] << 24) |
+ ((uint64_t)tmp[5] << 16) |
+ ((uint64_t)tmp[6] << 8) |
+ ((uint64_t)tmp[7]));
+}
+
+static inline void
+to_be64(void *out, uint64_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[0] = (in >> 56) & 0xFF;
+ tmp[1] = (in >> 48) & 0xFF;
+ tmp[2] = (in >> 40) & 0xFF;
+ tmp[3] = (in >> 32) & 0xFF;
+ tmp[4] = (in >> 24) & 0xFF;
+ tmp[5] = (in >> 16) & 0xFF;
+ tmp[6] = (in >> 8) & 0xFF;
+ tmp[7] = in & 0xFF;
+}
+
+static inline uint16_t
+from_le16(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint16_t)tmp[1] << 8) | tmp[0]);
+}
+
+static inline void
+to_le16(void *out, uint16_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[1] = (in >> 8) & 0xFF;
+ tmp[0] = in & 0xFF;
+}
+
+static inline uint32_t
+from_le32(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint32_t)tmp[3] << 24) |
+ ((uint32_t)tmp[2] << 16) |
+ ((uint32_t)tmp[1] << 8) |
+ ((uint32_t)tmp[0]));
+}
+
+static inline void
+to_le32(void *out, uint32_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[3] = (in >> 24) & 0xFF;
+ tmp[2] = (in >> 16) & 0xFF;
+ tmp[1] = (in >> 8) & 0xFF;
+ tmp[0] = in & 0xFF;
+}
+
+static inline uint64_t
+from_le64(const void *ptr)
+{
+ const uint8_t *tmp = (const uint8_t *)ptr;
+ return (((uint64_t)tmp[7] << 56) |
+ ((uint64_t)tmp[6] << 48) |
+ ((uint64_t)tmp[5] << 40) |
+ ((uint64_t)tmp[4] << 32) |
+ ((uint64_t)tmp[3] << 24) |
+ ((uint64_t)tmp[2] << 16) |
+ ((uint64_t)tmp[1] << 8) |
+ ((uint64_t)tmp[0]));
+}
+
+static inline void
+to_le64(void *out, uint64_t in)
+{
+ uint8_t *tmp = (uint8_t *)out;
+ tmp[7] = (in >> 56) & 0xFF;
+ tmp[6] = (in >> 48) & 0xFF;
+ tmp[5] = (in >> 40) & 0xFF;
+ tmp[4] = (in >> 32) & 0xFF;
+ tmp[3] = (in >> 24) & 0xFF;
+ tmp[2] = (in >> 16) & 0xFF;
+ tmp[1] = (in >> 8) & 0xFF;
+ tmp[0] = in & 0xFF;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/env.h b/src/spdk/include/spdk/env.h
new file mode 100644
index 000000000..3e2018ac8
--- /dev/null
+++ b/src/spdk/include/spdk/env.h
@@ -0,0 +1,1301 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * Copyright (c) NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Encapsulated third-party dependencies
+ */
+
+#ifndef SPDK_ENV_H
+#define SPDK_ENV_H
+
+#include "spdk/stdinc.h"
+#include "spdk/queue.h"
+#include "spdk/pci_ids.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_ENV_SOCKET_ID_ANY (-1)
+#define SPDK_ENV_LCORE_ID_ANY (UINT32_MAX)
+
+/**
+ * Memory is dma-safe.
+ */
+#define SPDK_MALLOC_DMA 0x01
+
+/**
+ * Memory is sharable across process boundaries.
+ */
+#define SPDK_MALLOC_SHARE 0x02
+
+#define SPDK_MAX_MEMZONE_NAME_LEN 32
+#define SPDK_MAX_MEMPOOL_NAME_LEN 29
+
+/**
+ * Memzone flags
+ */
+#define SPDK_MEMZONE_NO_IOVA_CONTIG 0x00100000 /**< no iova contiguity */
+
+/**
+ * \brief Environment initialization options
+ */
+struct spdk_env_opts { /* filled with defaults by spdk_env_opts_init(), then passed to spdk_env_init() */
+ const char *name;
+ const char *core_mask;
+ int shm_id;
+ int mem_channel;
+ int master_core;
+ int mem_size; /* NOTE(review): unit is not stated in this header - confirm against the env implementation */
+ bool no_pci;
+ bool hugepage_single_segments;
+ bool unlink_hugepage;
+ size_t num_pci_addr; /* NOTE(review): presumably the entry count of pci_blacklist / pci_whitelist - confirm */
+ const char *hugedir;
+ struct spdk_pci_addr *pci_blacklist;
+ struct spdk_pci_addr *pci_whitelist;
+ const char *iova_mode;
+ uint64_t base_virtaddr;
+
+ /** Opaque context for use of the env implementation. */
+ void *env_context;
+};
+
+/**
+ * Allocate dma/sharable memory based on the given flags. It is a memory buffer
+ * with the given size, alignment and socket id.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr **Deprecated**. Please use spdk_vtophys() for retrieving physical
+ * addresses. A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ * \param flags Combination of SPDK_MALLOC flags (\ref SPDK_MALLOC_DMA, \ref SPDK_MALLOC_SHARE).
+ * At least one flag must be specified.
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_malloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags);
+
+/**
+ * Allocate dma/sharable memory based on the given flags. It is a memory buffer
+ * with the given size, alignment and socket id. Also, the buffer will be zeroed.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr **Deprecated**. Please use spdk_vtophys() for retrieving physical
+ * addresses. A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ * \param flags Combination of SPDK_MALLOC flags (\ref SPDK_MALLOC_DMA, \ref SPDK_MALLOC_SHARE).
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_zmalloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags);
+
+/**
+ * Resize a dma/sharable memory buffer with the given new size and alignment.
+ * Existing contents are preserved.
+ *
+ * \param buf Buffer to resize.
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ *
+ * \return a pointer to the resized memory buffer.
+ */
+void *spdk_realloc(void *buf, size_t size, size_t align);
+
+/**
+ * Free buffer memory that was previously allocated with spdk_malloc() or spdk_zmalloc().
+ *
+ * \param buf Buffer to free.
+ */
+void spdk_free(void *buf);
+
+/**
+ * Initialize the default value of opts.
+ *
+ * \param opts Data structure where SPDK will initialize the default options.
+ */
+void spdk_env_opts_init(struct spdk_env_opts *opts);
+
+/**
+ * Initialize or reinitialize the environment library.
+ * For initialization, this must be called prior to using any other functions
+ * in this library. For reinitialization, the parameter `opts` must be set to
+ * NULL and this must be called after the environment library was finished by
+ * spdk_env_fini() within the same process.
+ *
+ * \param opts Environment initialization options.
+ * \return 0 on success, or negative errno on failure.
+ */
+int spdk_env_init(const struct spdk_env_opts *opts);
+
+/**
+ * Release any resources of the environment library that were allocated with
+ * spdk_env_init(). After this call, no SPDK env function calls may be made.
+ * It is expected that common usage of this function is to call it just before
+ * terminating the process or before reinitializing the environment library
+ * within the same process.
+ */
+void spdk_env_fini(void);
+
+/**
+ * Allocate a pinned memory buffer with the given size and alignment.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_dma_malloc(size_t size, size_t align, uint64_t *phys_addr);
+
+/**
+ * Allocate a pinned memory buffer with the given size, alignment and socket id.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_dma_malloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id);
+
+/**
+ * Allocate a pinned memory buffer with the given size and alignment. The buffer
+ * will be zeroed.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_dma_zmalloc(size_t size, size_t align, uint64_t *phys_addr);
+
+/**
+ * Allocate a pinned memory buffer with the given size, alignment and socket id.
+ * The buffer will be zeroed.
+ *
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ *
+ * \return a pointer to the allocated memory buffer.
+ */
+void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id);
+
+/**
+ * Resize the allocated and pinned memory buffer with the given new size and
+ * alignment. Existing contents are preserved.
+ *
+ * \param buf Buffer to resize.
+ * \param size Size in bytes.
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of
+ * align. In this case, it must be a power of two. The returned buffer is always
+ * aligned to at least cache line size.
+ * \param phys_addr A pointer to the variable to hold the physical address of
+ * the allocated buffer is passed. If NULL, the physical address is not returned.
+ *
+ * \return a pointer to the resized memory buffer.
+ */
+void *spdk_dma_realloc(void *buf, size_t size, size_t align, uint64_t *phys_addr);
+
+/**
+ * Free a memory buffer previously allocated, for example from spdk_dma_zmalloc().
+ * This call is never made from the performance path.
+ *
+ * \param buf Buffer to free.
+ */
+void spdk_dma_free(void *buf);
+
+/**
+ * Reserve a named, process shared memory zone with the given size, socket_id
+ * and flags. Unless `SPDK_MEMZONE_NO_IOVA_CONTIG` flag is provided, the returned
+ * memory will be IOVA contiguous.
+ *
+ * \param name Name to set for this memory zone.
+ * \param len Length in bytes.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ * \param flags Flags to set for this memory zone.
+ *
+ * \return a pointer to the allocated memory address on success, or NULL on failure.
+ */
+void *spdk_memzone_reserve(const char *name, size_t len, int socket_id, unsigned flags);
+
+/**
+ * Reserve a named, process shared memory zone with the given size, socket_id,
+ * flags and alignment. Unless `SPDK_MEMZONE_NO_IOVA_CONTIG` flag is provided,
+ * the returned memory will be IOVA contiguous.
+ *
+ * \param name Name to set for this memory zone.
+ * \param len Length in bytes.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ * \param flags Flags to set for this memory zone.
+ * \param align Alignment for resulting memzone. Must be a power of 2.
+ *
+ * \return a pointer to the allocated memory address on success, or NULL on failure.
+ */
+void *spdk_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align);
+
+/**
+ * Lookup the memory zone identified by the given name.
+ *
+ * \param name Name of the memory zone.
+ *
+ * \return a pointer to the reserved memory address on success, or NULL on failure.
+ */
+void *spdk_memzone_lookup(const char *name);
+
+/**
+ * Free the memory zone identified by the given name.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_memzone_free(const char *name);
+
+/**
+ * Dump debug information about all memzones.
+ *
+ * \param f File to write debug information to.
+ */
+void spdk_memzone_dump(FILE *f);
+
+struct spdk_mempool;
+
+#define SPDK_MEMPOOL_DEFAULT_CACHE_SIZE SIZE_MAX
+
+/**
+ * Create a thread-safe memory pool.
+ *
+ * \param name Name for the memory pool.
+ * \param count Count of elements.
+ * \param ele_size Element size in bytes.
+ * \param cache_size How many elements may be cached in per-core caches. Use
+ * SPDK_MEMPOOL_DEFAULT_CACHE_SIZE for a reasonable default, or 0 for no per-core cache.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ *
+ * \return a pointer to the created memory pool.
+ */
+struct spdk_mempool *spdk_mempool_create(const char *name, size_t count,
+ size_t ele_size, size_t cache_size, int socket_id);
+
+/**
+ * An object callback function for memory pool.
+ *
+ * Used by spdk_mempool_create_ctor().
+ */
+typedef void (spdk_mempool_obj_cb_t)(struct spdk_mempool *mp,
+ void *opaque, void *obj, unsigned obj_idx);
+
+/**
+ * Create a thread-safe memory pool with user provided initialization function
+ * and argument.
+ *
+ * \param name Name for the memory pool.
+ * \param count Count of elements.
+ * \param ele_size Element size in bytes.
+ * \param cache_size How many elements may be cached in per-core caches. Use
+ * SPDK_MEMPOOL_DEFAULT_CACHE_SIZE for a reasonable default, or 0 for no per-core cache.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ * \param obj_init User provided object callback initialization function.
+ * \param obj_init_arg User provided callback initialization function argument.
+ *
+ * \return a pointer to the created memory pool.
+ */
+struct spdk_mempool *spdk_mempool_create_ctor(const char *name, size_t count,
+ size_t ele_size, size_t cache_size, int socket_id,
+ spdk_mempool_obj_cb_t *obj_init, void *obj_init_arg);
+
+/**
+ * Get the name of a memory pool.
+ *
+ * \param mp Memory pool to query.
+ *
+ * \return the name of the memory pool.
+ */
+char *spdk_mempool_get_name(struct spdk_mempool *mp);
+
+/**
+ * Free a memory pool.
+ */
+void spdk_mempool_free(struct spdk_mempool *mp);
+
+/**
+ * Get an element from a memory pool. If no elements remain, return NULL.
+ *
+ * \param mp Memory pool to query.
+ *
+ * \return a pointer to the element.
+ */
+void *spdk_mempool_get(struct spdk_mempool *mp);
+
+/**
+ * Get multiple elements from a memory pool.
+ *
+ * \param mp Memory pool to get multiple elements from.
+ * \param ele_arr Array of the elements to fill.
+ * \param count Count of elements to get.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_mempool_get_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count);
+
+/**
+ * Put an element back into the memory pool.
+ *
+ * \param mp Memory pool to put element back into.
+ * \param ele Element to put.
+ */
+void spdk_mempool_put(struct spdk_mempool *mp, void *ele);
+
+/**
+ * Put multiple elements back into the memory pool.
+ *
+ * \param mp Memory pool to put multiple elements back into.
+ * \param ele_arr Array of the elements to put.
+ * \param count Count of elements to put.
+ */
+void spdk_mempool_put_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count);
+
+/**
+ * Get the number of entries in the memory pool.
+ *
+ * \param pool Memory pool to query.
+ *
+ * \return the number of entries in the memory pool.
+ */
+size_t spdk_mempool_count(const struct spdk_mempool *pool);
+
+/**
+ * Iterate through all elements of the pool and call a function on each one.
+ *
+ * \param mp Memory pool to iterate on.
+ * \param obj_cb Function to call on each element.
+ * \param obj_cb_arg Opaque pointer passed to the callback function.
+ *
+ * \return Number of elements iterated.
+ */
+uint32_t spdk_mempool_obj_iter(struct spdk_mempool *mp, spdk_mempool_obj_cb_t obj_cb,
+ void *obj_cb_arg);
+
+/**
+ * Lookup the memory pool identified by the given name.
+ *
+ * \param name Name of the memory pool.
+ *
+ * \return a pointer to the memory pool on success, or NULL on failure.
+ */
+struct spdk_mempool *spdk_mempool_lookup(const char *name);
+
+/**
+ * Get the number of dedicated CPU cores utilized by this env abstraction.
+ *
+ * \return the number of dedicated CPU cores.
+ */
+uint32_t spdk_env_get_core_count(void);
+
+/**
+ * Get the CPU core index of the current thread.
+ *
+ * This will only function when called from threads set up by
+ * this environment abstraction. For any other threads \c SPDK_ENV_LCORE_ID_ANY
+ * will be returned.
+ *
+ * \return the CPU core index of the current thread.
+ */
+uint32_t spdk_env_get_current_core(void);
+
+/**
+ * Get the index of the first dedicated CPU core for this application.
+ *
+ * \return the index of the first dedicated CPU core.
+ */
+uint32_t spdk_env_get_first_core(void);
+
+/**
+ * Get the index of the last dedicated CPU core for this application.
+ *
+ * \return the index of the last dedicated CPU core.
+ */
+uint32_t spdk_env_get_last_core(void);
+
+/**
+ * Get the index of the next dedicated CPU core for this application.
+ *
+ * If there is no next core, return UINT32_MAX.
+ *
+ * \param prev_core Index of previous core.
+ *
+ * \return the index of the next dedicated CPU core.
+ */
+uint32_t spdk_env_get_next_core(uint32_t prev_core);
+
+#define SPDK_ENV_FOREACH_CORE(i) /* iterate i over the indices of the dedicated CPU cores */ \
+ for (i = spdk_env_get_first_core(); \
+ i < UINT32_MAX; /* spdk_env_get_next_core() returns UINT32_MAX when there is no next core */ \
+ i = spdk_env_get_next_core(i))
+
+/**
+ * Get the socket ID for the given core.
+ *
+ * \param core CPU core to query.
+ *
+ * \return the socket ID for the given core.
+ */
+uint32_t spdk_env_get_socket_id(uint32_t core);
+
+typedef int (*thread_start_fn)(void *);
+
+/**
+ * Launch a thread pinned to the given core. Only a single pinned thread may be
+ * launched per core. Subsequent attempts to launch pinned threads on that core
+ * will fail.
+ *
+ * \param core The core to pin the thread to.
+ * \param fn Entry point on the new thread.
+ * \param arg Argument passed to fn.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_env_thread_launch_pinned(uint32_t core, thread_start_fn fn, void *arg);
+
+/**
+ * Wait for all threads to exit before returning.
+ */
+void spdk_env_thread_wait_all(void);
+
+/**
+ * Check whether the calling process is the primary process.
+ *
+ * \return true if the calling process is the primary process, or false otherwise.
+ */
+bool spdk_process_is_primary(void);
+
+/**
+ * Get a monotonic timestamp counter.
+ *
+ * \return the monotonic timestamp counter.
+ */
+uint64_t spdk_get_ticks(void);
+
+/**
+ * Get the tick rate of spdk_get_ticks() per second.
+ *
+ * \return the tick rate of spdk_get_ticks() per second.
+ */
+uint64_t spdk_get_ticks_hz(void);
+
+/**
+ * Delay the given number of microseconds.
+ *
+ * \param us Number of microseconds.
+ */
+void spdk_delay_us(unsigned int us);
+
+/**
+ * Pause CPU execution for a short while
+ */
+void spdk_pause(void);
+
+struct spdk_ring;
+
+enum spdk_ring_type { /* producer/consumer model of a ring; passed as the type argument of spdk_ring_create() */
+ SPDK_RING_TYPE_SP_SC, /**< Single-producer, single-consumer */
+ SPDK_RING_TYPE_MP_SC, /**< Multi-producer, single-consumer */
+ SPDK_RING_TYPE_MP_MC, /**< Multi-producer, multi-consumer */
+};
+
+/**
+ * Create a ring.
+ *
+ * \param type Type for the ring (one of the enum spdk_ring_type values).
+ * \param count Size of the ring in elements.
+ * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY
+ * for any socket.
+ *
+ * \return a pointer to the created ring.
+ */
+struct spdk_ring *spdk_ring_create(enum spdk_ring_type type, size_t count, int socket_id);
+
+/**
+ * Free the ring.
+ *
+ * \param ring Ring to free.
+ */
+void spdk_ring_free(struct spdk_ring *ring);
+
+/**
+ * Get the number of objects in the ring.
+ *
+ * \param ring the ring.
+ *
+ * \return the number of objects in the ring.
+ */
+size_t spdk_ring_count(struct spdk_ring *ring);
+
+/**
+ * Queue the array of objects (with length count) on the ring.
+ *
+ * \param ring A pointer to the ring.
+ * \param objs A pointer to the array to be queued.
+ * \param count Length count of the array of objects.
+ * \param free_space If non-NULL, amount of free space after the enqueue has finished.
+ *
+ * \return the number of objects enqueued.
+ */
+size_t spdk_ring_enqueue(struct spdk_ring *ring, void **objs, size_t count,
+ size_t *free_space);
+
+/**
+ * Dequeue count objects from the ring into the array objs.
+ *
+ * \param ring A pointer to the ring.
+ * \param objs A pointer to the array to be dequeued.
+ * \param count Maximum number of elements to be dequeued.
+ *
+ * \return the number of objects dequeued, which is at most 'count'.
+ */
+size_t spdk_ring_dequeue(struct spdk_ring *ring, void **objs, size_t count);
+
+/**
+ * Reports whether the SPDK application is using the IOMMU for DMA
+ *
+ * \return True if we are using the IOMMU, false otherwise.
+ */
+bool spdk_iommu_is_enabled(void);
+
+#define SPDK_VTOPHYS_ERROR (0xFFFFFFFFFFFFFFFFULL)
+
+/**
+ * Get the physical address of a buffer.
+ *
+ * \param buf A pointer to a buffer.
+ * \param size Contains the size of the memory region pointed to by buf.
+ * If buf is successfully translated, then this is updated with the size of
+ * the memory region for which the translation is valid.
+ *
+ * \return the physical address of this buffer on success, or SPDK_VTOPHYS_ERROR
+ * on failure.
+ */
+uint64_t spdk_vtophys(void *buf, uint64_t *size);
+
+struct spdk_pci_addr { /* full DomainBDF address of a PCI device; the type returned by spdk_pci_device_get_addr() */
+ uint32_t domain; /**< PCI domain */
+ uint8_t bus; /**< PCI bus number */
+ uint8_t dev; /**< Device number within the bus */
+ uint8_t func; /**< Function number */
+};
+
+struct spdk_pci_id { /* PCI identification tuple; the element type of the id_table given to spdk_pci_driver_register() */
+ uint32_t class_id; /**< Class ID or SPDK_PCI_CLASS_ANY_ID. */
+ uint16_t vendor_id; /**< Vendor ID or SPDK_PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or SPDK_PCI_ANY_ID. */
+ uint16_t subvendor_id; /**< Subsystem vendor ID or SPDK_PCI_ANY_ID. */
+ uint16_t subdevice_id; /**< Subsystem device ID or SPDK_PCI_ANY_ID. */
+};
+
+/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
+#define SPDK_PCI_DRIVER_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define SPDK_PCI_DRIVER_WC_ACTIVATE 0x0002
+
+void spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags);
+
+struct spdk_pci_device { /* one PCI device as seen by the env layer: identity, per-device operations, internal state */
+ struct spdk_pci_device *parent;
+ void *dev_handle;
+ struct spdk_pci_addr addr; /* DomainBDF address (see struct spdk_pci_addr) */
+ struct spdk_pci_id id; /* class/vendor/device/subsystem IDs (see struct spdk_pci_id) */
+ int socket_id; /* NOTE(review): presumably the NUMA node reported by spdk_pci_device_get_socket_id() - confirm */
+ const char *type;
+
+ int (*map_bar)(struct spdk_pci_device *dev, uint32_t bar,
+ void **mapped_addr, uint64_t *phys_addr, uint64_t *size); /* parameter types match spdk_pci_device_map_bar() */
+ int (*unmap_bar)(struct spdk_pci_device *dev, uint32_t bar,
+ void *addr); /* parameter types match spdk_pci_device_unmap_bar() */
+ int (*cfg_read)(struct spdk_pci_device *dev, void *value,
+ uint32_t len, uint32_t offset); /* parameter types match spdk_pci_device_cfg_read() */
+ int (*cfg_write)(struct spdk_pci_device *dev, void *value,
+ uint32_t len, uint32_t offset); /* parameter types match spdk_pci_device_cfg_write() */
+
+ struct _spdk_pci_device_internal { /* NOTE(review): named "internal" - presumably not meant for direct use by callers; confirm */
+ struct spdk_pci_driver *driver;
+ bool attached;
+ /* optional fd for exclusive access to this device on this process */
+ int claim_fd;
+ bool pending_removal;
+ /* The device was successfully removed on a DPDK interrupt thread,
+ * but to prevent data races we couldn't remove it from the global
+ * device list right away. It'll be removed as soon as possible
+ * on a regular thread when any public pci function is called.
+ */
+ bool removed;
+ TAILQ_ENTRY(spdk_pci_device) tailq; /* tail-queue linkage between spdk_pci_device entries */
+ } internal;
+};
+
+typedef int (*spdk_pci_enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev);
+
+#define SPDK_PCI_DEVICE(vend, dev) /* designated initializers for one struct spdk_pci_id: only vendor and device are fixed */ \
+ .class_id = SPDK_PCI_CLASS_ANY_ID, \
+ .vendor_id = (vend), \
+ .device_id = (dev), \
+ .subvendor_id = SPDK_PCI_ANY_ID, \
+ .subdevice_id = SPDK_PCI_ANY_ID
+
+#define SPDK_PCI_DRIVER_REGISTER(name, id_table, flags) /* defines a constructor function that registers the driver */ \
+__attribute__((constructor)) static void pci_drv ## _register(void) /* pci_drv is not a macro parameter, so this is always pci_drv_register */ \
+{ /* NOTE(review): a second expansion in the same translation unit would redefine pci_drv_register */ \
+ spdk_pci_driver_register(name, id_table, flags); \
+}
+
+/**
+ * Get the VMD PCI driver object.
+ *
+ * \return PCI driver.
+ */
+struct spdk_pci_driver *spdk_pci_vmd_get_driver(void);
+
+/**
+ * Get the I/OAT PCI driver object.
+ *
+ * \return PCI driver.
+ */
+struct spdk_pci_driver *spdk_pci_ioat_get_driver(void);
+
+/**
+ * Get the IDXD PCI driver object.
+ *
+ * \return PCI driver.
+ */
+struct spdk_pci_driver *spdk_pci_idxd_get_driver(void);
+
+/**
+ * Get the Virtio PCI driver object.
+ *
+ * \return PCI driver.
+ */
+struct spdk_pci_driver *spdk_pci_virtio_get_driver(void);
+
+/**
+ * Get PCI driver by name (e.g. "nvme", "vmd", "ioat").
+ */
+struct spdk_pci_driver *spdk_pci_get_driver(const char *name);
+
+/**
+ * Get the NVMe PCI driver object.
+ *
+ * \return PCI driver.
+ */
+struct spdk_pci_driver *spdk_pci_nvme_get_driver(void);
+
+/**
+ * Enumerate all PCI devices supported by the provided driver and try to
+ * attach those that weren't attached yet. The provided callback will be
+ * called for each such device and its return code will decide whether that
+ * device is attached or not. Attached devices have to be manually detached
+ * with spdk_pci_device_detach() to be attach-able again.
+ *
+ * \param driver Driver for a specific device type.
+ * \param enum_cb Callback to be called for each non-attached PCI device.
+ * The return code can be as follows:
+ * -1 - device was not attached, the enumeration is stopped
+ * 0 - device attached successfully, enumeration continues
+ * 1 - device was not attached, enumeration continues
+ * \param enum_ctx Additional context passed to the callback function.
+ *
+ * \return -1 if an internal error occurred or the provided callback returned -1,
+ * 0 otherwise
+ */
+int spdk_pci_enumerate(struct spdk_pci_driver *driver, spdk_pci_enum_cb enum_cb, void *enum_ctx);
+
+/**
+ * Begin iterating over enumerated PCI devices by calling this function to get
+ * the first PCI device. If there are no PCI devices enumerated, return NULL.
+ *
+ * \return a pointer to a PCI device on success, NULL otherwise.
+ */
+struct spdk_pci_device *spdk_pci_get_first_device(void);
+
+/**
+ * Continue iterating over enumerated PCI devices.
+ * If no additional PCI devices, return NULL
+ *
+ * \param prev Previous PCI device returned from \ref spdk_pci_get_first_device
+ * or \ref spdk_pci_get_next_device
+ *
+ * \return a pointer to the next PCI device on success, NULL otherwise.
+ */
+struct spdk_pci_device *spdk_pci_get_next_device(struct spdk_pci_device *prev);
+
+/**
+ * Map a PCI BAR in the current process.
+ *
+ * \param dev PCI device.
+ * \param bar BAR number.
+ * \param mapped_addr A variable to store the virtual address of the mapping.
+ * \param phys_addr A variable to store the physical address of the mapping.
+ * \param size A variable to store the size of the bar (in bytes).
+ *
+ * \return 0 on success.
+ */
+int spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
+ void **mapped_addr, uint64_t *phys_addr, uint64_t *size);
+
+/**
+ * Unmap a PCI BAR from the current process. This happens automatically when
+ * the PCI device is detached.
+ *
+ * \param dev PCI device.
+ * \param bar BAR number.
+ * \param mapped_addr Virtual address of the bar.
+ *
+ * \return 0 on success.
+ */
+int spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar,
+ void *mapped_addr);
+
+/**
+ * Get the domain of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI device domain.
+ */
+uint32_t spdk_pci_device_get_domain(struct spdk_pci_device *dev);
+
+/**
+ * Get the bus number of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI bus number.
+ */
+uint8_t spdk_pci_device_get_bus(struct spdk_pci_device *dev);
+
+/**
+ * Get the device number within the PCI bus the device is on.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI device number.
+ */
+uint8_t spdk_pci_device_get_dev(struct spdk_pci_device *dev);
+
+/**
+ * Get the particular function number represented by struct spdk_pci_device.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI function number.
+ */
+uint8_t spdk_pci_device_get_func(struct spdk_pci_device *dev);
+
+/**
+ * Get the full DomainBDF address of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI address.
+ */
+struct spdk_pci_addr spdk_pci_device_get_addr(struct spdk_pci_device *dev);
+
+/**
+ * Get the vendor ID of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return vendor ID.
+ */
+uint16_t spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev);
+
+/**
+ * Get the device ID of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return device ID.
+ */
+uint16_t spdk_pci_device_get_device_id(struct spdk_pci_device *dev);
+
+/**
+ * Get the subvendor ID of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return subvendor ID.
+ */
+uint16_t spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev);
+
+/**
+ * Get the subdevice ID of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return subdevice ID.
+ */
+uint16_t spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev);
+
+/**
+ * Get the PCI ID of a PCI device.
+ *
+ * \param dev PCI device.
+ *
+ * \return PCI ID.
+ */
+struct spdk_pci_id spdk_pci_device_get_id(struct spdk_pci_device *dev);
+
+/**
+ * Get the NUMA node the PCI device is on.
+ *
+ * \param dev PCI device.
+ *
+ * \return NUMA node index (>= 0).
+ */
+int spdk_pci_device_get_socket_id(struct spdk_pci_device *dev);
+
+/**
+ * Serialize the PCIe Device Serial Number into the provided buffer.
+ * The buffer will contain a 16-character-long serial number followed by
+ * a NUL terminator.
+ *
+ * \param dev PCI device.
+ * \param sn Buffer to store the serial number in.
+ * \param len Length of buffer. Must be at least 17.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len);
+
+/**
+ * Claim a PCI device for exclusive SPDK userspace access.
+ *
+ * Uses F_SETLK on a shared memory file with the PCI address embedded in its name.
+ * As long as this file remains open with the lock acquired, other processes will
+ * not be able to successfully call this function on the same PCI device.
+ *
+ * The device can be un-claimed by the owning process with spdk_pci_device_unclaim().
+ * It will be also unclaimed automatically when detached.
+ *
+ * \param dev PCI device to claim.
+ *
+ * \return -EACCES if the device has already been claimed,
+ * negative errno on unexpected errors,
+ * 0 on success.
+ */
+int spdk_pci_device_claim(struct spdk_pci_device *dev);
+
+/**
+ * Undo spdk_pci_device_claim().
+ *
+ * \param dev PCI device to unclaim.
+ */
+void spdk_pci_device_unclaim(struct spdk_pci_device *dev);
+
+/**
+ * Release all resources associated with the given device and detach it. As long
+ * as the PCI device is physically available, it will be attachable again.
+ *
+ * \param device PCI device.
+ */
+void spdk_pci_device_detach(struct spdk_pci_device *device);
+
+/**
+ * Attach a PCI device. This will bypass all blacklist rules and explicitly
+ * attach a device at the provided address. The return code of the provided
+ * callback will decide whether that device is attached or not. Attached
+ * devices have to be manually detached with spdk_pci_device_detach() to be
+ * attach-able again.
+ *
+ * \param driver Driver for a specific device type. The device will only be
+ * attached if it's supported by this driver.
+ * \param enum_cb Callback to be called for the PCI device once it's found.
+ * The return code can be as follows:
+ * -1, 1 - an error occurred, fail the attach request entirely
+ * 0 - device attached successfully
+ * \param enum_ctx Additional context passed to the callback function.
+ * \param pci_address Address of the device to attach.
+ *
+ * \return -1 if a device at the provided PCI address couldn't be found,
+ * -1 if an internal error happened or the provided callback returned non-zero,
+ * 0 otherwise
+ */
+int spdk_pci_device_attach(struct spdk_pci_driver *driver, spdk_pci_enum_cb enum_cb,
+ void *enum_ctx, struct spdk_pci_addr *pci_address);
+
+/**
+ * Read \c len bytes from the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param buf A buffer to copy the data into.
+ * \param len Number of bytes to read.
+ * \param offset Offset (in bytes) in the PCI config space to start reading from.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *buf, uint32_t len,
+ uint32_t offset);
+
+/**
+ * Write \c len bytes into the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param buf A buffer to copy the data from.
+ * \param len Number of bytes to write.
+ * \param offset Offset (in bytes) in the PCI config space to start writing to.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *buf, uint32_t len,
+ uint32_t offset);
+
+/**
+ * Read 1 byte from the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A buffer to copy the data into.
+ * \param offset Offset (in bytes) in the PCI config space to start reading from.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset);
+
+/**
+ * Write 1 byte into the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A value to write.
+ * \param offset Offset (in bytes) in the PCI config space to start writing to.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset);
+
+/**
+ * Read 2 bytes from the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A buffer to copy the data into.
+ * \param offset Offset (in bytes) in the PCI config space to start reading from.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset);
+
+/**
+ * Write 2 bytes into the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A value to write.
+ * \param offset Offset (in bytes) in the PCI config space to start writing to.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset);
+
+/**
+ * Read 4 bytes from the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A buffer to copy the data into.
+ * \param offset Offset (in bytes) in the PCI config space to start reading from.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset);
+
+/**
+ * Write 4 bytes into the PCI configuration space.
+ *
+ * \param dev PCI device.
+ * \param value A value to write.
+ * \param offset Offset (in bytes) in the PCI config space to start writing to.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset);
+
+/**
+ * Check if device was requested to be removed from the process. This can be
+ * caused either by physical device hotremoval or OS-triggered removal. In the
+ * latter case, the device may continue to function properly even if this
+ * function returns \c true . The upper-layer driver may check this function
+ * periodically and eventually detach the device.
+ *
+ * \param dev PCI device.
+ *
+ * \return true if the device was requested to be removed, false otherwise.
+ */
+bool spdk_pci_device_is_removed(struct spdk_pci_device *dev);
+
+/**
+ * Compare two PCI addresses.
+ *
+ * \param a1 PCI address 1.
+ * \param a2 PCI address 2.
+ *
+ * \return 0 if a1 == a2, less than 0 if a1 < a2, greater than 0 if a1 > a2
+ */
+int spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2);
+
+/**
+ * Convert a string representation of a PCI address into a struct spdk_pci_addr.
+ *
+ * \param addr PCI address output on success.
+ * \param bdf PCI address in domain:bus:device.function format or
+ * domain.bus.device.function format.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf);
+
+/**
+ * Convert a struct spdk_pci_addr to a string.
+ *
+ * \param bdf Buffer into which the address will be written in the format
+ * domain:bus:device.function. The buffer must be at least 14 characters in size.
+ * \param sz Size of bdf in bytes. Must be at least 14.
+ * \param addr PCI address.
+ *
+ * \return 0 on success, or a negated errno on failure.
+ */
+int spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr);
+
+/**
+ * Hook a custom PCI device into the PCI layer. The device will be attachable,
+ * enumerable, and will call provided callbacks on each PCI resource access
+ * request.
+ *
+ * \param drv driver that will be able to attach the device
+ * \param dev fully initialized PCI device struct
+ */
+void spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev);
+
+/**
+ * Un-hook a custom PCI device from the PCI layer. The device must not be attached.
+ *
+ * \param dev fully initialized PCI device struct
+ */
+void spdk_pci_unhook_device(struct spdk_pci_device *dev);
+
+/**
+ * Return the type of the PCI device.
+ *
+ * \param dev PCI device
+ *
+ * \return string representing the type of the device
+ */
+const char *spdk_pci_device_get_type(const struct spdk_pci_device *dev);
+
+/**
+ * Remove any CPU affinity from the current thread.
+ */
+void spdk_unaffinitize_thread(void);
+
+/**
+ * Call a function with CPU affinity unset.
+ *
+ * This can be used to run a function that creates other threads without inheriting the calling
+ * thread's CPU affinity.
+ *
+ * \param cb Function to call
+ * \param arg Parameter to the function cb().
+ *
+ * \return the return value of cb().
+ */
+void *spdk_call_unaffinitized(void *cb(void *arg), void *arg);
+
+/**
+ * Page-granularity memory address translation table.
+ */
+struct spdk_mem_map;
+
+enum spdk_mem_map_notify_action {
+ SPDK_MEM_MAP_NOTIFY_REGISTER,
+ SPDK_MEM_MAP_NOTIFY_UNREGISTER,
+};
+
+typedef int (*spdk_mem_map_notify_cb)(void *cb_ctx, struct spdk_mem_map *map,
+ enum spdk_mem_map_notify_action action,
+ void *vaddr, size_t size);
+
+typedef int (*spdk_mem_map_contiguous_translations)(uint64_t addr_1, uint64_t addr_2);
+
+/**
+ * A function table to be implemented by each memory map.
+ */
+struct spdk_mem_map_ops {
+ spdk_mem_map_notify_cb notify_cb;
+ spdk_mem_map_contiguous_translations are_contiguous;
+};
+
+/**
+ * Allocate a virtual memory address translation map.
+ *
+ * \param default_translation Default translation for the map.
+ * \param ops Table of callback functions for map operations.
+ * \param cb_ctx Argument passed to the callback function.
+ *
+ * \return a pointer to the allocated virtual memory address translation map.
+ */
+struct spdk_mem_map *spdk_mem_map_alloc(uint64_t default_translation,
+ const struct spdk_mem_map_ops *ops, void *cb_ctx);
+
+/**
+ * Free a memory map previously allocated by spdk_mem_map_alloc().
+ *
+ * \param pmap Memory map to free.
+ */
+void spdk_mem_map_free(struct spdk_mem_map **pmap);
+
+/**
+ * Register an address translation for a range of virtual memory.
+ *
+ * \param map Memory map.
+ * \param vaddr Virtual address of the region to register - must be 2 MB aligned.
+ * \param size Size of the region in bytes - must be a multiple of 2 MB in the
+ * current implementation.
+ * \param translation Translation to store in the map for this address range.
+ *
+ * \sa spdk_mem_map_clear_translation().
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size,
+ uint64_t translation);
+
+/**
+ * Unregister an address translation.
+ *
+ * \param map Memory map.
+ * \param vaddr Virtual address of the region to unregister - must be 2 MB aligned.
+ * \param size Size of the region in bytes - must be a multiple of 2 MB in the
+ * current implementation.
+ *
+ * \sa spdk_mem_map_set_translation().
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size);
+
+/**
+ * Look up the translation of a virtual address in a memory map.
+ *
+ * \param map Memory map.
+ * \param vaddr Virtual address.
+ * \param size Contains the size of the memory region pointed to by vaddr.
+ * If vaddr is successfully translated, then this is updated with the size of
+ * the memory region for which the translation is valid.
+ *
+ * \return the translation of vaddr stored in the map, or default_translation
+ * as specified in spdk_mem_map_alloc() if vaddr is not present in the map.
+ */
+uint64_t spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size);
+
+/**
+ * Register the specified memory region for address translation.
+ *
+ * The memory region must map to pinned huge pages (2MB or greater).
+ *
+ * \param vaddr Virtual address to register.
+ * \param len Length in bytes of the vaddr.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_mem_register(void *vaddr, size_t len);
+
+/**
+ * Unregister the specified memory region from vtophys address translation.
+ *
+ * The caller must ensure all in-flight DMA operations to this memory region
+ * are completed or cancelled before calling this function.
+ *
+ * \param vaddr Virtual address to unregister.
+ * \param len Length in bytes of the vaddr.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_mem_unregister(void *vaddr, size_t len);
+
+/**
+ * Reserve the address space specified in all memory maps.
+ *
+ * This pre-allocates the necessary space in the memory maps such that
+ * future calls to spdk_mem_register() on that region require no
+ * internal memory allocations.
+ *
+ * \param vaddr Virtual address to reserve
+ * \param len Length in bytes of vaddr
+ *
+ * \return 0 on success, negated errno on failure.
+ */
+int spdk_mem_reserve(void *vaddr, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/env_dpdk.h b/src/spdk/include/spdk/env_dpdk.h
new file mode 100644
index 000000000..6716f323c
--- /dev/null
+++ b/src/spdk/include/spdk/env_dpdk.h
@@ -0,0 +1,86 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Encapsulated DPDK specific dependencies
+ */
+
+#include "spdk/stdinc.h"
+
+#ifndef SPDK_ENV_DPDK_H
+#define SPDK_ENV_DPDK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize the environment library after DPDK env is already initialized.
+ * If DPDK's rte_eal_init is already called, this function must be called
+ * instead of spdk_env_init, prior to using any other functions in SPDK
+ * env library.
+ *
+ * \param legacy_mem Indicates whether DPDK was initialized with --legacy-mem
+ * eal parameter.
+ * \return 0 on success, or negative errno on failure.
+ */
+int spdk_env_dpdk_post_init(bool legacy_mem);
+
+/**
+ * Release any resources of the environment library that were allocated with
+ * spdk_env_dpdk_post_init(). After this call, no DPDK function calls may
+ * be made. It is expected that common usage of this function is to call it
+ * just before terminating the process.
+ */
+void spdk_env_dpdk_post_fini(void);
+
+/**
+ * Check if DPDK was initialized external to the SPDK env_dpdk library.
+ *
+ * \return true if DPDK was initialized external to the SPDK env_dpdk library.
+ * \return false otherwise
+ */
+bool spdk_env_dpdk_external_init(void);
+
+/**
+ * Dump the env allocated memory to the given file.
+ *
+ * \param file The file object to write to.
+ */
+void spdk_env_dpdk_dump_mem_stats(FILE *file);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/event.h b/src/spdk/include/spdk/event.h
new file mode 100644
index 000000000..ea870fe9f
--- /dev/null
+++ b/src/spdk/include/spdk/event.h
@@ -0,0 +1,318 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Event framework public API.
+ *
+ * See @ref event_components for an overview of the SPDK event framework API.
+ */
+
+#ifndef SPDK_EVENT_H
+#define SPDK_EVENT_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/cpuset.h"
+#include "spdk/queue.h"
+#include "spdk/log.h"
+#include "spdk/thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Event handler function.
+ *
+ * \param arg1 Argument 1.
+ * \param arg2 Argument 2.
+ */
+typedef void (*spdk_event_fn)(void *arg1, void *arg2);
+
+/**
+ * \brief An event is a function that is passed to and called on an lcore.
+ */
+struct spdk_event;
+
+/**
+ * \brief A poller is a function that is repeatedly called on an lcore.
+ */
+struct spdk_poller;
+
+/**
+ * Callback function for customized shutdown handling of application.
+ */
+typedef void (*spdk_app_shutdown_cb)(void);
+
+/**
+ * Signal handler function.
+ *
+ * \param signal Signal number.
+ */
+typedef void (*spdk_sighandler_t)(int signal);
+
+#define SPDK_DEFAULT_RPC_ADDR "/var/tmp/spdk.sock"
+
+/**
+ * \brief Event framework initialization options
+ */
+struct spdk_app_opts {
+ const char *name;
+ const char *config_file;
+ const char *json_config_file;
+ bool json_config_ignore_errors;
+ const char *rpc_addr; /* Can be UNIX domain socket path or IP address + TCP port */
+ const char *reactor_mask;
+ const char *tpoint_group_mask;
+
+ int shm_id;
+
+ spdk_app_shutdown_cb shutdown_cb;
+ spdk_sighandler_t usr1_handler;
+
+ bool enable_coredump;
+ int mem_channel;
+ int master_core;
+ int mem_size;
+ bool no_pci;
+ bool hugepage_single_segments;
+ bool unlink_hugepage;
+ const char *hugedir;
+ enum spdk_log_level print_level;
+ size_t num_pci_addr;
+ struct spdk_pci_addr *pci_blacklist;
+ struct spdk_pci_addr *pci_whitelist;
+ const char *iova_mode;
+
+ /* DEPRECATED. No longer has any effect.
+ *
+ * The maximum latency allowed when passing an event
+ * from one core to another. A value of 0
+ * means all cores continually poll. This is
+ * specified in microseconds.
+ */
+ uint64_t max_delay_us;
+
+ /* Wait for the associated RPC before initializing subsystems
+ * when this flag is enabled.
+ */
+ bool delay_subsystem_init;
+
+ /* Number of trace entries allocated for each core */
+ uint64_t num_entries;
+
+ /** Opaque context for use of the env implementation. */
+ void *env_context;
+
+ /**
+ * for passing user-provided log call
+ */
+ logfunc *log;
+
+ uint64_t base_virtaddr;
+};
+
+/**
+ * Initialize the default value of opts
+ *
+ * \param opts Data structure where SPDK will initialize the default options.
+ */
+void spdk_app_opts_init(struct spdk_app_opts *opts);
+
+/**
+ * Start the framework.
+ *
+ * Before calling this function, opts must be initialized by
+ * spdk_app_opts_init(). Once started, the framework will call start_fn on
+ * an spdk_thread running on the current system thread with the
+ * argument provided.
+ *
+ * If opts->delay_subsystem_init is set
+ * (e.g. through --wait-for-rpc flag in spdk_app_parse_args())
+ * this function will only start a limited RPC server accepting
+ * only a few RPC commands - mostly related to pre-initialization.
+ * With this option, the framework won't be started and start_fn
+ * won't be called until the user sends an `rpc_framework_start_init`
+ * RPC command, which marks the pre-initialization complete and
+ * allows start_fn to be finally called.
+ *
+ * This call will block until spdk_app_stop() is called. If an error
+ * condition occurs during the initialization code within spdk_app_start(),
+ * this function will immediately return before invoking start_fn.
+ *
+ * \param opts Initialization options used for this application.
+ * \param start_fn Entry point that will execute on an internally created thread
+ * once the framework has been started.
+ * \param ctx Argument passed to function start_fn.
+ *
+ * \return 0 on success or non-zero on failure.
+ */
+int spdk_app_start(struct spdk_app_opts *opts, spdk_msg_fn start_fn,
+ void *ctx);
+
+/**
+ * Perform final shutdown operations on an application using the event framework.
+ */
+void spdk_app_fini(void);
+
+/**
+ * Start shutting down the framework.
+ *
+ * Typically this function is not called directly, and the shutdown process is
+ * started implicitly by a process signal. But in applications that are using
+ * SPDK for a subset of their process threads, this function can be called in lieu
+ * of a signal.
+ */
+void spdk_app_start_shutdown(void);
+
+/**
+ * Stop the framework.
+ *
+ * This does not wait for all threads to exit. Instead, it kicks off the shutdown
+ * process and returns. Once the shutdown process is complete, spdk_app_start()
+ * will return.
+ *
+ * \param rc The rc value specified here will be returned to caller of spdk_app_start().
+ */
+void spdk_app_stop(int rc);
+
+/**
+ * Generate a configuration file that corresponds to the current running state.
+ *
+ * \param config_str Values obtained from the generated configuration file.
+ * \param name Prefix for name of temporary configuration file to save the current config.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_app_get_running_config(char **config_str, char *name);
+
+/**
+ * Return the shared memory id for this application.
+ *
+ * \return shared memory id.
+ */
+int spdk_app_get_shm_id(void);
+
+/**
+ * Convert a string containing a CPU core mask into a bitmask
+ *
+ * \param mask String containing a CPU core mask.
+ * \param cpumask Bitmask of CPU cores.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask);
+
+/**
+ * Get the mask of the CPU cores active for this application
+ *
+ * \return the bitmask of the active CPU cores.
+ */
+struct spdk_cpuset *spdk_app_get_core_mask(void);
+
+#define SPDK_APP_GETOPT_STRING "c:de:ghi:m:n:p:r:s:uvB:L:RW:"
+
+enum spdk_app_parse_args_rvals {
+ SPDK_APP_PARSE_ARGS_HELP = 0,
+ SPDK_APP_PARSE_ARGS_SUCCESS = 1,
+ SPDK_APP_PARSE_ARGS_FAIL = 2
+};
+typedef enum spdk_app_parse_args_rvals spdk_app_parse_args_rvals_t;
+
+/**
+ * Helper function for parsing arguments and printing usage messages.
+ *
+ * \param argc Count of arguments in argv parameter array.
+ * \param argv Array of command line arguments.
+ * \param opts Default options for the application.
+ * \param getopt_str String representing the app-specific command line parameters.
+ * Characters in this string must not conflict with characters in SPDK_APP_GETOPT_STRING.
+ * \param app_long_opts Array of full-name parameters. Can be NULL.
+ * \param parse Function pointer to call if an argument in getopt_str is found.
+ * \param usage Function pointer to print usage messages for app-specific command
+ * line parameters.
+ * \return SPDK_APP_PARSE_ARGS_FAIL on failure, SPDK_APP_PARSE_ARGS_SUCCESS on
+ * success, SPDK_APP_PARSE_ARGS_HELP if '-h' passed as an option.
+ */
+spdk_app_parse_args_rvals_t spdk_app_parse_args(int argc, char **argv,
+ struct spdk_app_opts *opts, const char *getopt_str,
+ struct option *app_long_opts, int (*parse)(int ch, char *arg),
+ void (*usage)(void));
+
+/**
+ * Print usage strings for common SPDK command line options.
+ *
+ * May only be called after spdk_app_parse_args().
+ */
+void spdk_app_usage(void);
+
+/**
+ * Allocate an event to be passed to spdk_event_call().
+ *
+ * \param lcore Lcore to run this event.
+ * \param fn Function used to execute event.
+ * \param arg1 Argument passed to function fn.
+ * \param arg2 Argument passed to function fn.
+ *
+ * \return a pointer to the allocated event.
+ */
+struct spdk_event *spdk_event_allocate(uint32_t lcore, spdk_event_fn fn,
+ void *arg1, void *arg2);
+
+/**
+ * Pass the given event to the associated lcore and call the function.
+ *
+ * \param event Event to execute.
+ */
+void spdk_event_call(struct spdk_event *event);
+
+/**
+ * Enable or disable monitoring of context switches.
+ *
+ * \param enabled True to enable, false to disable.
+ */
+void spdk_framework_enable_context_switch_monitor(bool enabled);
+
+/**
+ * Return whether context switch monitoring is enabled.
+ *
+ * \return true if enabled or false otherwise.
+ */
+bool spdk_framework_context_switch_monitor_enabled(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/fd.h b/src/spdk/include/spdk/fd.h
new file mode 100644
index 000000000..8da7f2cd7
--- /dev/null
+++ b/src/spdk/include/spdk/fd.h
@@ -0,0 +1,69 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * OS filesystem utility functions
+ */
+
+#ifndef SPDK_FD_H
+#define SPDK_FD_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Get the file size.
+ *
+ * \param fd File descriptor.
+ *
+ * \return File size.
+ */
+uint64_t spdk_fd_get_size(int fd);
+
+/**
+ * Get the block size of the file.
+ *
+ * \param fd File descriptor.
+ *
+ * \return Block size.
+ */
+uint32_t spdk_fd_get_blocklen(int fd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/file.h b/src/spdk/include/spdk/file.h
new file mode 100644
index 000000000..59cab4e27
--- /dev/null
+++ b/src/spdk/include/spdk/file.h
@@ -0,0 +1,61 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * file operation functions
+ */
+
+#ifndef SPDK_FILE_H
+#define SPDK_FILE_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Load the input file content into a data buffer.
+ *
+ * \param file File handle.
+ * \param size Number of bytes read from the file.
+ *
+ * \return buffer containing the file content on success, NULL on failure.
+ */
+void *spdk_posix_file_load(FILE *file, size_t *size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/ftl.h b/src/spdk/include/spdk/ftl.h
new file mode 100644
index 000000000..6f85ab371
--- /dev/null
+++ b/src/spdk/include/spdk/ftl.h
@@ -0,0 +1,251 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_FTL_H
+#define SPDK_FTL_H
+
+#include "spdk/stdinc.h"
+#include "spdk/uuid.h"
+#include "spdk/thread.h"
+#include "spdk/bdev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_ftl_dev;
+
+/* Limit thresholds */
+enum {
+ SPDK_FTL_LIMIT_CRIT,
+ SPDK_FTL_LIMIT_HIGH,
+ SPDK_FTL_LIMIT_LOW,
+ SPDK_FTL_LIMIT_START,
+ SPDK_FTL_LIMIT_MAX
+};
+
+struct spdk_ftl_limit {
+ /* Threshold from which the limiting starts */
+ size_t thld;
+
+ /* Limit percentage */
+ size_t limit;
+};
+
+struct spdk_ftl_conf {
+ /* Number of reserved addresses not exposed to the user */
+ size_t lba_rsvd;
+
+ /* Size of the per-io_channel write buffer */
+ size_t write_buffer_size;
+
+ /* Threshold for opening new band */
+ size_t band_thld;
+
+ /* Maximum IO depth per band relocate */
+ size_t max_reloc_qdepth;
+
+ /* Maximum active band relocates */
+ size_t max_active_relocs;
+
+ /* IO pool size per user thread */
+ size_t user_io_pool_size;
+
+ /* Lowest percentage of invalid blocks for a band to be defragged */
+ size_t invalid_thld;
+
+ /* User writes limits */
+ struct spdk_ftl_limit limits[SPDK_FTL_LIMIT_MAX];
+
+ /* Allow for partial recovery from open bands instead of returning error */
+ bool allow_open_bands;
+
+ /* Use append instead of write */
+ bool use_append;
+
+ /* Maximum supported number of IO channels */
+ uint32_t max_io_channels;
+
+ struct {
+ /* Maximum number of concurrent requests */
+ size_t max_request_cnt;
+ /* Maximum number of blocks per one request */
+ size_t max_request_size;
+ } nv_cache;
+
+ /* Create l2p table on l2p_path persistent memory file or device instead of in DRAM */
+ const char *l2p_path;
+};
+
+enum spdk_ftl_mode {
+ /* Create new device */
+ SPDK_FTL_MODE_CREATE = (1 << 0),
+};
+
+struct spdk_ftl_dev_init_opts {
+ /* Underlying device */
+ const char *base_bdev;
+ /* Write buffer cache */
+ const char *cache_bdev;
+
+ /* Thread responsible for core tasks execution */
+ struct spdk_thread *core_thread;
+
+ /* Device's config */
+ const struct spdk_ftl_conf *conf;
+ /* Device's name */
+ const char *name;
+ /* Mode flags */
+ unsigned int mode;
+ /* Device UUID (valid when restoring device from disk) */
+ struct spdk_uuid uuid;
+};
+
+struct spdk_ftl_attrs {
+ /* Device's UUID */
+ struct spdk_uuid uuid;
+ /* Number of logical blocks */
+ uint64_t num_blocks;
+ /* Logical block size */
+ size_t block_size;
+ /* Underlying device */
+ const char *base_bdev;
+ /* Write buffer cache */
+ const char *cache_bdev;
+ /* Number of zones per parallel unit in the underlying device (including any offline ones) */
+ size_t num_zones;
+ /* Number of logical blocks per zone */
+ size_t zone_size;
+ /* Device specific configuration */
+ struct spdk_ftl_conf conf;
+};
+
+typedef void (*spdk_ftl_fn)(void *, int);
+typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int);
+
+/**
+ * Initialize the FTL on given NVMe device and parallel unit range.
+ *
+ * Covers the following:
+ * - retrieve zone device information,
+ * - allocate buffers and resources,
+ * - initialize internal structures,
+ * - initialize internal thread(s),
+ * - restore or create L2P table.
+ *
+ * \param opts configuration for new device
+ * \param cb callback function to call when the device is created
+ * \param cb_arg callback's argument
+ *
+ * \return 0 if initialization was started successfully, negative errno otherwise.
+ */
+int spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *opts, spdk_ftl_init_fn cb, void *cb_arg);
+
+/**
+ * Deinitialize and free given device.
+ *
+ * \param dev device
+ * \param cb callback function to call when the device is freed
+ * \param cb_arg callback's argument
+ *
+ * \return 0 if successfully scheduled free, negative errno otherwise.
+ */
+int spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb, void *cb_arg);
+
+/**
+ * Initialize FTL configuration structure with default values.
+ *
+ * \param conf FTL configuration to initialize
+ */
+void spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf);
+
+/**
+ * Retrieve device’s attributes.
+ *
+ * \param dev device
+ * \param attr Attribute structure to fill
+ */
+void spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attr);
+
+/**
+ * Submits a read to the specified device.
+ *
+ * \param dev Device
+ * \param ch I/O channel
+ * \param lba Starting LBA to read the data
+ * \param lba_cnt Number of sectors to read
+ * \param iov Single IO vector or pointer to IO vector table
+ * \param iov_cnt Number of IO vectors
+ * \param cb_fn Callback function to invoke when the I/O is completed
+ * \param cb_arg Argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, negative errno otherwise.
+ */
+int spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba,
+ size_t lba_cnt,
+ struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg);
+
+/**
+ * Submits a write to the specified device.
+ *
+ * \param dev Device
+ * \param ch I/O channel
+ * \param lba Starting LBA to write the data
+ * \param lba_cnt Number of sectors to write
+ * \param iov Single IO vector or pointer to IO vector table
+ * \param iov_cnt Number of IO vectors
+ * \param cb_fn Callback function to invoke when the I/O is completed
+ * \param cb_arg Argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, negative errno otherwise.
+ */
+int spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba,
+ size_t lba_cnt,
+ struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg);
+
+/**
+ * Submits a flush request to the specified device.
+ *
+ * \param dev device
+ * \param cb_fn Callback function to invoke when all prior IOs have been completed
+ * \param cb_arg Argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, negative errno otherwise.
+ */
+int spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_FTL_H */
diff --git a/src/spdk/include/spdk/gpt_spec.h b/src/spdk/include/spdk/gpt_spec.h
new file mode 100644
index 000000000..c67eb572e
--- /dev/null
+++ b/src/spdk/include/spdk/gpt_spec.h
@@ -0,0 +1,144 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * GUID Partition Table (GPT) specification definitions
+ */
+
+#ifndef SPDK_GPT_SPEC_H
+#define SPDK_GPT_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/assert.h"
+
+#pragma pack(push, 1)
+
+#define SPDK_MBR_SIGNATURE 0xAA55
+
+#define SPDK_MBR_OS_TYPE_GPT_PROTECTIVE 0xEE
+#define SPDK_MBR_OS_TYPE_EFI_SYSTEM_PARTITION 0xEF
+
+struct spdk_mbr_chs {
+ uint8_t head;
+ uint16_t sector : 6;
+ uint16_t cylinder : 10;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr_chs) == 3, "size incorrect");
+
+struct spdk_mbr_partition_entry {
+ uint8_t reserved : 7;
+ uint8_t bootable : 1;
+
+ struct spdk_mbr_chs start_chs;
+
+ uint8_t os_type;
+
+ struct spdk_mbr_chs end_chs;
+
+ uint32_t start_lba;
+ uint32_t size_lba;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr_partition_entry) == 16, "size incorrect");
+
+struct spdk_mbr {
+ uint8_t boot_code[440];
+ uint32_t disk_signature;
+ uint16_t reserved_444;
+ struct spdk_mbr_partition_entry partitions[4];
+ uint16_t mbr_signature;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr) == 512, "size incorrect");
+
+#define SPDK_GPT_SIGNATURE "EFI PART"
+
+#define SPDK_GPT_REVISION_1_0 0x00010000u
+
+struct spdk_gpt_guid {
+ uint8_t raw[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_guid) == 16, "size incorrect");
+
+#define SPDK_GPT_GUID(a, b, c, d, e) \
+	(struct spdk_gpt_guid){{ /* a, b, c are stored low byte first; d, e high byte first */ \
+		(uint8_t)(a), (uint8_t)(((uint32_t)(a)) >> 8), \
+		(uint8_t)(((uint32_t)(a)) >> 16), (uint8_t)(((uint32_t)(a)) >> 24), \
+		(uint8_t)(b), (uint8_t)(((uint16_t)(b)) >> 8), \
+		(uint8_t)(c), (uint8_t)(((uint16_t)(c)) >> 8), \
+		(uint8_t)(((uint16_t)(d)) >> 8), (uint8_t)(d), \
+		(uint8_t)(((uint64_t)(e)) >> 40), (uint8_t)(((uint64_t)(e)) >> 32), (uint8_t)(((uint64_t)(e)) >> 24), \
+		(uint8_t)(((uint64_t)(e)) >> 16), (uint8_t)(((uint64_t)(e)) >> 8), (uint8_t)(e) \
+	}}
+
+#define SPDK_GPT_PART_TYPE_UNUSED SPDK_GPT_GUID(0x00000000, 0x0000, 0x0000, 0x0000, 0x000000000000)
+#define SPDK_GPT_PART_TYPE_EFI_SYSTEM_PARTITION SPDK_GPT_GUID(0xC12A7328, 0xF81F, 0x11D2, 0xBA4B, 0x00A0C93EC93B)
+#define SPDK_GPT_PART_TYPE_LEGACY_MBR SPDK_GPT_GUID(0x024DEE41, 0x33E7, 0x11D3, 0x9D69, 0x0008C781F39F)
+
+struct spdk_gpt_header {
+ char gpt_signature[8];
+ uint32_t revision;
+ uint32_t header_size;
+ uint32_t header_crc32;
+ uint32_t reserved;
+ uint64_t my_lba;
+ uint64_t alternate_lba;
+ uint64_t first_usable_lba;
+ uint64_t last_usable_lba;
+ struct spdk_gpt_guid disk_guid;
+ uint64_t partition_entry_lba;
+ uint32_t num_partition_entries;
+ uint32_t size_of_partition_entry;
+ uint32_t partition_entry_array_crc32;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_header) == 92, "size incorrect");
+
+struct spdk_gpt_partition_entry {
+ struct spdk_gpt_guid part_type_guid;
+ struct spdk_gpt_guid unique_partition_guid;
+ uint64_t starting_lba;
+ uint64_t ending_lba;
+ struct {
+ uint64_t required : 1;
+ uint64_t no_block_io_proto : 1;
+ uint64_t legacy_bios_bootable : 1;
+ uint64_t reserved_uefi : 45;
+ uint64_t guid_specific : 16;
+ } attr;
+ uint16_t partition_name[36];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_partition_entry) == 128, "size incorrect");
+
+#pragma pack(pop)
+
+#endif
diff --git a/src/spdk/include/spdk/histogram_data.h b/src/spdk/include/spdk/histogram_data.h
new file mode 100644
index 000000000..5f114fe69
--- /dev/null
+++ b/src/spdk/include/spdk/histogram_data.h
@@ -0,0 +1,264 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Generic histogram library
+ */
+
+#ifndef _SPDK_HISTOGRAM_DATA_H_
+#define _SPDK_HISTOGRAM_DATA_H_
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_HISTOGRAM_BUCKET_SHIFT_DEFAULT 7
+#define SPDK_HISTOGRAM_BUCKET_SHIFT(h) ((h)->bucket_shift) /* parenthesized so h may be any pointer expression */
+#define SPDK_HISTOGRAM_BUCKET_LSB(h) (64 - SPDK_HISTOGRAM_BUCKET_SHIFT(h))
+#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) (1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT(h))
+#define SPDK_HISTOGRAM_BUCKET_MASK(h) (SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) - 1)
+#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES(h) (SPDK_HISTOGRAM_BUCKET_LSB(h) + 1)
+#define SPDK_HISTOGRAM_NUM_BUCKETS(h) (SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) * \
+ SPDK_HISTOGRAM_NUM_BUCKET_RANGES(h))
+
+/*
+ * SPDK histograms are implemented using ranges of bucket arrays. The most common usage
+ * model is using TSC datapoints to capture an I/O latency histogram. For this usage model,
+ * the histogram tracks only TSC deltas - any translation to microseconds is done by the
+ * histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform
+ * the translations.
+ *
+ * Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE,
+ * which is 128 by default. The buckets in ranges 0 and 1 each map to one specific datapoint value.
+ * The buckets in subsequent ranges each map to twice as many datapoint values as buckets
+ * in the range before it:
+ *
+ * Range 0: 1 value each - 128 buckets cover 0 to 127 (2^7-1)
+ * Range 1: 1 value each - 128 buckets cover 128 to 255 (2^8-1)
+ * Range 2: 2 values each - 128 buckets cover 256 to 511 (2^9-1)
+ * Range 3: 4 values each - 128 buckets cover 512 to 1023 (2^10-1)
+ * Range 4: 8 values each - 128 buckets cover 1024 to 2047 (2^11-1)
+ * Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1)
+ * ...
+ * Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1
+ * Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1
+ * Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1
+ *
+ * On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet
+ * spot for Intel Optane SSD latency testing).
+ *
+ * Buckets can be made more granular by increasing the histogram's bucket shift. This
+ * comes at the cost of additional storage per namespace context to store the bucket data.
+ */
+
+struct spdk_histogram_data {
+ /* Per-histogram state; the bucket layout is described in the comment above. */
+ uint32_t bucket_shift; /* log2 of the number of buckets in each range */
+ uint64_t *bucket; /* counters for every range, stored range after range */
+
+};
+
+static inline void
+__spdk_histogram_increment(struct spdk_histogram_data *h, uint32_t range, uint32_t index)
+{
+	/* Buckets are stored range after range; each range spans (1 << bucket_shift) slots. */
+	uint32_t slot = (range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index;
+
+	h->bucket[slot]++;
+}
+
+static inline uint64_t
+__spdk_histogram_get_count(const struct spdk_histogram_data *h, uint32_t range, uint32_t index)
+{
+ return h->bucket[(range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index];
+}
+
+static inline uint64_t *
+__spdk_histogram_get_bucket(const struct spdk_histogram_data *h, uint32_t range, uint32_t index)
+{
+ return &h->bucket[(range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index];
+}
+
+static inline void
+spdk_histogram_data_reset(struct spdk_histogram_data *histogram)
+{
+ memset(histogram->bucket, 0, SPDK_HISTOGRAM_NUM_BUCKETS(histogram) * sizeof(uint64_t));
+}
+
+static inline uint32_t
+__spdk_histogram_data_get_bucket_range(struct spdk_histogram_data *h, uint64_t datapoint)
+{
+	uint32_t clz, range;
+
+	/*
+	 * The range follows from the highest set bit. __builtin_clzll(0) is undefined,
+	 * so a zero datapoint is counted as 64 leading zeros, which selects range 0.
+	 */
+	clz = datapoint > 0 ? __builtin_clzll(datapoint) : 64;
+	if (clz <= SPDK_HISTOGRAM_BUCKET_LSB(h)) {
+		range = SPDK_HISTOGRAM_BUCKET_LSB(h) - clz;
+	} else {
+		range = 0;
+	}
+	return range;
+}
+
+static inline uint32_t
+__spdk_histogram_data_get_bucket_index(struct spdk_histogram_data *h, uint64_t datapoint,
+				       uint32_t range)
+{
+	/*
+	 * Ranges 0 and 1 use the datapoint's low bits unshifted; every later
+	 * range drops (range - 1) low-order bits before masking.
+	 */
+	uint32_t drop_bits = (range == 0) ? 0 : range - 1;
+	uint64_t shifted = datapoint >> drop_bits;
+
+	/* Keep only the bits that select a bucket within the range. */
+	return shifted & SPDK_HISTOGRAM_BUCKET_MASK(h);
+}
+
+static inline void
+spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint)
+{
+	/* Pick the range first, then the slot inside it, and bump that counter. */
+	uint32_t range = __spdk_histogram_data_get_bucket_range(histogram, datapoint);
+	__spdk_histogram_increment(histogram, range,
+				   __spdk_histogram_data_get_bucket_index(histogram, datapoint, range));
+}
+
+static inline uint64_t
+__spdk_histogram_data_get_bucket_start(const struct spdk_histogram_data *h, uint32_t range,
+				       uint32_t index)
+{
+	/* The boundary is computed for index + 1, i.e. one bucket past the one given. */
+	uint32_t next = index + 1;
+
+	if (range == 0) {
+		/* In range 0 the boundary is simply the incremented index. */
+		return next;
+	}
+
+	/* First value of the range plus the offset of the bucket inside it. */
+	return (1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT(h) - 1)) +
+	       ((uint64_t)next << (range - 1));
+}
+
+typedef void (*spdk_histogram_data_fn)(void *ctx, uint64_t start, uint64_t end, uint64_t count,
+ uint64_t total, uint64_t so_far);
+
+static inline void
+spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram,
+			    spdk_histogram_data_fn fn, void *ctx)
+{
+	uint64_t range, idx, cnt;
+	uint64_t total = 0, so_far = 0;
+	uint64_t lower, upper = 0;
+
+	/* First pass: the grand total is handed to every callback invocation. */
+	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES(histogram); range++) {
+		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(histogram); idx++) {
+			total += __spdk_histogram_get_count(histogram, range, idx);
+		}
+	}
+
+	/* Second pass: report the buckets in order together with a running count. */
+	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES(histogram); range++) {
+		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(histogram); idx++) {
+			cnt = __spdk_histogram_get_count(histogram, range, idx);
+			so_far += cnt;
+
+			/* Each bucket is reported as starting where the previous one ended. */
+			lower = upper;
+			upper = __spdk_histogram_data_get_bucket_start(histogram, range, idx);
+			fn(ctx, lower, upper, cnt, total, so_far);
+		}
+	}
+}
+
+static inline void
+spdk_histogram_data_merge(const struct spdk_histogram_data *dst,
+			  const struct spdk_histogram_data *src)
+{
+	/* Unequal shifts give unequal array sizes and bucket meanings, so merge nothing then. */
+	uint64_t i, n = dst->bucket_shift == src->bucket_shift ? SPDK_HISTOGRAM_NUM_BUCKETS(dst) : 0;
+	for (i = 0; i < n; i++) {
+		dst->bucket[i] += src->bucket[i];
+	}
+}
+
+static inline struct spdk_histogram_data *
+spdk_histogram_data_alloc_sized(uint32_t bucket_shift)
+{
+	struct spdk_histogram_data *hist;
+
+	/* The descriptor and its bucket array are two separate zero-filled allocations. */
+	hist = (struct spdk_histogram_data *)calloc(1, sizeof(*hist));
+	if (!hist) {
+		return NULL;
+	}
+	/* Store the shift first: the bucket count macro reads it back through hist. */
+	hist->bucket_shift = bucket_shift;
+	hist->bucket = (uint64_t *)calloc(SPDK_HISTOGRAM_NUM_BUCKETS(hist), sizeof(*hist->bucket));
+	if (hist->bucket) {
+		return hist;
+	}
+	free(hist);
+	return NULL;
+}
+
+static inline struct spdk_histogram_data *
+spdk_histogram_data_alloc(void)
+{
+ return spdk_histogram_data_alloc_sized(SPDK_HISTOGRAM_BUCKET_SHIFT_DEFAULT);
+}
+
+static inline void
+spdk_histogram_data_free(struct spdk_histogram_data *h)
+{
+	/* A NULL histogram is accepted and ignored. */
+	if (h != NULL) {
+		/* Release the bucket array before the descriptor that points to it. */
+		free(h->bucket);
+		free(h);
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/idxd.h b/src/spdk/include/spdk/idxd.h
new file mode 100644
index 000000000..cb9ebe8b8
--- /dev/null
+++ b/src/spdk/include/spdk/idxd.h
@@ -0,0 +1,418 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * IDXD driver public interface
+ */
+
+#ifndef SPDK_IDXD_H
+#define SPDK_IDXD_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/env.h"
+
+/**
+ * Opaque handle for a single IDXD channel.
+ */
+struct spdk_idxd_io_channel;
+
+/**
+ * Opaque handle for a single IDXD device.
+ */
+struct spdk_idxd_device;
+
+/**
+ * Opaque handle for batching.
+ */
+struct idxd_batch;
+
+/**
+ * Configure an IDXD channel.
+ *
+ * \param chan IDXD channel to be configured.
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
+
+/**
+ * Reconfigures this channel based on how many current channels there are.
+ *
+ * \param chan IDXD channel to be set.
+ * \param num_channels total number of channels in use.
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels);
+
+/**
+ * Signature for callback function invoked when a request is completed.
+ *
+ * \param arg User-specified opaque value corresponding to cb_arg from the
+ * request submission.
+ * \param status 0 on success, negative errno on failure.
+ */
+typedef void (*spdk_idxd_req_cb)(void *arg, int status);
+
+/**
+ * Callback for spdk_idxd_probe() enumeration.
+ *
+ * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe().
+ * \param pci_dev PCI device that is being probed.
+ *
+ * \return true to attach to this device.
+ */
+typedef bool (*spdk_idxd_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev);
+
+/**
+ * Callback for spdk_idxd_probe() to report a device that has been attached to
+ * the userspace IDXD driver.
+ *
+ * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe().
+ * \param pci_dev PCI device that was attached to the driver.
+ * \param idxd IDXD device that was attached to the driver.
+ */
+typedef void (*spdk_idxd_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev,
+ struct spdk_idxd_device *idxd);
+
+/**
+ * Enumerate the IDXD devices attached to the system and attach the userspace
+ * IDXD driver to them if desired.
+ *
+ * If called more than once, only devices that are not already attached to the
+ * SPDK IDXD driver will be reported.
+ *
+ * To stop using the controller and release its associated resources, call
+ * spdk_idxd_detach() with the IDXD device instance passed to attach_cb.
+ *
+ * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of
+ * the callbacks.
+ * \param probe_cb will be called once per IDXD device found in the system.
+ * \param attach_cb will be called for devices for which probe_cb returned true
+ * once the IDXD controller has been attached to the userspace driver.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb);
+
+/**
+ * Detach specified device returned by spdk_idxd_probe() from the IDXD driver.
+ *
+ * \param idxd IDXD device to detach from the driver.
+ */
+void spdk_idxd_detach(struct spdk_idxd_device *idxd);
+
+/**
+ * Sets the IDXD configuration.
+ *
+ * \param config_number the configuration number for a valid IDXD config.
+ */
+void spdk_idxd_set_config(uint32_t config_number);
+
+/**
+ * Return the max number of descriptors per batch for IDXD.
+ *
+ * \return max number of descriptors per batch.
+ */
+uint32_t spdk_idxd_batch_get_max(void);
+
+/**
+ * Create a batch sequence.
+ *
+ * \param chan IDXD channel to submit request.
+ *
+ * \return handle to use for subsequent batch requests, NULL on failure.
+ */
+struct idxd_batch *spdk_idxd_batch_create(struct spdk_idxd_io_channel *chan);
+
+/**
+ * Submit a batch sequence.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_submit(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Cancel a batch sequence.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_cancel(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch);
+
+/**
+ * Synchronous call to prepare a copy request into a previously initialized batch
+ * created with spdk_idxd_batch_create(). The callback will be called when the copy
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_idxd_batch_submit().
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param dst Destination virtual address.
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_prep_copy(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ void *dst, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a dualcast request into a previously initialized batch
+ * created with spdk_idxd_batch_create(). The callback will be called when the dualcast
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_idxd_batch_submit().
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param dst1 First destination virtual address (must be 4K aligned).
+ * \param dst2 Second destination virtual address (must be 4K aligned).
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_prep_dualcast(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ void *dst1, void *dst2, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Build and submit an idxd memory copy request.
+ *
+ * This function will build the copy descriptor and then immediately submit
+ * by writing to the proper device portal.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param dst Destination virtual address.
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan,
+ void *dst, const void *src, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Build and submit an idxd dualcast request.
+ *
+ * This function will build the dual cast descriptor and then immediately submit
+ * by writing to the proper device portal.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param dst1 First destination virtual address (must be 4K aligned).
+ * \param dst2 Second destination virtual address (must be 4K aligned).
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan,
+ void *dst1, void *dst2, const void *src, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a compare request into a previously initialized batch
+ * created with spdk_idxd_batch_create(). The callback will be called when the compare
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_idxd_batch_submit().
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param src1 First source to compare.
+ * \param src2 Second source to compare.
+ * \param nbytes Number of bytes to compare.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_prep_compare(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ void *src1, void *src2, uint64_t nbytes, spdk_idxd_req_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Build and submit a memory compare request.
+ *
+ * This function will build the compare descriptor and then immediately submit
+ * by writing to the proper device portal.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param src1 First source to compare.
+ * \param src2 Second source to compare.
+ * \param nbytes Number of bytes to compare.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan,
+ void *src1, const void *src2, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a fill request into a previously initialized batch
+ * created with spdk_idxd_batch_create(). The callback will be called when the fill
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_idxd_batch_submit().
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param dst Destination virtual address.
+ * \param fill_pattern Repeating eight-byte pattern to use for memory fill.
+ * \param nbytes Number of bytes to fill.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_prep_fill(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ void *dst, uint64_t fill_pattern, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Build and submit an idxd memory fill request.
+ *
+ * This function will build the fill descriptor and then immediately submit
+ * by writing to the proper device portal.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param dst Destination virtual address.
+ * \param fill_pattern Repeating eight-byte pattern to use for memory fill.
+ * \param nbytes Number of bytes to fill.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter
+ * in the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan,
+ void *dst, uint64_t fill_pattern, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Synchronous call to prepare a crc32c request into a previously initialized batch
+ * created with spdk_idxd_batch_create(). The callback will be called when the crc32c
+ * completes after the batch has been submitted by an asynchronous call to
+ * spdk_idxd_batch_submit().
+ *
+ * \param chan IDXD channel to submit request.
+ * \param batch Handle provided when the batch was started with spdk_idxd_batch_create().
+ * \param dst Resulting calculation.
+ * \param src Source virtual address.
+ * \param seed Four byte CRC-32C seed value.
+ * \param nbytes Number of bytes to calculate on.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_batch_prep_crc32c(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch,
+ uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Build and submit a memory CRC32-C request.
+ *
+ * This function will build the CRC-32C descriptor and then immediately submit
+ * by writing to the proper device portal.
+ *
+ * \param chan IDXD channel to submit request.
+ * \param dst Resulting calculation.
+ * \param src Source virtual address.
+ * \param seed Four byte CRC-32C seed value.
+ * \param nbytes Number of bytes to calculate on.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter
+ * in the completion callback.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan, uint32_t *dst, void *src,
+ uint32_t seed, uint64_t nbytes,
+ spdk_idxd_req_cb cb_fn, void *cb_arg);
+
+/**
+ * Check for completed requests on an IDXD channel.
+ *
+ * \param chan IDXD channel to check for completions.
+ */
+void spdk_idxd_process_events(struct spdk_idxd_io_channel *chan);
+
+/**
+ * Returns an IDXD channel for a given IDXD device.
+ *
+ * \param idxd IDXD device to get a channel for.
+ *
+ * \return pointer to an IDXD channel.
+ */
+struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd);
+
+/**
+ * Free an IDXD channel.
+ *
+ * \param chan IDXD channel to free.
+ */
+void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/ioat.h b/src/spdk/include/spdk/ioat.h
new file mode 100644
index 000000000..c4e66be3b
--- /dev/null
+++ b/src/spdk/include/spdk/ioat.h
@@ -0,0 +1,244 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * I/OAT DMA engine driver public interface
+ */
+
+#ifndef SPDK_IOAT_H
+#define SPDK_IOAT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/env.h"
+
+/**
+ * Opaque handle for a single I/OAT channel returned by \ref spdk_ioat_probe().
+ */
+struct spdk_ioat_chan;
+
+/**
+ * Signature for callback function invoked when a request is completed.
+ *
+ * \param arg User-specified opaque value corresponding to cb_arg from the
+ * request submission.
+ */
+typedef void (*spdk_ioat_req_cb)(void *arg);
+
+/**
+ * Callback for spdk_ioat_probe() enumeration.
+ *
+ * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_ioat_probe().
+ * \param pci_dev PCI device that is being probed.
+ *
+ * \return true to attach to this device.
+ */
+typedef bool (*spdk_ioat_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev);
+
+/**
+ * Callback for spdk_ioat_probe() to report a device that has been attached to
+ * the userspace I/OAT driver.
+ *
+ * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_ioat_probe().
+ * \param pci_dev PCI device that was attached to the driver.
+ * \param ioat I/OAT channel that was attached to the driver.
+ */
+typedef void (*spdk_ioat_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev,
+ struct spdk_ioat_chan *ioat);
+
+/**
+ * Enumerate the I/OAT devices attached to the system and attach the userspace
+ * I/OAT driver to them if desired.
+ *
+ * If called more than once, only devices that are not already attached to the
+ * SPDK I/OAT driver will be reported.
+ *
+ * To stop using the controller and release its associated resources, call
+ * spdk_ioat_detach() with the I/OAT channel instance passed to attach_cb.
+ *
+ * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of
+ * the callbacks.
+ * \param probe_cb will be called once per I/OAT device found in the system.
+ * \param attach_cb will be called for devices for which probe_cb returned true
+ * once the I/OAT controller has been attached to the userspace driver.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb);
+
+/**
+ * Detach specified device returned by spdk_ioat_probe() from the I/OAT driver.
+ *
+ * \param ioat I/OAT channel to detach from the driver.
+ */
+void spdk_ioat_detach(struct spdk_ioat_chan *ioat);
+
+/**
+ * Get the maximum number of descriptors supported by the library.
+ *
+ * \param chan I/OAT channel
+ *
+ * \return maximum number of descriptors.
+ */
+uint32_t spdk_ioat_get_max_descriptors(struct spdk_ioat_chan *chan);
+
+/**
+ * Build a DMA engine memory copy request.
+ *
+ * This function will build the descriptor in the channel's ring. The
+ * caller must also explicitly call spdk_ioat_flush to submit the
+ * descriptor, possibly after building additional descriptors.
+ *
+ * \param chan I/OAT channel to build request.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param dst Destination virtual address.
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_ioat_build_copy(struct spdk_ioat_chan *chan,
+ void *cb_arg, spdk_ioat_req_cb cb_fn,
+ void *dst, const void *src, uint64_t nbytes);
+
+/**
+ * Build and submit a DMA engine memory copy request.
+ *
+ * This function will build the descriptor in the channel's ring and then
+ * immediately submit it by writing the channel's doorbell. Calling this
+ * function does not require a subsequent call to spdk_ioat_flush.
+ *
+ * \param chan I/OAT channel to submit request.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter in
+ * the completion callback.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param dst Destination virtual address.
+ * \param src Source virtual address.
+ * \param nbytes Number of bytes to copy.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_ioat_submit_copy(struct spdk_ioat_chan *chan,
+ void *cb_arg, spdk_ioat_req_cb cb_fn,
+ void *dst, const void *src, uint64_t nbytes);
+
+/**
+ * Build a DMA engine memory fill request.
+ *
+ * This function will build the descriptor in the channel's ring. The
+ * caller must also explicitly call spdk_ioat_flush to submit the
+ * descriptor, possibly after building additional descriptors.
+ *
+ * \param chan I/OAT channel to build request.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter
+ * in the completion callback.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param dst Destination virtual address.
+ * \param fill_pattern Repeating eight-byte pattern to use for memory fill.
+ * \param nbytes Number of bytes to fill.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_ioat_build_fill(struct spdk_ioat_chan *chan,
+ void *cb_arg, spdk_ioat_req_cb cb_fn,
+ void *dst, uint64_t fill_pattern, uint64_t nbytes);
+
+/**
+ * Build and submit a DMA engine memory fill request.
+ *
+ * This function will build the descriptor in the channel's ring and then
+ * immediately submit it by writing the channel's doorbell. Calling this
+ * function does not require a subsequent call to spdk_ioat_flush.
+ *
+ * \param chan I/OAT channel to submit request.
+ * \param cb_arg Opaque value which will be passed back as the arg parameter
+ * in the completion callback.
+ * \param cb_fn Callback function which will be called when the request is complete.
+ * \param dst Destination virtual address.
+ * \param fill_pattern Repeating eight-byte pattern to use for memory fill.
+ * \param nbytes Number of bytes to fill.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_ioat_submit_fill(struct spdk_ioat_chan *chan,
+ void *cb_arg, spdk_ioat_req_cb cb_fn,
+ void *dst, uint64_t fill_pattern, uint64_t nbytes);
+
+/**
+ * Flush previously built descriptors.
+ *
+ * Descriptors are flushed by writing the channel's dmacount doorbell
+ * register. This function enables batching multiple descriptors followed by
+ * a single doorbell write.
+ *
+ * \param chan I/OAT channel to flush.
+ */
+void spdk_ioat_flush(struct spdk_ioat_chan *chan);
+
+/**
+ * Check for completed requests on an I/OAT channel.
+ *
+ * \param chan I/OAT channel to check for completions.
+ *
+ * \return number of events handled on success, negative errno on failure.
+ */
+int spdk_ioat_process_events(struct spdk_ioat_chan *chan);
+
+/**
+ * DMA engine capability flags
+ */
+enum spdk_ioat_dma_capability_flags {
+ SPDK_IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */
+ SPDK_IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */
+};
+
+/**
+ * Get the DMA engine capabilities.
+ *
+ * \param chan I/OAT channel to query.
+ *
+ * \return a combination of flags from enum spdk_ioat_dma_capability_flags.
+ */
+uint32_t spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *chan);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/ioat_spec.h b/src/spdk/include/spdk/ioat_spec.h
new file mode 100644
index 000000000..12202701a
--- /dev/null
+++ b/src/spdk/include/spdk/ioat_spec.h
@@ -0,0 +1,330 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * I/OAT specification definitions
+ */
+
+#ifndef SPDK_IOAT_SPEC_H
+#define SPDK_IOAT_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+#define SPDK_IOAT_PCI_CHANERR_INT_OFFSET 0x180
+
+#define SPDK_IOAT_INTRCTRL_MASTER_INT_EN 0x01
+
+#define SPDK_IOAT_VER_3_0 0x30
+#define SPDK_IOAT_VER_3_3 0x33
+
+/* DMA Channel Registers */
+#define SPDK_IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define SPDK_IOAT_CHANCTRL_COMPL_DCA_EN 0x0200
+#define SPDK_IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define SPDK_IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define SPDK_IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define SPDK_IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define SPDK_IOAT_CHANCTRL_INT_REARM 0x0001
+
+/* DMA Channel Capabilities */
+#define SPDK_IOAT_DMACAP_PB (1 << 0)
+#define SPDK_IOAT_DMACAP_DCA (1 << 4)
+#define SPDK_IOAT_DMACAP_BFILL (1 << 6)
+#define SPDK_IOAT_DMACAP_XOR (1 << 8)
+#define SPDK_IOAT_DMACAP_PQ (1 << 9)
+#define SPDK_IOAT_DMACAP_DMA_DIF (1 << 10)
+
+struct spdk_ioat_registers {
+ uint8_t chancnt; /* 0x00 */
+ uint8_t xfercap; /* 0x01 */
+ uint8_t genctrl; /* 0x02 */
+ uint8_t intrctrl; /* 0x03 */
+ uint32_t attnstatus; /* 0x04 */
+ uint8_t cbver; /* 0x08 */
+ uint8_t reserved4[0x3]; /* 0x09 */
+ uint16_t intrdelay; /* 0x0C */
+ uint16_t cs_status; /* 0x0E */
+ uint32_t dmacapability; /* 0x10 */
+ uint8_t reserved5[0x6C]; /* 0x14 */
+ uint16_t chanctrl; /* 0x80 */
+ uint8_t reserved6[0x2]; /* 0x82 */
+ uint8_t chancmd; /* 0x84 */
+ uint8_t reserved3[1]; /* 0x85 */
+ uint16_t dmacount; /* 0x86 */
+ uint64_t chansts; /* 0x88 */
+ uint64_t chainaddr; /* 0x90 */
+ uint64_t chancmp; /* 0x98 */
+ uint8_t reserved2[0x8]; /* 0xA0 */
+ uint32_t chanerr; /* 0xA8 */
+ uint32_t chanerrmask; /* 0xAC */
+} __attribute__((packed)) __attribute__((aligned));
+
+#define SPDK_IOAT_CHANCMD_RESET 0x20
+#define SPDK_IOAT_CHANCMD_SUSPEND 0x04
+
+#define SPDK_IOAT_CHANSTS_STATUS 0x7ULL
+#define SPDK_IOAT_CHANSTS_ACTIVE 0x0
+#define SPDK_IOAT_CHANSTS_IDLE 0x1
+#define SPDK_IOAT_CHANSTS_SUSPENDED 0x2
+#define SPDK_IOAT_CHANSTS_HALTED 0x3
+#define SPDK_IOAT_CHANSTS_ARMED 0x4
+
+#define SPDK_IOAT_CHANSTS_UNAFFILIATED_ERROR 0x8ULL
+#define SPDK_IOAT_CHANSTS_SOFT_ERROR 0x10ULL
+
+#define SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK (~0x3FULL)
+
+#define SPDK_IOAT_CHANCMP_ALIGN 8 /* CHANCMP address must be 64-bit aligned */
+
+struct spdk_ioat_generic_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t src_snoop_disable: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t reserved2: 1;
+ uint32_t src_page_break: 1;
+ uint32_t dest_page_break: 1;
+ uint32_t bundle: 1;
+ uint32_t dest_dca: 1;
+ uint32_t hint: 1;
+ uint32_t reserved: 13;
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t op_specific[4];
+};
+
+struct spdk_ioat_dma_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t src_snoop_disable: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t null: 1;
+ uint32_t src_page_break: 1;
+ uint32_t dest_page_break: 1;
+ uint32_t bundle: 1;
+ uint32_t dest_dca: 1;
+ uint32_t hint: 1;
+ uint32_t reserved: 13;
+#define SPDK_IOAT_OP_COPY 0x00
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t reserved;
+ uint64_t reserved2;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct spdk_ioat_fill_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t reserved: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t reserved2: 2;
+ uint32_t dest_page_break: 1;
+ uint32_t bundle: 1;
+ uint32_t reserved3: 15;
+#define SPDK_IOAT_OP_FILL 0x01
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_data;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t reserved;
+ uint64_t next_dest_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct spdk_ioat_xor_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t src_snoop_disable: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t src_count: 3;
+ uint32_t bundle: 1;
+ uint32_t dest_dca: 1;
+ uint32_t hint: 1;
+ uint32_t reserved: 13;
+#define SPDK_IOAT_OP_XOR 0x87
+#define SPDK_IOAT_OP_XOR_VAL 0x88
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+
+struct spdk_ioat_xor_ext_hw_desc {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t reserved[4];
+};
+
+struct spdk_ioat_pq_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t src_snoop_disable: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t src_count: 3;
+ uint32_t bundle: 1;
+ uint32_t dest_dca: 1;
+ uint32_t hint: 1;
+ uint32_t p_disable: 1;
+ uint32_t q_disable: 1;
+ uint32_t reserved: 11;
+#define SPDK_IOAT_OP_PQ 0x89
+#define SPDK_IOAT_OP_PQ_VAL 0x8a
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint8_t coef[8];
+ uint64_t q_addr;
+};
+
+struct spdk_ioat_pq_ext_hw_desc {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t reserved[2];
+};
+
+struct spdk_ioat_pq_update_hw_desc {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct {
+ uint32_t int_enable: 1;
+ uint32_t src_snoop_disable: 1;
+ uint32_t dest_snoop_disable: 1;
+ uint32_t completion_update: 1;
+ uint32_t fence: 1;
+ uint32_t src_cnt: 3;
+ uint32_t bundle: 1;
+ uint32_t dest_dca: 1;
+ uint32_t hint: 1;
+ uint32_t p_disable: 1;
+ uint32_t q_disable: 1;
+ uint32_t reserved: 3;
+ uint32_t coef: 8;
+#define SPDK_IOAT_OP_PQ_UP 0x8b
+ uint32_t op: 8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+
+struct spdk_ioat_raw_hw_desc {
+ uint64_t field[8];
+};
+
+union spdk_ioat_hw_desc {
+ struct spdk_ioat_raw_hw_desc raw;
+ struct spdk_ioat_generic_hw_desc generic;
+ struct spdk_ioat_dma_hw_desc dma;
+ struct spdk_ioat_fill_hw_desc fill;
+ struct spdk_ioat_xor_hw_desc xor_desc;
+ struct spdk_ioat_xor_ext_hw_desc xor_ext;
+ struct spdk_ioat_pq_hw_desc pq;
+ struct spdk_ioat_pq_ext_hw_desc pq_ext;
+ struct spdk_ioat_pq_update_hw_desc pq_update;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_ioat_hw_desc) == 64, "incorrect spdk_ioat_hw_desc layout");
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_IOAT_SPEC_H */
diff --git a/src/spdk/include/spdk/iscsi_spec.h b/src/spdk/include/spdk/iscsi_spec.h
new file mode 100644
index 000000000..06e567865
--- /dev/null
+++ b/src/spdk/include/spdk/iscsi_spec.h
@@ -0,0 +1,567 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * iSCSI specification definitions
+ */
+
+#ifndef SPDK_ISCSI_SPEC_H
+#define SPDK_ISCSI_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/assert.h"
+
+#define ISCSI_BHS_LEN 48
+#define ISCSI_DIGEST_LEN 4
+#define ISCSI_ALIGNMENT 4
+
+/** support version - RFC3720(10.12.4) */
+#define ISCSI_VERSION 0x00
+
+#define ISCSI_ALIGN(SIZE) \
+ (((SIZE) + (ISCSI_ALIGNMENT - 1)) & ~(ISCSI_ALIGNMENT - 1))
+
+/** for authentication key (non encoded 1024bytes) RFC3720(5.1/11.1.4) */
+#define ISCSI_TEXT_MAX_VAL_LEN 8192
+
+/**
+ * RFC 3720 5.1
+ * If not otherwise specified, the maximum length of a simple-value
+ * (not its encoded representation) is 255 bytes, not including the delimiter
+ * (comma or zero byte).
+ */
+#define ISCSI_TEXT_MAX_SIMPLE_VAL_LEN 255
+
+#define ISCSI_TEXT_MAX_KEY_LEN 63
+
+enum iscsi_op {
+ /* Initiator opcodes */
+ ISCSI_OP_NOPOUT = 0x00,
+ ISCSI_OP_SCSI = 0x01,
+ ISCSI_OP_TASK = 0x02,
+ ISCSI_OP_LOGIN = 0x03,
+ ISCSI_OP_TEXT = 0x04,
+ ISCSI_OP_SCSI_DATAOUT = 0x05,
+ ISCSI_OP_LOGOUT = 0x06,
+ ISCSI_OP_SNACK = 0x10,
+ ISCSI_OP_VENDOR_1C = 0x1c,
+ ISCSI_OP_VENDOR_1D = 0x1d,
+ ISCSI_OP_VENDOR_1E = 0x1e,
+
+ /* Target opcodes */
+ ISCSI_OP_NOPIN = 0x20,
+ ISCSI_OP_SCSI_RSP = 0x21,
+ ISCSI_OP_TASK_RSP = 0x22,
+ ISCSI_OP_LOGIN_RSP = 0x23,
+ ISCSI_OP_TEXT_RSP = 0x24,
+ ISCSI_OP_SCSI_DATAIN = 0x25,
+ ISCSI_OP_LOGOUT_RSP = 0x26,
+ ISCSI_OP_R2T = 0x31,
+ ISCSI_OP_ASYNC = 0x32,
+ ISCSI_OP_VENDOR_3C = 0x3c,
+ ISCSI_OP_VENDOR_3D = 0x3d,
+ ISCSI_OP_VENDOR_3E = 0x3e,
+ ISCSI_OP_REJECT = 0x3f,
+};
+
+enum iscsi_task_func {
+ ISCSI_TASK_FUNC_ABORT_TASK = 1,
+ ISCSI_TASK_FUNC_ABORT_TASK_SET = 2,
+ ISCSI_TASK_FUNC_CLEAR_ACA = 3,
+ ISCSI_TASK_FUNC_CLEAR_TASK_SET = 4,
+ ISCSI_TASK_FUNC_LOGICAL_UNIT_RESET = 5,
+ ISCSI_TASK_FUNC_TARGET_WARM_RESET = 6,
+ ISCSI_TASK_FUNC_TARGET_COLD_RESET = 7,
+ ISCSI_TASK_FUNC_TASK_REASSIGN = 8,
+};
+
+enum iscsi_task_func_resp {
+ ISCSI_TASK_FUNC_RESP_COMPLETE = 0,
+ ISCSI_TASK_FUNC_RESP_TASK_NOT_EXIST = 1,
+ ISCSI_TASK_FUNC_RESP_LUN_NOT_EXIST = 2,
+ ISCSI_TASK_FUNC_RESP_TASK_STILL_ALLEGIANT = 3,
+ ISCSI_TASK_FUNC_RESP_REASSIGNMENT_NOT_SUPPORTED = 4,
+ ISCSI_TASK_FUNC_RESP_FUNC_NOT_SUPPORTED = 5,
+ ISCSI_TASK_FUNC_RESP_AUTHORIZATION_FAILED = 6,
+ ISCSI_TASK_FUNC_REJECTED = 255
+};
+
+struct iscsi_bhs {
+ uint8_t opcode : 6;
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t flags;
+ uint8_t rsv[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t stat_sn;
+ uint32_t exp_stat_sn;
+ uint32_t max_stat_sn;
+ uint8_t res3[12];
+};
+SPDK_STATIC_ASSERT(sizeof(struct iscsi_bhs) == ISCSI_BHS_LEN, "ISCSI_BHS_LEN mismatch");
+
+struct iscsi_bhs_async {
+ uint8_t opcode : 6; /* opcode = 0x32 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res[2];
+
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+
+ uint64_t lun;
+ uint32_t ffffffff;
+ uint32_t res3;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint8_t async_event;
+ uint8_t async_vcode;
+ uint16_t param1;
+ uint16_t param2;
+ uint16_t param3;
+ uint8_t res4[4];
+};
+
+struct iscsi_bhs_login_req {
+ uint8_t opcode : 6; /* opcode = 0x03 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t flags;
+ uint8_t version_max;
+ uint8_t version_min;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t isid[6];
+ uint16_t tsih;
+ uint32_t itt;
+ uint16_t cid;
+ uint16_t res2;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint8_t res3[16];
+};
+
+struct iscsi_bhs_login_rsp {
+ uint8_t opcode : 6; /* opcode = 0x23 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t version_max;
+ uint8_t version_act;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t isid[6];
+ uint16_t tsih;
+ uint32_t itt;
+ uint32_t res2;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint8_t status_class;
+ uint8_t status_detail;
+ uint8_t res3[10];
+};
+
+struct iscsi_bhs_logout_req {
+ uint8_t opcode : 6; /* opcode = 0x06 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t reason : 7;
+ uint8_t reason_1 : 1;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t res2[8];
+ uint32_t itt;
+ uint16_t cid;
+ uint16_t res3;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint8_t res4[16];
+};
+
+struct iscsi_bhs_logout_resp {
+ uint8_t opcode : 6; /* opcode = 0x26 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t response;
+ uint8_t res;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t res2[8];
+ uint32_t itt;
+ uint32_t res3;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint32_t res4;
+ uint16_t time_2_wait;
+ uint16_t time_2_retain;
+ uint32_t res5;
+};
+
+struct iscsi_bhs_nop_in {
+ uint8_t opcode : 6; /* opcode = 0x20 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint8_t res3[12];
+};
+
+struct iscsi_bhs_nop_out {
+ uint8_t opcode : 6; /* opcode = 0x00 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint8_t res4[16];
+};
+
+struct iscsi_bhs_r2t {
+ uint8_t opcode : 6; /* opcode = 0x31 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t rsv[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint32_t r2t_sn;
+ uint32_t buffer_offset;
+ uint32_t desired_xfer_len;
+};
+
+struct iscsi_bhs_reject {
+ uint8_t opcode : 6; /* opcode = 0x3f */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t reason;
+ uint8_t res;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t res2[8];
+ uint32_t ffffffff;
+ uint32_t res3;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint32_t data_sn;
+ uint8_t res4[8];
+};
+
+struct iscsi_bhs_scsi_req {
+ uint8_t opcode : 6; /* opcode = 0x01 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t attribute : 3;
+ uint8_t reserved2 : 2;
+ uint8_t write_bit : 1;
+ uint8_t read_bit : 1;
+ uint8_t final_bit : 1;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t expected_data_xfer_len;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint8_t cdb[16];
+};
+
+struct iscsi_bhs_scsi_resp {
+ uint8_t opcode : 6; /* opcode = 0x21 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t response;
+ uint8_t status;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t res4[8];
+ uint32_t itt;
+ uint32_t snacktag;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint32_t exp_data_sn;
+ uint32_t bi_read_res_cnt;
+ uint32_t res_cnt;
+};
+
+struct iscsi_bhs_data_in {
+ uint8_t opcode : 6; /* opcode = 0x25 (ISCSI_OP_SCSI_DATAIN) */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res;
+ uint8_t status;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint32_t data_sn;
+ uint32_t buffer_offset;
+ uint32_t res_cnt;
+};
+
+struct iscsi_bhs_data_out {
+ uint8_t opcode : 6; /* opcode = 0x05 (ISCSI_OP_SCSI_DATAOUT) */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t res3;
+ uint32_t exp_stat_sn;
+ uint32_t res4;
+ uint32_t data_sn;
+ uint32_t buffer_offset;
+ uint32_t res5;
+};
+
+struct iscsi_bhs_snack_req {
+ uint8_t opcode : 6; /* opcode = 0x10 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t res5;
+ uint32_t exp_stat_sn;
+ uint8_t res6[8];
+ uint32_t beg_run;
+ uint32_t run_len;
+};
+
+struct iscsi_bhs_task_req {
+ uint8_t opcode : 6; /* opcode = 0x02 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ref_task_tag;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint32_t ref_cmd_sn;
+ uint32_t exp_data_sn;
+ uint8_t res5[8];
+};
+
+struct iscsi_bhs_task_resp {
+ uint8_t opcode : 6; /* opcode = 0x22 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t response;
+ uint8_t res;
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint8_t res2[8];
+ uint32_t itt;
+ uint32_t res3;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint8_t res4[12];
+};
+
+struct iscsi_bhs_text_req {
+ uint8_t opcode : 6; /* opcode = 0x04 */
+ uint8_t immediate : 1;
+ uint8_t reserved : 1;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t cmd_sn;
+ uint32_t exp_stat_sn;
+ uint8_t res3[16];
+};
+
+struct iscsi_bhs_text_resp {
+ uint8_t opcode : 6; /* opcode = 0x24 */
+ uint8_t reserved : 2;
+ uint8_t flags;
+ uint8_t res[2];
+ uint8_t total_ahs_len;
+ uint8_t data_segment_len[3];
+ uint64_t lun;
+ uint32_t itt;
+ uint32_t ttt;
+ uint32_t stat_sn;
+ uint32_t exp_cmd_sn;
+ uint32_t max_cmd_sn;
+ uint8_t res4[12];
+};
+
+/* generic flags */
+#define ISCSI_FLAG_FINAL 0x80
+
+/* login flags */
+#define ISCSI_LOGIN_TRANSIT 0x80
+#define ISCSI_LOGIN_CONTINUE 0x40
+#define ISCSI_LOGIN_CURRENT_STAGE_MASK 0x0c
+#define ISCSI_LOGIN_CURRENT_STAGE_0 0x04
+#define ISCSI_LOGIN_CURRENT_STAGE_1 0x08
+#define ISCSI_LOGIN_CURRENT_STAGE_3 0x0c
+#define ISCSI_LOGIN_NEXT_STAGE_MASK 0x03
+#define ISCSI_LOGIN_NEXT_STAGE_0 0x01
+#define ISCSI_LOGIN_NEXT_STAGE_1 0x02
+#define ISCSI_LOGIN_NEXT_STAGE_3 0x03
+
+/* text flags */
+#define ISCSI_TEXT_CONTINUE 0x40
+
+/* datain flags */
+#define ISCSI_DATAIN_ACKNOLWEDGE 0x40
+#define ISCSI_DATAIN_OVERFLOW 0x04
+#define ISCSI_DATAIN_UNDERFLOW 0x02
+#define ISCSI_DATAIN_STATUS 0x01
+
+/* SCSI resp flags */
+#define ISCSI_SCSI_BIDI_OVERFLOW 0x10
+#define ISCSI_SCSI_BIDI_UNDERFLOW 0x08
+#define ISCSI_SCSI_OVERFLOW 0x04
+#define ISCSI_SCSI_UNDERFLOW 0x02
+
+/* SCSI task flags */
+#define ISCSI_TASK_FUNCTION_MASK 0x7f
+
+/* Reason for Reject */
+#define ISCSI_REASON_RESERVED 0x1
+#define ISCSI_REASON_DATA_DIGEST_ERROR 0x2
+#define ISCSI_REASON_DATA_SNACK_REJECT 0x3
+#define ISCSI_REASON_PROTOCOL_ERROR 0x4
+#define ISCSI_REASON_CMD_NOT_SUPPORTED 0x5
+#define ISCSI_REASON_IMM_CMD_REJECT 0x6
+#define ISCSI_REASON_TASK_IN_PROGRESS 0x7
+#define ISCSI_REASON_INVALID_SNACK 0x8
+#define ISCSI_REASON_INVALID_PDU_FIELD 0x9
+#define ISCSI_REASON_LONG_OPERATION_REJECT 0xa
+#define ISCSI_REASON_NEGOTIATION_RESET 0xb
+#define ISCSI_REASON_WAIT_FOR_RESET 0xc
+
+#define ISCSI_FLAG_SNACK_TYPE_DATA 0
+#define ISCSI_FLAG_SNACK_TYPE_R2T 0
+#define ISCSI_FLAG_SNACK_TYPE_STATUS 1
+#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK 2
+#define ISCSI_FLAG_SNACK_TYPE_RDATA 3
+#define ISCSI_FLAG_SNACK_TYPE_MASK 0x0F /* 4 bits */
+
+struct iscsi_ahs {
+ /* 0-3 */
+ uint8_t ahs_len[2];
+ uint8_t ahs_type;
+ uint8_t ahs_specific1;
+ /* 4-x */
+ uint8_t ahs_specific2[];
+};
+
+#define ISCSI_BHS_LOGIN_GET_TBIT(X) (!!((X) & ISCSI_LOGIN_TRANSIT)) /* 1 if the transit flag is set in login flags X, else 0 */
+#define ISCSI_BHS_LOGIN_GET_CBIT(X) (!!((X) & ISCSI_LOGIN_CONTINUE)) /* 1 if the continue flag is set in login flags X, else 0 */
+#define ISCSI_BHS_LOGIN_GET_CSG(X) (((X) & ISCSI_LOGIN_CURRENT_STAGE_MASK) >> 2) /* current-stage field of X, shifted down to bit 0 */
+#define ISCSI_BHS_LOGIN_GET_NSG(X) ((X) & ISCSI_LOGIN_NEXT_STAGE_MASK) /* next-stage field of X; X is parenthesized so any expression is safe */
+
+#define ISCSI_CLASS_SUCCESS 0x00
+#define ISCSI_CLASS_REDIRECT 0x01
+#define ISCSI_CLASS_INITIATOR_ERROR 0x02
+#define ISCSI_CLASS_TARGET_ERROR 0x03
+
+/* Class (Success) detailed info: 0 */
+#define ISCSI_LOGIN_ACCEPT 0x00
+
+/* Class (Redirection) detailed info: 1 */
+#define ISCSI_LOGIN_TARGET_TEMPORARILY_MOVED 0x01
+#define ISCSI_LOGIN_TARGET_PERMANENTLY_MOVED 0x02
+
+/* Class (Initiator Error) detailed info: 2 */
+#define ISCSI_LOGIN_INITIATOR_ERROR 0x00
+#define ISCSI_LOGIN_AUTHENT_FAIL 0x01
+#define ISCSI_LOGIN_AUTHORIZATION_FAIL 0x02
+#define ISCSI_LOGIN_TARGET_NOT_FOUND 0x03
+#define ISCSI_LOGIN_TARGET_REMOVED 0x04
+#define ISCSI_LOGIN_UNSUPPORTED_VERSION 0x05
+#define ISCSI_LOGIN_TOO_MANY_CONNECTIONS 0x06
+#define ISCSI_LOGIN_MISSING_PARMS 0x07
+#define ISCSI_LOGIN_CONN_ADD_FAIL 0x08
+#define ISCSI_LOGIN_NOT_SUPPORTED_SESSION_TYPE 0x09
+#define ISCSI_LOGIN_NO_SESSION 0x0a
+#define ISCSI_LOGIN_INVALID_LOGIN_REQUEST 0x0b
+
+/* Class (Target Error) detailed info: 3 */
+#define ISCSI_LOGIN_STATUS_TARGET_ERROR 0x00
+#define ISCSI_LOGIN_STATUS_SERVICE_UNAVAILABLE 0x01
+#define ISCSI_LOGIN_STATUS_NO_RESOURCES 0x02
+
+#endif /* SPDK_ISCSI_SPEC_H */
diff --git a/src/spdk/include/spdk/json.h b/src/spdk/include/spdk/json.h
new file mode 100644
index 000000000..8109e5188
--- /dev/null
+++ b/src/spdk/include/spdk/json.h
@@ -0,0 +1,337 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * JSON parsing and encoding
+ */
+
+#ifndef SPDK_JSON_H_
+#define SPDK_JSON_H_
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum spdk_json_val_type {
+ SPDK_JSON_VAL_INVALID = 0,
+#define SPDK_JSON_VAL_ANY SPDK_JSON_VAL_INVALID
+ SPDK_JSON_VAL_NULL = 1U << 1,
+ SPDK_JSON_VAL_TRUE = 1U << 2,
+ SPDK_JSON_VAL_FALSE = 1U << 3,
+ SPDK_JSON_VAL_NUMBER = 1U << 4,
+ SPDK_JSON_VAL_STRING = 1U << 5,
+ SPDK_JSON_VAL_ARRAY_BEGIN = 1U << 6,
+ SPDK_JSON_VAL_ARRAY_END = 1U << 7,
+ SPDK_JSON_VAL_OBJECT_BEGIN = 1U << 8,
+ SPDK_JSON_VAL_OBJECT_END = 1U << 9,
+ SPDK_JSON_VAL_NAME = 1U << 10,
+};
+
+struct spdk_json_val {
+ /**
+ * Pointer to the location of the value within the parsed JSON input.
+ *
+ * For SPDK_JSON_VAL_STRING and SPDK_JSON_VAL_NAME,
+ * this points to the beginning of the decoded UTF-8 string without quotes.
+ *
+ * For SPDK_JSON_VAL_NUMBER, this points to the beginning of the number as represented in
+ * the original JSON (text representation, not converted to a numeric value).
+ */
+ void *start;
+
+ /**
+ * Length of value.
+ *
+ * For SPDK_JSON_VAL_STRING, SPDK_JSON_VAL_NUMBER, and SPDK_JSON_VAL_NAME,
+ * this is the length in bytes of the value starting at \ref start.
+ *
+ * For SPDK_JSON_VAL_ARRAY_BEGIN and SPDK_JSON_VAL_OBJECT_BEGIN,
+ * this is the number of values contained within the array or object (including
+ * nested objects and arrays, but not including the _END value). The array or object _END
+ * value can be found by advancing len values from the _BEGIN value.
+ */
+ uint32_t len;
+
+ /**
+ * Type of value.
+ */
+ enum spdk_json_val_type type;
+};
+
+/**
+ * Invalid JSON syntax.
+ */
+#define SPDK_JSON_PARSE_INVALID -1
+
+/**
+ * JSON was valid up to the end of the current buffer, but did not represent a complete JSON value.
+ */
+#define SPDK_JSON_PARSE_INCOMPLETE -2
+
+#define SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED -3
+
+/**
+ * Decode JSON strings and names in place (modify the input buffer).
+ */
+#define SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE 0x000000001
+
+/**
+ * Allow parsing of comments.
+ *
+ * Comments are not allowed by the JSON RFC, so this is not enabled by default.
+ */
+#define SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS 0x000000002
+
+/**
+ * Parse JSON data.
+ *
+ * \param json Raw JSON data; must be encoded in UTF-8.
+ * Note that the data may be modified to perform in-place string decoding.
+ *
+ * \param size Size of json in bytes.
+ *
+ * \param end If non-NULL, this will be filled with a pointer to the byte just beyond the end
+ * of the valid JSON.
+ *
+ * \return Number of values parsed, or negative on failure:
+ * SPDK_JSON_PARSE_INVALID if the provided data was not valid JSON, or
+ * SPDK_JSON_PARSE_INCOMPLETE if the provided data was not a complete JSON value.
+ */
+ssize_t spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values,
+ void **end, uint32_t flags);
+
+typedef int (*spdk_json_decode_fn)(const struct spdk_json_val *val, void *out);
+
+struct spdk_json_object_decoder {
+ const char *name;
+ size_t offset;
+ spdk_json_decode_fn decode_func;
+ bool optional;
+};
+
+int spdk_json_decode_object(const struct spdk_json_val *values,
+ const struct spdk_json_object_decoder *decoders, size_t num_decoders, void *out);
+int spdk_json_decode_array(const struct spdk_json_val *values, spdk_json_decode_fn decode_func,
+ void *out, size_t max_size, size_t *out_size, size_t stride);
+
+int spdk_json_decode_bool(const struct spdk_json_val *val, void *out);
+int spdk_json_decode_uint16(const struct spdk_json_val *val, void *out);
+int spdk_json_decode_int32(const struct spdk_json_val *val, void *out);
+int spdk_json_decode_uint32(const struct spdk_json_val *val, void *out);
+int spdk_json_decode_uint64(const struct spdk_json_val *val, void *out);
+int spdk_json_decode_string(const struct spdk_json_val *val, void *out);
+
+/**
+ * Get length of a value in number of values.
+ *
+ * This can be used to skip over a value while interpreting parse results.
+ *
+ * For SPDK_JSON_VAL_ARRAY_BEGIN and SPDK_JSON_VAL_OBJECT_BEGIN,
+ * this returns the number of values contained within this value, plus the _BEGIN and _END values.
+ *
+ * For all other values, this returns 1.
+ */
+size_t spdk_json_val_len(const struct spdk_json_val *val);
+
+/**
+ * Compare JSON string with null terminated C string.
+ *
+ * \return true if strings are equal or false if not
+ */
+bool spdk_json_strequal(const struct spdk_json_val *val, const char *str);
+
+/**
+ * Equivalent of strdup() for JSON string values.
+ *
+ * If val is not representable as a C string (contains embedded '\0' characters),
+ * returns NULL.
+ *
+ * Caller is responsible for passing the result to free() when it is no longer needed.
+ */
+char *spdk_json_strdup(const struct spdk_json_val *val);
+
+int spdk_json_number_to_uint16(const struct spdk_json_val *val, uint16_t *num);
+int spdk_json_number_to_int32(const struct spdk_json_val *val, int32_t *num);
+int spdk_json_number_to_uint32(const struct spdk_json_val *val, uint32_t *num);
+int spdk_json_number_to_uint64(const struct spdk_json_val *val, uint64_t *num);
+
+struct spdk_json_write_ctx;
+
+#define SPDK_JSON_WRITE_FLAG_FORMATTED 0x00000001
+
+typedef int (*spdk_json_write_cb)(void *cb_ctx, const void *data, size_t size);
+
+struct spdk_json_write_ctx *spdk_json_write_begin(spdk_json_write_cb write_cb, void *cb_ctx,
+ uint32_t flags);
+int spdk_json_write_end(struct spdk_json_write_ctx *w);
+int spdk_json_write_null(struct spdk_json_write_ctx *w);
+int spdk_json_write_bool(struct spdk_json_write_ctx *w, bool val);
+int spdk_json_write_int32(struct spdk_json_write_ctx *w, int32_t val);
+int spdk_json_write_uint32(struct spdk_json_write_ctx *w, uint32_t val);
+int spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val);
+int spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val);
+int spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val);
+int spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len);
+
+/**
+ * Write null-terminated UTF-16LE string.
+ *
+ * \param w JSON write context.
+ * \param val UTF-16LE string; must be null terminated.
+ * \return 0 on success or negative on failure.
+ */
+int spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val);
+
+/**
+ * Write UTF-16LE string.
+ *
+ * \param w JSON write context.
+ * \param val UTF-16LE string; may contain embedded null characters.
+ * \param len Length of val in 16-bit code units (i.e. size of string in bytes divided by 2).
+ * \return 0 on success or negative on failure.
+ */
+int spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val,
+ size_t len);
+
+int spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt,
+ ...) __attribute__((__format__(__printf__, 2, 3)));
+int spdk_json_write_string_fmt_v(struct spdk_json_write_ctx *w, const char *fmt, va_list args);
+
+int spdk_json_write_array_begin(struct spdk_json_write_ctx *w);
+int spdk_json_write_array_end(struct spdk_json_write_ctx *w);
+int spdk_json_write_object_begin(struct spdk_json_write_ctx *w);
+int spdk_json_write_object_end(struct spdk_json_write_ctx *w);
+int spdk_json_write_name(struct spdk_json_write_ctx *w, const char *name);
+int spdk_json_write_name_raw(struct spdk_json_write_ctx *w, const char *name, size_t len);
+
+int spdk_json_write_val(struct spdk_json_write_ctx *w, const struct spdk_json_val *val);
+
+/**
+ * Append bytes directly to the output stream without validation.
+ *
+ * Can be used to write values with specific encodings that differ from the JSON writer output.
+ */
+int spdk_json_write_val_raw(struct spdk_json_write_ctx *w, const void *data, size_t len);
+
+/* Utility functions */
+int spdk_json_write_named_null(struct spdk_json_write_ctx *w, const char *name);
+int spdk_json_write_named_bool(struct spdk_json_write_ctx *w, const char *name, bool val);
+int spdk_json_write_named_int32(struct spdk_json_write_ctx *w, const char *name, int32_t val);
+int spdk_json_write_named_uint32(struct spdk_json_write_ctx *w, const char *name, uint32_t val);
+int spdk_json_write_named_uint64(struct spdk_json_write_ctx *w, const char *name, uint64_t val);
+int spdk_json_write_named_int64(struct spdk_json_write_ctx *w, const char *name, int64_t val);
+int spdk_json_write_named_string(struct spdk_json_write_ctx *w, const char *name, const char *val);
+int spdk_json_write_named_string_fmt(struct spdk_json_write_ctx *w, const char *name,
+ const char *fmt, ...) __attribute__((__format__(__printf__, 3, 4)));
+int spdk_json_write_named_string_fmt_v(struct spdk_json_write_ctx *w, const char *name,
+ const char *fmt, va_list args);
+
+int spdk_json_write_named_array_begin(struct spdk_json_write_ctx *w, const char *name);
+int spdk_json_write_named_object_begin(struct spdk_json_write_ctx *w, const char *name);
+
+/**
+ * Return JSON value associated with key \c key_name. Subobjects won't be searched.
+ *
+ * \param object JSON object to be examined
+ * \param key_name name of the key
+ * \param key optional, will be set with found key
+ * \param val optional, will be set with value of the key
+ * \param type search for specific value type. Pass SPDK_JSON_VAL_ANY to match any type.
+ * \return 0 if found or negative error code:
+ * -EINVAL - json object is invalid
+ * -ENOENT - key not found
+ * -EDOM - key exists but its value type does not match.
+ */
+int spdk_json_find(struct spdk_json_val *object, const char *key_name, struct spdk_json_val **key,
+ struct spdk_json_val **val, enum spdk_json_val_type type);
+
+/**
+ * The same as calling \c spdk_json_find() function with \c type set to \c SPDK_JSON_VAL_STRING
+ *
+ * \param object JSON object to be examined
+ * \param key_name name of the key
+ * \param key optional, will be set with found key
+ * \param val optional, will be set with value of the key
+ * \return See \c spdk_json_find
+ */
+
+int spdk_json_find_string(struct spdk_json_val *object, const char *key_name,
+ struct spdk_json_val **key, struct spdk_json_val **val);
+
+/**
+ * The same as calling \c spdk_json_find() function with \c type set to \c SPDK_JSON_VAL_ARRAY_BEGIN
+ *
+ * \param object JSON object to be examined
+ * \param key_name name of the key
+ * \param key optional, will be set with found key
+ * \param value optional, will be set with key value
+ * \return See \c spdk_json_find
+ */
+int spdk_json_find_array(struct spdk_json_val *object, const char *key_name,
+ struct spdk_json_val **key, struct spdk_json_val **value);
+
+/**
+ * Return first JSON value in given JSON object.
+ *
+ * \param object pointer to JSON object begin
+ * \return Pointer to first object or NULL if object is empty or is not a JSON object
+ */
+struct spdk_json_val *spdk_json_object_first(struct spdk_json_val *object);
+
+/**
+ * Return first JSON value in array.
+ *
+ * \param array_begin pointer to JSON array begin
+ * \return Pointer to first JSON value or NULL if array is empty or is not a JSON array.
+ */
+
+struct spdk_json_val *spdk_json_array_first(struct spdk_json_val *array_begin);
+
+/**
+ * Advance to the next JSON value in JSON object or array.
+ *
+ * \warning if \c pos is not JSON key or JSON array element behaviour is undefined.
+ *
+ * \param pos pointer to JSON key if iterating over JSON object or array element
+ * \return next JSON value or NULL if there is no more objects or array elements
+ */
+struct spdk_json_val *spdk_json_next(struct spdk_json_val *pos);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/jsonrpc.h b/src/spdk/include/spdk/jsonrpc.h
new file mode 100644
index 000000000..650a06ff4
--- /dev/null
+++ b/src/spdk/include/spdk/jsonrpc.h
@@ -0,0 +1,352 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * JSON-RPC 2.0 server implementation
+ */
+
+#ifndef SPDK_JSONRPC_H_
+#define SPDK_JSONRPC_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/json.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Defined error codes in JSON-RPC specification 2.0 */
+#define SPDK_JSONRPC_ERROR_PARSE_ERROR -32700
+#define SPDK_JSONRPC_ERROR_INVALID_REQUEST -32600
+#define SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND -32601
+#define SPDK_JSONRPC_ERROR_INVALID_PARAMS -32602
+#define SPDK_JSONRPC_ERROR_INTERNAL_ERROR -32603
+
+/* Custom error codes in SPDK
+ *
+ * Error codes from and including -32768 to -32000 are reserved for
+ * predefined errors, hence custom error codes must be outside of the range.
+ */
+#define SPDK_JSONRPC_ERROR_INVALID_STATE -1
+
+struct spdk_jsonrpc_server;
+struct spdk_jsonrpc_request;
+
+struct spdk_jsonrpc_client;
+struct spdk_jsonrpc_client_request;
+
+struct spdk_jsonrpc_client_response {
+ struct spdk_json_val *version;
+ struct spdk_json_val *id;
+ struct spdk_json_val *result;
+ struct spdk_json_val *error;
+};
+
+/**
+ * User callback to handle a single JSON-RPC request.
+ *
+ * The user should respond by calling one of spdk_jsonrpc_begin_result() or
+ * spdk_jsonrpc_send_error_response().
+ *
+ * \param request JSON-RPC request to handle.
+ * \param method Function to handle the request.
+ * \param params Parameters passed to the function 'method'.
+ */
+typedef void (*spdk_jsonrpc_handle_request_fn)(
+ struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *method,
+ const struct spdk_json_val *params);
+
+struct spdk_jsonrpc_server_conn;
+
+typedef void (*spdk_jsonrpc_conn_closed_fn)(struct spdk_jsonrpc_server_conn *conn, void *arg);
+
+/**
+ * Function for specific RPC method response parsing handlers.
+ *
+ * \param parser_ctx context where the analysis results are put.
+ * \param result JSON values returned in response to this method.
+ *
+ * \return 0 on success.
+ * SPDK_JSON_PARSE_INVALID on failure.
+ */
+typedef int (*spdk_jsonrpc_client_response_parser)(
+ void *parser_ctx,
+ const struct spdk_json_val *result);
+
+/**
+ * Create a JSON-RPC server listening on the required address.
+ *
+ * \param domain Socket family.
+ * \param protocol Protocol.
+ * \param listen_addr Listening address.
+ * \param addrlen Length of address.
+ * \param handle_request User callback to handle a JSON-RPC request.
+ *
+ * \return a pointer to the JSON-RPC server.
+ */
+struct spdk_jsonrpc_server *spdk_jsonrpc_server_listen(int domain, int protocol,
+ struct sockaddr *listen_addr, socklen_t addrlen, spdk_jsonrpc_handle_request_fn handle_request);
+
+/**
+ * Poll the requests to the JSON-RPC server.
+ *
+ * This function does accept, receive, handle the requests and reply to them.
+ *
+ * \param server JSON-RPC server.
+ *
+ * \return 0 on success.
+ */
+int spdk_jsonrpc_server_poll(struct spdk_jsonrpc_server *server);
+
+/**
+ * Shutdown the JSON-RPC server.
+ *
+ * \param server JSON-RPC server.
+ */
+void spdk_jsonrpc_server_shutdown(struct spdk_jsonrpc_server *server);
+
+/**
+ * Return connection associated to \c request
+ *
+ * \param request JSON-RPC request
+ * \return JSON RPC server connection
+ */
+struct spdk_jsonrpc_server_conn *spdk_jsonrpc_get_conn(struct spdk_jsonrpc_request *request);
+
+/**
+ * Add callback called when connection is closed. Pair of \c cb and \c ctx must be unique or error is returned.
+ * Registered callback is called only once and there is no need to call \c spdk_jsonrpc_conn_del_close_cb
+ * from inside \c cb.
+ *
+ * \note Current implementation allows only one close callback per connection.
+ *
+ * \param conn JSON RPC server connection
+ * \param cb callback function
+ * \param ctx argument for \c cb
+ *
+ * \return 0 on success, or negated errno code:
+ * -EEXIST \c cb and \c ctx is already registered
+ * -ENOTCONN Callback can't be added because connection is closed.
+ * -ENOSPC no more space to register callback.
+ */
+int spdk_jsonrpc_conn_add_close_cb(struct spdk_jsonrpc_server_conn *conn,
+ spdk_jsonrpc_conn_closed_fn cb, void *ctx);
+
+/**
+ * Remove registered close callback.
+ *
+ * \param conn JSON RPC server connection
+ * \param cb callback function
+ * \param ctx argument for \c cb
+ *
+ * \return 0 on success, or negated errno code:
+ * -ENOENT \c cb and \c ctx pair is not registered
+ */
+int spdk_jsonrpc_conn_del_close_cb(struct spdk_jsonrpc_server_conn *conn,
+ spdk_jsonrpc_conn_closed_fn cb, void *ctx);
+
+/**
+ * Begin building a response to a JSON-RPC request.
+ *
+ * If this function returns non-NULL, the user must call spdk_jsonrpc_end_result()
+ * on the request after writing the desired response object to the spdk_json_write_ctx.
+ *
+ * \param request JSON-RPC request to respond to.
+ *
+ * \return Non-NULL pointer to JSON write context to write the response object to.
+ */
+struct spdk_json_write_ctx *spdk_jsonrpc_begin_result(struct spdk_jsonrpc_request *request);
+
+/**
+ * Complete and send a JSON-RPC response.
+ *
+ * \param request Request to complete the response for.
+ * \param w JSON write context returned from spdk_jsonrpc_begin_result().
+ */
+void spdk_jsonrpc_end_result(struct spdk_jsonrpc_request *request, struct spdk_json_write_ctx *w);
+
+/**
+ * Send an error response to a JSON-RPC request.
+ *
+ * This is shorthand for spdk_jsonrpc_begin_result() + spdk_jsonrpc_end_result()
+ * with an error object.
+ *
+ * \param request JSON-RPC request to respond to.
+ * \param error_code Integer error code to return (may be one of the
+ * SPDK_JSONRPC_ERROR_ errors, or a custom error code).
+ * \param msg String error message to return.
+ */
+void spdk_jsonrpc_send_error_response(struct spdk_jsonrpc_request *request,
+ int error_code, const char *msg);
+
+/**
+ * Send an error response to a JSON-RPC request.
+ *
+ * This is shorthand for printf() + spdk_jsonrpc_send_error_response().
+ *
+ * \param request JSON-RPC request to respond to.
+ * \param error_code Integer error code to return (may be one of the
+ * SPDK_JSONRPC_ERROR_ errors, or a custom error code).
+ * \param fmt Printf-like format string, followed by the arguments it consumes.
+ */
+void spdk_jsonrpc_send_error_response_fmt(struct spdk_jsonrpc_request *request,
+ int error_code, const char *fmt, ...) __attribute__((__format__(__printf__, 3, 4)));
+
+/**
+ * Begin building a JSON-RPC request.
+ *
+ * If this function returns non-NULL, the user must call spdk_jsonrpc_end_request()
+ * on the request after writing the desired request object to the spdk_json_write_ctx.
+ *
+ * \param request JSON-RPC request.
+ * \param id ID index for the request. If < 0 skip ID.
+ * \param method Name of the RPC method. If NULL caller will have to create "method" key.
+ *
+ * \return JSON write context or NULL in case of error.
+ */
+struct spdk_json_write_ctx *
+spdk_jsonrpc_begin_request(struct spdk_jsonrpc_client_request *request, int32_t id,
+ const char *method);
+
+/**
+ * Complete a JSON-RPC request.
+ *
+ * \param request JSON-RPC request.
+ * \param w JSON write context returned from spdk_jsonrpc_begin_request().
+ */
+void spdk_jsonrpc_end_request(struct spdk_jsonrpc_client_request *request,
+ struct spdk_json_write_ctx *w);
+
+/**
+ * Connect to the specified RPC server.
+ *
+ * \param addr RPC socket address.
+ * \param addr_family Protocol families of address.
+ *
+ * \return JSON-RPC client on success, NULL on failure and errno set to indicate
+ * the cause of the error.
+ */
+struct spdk_jsonrpc_client *spdk_jsonrpc_client_connect(const char *addr, int addr_family);
+
+/**
+ * Close JSON-RPC connection and free \c client object.
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using \c client object.
+ *
+ * \param client JSON-RPC client.
+ */
+void spdk_jsonrpc_client_close(struct spdk_jsonrpc_client *client);
+
+/**
+ * Create one JSON-RPC request. Returned request must be passed to
+ * \c spdk_jsonrpc_client_send_request when done or to \c spdk_jsonrpc_client_free_request
+ * if discarded.
+ *
+ * \return pointer to JSON-RPC request object.
+ */
+struct spdk_jsonrpc_client_request *spdk_jsonrpc_client_create_request(void);
+
+/**
+ * Free one JSON-RPC request.
+ *
+ * \param req pointer to JSON-RPC request object.
+ */
+void spdk_jsonrpc_client_free_request(struct spdk_jsonrpc_client_request *req);
+
+/**
+ * Send the JSON-RPC request in JSON-RPC client. Library takes ownership of the
+ * request object and will free it when done.
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using \c client object.
+ *
+ * \param client JSON-RPC client.
+ * \param req JSON-RPC request.
+ *
+ * \return 0 on success or negative error code.
+ * -ENOSPC - no space left to queue another request. Try again later.
+ */
+int spdk_jsonrpc_client_send_request(struct spdk_jsonrpc_client *client,
+ struct spdk_jsonrpc_client_request *req);
+
+/**
+ * Poll the JSON-RPC client. When any response is available use
+ * \c spdk_jsonrpc_client_get_response to retrieve it.
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using \c client object.
+ *
+ * \param client JSON-RPC client.
+ * \param timeout Time in milliseconds this function will block. -1 block forever, 0 don't block.
+ *
+ * \return If no error occurred, this function returns a non-negative number indicating how
+ * many ready responses can be retrieved. If an error occurred, this function returns one of
+ * the following negated errno values:
+ * -ENOTCONN - not connected yet. Try again later.
+ * -EINVAL - response is detected to be invalid. Client connection should be terminated.
+ * -ENOSPC - no space to receive another response. User needs to retrieve waiting responses.
+ * -EIO - connection terminated (or other critical error). Client connection should be terminated.
+ * -ENOMEM - out of memory
+ */
+int spdk_jsonrpc_client_poll(struct spdk_jsonrpc_client *client, int timeout);
+
+/**
+ * Return JSON RPC response object representing next available response from client connection.
+ * Returned pointer must be freed using \c spdk_jsonrpc_client_free_response
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using \c client object.
+ *
+ * \param client JSON-RPC client.
+ * \return pointer to JSON RPC response object or NULL if no response available.
+ */
+struct spdk_jsonrpc_client_response *spdk_jsonrpc_client_get_response(struct spdk_jsonrpc_client
+ *client);
+
+/**
+ * Free response object obtained from \c spdk_jsonrpc_client_get_response
+ *
+ * \param resp pointer to JSON RPC response object. If NULL no operation is performed.
+ */
+void spdk_jsonrpc_client_free_response(struct spdk_jsonrpc_client_response *resp);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/likely.h b/src/spdk/include/spdk/likely.h
new file mode 100644
index 000000000..034a9b98b
--- /dev/null
+++ b/src/spdk/include/spdk/likely.h
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Likely/unlikely branch prediction macros
+ */
+
+#ifndef SPDK_LIKELY_H
+#define SPDK_LIKELY_H
+
+#include "spdk/stdinc.h"
+
+#define spdk_unlikely(cond) __builtin_expect(!!(cond), 0) /* hint: cond is expected to be false; !! normalizes any scalar to 0/1 */
+#define spdk_likely(cond) __builtin_expect(!!(cond), 1) /* hint: cond is expected to be true; !! normalizes any scalar to 0/1 */
+
+#endif
diff --git a/src/spdk/include/spdk/log.h b/src/spdk/include/spdk/log.h
new file mode 100644
index 000000000..92c899ff1
--- /dev/null
+++ b/src/spdk/include/spdk/log.h
@@ -0,0 +1,224 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Logging interfaces
+ */
+
+#ifndef SPDK_LOG_H
+#define SPDK_LOG_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * for passing user-provided log call
+ *
+ * \param level Log level threshold.
+ * \param file Name of the current source file.
+ * \param line Current source file line.
+ * \param func Current source function name.
+ * \param format Format string to the message.
+ * \param args Additional arguments for format string.
+ */
+typedef void logfunc(int level, const char *file, const int line,
+ const char *func, const char *format, va_list args);
+
+/**
+ * Initialize the logging module. Messages prior
+ * to this call will be dropped.
+ */
+void spdk_log_open(logfunc *logf);
+
+/**
+ * Close the currently active log. Messages after this call
+ * will be dropped.
+ */
+void spdk_log_close(void);
+
+enum spdk_log_level {
+ /** All messages will be suppressed. */
+ SPDK_LOG_DISABLED = -1,
+ SPDK_LOG_ERROR,
+ SPDK_LOG_WARN,
+ SPDK_LOG_NOTICE,
+ SPDK_LOG_INFO,
+ SPDK_LOG_DEBUG,
+};
+
+/**
+ * Set the log level threshold to log messages. Messages with a higher
+ * level than this are ignored.
+ *
+ * \param level Log level threshold to set to log messages.
+ */
+void spdk_log_set_level(enum spdk_log_level level);
+
+/**
+ * Get the current log level threshold.
+ *
+ * \return the current log level threshold.
+ */
+enum spdk_log_level spdk_log_get_level(void);
+
+/**
+ * Set the log level threshold to include stack trace in log messages.
+ * Messages with a higher level than this will not contain stack trace. You
+ * can use \c SPDK_LOG_DISABLED to completely disable stack trace printing
+ * even if it is supported.
+ *
+ * \note This function has no effect if SPDK is built without stack trace
+ * printing support.
+ *
+ * \param level Log level threshold for stacktrace.
+ */
+void spdk_log_set_backtrace_level(enum spdk_log_level level);
+
+/**
+ * Get the current log level threshold for showing stack trace in log message.
+ *
+ * \return the current log level threshold for stack trace.
+ */
+enum spdk_log_level spdk_log_get_backtrace_level(void);
+
+/**
+ * Set the current log level threshold for printing to stderr.
+ * Messages with a level less than or equal to this level
+ * are also printed to stderr. You can use \c SPDK_LOG_DISABLED to completely
+ * suppress log printing.
+ *
+ * \param level Log level threshold for printing to stderr.
+ */
+void spdk_log_set_print_level(enum spdk_log_level level);
+
+/**
+ * Get the current log level print threshold.
+ *
+ * \return the current log level print threshold.
+ */
+enum spdk_log_level spdk_log_get_print_level(void);
+
+#ifdef DEBUG
+#define SPDK_DEBUGLOG_FLAG_ENABLED(name) spdk_log_get_flag(name)
+#else
+#define SPDK_DEBUGLOG_FLAG_ENABLED(name) false
+#endif
+
+#define SPDK_NOTICELOG(...) \
+ spdk_log(SPDK_LOG_NOTICE, __FILE__, __LINE__, __func__, __VA_ARGS__)
+#define SPDK_WARNLOG(...) \
+ spdk_log(SPDK_LOG_WARN, __FILE__, __LINE__, __func__, __VA_ARGS__)
+#define SPDK_ERRLOG(...) \
+ spdk_log(SPDK_LOG_ERROR, __FILE__, __LINE__, __func__, __VA_ARGS__)
+#define SPDK_PRINTF(...) \
+ spdk_log(SPDK_LOG_NOTICE, NULL, -1, NULL, __VA_ARGS__)
+
+/**
+ * Write messages to the log file. If \c level is set to \c SPDK_LOG_DISABLED,
+ * this log message won't be written.
+ *
+ * \param level Log level threshold.
+ * \param file Name of the current source file.
+ * \param line Current source line number.
+ * \param func Current source function name.
+ * \param format Format string to the message.
+ */
+void spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func,
+ const char *format, ...) __attribute__((__format__(__printf__, 5, 6)));
+
+/**
+ * Same as spdk_log except that instead of being called with variable number of
+ * arguments it is called with an argument list as defined in stdarg.h
+ *
+ * \param level Log level threshold.
+ * \param file Name of the current source file.
+ * \param line Current source line number.
+ * \param func Current source function name.
+ * \param format Format string to the message.
+ * \param ap printf arguments
+ */
+void spdk_vlog(enum spdk_log_level level, const char *file, const int line, const char *func,
+ const char *format, va_list ap);
+
+/**
+ * Log the contents of a raw buffer to a file.
+ *
+ * \param fp File to hold the log.
+ * \param label Label to print to the file.
+ * \param buf Buffer that holds the log information.
+ * \param len Length of buffer to dump.
+ */
+void spdk_log_dump(FILE *fp, const char *label, const void *buf, size_t len);
+
+/**
+ * Check whether the log flag exists and is enabled.
+ *
+ * \return true if enabled, or false otherwise.
+ */
+bool spdk_log_get_flag(const char *flag);
+
+/**
+ * Enable the log flag.
+ *
+ * \param flag Log flag to be enabled.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_log_set_flag(const char *flag);
+
+/**
+ * Clear a log flag.
+ *
+ * \param flag Log flag to clear.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_log_clear_flag(const char *flag);
+
+/**
+ * Show all the log flags and their usage.
+ *
+ * \param f File to hold all the flags' information.
+ * \param log_arg Command line option to set/enable the log flag.
+ */
+void spdk_log_usage(FILE *f, const char *log_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_LOG_H */
diff --git a/src/spdk/include/spdk/lvol.h b/src/spdk/include/spdk/lvol.h
new file mode 100644
index 000000000..ca271a638
--- /dev/null
+++ b/src/spdk/include/spdk/lvol.h
@@ -0,0 +1,299 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Logical Volume Interface
+ */
+
+#ifndef SPDK_LVOL_H
+#define SPDK_LVOL_H
+
+#include "spdk/stdinc.h"
+#include "spdk/blob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_bs_dev;
+struct spdk_lvol_store;
+struct spdk_lvol;
+
+enum lvol_clear_method { /* Clear methods for an lvol; each value equals the matching BLOB_CLEAR_* constant. */
+ LVOL_CLEAR_WITH_DEFAULT = BLOB_CLEAR_WITH_DEFAULT,
+ LVOL_CLEAR_WITH_NONE = BLOB_CLEAR_WITH_NONE,
+ LVOL_CLEAR_WITH_UNMAP = BLOB_CLEAR_WITH_UNMAP,
+ LVOL_CLEAR_WITH_WRITE_ZEROES = BLOB_CLEAR_WITH_WRITE_ZEROES,
+};
+
+enum lvs_clear_method { /* Clear methods for an lvolstore; each value equals the matching BS_CLEAR_* constant. */
+ LVS_CLEAR_WITH_UNMAP = BS_CLEAR_WITH_UNMAP,
+ LVS_CLEAR_WITH_WRITE_ZEROES = BS_CLEAR_WITH_WRITE_ZEROES,
+ LVS_CLEAR_WITH_NONE = BS_CLEAR_WITH_NONE,
+};
+
+/* Must include null terminator. */
+#define SPDK_LVS_NAME_MAX 64
+#define SPDK_LVOL_NAME_MAX 64
+
+/**
+ * Parameters for lvolstore initialization.
+ */
+struct spdk_lvs_opts {
+ uint32_t cluster_sz; /**< Cluster size (NOTE(review): presumably in bytes - confirm). */
+ enum lvs_clear_method clear_method; /**< Clear method; one of the LVS_CLEAR_WITH_* values. */
+ char name[SPDK_LVS_NAME_MAX]; /**< Lvolstore name; SPDK_LVS_NAME_MAX bytes including the null terminator. */
+};
+
+/**
+ * Initialize an spdk_lvs_opts structure to the defaults.
+ *
+ * \param opts Pointer to the spdk_lvs_opts structure to initialize.
+ */
+void spdk_lvs_opts_init(struct spdk_lvs_opts *opts);
+
+/**
+ * Callback definition for lvolstore operations, including handle to lvs.
+ *
+ * \param cb_arg Custom arguments
+ * \param lvol_store Handle to lvol_store or NULL when lvserrno is set
+ * \param lvserrno Error
+ */
+typedef void (*spdk_lvs_op_with_handle_complete)(void *cb_arg, struct spdk_lvol_store *lvol_store,
+ int lvserrno);
+
+/**
+ * Callback definition for lvolstore operations without handle.
+ *
+ * \param cb_arg Custom arguments
+ * \param lvserrno Error
+ */
+typedef void (*spdk_lvs_op_complete)(void *cb_arg, int lvserrno);
+
+
+/**
+ * Callback definition for lvol operations with handle to lvol.
+ *
+ * \param cb_arg Custom arguments
+ * \param lvol Handle to lvol or NULL when lvserrno is set
+ * \param lvolerrno Error
+ */
+typedef void (*spdk_lvol_op_with_handle_complete)(void *cb_arg, struct spdk_lvol *lvol,
+ int lvolerrno);
+
+/**
+ * Callback definition for lvol operations without handle to lvol.
+ *
+ * \param cb_arg Custom arguments
+ * \param lvolerrno Error
+ */
+typedef void (*spdk_lvol_op_complete)(void *cb_arg, int lvolerrno);
+
+/**
+ * Initialize lvolstore on given bs_bdev.
+ *
+ * \param bs_dev This is created on the given bdev by using spdk_bdev_create_bs_dev()
+ * beforehand.
+ * \param o Options for lvolstore.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_lvs_init(struct spdk_bs_dev *bs_dev, struct spdk_lvs_opts *o,
+ spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Rename the given lvolstore.
+ *
+ * \param lvs Pointer to lvolstore.
+ * \param new_name New name of lvs.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvs_rename(struct spdk_lvol_store *lvs, const char *new_name,
+ spdk_lvs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Unload lvolstore.
+ *
+ * All lvols have to be closed beforehand, when doing unload.
+ *
+ * \param lvol_store Handle to lvolstore.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_lvs_unload(struct spdk_lvol_store *lvol_store,
+ spdk_lvs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Destroy lvolstore.
+ *
+ * All lvols have to be closed beforehand, when doing destroy.
+ *
+ * \param lvol_store Handle to lvolstore.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_lvs_destroy(struct spdk_lvol_store *lvol_store,
+ spdk_lvs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Create lvol on given lvolstore with specified size.
+ *
+ * \param lvs Handle to lvolstore.
+ * \param name Name of lvol.
+ * \param sz size of lvol in bytes.
+ * \param thin_provisioned Enables thin provisioning.
+ * \param clear_method Changes default data clusters clear method
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ *
+ * \return 0 on success, negative errno on failure.
+ */
+int spdk_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz,
+ bool thin_provisioned, enum lvol_clear_method clear_method,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+/**
+ * Create snapshot of given lvol.
+ *
+ * \param lvol Handle to lvol.
+ * \param snapshot_name Name of created snapshot.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Create clone of given snapshot.
+ *
+ * \param lvol Handle to lvol snapshot.
+ * \param clone_name Name of created clone.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Rename lvol with new_name.
+ *
+ * \param lvol Handle to lvol.
+ * \param new_name new name for lvol.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void
+spdk_lvol_rename(struct spdk_lvol *lvol, const char *new_name,
+ spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * \brief Returns if it is possible to delete an lvol (i.e. lvol is not a snapshot that has at least one clone).
+ * \param lvol Handle to lvol
+ * \return true if the lvol can be deleted, false otherwise.
+ */
+bool spdk_lvol_deletable(struct spdk_lvol *lvol);
+
+/**
+ * Close lvol and remove information about lvol from its lvolstore.
+ *
+ * \param lvol Handle to lvol.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Close lvol, but information is kept on lvolstore.
+ *
+ * \param lvol Handle to lvol.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvol_close(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Get I/O channel of bdev associated with specified lvol.
+ *
+ * \param lvol Handle to lvol.
+ *
+ * \return a pointer to the I/O channel.
+ */
+struct spdk_io_channel *spdk_lvol_get_io_channel(struct spdk_lvol *lvol);
+
+/**
+ * Load lvolstore from the given blobstore device.
+ *
+ * \param bs_dev Pointer to the blobstore device.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvs_load(struct spdk_bs_dev *bs_dev, spdk_lvs_op_with_handle_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Open a lvol.
+ *
+ * \param lvol Handle to lvol.
+ * \param cb_fn Completion callback.
+ * \param cb_arg Completion callback custom arguments.
+ */
+void spdk_lvol_open(struct spdk_lvol *lvol, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * Inflate lvol
+ *
+ * \param lvol Handle to lvol
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void spdk_lvol_inflate(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Decouple parent of lvol
+ *
+ * \param lvol Handle to lvol
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void spdk_lvol_decouple_parent(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_LVOL_H */
diff --git a/src/spdk/include/spdk/memory.h b/src/spdk/include/spdk/memory.h
new file mode 100644
index 000000000..a2cb19669
--- /dev/null
+++ b/src/spdk/include/spdk/memory.h
@@ -0,0 +1,60 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_MEMORY_H
+#define SPDK_MEMORY_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SHIFT_2MB 21 /* (1 << 21) == 2MB */
+#define VALUE_2MB (1ULL << SHIFT_2MB) /* 2MB as an unsigned long long */
+#define MASK_2MB (VALUE_2MB - 1) /* low SHIFT_2MB bits set */
+
+#define SHIFT_4KB 12 /* (1 << 12) == 4KB */
+#define VALUE_4KB (1ULL << SHIFT_4KB) /* 4KB as an unsigned long long */
+#define MASK_4KB (VALUE_4KB - 1) /* low SHIFT_4KB bits set */
+
+#define _2MB_OFFSET(ptr) (((uintptr_t)(ptr)) & MASK_2MB) /* offset of ptr from the preceding 2MB boundary */
+#define _2MB_PAGE(ptr) FLOOR_2MB((uintptr_t)(ptr)) /* ptr rounded down to a 2MB boundary */
+#define FLOOR_2MB(x) (((uintptr_t)(x)) & ~MASK_2MB) /* x rounded down to a multiple of 2MB */
+#define CEIL_2MB(x) FLOOR_2MB(((uintptr_t)(x)) + VALUE_2MB - 1) /* x rounded up to a multiple of 2MB */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_MEMORY_H */
diff --git a/src/spdk/include/spdk/mmio.h b/src/spdk/include/spdk/mmio.h
new file mode 100644
index 000000000..68b16605f
--- /dev/null
+++ b/src/spdk/include/spdk/mmio.h
@@ -0,0 +1,139 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Memory-mapped I/O utility functions
+ */
+
+#ifndef SPDK_MMIO_H
+#define SPDK_MMIO_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/barrier.h"
+
+#ifdef __x86_64__
+#define SPDK_MMIO_64BIT 1 /* Can do atomic 64-bit memory read/write (over PCIe) */
+#else
+#define SPDK_MMIO_64BIT 0
+#endif
+
+/**
+ * Read an 8-bit value from a memory-mapped I/O address.
+ *
+ * \param addr Address to read from.
+ *
+ * \return the value read.
+ */
+static inline uint8_t
+spdk_mmio_read_1(const volatile uint8_t *addr)
+{
+	uint8_t val;
+
+	/* Barrier first, then a single volatile load. */
+	spdk_compiler_barrier();
+	val = *addr;
+
+	return val;
+}
+
+/**
+ * Write an 8-bit value to a memory-mapped I/O address.
+ *
+ * \param addr Address to write to.
+ * \param val Value to write.
+ */
+static inline void
+spdk_mmio_write_1(volatile uint8_t *addr, uint8_t val)
+{
+	/* Barrier first, then a single volatile store. */
+	spdk_compiler_barrier();
+
+	*addr = val;
+}
+
+/**
+ * Read a 16-bit value from a memory-mapped I/O address.
+ *
+ * \param addr Address to read from.
+ *
+ * \return the value read.
+ */
+static inline uint16_t
+spdk_mmio_read_2(const volatile uint16_t *addr)
+{
+	uint16_t val;
+
+	/* Barrier first, then a single volatile load. */
+	spdk_compiler_barrier();
+	val = *addr;
+
+	return val;
+}
+
+/**
+ * Write a 16-bit value to a memory-mapped I/O address.
+ *
+ * \param addr Address to write to.
+ * \param val Value to write.
+ */
+static inline void
+spdk_mmio_write_2(volatile uint16_t *addr, uint16_t val)
+{
+	/* Barrier first, then a single volatile store. */
+	spdk_compiler_barrier();
+
+	*addr = val;
+}
+
+/**
+ * Read a 32-bit value from a memory-mapped I/O address.
+ *
+ * \param addr Address to read from.
+ *
+ * \return the value read.
+ */
+static inline uint32_t
+spdk_mmio_read_4(const volatile uint32_t *addr)
+{
+	uint32_t val;
+
+	/* Barrier first, then a single volatile load. */
+	spdk_compiler_barrier();
+	val = *addr;
+
+	return val;
+}
+
+/**
+ * Write a 32-bit value to a memory-mapped I/O address.
+ *
+ * \param addr Address to write to.
+ * \param val Value to write.
+ */
+static inline void
+spdk_mmio_write_4(volatile uint32_t *addr, uint32_t val)
+{
+	/* Barrier first, then a single volatile store. */
+	spdk_compiler_barrier();
+
+	*addr = val;
+}
+
+/**
+ * Read a 64-bit value from a memory-mapped I/O address.
+ *
+ * When SPDK_MMIO_64BIT is non-zero, a single 64-bit load is issued. Otherwise
+ * the value is assembled from two 32-bit loads, addr32[0] first and then
+ * addr32[1].
+ *
+ * The pointer is const-qualified to match spdk_mmio_read_1/2/4(); callers
+ * passing a non-const pointer are unaffected.
+ *
+ * \param addr Address to read from.
+ *
+ * \return the 64-bit value read.
+ */
+static inline uint64_t
+spdk_mmio_read_8(const volatile uint64_t *addr)
+{
+	uint64_t val;
+	const volatile uint32_t *addr32 = (const volatile uint32_t *)addr;
+
+	spdk_compiler_barrier();
+
+	if (SPDK_MMIO_64BIT) {
+		val = *addr;
+	} else {
+		/*
+		 * Read lower 4 bytes before upper 4 bytes.
+		 * This particular order is required by I/OAT.
+		 * If the other order is required, use a pair of spdk_mmio_read_4() calls.
+		 */
+		val = addr32[0];
+		val |= (uint64_t)addr32[1] << 32;
+	}
+
+	return val;
+}
+
+/**
+ * Write a 64-bit value to a memory-mapped I/O address.
+ *
+ * When SPDK_MMIO_64BIT is non-zero, a single 64-bit store is issued. Otherwise
+ * the value is written as two 32-bit stores: the low 32 bits of val to the
+ * first 32-bit word, then the high 32 bits to the second.
+ *
+ * \param addr Address to write to.
+ * \param val Value to write.
+ */
+static inline void
+spdk_mmio_write_8(volatile uint64_t *addr, uint64_t val)
+{
+	spdk_compiler_barrier();
+
+	if (!SPDK_MMIO_64BIT) {
+		volatile uint32_t *halves = (volatile uint32_t *)addr;
+
+		/* Two 32-bit stores, first word before second word. */
+		halves[0] = (uint32_t)val;
+		halves[1] = (uint32_t)(val >> 32);
+		return;
+	}
+
+	*addr = val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nbd.h b/src/spdk/include/spdk/nbd.h
new file mode 100644
index 000000000..be57c09cd
--- /dev/null
+++ b/src/spdk/include/spdk/nbd.h
@@ -0,0 +1,102 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Network block device layer
+ */
+
+#ifndef SPDK_NBD_H_
+#define SPDK_NBD_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_bdev;
+struct spdk_nbd_disk;
+struct spdk_json_write_ctx;
+
+/**
+ * Initialize the network block device layer.
+ *
+ * \return 0 on success.
+ */
+int spdk_nbd_init(void);
+
+/**
+ * Stop and close all the running network block devices.
+ */
+void spdk_nbd_fini(void);
+
+/**
+ * Called when an NBD device has been started.
+ * On success, rc is assigned 0; On failure, rc is assigned negated errno.
+ *
+ * \param cb_arg Callback argument.
+ * \param nbd NBD disk handle (NOTE(review): presumably the started device - confirm).
+ * \param rc 0 on success, negated errno on failure.
+ */
+typedef void (*spdk_nbd_start_cb)(void *cb_arg, struct spdk_nbd_disk *nbd,
+ int rc);
+
+/**
+ * Start a network block device backed by the bdev.
+ *
+ * \param bdev_name Name of bdev exposed as a network block device.
+ * \param nbd_path Path to the registered network block device.
+ * \param cb_fn Callback to be always called.
+ * \param cb_arg Passed to cb_fn.
+ */
+void spdk_nbd_start(const char *bdev_name, const char *nbd_path,
+ spdk_nbd_start_cb cb_fn, void *cb_arg);
+
+/**
+ * Stop the running network block device safely.
+ *
+ * \param nbd A pointer to the network block device to stop.
+ */
+void spdk_nbd_stop(struct spdk_nbd_disk *nbd);
+
+/**
+ * Get the local filesystem path used for the network block device.
+ *
+ * \param nbd A pointer to the network block device.
+ *
+ * \return the path string.
+ */
+const char *spdk_nbd_get_path(struct spdk_nbd_disk *nbd);
+
+/**
+ * Write NBD subsystem configuration into provided JSON context.
+ *
+ * \param w JSON write context
+ */
+void spdk_nbd_write_config_json(struct spdk_json_write_ctx *w);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/net.h b/src/spdk/include/spdk/net.h
new file mode 100644
index 000000000..e49322302
--- /dev/null
+++ b/src/spdk/include/spdk/net.h
@@ -0,0 +1,120 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Net framework abstraction layer
+ */
+
+#ifndef SPDK_NET_H
+#define SPDK_NET_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_sock;
+
+struct spdk_net_framework {
+ const char *name; /**< Framework name. */
+
+ void (*init)(void); /**< Initialization hook; takes no arguments and returns nothing. */
+ void (*fini)(void); /**< Finish hook; takes no arguments and returns nothing. */
+
+ STAILQ_ENTRY(spdk_net_framework) link; /**< STAILQ list linkage. */
+};
+
+/**
+ * Register a net framework.
+ *
+ * \param frame Net framework to register.
+ */
+void spdk_net_framework_register(struct spdk_net_framework *frame);
+
+#define SPDK_NET_FRAMEWORK_REGISTER(name, frame) \
+static void __attribute__((constructor)) net_framework_register_##name(void) \
+{ \
+ spdk_net_framework_register(frame); \
+}
+
+/**
+ * Initialize the network interfaces by getting information through netlink socket.
+ *
+ * \return 0 on success, 1 on failure.
+ */
+int spdk_interface_init(void);
+
+/**
+ * Destroy the network interfaces.
+ */
+void spdk_interface_destroy(void);
+
+/**
+ * Net framework initialization callback.
+ *
+ * \param cb_arg Callback argument.
+ * \param rc 0 if net framework initialized successfully or negative errno if it failed.
+ */
+typedef void (*spdk_net_init_cb)(void *cb_arg, int rc);
+
+/**
+ * Net framework finish callback.
+ *
+ * \param cb_arg Callback argument.
+ */
+typedef void (*spdk_net_fini_cb)(void *cb_arg);
+
+void spdk_net_framework_init_next(int rc);
+
+/**
+ * Start all registered frameworks.
+ *
+ * \param cb_fn Initialization callback (see spdk_net_init_cb).
+ * \param cb_arg Callback argument.
+ */
+void spdk_net_framework_start(spdk_net_init_cb cb_fn, void *cb_arg);
+
+void spdk_net_framework_fini_next(void);
+
+/**
+ * Stop all registered frameworks.
+ *
+ * \param cb_fn Finish callback (see spdk_net_fini_cb).
+ * \param cb_arg Callback argument.
+ */
+void spdk_net_framework_fini(spdk_net_fini_cb cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_NET_H */
diff --git a/src/spdk/include/spdk/notify.h b/src/spdk/include/spdk/notify.h
new file mode 100644
index 000000000..fa9746503
--- /dev/null
+++ b/src/spdk/include/spdk/notify.h
@@ -0,0 +1,126 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NOTIFY_H
+#define SPDK_NOTIFY_H
+
+#include "spdk/stdinc.h"
+#include "spdk/json.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque event type.
+ */
+struct spdk_notify_type;
+
+typedef int (*spdk_notify_foreach_type_cb)(const struct spdk_notify_type *type, void *ctx);
+
+#define SPDK_NOTIFY_MAX_NAME_SIZE 128
+#define SPDK_NOTIFY_MAX_CTX_SIZE 128
+
+struct spdk_notify_event {
+ char type[SPDK_NOTIFY_MAX_NAME_SIZE]; /**< Notification type; buffer of SPDK_NOTIFY_MAX_NAME_SIZE bytes. */
+ char ctx[SPDK_NOTIFY_MAX_CTX_SIZE]; /**< Notification context; buffer of SPDK_NOTIFY_MAX_CTX_SIZE bytes. */
+};
+
+/**
+ * Callback type for event enumeration.
+ *
+ * \param idx Event index
+ * \param event Event data
+ * \param ctx User context
+ * \return Non zero to break iteration.
+ */
+typedef int (*spdk_notify_foreach_event_cb)(uint64_t idx, const struct spdk_notify_event *event,
+ void *ctx);
+
+/**
+ * Register \c type as new notification type.
+ *
+ * \note This function is thread safe.
+ *
+ * \param type New notification type to register.
+ * \return registered notification type or NULL on failure.
+ */
+struct spdk_notify_type *spdk_notify_type_register(const char *type);
+
+/**
+ * Return name of the notification type.
+ *
+ * \param type Notification type we are talking about.
+ * \return Name of notification type.
+ */
+const char *spdk_notify_type_get_name(const struct spdk_notify_type *type);
+
+/**
+ * Call cb_fn for all event types.
+ *
+ * \note Whole function call is under lock so user callback should not sleep.
+ * \param cb_fn User callback function, called for each event type.
+ * \param ctx User context
+ */
+void spdk_notify_foreach_type(spdk_notify_foreach_type_cb cb_fn, void *ctx);
+
+/**
+ * Send given notification.
+ *
+ * \param type Notification type
+ * \param ctx Notification context
+ *
+ * \return Event index.
+ */
+uint64_t spdk_notify_send(const char *type, const char *ctx);
+
+/**
+ * Call cb_fn with events from given range.
+ *
+ * \note Whole function call is under lock so user callback should not sleep.
+ *
+ * \param start_idx First event index
+ * \param max Maximum number of invocations of user callback function.
+ * \param cb_fn User callback function. Return non-zero to break iteration.
+ * \param ctx User context
+ * \return Number of user callback invocations
+ */
+uint64_t spdk_notify_foreach_event(uint64_t start_idx, uint64_t max,
+ spdk_notify_foreach_event_cb cb_fn, void *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_NOTIFY_H */
diff --git a/src/spdk/include/spdk/nvme.h b/src/spdk/include/spdk/nvme.h
new file mode 100644
index 000000000..3f28f9e24
--- /dev/null
+++ b/src/spdk/include/spdk/nvme.h
@@ -0,0 +1,3236 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * NVMe driver public API
+ */
+
+#ifndef SPDK_NVME_H
+#define SPDK_NVME_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/env.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/nvmf_spec.h"
+
+#define SPDK_NVME_TRANSPORT_NAME_FC "FC"
+#define SPDK_NVME_TRANSPORT_NAME_PCIE "PCIE"
+#define SPDK_NVME_TRANSPORT_NAME_RDMA "RDMA"
+#define SPDK_NVME_TRANSPORT_NAME_TCP "TCP"
+
+#define SPDK_NVMF_PRIORITY_MAX_LEN 4
+
+/**
+ * Opaque handle to a controller. Returned by spdk_nvme_probe()'s attach_cb.
+ */
+struct spdk_nvme_ctrlr;
+
+/**
+ * NVMe controller initialization options.
+ *
+ * A pointer to this structure will be provided for each probe callback from spdk_nvme_probe() to
+ * allow the user to request non-default options, and the actual options enabled on the controller
+ * will be provided during the attach callback.
+ *
+ * Default values can be obtained with spdk_nvme_ctrlr_get_default_ctrlr_opts().
+ */
+struct spdk_nvme_ctrlr_opts {
+ /**
+ * Number of I/O queues to request (used to set Number of Queues feature)
+ */
+ uint32_t num_io_queues;
+
+ /**
+ * Enable submission queue in controller memory buffer
+ */
+ bool use_cmb_sqs;
+
+ /**
+ * Don't initiate shutdown processing
+ */
+ bool no_shn_notification;
+
+ /**
+ * Type of arbitration mechanism
+ */
+ enum spdk_nvme_cc_ams arb_mechanism;
+
+ /**
+ * Maximum number of commands that the controller may launch at one time. The
+ * value is expressed as a power of two; valid values are from 0-7, and 7 means
+ * unlimited.
+ */
+ uint8_t arbitration_burst;
+
+ /**
+ * Number of commands that may be executed from the low priority queue in each
+ * arbitration round. This field is only valid when arb_mechanism is set to
+ * SPDK_NVME_CC_AMS_WRR (weighted round robin).
+ */
+ uint8_t low_priority_weight;
+
+ /**
+ * Number of commands that may be executed from the medium priority queue in each
+ * arbitration round. This field is only valid when arb_mechanism is set to
+ * SPDK_NVME_CC_AMS_WRR (weighted round robin).
+ */
+ uint8_t medium_priority_weight;
+
+ /**
+ * Number of commands that may be executed from the high priority queue in each
+ * arbitration round. This field is only valid when arb_mechanism is set to
+ * SPDK_NVME_CC_AMS_WRR (weighted round robin).
+ */
+ uint8_t high_priority_weight;
+
+ /**
+ * Keep alive timeout in milliseconds (0 = disabled).
+ *
+ * The NVMe library will set the Keep Alive Timer feature to this value and automatically
+ * send Keep Alive commands as needed. The library user must call
+ * spdk_nvme_ctrlr_process_admin_completions() periodically to ensure Keep Alive commands
+ * are sent.
+ */
+ uint32_t keep_alive_timeout_ms;
+
+ /**
+ * Number of retries to attempt when there is an issue with the transport.
+ */
+ uint8_t transport_retry_count;
+
+ /**
+ * The queue depth of each NVMe I/O queue.
+ */
+ uint32_t io_queue_size;
+
+ /**
+ * The host NQN to use when connecting to NVMe over Fabrics controllers.
+ *
+ * Unused for local PCIe-attached NVMe devices.
+ */
+ char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
+
+ /**
+ * The number of requests to allocate for each NVMe I/O queue.
+ *
+ * This should be at least as large as io_queue_size.
+ *
+ * A single I/O may allocate more than one request, since splitting may be necessary to
+ * conform to the device's maximum transfer size, PRP list compatibility requirements,
+ * or driver-assisted striping.
+ */
+ uint32_t io_queue_requests;
+
+ /**
+ * Source address for NVMe-oF connections.
+ * Set src_addr and src_svcid to empty strings if no source address should be
+ * specified.
+ */
+ char src_addr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
+
+ /**
+ * Source service ID (port) for NVMe-oF connections.
+ * Set src_addr and src_svcid to empty strings if no source address should be
+ * specified.
+ */
+ char src_svcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1];
+
+ /**
+ * The host identifier to use when connecting to controllers with 64-bit host ID support.
+ *
+ * Set to all zeroes to specify that no host ID should be provided to the controller.
+ */
+ uint8_t host_id[8];
+
+ /**
+ * The host identifier to use when connecting to controllers with extended (128-bit) host ID support.
+ *
+ * Set to all zeroes to specify that no host ID should be provided to the controller.
+ */
+ uint8_t extended_host_id[16];
+
+ /**
+ * The I/O command set to select.
+ *
+ * If the requested command set is not supported, the controller
+ * initialization process will not proceed. By default, the NVM
+ * command set is used.
+ */
+ enum spdk_nvme_cc_css command_set;
+
+ /**
+ * Admin commands timeout in milliseconds (0 = no timeout).
+ *
+ * The timeout value is used for admin commands submitted internally
+ * by the nvme driver during initialization, before the user is able
+ * to call spdk_nvme_ctrlr_register_timeout_callback(). By default,
+ * this is set to 120 seconds; users can change it in the probing
+ * callback.
+ */
+ uint32_t admin_timeout_ms;
+
+ /**
+ * Used by the TCP transport.
+ *
+ * When set to true, a header digest is used for the header in the NVMe/TCP PDU.
+ */
+ bool header_digest;
+
+ /**
+ * Used by the TCP transport.
+ *
+ * When set to true, a data digest is used for the data in the NVMe/TCP PDU.
+ */
+ bool data_digest;
+
+ /**
+ * Disable logging of requests that are completed with error status.
+ *
+ * Defaults to 'false' (errors are logged).
+ */
+ bool disable_error_logging;
+
+ /**
+ * Used by the RDMA transport.
+ *
+ * Specify the transport ACK timeout. The value should be in range 0-31 where 0 means
+ * use driver-specific default value. The value is applied to each RDMA qpair
+ * and affects the time that qpair waits for transport layer acknowledgement
+ * until it retransmits a packet. The value should be chosen empirically
+ * to meet the needs of a particular application. A low value means less time
+ * the qpair waits for ACK which can increase the number of retransmissions.
+ * A large value can increase the time the connection is closed.
+ * The value of ACK timeout is calculated according to the formula
+ * 4.096 * 2^(transport_ack_timeout) usec.
+ */
+ uint8_t transport_ack_timeout;
+
+ /**
+ * The queue depth of NVMe Admin queue.
+ */
+ uint16_t admin_queue_size;
+
+ /**
+ * The size of spdk_nvme_ctrlr_opts according to the caller of this library, used for ABI
+ * compatibility. The library uses this field to know how many fields in this
+ * structure are valid, and will populate any remaining fields with default values.
+ */
+ size_t opts_size;
+};
+
+/**
+ * Indicate whether a ctrlr handle is associated with a Discovery controller.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return true if a discovery controller, else false.
+ */
+bool spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the default options for the creation of a specific NVMe controller.
+ *
+ * \param[out] opts Will be filled with the default option.
+ * \param opts_size Must be set to sizeof(struct spdk_nvme_ctrlr_opts).
+ */
+void spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts,
+ size_t opts_size);
+
+/**
+ * Reason for qpair disconnect at the transport layer.
+ *
+ * NONE implies that the qpair is still connected while UNKNOWN means that the
+ * qpair is disconnected, but the cause was not apparent.
+ */
+enum spdk_nvme_qp_failure_reason {
+ SPDK_NVME_QPAIR_FAILURE_NONE = 0, /**< The qpair is still connected. */
+ SPDK_NVME_QPAIR_FAILURE_LOCAL, /**< NOTE(review): presumably the disconnect originated on the local side - confirm. */
+ SPDK_NVME_QPAIR_FAILURE_REMOTE, /**< NOTE(review): presumably the disconnect originated on the remote side - confirm. */
+ SPDK_NVME_QPAIR_FAILURE_UNKNOWN, /**< The qpair is disconnected, but the cause was not apparent. */
+};
+
+typedef enum spdk_nvme_qp_failure_reason spdk_nvme_qp_failure_reason;
+
+/**
+ * NVMe library transports
+ *
+ * NOTE: These are mapped directly to the NVMe over Fabrics TRTYPE values, except for PCIe
+ * and CUSTOM. PCIe is a special case since NVMe over Fabrics does not define a TRTYPE for
+ * local PCIe, and CUSTOM is not defined by the specification.
+ *
+ * Currently, this uses 256 for PCIe which is intentionally outside of the 8-bit range of TRTYPE.
+ * If the NVMe-oF specification ever defines a PCIe TRTYPE, this should be updated.
+ */
+enum spdk_nvme_transport_type {
+ /**
+ * PCIe Transport (locally attached devices)
+ */
+ SPDK_NVME_TRANSPORT_PCIE = 256,
+
+ /**
+ * RDMA Transport (RoCE, iWARP, etc.)
+ */
+ SPDK_NVME_TRANSPORT_RDMA = SPDK_NVMF_TRTYPE_RDMA,
+
+ /**
+ * Fibre Channel (FC) Transport
+ */
+ SPDK_NVME_TRANSPORT_FC = SPDK_NVMF_TRTYPE_FC,
+
+ /**
+ * TCP Transport
+ */
+ SPDK_NVME_TRANSPORT_TCP = SPDK_NVMF_TRTYPE_TCP,
+
+ /**
+ * Custom Transport (Not spec defined)
+ *
+ * Like the PCIe value, 4096 lies outside of the 8-bit range of TRTYPE.
+ */
+ SPDK_NVME_TRANSPORT_CUSTOM = 4096,
+};
+
+/* typedef added for coding style reasons */
+typedef enum spdk_nvme_transport_type spdk_nvme_transport_type_t;
+
+/**
+ * NVMe transport identifier.
+ *
+ * This identifies a unique endpoint on an NVMe fabric.
+ *
+ * A string representation of a transport ID may be converted to this type using
+ * spdk_nvme_transport_id_parse().
+ */
+struct spdk_nvme_transport_id {
+ /**
+ * NVMe transport string.
+ */
+ char trstring[SPDK_NVMF_TRSTRING_MAX_LEN + 1];
+
+ /**
+ * NVMe transport type.
+ */
+ enum spdk_nvme_transport_type trtype;
+
+ /**
+ * Address family of the transport address.
+ *
+ * For PCIe, this value is ignored.
+ */
+ enum spdk_nvmf_adrfam adrfam;
+
+ /**
+ * Transport address of the NVMe-oF endpoint. For transports which use IP
+ * addressing (e.g. RDMA), this should be an IP address. For PCIe, this
+ * can either be a zero length string (the whole bus) or a PCI address
+ * in the format DDDD:BB:DD.FF or DDDD.BB.DD.FF. For FC the string is
+ * formatted as: nn-0xWWNN:pn-0xWWPN where WWNN is the Node_Name of the
+ * target NVMe_Port and WWPN is the N_Port_Name of the target NVMe_Port.
+ */
+ char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
+
+ /**
+ * Transport service id of the NVMe-oF endpoint. For transports which use
+ * IP addressing (e.g. RDMA), this field should be the port number. For PCIe
+ * and FC, this is always a zero length string.
+ */
+ char trsvcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1];
+
+ /**
+ * Subsystem NQN of the NVMe over Fabrics endpoint. May be a zero length string.
+ */
+ char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
+
+ /**
+ * The transport connection priority of the NVMe-oF endpoint. Currently this is
+ * only supported by the posix based sock implementation on the kernel TCP stack.
+ * More information about this field can be found in the socket(7) man page.
+ */
+ int priority;
+};
+
+/**
+ * NVMe host identifier
+ *
+ * Used for defining the host identity for an NVMe-oF connection.
+ *
+ * In terms of configuration, this object can be considered a subtype of TransportID.
+ * Please see etc/spdk/nvmf.conf.in for more details.
+ *
+ * A string representation of this type may be converted to this type using
+ * spdk_nvme_host_id_parse().
+ */
+struct spdk_nvme_host_id {
+ /**
+ * Transport address to be used by the host when connecting to the NVMe-oF endpoint.
+ * May be an IP address or a zero length string for transports which
+ * use IP addressing (e.g. RDMA).
+ * For PCIe and FC this is always a zero length string.
+ */
+ char hostaddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
+
+ /**
+ * Transport service ID used by the host when connecting to the NVMe-oF endpoint.
+ * May be a port number or a zero length string for transports which
+ * use IP addressing (e.g. RDMA).
+ * For PCIe and FC this is always a zero length string.
+ */
+ char hostsvcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1];
+};
+
+/**
+ * Controller support flags
+ *
+ * Used for identifying which optional features the controller supports.
+ * Each flag occupies a distinct bit.
+ */
+enum spdk_nvme_ctrlr_flags {
+ SPDK_NVME_CTRLR_SGL_SUPPORTED = 0x1, /**< SGL is supported */
+ SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED = 0x2, /**< security send/receive is supported */
+ SPDK_NVME_CTRLR_WRR_SUPPORTED = 0x4, /**< Weighted Round Robin is supported */
+ SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED = 0x8, /**< Compare and write fused operations supported */
+ SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT = 0x10, /**< Dword alignment is required for SGL */
+};
+
+/**
+ * Parse the string representation of a transport ID.
+ *
+ * \param trid Output transport ID structure (must be allocated and initialized by caller).
+ * \param str Input string representation of a transport ID to parse.
+ *
+ * str must be a zero-terminated C string containing one or more key:value pairs
+ * separated by whitespace.
+ *
+ * Key | Value
+ * ------------ | -----
+ * trtype | Transport type (e.g. PCIe, RDMA)
+ * adrfam | Address family (e.g. IPv4, IPv6)
+ * traddr | Transport address (e.g. 0000:04:00.0 for PCIe, 192.168.100.8 for RDMA, or WWN for FC)
+ * trsvcid | Transport service identifier (e.g. 4420)
+ * subnqn | Subsystem NQN
+ *
+ * Unspecified fields of trid are left unmodified, so the caller must initialize
+ * trid (for example, memset() to 0) before calling this function.
+ *
+ * \return 0 if parsing was successful and trid is filled out, or negated errno
+ * values on failure.
+ */
+int spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str);
+
+
+/**
+ * Fill in the trtype and trstring fields of this trid based on a known transport type.
+ *
+ * \param trid The trid to fill out.
+ * \param trtype The transport type to use for filling the trid fields. Only valid for
+ * transport types referenced in the NVMe-oF spec.
+ */
+void spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
+ enum spdk_nvme_transport_type trtype);
+
+/**
+ * Parse the string representation of a host ID.
+ *
+ * \param hostid Output host ID structure (must be allocated and initialized by caller).
+ * \param str Input string representation of a transport ID to parse (hostid is a sub-configuration).
+ *
+ * str must be a zero-terminated C string containing one or more key:value pairs
+ * separated by whitespace.
+ *
+ * Key | Value
+ * -------------- | -----
+ * hostaddr | Transport address (e.g. 192.168.100.8 for RDMA)
+ * hostsvcid | Transport service identifier (e.g. 4420)
+ *
+ * Unspecified fields of hostid are left unmodified, so the caller must initialize
+ * hostid (for example, memset() to 0) before calling this function.
+ *
+ * This function should not be used with Fibre Channel or PCIe as these transports
+ * do not require host information for connections.
+ *
+ * \return 0 if parsing was successful and hostid is filled out, or negated errno
+ * values on failure.
+ */
+int spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str);
+
+/**
+ * Parse the string representation of a transport ID transport type into the trid struct.
+ *
+ * \param trid The trid to write to
+ * \param trstring Input string representation of transport type (e.g. "PCIe", "RDMA").
+ *
+ * \return 0 if parsing was successful and trtype is filled out, or negated errno
+ * values if the provided string was an invalid transport string.
+ */
+int spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid,
+ const char *trstring);
+
+/**
+ * Parse the string representation of a transport ID transport type.
+ *
+ * \param trtype Output transport type (allocated by caller).
+ * \param str Input string representation of transport type (e.g. "PCIe", "RDMA").
+ *
+ * \return 0 if parsing was successful and trtype is filled out, or negated errno
+ * values on failure.
+ */
+int spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str);
+
+/**
+ * Look up the string representation of a transport ID transport type.
+ *
+ * \param trtype Transport type to convert.
+ *
+ * \return static string constant describing trtype, or NULL if trtype not found.
+ */
+const char *spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype);
+
+/**
+ * Look up the string representation of a transport ID address family.
+ *
+ * \param adrfam Address family to convert.
+ *
+ * \return static string constant describing adrfam, or NULL if adrfam not found.
+ */
+const char *spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam);
+
+/**
+ * Parse the string representation of a transport ID address family.
+ *
+ * \param adrfam Output address family (allocated by caller).
+ * \param str Input string representation of address family (e.g. "IPv4", "IPv6").
+ *
+ * \return 0 if parsing was successful and adrfam is filled out, or negated errno
+ * values on failure.
+ */
+int spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str);
+
+/**
+ * Compare two transport IDs.
+ *
+ * The result of this function may be used to sort transport IDs in a consistent
+ * order; however, the comparison result is not guaranteed to be consistent across
+ * library versions.
+ *
+ * This function uses a case-insensitive comparison for string fields, but it does
+ * not otherwise normalize the transport ID. It is the caller's responsibility to
+ * provide the transport IDs in a consistent format.
+ *
+ * \param trid1 First transport ID to compare.
+ * \param trid2 Second transport ID to compare.
+ *
+ * \return 0 if trid1 == trid2, less than 0 if trid1 < trid2, greater than 0 if
+ * trid1 > trid2.
+ */
+int spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
+ const struct spdk_nvme_transport_id *trid2);
+
+/**
+ * Parse the string representation of PI check settings (prchk:guard|reftag)
+ *
+ * \param prchk_flags Output PI check flags.
+ * \param str Input string representation of PI check settings.
+ *
+ * \return 0 if parsing was successful and prchk_flags is set, or negated errno
+ * values on failure.
+ */
+int spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str);
+
+/**
+ * Look up the string representation of PI check settings (prchk:guard|reftag)
+ *
+ * \param prchk_flags PI check flags to convert.
+ *
+ * \return static string constant describing PI check settings. If prchk_flags is 0,
+ * NULL is returned.
+ */
+const char *spdk_nvme_prchk_flags_str(uint32_t prchk_flags);
+
+/**
+ * Determine whether the NVMe library can handle a specific NVMe over Fabrics
+ * transport type.
+ *
+ * \param trtype NVMe over Fabrics transport type to check.
+ *
+ * \return true if trtype is supported or false if it is not supported or if
+ * SPDK_NVME_TRANSPORT_CUSTOM is supplied as trtype since it can represent multiple
+ * transports.
+ */
+bool spdk_nvme_transport_available(enum spdk_nvme_transport_type trtype);
+
+/**
+ * Determine whether the NVMe library can handle a specific NVMe over Fabrics
+ * transport type.
+ *
+ * \param transport_name Name of the NVMe over Fabrics transport type to check.
+ *
+ * \return true if transport_name is supported or false if it is not supported.
+ */
+bool spdk_nvme_transport_available_by_name(const char *transport_name);
+
+/**
+ * Callback for spdk_nvme_probe() enumeration.
+ *
+ * \param cb_ctx Opaque value passed to spdk_nvme_probe().
+ * \param trid NVMe transport identifier.
+ * \param opts NVMe controller initialization options. This structure will be
+ * populated with the default values on entry, and the user callback may update
+ * any options to request a different value. The controller may not support all
+ * requested parameters, so the final values will be provided during the attach
+ * callback.
+ *
+ * \return true to attach to this device.
+ */
+typedef bool (*spdk_nvme_probe_cb)(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts);
+
+/**
+ * Callback for spdk_nvme_attach() to report a device that has been attached to
+ * the userspace NVMe driver.
+ *
+ * \param cb_ctx Opaque value passed to spdk_nvme_probe().
+ * \param trid NVMe transport identifier.
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param opts NVMe controller initialization options that were actually used.
+ * Options may differ from the requested options from the attach call depending
+ * on what the controller supports.
+ */
+typedef void (*spdk_nvme_attach_cb)(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr *ctrlr,
+ const struct spdk_nvme_ctrlr_opts *opts);
+
+/**
+ * Callback for spdk_nvme_remove() to report that a device attached to the userspace
+ * NVMe driver has been removed from the system.
+ *
+ * The controller will remain in a failed state (any new I/O submitted will fail).
+ *
+ * The controller must be detached from the userspace driver by calling spdk_nvme_detach()
+ * once the controller is no longer in use. It is up to the library user to ensure
+ * that no other threads are using the controller before calling spdk_nvme_detach().
+ *
+ * \param cb_ctx Opaque value passed to spdk_nvme_probe().
+ * \param ctrlr NVMe controller instance that was removed.
+ */
+typedef void (*spdk_nvme_remove_cb)(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Enumerate the bus indicated by the transport ID and attach the userspace NVMe
+ * driver to each device found if desired.
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using any NVMe devices.
+ *
+ * If called from a secondary process, only devices that have been attached to
+ * the userspace driver in the primary process will be probed.
+ *
+ * If called more than once, only devices that are not already attached to the
+ * SPDK NVMe driver will be reported.
+ *
+ * To stop using the controller and release its associated resources,
+ * call spdk_nvme_detach() with the spdk_nvme_ctrlr instance from the attach_cb()
+ * function.
+ *
+ * \param trid The transport ID indicating which bus to enumerate. If the trtype
+ * is PCIe or trid is NULL, this will scan the local PCIe bus. If the trtype is
+ * RDMA, the traddr and trsvcid must point at the location of an NVMe-oF discovery
+ * service.
+ * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of
+ * the callbacks.
+ * \param probe_cb will be called once per NVMe device found in the system.
+ * \param attach_cb will be called for devices for which probe_cb returned true
+ * once that NVMe controller has been attached to the userspace driver.
+ * \param remove_cb will be called for devices that were attached in a previous
+ * spdk_nvme_probe() call but are no longer attached to the system. Optional;
+ * specify NULL if removal notices are not desired.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvme_probe(const struct spdk_nvme_transport_id *trid,
+ void *cb_ctx,
+ spdk_nvme_probe_cb probe_cb,
+ spdk_nvme_attach_cb attach_cb,
+ spdk_nvme_remove_cb remove_cb);
+
+/**
+ * Connect the NVMe driver to the device located at the given transport ID.
+ *
+ * This function is not thread safe and should only be called from one thread at
+ * a time while no other threads are actively using this NVMe device.
+ *
+ * If called from a secondary process, only the device that has been attached to
+ * the userspace driver in the primary process will be connected.
+ *
+ * If connecting to multiple controllers, it is suggested to use spdk_nvme_probe()
+ * and filter the requested controllers with the probe callback. For PCIe controllers,
+ * spdk_nvme_probe() will be more efficient since the controller resets will happen
+ * in parallel.
+ *
+ * To stop using the controller and release its associated resources, call
+ * spdk_nvme_detach() with the spdk_nvme_ctrlr instance returned by this function.
+ *
+ * \param trid The transport ID indicating which device to connect. If the trtype
+ * is PCIe, this will connect the local PCIe bus. If the trtype is RDMA, the traddr
+ * and trsvcid must point at the location of an NVMe-oF service.
+ * \param opts NVMe controller initialization options. Default values will be used
+ * if the user does not specify the options. The controller may not support all
+ * requested parameters.
+ * \param opts_size Must be set to sizeof(struct spdk_nvme_ctrlr_opts), or 0 if
+ * opts is NULL.
+ *
+ * \return pointer to the connected NVMe controller or NULL if there is any failure.
+ *
+ */
+struct spdk_nvme_ctrlr *spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
+ const struct spdk_nvme_ctrlr_opts *opts,
+ size_t opts_size);
+
+struct spdk_nvme_probe_ctx;
+
+/**
+ * Connect the NVMe driver to the device located at the given transport ID.
+ *
+ * On success this function returns a probe context. The controller associated with
+ * the context is not yet ready for use; the user must call spdk_nvme_probe_poll_async()
+ * until it returns 0.
+ *
+ * \param trid The transport ID indicating which device to connect. If the trtype
+ * is PCIe, this will connect the local PCIe bus. If the trtype is RDMA, the traddr
+ * and trsvcid must point at the location of an NVMe-oF service.
+ * \param opts NVMe controller initialization options. Default values will be used
+ * if the user does not specify the options. The controller may not support all
+ * requested parameters.
+ * \param attach_cb will be called once the NVMe controller has been attached
+ * to the userspace driver.
+ *
+ * \return probe context on success, NULL on failure.
+ *
+ */
+struct spdk_nvme_probe_ctx *spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
+ const struct spdk_nvme_ctrlr_opts *opts,
+ spdk_nvme_attach_cb attach_cb);
+
+/**
+ * Probe and add controllers to the probe context list.
+ *
+ * Users must call spdk_nvme_probe_poll_async() to initialize
+ * controllers in the probe context list to the READY state.
+ *
+ * \param trid The transport ID indicating which bus to enumerate. If the trtype
+ * is PCIe or trid is NULL, this will scan the local PCIe bus. If the trtype is
+ * RDMA, the traddr and trsvcid must point at the location of an NVMe-oF discovery
+ * service.
+ * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of
+ * the callbacks.
+ * \param probe_cb will be called once per NVMe device found in the system.
+ * \param attach_cb will be called for devices for which probe_cb returned true
+ * once that NVMe controller has been attached to the userspace driver.
+ * \param remove_cb will be called for devices that were attached in a previous
+ * spdk_nvme_probe() call but are no longer attached to the system. Optional;
+ * specify NULL if removal notices are not desired.
+ *
+ * \return probe context on success, NULL on failure.
+ */
+struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
+ void *cb_ctx,
+ spdk_nvme_probe_cb probe_cb,
+ spdk_nvme_attach_cb attach_cb,
+ spdk_nvme_remove_cb remove_cb);
+
+/**
+ * Start controllers in the context list.
+ *
+ * Users may call this function until it returns 0.
+ *
+ * \param probe_ctx Context used to track probe actions.
+ *
+ * \return 0 if all probe operations are complete; the probe_ctx
+ * is also freed and no longer valid.
+ * \return -EAGAIN if there are still pending probe operations; user must call
+ * spdk_nvme_probe_poll_async again to continue progress.
+ * \return any value other than 0 and -EAGAIN if a probe error occurred with a controller.
+ */
+int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx);
+
+/**
+ * Detach specified device returned by spdk_nvme_probe()'s attach_cb from the
+ * NVMe driver.
+ *
+ * On success, the spdk_nvme_ctrlr handle is no longer valid.
+ *
+ * This function should be called from a single thread while no other threads
+ * are actively using the NVMe device.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Update the transport ID for a given controller.
+ *
+ * This function allows the user to set a new trid for a controller only if the
+ * controller is failed. The controller's failed state can be obtained from
+ * spdk_nvme_ctrlr_is_failed(). The controller can also be forced to the failed
+ * state using spdk_nvme_ctrlr_fail().
+ *
+ * This function also requires that the transport type and subnqn of the new trid
+ * be the same as the old trid.
+ *
+ * \param ctrlr Opaque handle to an NVMe controller.
+ * \param trid The new transport ID.
+ *
+ * \return 0 on success, -EINVAL if the trid is invalid,
+ * -EPERM if the ctrlr is not failed.
+ */
+int spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid);
+
+/**
+ * Perform a full hardware reset of the NVMe controller.
+ *
+ * This function should be called from a single thread while no other threads
+ * are actively using the NVMe device.
+ *
+ * Any pointers returned from spdk_nvme_ctrlr_get_ns() and spdk_nvme_ns_get_data()
+ * may be invalidated by calling this function. The number of namespaces as returned
+ * by spdk_nvme_ctrlr_get_num_ns() may also change.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Fail the given NVMe controller.
+ *
+ * This function gives the application the opportunity to fail a controller
+ * at will. When a controller is failed, any calls to process completions or
+ * submit I/O on qpairs associated with that controller will fail with an error
+ * code of -ENXIO.
+ * The controller can only be taken from the failed state by
+ * calling spdk_nvme_ctrlr_reset. After the controller has been successfully
+ * reset, any I/O pending when the controller was moved to failed will be
+ * aborted back to the application and can be resubmitted. I/O can then resume.
+ *
+ * \param ctrlr Opaque handle to an NVMe controller.
+ */
+void spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * This function returns the failed status of a given controller.
+ *
+ * \param ctrlr Opaque handle to an NVMe controller.
+ *
+ * \return True if the controller is failed, false otherwise.
+ */
+bool spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the identify controller data as defined by the NVMe specification.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return pointer to the identify controller data.
+ */
+const struct spdk_nvme_ctrlr_data *spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the NVMe controller CSTS (Status) register.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the NVMe controller CSTS (Status) register.
+ */
+union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the NVMe controller CAP (Capabilities) register.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the NVMe controller CAP (Capabilities) register.
+ */
+union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the NVMe controller VS (Version) register.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the NVMe controller VS (Version) register.
+ */
+union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the NVMe controller CMBSZ (Controller Memory Buffer Size) register.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the NVMe controller CMBSZ (Controller Memory Buffer Size) register.
+ */
+union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the number of namespaces for the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the
+ * controller is attached to the SPDK NVMe driver.
+ *
+ * This is equivalent to calling spdk_nvme_ctrlr_get_data() to get the
+ * spdk_nvme_ctrlr_data and then reading the nn field.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the number of namespaces.
+ */
+uint32_t spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the PCI device of a given NVMe controller.
+ *
+ * This only works for local (PCIe-attached) NVMe controllers; other transports
+ * will return NULL.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return PCI device of the NVMe controller, or NULL if not available.
+ */
+struct spdk_pci_device *spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the maximum data transfer size of a given NVMe controller.
+ *
+ * The I/O command helper functions, such as spdk_nvme_ns_cmd_read(), will split
+ * large I/Os automatically; however, it is up to the user to obey this limit for
+ * commands submitted with the raw command functions, such as spdk_nvme_ctrlr_cmd_io_raw().
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return Maximum data transfer size of the NVMe controller in bytes.
+ */
+uint32_t spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Check whether the nsid is an active ns for the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param nsid Namespace id.
+ *
+ * \return true if nsid is an active ns, or false otherwise.
+ */
+bool spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid);
+
+/**
+ * Get the nsid of the first active namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return the nsid of the first active namespace, 0 if there are no active namespaces.
+ */
+uint32_t spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the nsid of the next active namespace given the previous nsid.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param prev_nsid Namespace id of the previous namespace.
+ *
+ * \return the nsid of the next active namespace given the previous nsid, 0 when
+ * there are no more active namespaces.
+ */
+uint32_t spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid);
+
+/**
+ * Determine if a particular log page is supported by the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_get_log_page().
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param log_page Log page to query.
+ *
+ * \return true if supported, or false otherwise.
+ */
+bool spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page);
+
+/**
+ * Determine if a particular feature is supported by the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_get_feature().
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param feature_code Feature to query.
+ *
+ * \return true if supported, or false otherwise.
+ */
+bool spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code);
+
+/**
+ * Signature for callback function invoked when a command is completed.
+ *
+ * The first (unnamed) argument is the cb_arg context pointer that was supplied
+ * together with the callback function when the command was submitted.
+ *
+ * \param spdk_nvme_cpl Completion queue entry that contains the completion status.
+ */
+typedef void (*spdk_nvme_cmd_cb)(void *, const struct spdk_nvme_cpl *);
+
+/**
+ * Signature for callback function invoked when an asynchronous event request
+ * command is completed.
+ *
+ * \param aer_cb_arg Context specified by spdk_nvme_ctrlr_register_aer_callback().
+ * \param spdk_nvme_cpl Completion queue entry that contains the completion status
+ * of the asynchronous event request that was completed.
+ */
+typedef void (*spdk_nvme_aer_cb)(void *aer_cb_arg,
+ const struct spdk_nvme_cpl *);
+
+/**
+ * Register callback function invoked when an AER (asynchronous event request)
+ * command is completed for the given NVMe controller.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param aer_cb_fn Callback function invoked when an asynchronous event request
+ * command is completed.
+ * \param aer_cb_arg Argument passed to callback function.
+ */
+void spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
+ spdk_nvme_aer_cb aer_cb_fn,
+ void *aer_cb_arg);
+
+/**
+ * Opaque handle to a queue pair.
+ *
+ * I/O queue pairs may be allocated using spdk_nvme_ctrlr_alloc_io_qpair().
+ */
+struct spdk_nvme_qpair;
+
+/**
+ * Signature for the callback function invoked when a timeout is detected on a
+ * request.
+ *
+ * For timeouts detected on the admin queue pair, the qpair passed here will
+ * be NULL. If the controller has a serious error condition and is unable to
+ * communicate with the driver via the completion queue, the controller can set
+ * the Controller Fatal Status field to 1; a reset is then required to recover
+ * from such an error. Users may detect Controller Fatal Status when a timeout
+ * happens.
+ *
+ * \param cb_arg Argument passed to callback function.
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param qpair Opaque handle to a queue pair.
+ * \param cid Command ID.
+ */
+typedef void (*spdk_nvme_timeout_cb)(void *cb_arg,
+ struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ uint16_t cid);
+
+/**
+ * Register for timeout callback on a controller.
+ *
+ * The application can choose to register for timeout callback or not register
+ * for timeout callback.
+ *
+ * \param ctrlr NVMe controller on which to monitor for timeout.
+ * \param timeout_us Timeout value in microseconds.
+ * \param cb_fn A function pointer that points to the callback function.
+ * \param cb_arg Argument to the callback function.
+ */
+void spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
+ uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg);
+
+/**
+ * NVMe I/O queue pair initialization options.
+ *
+ * These options may be passed to spdk_nvme_ctrlr_alloc_io_qpair() to configure queue pair
+ * options at queue creation time.
+ *
+ * The user may retrieve the default I/O queue pair creation options for a controller using
+ * spdk_nvme_ctrlr_get_default_io_qpair_opts().
+ */
+struct spdk_nvme_io_qpair_opts {
+ /**
+ * Queue priority for weighted round robin arbitration. If a different arbitration
+ * method is in use, pass 0.
+ */
+ enum spdk_nvme_qprio qprio;
+
+ /**
+ * The queue depth of this NVMe I/O queue. Overrides spdk_nvme_ctrlr_opts::io_queue_size.
+ */
+ uint32_t io_queue_size;
+
+ /**
+ * The number of requests to allocate for this NVMe I/O queue.
+ *
+ * Overrides spdk_nvme_ctrlr_opts::io_queue_requests.
+ *
+ * This should be at least as large as io_queue_size.
+ *
+ * A single I/O may allocate more than one request, since splitting may be
+ * necessary to conform to the device's maximum transfer size, PRP list
+ * compatibility requirements, or driver-assisted striping.
+ */
+ uint32_t io_queue_requests;
+
+ /**
+ * When submitting I/O via spdk_nvme_ns_cmd_read/write and similar functions,
+ * don't immediately submit it to hardware. Instead, queue up new commands
+ * and submit them to the hardware inside spdk_nvme_qpair_process_completions().
+ *
+ * This results in better batching of I/O commands. Often, it is more efficient
+ * to submit batches of commands to the underlying hardware than each command
+ * individually.
+ *
+ * This only applies to PCIe and RDMA transports.
+ *
+ * The flag was originally named delay_pcie_doorbell. To allow backward compatibility
+ * both names are kept in an unnamed union.
+ */
+ union {
+ bool delay_cmd_submit;
+ bool delay_pcie_doorbell;
+ };
+
+ /**
+ * These fields allow specifying the memory buffers for the submission and/or
+ * completion queues.
+ * By default, vaddr is set to NULL meaning SPDK will allocate the memory to be used.
+ * If vaddr is NULL then paddr must be set to 0.
+ * If vaddr is non-NULL, and paddr is zero, SPDK derives the physical
+ * address for the NVMe device, in this case the memory must be registered.
+ * If a paddr value is non-zero, SPDK uses the vaddr and paddr as passed.
+ * SPDK assumes that the memory passed is both virtually and physically
+ * contiguous.
+ * If these fields are used, SPDK will NOT impose any restriction
+ * on the number of elements in the queues.
+ * The buffer sizes are in number of bytes, and are used to confirm
+ * that the buffers are large enough to contain the appropriate queue.
+ * These fields are only used by PCIe attached NVMe devices. They
+ * are presently ignored for other transports.
+ */
+ struct {
+ struct spdk_nvme_cmd *vaddr;
+ uint64_t paddr;
+ uint64_t buffer_size;
+ } sq;
+ struct {
+ struct spdk_nvme_cpl *vaddr;
+ uint64_t paddr;
+ uint64_t buffer_size;
+ } cq;
+
+ /**
+ * This flag indicates to the alloc_io_qpair function that it should not perform
+ * the connect portion on this qpair. This allows the user to add the qpair to a
+ * poll group and then connect it later.
+ */
+ bool create_only;
+};
+
+/**
+ * Get the default options for I/O qpair creation for a specific NVMe controller.
+ *
+ * \param ctrlr NVMe controller to retrieve the defaults from.
+ * \param[out] opts Will be filled with the default options for
+ * spdk_nvme_ctrlr_alloc_io_qpair().
+ * \param opts_size Must be set to sizeof(struct spdk_nvme_io_qpair_opts).
+ */
+void spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_io_qpair_opts *opts,
+ size_t opts_size);
+
+/**
+ * Allocate an I/O queue pair (submission and completion queue).
+ *
+ * This function by default also performs any connection activities required for
+ * a newly created qpair. To avoid that behavior, the user should set the create_only
+ * flag in the opts structure to true.
+ *
+ * Each queue pair should only be used from a single thread at a time (mutual
+ * exclusion must be enforced by the user).
+ *
+ * \param ctrlr NVMe controller for which to allocate the I/O queue pair.
+ * \param opts I/O qpair creation options, or NULL to use the defaults as returned
+ * by spdk_nvme_ctrlr_get_default_io_qpair_opts().
+ * \param opts_size Must be set to sizeof(struct spdk_nvme_io_qpair_opts), or 0
+ * if opts is NULL.
+ *
+ * \return a pointer to the allocated I/O queue pair.
+ */
+struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
+ const struct spdk_nvme_io_qpair_opts *opts,
+ size_t opts_size);
+
+/**
+ * Connect a newly created I/O qpair.
+ *
+ * This function does any connection activities required for a newly created qpair.
+ * It should be called after spdk_nvme_ctrlr_alloc_io_qpair has been called with the
+ * create_only flag set to true in the spdk_nvme_io_qpair_opts structure.
+ *
+ * This call will fail if performed on a qpair that is already connected.
+ * For reconnecting qpairs, see spdk_nvme_ctrlr_reconnect_io_qpair.
+ *
+ * For fabrics like TCP and RDMA, this function actually sends the commands over the wire
+ * that connect the qpair. For PCIe, this function performs some internal state machine operations.
+ *
+ * \param ctrlr NVMe controller for which the I/O queue pair was allocated.
+ * \param qpair Opaque handle to the qpair to connect.
+ *
+ * \return 0 on success or negated errno on failure. Specifically -EISCONN if the qpair is
+ * already connected.
+ */
+int spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
+
+/**
+ * Disconnect the given I/O qpair.
+ *
+ * This function must be called from the same thread as spdk_nvme_qpair_process_completions
+ * and the spdk_nvme_ns_cmd_* functions.
+ *
+ * After disconnect, calling spdk_nvme_qpair_process_completions or one of the
+ * spdk_nvme_ns_cmd* on a qpair will result in a return value of -ENXIO. A
+ * disconnected qpair may be reconnected with either the spdk_nvme_ctrlr_connect_io_qpair
+ * or spdk_nvme_ctrlr_reconnect_io_qpair APIs.
+ *
+ * \param qpair The qpair to disconnect.
+ */
+void spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair);
+
+/**
+ * Attempt to reconnect the given qpair.
+ *
+ * This function is intended to be called on qpairs that have already been connected,
+ * but have since entered a failed state as indicated by a return value of -ENXIO from
+ * either spdk_nvme_qpair_process_completions or one of the spdk_nvme_ns_cmd_* functions.
+ * This function must be called from the same thread as spdk_nvme_qpair_process_completions
+ * and the spdk_nvme_ns_cmd_* functions.
+ *
+ * Calling this function has the same effect as calling spdk_nvme_ctrlr_disconnect_io_qpair
+ * followed by spdk_nvme_ctrlr_connect_io_qpair.
+ *
+ * This function may be called on newly created qpairs, but it does extra checks and attempts
+ * to disconnect the qpair before connecting it. The recommended API for newly created qpairs
+ * is spdk_nvme_ctrlr_connect_io_qpair.
+ *
+ * \param qpair The qpair to reconnect.
+ *
+ * \return 0 on success, or if the qpair was already connected.
+ * -EAGAIN if the driver was unable to reconnect during this call,
+ * but the controller is still connected and is either resetting or enabled.
+ * -ENODEV if the controller is removed. In this case, the controller cannot be recovered
+ * and the application will have to destroy it and the associated qpairs.
+ * -ENXIO if the controller is in a failed state but is not yet resetting. In this case,
+ * the application should call spdk_nvme_ctrlr_reset to reset the entire controller.
+ */
+int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);
+
+/**
+ * Returns the reason the admin qpair for a given controller is disconnected.
+ *
+ * \param ctrlr The controller to check.
+ *
+ * \return a valid spdk_nvme_qp_failure_reason.
+ */
+spdk_nvme_qp_failure_reason spdk_nvme_ctrlr_get_admin_qp_failure_reason(
+ struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ *
+ * \param qpair I/O queue pair to free.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair);
+
+/**
+ * Send the given NVM I/O command, I/O buffers, lists and all to the NVMe controller.
+ *
+ * This is a low level interface for submitting I/O commands directly.
+ *
+ * This function allows a caller to submit an I/O request that is
+ * COMPLETELY pre-defined, right down to the "physical" memory buffers.
+ * It is intended for testing hardware, specifying exact buffer location,
+ * alignment, and offset. It also allows for specific choice of PRP
+ * and SGLs.
+ *
+ * The driver sets the CID. EVERYTHING else is assumed set by the caller.
+ * Needless to say, this is potentially extremely dangerous for both the host
+ * (accidental/malicious storage usage/corruption), and the device.
+ * Thus its intent is for very specific hardware testing and environment
+ * reproduction.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * This function can only be used on PCIe controllers and qpairs.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param qpair I/O qpair to submit command.
+ * \param cmd NVM I/O command to submit.
+ * \param cb_fn Callback function invoked when the I/O command completes.
+ * \param cb_arg Argument passed to callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ */
+
+int spdk_nvme_ctrlr_io_cmd_raw_no_payload_build(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_cmd *cmd,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Send the given NVM I/O command to the NVMe controller.
+ *
+ * This is a low level interface for submitting I/O commands directly. Prefer
+ * the spdk_nvme_ns_cmd_* functions instead. The validity of the command will
+ * not be checked!
+ *
+ * When constructing the nvme_command it is not necessary to fill out the PRP
+ * list/SGL or the CID. The driver will handle both of those for you.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param qpair I/O qpair to submit command.
+ * \param cmd NVM I/O command to submit.
+ * \param buf Virtual memory address of a single physically contiguous buffer.
+ * \param len Size of buffer.
+ * \param cb_fn Callback function invoked when the I/O command completes.
+ * \param cb_arg Argument passed to callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ctrlr_cmd_io_raw(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t len,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Send the given NVM I/O command with metadata to the NVMe controller.
+ *
+ * This is a low level interface for submitting I/O commands directly. Prefer
+ * the spdk_nvme_ns_cmd_* functions instead. The validity of the command will
+ * not be checked!
+ *
+ * When constructing the nvme_command it is not necessary to fill out the PRP
+ * list/SGL or the CID. The driver will handle both of those for you.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param qpair I/O qpair to submit command.
+ * \param cmd NVM I/O command to submit.
+ * \param buf Virtual memory address of a single physically contiguous buffer.
+ * \param len Size of buffer.
+ * \param md_buf Virtual memory address of a single physically contiguous metadata
+ * buffer.
+ * \param cb_fn Callback function invoked when the I/O command completes.
+ * \param cb_arg Argument passed to callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t len, void *md_buf,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Process any outstanding completions for I/O submitted on a queue pair.
+ *
+ * This call is non-blocking, i.e. it only processes completions that are ready
+ * at the time of this function call. It does not wait for outstanding commands
+ * to finish.
+ *
+ * For each completed command, the request's callback function will be called if
+ * specified as non-NULL when the request was submitted.
+ *
+ * The caller must ensure that each queue pair is only used from one thread at a
+ * time.
+ *
+ * This function may be called at any point while the controller is attached to
+ * the SPDK NVMe driver.
+ *
+ * \sa spdk_nvme_cmd_cb
+ *
+ * \param qpair Queue pair to check for completions.
+ * \param max_completions Limit the number of completions to be processed in one
+ * call, or 0 for unlimited.
+ *
+ * \return number of completions processed (may be 0) or negated errno on error.
+ * -ENXIO in the special case that the qpair is failed at the transport layer.
+ */
+int32_t spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair,
+ uint32_t max_completions);
+
+/**
+ * Returns the reason the qpair is disconnected.
+ *
+ * \param qpair The qpair to check.
+ *
+ * \return a valid spdk_nvme_qp_failure_reason.
+ */
+spdk_nvme_qp_failure_reason spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair);
+
+/**
+ * Send the given admin command to the NVMe controller.
+ *
+ * This is a low level interface for submitting admin commands directly. Prefer
+ * the spdk_nvme_ctrlr_cmd_* functions instead. The validity of the command will
+ * not be checked!
+ *
+ * When constructing the nvme_command it is not necessary to fill out the PRP
+ * list/SGL or the CID. The driver will handle both of those for you.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param cmd NVM admin command to submit.
+ * \param buf Virtual memory address of a single physically contiguous buffer.
+ * \param len Size of buffer.
+ * \param cb_fn Callback function invoked when the admin command completes.
+ * \param cb_arg Argument passed to callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_admin_raw(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t len,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Process any outstanding completions for admin commands.
+ *
+ * This will process completions for admin commands submitted on any thread.
+ *
+ * This call is non-blocking, i.e. it only processes completions that are ready
+ * at the time of this function call. It does not wait for outstanding commands
+ * to finish.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return number of completions processed (may be 0) or negated errno on error.
+ * -ENXIO in the special case that the qpair is failed at the transport layer.
+ */
+int32_t spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr);
+
+
+/**
+ * Opaque handle to a namespace. Obtained by calling spdk_nvme_ctrlr_get_ns().
+ */
+struct spdk_nvme_ns;
+
+/**
+ * Get a handle to a namespace for the given controller.
+ *
+ * Namespaces are numbered from 1 to the total number of namespaces. There will
+ * never be any gaps in the numbering. The number of namespaces is obtained by
+ * calling spdk_nvme_ctrlr_get_num_ns().
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param ns_id Namespace id.
+ *
+ * \return a pointer to the namespace.
+ */
+struct spdk_nvme_ns *spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t ns_id);
+
+/**
+ * Get a specific log page from the NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_is_log_page_supported()
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param log_page The log page identifier.
+ * \param nsid Depending on the log page, this may be 0, a namespace identifier,
+ * or SPDK_NVME_GLOBAL_NS_TAG.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param offset Offset in bytes within the log page to start retrieving log page
+ * data. May only be non-zero if the controller supports extended data for Get Log
+ * Page as reported in the controller data log page attributes.
+ * \param cb_fn Callback function to invoke when the log page has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_get_log_page(struct spdk_nvme_ctrlr *ctrlr,
+ uint8_t log_page, uint32_t nsid,
+ void *payload, uint32_t payload_size,
+ uint64_t offset,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Get a specific log page from the NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * This function allows specifying extra fields in cdw10 and cdw11 such as
+ * Retain Asynchronous Event and Log Specific Field.
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_is_log_page_supported()
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param log_page The log page identifier.
+ * \param nsid Depending on the log page, this may be 0, a namespace identifier,
+ * or SPDK_NVME_GLOBAL_NS_TAG.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param offset Offset in bytes within the log page to start retrieving log page
+ * data. May only be non-zero if the controller supports extended data for Get Log
+ * Page as reported in the controller data log page attributes.
+ * \param cdw10 Value to specify for cdw10. Specify 0 for numdl - it will be
+ * set by this function based on the payload_size parameter. Specify 0 for lid -
+ * it will be set by this function based on the log_page parameter.
+ * \param cdw11 Value to specify for cdw11. Specify 0 for numdu - it will be
+ * set by this function based on the payload_size.
+ * \param cdw14 Value to specify for cdw14.
+ * \param cb_fn Callback function to invoke when the log page has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_get_log_page_ext(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page,
+ uint32_t nsid, void *payload, uint32_t payload_size,
+ uint64_t offset, uint32_t cdw10, uint32_t cdw11,
+ uint32_t cdw14, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Abort a specific previously-submitted NVMe command.
+ *
+ * \sa spdk_nvme_ctrlr_register_timeout_callback()
+ *
+ * \param ctrlr NVMe controller to which the command was submitted.
+ * \param qpair NVMe queue pair to which the command was submitted. For admin
+ * commands, pass NULL for the qpair.
+ * \param cid Command ID of the command to abort.
+ * \param cb_fn Callback function to invoke when the abort has completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_abort(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ uint16_t cid,
+ spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Abort previously submitted commands which have cmd_cb_arg as its callback argument.
+ *
+ * \param ctrlr NVMe controller to which the commands were submitted.
+ * \param qpair NVMe queue pair to which the commands were submitted. For admin
+ * commands, pass NULL for the qpair.
+ * \param cmd_cb_arg Callback argument for the NVMe commands which this function
+ * attempts to abort.
+ * \param cb_fn Callback function to invoke when this function has completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno otherwise.
+ */
+int spdk_nvme_ctrlr_cmd_abort_ext(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ void *cmd_cb_arg,
+ spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Set specific feature for the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_get_feature().
+ *
+ * \param ctrlr NVMe controller to manipulate.
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param cdw12 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been set.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_set_feature(struct spdk_nvme_ctrlr *ctrlr,
+ uint8_t feature, uint32_t cdw11, uint32_t cdw12,
+ void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Get specific feature from given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_set_feature()
+ *
+ * \param ctrlr NVMe controller to query.
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ */
+int spdk_nvme_ctrlr_cmd_get_feature(struct spdk_nvme_ctrlr *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Get specific feature from the given NVMe controller and namespace ID.
+ *
+ * \param ctrlr NVMe controller to query.
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param ns_id The namespace identifier.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_set_feature_ns()
+ */
+int spdk_nvme_ctrlr_cmd_get_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature,
+ uint32_t cdw11, void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t ns_id);
+
+/**
+ * Set specific feature for the given NVMe controller and namespace ID.
+ *
+ * \param ctrlr NVMe controller to manipulate.
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param cdw12 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been set.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param ns_id The namespace identifier.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request, -ENXIO if the admin qpair is failed at the transport layer.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_get_feature_ns()
+ */
+int spdk_nvme_ctrlr_cmd_set_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature,
+ uint32_t cdw11, uint32_t cdw12, void *payload,
+ uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t ns_id);
+
+/**
+ * Receive security protocol data from controller.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * \param ctrlr NVMe controller to use for security receive command submission.
+ * \param secp Security Protocol that is used.
+ * \param spsp Security Protocol Specific field.
+ * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security
+ * Protocol EAh.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the command has been completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_cmd_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
+ uint16_t spsp, uint8_t nssf, void *payload,
+ uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Send security protocol data to controller.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * \param ctrlr NVMe controller to use for security send command submission.
+ * \param secp Security Protocol that is used.
+ * \param spsp Security Protocol Specific field.
+ * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security
+ * Protocol EAh.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the command has been completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_cmd_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
+ uint16_t spsp, uint8_t nssf, void *payload,
+ uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Receive security protocol data from controller.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \param ctrlr NVMe controller to use for security receive command submission.
+ * \param secp Security Protocol that is used.
+ * \param spsp Security Protocol Specific field.
+ * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security
+ * Protocol EAh.
+ * \param payload The pointer to the payload buffer.
+ * \param size The size of payload buffer.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
+ uint16_t spsp, uint8_t nssf, void *payload, size_t size);
+
+/**
+ * Send security protocol data to controller.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \param ctrlr NVMe controller to use for security send command submission.
+ * \param secp Security Protocol that is used.
+ * \param spsp Security Protocol Specific field.
+ * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security
+ * Protocol EAh.
+ * \param payload The pointer to the payload buffer.
+ * \param size The size of payload buffer.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
+ uint16_t spsp, uint8_t nssf, void *payload, size_t size);
+
+/**
+ * Get supported flags of the controller.
+ *
+ * \param ctrlr NVMe controller to get flags.
+ *
+ * \return supported flags of this controller.
+ */
+uint64_t spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Attach the specified namespace to controllers.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \param ctrlr NVMe controller to use for command submission.
+ * \param nsid Namespace identifier for namespace to attach.
+ * \param payload The pointer to the controller list.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ struct spdk_nvme_ctrlr_list *payload);
+
+/**
+ * Detach the specified namespace from controllers.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \param ctrlr NVMe controller to use for command submission.
+ * \param nsid Namespace ID to detach.
+ * \param payload The pointer to the controller list.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request.
+ */
+int spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ struct spdk_nvme_ctrlr_list *payload);
+
+/**
+ * Create a namespace.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * \param ctrlr NVMe controller to create namespace on.
+ * \param payload The pointer to the NVMe namespace data.
+ *
+ * \return Namespace ID (>= 1) if successfully created, or 0 if the request failed.
+ */
+uint32_t spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_ns_data *payload);
+
+/**
+ * Delete a namespace.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of
+ * commands submitted through this function.
+ *
+ * \param ctrlr NVMe controller to delete namespace from.
+ * \param nsid The namespace identifier.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request.
+ */
+int spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid);
+
+/**
+ * Format NVM.
+ *
+ * This function requests a low-level format of the media.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * \param ctrlr NVMe controller to format.
+ * \param nsid The namespace identifier. May be SPDK_NVME_GLOBAL_NS_TAG to format
+ * all namespaces.
+ * \param format The format information for the command.
+ *
+ * \return 0 if successfully submitted, negated errno if resources could not be
+ * allocated for this request.
+ */
+int spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ struct spdk_nvme_format *format);
+
+/**
+ * Download a new firmware image.
+ *
+ * This function is thread safe and can be called at any point after spdk_nvme_probe().
+ *
+ * \param ctrlr NVMe controller to perform firmware operation on.
+ * \param payload The data buffer for the firmware image.
+ * \param size The size of the data to be downloaded.
+ * \param slot The slot that the firmware image will be committed to.
+ * \param commit_action The action to perform when firmware is committed.
+ * \param completion_status output parameter. Contains the completion status of
+ * the firmware commit operation.
+ *
+ * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated
+ * for this request, -1 if the size is not a multiple of 4.
+ */
+int spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
+ int slot, enum spdk_nvme_fw_commit_action commit_action,
+ struct spdk_nvme_status *completion_status);
+
+/**
+ * Return virtual address of PCIe NVM I/O registers
+ *
+ * This function returns a pointer to the PCIe I/O registers for a controller
+ * or NULL if unsupported for this transport.
+ *
+ * \param ctrlr Controller whose registers are to be accessed.
+ *
+ * \return Pointer to virtual address of register bank, or NULL.
+ */
+volatile struct spdk_nvme_registers *spdk_nvme_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Reserve the controller memory buffer for data transfer use.
+ *
+ * This function reserves the full size of the controller memory buffer
+ * for use in data transfers. If submission queues or completion queues are
+ * already placed in the controller memory buffer, this call will fail.
+ *
+ * \param ctrlr Controller from which to allocate memory buffer
+ *
+ * \return The size of the controller memory buffer on success. Negated errno
+ * on failure.
+ */
+int spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Map a previously reserved controller memory buffer so that its data is
+ * visible from the CPU. This operation is not always possible.
+ *
+ * \param ctrlr Controller that contains the memory buffer
+ * \param size Size of buffer that was mapped.
+ *
+ * \return Pointer to controller memory buffer, or NULL on failure.
+ */
+void *spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size);
+
+/**
+ * Free a controller memory I/O buffer.
+ *
+ * \param ctrlr Controller from which to unmap the memory buffer.
+ */
+void spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get the transport ID for a given NVMe controller.
+ *
+ * \param ctrlr Controller to get the transport ID.
+ * \return Pointer to the controller's transport ID.
+ */
+const struct spdk_nvme_transport_id *spdk_nvme_ctrlr_get_transport_id(
+ struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Opaque handle for a poll group. A poll group is a collection of spdk_nvme_qpair
+ * objects that are polled for completions as a unit.
+ *
+ * Returned by spdk_nvme_poll_group_create().
+ */
+struct spdk_nvme_poll_group;
+
+
+/**
+ * Callback used by spdk_nvme_poll_group_process_completions() to alert the
+ * user to a disconnected qpair in the poll group.
+ *
+ * \param qpair The disconnected qpair.
+ * \param poll_group_ctx NOTE(review): presumably the user context supplied to
+ * spdk_nvme_poll_group_create() - confirm against the implementation.
+ */
+typedef void (*spdk_nvme_disconnected_qpair_cb)(struct spdk_nvme_qpair *qpair,
+ void *poll_group_ctx);
+
+/**
+ * Create a new poll group.
+ *
+ * \param ctx A user supplied context that can be retrieved later with spdk_nvme_poll_group_get_ctx
+ *
+ * \return Pointer to the new poll group, or NULL on error.
+ */
+struct spdk_nvme_poll_group *spdk_nvme_poll_group_create(void *ctx);
+
+/**
+ * Add an spdk_nvme_qpair to a poll group. qpairs may only be added to
+ * a poll group if they are in the disconnected state; i.e. either they were
+ * just allocated and not yet connected or they have been disconnected with a call
+ * to spdk_nvme_ctrlr_disconnect_io_qpair.
+ *
+ * \param group The group to which the qpair will be added.
+ * \param qpair The qpair to add to the poll group.
+ *
+ * \return 0 on success, -EINVAL if the qpair is not in the disconnected state, -ENODEV if the transport
+ * doesn't exist, -ENOMEM on memory allocation failures, or -EPROTO on a protocol (transport) specific failure.
+ */
+int spdk_nvme_poll_group_add(struct spdk_nvme_poll_group *group, struct spdk_nvme_qpair *qpair);
+
+/**
+ * Remove an spdk_nvme_qpair from a poll group.
+ *
+ * \param group The group from which to remove the qpair.
+ * \param qpair The qpair to remove from the poll group.
+ *
+ * \return 0 on success, -ENOENT if the qpair is not found in the group, or -EPROTO on a protocol (transport) specific failure.
+ */
+int spdk_nvme_poll_group_remove(struct spdk_nvme_poll_group *group, struct spdk_nvme_qpair *qpair);
+
+/**
+ * Destroy an empty poll group.
+ *
+ * \param group The group to destroy.
+ *
+ * \return 0 on success, -EBUSY if the poll group is not empty.
+ */
+int spdk_nvme_poll_group_destroy(struct spdk_nvme_poll_group *group);
+
+/**
+ * Poll for completions on all qpairs in this poll group.
+ *
+ * The disconnected_qpair_cb will be called for all disconnected qpairs in the poll group
+ * including qpairs which fail within the context of this call.
+ * The user is responsible for trying to reconnect or destroy those qpairs.
+ *
+ * \param group The group on which to poll for completions.
+ * \param completions_per_qpair The maximum number of completions per qpair.
+ * \param disconnected_qpair_cb A callback function of type spdk_nvme_disconnected_qpair_cb. Must be non-NULL.
+ *
+ * \return The number of completions across all qpairs, -EINVAL if no disconnected_qpair_cb is passed, or
+ * -EIO if the shared completion queue cannot be polled for the RDMA transport.
+ */
+int64_t spdk_nvme_poll_group_process_completions(struct spdk_nvme_poll_group *group,
+ uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
+
+/**
+ * Retrieve the user context for this specific poll group.
+ *
+ * \param group The poll group from which to retrieve the context.
+ *
+ * \return A pointer to the user provided poll group context.
+ */
+void *spdk_nvme_poll_group_get_ctx(struct spdk_nvme_poll_group *group);
+
+/**
+ * Get the identify namespace data as defined by the NVMe specification.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace.
+ *
+ * \return a pointer to the namespace data.
+ */
+const struct spdk_nvme_ns_data *spdk_nvme_ns_get_data(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the namespace id (index number) from the given namespace handle.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace.
+ *
+ * \return namespace id.
+ */
+uint32_t spdk_nvme_ns_get_id(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the controller with which this namespace is associated.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace.
+ *
+ * \return a pointer to the controller.
+ */
+struct spdk_nvme_ctrlr *spdk_nvme_ns_get_ctrlr(struct spdk_nvme_ns *ns);
+
+/**
+ * Determine whether a namespace is active.
+ *
+ * Inactive namespaces cannot be the target of I/O commands.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return true if active, or false if inactive.
+ */
+bool spdk_nvme_ns_is_active(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the maximum transfer size, in bytes, for an I/O sent to the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the maximum transfer size in bytes.
+ */
+uint32_t spdk_nvme_ns_get_max_io_xfer_size(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the sector size, in bytes, of the given namespace.
+ *
+ * This function returns the size of the data sector only. It does not
+ * include metadata size.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the sector size in bytes.
+ */
+uint32_t spdk_nvme_ns_get_sector_size(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the extended sector size, in bytes, of the given namespace.
+ *
+ * This function returns the size of the data sector plus metadata.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the extended sector size in bytes.
+ */
+uint32_t spdk_nvme_ns_get_extended_sector_size(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the number of sectors for the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the number of sectors.
+ */
+uint64_t spdk_nvme_ns_get_num_sectors(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the size, in bytes, of the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the size of the given namespace in bytes.
+ */
+uint64_t spdk_nvme_ns_get_size(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the end-to-end data protection information type of the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the end-to-end data protection information type.
+ */
+enum spdk_nvme_pi_type spdk_nvme_ns_get_pi_type(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the metadata size, in bytes, of the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the metadata size of the given namespace in bytes.
+ */
+uint32_t spdk_nvme_ns_get_md_size(struct spdk_nvme_ns *ns);
+
+/**
+ * Check whether the namespace can support extended LBA when end-to-end data
+ * protection is enabled.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return true if the namespace can support extended LBA when end-to-end data
+ * protection is enabled, or false otherwise.
+ */
+bool spdk_nvme_ns_supports_extended_lba(struct spdk_nvme_ns *ns);
+
+/**
+ * Check whether the namespace supports the compare operation.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return true if the namespace supports compare operation, or false otherwise.
+ */
+bool spdk_nvme_ns_supports_compare(struct spdk_nvme_ns *ns);
+
+/**
+ * Determine the value returned when reading deallocated blocks.
+ *
+ * If deallocated blocks return 0, the deallocate command can be used as a more
+ * efficient alternative to the write_zeroes command, especially for large requests.
+ *
+ * \param ns Namespace.
+ *
+ * \return the logical block read value.
+ */
+enum spdk_nvme_dealloc_logical_block_read_value spdk_nvme_ns_get_dealloc_logical_block_read_value(
+ struct spdk_nvme_ns *ns);
+
+/**
+ * Get the optimal I/O boundary, in blocks, for the given namespace.
+ *
+ * Read and write commands should not cross the optimal I/O boundary for best
+ * performance.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return Optimal granularity of I/O commands, in blocks, or 0 if no optimal
+ * granularity is reported.
+ */
+uint32_t spdk_nvme_ns_get_optimal_io_boundary(struct spdk_nvme_ns *ns);
+
+/**
+ * Get the UUID for the given namespace.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return a pointer to namespace UUID, or NULL if ns does not have a UUID.
+ */
+const struct spdk_uuid *spdk_nvme_ns_get_uuid(const struct spdk_nvme_ns *ns);
+
+/**
+ * \brief Namespace command support flags.
+ *
+ * Each enumerator occupies a distinct bit, so several flags can be combined in
+ * the single value returned by spdk_nvme_ns_get_flags().
+ */
+enum spdk_nvme_ns_flags {
+ SPDK_NVME_NS_DEALLOCATE_SUPPORTED = 0x1, /**< The deallocate command is supported */
+ SPDK_NVME_NS_FLUSH_SUPPORTED = 0x2, /**< The flush command is supported */
+ SPDK_NVME_NS_RESERVATION_SUPPORTED = 0x4, /**< The reservation command is supported */
+ SPDK_NVME_NS_WRITE_ZEROES_SUPPORTED = 0x8, /**< The write zeroes command is supported */
+ SPDK_NVME_NS_DPS_PI_SUPPORTED = 0x10, /**< The end-to-end data protection is supported */
+ SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED = 0x20, /**< The extended lba format is supported,
+ metadata is transferred as a contiguous
+ part of the logical block that it is associated with */
+ SPDK_NVME_NS_WRITE_UNCORRECTABLE_SUPPORTED = 0x40, /**< The write uncorrectable command is supported */
+ SPDK_NVME_NS_COMPARE_SUPPORTED = 0x80, /**< The compare command is supported */
+};
+
+/**
+ * Get the flags for the given namespace.
+ *
+ * See spdk_nvme_ns_flags for the possible flags returned.
+ *
+ * This function is thread safe and can be called at any point while the controller
+ * is attached to the SPDK NVMe driver.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return the flags for the given namespace.
+ */
+uint32_t spdk_nvme_ns_get_flags(struct spdk_nvme_ns *ns);
+
+/**
+ * Restart the SGL walk to the specified offset when the command has scattered payloads.
+ *
+ * \param cb_arg Argument passed to readv/writev.
+ * \param offset Offset for SGL.
+ */
+typedef void (*spdk_nvme_req_reset_sgl_cb)(void *cb_arg, uint32_t offset);
+
+/**
+ * Fill out *address and *length with the current SGL entry and advance to the next
+ * entry for the next time the callback is invoked.
+ *
+ * The described segment must be physically contiguous.
+ *
+ * \param cb_arg Argument passed to readv/writev.
+ * \param address Virtual address of this segment, a value of UINT64_MAX
+ * means the segment should be described via Bit Bucket SGL.
+ * \param length Length of this physical segment.
+ *
+ * \return NOTE(review): the meaning of the int return value is not documented
+ * here - presumably 0 on success and non-zero on failure; confirm against the
+ * callers.
+ */
+typedef int (*spdk_nvme_req_next_sge_cb)(void *cb_arg, void **address, uint32_t *length);
+
+/**
+ * Submit a write I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the data payload.
+ * \param lba Starting LBA to write the data.
+ * \param lba_count Length (in sectors) for the write operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in
+ * spdk/nvme_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags);
+
+/**
+ * Submit a write I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA to write the data.
+ * \param lba_count Length (in sectors) for the write operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param reset_sgl_fn Callback function to reset scattered payload.
+ * \param next_sge_fn Callback function to iterate each scattered payload memory
+ * segment.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn);
+
+/**
+ * Submit a write I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param qpair I/O queue pair to submit the request
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ * \param metadata virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size()
+ * \param apptag_mask application tag mask.
+ * \param apptag application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * Submit a write I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the data payload.
+ * \param metadata Virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size().
+ * \param lba Starting LBA to write the data.
+ * \param lba_count Length (in sectors) for the write operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in
+ * spdk/nvme_spec.h, for this I/O.
+ * \param apptag_mask Application tag mask.
+ * \param apptag Application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ void *payload, void *metadata,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * Submit a write zeroes I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write zeroes I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA for this command.
+ * \param lba_count Length (in sectors) for the write zero operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in
+ * spdk/nvme_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+/**
+ * Submit a write uncorrectable I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the write uncorrectable I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA for this command.
+ * \param lba_count Length (in sectors) for the write uncorrectable operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_write_uncorrectable(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a read I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the read I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the data payload.
+ * \param lba Starting LBA to read the data.
+ * \param lba_count Length (in sectors) for the read operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags);
+
+/**
+ * Submit a read I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the read I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA to read the data.
+ * \param lba_count Length (in sectors) for the read operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param reset_sgl_fn Callback function to reset scattered payload.
+ * \param next_sge_fn Callback function to iterate each scattered payload memory
+ * segment.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn);
+
+/**
+ * Submit a read I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any given time.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param qpair I/O queue pair to submit the request
+ * \param lba starting LBA to read the data
+ * \param lba_count length (in sectors) for the read operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ * \param metadata virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size()
+ * \param apptag_mask application tag mask.
+ * \param apptag application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * Submits a read I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param qpair I/O queue pair to submit the request
+ * \param payload virtual address pointer to the data payload
+ * \param metadata virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size().
+ * \param lba starting LBA to read the data.
+ * \param lba_count Length (in sectors) for the read operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param apptag_mask Application tag mask.
+ * \param apptag Application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ void *payload, void *metadata,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * Submit a data set management request to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * This is a convenience wrapper that will automatically allocate and construct
+ * the correct data buffers. Therefore, ranges does not need to be allocated from
+ * pinned memory and can be placed on the stack. If a higher performance, zero-copy
+ * version of DSM is required, simply build and submit a raw command using
+ * spdk_nvme_ctrlr_cmd_io_raw().
+ *
+ * \param ns NVMe namespace to submit the DSM request
+ * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute.
+ * \param qpair I/O queue pair to submit the request
+ * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs
+ * to operate on.
+ * \param num_ranges The number of elements in the ranges array.
+ * \param cb_fn Callback function to invoke when the I/O is completed
+ * \param cb_arg Argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ */
+int spdk_nvme_ns_cmd_dataset_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint32_t type,
+ const struct spdk_nvme_dsm_range *ranges,
+ uint16_t num_ranges,
+ spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg);
+
+/**
+ * Submit a flush request to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the flush request.
+ * \param qpair I/O queue pair to submit the request.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ */
+int spdk_nvme_ns_cmd_flush(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a reservation register to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the reservation register request.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the reservation register data.
+ * \param ignore_key '1' the current reservation key check is disabled.
+ * \param action Specifies the registration action.
+ * \param cptpl Change the Persist Through Power Loss state.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_reservation_register(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_reservation_register_data *payload,
+ bool ignore_key,
+ enum spdk_nvme_reservation_register_action action,
+ enum spdk_nvme_reservation_register_cptpl cptpl,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Submits a reservation release to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the reservation release request.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to current reservation key.
+ * \param ignore_key '1' the current reservation key check is disabled.
+ * \param action Specifies the reservation release action.
+ * \param type Reservation type for the namespace.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_reservation_release(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_reservation_key_data *payload,
+ bool ignore_key,
+ enum spdk_nvme_reservation_release_action action,
+ enum spdk_nvme_reservation_type type,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Submits a reservation acquire to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the reservation acquire request.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to reservation acquire data.
+ * \param ignore_key '1' the current reservation key check is disabled.
+ * \param action Specifies the reservation acquire action.
+ * \param type Reservation type for the namespace.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_reservation_acquire(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_reservation_acquire_data *payload,
+ bool ignore_key,
+ enum spdk_nvme_reservation_acquire_action action,
+ enum spdk_nvme_reservation_type type,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a reservation report to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the reservation report request.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer for reservation status data.
+ * \param len Length in bytes of the reservation status data structure.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ void *payload, uint32_t len,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * Submit a compare I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the compare I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the data payload.
+ * \param lba Starting LBA to compare the data.
+ * \param lba_count Length (in sectors) for the compare operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_compare(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags);
+
+/**
+ * Submit a compare I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the compare I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA to compare the data.
+ * \param lba_count Length (in sectors) for the compare operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param reset_sgl_fn Callback function to reset scattered payload.
+ * \param next_sge_fn Callback function to iterate each scattered payload memory
+ * segment.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_comparev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn);
+
+/**
+ * Submit a compare I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the compare I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param lba Starting LBA to compare the data.
+ * \param lba_count Length (in sectors) for the compare operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param reset_sgl_fn Callback function to reset scattered payload.
+ * \param next_sge_fn Callback function to iterate each scattered payload memory
+ * segment.
+ * \param metadata Virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size()
+ * \param apptag_mask Application tag mask.
+ * \param apptag Application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int
+spdk_nvme_ns_cmd_comparev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * Submit a compare I/O to the specified NVMe namespace.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * \param ns NVMe namespace to submit the compare I/O.
+ * \param qpair I/O queue pair to submit the request.
+ * \param payload Virtual address pointer to the data payload.
+ * \param metadata Virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size().
+ * \param lba Starting LBA to compare the data.
+ * \param lba_count Length (in sectors) for the compare operation.
+ * \param cb_fn Callback function to invoke when the I/O is completed.
+ * \param cb_arg Argument to pass to the callback function.
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
+ * \param apptag_mask Application tag mask.
+ * \param apptag Application tag to use for end-to-end protection information.
+ *
+ * \return 0 if successfully submitted, negated errnos on the following error conditions:
+ * -EINVAL: The request is malformed.
+ * -ENOMEM: The request cannot be allocated.
+ * -ENXIO: The qpair is failed at the transport level.
+ * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
+ * with error status including dnr=1 in this case.
+ */
+int spdk_nvme_ns_cmd_compare_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ void *payload, void *metadata,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg, uint32_t io_flags,
+ uint16_t apptag_mask, uint16_t apptag);
+
+/**
+ * \brief Inject an error for the next request with a given opcode.
+ *
+ * \param ctrlr NVMe controller.
+ * \param qpair I/O queue pair to add the error command,
+ * NULL for Admin queue pair.
+ * \param opc Opcode for Admin or I/O commands.
+ * \param do_not_submit True if matching requests should not be submitted
+ * to the controller, but instead completed manually
+ * after timeout_in_us has expired. False if matching
+ * requests should be submitted to the controller and
+ * have their completion status modified after the
+ * controller completes the request.
+ * \param timeout_in_us Wait specified microseconds when do_not_submit is true.
+ * \param err_count Number of matching requests to inject errors.
+ * \param sct Status code type.
+ * \param sc Status code.
+ *
+ * \return 0 if successfully enabled, -ENOMEM if an error command
+ * structure cannot be allocated.
+ *
+ * The function can be called multiple times to inject errors for different
+ * commands. If the opcode matches an existing entry, the existing entry
+ * will be updated with the values specified.
+ */
+int spdk_nvme_qpair_add_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ uint8_t opc,
+ bool do_not_submit,
+ uint64_t timeout_in_us,
+ uint32_t err_count,
+ uint8_t sct, uint8_t sc);
+
+/**
+ * \brief Clear the specified NVMe command with error status.
+ *
+ * \param ctrlr NVMe controller.
+ * \param qpair I/O queue pair to remove the error command,
+ * NULL for Admin queue pair.
+ * \param opc Opcode for Admin or I/O commands.
+ *
+ * The function will remove specified command in the error list.
+ */
+void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair,
+ uint8_t opc);
+
+/**
+ * \brief Given NVMe status, return ASCII string for that error.
+ *
+ * \param status Status from NVMe completion queue element.
+ * \return Returns status as an ASCII string.
+ */
+const char *spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status);
+
+/**
+ * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe submission queue entry (command).
+ *
+ * \param qpair Pointer to the NVMe queue pair - used to determine admin versus I/O queue.
+ * \param cmd Pointer to the submission queue command to be formatted.
+ */
+void spdk_nvme_qpair_print_command(struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_cmd *cmd);
+
+/**
+ * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe completion queue entry.
+ *
+ * \param qpair Pointer to the NVMe queue pair - presently unused.
+ * \param cpl Pointer to the completion queue element to be formatted.
+ */
+void spdk_nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair,
+ struct spdk_nvme_cpl *cpl);
+
+/**
+ * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe submission queue entry (command).
+ *
+ * \param qid Queue identifier.
+ * \param cmd Pointer to the submission queue command to be formatted.
+ */
+void spdk_nvme_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd);
+
+/**
+ * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe completion queue entry.
+ *
+ * \param qid Queue identifier.
+ * \param cpl Pointer to the completion queue element to be formatted.
+ */
+void spdk_nvme_print_completion(uint16_t qid, struct spdk_nvme_cpl *cpl);
+
+struct ibv_context;
+struct ibv_pd;
+struct ibv_mr;
+
+/**
+ * RDMA Transport Hooks
+ */
+struct spdk_nvme_rdma_hooks {
+ /**
+ * \brief Get an InfiniBand Verbs protection domain.
+ *
+ * \param trid The transport ID.
+ * \param verbs InfiniBand Verbs context.
+ *
+ * \return Protection domain (pd) of the NVMe controller.
+ */
+ struct ibv_pd *(*get_ibv_pd)(const struct spdk_nvme_transport_id *trid,
+ struct ibv_context *verbs);
+
+ /**
+ * \brief Get an InfiniBand Verbs memory region for a buffer.
+ *
+ * \param pd The protection domain returned from get_ibv_pd.
+ * \param buf Memory buffer for which an rkey should be returned.
+ * \param size Size of buf.
+ *
+ * \return InfiniBand remote key (rkey) for this buf.
+ */
+ uint64_t (*get_rkey)(struct ibv_pd *pd, void *buf, size_t size);
+
+ /**
+ * \brief Return a key that was previously obtained from get_rkey.
+ *
+ * \param key The InfiniBand remote key (rkey) obtained from get_rkey.
+ *
+ */
+ void (*put_rkey)(uint64_t key);
+};
+
+/**
+ * \brief Set the global hooks for the RDMA transport, if necessary.
+ *
+ * This call is optional and must be performed prior to probing for
+ * any devices. By default, the RDMA transport will use the ibverbs
+ * library to create protection domains and register memory. This
+ * is a mechanism to subvert that and use an existing registration.
+ *
+ * This function may only be called one time per process.
+ *
+ * \param hooks for initializing global hooks
+ */
+void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
+
+/**
+ * Get name of cuse device associated with NVMe controller.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param name Buffer to be filled with cuse device name.
+ * \param size Size of name buffer.
+ *
+ * \return 0 on success. Negated errno on the following error conditions:
+ * -ENODEV: No cuse device registered for the controller.
+ * -ENOSPC: Too small buffer size passed. Value of size pointer changed to the required length.
+ */
+int spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size);
+
+/**
+ * Get name of cuse device associated with NVMe namespace.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param nsid Namespace id.
+ * \param name Buffer to be filled with cuse device name.
+ * \param size Size of name buffer.
+ *
+ * \return 0 on success. Negated errno on the following error conditions:
+ * -ENODEV: No cuse device registered for the namespace.
+ * -ENOSPC: Too small buffer size passed. Value of size pointer changed to the required length.
+ */
+int spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ char *name, size_t *size);
+
+/**
+ * Create a character device at the path specified (Experimental)
+ *
+ * The character device can handle ioctls and is compatible with a standard
+ * Linux kernel NVMe device. Tools such as nvme-cli can be used to configure
+ * SPDK devices through this interface.
+ *
+ * The user is expected to be polling the admin qpair for this controller periodically
+ * for the CUSE device to function.
+ *
+ * \param ctrlr Opaque handle to the NVMe controller.
+ *
+ * \return 0 on success. Negated errno on failure.
+ */
+int spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Remove a previously created character device (Experimental)
+ *
+ * \param ctrlr Opaque handle to the NVMe controller.
+ *
+ * \return 0 on success. Negated errno on failure.
+ */
+int spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr);
+/**
+ * NOTE(review): this declaration is undocumented in the header. Everything
+ * below is inferred from the signature alone and must be confirmed against
+ * the implementation before being relied upon.
+ *
+ * Presumably translates the PRP entries of an NVMe command into an iovec array.
+ *
+ * \param prv Opaque caller context; presumably handed back unchanged as the
+ * first argument of gpa_to_vva -- confirm.
+ * \param cmd NVMe command, presumably the one whose PRP entries are walked.
+ * \param iovs iovec array, presumably filled in by this call -- its required
+ * capacity is not visible here.
+ * \param len Length value; units and exact meaning unconfirmed -- TODO verify.
+ * \param mps Presumably the memory page size used to interpret the PRPs -- confirm.
+ * \param gpa_to_vva Callback invoked with (prv, addr, len) that returns a pointer;
+ * by its name it converts a guest physical address to a virtual address.
+ *
+ * \return An int whose meaning is not established by this header -- TODO document.
+ */
+int spdk_nvme_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs,
+ uint32_t len, size_t mps,
+ void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len));
+
+/**
+ * Opaque handle for a transport poll group. Used by the transport function table.
+ */
+struct spdk_nvme_transport_poll_group;
+
+/**
+ * Update and populate namespace CUSE devices (Experimental)
+ *
+ * \param ctrlr Opaque handle to the NVMe controller.
+ *
+ */
+void spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr);
+
+struct nvme_request;
+
+struct spdk_nvme_transport;
+/**
+ * Table of operations that an NVMe transport provides.
+ *
+ * A pointer to one of these tables is the argument accepted by
+ * spdk_nvme_transport_register(). Besides the function pointers, the table
+ * carries the transport's name (a buffer of SPDK_NVMF_TRSTRING_MAX_LEN + 1
+ * characters) and its enum spdk_nvme_transport_type.
+ *
+ * Members are named after the object they concern: ctrlr_* (controller),
+ * qpair_* and admin_qpair_* (queue pair), and poll_group_* (the opaque
+ * struct spdk_nvme_transport_poll_group handle).
+ *
+ * NOTE(review): the individual callbacks are not documented in this header.
+ * Whether any of them may be left NULL, and the exact meaning of their return
+ * values, cannot be determined from here -- confirm against the transport layer.
+ */
+struct spdk_nvme_transport_ops {
+ char name[SPDK_NVMF_TRSTRING_MAX_LEN + 1];
+
+ enum spdk_nvme_transport_type type;
+
+ struct spdk_nvme_ctrlr *(*ctrlr_construct)(const struct spdk_nvme_transport_id *trid,
+ const struct spdk_nvme_ctrlr_opts *opts,
+ void *devhandle);
+
+ int (*ctrlr_scan)(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect);
+
+ int (*ctrlr_destruct)(struct spdk_nvme_ctrlr *ctrlr);
+
+ int (*ctrlr_enable)(struct spdk_nvme_ctrlr *ctrlr);
+
+ int (*ctrlr_set_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
+
+ int (*ctrlr_set_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);
+
+ int (*ctrlr_get_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);
+
+ int (*ctrlr_get_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);
+
+ uint32_t (*ctrlr_get_max_xfer_size)(struct spdk_nvme_ctrlr *ctrlr);
+
+ uint16_t (*ctrlr_get_max_sges)(struct spdk_nvme_ctrlr *ctrlr);
+
+ int (*ctrlr_reserve_cmb)(struct spdk_nvme_ctrlr *ctrlr);
+
+ void *(*ctrlr_map_cmb)(struct spdk_nvme_ctrlr *ctrlr, size_t *size);
+
+ int (*ctrlr_unmap_cmb)(struct spdk_nvme_ctrlr *ctrlr);
+
+ struct spdk_nvme_qpair *(*ctrlr_create_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
+ const struct spdk_nvme_io_qpair_opts *opts);
+
+ int (*ctrlr_delete_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
+
+ int (*ctrlr_connect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
+
+ void (*ctrlr_disconnect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
+
+ void (*qpair_abort_reqs)(struct spdk_nvme_qpair *qpair, uint32_t dnr);
+
+ int (*qpair_reset)(struct spdk_nvme_qpair *qpair);
+
+ int (*qpair_submit_request)(struct spdk_nvme_qpair *qpair, struct nvme_request *req);
+
+ int32_t (*qpair_process_completions)(struct spdk_nvme_qpair *qpair, uint32_t max_completions);
+
+ int (*qpair_iterate_requests)(struct spdk_nvme_qpair *qpair,
+ int (*iter_fn)(struct nvme_request *req, void *arg),
+ void *arg);
+
+ void (*admin_qpair_abort_aers)(struct spdk_nvme_qpair *qpair);
+
+ struct spdk_nvme_transport_poll_group *(*poll_group_create)(void);
+
+ int (*poll_group_add)(struct spdk_nvme_transport_poll_group *tgroup, struct spdk_nvme_qpair *qpair);
+
+ int (*poll_group_remove)(struct spdk_nvme_transport_poll_group *tgroup,
+ struct spdk_nvme_qpair *qpair);
+
+ int (*poll_group_connect_qpair)(struct spdk_nvme_qpair *qpair);
+
+ int (*poll_group_disconnect_qpair)(struct spdk_nvme_qpair *qpair);
+
+ int64_t (*poll_group_process_completions)(struct spdk_nvme_transport_poll_group *tgroup,
+ uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
+
+ int (*poll_group_destroy)(struct spdk_nvme_transport_poll_group *tgroup);
+};
+
+/**
+ * Register the operations for a given transport type.
+ *
+ * This function should be invoked by referencing the
+ * SPDK_NVME_TRANSPORT_REGISTER macro in the transport's .c file.
+ *
+ * \param ops The operations associated with an NVMe-oF transport.
+ */
+void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops);
+
+/*
+ * Macro used to register new transports.
+ *
+ * Expands to the definition of a static function named
+ * _spdk_nvme_transport_register_<name> whose only statement is a call to
+ * spdk_nvme_transport_register(transport_ops). The function is tagged with
+ * __attribute__((constructor)), a GCC/Clang extension that runs it
+ * automatically before main().
+ *
+ * name is token-pasted into the function name, so it must be usable as part
+ * of an identifier. transport_ops is passed straight through, so it must be a
+ * pointer acceptable to spdk_nvme_transport_register().
+ *
+ * Note that the last line of the definition ends with a line-continuation
+ * backslash: the line directly after the macro is therefore part of the
+ * definition and has to stay blank.
+ */
+#define SPDK_NVME_TRANSPORT_REGISTER(name, transport_ops) \
+static void __attribute__((constructor)) _spdk_nvme_transport_register_##name(void) \
+{ \
+ spdk_nvme_transport_register(transport_ops); \
+}\
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvme_intel.h b/src/spdk/include/spdk/nvme_intel.h
new file mode 100644
index 000000000..2814e2a7c
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_intel.h
@@ -0,0 +1,218 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Intel NVMe vendor-specific definitions
+ *
+ * Reference:
+ * http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ssd-dc-p3700-spec.pdf
+ */
+
+#ifndef SPDK_NVME_INTEL_H
+#define SPDK_NVME_INTEL_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+enum spdk_nvme_intel_feat {
+ SPDK_NVME_INTEL_FEAT_MAX_LBA = 0xC1,
+ SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA = 0xC2,
+ SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING = 0xC6,
+ SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS = 0xC8,
+ SPDK_NVME_INTEL_FEAT_LED_PATTERN = 0xC9,
+ SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS = 0xD5,
+ SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING = 0xE2,
+};
+
+enum spdk_nvme_intel_set_max_lba_command_status_code {
+ SPDK_NVME_INTEL_EXCEEDS_AVAILABLE_CAPACITY = 0xC0,
+ SPDK_NVME_INTEL_SMALLER_THAN_MIN_LIMIT = 0xC1,
+ SPDK_NVME_INTEL_SMALLER_THAN_NS_REQUIREMENTS = 0xC2,
+};
+
+enum spdk_nvme_intel_log_page {
+ SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY = 0xC0,
+ SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY = 0xC1,
+ SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY = 0xC2,
+ SPDK_NVME_INTEL_LOG_TEMPERATURE = 0xC5,
+ SPDK_NVME_INTEL_LOG_SMART = 0xCA,
+ SPDK_NVME_INTEL_MARKETING_DESCRIPTION = 0xDD,
+};
+
+enum spdk_nvme_intel_smart_attribute_code {
+ SPDK_NVME_INTEL_SMART_PROGRAM_FAIL_COUNT = 0xAB,
+ SPDK_NVME_INTEL_SMART_ERASE_FAIL_COUNT = 0xAC,
+ SPDK_NVME_INTEL_SMART_WEAR_LEVELING_COUNT = 0xAD,
+ SPDK_NVME_INTEL_SMART_E2E_ERROR_COUNT = 0xB8,
+ SPDK_NVME_INTEL_SMART_CRC_ERROR_COUNT = 0xC7,
+ SPDK_NVME_INTEL_SMART_MEDIA_WEAR = 0xE2,
+ SPDK_NVME_INTEL_SMART_HOST_READ_PERCENTAGE = 0xE3,
+ SPDK_NVME_INTEL_SMART_TIMER = 0xE4,
+ SPDK_NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS = 0xEA,
+ SPDK_NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER = 0xF0,
+ SPDK_NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT = 0xF3,
+ SPDK_NVME_INTEL_SMART_NAND_BYTES_WRITTEN = 0xF4,
+ SPDK_NVME_INTEL_SMART_HOST_BYTES_WRITTEN = 0xF5,
+};
+
+struct spdk_nvme_intel_log_page_directory {
+ uint8_t version[2];
+ uint8_t reserved[384];
+ uint8_t read_latency_log_len;
+ uint8_t reserved2;
+ uint8_t write_latency_log_len;
+ uint8_t reserved3[5];
+ uint8_t temperature_statistics_log_len;
+ uint8_t reserved4[9];
+ uint8_t smart_log_len;
+ uint8_t reserved5[37];
+ uint8_t marketing_description_log_len;
+ uint8_t reserved6[69];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_log_page_directory) == 512, "Incorrect size");
+
+struct spdk_nvme_intel_rw_latency_page {
+ uint16_t major_revison; /* sic: misspelling of "revision" is part of this header's interface */
+ uint16_t minor_revison; /* sic: misspelling of "revision" is part of this header's interface */
+ uint32_t buckets_32us[32];
+ uint32_t buckets_1ms[31];
+ uint32_t buckets_32ms[31];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_rw_latency_page) == 380, "Incorrect size");
+
+struct spdk_nvme_intel_temperature_page {
+ uint64_t current_temperature;
+ uint64_t shutdown_flag_last;
+ uint64_t shutdown_flag_life;
+ uint64_t highest_temperature;
+ uint64_t lowest_temperature;
+ uint64_t reserved[5];
+ uint64_t specified_max_op_temperature;
+ uint64_t reserved2;
+ uint64_t specified_min_op_temperature;
+ uint64_t estimated_offset;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_temperature_page) == 112, "Incorrect size");
+
+struct spdk_nvme_intel_smart_attribute {
+ uint8_t code;
+ uint8_t reserved[2];
+ uint8_t normalized_value;
+ uint8_t reserved2;
+ uint8_t raw_value[6];
+ uint8_t reserved3;
+};
+
+struct __attribute__((packed)) spdk_nvme_intel_smart_information_page {
+ struct spdk_nvme_intel_smart_attribute attributes[13];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_smart_information_page) == 156, "Incorrect size");
+
+union spdk_nvme_intel_feat_power_governor {
+ uint32_t raw;
+ struct {
+ /** power governor setting : 00h = 25W 01h = 20W 02h = 10W */
+ uint32_t power_governor_setting : 8;
+ uint32_t reserved : 24;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_power_governor) == 4, "Incorrect size");
+
+union spdk_nvme_intel_feat_smbus_address {
+ uint32_t raw;
+ struct {
+ uint32_t reserved : 1;
+ uint32_t smbus_controller_address : 8;
+ uint32_t reserved2 : 23;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_smbus_address) == 4, "Incorrect size");
+
+union spdk_nvme_intel_feat_led_pattern {
+ uint32_t raw;
+ struct {
+ uint32_t feature_options : 24;
+ uint32_t value : 8;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_led_pattern) == 4, "Incorrect size");
+
+union spdk_nvme_intel_feat_reset_timed_workload_counters {
+ uint32_t raw;
+ struct {
+ /**
+ * Write Usage: 0 = NOP, 1 = Reset E2, E3, E4 counters;
+ * Read Usage: Not Supported
+ */
+ uint32_t reset : 1;
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_reset_timed_workload_counters) == 4,
+ "Incorrect size");
+
+union spdk_nvme_intel_feat_latency_tracking {
+ uint32_t raw;
+ struct {
+ /**
+ * Write Usage:
+ * 00h = Disable Latency Tracking (Default)
+ * 01h = Enable Latency Tracking
+ */
+ uint32_t enable : 32;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_latency_tracking) == 4, "Incorrect size");
+
+struct spdk_nvme_intel_marketing_description_page {
+ uint8_t marketing_product[512];
+ /* Spec says this log page will only write 512 bytes, but there are some older FW
+ * versions that accidentally write 516 instead. So just pad this out to 4096 bytes
+ * to make sure users of this structure never end up overwriting unintended parts of
+ * memory.
+ */
+ uint8_t reserved[3584];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_marketing_description_page) == 4096,
+ "Incorrect size");
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvme_ocssd.h b/src/spdk/include/spdk/nvme_ocssd.h
new file mode 100644
index 000000000..7ebb07991
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_ocssd.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * NVMe driver public API extension for Open-Channel
+ */
+
+#ifndef SPDK_NVME_OCSSD_H
+#define SPDK_NVME_OCSSD_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/nvme.h"
+#include "spdk/nvme_ocssd_spec.h"
+
+/**
+ * \brief Determine if OpenChannel is supported by the given NVMe controller.
+ * \param ctrlr NVMe controller to check.
+ *
+ * \return true if OpenChannel is supported, false otherwise.
+ */
+bool spdk_nvme_ctrlr_is_ocssd_supported(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * \brief Identify geometry of the given namespace.
+ * \param ctrlr NVMe controller to query.
+ * \param nsid Id of the given namespace.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer. Shall be multiple of 4K.
+ * \param cb_fn Callback function to invoke when the geometry has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, ENOMEM if resources could not be
+ * allocated for this request, EINVAL if wrong payload size.
+ *
+ */
+int spdk_nvme_ocssd_ctrlr_cmd_geometry(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a vector reset command to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param lba_list an array of LBAs for processing.
+ * LBAs must correspond to the start of chunks to reset.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in lba_list
+ * \param chunk_info an array of chunk info on DMA-able memory
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_reset(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ uint64_t *lba_list, uint32_t num_lbas,
+ struct spdk_ocssd_chunk_information_entry *chunk_info,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a vector write command to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param buffer virtual address pointer to the data payload
+ * \param lba_list an array of LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in lba_list
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries
+ * in spdk/nvme_ocssd_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_write(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ void *buffer,
+ uint64_t *lba_list, uint32_t num_lbas,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+/**
+ * \brief Submits a vector write command with metadata to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param buffer virtual address pointer to the data payload
+ * \param metadata virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size()
+ * \param lba_list an array of LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in lba_list
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries
+ * in spdk/nvme_ocssd_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_write_with_md(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ void *buffer, void *metadata,
+ uint64_t *lba_list, uint32_t num_lbas,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+/**
+ * \brief Submits a vector read command to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param buffer virtual address pointer to the data payload
+ * \param lba_list an array of LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in lba_list
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries
+ * in spdk/nvme_ocssd_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_read(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ void *buffer,
+ uint64_t *lba_list, uint32_t num_lbas,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+/**
+ * \brief Submits a vector read command with metadata to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param buffer virtual address pointer to the data payload
+ * \param metadata virtual address pointer to the metadata payload, the length
+ * of metadata is specified by spdk_nvme_ns_get_md_size()
+ * \param lba_list an array of LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in lba_list
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries
+ * in spdk/nvme_ocssd_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_read_with_md(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ void *buffer, void *metadata,
+ uint64_t *lba_list, uint32_t num_lbas,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+/**
+ * \brief Submits a vector copy command to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the command
+ * \param qpair I/O queue pair to submit the request
+ * \param dst_lba_list an array of destination LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param src_lba_list an array of source LBAs for processing.
+ * Must be allocated through spdk_dma_malloc() or its variants
+ * \param num_lbas number of LBAs stored in src_lba_list and dst_lba_list
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries
+ * in spdk/nvme_ocssd_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ */
+int spdk_nvme_ocssd_ns_cmd_vector_copy(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+ uint64_t *dst_lba_list, uint64_t *src_lba_list,
+ uint32_t num_lbas,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg,
+ uint32_t io_flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvme_ocssd_spec.h b/src/spdk/include/spdk/nvme_ocssd_spec.h
new file mode 100644
index 000000000..21e9bcefc
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_ocssd_spec.h
@@ -0,0 +1,414 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Open-Channel specification definitions
+ */
+
+#ifndef SPDK_NVME_OCSSD_SPEC_H
+#define SPDK_NVME_OCSSD_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+#include "spdk/nvme_spec.h"
+
+/** Maximum number of LBAs that can be issued by a single vector I/O command */
+#define SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES 64
+
+struct spdk_ocssd_dev_lba_fmt {
+ /** Contiguous number of bits assigned to Group addressing */
+ uint8_t grp_len;
+
+ /** Contiguous number of bits assigned to PU addressing */
+ uint8_t pu_len;
+
+ /** Contiguous number of bits assigned to Chunk addressing */
+ uint8_t chk_len;
+
+ /** Contiguous number of bits assigned to logical blocks within Chunk */
+ uint8_t lbk_len;
+
+ uint8_t reserved[4];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_dev_lba_fmt) == 8, "Incorrect size");
+
+struct spdk_ocssd_geometry_data {
+ /** Major Version Number */
+ uint8_t mjr;
+
+ /** Minor Version Number */
+ uint8_t mnr;
+
+ uint8_t reserved1[6];
+
+ /** LBA format */
+ struct spdk_ocssd_dev_lba_fmt lbaf;
+
+ /** Media and Controller Capabilities */
+ struct {
+ /* Supports the Vector Chunk Copy I/O Command */
+ uint32_t vec_chk_cpy : 1;
+
+ /* Supports multiple resets when a chunk is in its free state */
+ uint32_t multi_reset : 1;
+
+ uint32_t reserved : 30;
+ } mccap;
+
+ uint8_t reserved2[12];
+
+ /** Wear-level Index Delta Threshold */
+ uint8_t wit;
+
+ uint8_t reserved3[31];
+
+ /** Number of Groups */
+ uint16_t num_grp;
+
+ /** Number of parallel units per group */
+ uint16_t num_pu;
+
+ /** Number of chunks per parallel unit */
+ uint32_t num_chk;
+
+ /** Chunk Size */
+ uint32_t clba;
+
+ uint8_t reserved4[52];
+
+ /** Minimum Write Size */
+ uint32_t ws_min;
+
+ /** Optimal Write Size */
+ uint32_t ws_opt;
+
+ /** Cache Minimum Write Size Units */
+ uint32_t mw_cunits;
+
+ /** Maximum Open Chunks */
+ uint32_t maxoc;
+
+ /** Maximum Open Chunks per PU */
+ uint32_t maxocpu;
+
+ uint8_t reserved5[44];
+
+ /** tRD Typical */
+ uint32_t trdt;
+
+ /** tRD Max */
+ uint32_t trdm;
+
+ /** tWR Typical */
+ uint32_t twrt;
+
+ /** tWR Max */
+ uint32_t twrm;
+
+ /** tCRS Typical */
+ uint32_t tcrst;
+
+ /** tCRS Max */
+ uint32_t tcrsm;
+
+ /** bytes 216-255: reserved for performance related metrics */
+ uint8_t reserved6[40];
+
+ uint8_t reserved7[3071 - 255];
+
+ /** bytes 3072-4095: Vendor Specific */
+ uint8_t vs[4095 - 3071];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_geometry_data) == 4096, "Incorrect size");
+
+struct spdk_ocssd_chunk_information_entry {
+ /** Chunk State */
+ struct {
+ /** if set to 1 chunk is free */
+ uint8_t free : 1;
+
+ /** if set to 1 chunk is closed */
+ uint8_t closed : 1;
+
+ /** if set to 1 chunk is open */
+ uint8_t open : 1;
+
+ /** if set to 1 chunk is offline */
+ uint8_t offline : 1;
+
+ uint8_t reserved : 4;
+ } cs;
+
+ /** Chunk Type */
+ struct {
+ /** If set to 1 chunk must be written sequentially */
+ uint8_t seq_write : 1;
+
+ /** If set to 1 chunk allows random writes */
+ uint8_t rnd_write : 1;
+
+ uint8_t reserved1 : 2;
+
+ /**
+ * If set to 1 chunk deviates from the chunk size reported
+ * in identify geometry command.
+ */
+ uint8_t size_deviate : 1;
+
+ uint8_t reserved2 : 3;
+ } ct;
+
+ /** Wear-level Index */
+ uint8_t wli;
+
+ uint8_t reserved[5];
+
+ /** Starting LBA */
+ uint64_t slba;
+
+ /** Number of blocks in chunk */
+ uint64_t cnlb;
+
+ /** Write Pointer */
+ uint64_t wp;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_chunk_information_entry) == 32, "Incorrect size");
+
+struct spdk_ocssd_chunk_notification_entry {
+
+ /**
+ * This is a 64-bit incrementing notification count, indicating a
+ * unique identifier for this notification. The counter begins at 1h
+ * and is incremented for each unique event
+ */
+ uint64_t nc;
+
+ /** This field points to the chunk that has its state updated */
+ uint64_t lba;
+
+ /**
+ * This field indicates the namespace id that the event is associated
+ * with
+ */
+ uint32_t nsid;
+
+ /** Field that indicate the state of the block */
+ struct {
+
+ /**
+ * If set to 1, then the error rate of the chunk has been
+ * changed to low
+ */
+ uint8_t error_rate_low : 1;
+
+ /**
+ * If set to 1, then the error rate of the chunk has been
+ * changed to medium
+ */
+ uint8_t error_rate_medium : 1;
+
+ /**
+ * If set to 1, then the error rate of the chunk has been
+ * changed to high
+ */
+ uint8_t error_rate_high : 1;
+
+ /**
+ * If set to 1, then the error rate of the chunk has been
+ * changed to unrecoverable
+ */
+ uint8_t unrecoverable : 1;
+
+ /**
+ * If set to 1, then the chunk has been refreshed by the
+ * device
+ */
+ uint8_t refreshed : 1;
+
+ uint8_t rsvd : 3;
+
+ /**
+ * If set to 1 then the chunk's wear-level index is outside
+ * the average wear-level index threshold defined by the
+ * controller
+ */
+ uint8_t wit_exceeded : 1;
+
+ uint8_t rsvd2 : 7;
+ } state;
+
+ /**
+ * The address provided is covering either logical block, chunk, or
+ * parallel unit
+ */
+ struct {
+
+ /** If set to 1, the LBA covers the logical block */
+ uint8_t lblk : 1;
+
+ /** If set to 1, the LBA covers the respecting chunk */
+ uint8_t chunk : 1;
+
+ /**
+ * If set to 1, the LBA covers the respecting parallel unit
+ * including all chunks
+ */
+ uint8_t pu : 1;
+
+ uint8_t rsvd : 5;
+ } mask;
+
+ uint8_t rsvd[9];
+
+ /**
+ * This field indicates the number of logical blocks to be written.
+ * This is a 0's based value. This field is only valid if mask bit 0 is
+ * set. The number of blocks addressed shall not be outside the boundary
+ * of the specified chunk.
+ */
+ uint16_t nlb;
+
+ uint8_t rsvd2[30];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_chunk_notification_entry) == 64, "Incorrect size");
+
+/**
+ * Vector completion queue entry
+ */
+struct spdk_ocssd_vector_cpl {
+ /* dword 0,1 */
+ uint64_t lba_status; /* completion status bit array */
+
+ /* dword 2 */
+ uint16_t sqhd; /* submission queue head pointer */
+ uint16_t sqid; /* submission queue identifier */
+
+ /* dword 3 */
+ uint16_t cid; /* command identifier */
+ struct spdk_nvme_status status;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_vector_cpl) == 16, "Incorrect size");
+
+/**
+ * OCSSD admin command set opcodes
+ */
+enum spdk_ocssd_admin_opcode {
+ SPDK_OCSSD_OPC_GEOMETRY = 0xE2
+};
+
+/**
+ * OCSSD I/O command set opcodes
+ */
+enum spdk_ocssd_io_opcode {
+ SPDK_OCSSD_OPC_VECTOR_RESET = 0x90,
+ SPDK_OCSSD_OPC_VECTOR_WRITE = 0x91,
+ SPDK_OCSSD_OPC_VECTOR_READ = 0x92,
+ SPDK_OCSSD_OPC_VECTOR_COPY = 0x93
+};
+
+/**
+ * Log page identifiers for SPDK_NVME_OPC_GET_LOG_PAGE
+ */
+enum spdk_ocssd_log_page {
+ /** Chunk Information */
+ SPDK_OCSSD_LOG_CHUNK_INFO = 0xCA,
+
+ /** Chunk Notification Log */
+ SPDK_OCSSD_LOG_CHUNK_NOTIFICATION = 0xD0,
+};
+
+/**
+ * OCSSD feature identifiers
+ * Defines OCSSD specific features that may be configured with Set Features and
+ * retrieved with Get Features.
+ */
+enum spdk_ocssd_feat {
+ /** Media Feedback feature identifier */
+ SPDK_OCSSD_FEAT_MEDIA_FEEDBACK = 0xCA
+};
+
+/**
+ * OCSSD media error status codes extension.
+ * Additional error codes for status code type "2h" (media errors)
+ */
+enum spdk_ocssd_media_error_status_code {
+ /**
+ * The chunk was either marked offline by the reset or the state
+ * of the chunk is already offline.
+ */
+ SPDK_OCSSD_SC_OFFLINE_CHUNK = 0xC0,
+
+ /**
+ * Invalid reset if chunk state is either "Free" or "Open"
+ */
+ SPDK_OCSSD_SC_INVALID_RESET = 0xC1,
+
+ /**
+ * Write failed, chunk remains open.
+ * Host should proceed to write to next write unit.
+ */
+ SPDK_OCSSD_SC_WRITE_FAIL_WRITE_NEXT_UNIT = 0xF0,
+
+ /**
+ * The writes ended prematurely. The chunk state is set to closed.
+ * The host can read up to the value of the write pointer.
+ */
+ SPDK_OCSSD_SC_WRITE_FAIL_CHUNK_EARLY_CLOSE = 0xF1,
+
+ /**
+ * The write corresponds to a write out of order within an open
+ * chunk or the write is to a closed or offline chunk.
+ */
+ SPDK_OCSSD_SC_OUT_OF_ORDER_WRITE = 0xF2,
+
+ /**
+ * The data retrieved is nearing its limit for reading.
+ * The limit is vendor specific, and only provides a hint
+ * to the host that should refresh its data in the future.
+ */
+ SPDK_OCSSD_SC_READ_HIGH_ECC = 0xD0,
+};
+
+#define SPDK_OCSSD_IO_FLAGS_LIMITED_RETRY (1U << 31)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvme_spec.h b/src/spdk/include/spdk/nvme_spec.h
new file mode 100644
index 000000000..281ac500b
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_spec.h
@@ -0,0 +1,2945 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * NVMe specification definitions
+ */
+
+#ifndef SPDK_NVME_SPEC_H
+#define SPDK_NVME_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+/**
+ * Use to mark a command to apply to all namespaces, or to retrieve global
+ * log pages.
+ */
+#define SPDK_NVME_GLOBAL_NS_TAG ((uint32_t)0xFFFFFFFF)
+
+#define SPDK_NVME_MAX_IO_QUEUES (65535) /* 0xFFFF */
+
+#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES 2 /* smallest admin queue size, in entries */
+#define SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES 4096 /* largest admin queue size, in entries (2^12) */
+
+#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES 2 /* smallest I/O queue size, in entries */
+#define SPDK_NVME_IO_QUEUE_MAX_ENTRIES 65536 /* largest I/O queue size, in entries (2^16) */
+
+/**
+ * Maximum number of ranges that may be specified in a single
+ * dataset management command.
+ */
+#define SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES 256
+
+/**
+ * Maximum number of blocks that may be specified in a single dataset management range.
+ */
+#define SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS 0xFFFFFFFFu
+
+union spdk_nvme_cap_register {
+ uint64_t raw; /**< whole register as a single 64-bit value */
+ struct {
+ /** maximum queue entries supported */
+ uint32_t mqes : 16;
+
+ /** contiguous queues required */
+ uint32_t cqr : 1;
+
+ /** arbitration mechanism supported (see enum spdk_nvme_cap_ams) */
+ uint32_t ams : 2;
+
+ uint32_t reserved1 : 5;
+
+ /** timeout */
+ uint32_t to : 8;
+
+ /** doorbell stride */
+ uint32_t dstrd : 4;
+
+ /** NVM subsystem reset supported */
+ uint32_t nssrs : 1;
+
+ /** command sets supported (see the SPDK_NVME_CAP_CSS_* masks) */
+ uint32_t css : 8;
+
+ /** boot partition support */
+ uint32_t bps : 1;
+
+ uint32_t reserved2 : 2;
+
+ /** memory page size minimum */
+ uint32_t mpsmin : 4;
+
+ /** memory page size maximum */
+ uint32_t mpsmax : 4;
+
+ uint32_t reserved3 : 8;
+ } bits; /**< per-field view; the declared widths add up to 64 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cap_register) == 8, "Incorrect size");
+
+/**
+ * I/O Command Set Selected
+ *
+ * Only a single command set is defined as of NVMe 1.3 (NVM). Later, it became
+ * possible to disable I/O Command Sets, i.e. to configure the controller to use
+ * only the Admin Command Set. With 1.4c and Namespace Types, additional I/O
+ * Command Sets are available.
+ */
+enum spdk_nvme_cc_css {
+ SPDK_NVME_CC_CSS_NVM = 0x0, /**< NVM command set */
+ SPDK_NVME_CC_CSS_IOCS = 0x6, /**< One or more I/O command sets */
+ SPDK_NVME_CC_CSS_NOIO = 0x7, /**< No I/O, only admin */
+};
+
+#define SPDK_NVME_CAP_CSS_NVM (1u << SPDK_NVME_CC_CSS_NVM) /**< NVM command set supported */
+#define SPDK_NVME_CAP_CSS_IOCS (1u << SPDK_NVME_CC_CSS_IOCS) /**< One or more I/O Command sets supported */
+#define SPDK_NVME_CAP_CSS_NOIO (1u << SPDK_NVME_CC_CSS_NOIO) /**< No I/O (admin only) mode supported */
+
+union spdk_nvme_cc_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** enable */
+ uint32_t en : 1;
+
+ uint32_t reserved1 : 3;
+
+ /** i/o command set selected (see enum spdk_nvme_cc_css) */
+ uint32_t css : 3;
+
+ /** memory page size */
+ uint32_t mps : 4;
+
+ /** arbitration mechanism selected (see enum spdk_nvme_cc_ams) */
+ uint32_t ams : 3;
+
+ /** shutdown notification (see enum spdk_nvme_shn_value) */
+ uint32_t shn : 2;
+
+ /** i/o submission queue entry size */
+ uint32_t iosqes : 4;
+
+ /** i/o completion queue entry size */
+ uint32_t iocqes : 4;
+
+ uint32_t reserved2 : 8;
+ } bits; /**< per-field view; the declared widths add up to 32 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cc_register) == 4, "Incorrect size");
+
+enum spdk_nvme_shn_value {
+ SPDK_NVME_SHN_NORMAL = 0x1, /**< normal shutdown notification */
+ SPDK_NVME_SHN_ABRUPT = 0x2, /**< abrupt shutdown notification */
+};
+
+union spdk_nvme_csts_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** ready */
+ uint32_t rdy : 1;
+
+ /** controller fatal status */
+ uint32_t cfs : 1;
+
+ /** shutdown status (see enum spdk_nvme_shst_value) */
+ uint32_t shst : 2;
+
+ /** NVM subsystem reset occurred */
+ uint32_t nssro : 1;
+
+ /** processing paused */
+ uint32_t pp : 1;
+
+ uint32_t reserved1 : 26;
+ } bits; /**< per-field view; the declared widths add up to 32 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_csts_register) == 4, "Incorrect size");
+
+enum spdk_nvme_shst_value {
+ SPDK_NVME_SHST_NORMAL = 0x0, /**< normal operation */
+ SPDK_NVME_SHST_OCCURRING = 0x1, /**< shutdown processing occurring */
+ SPDK_NVME_SHST_COMPLETE = 0x2, /**< shutdown processing complete */
+};
+
+union spdk_nvme_aqa_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** admin submission queue size */
+ uint32_t asqs : 12;
+
+ uint32_t reserved1 : 4;
+
+ /** admin completion queue size */
+ uint32_t acqs : 12;
+
+ uint32_t reserved2 : 4;
+ } bits; /**< per-field view; the declared widths add up to 32 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_aqa_register) == 4, "Incorrect size");
+
+union spdk_nvme_vs_register {
+ uint32_t raw; /**< whole register; comparable against SPDK_NVME_VERSION() values */
+ struct {
+ /** indicates the tertiary version */
+ uint32_t ter : 8;
+ /** indicates the minor version */
+ uint32_t mnr : 8;
+ /** indicates the major version */
+ uint32_t mjr : 16;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_vs_register) == 4, "Incorrect size");
+
+/** Generate a raw version value (mjr << 16 | mnr << 8 | ter) in the same format as \ref spdk_nvme_vs_register for comparison; arguments are not masked. */
+#define SPDK_NVME_VERSION(mjr, mnr, ter) \
+ (((uint32_t)(mjr) << 16) | \
+ ((uint32_t)(mnr) << 8) | \
+ (uint32_t)(ter))
+
+/* Compile-time checks that the shifts place each component correctly */
+SPDK_STATIC_ASSERT(SPDK_NVME_VERSION(1, 0, 0) == 0x00010000, "version macro error");
+SPDK_STATIC_ASSERT(SPDK_NVME_VERSION(1, 2, 1) == 0x00010201, "version macro error");
+
+union spdk_nvme_cmbloc_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** indicator of the BAR which contains the controller memory buffer (CMB) */
+ uint32_t bir : 3;
+ uint32_t reserved1 : 9;
+ /** offset of CMB in multiples of the size unit */
+ uint32_t ofst : 20;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbloc_register) == 4, "Incorrect size");
+
+union spdk_nvme_cmbsz_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** support submission queues in CMB */
+ uint32_t sqs : 1;
+ /** support completion queues in CMB */
+ uint32_t cqs : 1;
+ /** support PRP and SGL lists in CMB */
+ uint32_t lists : 1;
+ /** support read data and metadata in CMB */
+ uint32_t rds : 1;
+ /** support write data and metadata in CMB */
+ uint32_t wds : 1;
+ uint32_t reserved1 : 3;
+ /** indicates the granularity of the size unit */
+ uint32_t szu : 4;
+ /** size of CMB in multiples of the size unit */
+ uint32_t sz : 20;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbsz_register) == 4, "Incorrect size");
+
+union spdk_nvme_cmbmsc_register {
+ uint64_t raw; /**< whole register as a single 64-bit value */
+ struct {
+ /** capability registers enabled */
+ uint64_t cre : 1;
+
+ /** controller memory space enable */
+ uint64_t cmse : 1;
+
+ uint64_t reserved : 10;
+
+ /** controller base address */
+ uint64_t cba : 52;
+ } bits; /**< per-field view; the declared widths add up to 64 bits */
+
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbmsc_register) == 8, "Incorrect size");
+
+union spdk_nvme_cmbsts_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** controller base address invalid */
+ uint32_t cbai : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbsts_register) == 4, "Incorrect size");
+
+/** Boot partition information */
+union spdk_nvme_bpinfo_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** Boot partition size in 128KB multiples */
+ uint32_t bpsz : 15;
+
+ uint32_t reserved1 : 9;
+
+ /**
+ * Boot read status
+ * 00b: No Boot Partition read operation requested
+ * 01b: Boot Partition read in progress
+ * 10b: Boot Partition read completed successfully
+ * 11b: Error completing Boot Partition read
+ */
+ uint32_t brs : 2;
+
+ uint32_t reserved2 : 5;
+
+ /** Active Boot Partition ID */
+ uint32_t abpid : 1;
+ } bits; /**< per-field view; the declared widths add up to 32 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_bpinfo_register) == 4, "Incorrect size");
+
+/** Boot partition read select */
+union spdk_nvme_bprsel_register {
+ uint32_t raw; /**< whole register as a single 32-bit value */
+ struct {
+ /** Boot partition read size in multiples of 4KB */
+ uint32_t bprsz : 10;
+
+ /** Boot partition read offset in multiples of 4KB */
+ uint32_t bprof : 20;
+
+ uint32_t reserved : 1;
+
+ /** Boot Partition Identifier */
+ uint32_t bpid : 1;
+ } bits; /**< per-field view; the declared widths add up to 32 bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_bprsel_register) == 4, "Incorrect size");
+
+/** Value to write to NSSR to request an NVM subsystem reset (the ASCII bytes 'N','V','M','e') */
+#define SPDK_NVME_NSSR_VALUE 0x4E564D65
+
+struct spdk_nvme_registers {
+ /** controller capabilities */
+ union spdk_nvme_cap_register cap;
+
+ /** version of NVMe specification */
+ union spdk_nvme_vs_register vs;
+ uint32_t intms; /**< interrupt mask set */
+ uint32_t intmc; /**< interrupt mask clear */
+
+ /** controller configuration */
+ union spdk_nvme_cc_register cc;
+
+ uint32_t reserved1; /* occupies offset 0x18, between cc (0x14) and csts (0x1C) */
+ union spdk_nvme_csts_register csts; /**< controller status */
+ uint32_t nssr; /**< NVM subsystem reset (see SPDK_NVME_NSSR_VALUE) */
+
+ /** admin queue attributes */
+ union spdk_nvme_aqa_register aqa;
+
+ uint64_t asq; /**< admin submission queue base addr */
+ uint64_t acq; /**< admin completion queue base addr */
+ /** controller memory buffer location */
+ union spdk_nvme_cmbloc_register cmbloc;
+ /** controller memory buffer size */
+ union spdk_nvme_cmbsz_register cmbsz;
+
+ /** boot partition information */
+ union spdk_nvme_bpinfo_register bpinfo;
+
+ /** boot partition read select */
+ union spdk_nvme_bprsel_register bprsel;
+
+ /** boot partition memory buffer location (must be 4KB aligned) */
+ uint64_t bpmbl;
+
+ /** controller memory buffer memory space control */
+ union spdk_nvme_cmbmsc_register cmbmsc;
+
+ /** controller memory buffer status */
+ union spdk_nvme_cmbsts_register cmbsts;
+
+ uint32_t reserved3[0x3e9]; /* 0x3e9 dwords of padding: 0x5C + 0x3e9 * 4 = 0x1000, where doorbell[0] starts */
+
+ struct {
+ uint32_t sq_tdbl; /**< submission queue tail doorbell */
+ uint32_t cq_hdbl; /**< completion queue head doorbell */
+ } doorbell[1]; /* only the first pair is declared; NOTE(review): presumably later pairs are reached by indexing past [0] - confirm against users */
+};
+
+/* NVMe controller register space offsets (the doorbell offset is not asserted here) */
+SPDK_STATIC_ASSERT(0x00 == offsetof(struct spdk_nvme_registers, cap),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x08 == offsetof(struct spdk_nvme_registers, vs), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x0C == offsetof(struct spdk_nvme_registers, intms),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x10 == offsetof(struct spdk_nvme_registers, intmc),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x14 == offsetof(struct spdk_nvme_registers, cc), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x1C == offsetof(struct spdk_nvme_registers, csts), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x20 == offsetof(struct spdk_nvme_registers, nssr), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x24 == offsetof(struct spdk_nvme_registers, aqa), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x28 == offsetof(struct spdk_nvme_registers, asq), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x30 == offsetof(struct spdk_nvme_registers, acq), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x38 == offsetof(struct spdk_nvme_registers, cmbloc),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x3C == offsetof(struct spdk_nvme_registers, cmbsz),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x40 == offsetof(struct spdk_nvme_registers, bpinfo),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x44 == offsetof(struct spdk_nvme_registers, bprsel),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x48 == offsetof(struct spdk_nvme_registers, bpmbl),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x50 == offsetof(struct spdk_nvme_registers, cmbmsc),
+ "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x58 == offsetof(struct spdk_nvme_registers, cmbsts),
+ "Incorrect register offset");
+
+enum spdk_nvme_sgl_descriptor_type {
+ SPDK_NVME_SGL_TYPE_DATA_BLOCK = 0x0,
+ SPDK_NVME_SGL_TYPE_BIT_BUCKET = 0x1,
+ SPDK_NVME_SGL_TYPE_SEGMENT = 0x2,
+ SPDK_NVME_SGL_TYPE_LAST_SEGMENT = 0x3,
+ SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK = 0x4,
+ SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK = 0x5,
+ /* 0x6 - 0xE reserved */
+ SPDK_NVME_SGL_TYPE_VENDOR_SPECIFIC = 0xF /* largest value that fits the 4-bit type field of struct spdk_nvme_sgl_descriptor */
+};
+
+enum spdk_nvme_sgl_descriptor_subtype {
+ SPDK_NVME_SGL_SUBTYPE_ADDRESS = 0x0,
+ SPDK_NVME_SGL_SUBTYPE_OFFSET = 0x1,
+ SPDK_NVME_SGL_SUBTYPE_TRANSPORT = 0xa, /* values 0x2-0x9 and 0xb-0xf have no enumerator here */
+};
+
+struct __attribute__((packed)) spdk_nvme_sgl_descriptor {
+ uint64_t address;
+ union { /* three overlapping views of the second 8 bytes; subtype/type come last in each */
+ struct {
+ uint8_t reserved[7];
+ uint8_t subtype : 4; /* see enum spdk_nvme_sgl_descriptor_subtype */
+ uint8_t type : 4; /* see enum spdk_nvme_sgl_descriptor_type */
+ } generic;
+
+ struct {
+ uint32_t length;
+ uint8_t reserved[3];
+ uint8_t subtype : 4;
+ uint8_t type : 4;
+ } unkeyed;
+
+ struct {
+ uint64_t length : 24;
+ uint64_t key : 32;
+ uint64_t subtype : 4;
+ uint64_t type : 4;
+ } keyed; /* 24 + 32 + 4 + 4 = 64 bits */
+ };
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sgl_descriptor) == 16, "Incorrect size");
+
+enum spdk_nvme_psdt_value { /* values for the 2-bit psdt field of struct spdk_nvme_cmd */
+ SPDK_NVME_PSDT_PRP = 0x0,
+ SPDK_NVME_PSDT_SGL_MPTR_CONTIG = 0x1,
+ SPDK_NVME_PSDT_SGL_MPTR_SGL = 0x2,
+ SPDK_NVME_PSDT_RESERVED = 0x3
+};
+
+/**
+ * Submission queue priority values for Create I/O Submission Queue Command.
+ *
+ * Only valid for the weighted round robin arbitration method.
+ */
+enum spdk_nvme_qprio {
+ SPDK_NVME_QPRIO_URGENT = 0x0,
+ SPDK_NVME_QPRIO_HIGH = 0x1,
+ SPDK_NVME_QPRIO_MEDIUM = 0x2,
+ SPDK_NVME_QPRIO_LOW = 0x3
+};
+
+#define SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK 0x3 /* covers every enum spdk_nvme_qprio value (two bits) */
+
+/**
+ * Optional arbitration mechanisms supported by the controller.
+ *
+ * Each bit of the two-bit CAP.AMS field (18:17) is set to '1' when the controller supports it.
+ * There is no bit for round robin (AMS_RR): all controllers support it, and it is selected by 0x0.
+ */
+enum spdk_nvme_cap_ams {
+ SPDK_NVME_CAP_AMS_WRR = 0x1, /**< weighted round robin */
+ SPDK_NVME_CAP_AMS_VS = 0x2, /**< vendor specific */
+};
+
+/**
+ * Arbitration mechanism selected for the controller.
+ *
+ * Values 0x2 to 0x6 are reserved.
+ */
+enum spdk_nvme_cc_ams {
+ SPDK_NVME_CC_AMS_RR = 0x0, /**< default round robin */
+ SPDK_NVME_CC_AMS_WRR = 0x1, /**< weighted round robin */
+ SPDK_NVME_CC_AMS_VS = 0x7, /**< vendor specific */
+};
+
+/**
+ * Fused operation values for the 2-bit fuse field of struct spdk_nvme_cmd
+ */
+enum spdk_nvme_cmd_fuse {
+ SPDK_NVME_CMD_FUSE_NONE = 0x0, /**< normal operation */
+ SPDK_NVME_CMD_FUSE_FIRST = 0x1, /**< fused operation, first command */
+ SPDK_NVME_CMD_FUSE_SECOND = 0x2, /**< fused operation, second command */
+ SPDK_NVME_CMD_FUSE_MASK = 0x3, /**< fused operation flags mask */
+};
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_ARBITRATION
+ */
+union spdk_nvme_feat_arbitration {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Arbitration Burst (see SPDK_NVME_ARBITRATION_BURST_UNLIMITED) */
+ uint32_t ab : 3;
+
+ uint32_t reserved : 5;
+
+ /** Low Priority Weight */
+ uint32_t lpw : 8;
+
+ /** Medium Priority Weight */
+ uint32_t mpw : 8;
+
+ /** High Priority Weight */
+ uint32_t hpw : 8;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_arbitration) == 4, "Incorrect size");
+
+#define SPDK_NVME_ARBITRATION_BURST_UNLIMITED 0x7 /* all ones in the 3-bit ab field */
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_POWER_MANAGEMENT
+ */
+union spdk_nvme_feat_power_management {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Power State */
+ uint32_t ps : 5;
+
+ /** Workload Hint */
+ uint32_t wh : 3;
+
+ uint32_t reserved : 24;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_power_management) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_LBA_RANGE_TYPE
+ */
+union spdk_nvme_feat_lba_range_type {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Number of LBA Ranges */
+ uint32_t num : 6;
+
+ uint32_t reserved : 26;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_lba_range_type) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD
+ */
+union spdk_nvme_feat_temperature_threshold {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Temperature Threshold */
+ uint32_t tmpth : 16;
+
+ /** Threshold Temperature Select */
+ uint32_t tmpsel : 4;
+
+ /** Threshold Type Select */
+ uint32_t thsel : 2;
+
+ uint32_t reserved : 10;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_temperature_threshold) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_ERROR_RECOVERY
+ */
+union spdk_nvme_feat_error_recovery {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Time Limited Error Recovery */
+ uint32_t tler : 16;
+
+ /** Deallocated or Unwritten Logical Block Error Enable */
+ uint32_t dulbe : 1;
+
+ uint32_t reserved : 15;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_error_recovery) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE
+ */
+union spdk_nvme_feat_volatile_write_cache {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Volatile Write Cache Enable */
+ uint32_t wce : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_volatile_write_cache) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_NUMBER_OF_QUEUES
+ */
+union spdk_nvme_feat_number_of_queues {
+ uint32_t raw; /**< both fields as a single 32-bit value */
+ struct {
+ /** Number of I/O Submission Queues Requested */
+ uint32_t nsqr : 16;
+
+ /** Number of I/O Completion Queues Requested */
+ uint32_t ncqr : 16;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_number_of_queues) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_INTERRUPT_COALESCING
+ */
+union spdk_nvme_feat_interrupt_coalescing {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Aggregation Threshold */
+ uint32_t thr : 8;
+
+ /** Aggregation Time */
+ uint32_t time : 8;
+
+ uint32_t reserved : 16;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_interrupt_coalescing) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION
+ */
+union spdk_nvme_feat_interrupt_vector_configuration {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Interrupt Vector */
+ uint32_t iv : 16;
+
+ /** Coalescing Disable */
+ uint32_t cd : 1;
+
+ uint32_t reserved : 15;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_interrupt_vector_configuration) == 4,
+ "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_WRITE_ATOMICITY
+ */
+union spdk_nvme_feat_write_atomicity {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Disable Normal */
+ uint32_t dn : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_write_atomicity) == 4, "Incorrect size");
+
+union spdk_nvme_critical_warning_state { /* one-byte set of critical warning flags */
+ uint8_t raw; /**< all flags as a single byte */
+
+ struct {
+ uint8_t available_spare : 1;
+ uint8_t temperature : 1;
+ uint8_t device_reliability : 1;
+ uint8_t read_only : 1;
+ uint8_t volatile_memory_backup : 1;
+ uint8_t reserved : 3;
+ } bits; /**< five flag bits plus three reserved bits */
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_critical_warning_state) == 1, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION
+ */
+union spdk_nvme_feat_async_event_configuration {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ union spdk_nvme_critical_warning_state crit_warn; /**< critical warning flags; occupies the first byte */
+ uint32_t ns_attr_notice : 1; /**< namespace attribute notice */
+ uint32_t fw_activation_notice : 1; /**< firmware activation notice */
+ uint32_t telemetry_log_notice : 1; /**< telemetry log notice */
+ uint32_t reserved : 21;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_async_event_configuration) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION
+ */
+union spdk_nvme_feat_autonomous_power_state_transition {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Autonomous Power State Transition Enable */
+ uint32_t apste : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_autonomous_power_state_transition) == 4,
+ "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_MEM_BUFFER
+ */
+union spdk_nvme_feat_host_mem_buffer {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Enable Host Memory */
+ uint32_t ehm : 1;
+
+ /** Memory Return */
+ uint32_t mr : 1;
+
+ uint32_t reserved : 30;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_host_mem_buffer) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_KEEP_ALIVE_TIMER
+ */
+union spdk_nvme_feat_keep_alive_timer {
+ uint32_t raw; /**< same 32 bits as bits.kato */
+ struct {
+ /** Keep Alive Timeout (uses the full 32 bits) */
+ uint32_t kato : 32;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_keep_alive_timer) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT
+ */
+union spdk_nvme_feat_host_controlled_thermal_management {
+ uint32_t raw; /**< both fields as a single 32-bit value */
+ struct {
+ /** Thermal Management Temperature 2 (declared first) */
+ uint32_t tmt2 : 16;
+
+ /** Thermal Management Temperature 1 (declared second) */
+ uint32_t tmt1 : 16;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_host_controlled_thermal_management) == 4,
+ "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG
+ */
+union spdk_nvme_feat_non_operational_power_state_config {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Non-Operational Power State Permissive Mode Enable */
+ uint32_t noppme : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_non_operational_power_state_config) == 4,
+ "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER
+ */
+union spdk_nvme_feat_software_progress_marker {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Pre-boot Software Load Count */
+ uint32_t pbslc : 8;
+
+ uint32_t reserved : 24;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_software_progress_marker) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_IDENTIFIER
+ */
+union spdk_nvme_feat_host_identifier {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Enable Extended Host Identifier */
+ uint32_t exhid : 1;
+
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_host_identifier) == 4, "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_RESERVE_MASK
+ */
+union spdk_nvme_feat_reservation_notification_mask {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ uint32_t reserved1 : 1;
+ /** Mask Registration Preempted Notification */
+ uint32_t regpre : 1;
+ /** Mask Reservation Released Notification */
+ uint32_t resrel : 1;
+ /** Mask Reservation Preempted Notification */
+ uint32_t respre : 1;
+ uint32_t reserved2 : 28;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_reservation_notification_mask) == 4,
+ "Incorrect size");
+
+/**
+ * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_RESERVE_PERSIST
+ */
+union spdk_nvme_feat_reservation_persistence {
+ uint32_t raw; /**< all fields as a single 32-bit value */
+ struct {
+ /** Persist Through Power Loss */
+ uint32_t ptpl : 1;
+ uint32_t reserved : 31;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_reservation_persistence) == 4, "Incorrect size");
+
+union spdk_nvme_cmd_cdw10 { /* per-command views of command dword 10; every view is 32 bits wide */
+ uint32_t raw; /**< the dword as a single 32-bit value */
+ struct {
+ /** Controller or Namespace Structure */
+ uint32_t cns : 8;
+ uint32_t reserved : 8;
+ /** Controller Identifier */
+ uint32_t cntid : 16;
+ } identify;
+
+ struct {
+ /** Log Page Identifier */
+ uint32_t lid : 8;
+ /** Log Specific Field */
+ uint32_t lsp : 4;
+ uint32_t reserved : 3;
+ /** Retain Asynchronous Event */
+ uint32_t rae : 1;
+ /** Number of Dwords Lower (the upper half is numdu in cdw11) */
+ uint32_t numdl : 16;
+ } get_log_page;
+
+ struct {
+ /** Submission Queue Identifier */
+ uint32_t sqid : 16;
+ /** Command Identifier */
+ uint32_t cid : 16;
+ } abort;
+
+ struct {
+ /** NVMe Security Specific Field */
+ uint32_t nssf : 8;
+ /** SP Specific 0 */
+ uint32_t spsp0 : 8;
+ /** SP Specific 1 */
+ uint32_t spsp1 : 8;
+ /** Security Protocol */
+ uint32_t secp : 8;
+ } sec_send_recv;
+
+ struct {
+ /** Queue Identifier */
+ uint32_t qid : 16;
+ /** Queue Size */
+ uint32_t qsize : 16;
+ } create_io_q;
+
+ struct {
+ /** Queue Identifier */
+ uint32_t qid : 16;
+ uint32_t reserved : 16;
+ } delete_io_q;
+
+ struct {
+ /** Feature Identifier */
+ uint32_t fid : 8;
+ /** Select */
+ uint32_t sel : 3;
+ uint32_t reserved : 21;
+ } get_features;
+
+ struct {
+ /** Feature Identifier */
+ uint32_t fid : 8;
+ uint32_t reserved : 23;
+ /** Save */
+ uint32_t sv : 1;
+ } set_features;
+
+ struct {
+ /** Select */
+ uint32_t sel : 4;
+ uint32_t reserved : 28;
+ } ns_attach;
+
+ struct {
+ /** Select */
+ uint32_t sel : 4;
+ uint32_t reserved : 28;
+ } ns_manage;
+
+ struct {
+ /** Number of Ranges */
+ uint32_t nr : 8;
+ uint32_t reserved : 24;
+ } dsm;
+
+ struct {
+ /** Reservation Register Action */
+ uint32_t rrega : 3;
+ /** Ignore Existing Key */
+ uint32_t iekey : 1;
+ uint32_t reserved : 26;
+ /** Change Persist Through Power Loss State */
+ uint32_t cptpl : 2;
+ } resv_register;
+
+ struct {
+ /** Reservation Release Action */
+ uint32_t rrela : 3;
+ /** Ignore Existing Key */
+ uint32_t iekey : 1;
+ uint32_t reserved1 : 4;
+ /** Reservation Type */
+ uint32_t rtype : 8;
+ uint32_t reserved2 : 16;
+ } resv_release;
+
+ struct {
+ /** Reservation Acquire Action */
+ uint32_t racqa : 3;
+ /** Ignore Existing Key */
+ uint32_t iekey : 1;
+ uint32_t reserved1 : 4;
+ /** Reservation Type */
+ uint32_t rtype : 8;
+ uint32_t reserved2 : 16;
+ } resv_acquire;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmd_cdw10) == 4, "Incorrect size");
+
+union spdk_nvme_cmd_cdw11 { /* per-command views of command dword 11; every view is 32 bits wide */
+ uint32_t raw; /**< the dword as a single 32-bit value */
+
+ struct {
+ /** Physically Contiguous */
+ uint32_t pc : 1;
+ /** Queue Priority (see enum spdk_nvme_qprio) */
+ uint32_t qprio : 2;
+ uint32_t reserved : 13;
+ /** Completion Queue Identifier */
+ uint32_t cqid : 16;
+ } create_io_sq;
+
+ struct {
+ /** Physically Contiguous */
+ uint32_t pc : 1;
+ /** Interrupts Enabled */
+ uint32_t ien : 1;
+ uint32_t reserved : 14;
+ /** Interrupt Vector */
+ uint32_t iv : 16;
+ } create_io_cq;
+
+ struct {
+ /** Number of Dwords Upper (the lower half is numdl in cdw10) */
+ uint32_t numdu : 16;
+ /** Log Specific Identifier */
+ uint32_t lsid : 16;
+ } get_log_page;
+
+ struct {
+ /** Extended Data Structure */
+ uint32_t eds : 1;
+ uint32_t reserved : 31;
+ } resv_report;
+
+ union spdk_nvme_feat_arbitration feat_arbitration;
+ union spdk_nvme_feat_power_management feat_power_management;
+ union spdk_nvme_feat_lba_range_type feat_lba_range_type;
+ union spdk_nvme_feat_temperature_threshold feat_temp_threshold;
+ union spdk_nvme_feat_error_recovery feat_error_recovery;
+ union spdk_nvme_feat_volatile_write_cache feat_volatile_write_cache;
+ union spdk_nvme_feat_number_of_queues feat_num_of_queues;
+ union spdk_nvme_feat_interrupt_coalescing feat_interrupt_coalescing;
+ union spdk_nvme_feat_interrupt_vector_configuration feat_interrupt_vector_configuration;
+ union spdk_nvme_feat_write_atomicity feat_write_atomicity;
+ union spdk_nvme_feat_async_event_configuration feat_async_event_cfg;
+ union spdk_nvme_feat_keep_alive_timer feat_keep_alive_timer;
+ union spdk_nvme_feat_host_identifier feat_host_identifier;
+ union spdk_nvme_feat_reservation_notification_mask feat_rsv_notification_mask;
+ union spdk_nvme_feat_reservation_persistence feat_rsv_persistence;
+
+ struct {
+ /** Attribute - Integral Dataset for Read */
+ uint32_t idr : 1;
+ /** Attribute - Integral Dataset for Write */
+ uint32_t idw : 1;
+ /** Attribute - Deallocate */
+ uint32_t ad : 1;
+ uint32_t reserved : 29;
+ } dsm;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmd_cdw11) == 4, "Incorrect size");
+
+struct spdk_nvme_cmd { /* 64-byte command: sixteen dwords, numbered 0-15 in the comments below */
+ /* dword 0 */
+ uint16_t opc : 8; /* opcode */
+ uint16_t fuse : 2; /* fused operation (see enum spdk_nvme_cmd_fuse) */
+ uint16_t rsvd1 : 4;
+ uint16_t psdt : 2; /* see enum spdk_nvme_psdt_value */
+ uint16_t cid; /* command identifier */
+
+ /* dword 1 */
+ uint32_t nsid; /* namespace identifier */
+
+ /* dword 2-3 */
+ uint32_t rsvd2;
+ uint32_t rsvd3;
+
+ /* dword 4-5 */
+ uint64_t mptr; /* metadata pointer */
+
+ /* dword 6-9: data pointer */
+ union {
+ struct {
+ uint64_t prp1; /* prp entry 1 */
+ uint64_t prp2; /* prp entry 2 */
+ } prp;
+
+ struct spdk_nvme_sgl_descriptor sgl1; /* same 16 bytes viewed as one SGL descriptor */
+ } dptr;
+
+ /* dword 10: command-specific, as a raw value or via the per-command views */
+ union {
+ uint32_t cdw10;
+ union spdk_nvme_cmd_cdw10 cdw10_bits;
+ };
+ /* dword 11: command-specific, as a raw value or via the per-command views */
+ union {
+ uint32_t cdw11;
+ union spdk_nvme_cmd_cdw11 cdw11_bits;
+ };
+ /* dword 12-15 */
+ uint32_t cdw12; /* command-specific */
+ uint32_t cdw13; /* command-specific */
+ uint32_t cdw14; /* command-specific */
+ uint32_t cdw15; /* command-specific */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cmd) == 64, "Incorrect size");
+
+struct spdk_nvme_status { /* 16-bit status field of a completion queue entry */
+ uint16_t p : 1; /* phase tag */
+ uint16_t sc : 8; /* status code */
+ uint16_t sct : 3; /* status code type (see enum spdk_nvme_status_code_type) */
+ uint16_t rsvd2 : 2;
+ uint16_t m : 1; /* more */
+ uint16_t dnr : 1; /* do not retry */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_status) == 2, "Incorrect size");
+
+/**
+ * Completion queue entry (16 bytes, four dwords)
+ */
+struct spdk_nvme_cpl {
+ /* dword 0 */
+ uint32_t cdw0; /* command-specific */
+
+ /* dword 1 */
+ uint32_t rsvd1;
+
+ /* dword 2 */
+ uint16_t sqhd; /* submission queue head pointer */
+ uint16_t sqid; /* submission queue identifier */
+
+ /* dword 3 */
+ uint16_t cid; /* command identifier */
+ union {
+ uint16_t status_raw; /* the status field as a single 16-bit value */
+ struct spdk_nvme_status status; /* the same 16 bits, field by field */
+ };
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cpl) == 16, "Incorrect size");
+
+/**
+ * Dataset Management range (one 16-byte range entry)
+ */
+struct spdk_nvme_dsm_range {
+ union {
+ struct {
+ uint32_t af : 4; /**< access frequency */
+ uint32_t al : 2; /**< access latency */
+ uint32_t reserved0 : 2;
+
+ uint32_t sr : 1; /**< sequential read range */
+ uint32_t sw : 1; /**< sequential write range */
+ uint32_t wp : 1; /**< write prepare */
+ uint32_t reserved1 : 13;
+
+ uint32_t access_size : 8; /**< command access size */
+ } bits;
+
+ uint32_t raw; /**< all attribute bits as a single 32-bit value */
+ } attributes;
+
+ uint32_t length; /**< at most SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS blocks */
+ uint64_t starting_lba;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_dsm_range) == 16, "Incorrect size");
+
+/**
+ * Status code types (values for the 3-bit sct field of struct spdk_nvme_status)
+ */
+enum spdk_nvme_status_code_type {
+ SPDK_NVME_SCT_GENERIC = 0x0,
+ SPDK_NVME_SCT_COMMAND_SPECIFIC = 0x1,
+ SPDK_NVME_SCT_MEDIA_ERROR = 0x2,
+ SPDK_NVME_SCT_PATH = 0x3,
+ /* 0x4-0x6 - reserved */
+ SPDK_NVME_SCT_VENDOR_SPECIFIC = 0x7,
+};
+
+/**
+ * Generic command status codes (status code type SPDK_NVME_SCT_GENERIC)
+ */
+enum spdk_nvme_generic_command_status_code {
+ SPDK_NVME_SC_SUCCESS = 0x00,
+ SPDK_NVME_SC_INVALID_OPCODE = 0x01,
+ SPDK_NVME_SC_INVALID_FIELD = 0x02,
+ SPDK_NVME_SC_COMMAND_ID_CONFLICT = 0x03,
+ SPDK_NVME_SC_DATA_TRANSFER_ERROR = 0x04,
+ SPDK_NVME_SC_ABORTED_POWER_LOSS = 0x05,
+ SPDK_NVME_SC_INTERNAL_DEVICE_ERROR = 0x06,
+ SPDK_NVME_SC_ABORTED_BY_REQUEST = 0x07,
+ SPDK_NVME_SC_ABORTED_SQ_DELETION = 0x08,
+ SPDK_NVME_SC_ABORTED_FAILED_FUSED = 0x09,
+ SPDK_NVME_SC_ABORTED_MISSING_FUSED = 0x0a,
+ SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b,
+ SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c,
+ SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR = 0x0d,
+ SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS = 0x0e, /* sic: the "DESCIRPTORS" misspelling is part of the public name */
+ SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID = 0x0f,
+ SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID = 0x10,
+ SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID = 0x11,
+ SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF = 0x12,
+ SPDK_NVME_SC_INVALID_PRP_OFFSET = 0x13,
+ SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED = 0x14,
+ SPDK_NVME_SC_OPERATION_DENIED = 0x15,
+ SPDK_NVME_SC_INVALID_SGL_OFFSET = 0x16,
+ /* 0x17 - reserved */
+ SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT = 0x18,
+ SPDK_NVME_SC_KEEP_ALIVE_EXPIRED = 0x19,
+ SPDK_NVME_SC_KEEP_ALIVE_INVALID = 0x1a,
+ SPDK_NVME_SC_ABORTED_PREEMPT = 0x1b,
+ SPDK_NVME_SC_SANITIZE_FAILED = 0x1c,
+ SPDK_NVME_SC_SANITIZE_IN_PROGRESS = 0x1d,
+ SPDK_NVME_SC_SGL_DATA_BLOCK_GRANULARITY_INVALID = 0x1e,
+ SPDK_NVME_SC_COMMAND_INVALID_IN_CMB = 0x1f,
+
+ SPDK_NVME_SC_LBA_OUT_OF_RANGE = 0x80, /* values 0x20-0x7f have no enumerator here */
+ SPDK_NVME_SC_CAPACITY_EXCEEDED = 0x81,
+ SPDK_NVME_SC_NAMESPACE_NOT_READY = 0x82,
+ SPDK_NVME_SC_RESERVATION_CONFLICT = 0x83,
+ SPDK_NVME_SC_FORMAT_IN_PROGRESS = 0x84,
+};
+
+/**
+ * Command specific status codes
+ */
+enum spdk_nvme_command_specific_status_code {
+ SPDK_NVME_SC_COMPLETION_QUEUE_INVALID = 0x00,
+ SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01,
+ SPDK_NVME_SC_INVALID_QUEUE_SIZE = 0x02,
+ SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03,
+ /* 0x04 - reserved */
+ SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
+ SPDK_NVME_SC_INVALID_FIRMWARE_SLOT = 0x06,
+ SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07,
+ SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08,
+ SPDK_NVME_SC_INVALID_LOG_PAGE = 0x09,
+ SPDK_NVME_SC_INVALID_FORMAT = 0x0a,
+ SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET = 0x0b,
+ SPDK_NVME_SC_INVALID_QUEUE_DELETION = 0x0c,
+ SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE = 0x0d,
+ SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE = 0x0e,
+ SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC = 0x0f,
+ SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET = 0x10,
+ SPDK_NVME_SC_FIRMWARE_REQ_RESET = 0x11,
+ SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION = 0x12,
+ SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED = 0x13,
+ SPDK_NVME_SC_OVERLAPPING_RANGE = 0x14,
+ SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY = 0x15,
+ SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE = 0x16,
+ /* 0x17 - reserved */
+ SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED = 0x18,
+ SPDK_NVME_SC_NAMESPACE_IS_PRIVATE = 0x19,
+ SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED = 0x1a,
+ SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED = 0x1b,
+ SPDK_NVME_SC_CONTROLLER_LIST_INVALID = 0x1c,
+ SPDK_NVME_SC_DEVICE_SELF_TEST_IN_PROGRESS = 0x1d,
+ SPDK_NVME_SC_BOOT_PARTITION_WRITE_PROHIBITED = 0x1e,
+ SPDK_NVME_SC_INVALID_CTRLR_ID = 0x1f,
+ SPDK_NVME_SC_INVALID_SECONDARY_CTRLR_STATE = 0x20,
+ SPDK_NVME_SC_INVALID_NUM_CTRLR_RESOURCES = 0x21,
+ SPDK_NVME_SC_INVALID_RESOURCE_ID = 0x22,
+
+ SPDK_NVME_SC_IOCS_NOT_SUPPORTED = 0x29,
+ SPDK_NVME_SC_IOCS_NOT_ENABLED = 0x2a,
+ SPDK_NVME_SC_IOCS_COMBINATION_REJECTED = 0x2b,
+ SPDK_NVME_SC_INVALID_IOCS = 0x2c,
+
+ SPDK_NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
+ SPDK_NVME_SC_INVALID_PROTECTION_INFO = 0x81,
+ SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_RANGE = 0x82,
+};
+
+/**
+ * Media error status codes
+ */
+enum spdk_nvme_media_error_status_code {
+ SPDK_NVME_SC_WRITE_FAULTS = 0x80,
+ SPDK_NVME_SC_UNRECOVERED_READ_ERROR = 0x81,
+ SPDK_NVME_SC_GUARD_CHECK_ERROR = 0x82,
+ SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83,
+ SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84,
+ SPDK_NVME_SC_COMPARE_FAILURE = 0x85,
+ SPDK_NVME_SC_ACCESS_DENIED = 0x86,
+ SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK = 0x87,
+};
+
+/**
+ * Path related status codes
+ */
+enum spdk_nvme_path_status_code {
+ SPDK_NVME_SC_INTERNAL_PATH_ERROR = 0x00,
+
+ SPDK_NVME_SC_CONTROLLER_PATH_ERROR = 0x60,
+
+ SPDK_NVME_SC_HOST_PATH_ERROR = 0x70,
+ SPDK_NVME_SC_ABORTED_BY_HOST = 0x71,
+};
+
+#define SPDK_NVME_MAX_OPC 0xff
+
+/**
+ * Admin opcodes
+ */
+enum spdk_nvme_admin_opcode {
+ SPDK_NVME_OPC_DELETE_IO_SQ = 0x00,
+ SPDK_NVME_OPC_CREATE_IO_SQ = 0x01,
+ SPDK_NVME_OPC_GET_LOG_PAGE = 0x02,
+ /* 0x03 - reserved */
+ SPDK_NVME_OPC_DELETE_IO_CQ = 0x04,
+ SPDK_NVME_OPC_CREATE_IO_CQ = 0x05,
+ SPDK_NVME_OPC_IDENTIFY = 0x06,
+ /* 0x07 - reserved */
+ SPDK_NVME_OPC_ABORT = 0x08,
+ SPDK_NVME_OPC_SET_FEATURES = 0x09,
+ SPDK_NVME_OPC_GET_FEATURES = 0x0a,
+ /* 0x0b - reserved */
+ SPDK_NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c,
+ SPDK_NVME_OPC_NS_MANAGEMENT = 0x0d,
+ /* 0x0e-0x0f - reserved */
+ SPDK_NVME_OPC_FIRMWARE_COMMIT = 0x10,
+ SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11,
+
+ SPDK_NVME_OPC_DEVICE_SELF_TEST = 0x14,
+ SPDK_NVME_OPC_NS_ATTACHMENT = 0x15,
+
+ SPDK_NVME_OPC_KEEP_ALIVE = 0x18,
+ SPDK_NVME_OPC_DIRECTIVE_SEND = 0x19,
+ SPDK_NVME_OPC_DIRECTIVE_RECEIVE = 0x1a,
+
+ SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c,
+ SPDK_NVME_OPC_NVME_MI_SEND = 0x1d,
+ SPDK_NVME_OPC_NVME_MI_RECEIVE = 0x1e,
+
+ SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c,
+
+ SPDK_NVME_OPC_FORMAT_NVM = 0x80,
+ SPDK_NVME_OPC_SECURITY_SEND = 0x81,
+ SPDK_NVME_OPC_SECURITY_RECEIVE = 0x82,
+
+ SPDK_NVME_OPC_SANITIZE = 0x84,
+
+ SPDK_NVME_OPC_GET_LBA_STATUS = 0x86,
+};
+
+/**
+ * NVM command set opcodes
+ */
+enum spdk_nvme_nvm_opcode {
+ SPDK_NVME_OPC_FLUSH = 0x00,
+ SPDK_NVME_OPC_WRITE = 0x01,
+ SPDK_NVME_OPC_READ = 0x02,
+ /* 0x03 - reserved */
+ SPDK_NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
+ SPDK_NVME_OPC_COMPARE = 0x05,
+ /* 0x06-0x07 - reserved */
+ SPDK_NVME_OPC_WRITE_ZEROES = 0x08,
+ SPDK_NVME_OPC_DATASET_MANAGEMENT = 0x09,
+
+ SPDK_NVME_OPC_RESERVATION_REGISTER = 0x0d,
+ SPDK_NVME_OPC_RESERVATION_REPORT = 0x0e,
+
+ SPDK_NVME_OPC_RESERVATION_ACQUIRE = 0x11,
+ SPDK_NVME_OPC_RESERVATION_RELEASE = 0x15,
+};
+
+/**
+ * Data transfer (bits 1:0) of an NVMe opcode.
+ *
+ * \sa spdk_nvme_opc_get_data_transfer
+ */
+enum spdk_nvme_data_transfer {
+ /** Opcode does not transfer data */
+ SPDK_NVME_DATA_NONE = 0,
+ /** Opcode transfers data from host to controller (e.g. Write) */
+ SPDK_NVME_DATA_HOST_TO_CONTROLLER = 1,
+ /** Opcode transfers data from controller to host (e.g. Read) */
+ SPDK_NVME_DATA_CONTROLLER_TO_HOST = 2,
+ /** Opcode transfers data both directions */
+ SPDK_NVME_DATA_BIDIRECTIONAL = 3
+};
+
+/**
+ * Extract the Data Transfer bits from an NVMe opcode.
+ *
+ * The two low-order bits of the opcode (opc & 3) are returned as an
+ * \ref spdk_nvme_data_transfer value, i.e. whether the command moves
+ * data and in which direction (host to controller or controller to host).
+ */
+static inline enum spdk_nvme_data_transfer spdk_nvme_opc_get_data_transfer(uint8_t opc)
+{
+ return (enum spdk_nvme_data_transfer)(opc & 3);
+}
+
+enum spdk_nvme_feat {
+ /* 0x00 - reserved */
+
+ /** cdw11 layout defined by \ref spdk_nvme_feat_arbitration */
+ SPDK_NVME_FEAT_ARBITRATION = 0x01,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_power_management */
+ SPDK_NVME_FEAT_POWER_MANAGEMENT = 0x02,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_lba_range_type */
+ SPDK_NVME_FEAT_LBA_RANGE_TYPE = 0x03,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_temperature_threshold */
+ SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_error_recovery */
+ SPDK_NVME_FEAT_ERROR_RECOVERY = 0x05,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_volatile_write_cache */
+ SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_number_of_queues */
+ SPDK_NVME_FEAT_NUMBER_OF_QUEUES = 0x07,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_interrupt_coalescing */
+ SPDK_NVME_FEAT_INTERRUPT_COALESCING = 0x08,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_interrupt_vector_configuration */
+ SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_write_atomicity */
+ SPDK_NVME_FEAT_WRITE_ATOMICITY = 0x0A,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_async_event_configuration */
+ SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_autonomous_power_state_transition */
+ SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_host_mem_buffer */
+ SPDK_NVME_FEAT_HOST_MEM_BUFFER = 0x0D,
+ SPDK_NVME_FEAT_TIMESTAMP = 0x0E,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_keep_alive_timer */
+ SPDK_NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_host_controlled_thermal_management */
+ SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT = 0x10,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_non_operational_power_state_config */
+ SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG = 0x11,
+
+ /* 0x12-0x77 - reserved */
+
+ /* 0x78-0x7F - NVMe-MI features */
+
+ /** cdw11 layout defined by \ref spdk_nvme_feat_software_progress_marker */
+ SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
+
+ /** cdw11 layout defined by \ref spdk_nvme_feat_host_identifier */
+ SPDK_NVME_FEAT_HOST_IDENTIFIER = 0x81,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_reservation_notification_mask */
+ SPDK_NVME_FEAT_HOST_RESERVE_MASK = 0x82,
+ /** cdw11 layout defined by \ref spdk_nvme_feat_reservation_persistence */
+ SPDK_NVME_FEAT_HOST_RESERVE_PERSIST = 0x83,
+
+ /* 0x84-0xBF - command set specific (reserved) */
+
+ /* 0xC0-0xFF - vendor specific */
+};
+
+/** Bit set of attributes for DATASET MANAGEMENT commands. */
+enum spdk_nvme_dsm_attribute {
+ SPDK_NVME_DSM_ATTR_INTEGRAL_READ = 0x1,
+ SPDK_NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2,
+ SPDK_NVME_DSM_ATTR_DEALLOCATE = 0x4,
+};
+
+struct spdk_nvme_power_state {
+ uint16_t mp; /**< bits 15:00: maximum power */
+
+ uint8_t reserved1;
+
+ uint8_t mps : 1; /**< bit 24: max power scale */
+ uint8_t nops : 1; /**< bit 25: non-operational state */
+ uint8_t reserved2 : 6;
+
+ uint32_t enlat; /**< bits 63:32: entry latency in microseconds */
+ uint32_t exlat; /**< bits 95:64: exit latency in microseconds */
+
+ uint8_t rrt : 5; /**< bits 100:96: relative read throughput */
+ uint8_t reserved3 : 3;
+
+ uint8_t rrl : 5; /**< bits 108:104: relative read latency */
+ uint8_t reserved4 : 3;
+
+ uint8_t rwt : 5; /**< bits 116:112: relative write throughput */
+ uint8_t reserved5 : 3;
+
+ uint8_t rwl : 5; /**< bits 124:120: relative write latency */
+ uint8_t reserved6 : 3;
+
+ uint8_t reserved7[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_power_state) == 32, "Incorrect size");
+
+/** Identify command CNS value */
+enum spdk_nvme_identify_cns {
+ /** Identify namespace indicated in CDW1.NSID */
+ SPDK_NVME_IDENTIFY_NS = 0x00,
+
+ /** Identify controller */
+ SPDK_NVME_IDENTIFY_CTRLR = 0x01,
+
+ /** List active NSIDs greater than CDW1.NSID */
+ SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST = 0x02,
+
+ /** List namespace identification descriptors */
+ SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST = 0x03,
+
+ /** Identify namespace indicated in CDW1.NSID, specific to CDW11.CSI */
+ SPDK_NVME_IDENTIFY_NS_IOCS = 0x05,
+
+ /** Identify controller, specific to CDW11.CSI */
+ SPDK_NVME_IDENTIFY_CTRLR_IOCS = 0x06,
+
+ /** List active NSIDs greater than CDW1.NSID, specific to CDW11.CSI */
+ SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST_IOCS = 0x07,
+
+ /** List allocated NSIDs greater than CDW1.NSID */
+ SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST = 0x10,
+
+ /** Identify namespace if CDW1.NSID is allocated */
+ SPDK_NVME_IDENTIFY_NS_ALLOCATED = 0x11,
+
+ /** Get list of controllers starting at CDW10.CNTID that are attached to CDW1.NSID */
+ SPDK_NVME_IDENTIFY_NS_ATTACHED_CTRLR_LIST = 0x12,
+
+ /** Get list of controllers starting at CDW10.CNTID */
+ SPDK_NVME_IDENTIFY_CTRLR_LIST = 0x13,
+
+ /** Get primary controller capabilities structure */
+ SPDK_NVME_IDENTIFY_PRIMARY_CTRLR_CAP = 0x14,
+
+ /** Get secondary controller list */
+ SPDK_NVME_IDENTIFY_SECONDARY_CTRLR_LIST = 0x15,
+
+ /** List allocated NSIDs greater than CDW1.NSID, specific to CDW11.CSI */
+ SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST_IOCS = 0x1a,
+
+ /** Identify namespace if CDW1.NSID is allocated, specific to CDW11.CSI */
+ SPDK_NVME_IDENTIFY_NS_ALLOCATED_IOCS = 0x1b,
+
+ /** Identify I/O Command Sets */
+ SPDK_NVME_IDENTIFY_IOCS = 0x1c,
+};
+
+/** NVMe over Fabrics controller model */
+enum spdk_nvmf_ctrlr_model {
+ /** NVM subsystem uses dynamic controller model */
+ SPDK_NVMF_CTRLR_MODEL_DYNAMIC = 0,
+
+ /** NVM subsystem uses static controller model */
+ SPDK_NVMF_CTRLR_MODEL_STATIC = 1,
+};
+
+#define SPDK_NVME_CTRLR_SN_LEN 20
+#define SPDK_NVME_CTRLR_MN_LEN 40
+#define SPDK_NVME_CTRLR_FR_LEN 8
+
+/** Identify Controller data sgls.supported values */
+enum spdk_nvme_sgls_supported {
+ /** SGLs are not supported */
+ SPDK_NVME_SGLS_NOT_SUPPORTED = 0,
+
+ /** SGLs are supported with no alignment or granularity requirement. */
+ SPDK_NVME_SGLS_SUPPORTED = 1,
+
+ /** SGLs are supported with a DWORD alignment and granularity requirement. */
+ SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED = 2,
+};
+
+/** Identify Controller data vwc.flush_broadcast values */
+enum spdk_nvme_flush_broadcast {
+ /** Support for NSID=FFFFFFFFh with Flush is not indicated. */
+ SPDK_NVME_FLUSH_BROADCAST_NOT_INDICATED = 0,
+
+ /* 01b: Reserved */
+
+ /** Flush does not support NSID set to FFFFFFFFh. */
+ SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED = 2,
+
+ /** Flush supports NSID set to FFFFFFFFh. */
+ SPDK_NVME_FLUSH_BROADCAST_SUPPORTED = 3
+};
+
+#define SPDK_NVME_NQN_FIELD_SIZE 256
+
+/** Identify Controller data NVMe over Fabrics-specific fields */
+struct spdk_nvme_cdata_nvmf_specific {
+ /** I/O queue command capsule supported size (16-byte units) */
+ uint32_t ioccsz;
+
+ /** I/O queue response capsule supported size (16-byte units) */
+ uint32_t iorcsz;
+
+ /** In-capsule data offset (16-byte units) */
+ uint16_t icdoff;
+
+ /** Controller attributes */
+ struct {
+ /** Controller model: \ref spdk_nvmf_ctrlr_model */
+ uint8_t ctrlr_model : 1;
+ uint8_t reserved : 7;
+ } ctrattr;
+
+ /** Maximum SGL block descriptors (0 = no limit) */
+ uint8_t msdbd;
+
+ uint8_t reserved[244];
+};
+
+/** Identify Controller data SGL support */
+struct spdk_nvme_cdata_sgls {
+ uint32_t supported : 2;
+ uint32_t keyed_sgl : 1;
+ uint32_t reserved1 : 13;
+ uint32_t bit_bucket_descriptor : 1;
+ uint32_t metadata_pointer : 1;
+ uint32_t oversized_sgl : 1;
+ uint32_t metadata_address : 1;
+ uint32_t sgl_offset : 1;
+ uint32_t transport_sgl : 1;
+ uint32_t reserved2 : 10;
+};
+
+struct __attribute__((packed)) __attribute__((aligned)) spdk_nvme_ctrlr_data {
+ /* bytes 0-255: controller capabilities and features */
+
+ /** pci vendor id */
+ uint16_t vid;
+
+ /** pci subsystem vendor id */
+ uint16_t ssvid;
+
+ /** serial number */
+ int8_t sn[SPDK_NVME_CTRLR_SN_LEN];
+
+ /** model number */
+ int8_t mn[SPDK_NVME_CTRLR_MN_LEN];
+
+ /** firmware revision */
+ uint8_t fr[SPDK_NVME_CTRLR_FR_LEN];
+
+ /** recommended arbitration burst */
+ uint8_t rab;
+
+ /** ieee oui identifier */
+ uint8_t ieee[3];
+
+ /** controller multi-path I/O and namespace sharing capabilities */
+ struct {
+ uint8_t multi_port : 1;
+ uint8_t multi_host : 1;
+ uint8_t sr_iov : 1;
+ uint8_t reserved : 5;
+ } cmic;
+
+ /** maximum data transfer size */
+ uint8_t mdts;
+
+ /** controller id */
+ uint16_t cntlid;
+
+ /** version */
+ union spdk_nvme_vs_register ver;
+
+ /** RTD3 resume latency */
+ uint32_t rtd3r;
+
+ /** RTD3 entry latency */
+ uint32_t rtd3e;
+
+ /** optional asynchronous events supported */
+ struct {
+ uint32_t reserved1 : 8;
+
+ /** Supports sending Namespace Attribute Notices. */
+ uint32_t ns_attribute_notices : 1;
+
+ /** Supports sending Firmware Activation Notices. */
+ uint32_t fw_activation_notices : 1;
+
+ uint32_t reserved2 : 22;
+ } oaes;
+
+ /** controller attributes */
+ struct {
+ /** Supports 128-bit host identifier */
+ uint32_t host_id_exhid_supported: 1;
+
+ /** Supports non-operational power state permissive mode */
+ uint32_t non_operational_power_state_permissive_mode: 1;
+
+ uint32_t reserved: 30;
+ } ctratt;
+
+ uint8_t reserved_100[12];
+
+ /** FRU globally unique identifier */
+ uint8_t fguid[16];
+
+ uint8_t reserved_128[128];
+
+ /* bytes 256-511: admin command set attributes */
+
+ /** optional admin command support */
+ struct {
+ /* supports security send/receive commands */
+ uint16_t security : 1;
+
+ /* supports format nvm command */
+ uint16_t format : 1;
+
+ /* supports firmware activate/download commands */
+ uint16_t firmware : 1;
+
+ /* supports ns manage/ns attach commands */
+ uint16_t ns_manage : 1;
+
+ /** Supports device self-test command (SPDK_NVME_OPC_DEVICE_SELF_TEST) */
+ uint16_t device_self_test : 1;
+
+ /** Supports SPDK_NVME_OPC_DIRECTIVE_SEND and SPDK_NVME_OPC_DIRECTIVE_RECEIVE */
+ uint16_t directives : 1;
+
+ /** Supports NVMe-MI (SPDK_NVME_OPC_NVME_MI_SEND, SPDK_NVME_OPC_NVME_MI_RECEIVE) */
+ uint16_t nvme_mi : 1;
+
+ /** Supports SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT */
+ uint16_t virtualization_management : 1;
+
+ /** Supports SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG */
+ uint16_t doorbell_buffer_config : 1;
+
+ /** Supports SPDK_NVME_OPC_GET_LBA_STATUS */
+ uint16_t get_lba_status : 1;
+
+ uint16_t oacs_rsvd : 6;
+ } oacs;
+
+ /** abort command limit */
+ uint8_t acl;
+
+ /** asynchronous event request limit */
+ uint8_t aerl;
+
+ /** firmware updates */
+ struct {
+ /* first slot is read-only */
+ uint8_t slot1_ro : 1;
+
+ /* number of firmware slots */
+ uint8_t num_slots : 3;
+
+ /* support activation without reset */
+ uint8_t activation_without_reset : 1;
+
+ uint8_t frmw_rsvd : 3;
+ } frmw;
+
+ /** log page attributes */
+ struct {
+ /* per namespace smart/health log page */
+ uint8_t ns_smart : 1;
+ /* command effects log page */
+ uint8_t celp : 1;
+ /* extended data for get log page */
+ uint8_t edlp: 1;
+ /** telemetry log pages and notices */
+ uint8_t telemetry : 1;
+ uint8_t lpa_rsvd : 4;
+ } lpa;
+
+ /** error log page entries */
+ uint8_t elpe;
+
+ /** number of power states supported */
+ uint8_t npss;
+
+ /** admin vendor specific command configuration */
+ struct {
+ /* admin vendor specific commands use disk format */
+ uint8_t spec_format : 1;
+
+ uint8_t avscc_rsvd : 7;
+ } avscc;
+
+ /** autonomous power state transition attributes */
+ struct {
+ /** controller supports autonomous power state transitions */
+ uint8_t supported : 1;
+
+ uint8_t apsta_rsvd : 7;
+ } apsta;
+
+ /** warning composite temperature threshold */
+ uint16_t wctemp;
+
+ /** critical composite temperature threshold */
+ uint16_t cctemp;
+
+ /** maximum time for firmware activation */
+ uint16_t mtfa;
+
+ /** host memory buffer preferred size */
+ uint32_t hmpre;
+
+ /** host memory buffer minimum size */
+ uint32_t hmmin;
+
+ /** total NVM capacity */
+ uint64_t tnvmcap[2];
+
+ /** unallocated NVM capacity */
+ uint64_t unvmcap[2];
+
+ /** replay protected memory block support */
+ struct {
+ uint8_t num_rpmb_units : 3;
+ uint8_t auth_method : 3;
+ uint8_t reserved1 : 2;
+
+ uint8_t reserved2;
+
+ uint8_t total_size;
+ uint8_t access_size;
+ } rpmbs;
+
+ /** extended device self-test time (in minutes) */
+ uint16_t edstt;
+
+ /** device self-test options */
+ union {
+ uint8_t raw;
+ struct {
+ /** Device supports only one device self-test operation at a time */
+ uint8_t one_only : 1;
+
+ uint8_t reserved : 7;
+ } bits;
+ } dsto;
+
+ /**
+ * Firmware update granularity
+ *
+ * 4KB units
+ * 0x00 = no information provided
+ * 0xFF = no restriction
+ */
+ uint8_t fwug;
+
+ /**
+ * Keep Alive Support
+ *
+ * Granularity of keep alive timer in 100 ms units
+ * 0 = keep alive not supported
+ */
+ uint16_t kas;
+
+ /** Host controlled thermal management attributes */
+ union {
+ uint16_t raw;
+ struct {
+ uint16_t supported : 1;
+ uint16_t reserved : 15;
+ } bits;
+ } hctma;
+
+ /** Minimum thermal management temperature */
+ uint16_t mntmt;
+
+ /** Maximum thermal management temperature */
+ uint16_t mxtmt;
+
+ /** Sanitize capabilities */
+ union {
+ uint32_t raw;
+ struct {
+ uint32_t crypto_erase : 1;
+ uint32_t block_erase : 1;
+ uint32_t overwrite : 1;
+ uint32_t reserved : 29;
+ } bits;
+ } sanicap;
+
+ uint8_t reserved3[180];
+
+ /* bytes 512-703: nvm command set attributes */
+
+ /** submission queue entry size */
+ struct {
+ uint8_t min : 4;
+ uint8_t max : 4;
+ } sqes;
+
+ /** completion queue entry size */
+ struct {
+ uint8_t min : 4;
+ uint8_t max : 4;
+ } cqes;
+
+ uint16_t maxcmd;
+
+ /** number of namespaces */
+ uint32_t nn;
+
+ /** optional nvm command support */
+ struct {
+ uint16_t compare : 1;
+ uint16_t write_unc : 1;
+ uint16_t dsm: 1;
+ uint16_t write_zeroes: 1;
+ uint16_t set_features_save: 1;
+ uint16_t reservations: 1;
+ uint16_t timestamp: 1;
+ uint16_t reserved: 9;
+ } oncs;
+
+ /** fused operation support */
+ struct {
+ uint16_t compare_and_write : 1;
+ uint16_t reserved : 15;
+ } fuses;
+
+ /** format nvm attributes */
+ struct {
+ uint8_t format_all_ns: 1;
+ uint8_t erase_all_ns: 1;
+ uint8_t crypto_erase_supported: 1;
+ uint8_t reserved: 5;
+ } fna;
+
+ /** volatile write cache */
+ struct {
+ uint8_t present : 1;
+ uint8_t flush_broadcast : 2;
+ uint8_t reserved : 5;
+ } vwc;
+
+ /** atomic write unit normal */
+ uint16_t awun;
+
+ /** atomic write unit power fail */
+ uint16_t awupf;
+
+ /** NVM vendor specific command configuration */
+ uint8_t nvscc;
+
+ uint8_t reserved531;
+
+ /** atomic compare & write unit */
+ uint16_t acwu;
+
+ uint16_t reserved534;
+
+ struct spdk_nvme_cdata_sgls sgls;
+
+ uint8_t reserved4[228];
+
+ uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE];
+
+ uint8_t reserved5[768];
+
+ struct spdk_nvme_cdata_nvmf_specific nvmf_specific;
+
+ /* bytes 2048-3071: power state descriptors */
+ struct spdk_nvme_power_state psd[32];
+
+ /* bytes 3072-4095: vendor specific */
+ uint8_t vs[1024];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_data) == 4096, "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_primary_ctrl_capabilities {
+ /** controller id */
+ uint16_t cntlid;
+ /** port identifier */
+ uint16_t portid;
+ /** controller resource types */
+ struct {
+ uint8_t vq_supported : 1;
+ uint8_t vi_supported : 1;
+ uint8_t reserved : 6;
+ } crt;
+ uint8_t reserved[27];
+ /** total number of VQ flexible resources */
+ uint32_t vqfrt;
+ /** total number of VQ flexible resources assigned to secondary controllers */
+ uint32_t vqrfa;
+ /** total number of VQ flexible resources allocated to primary controller */
+ uint16_t vqrfap;
+ /** total number of VQ Private resources for the primary controller */
+ uint16_t vqprt;
+ /** max number of VQ flexible Resources that may be assigned to a secondary controller */
+ uint16_t vqfrsm;
+ /** preferred granularity of assigning and removing VQ Flexible Resources */
+ uint16_t vqgran;
+ uint8_t reserved1[16];
+ /** total number of VI flexible resources for the primary and its secondary controllers */
+ uint32_t vifrt;
+ /** total number of VI flexible resources assigned to the secondary controllers */
+ uint32_t virfa;
+ /** total number of VI flexible resources currently allocated to the primary controller */
+ uint16_t virfap;
+ /** total number of VI private resources for the primary controller */
+ uint16_t viprt;
+ /** max number of VI flexible resources that may be assigned to a secondary controller */
+ uint16_t vifrsm;
+ /** preferred granularity of assigning and removing VI flexible resources */
+ uint16_t vigran;
+ uint8_t reserved2[4016];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_primary_ctrl_capabilities) == 4096, "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_secondary_ctrl_entry {
+ /** controller identifier of the secondary controller */
+ uint16_t scid;
+ /** controller identifier of the associated primary controller */
+ uint16_t pcid;
+ /** indicates the state of the secondary controller */
+ struct {
+ uint8_t is_online : 1;
+ uint8_t reserved : 7;
+ } scs;
+ uint8_t reserved[3];
+ /** VF number if the secondary controller is an SR-IOV VF */
+ uint16_t vfn;
+ /** number of VQ flexible resources assigned to the indicated secondary controller */
+ uint16_t nvq;
+ /** number of VI flexible resources assigned to the indicated secondary controller */
+ uint16_t nvi;
+ uint8_t reserved1[18];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_secondary_ctrl_entry) == 32, "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_secondary_ctrl_list {
+ /** number of Secondary controller entries in the list */
+ uint8_t number;
+ uint8_t reserved[31];
+ struct spdk_nvme_secondary_ctrl_entry entries[127];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_secondary_ctrl_list) == 4096, "Incorrect size");
+
+struct spdk_nvme_ns_data {
+ /** namespace size */
+ uint64_t nsze;
+
+ /** namespace capacity */
+ uint64_t ncap;
+
+ /** namespace utilization */
+ uint64_t nuse;
+
+ /** namespace features */
+ struct {
+ /** thin provisioning */
+ uint8_t thin_prov : 1;
+
+ /** NAWUN, NAWUPF, and NACWU are defined for this namespace */
+ uint8_t ns_atomic_write_unit : 1;
+
+ /** Supports Deallocated or Unwritten LBA error for this namespace */
+ uint8_t dealloc_or_unwritten_error : 1;
+
+ /** Non-zero NGUID and EUI64 for namespace are never reused */
+ uint8_t guid_never_reused : 1;
+
+ uint8_t reserved1 : 4;
+ } nsfeat;
+
+ /** number of lba formats */
+ uint8_t nlbaf;
+
+ /** formatted lba size */
+ struct {
+ uint8_t format : 4;
+ uint8_t extended : 1;
+ uint8_t reserved2 : 3;
+ } flbas;
+
+ /** metadata capabilities */
+ struct {
+ /** metadata can be transferred as part of data prp list */
+ uint8_t extended : 1;
+
+ /** metadata can be transferred with separate metadata pointer */
+ uint8_t pointer : 1;
+
+ /** reserved */
+ uint8_t reserved3 : 6;
+ } mc;
+
+ /** end-to-end data protection capabilities */
+ struct {
+ /** protection information type 1 */
+ uint8_t pit1 : 1;
+
+ /** protection information type 2 */
+ uint8_t pit2 : 1;
+
+ /** protection information type 3 */
+ uint8_t pit3 : 1;
+
+ /** first eight bytes of metadata */
+ uint8_t md_start : 1;
+
+ /** last eight bytes of metadata */
+ uint8_t md_end : 1;
+ } dpc;
+
+ /** end-to-end data protection type settings */
+ struct {
+ /** protection information type */
+ uint8_t pit : 3;
+
+ /** 1 == protection info transferred at start of metadata */
+ /** 0 == protection info transferred at end of metadata */
+ uint8_t md_start : 1;
+
+ uint8_t reserved4 : 4;
+ } dps;
+
+ /** namespace multi-path I/O and namespace sharing capabilities */
+ struct {
+ uint8_t can_share : 1;
+ uint8_t reserved : 7;
+ } nmic;
+
+ /** reservation capabilities */
+ union {
+ struct {
+ /** supports persist through power loss */
+ uint8_t persist : 1;
+
+ /** supports write exclusive */
+ uint8_t write_exclusive : 1;
+
+ /** supports exclusive access */
+ uint8_t exclusive_access : 1;
+
+ /** supports write exclusive - registrants only */
+ uint8_t write_exclusive_reg_only : 1;
+
+ /** supports exclusive access - registrants only */
+ uint8_t exclusive_access_reg_only : 1;
+
+ /** supports write exclusive - all registrants */
+ uint8_t write_exclusive_all_reg : 1;
+
+ /** supports exclusive access - all registrants */
+ uint8_t exclusive_access_all_reg : 1;
+
+ /** supports ignore existing key */
+ uint8_t ignore_existing_key : 1;
+ } rescap;
+ uint8_t raw;
+ } nsrescap;
+ /** format progress indicator */
+ struct {
+ uint8_t percentage_remaining : 7;
+ uint8_t fpi_supported : 1;
+ } fpi;
+
+ /** deallocate logical features */
+ union {
+ uint8_t raw;
+ struct {
+ /**
+ * Value read from deallocated blocks
+ *
+ * 000b = not reported
+ * 001b = all bytes 0x00
+ * 010b = all bytes 0xFF
+ *
+ * \ref spdk_nvme_dealloc_logical_block_read_value
+ */
+ uint8_t read_value : 3;
+
+ /** Supports Deallocate bit in Write Zeroes */
+ uint8_t write_zero_deallocate : 1;
+
+ /**
+ * Guard field behavior for deallocated logical blocks
+ * 0: contains 0xFFFF
+ * 1: contains CRC for read value
+ */
+ uint8_t guard_value : 1;
+
+ uint8_t reserved : 3;
+ } bits;
+ } dlfeat;
+
+ /** namespace atomic write unit normal */
+ uint16_t nawun;
+
+ /** namespace atomic write unit power fail */
+ uint16_t nawupf;
+
+ /** namespace atomic compare & write unit */
+ uint16_t nacwu;
+
+ /** namespace atomic boundary size normal */
+ uint16_t nabsn;
+
+ /** namespace atomic boundary offset */
+ uint16_t nabo;
+
+ /** namespace atomic boundary size power fail */
+ uint16_t nabspf;
+
+ /** namespace optimal I/O boundary in logical blocks */
+ uint16_t noiob;
+
+ /** NVM capacity */
+ uint64_t nvmcap[2];
+
+ uint8_t reserved64[40];
+
+ /** namespace globally unique identifier */
+ uint8_t nguid[16];
+
+ /** IEEE extended unique identifier */
+ uint64_t eui64;
+
+ /** lba format support */
+ struct {
+ /** metadata size */
+ uint32_t ms : 16;
+
+ /** lba data size */
+ uint32_t lbads : 8;
+
+ /** relative performance */
+ uint32_t rp : 2;
+
+ uint32_t reserved6 : 6;
+ } lbaf[16];
+
+ uint8_t reserved6[192];
+
+ uint8_t vendor_specific[3712];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_data) == 4096, "Incorrect size");
+
+/**
+ * Deallocated logical block features - read value
+ */
+enum spdk_nvme_dealloc_logical_block_read_value {
+ /** Not reported */
+ SPDK_NVME_DEALLOC_NOT_REPORTED = 0,
+
+ /** Deallocated blocks read 0x00 */
+ SPDK_NVME_DEALLOC_READ_00 = 1,
+
+ /** Deallocated blocks read 0xFF */
+ SPDK_NVME_DEALLOC_READ_FF = 2,
+};
+
+/**
+ * Reservation Type Encoding
+ */
+enum spdk_nvme_reservation_type {
+ /* 0x00 - reserved */
+
+ /** Write Exclusive Reservation */
+ SPDK_NVME_RESERVE_WRITE_EXCLUSIVE = 0x1,
+
+ /** Exclusive Access Reservation */
+ SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS = 0x2,
+
+ /** Write Exclusive - Registrants Only Reservation */
+ SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY = 0x3,
+
+ /** Exclusive Access - Registrants Only Reservation */
+ SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY = 0x4,
+
+ /** Write Exclusive - All Registrants Reservation */
+ SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS = 0x5,
+
+ /** Exclusive Access - All Registrants Reservation */
+ SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS = 0x6,
+
+ /* 0x7-0xFF - Reserved */
+};
+
+struct spdk_nvme_reservation_acquire_data {
+ /** current reservation key */
+ uint64_t crkey;
+ /** preempt reservation key */
+ uint64_t prkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_acquire_data) == 16, "Incorrect size");
+
+/**
+ * Reservation Acquire action
+ */
+enum spdk_nvme_reservation_acquire_action {
+ SPDK_NVME_RESERVE_ACQUIRE = 0x0,
+ SPDK_NVME_RESERVE_PREEMPT = 0x1,
+ SPDK_NVME_RESERVE_PREEMPT_ABORT = 0x2,
+};
+
+struct __attribute__((packed)) spdk_nvme_reservation_status_data {
+ /** reservation action generation counter */
+ uint32_t gen;
+ /** reservation type */
+ uint8_t rtype;
+ /** number of registered controllers */
+ uint16_t regctl;
+ uint16_t reserved1;
+ /** persist through power loss state */
+ uint8_t ptpls;
+ uint8_t reserved[14];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_status_data) == 24, "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_reservation_status_extended_data {
+ struct spdk_nvme_reservation_status_data data;
+ uint8_t reserved[40];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_status_extended_data) == 64,
+ "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_registered_ctrlr_data {
+ /** controller id */
+ uint16_t cntlid;
+ /** reservation status */
+ struct {
+ uint8_t status : 1;
+ uint8_t reserved1 : 7;
+ } rcsts;
+ uint8_t reserved2[5];
+ /** 64-bit host identifier */
+ uint64_t hostid;
+ /** reservation key */
+ uint64_t rkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_registered_ctrlr_data) == 24, "Incorrect size");
+
+struct __attribute__((packed)) spdk_nvme_registered_ctrlr_extended_data {
+ /** controller id */
+ uint16_t cntlid;
+ /** reservation status */
+ struct {
+ uint8_t status : 1;
+ uint8_t reserved1 : 7;
+ } rcsts;
+ uint8_t reserved2[5];
+ /** reservation key */
+ uint64_t rkey;
+ /** 128-bit host identifier */
+ uint8_t hostid[16];
+ uint8_t reserved3[32];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_registered_ctrlr_extended_data) == 64, "Incorrect size");
+
+/**
+ * Change persist through power loss state for
+ * Reservation Register command
+ */
+enum spdk_nvme_reservation_register_cptpl {
+ SPDK_NVME_RESERVE_PTPL_NO_CHANGES = 0x0,
+ SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON = 0x2,
+ SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS = 0x3,
+};
+
+/**
+ * Registration action for Reservation Register command
+ */
+enum spdk_nvme_reservation_register_action {
+ SPDK_NVME_RESERVE_REGISTER_KEY = 0x0,
+ SPDK_NVME_RESERVE_UNREGISTER_KEY = 0x1,
+ SPDK_NVME_RESERVE_REPLACE_KEY = 0x2,
+};
+
+struct spdk_nvme_reservation_register_data {
+ /** current reservation key */
+ uint64_t crkey;
+ /** new reservation key */
+ uint64_t nrkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_register_data) == 16, "Incorrect size");
+
+struct spdk_nvme_reservation_key_data {
+ /** current reservation key */
+ uint64_t crkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_key_data) == 8, "Incorrect size");
+
+/**
+ * Reservation Release action
+ */
+enum spdk_nvme_reservation_release_action {
+ SPDK_NVME_RESERVE_RELEASE = 0x0,
+ SPDK_NVME_RESERVE_CLEAR = 0x1,
+};
+
+/**
+ * Reservation notification log page type
+ */
+enum spdk_nvme_reservation_notification_log_page_type {
+ SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY = 0x0,
+ SPDK_NVME_REGISTRATION_PREEMPTED = 0x1,
+ SPDK_NVME_RESERVATION_RELEASED = 0x2,
+ SPDK_NVME_RESERVATION_PREEMPTED = 0x3,
+};
+
+/**
+ * Reservation notification log
+ */
+struct spdk_nvme_reservation_notification_log {
+ /** 64-bit incrementing reservation notification log page count */
+ uint64_t log_page_count;
+ /** Reservation notification log page type */
+ uint8_t type;
+ /** Number of additional available reservation notification log pages */
+ uint8_t num_avail_log_pages;
+ uint8_t reserved[2];
+ uint32_t nsid;
+ uint8_t reserved1[48];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_notification_log) == 64, "Incorrect size");
+
+/* Mask Registration Preempted Notification */
+#define SPDK_NVME_REGISTRATION_PREEMPTED_MASK (1U << 1)
+/* Mask Reservation Released Notification */
+#define SPDK_NVME_RESERVATION_RELEASED_MASK (1U << 2)
+/* Mask Reservation Preempted Notification */
+#define SPDK_NVME_RESERVATION_PREEMPTED_MASK (1U << 3)
+
+/**
+ * Log page identifiers for SPDK_NVME_OPC_GET_LOG_PAGE
+ */
+enum spdk_nvme_log_page {
+ /* 0x00 - reserved */
+
+ /** Error information (mandatory) - \ref spdk_nvme_error_information_entry */
+ SPDK_NVME_LOG_ERROR = 0x01,
+
+ /** SMART / health information (mandatory) - \ref spdk_nvme_health_information_page */
+ SPDK_NVME_LOG_HEALTH_INFORMATION = 0x02,
+
+ /** Firmware slot information (mandatory) - \ref spdk_nvme_firmware_page */
+ SPDK_NVME_LOG_FIRMWARE_SLOT = 0x03,
+
+ /** Changed namespace list (optional) */
+ SPDK_NVME_LOG_CHANGED_NS_LIST = 0x04,
+
+ /** Command effects log (optional) */
+ SPDK_NVME_LOG_COMMAND_EFFECTS_LOG = 0x05,
+
+ /** Device self test (optional) */
+ SPDK_NVME_LOG_DEVICE_SELF_TEST = 0x06,
+
+ /** Host initiated telemetry log (optional) */
+ SPDK_NVME_LOG_TELEMETRY_HOST_INITIATED = 0x07,
+
+ /** Controller initiated telemetry log (optional) */
+ SPDK_NVME_LOG_TELEMETRY_CTRLR_INITIATED = 0x08,
+
+ /* 0x09-0x6F - reserved */
+
+ /** Discovery (refer to the NVMe over Fabrics specification) */
+ SPDK_NVME_LOG_DISCOVERY = 0x70,
+
+ /* 0x71-0x7f - reserved for NVMe over Fabrics */
+
+ /** Reservation notification (optional) */
+ SPDK_NVME_LOG_RESERVATION_NOTIFICATION = 0x80,
+
+ /** Sanitize status (optional) */
+ SPDK_NVME_LOG_SANITIZE_STATUS = 0x81,
+
+ /* 0x82-0xBF - I/O command set specific */
+
+ /* 0xC0-0xFF - vendor specific */
+};
+
+/**
+ * Error information log page (\ref SPDK_NVME_LOG_ERROR)
+ */
+struct spdk_nvme_error_information_entry {
+ uint64_t error_count;
+ uint16_t sqid;
+ uint16_t cid;
+ struct spdk_nvme_status status;
+ uint16_t error_location;
+ uint64_t lba;
+ uint32_t nsid;
+ uint8_t vendor_specific;
+ uint8_t trtype;
+ uint8_t reserved30[2];
+ uint64_t command_specific;
+ uint16_t trtype_specific;
+ uint8_t reserved42[22];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_error_information_entry) == 64, "Incorrect size");
+
+/**
+ * SMART / health information page (\ref SPDK_NVME_LOG_HEALTH_INFORMATION)
+ */
+struct __attribute__((packed)) __attribute__((aligned)) spdk_nvme_health_information_page {
+ union spdk_nvme_critical_warning_state critical_warning;
+
+ uint16_t temperature;
+ uint8_t available_spare;
+ uint8_t available_spare_threshold;
+ uint8_t percentage_used;
+
+ uint8_t reserved[26];
+
+ /*
+ * Note that the following are 128-bit values, but are
+ * defined as an array of 2 64-bit values.
+ */
+ /* Data Units Read is always in 512-byte units. */
+ uint64_t data_units_read[2];
+ /* Data Units Written is always in 512-byte units. */
+ uint64_t data_units_written[2];
+ /* For NVM command set, this includes Compare commands. */
+ uint64_t host_read_commands[2];
+ uint64_t host_write_commands[2];
+ /* Controller Busy Time is reported in minutes. */
+ uint64_t controller_busy_time[2];
+ uint64_t power_cycles[2];
+ uint64_t power_on_hours[2];
+ uint64_t unsafe_shutdowns[2];
+ uint64_t media_errors[2];
+ uint64_t num_error_info_log_entries[2];
+ /* Controller temperature related. */
+ uint32_t warning_temp_time;
+ uint32_t critical_temp_time;
+ uint16_t temp_sensor[8];
+
+ uint8_t reserved2[296];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_health_information_page) == 512, "Incorrect size");
+
+/* Commands Supported and Effects Data Structure */
+struct spdk_nvme_cmds_and_effect_entry {
+ /** Command Supported */
+ uint16_t csupp : 1;
+
+ /** Logical Block Content Change */
+ uint16_t lbcc : 1;
+
+ /** Namespace Capability Change */
+ uint16_t ncc : 1;
+
+ /** Namespace Inventory Change */
+ uint16_t nic : 1;
+
+ /** Controller Capability Change */
+ uint16_t ccc : 1;
+
+ uint16_t reserved1 : 11;
+
+ /* Command Submission and Execution recommendation
+ * 000 - No command submission or execution restriction
+ * 001 - Submitted when there is no outstanding command to same NS
+ * 010 - Submitted when there is no outstanding command to any NS
+ * others - Reserved
+ * \ref command_submission_and_execution in section 5.14.1.5 NVMe Revision 1.3
+ */
+ uint16_t cse : 3;
+
+ uint16_t reserved2 : 13;
+};
+
+/* Commands Supported and Effects Log Page */
+struct spdk_nvme_cmds_and_effect_log_page {
+ /** Commands Supported and Effects Data Structure for the Admin Commands */
+ struct spdk_nvme_cmds_and_effect_entry admin_cmds_supported[256];
+
+ /** Commands Supported and Effects Data Structure for the IO Commands */
+ struct spdk_nvme_cmds_and_effect_entry io_cmds_supported[256];
+
+ uint8_t reserved0[2048];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cmds_and_effect_log_page) == 4096, "Incorrect size");
+
+/*
+ * Get Log Page – Telemetry Host/Controller Initiated Log (Log Identifiers 07h/08h)
+ */
+struct spdk_nvme_telemetry_log_page_hdr {
+ /* Log page identifier */
+ uint8_t lpi;
+ uint8_t rsvd[4];
+ uint8_t ieee_oui[3];
+ /* Data area 1 last block */
+ uint16_t dalb1;
+ /* Data area 2 last block */
+ uint16_t dalb2;
+ /* Data area 3 last block */
+ uint16_t dalb3;
+ uint8_t rsvd1[368];
+ /* Controller initiated data avail */
+ uint8_t ctrlr_avail;
+ /* Controller initiated telemetry data generation */
+ uint8_t ctrlr_gen;
+ /* Reason identifier */
+ uint8_t rsnident[128];
+ uint8_t telemetry_datablock[0];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_telemetry_log_page_hdr) == 512, "Incorrect size");
+
+/**
+ * Sanitize Status Type
+ */
+enum spdk_nvme_sanitize_status_type {
+ SPDK_NVME_NEVER_BEEN_SANITIZED = 0x0,
+ SPDK_NVME_RECENT_SANITIZE_SUCCESSFUL = 0x1,
+ SPDK_NVME_SANITIZE_IN_PROGRESS = 0x2,
+ SPDK_NVME_SANITIZE_FAILED = 0x3,
+};
+
+/**
+ * Sanitize status sstat field
+ */
+struct spdk_nvme_sanitize_status_sstat {
+ uint16_t status : 3;
+ uint16_t complete_pass : 5;
+ uint16_t global_data_erase : 1;
+ uint16_t reserved : 7;
+};
+
+/**
+ * Sanitize log page
+ */
+struct spdk_nvme_sanitize_status_log_page {
+ /* Sanitize progress */
+ uint16_t sprog;
+ /* Sanitize status */
+ struct spdk_nvme_sanitize_status_sstat sstat;
+ /* CDW10 of sanitize command */
+ uint32_t scdw10;
+ /* Estimated overwrite time in seconds */
+ uint32_t et_overwrite;
+ /* Estimated block erase time in seconds */
+ uint32_t et_block_erase;
+ /* Estimated crypto erase time in seconds */
+ uint32_t et_crypto_erase;
+ uint8_t reserved[492];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sanitize_status_log_page) == 512, "Incorrect size");
+
+/**
+ * Asynchronous Event Type
+ */
+enum spdk_nvme_async_event_type {
+ /* Error Status */
+ SPDK_NVME_ASYNC_EVENT_TYPE_ERROR = 0x0,
+ /* SMART/Health Status */
+ SPDK_NVME_ASYNC_EVENT_TYPE_SMART = 0x1,
+ /* Notice */
+ SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE = 0x2,
+ /* 0x3 - 0x5 Reserved */
+
+ /* I/O Command Set Specific Status */
+ SPDK_NVME_ASYNC_EVENT_TYPE_IO = 0x6,
+ /* Vendor Specific */
+ SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR = 0x7,
+};
+
+/**
+ * Asynchronous Event Information for Error Status
+ */
+enum spdk_nvme_async_event_info_error {
+ /* Write to Invalid Doorbell Register */
+ SPDK_NVME_ASYNC_EVENT_WRITE_INVALID_DB = 0x0,
+ /* Invalid Doorbell Register Write Value */
+ SPDK_NVME_ASYNC_EVENT_INVALID_DB_WRITE = 0x1,
+ /* Diagnostic Failure */
+ SPDK_NVME_ASYNC_EVENT_DIAGNOSTIC_FAILURE = 0x2,
+ /* Persistent Internal Error */
+ SPDK_NVME_ASYNC_EVENT_PERSISTENT_INTERNAL = 0x3,
+ /* Transient Internal Error */
+ SPDK_NVME_ASYNC_EVENT_TRANSIENT_INTERNAL = 0x4,
+ /* Firmware Image Load Error */
+ SPDK_NVME_ASYNC_EVENT_FW_IMAGE_LOAD = 0x5,
+
+ /* 0x6 - 0xFF Reserved */
+};
+
+/**
+ * Asynchronous Event Information for SMART/Health Status
+ */
+enum spdk_nvme_async_event_info_smart {
+ /* NVM Subsystem Reliability */
+ SPDK_NVME_ASYNC_EVENT_SUBSYSTEM_RELIABILITY = 0x0,
+ /* Temperature Threshold */
+ SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD = 0x1,
+ /* Spare Below Threshold */
+ SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD = 0x2,
+
+ /* 0x3 - 0xFF Reserved */
+};
+
+/**
+ * Asynchronous Event Information for Notice
+ */
+enum spdk_nvme_async_event_info_notice {
+ /* Namespace Attribute Changed */
+ SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED = 0x0,
+ /* Firmware Activation Starting */
+ SPDK_NVME_ASYNC_EVENT_FW_ACTIVATION_START = 0x1,
+ /* Telemetry Log Changed */
+ SPDK_NVME_ASYNC_EVENT_TELEMETRY_LOG_CHANGED = 0x2,
+
+ /* 0x3 - 0xFF Reserved */
+};
+
+/**
+ * Asynchronous Event Information for NVM Command Set Specific Status
+ */
+enum spdk_nvme_async_event_info_nvm_command_set {
+ /* Reservation Log Page Available */
+ SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL = 0x0,
+ /* Sanitize Operation Completed */
+ SPDK_NVME_ASYNC_EVENT_SANITIZE_COMPLETED = 0x1,
+
+ /* 0x2 - 0xFF Reserved */
+};
+
+/**
+ * Asynchronous Event Request Completion
+ */
+union spdk_nvme_async_event_completion {
+ uint32_t raw;
+ struct {
+ uint32_t async_event_type : 3;
+ uint32_t reserved1 : 5;
+ uint32_t async_event_info : 8;
+ uint32_t log_page_identifier : 8;
+ uint32_t reserved2 : 8;
+ } bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_async_event_completion) == 4, "Incorrect size");
+
+/**
+ * Firmware slot information page (\ref SPDK_NVME_LOG_FIRMWARE_SLOT)
+ */
+struct spdk_nvme_firmware_page {
+ struct {
+ uint8_t active_slot : 3; /**< Slot for current FW */
+ uint8_t reserved3 : 1;
+ uint8_t next_reset_slot : 3; /**< Slot that will be active at next controller reset */
+ uint8_t reserved7 : 1;
+ } afi;
+
+ uint8_t reserved[7];
+ uint8_t revision[7][8]; /**< Revisions for 7 slots (ASCII strings) */
+ uint8_t reserved2[448];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_firmware_page) == 512, "Incorrect size");
+
+/**
+ * Namespace attachment Type Encoding
+ */
+enum spdk_nvme_ns_attach_type {
+ /* Controller attach */
+ SPDK_NVME_NS_CTRLR_ATTACH = 0x0,
+
+ /* Controller detach */
+ SPDK_NVME_NS_CTRLR_DETACH = 0x1,
+
+ /* 0x2-0xF - Reserved */
+};
+
+/**
+ * Namespace management Type Encoding
+ */
+enum spdk_nvme_ns_management_type {
+ /* Create */
+ SPDK_NVME_NS_MANAGEMENT_CREATE = 0x0,
+
+ /* Delete */
+ SPDK_NVME_NS_MANAGEMENT_DELETE = 0x1,
+
+ /* 0x2-0xF - Reserved */
+};
+
+struct spdk_nvme_ns_list {
+ uint32_t ns_list[1024];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_list) == 4096, "Incorrect size");
+
+/**
+ * Namespace identification descriptor type
+ *
+ * \sa spdk_nvme_ns_id_desc
+ */
+enum spdk_nvme_nidt {
+ /** IEEE Extended Unique Identifier */
+ SPDK_NVME_NIDT_EUI64 = 0x01,
+
+ /** Namespace GUID */
+ SPDK_NVME_NIDT_NGUID = 0x02,
+
+ /** Namespace UUID */
+ SPDK_NVME_NIDT_UUID = 0x03,
+};
+
+struct spdk_nvme_ns_id_desc {
+ /** Namespace identifier type */
+ uint8_t nidt;
+
+ /** Namespace identifier length (length of nid field) */
+ uint8_t nidl;
+
+ uint8_t reserved2;
+ uint8_t reserved3;
+
+ /** Namespace identifier */
+ uint8_t nid[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_id_desc) == 4, "Incorrect size");
+
+struct spdk_nvme_ctrlr_list {
+ uint16_t ctrlr_count;
+ uint16_t ctrlr_list[2047];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_list) == 4096, "Incorrect size");
+
+enum spdk_nvme_secure_erase_setting {
+ SPDK_NVME_FMT_NVM_SES_NO_SECURE_ERASE = 0x0,
+ SPDK_NVME_FMT_NVM_SES_USER_DATA_ERASE = 0x1,
+ SPDK_NVME_FMT_NVM_SES_CRYPTO_ERASE = 0x2,
+};
+
+enum spdk_nvme_pi_location {
+ SPDK_NVME_FMT_NVM_PROTECTION_AT_TAIL = 0x0,
+ SPDK_NVME_FMT_NVM_PROTECTION_AT_HEAD = 0x1,
+};
+
+enum spdk_nvme_pi_type {
+ SPDK_NVME_FMT_NVM_PROTECTION_DISABLE = 0x0,
+ SPDK_NVME_FMT_NVM_PROTECTION_TYPE1 = 0x1,
+ SPDK_NVME_FMT_NVM_PROTECTION_TYPE2 = 0x2,
+ SPDK_NVME_FMT_NVM_PROTECTION_TYPE3 = 0x3,
+};
+
+enum spdk_nvme_metadata_setting {
+ SPDK_NVME_FMT_NVM_METADATA_TRANSFER_AS_BUFFER = 0x0,
+ SPDK_NVME_FMT_NVM_METADATA_TRANSFER_AS_LBA = 0x1,
+};
+
+struct spdk_nvme_format {
+ uint32_t lbaf : 4;
+ uint32_t ms : 1;
+ uint32_t pi : 3;
+ uint32_t pil : 1;
+ uint32_t ses : 3;
+ uint32_t reserved : 20;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_format) == 4, "Incorrect size");
+
+struct spdk_nvme_protection_info {
+ uint16_t guard;
+ uint16_t app_tag;
+ uint32_t ref_tag;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_protection_info) == 8, "Incorrect size");
+
+/* Data structures for sanitize command */
+/* Sanitize - Command Dword 10 */
+struct spdk_nvme_sanitize {
+ /* Sanitize Action (SANACT) */
+ uint32_t sanact : 3;
+ /* Allow Unrestricted Sanitize Exit (AUSE) */
+ uint32_t ause : 1;
+ /* Overwrite Pass Count (OWPASS) */
+ uint32_t owpass : 4;
+ /* Overwrite Invert Pattern Between Passes */
+ uint32_t oipbp : 1;
+ /* No Deallocate after sanitize (NDAS) */
+ uint32_t ndas : 1;
+ /* reserved */
+ uint32_t reserved : 22;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sanitize) == 4, "Incorrect size");
+
+/* Sanitize Action */
+enum spdk_sanitize_action {
+ /* Exit Failure Mode */
+ SPDK_NVME_SANITIZE_EXIT_FAILURE_MODE = 0x1,
+ /* Start a Block Erase sanitize operation */
+ SPDK_NVME_SANITIZE_BLOCK_ERASE = 0x2,
+ /* Start an Overwrite sanitize operation */
+ SPDK_NVME_SANITIZE_OVERWRITE = 0x3,
+ /* Start a Crypto Erase sanitize operation */
+ SPDK_NVME_SANITIZE_CRYPTO_ERASE = 0x4,
+};
+
+/** Parameters for SPDK_NVME_OPC_FIRMWARE_COMMIT cdw10: commit action */
+enum spdk_nvme_fw_commit_action {
+ /**
+ * Downloaded image replaces the image specified by
+ * the Firmware Slot field. This image is not activated.
+ */
+ SPDK_NVME_FW_COMMIT_REPLACE_IMG = 0x0,
+ /**
+ * Downloaded image replaces the image specified by
+ * the Firmware Slot field. This image is activated at the next reset.
+ */
+ SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG = 0x1,
+ /**
+ * The image specified by the Firmware Slot field is
+ * activated at the next reset.
+ */
+ SPDK_NVME_FW_COMMIT_ENABLE_IMG = 0x2,
+ /**
+ * The image specified by the Firmware Slot field is
+ * requested to be activated immediately without reset.
+ */
+ SPDK_NVME_FW_COMMIT_RUN_IMG = 0x3,
+};
+
+/** Parameters for SPDK_NVME_OPC_FIRMWARE_COMMIT cdw10 */
+struct spdk_nvme_fw_commit {
+ /**
+ * Firmware Slot. Specifies the firmware slot that shall be used for the
+ * Commit Action. The controller shall choose the firmware slot (slot 1 - 7)
+ * to use for the operation if the value specified is 0h.
+ */
+ uint32_t fs : 3;
+ /**
+ * Commit Action. Specifies the action that is taken on the image downloaded
+ * with the Firmware Image Download command or on a previously downloaded and
+ * placed image.
+ */
+ uint32_t ca : 3;
+ uint32_t reserved : 26;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_fw_commit) == 4, "Incorrect size");
+
+#define spdk_nvme_cpl_is_error(cpl) \
+ ((cpl)->status.sc != SPDK_NVME_SC_SUCCESS || \
+ (cpl)->status.sct != SPDK_NVME_SCT_GENERIC)
+
+#define spdk_nvme_cpl_is_success(cpl) (!spdk_nvme_cpl_is_error(cpl))
+
+#define spdk_nvme_cpl_is_pi_error(cpl) \
+ ((cpl)->status.sct == SPDK_NVME_SCT_MEDIA_ERROR && \
+ ((cpl)->status.sc == SPDK_NVME_SC_GUARD_CHECK_ERROR || \
+ (cpl)->status.sc == SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR || \
+ (cpl)->status.sc == SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR))
+
+#define spdk_nvme_cpl_is_abort_success(cpl) \
+ (spdk_nvme_cpl_is_success(cpl) && !((cpl)->cdw0 & 1U))
+
+/** Set fused operation */
+#define SPDK_NVME_IO_FLAGS_FUSE_FIRST (SPDK_NVME_CMD_FUSE_FIRST << 0)
+#define SPDK_NVME_IO_FLAGS_FUSE_SECOND (SPDK_NVME_CMD_FUSE_SECOND << 0)
+#define SPDK_NVME_IO_FLAGS_FUSE_MASK (SPDK_NVME_CMD_FUSE_MASK << 0)
+/** Enable protection information checking of the Logical Block Reference Tag field */
+#define SPDK_NVME_IO_FLAGS_PRCHK_REFTAG (1U << 26)
+/** Enable protection information checking of the Application Tag field */
+#define SPDK_NVME_IO_FLAGS_PRCHK_APPTAG (1U << 27)
+/** Enable protection information checking of the Guard field */
+#define SPDK_NVME_IO_FLAGS_PRCHK_GUARD (1U << 28)
+/** The protection information is stripped or inserted when this bit is set */
+#define SPDK_NVME_IO_FLAGS_PRACT (1U << 29)
+#define SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS (1U << 30)
+#define SPDK_NVME_IO_FLAGS_LIMITED_RETRY (1U << 31)
+
+/** Mask of valid I/O flags */
+#define SPDK_NVME_IO_FLAGS_VALID_MASK 0xFFFF0003
+#define SPDK_NVME_IO_FLAGS_CDW12_MASK 0xFFFF0000
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvmf.h b/src/spdk/include/spdk/nvmf.h
new file mode 100644
index 000000000..86ca574f6
--- /dev/null
+++ b/src/spdk/include/spdk/nvmf.h
@@ -0,0 +1,1048 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2018-2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * NVMe over Fabrics target public API
+ */
+
+#ifndef SPDK_NVMF_H
+#define SPDK_NVMF_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/env.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/queue.h"
+#include "spdk/uuid.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NVMF_TGT_NAME_MAX_LENGTH 256
+
+struct spdk_nvmf_tgt;
+struct spdk_nvmf_subsystem;
+struct spdk_nvmf_ctrlr;
+struct spdk_nvmf_qpair;
+struct spdk_nvmf_request;
+struct spdk_bdev;
+struct spdk_nvmf_request;
+struct spdk_nvmf_host;
+struct spdk_nvmf_subsystem_listener;
+struct spdk_nvmf_poll_group;
+struct spdk_json_write_ctx;
+struct spdk_nvmf_transport;
+
+struct spdk_nvmf_target_opts {
+ char name[NVMF_TGT_NAME_MAX_LENGTH];
+ uint32_t max_subsystems;
+};
+
+struct spdk_nvmf_transport_opts {
+ uint16_t max_queue_depth;
+ uint16_t max_qpairs_per_ctrlr;
+ uint32_t in_capsule_data_size;
+ uint32_t max_io_size;
+ uint32_t io_unit_size;
+ uint32_t max_aq_depth;
+ uint32_t num_shared_buffers;
+ uint32_t buf_cache_size;
+ uint32_t max_srq_depth;
+ bool no_srq;
+ bool c2h_success;
+ bool dif_insert_or_strip;
+ uint32_t sock_priority;
+ int acceptor_backlog;
+ uint32_t abort_timeout_sec;
+};
+
+struct spdk_nvmf_poll_group_stat {
+ uint32_t admin_qpairs;
+ uint32_t io_qpairs;
+ uint64_t pending_bdev_io;
+};
+
+struct spdk_nvmf_rdma_device_stat {
+ const char *name;
+ uint64_t polls;
+ uint64_t completions;
+ uint64_t requests;
+ uint64_t request_latency;
+ uint64_t pending_free_request;
+ uint64_t pending_rdma_read;
+ uint64_t pending_rdma_write;
+};
+
+struct spdk_nvmf_transport_poll_group_stat {
+ spdk_nvme_transport_type_t trtype;
+ union {
+ struct {
+ uint64_t pending_data_buffer;
+ uint64_t num_devices;
+ struct spdk_nvmf_rdma_device_stat *devices;
+ } rdma;
+ };
+};
+
+/**
+ * Function to be called once the listener is associated with a subsystem.
+ *
+ * \param ctx Context argument passed to this function.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_nvmf_tgt_subsystem_listen_done_fn)(void *ctx, int status);
+
+/**
+ * Construct an NVMe-oF target.
+ *
+ * \param opts a pointer to an spdk_nvmf_target_opts structure.
+ *
+ * \return a pointer to a NVMe-oF target on success, or NULL on failure.
+ */
+struct spdk_nvmf_tgt *spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts);
+
+typedef void (spdk_nvmf_tgt_destroy_done_fn)(void *ctx, int status);
+
+/**
+ * Destroy an NVMe-oF target.
+ *
+ * \param tgt The target to destroy. This releases all resources.
+ * \param cb_fn A callback that will be called once the target is destroyed
+ * \param cb_arg A context argument passed to cb_fn.
+ */
+void spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
+ spdk_nvmf_tgt_destroy_done_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * Get the name of an NVMe-oF target.
+ *
+ * \param tgt The target from which to get the name.
+ *
+ * \return The name of the target as a null terminated string.
+ */
+const char *spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Get a pointer to an NVMe-oF target.
+ *
+ * In order to support some legacy applications and RPC methods that may rely on the
+ * concept that there is only one target, the name parameter can be passed as NULL.
+ * If there is only one available target, that target will be returned.
+ * Otherwise, name is a required parameter.
+ *
+ * \param name The name provided when the target was created.
+ *
+ * \return The target with the given name, or NULL if no match was found.
+ */
+struct spdk_nvmf_tgt *spdk_nvmf_get_tgt(const char *name);
+
+/**
+ * Get the pointer to the first NVMe-oF target.
+ *
+ * Combined with spdk_nvmf_get_next_tgt to iterate over all available targets.
+ *
+ * \return The first NVMe-oF target.
+ */
+struct spdk_nvmf_tgt *spdk_nvmf_get_first_tgt(void);
+
+/**
+ * Get the pointer to the next NVMe-oF target.
+ *
+ * Combined with spdk_nvmf_get_first_tgt to iterate over all available targets.
+ *
+ * \param prev A pointer to the previous NVMe-oF target.
+ *
+ * \return The next NVMe-oF target.
+ */
+struct spdk_nvmf_tgt *spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev);
+
+/**
+ * Write NVMe-oF target configuration into provided JSON context.
+ * \param w JSON write context
+ * \param tgt The NVMe-oF target
+ */
+void spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Begin accepting new connections at the address provided.
+ *
+ * The connections will be matched with a subsystem, which may or may not allow
+ * the connection based on a subsystem-specific whitelist. See
+ * spdk_nvmf_subsystem_add_host() and spdk_nvmf_subsystem_add_listener()
+ *
+ * \param tgt The target associated with this listen address.
+ * \param trid The address to listen at.
+ *
+ * \return 0 on success or a negated errno on failure.
+ */
+int spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvme_transport_id *trid);
+
+/**
+ * Stop accepting new connections at the provided address.
+ *
+ * This is a counterpart to spdk_nvmf_tgt_listen().
+ *
+ * \param tgt The target associated with the listen address.
+ * \param trid The address to stop listening at.
+ *
+ * \return 0 on success or a negated errno on failure.
+ */
+int spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvme_transport_id *trid);
+
+/**
+ * Poll the target for incoming connections.
+ *
+ * \param tgt The target associated with the listen address.
+ */
+uint32_t spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Create a poll group.
+ *
+ * \param tgt The target to create a poll group.
+ *
+ * \return a poll group on success, or NULL on failure.
+ */
+struct spdk_nvmf_poll_group *spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Get optimal nvmf poll group for the qpair.
+ *
+ * \param qpair Requested qpair
+ *
+ * \return a poll group on success, or NULL on failure.
+ */
+struct spdk_nvmf_poll_group *spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair);
+
+typedef void(*spdk_nvmf_poll_group_destroy_done_fn)(void *cb_arg, int status);
+
+/**
+ * Destroy a poll group.
+ *
+ * \param group The poll group to destroy.
+ * \param cb_fn A callback that will be called once the poll group is destroyed.
+ * \param cb_arg A context argument passed to cb_fn.
+ */
+void spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group,
+ spdk_nvmf_poll_group_destroy_done_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * Add the given qpair to the poll group.
+ *
+ * \param group The group to add qpair to.
+ * \param qpair The qpair to add.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+/**
+ * Get current poll group statistics.
+ *
+ * \param tgt The NVMf target.
+ * \param stat Pointer to allocated statistics structure to fill with values.
+ *
+ * \return 0 upon success.
+ * \return -EINVAL if either tgt or stat is NULL.
+ */
+int spdk_nvmf_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_poll_group_stat *stat);
+
+typedef void (*nvmf_qpair_disconnect_cb)(void *ctx);
+
+/**
+ * Disconnect an NVMe-oF qpair
+ *
+ * \param qpair The NVMe-oF qpair to disconnect.
+ * \param cb_fn The function to call upon completion of the disconnect.
+ * \param ctx The context to pass to the callback function.
+ *
+ * \return 0 upon success.
+ * \return -ENOMEM if the function specific context could not be allocated.
+ */
+int spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn,
+ void *ctx);
+
+/**
+ * Get the peer's transport ID for this queue pair.
+ *
+ * \param qpair The NVMe-oF qpair
+ * \param trid Output parameter that will contain the transport id.
+ *
+ * \return 0 for success.
+ * \return -EINVAL if the qpair is not connected.
+ */
+int spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+/**
+ * Get the local transport ID for this queue pair.
+ *
+ * \param qpair The NVMe-oF qpair
+ * \param trid Output parameter that will contain the transport id.
+ *
+ * \return 0 for success.
+ * \return -EINVAL if the qpair is not connected.
+ */
+int spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+/**
+ * Get the associated listener transport ID for this queue pair.
+ *
+ * \param qpair The NVMe-oF qpair
+ * \param trid Output parameter that will contain the transport id.
+ *
+ * \return 0 for success.
+ * \return -EINVAL if the qpair is not connected.
+ */
+int spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+/**
+ * Create an NVMe-oF subsystem.
+ *
+ * Subsystems are in one of three states: Inactive, Active, Paused. This
+ * state affects which operations may be performed on the subsystem. Upon
+ * creation, the subsystem will be in the Inactive state and may be activated
+ * by calling spdk_nvmf_subsystem_start(). No I/O will be processed in the Inactive
+ * or Paused states, but changes to the state of the subsystem may be made.
+ *
+ * \param tgt The NVMe-oF target that will own this subsystem.
+ * \param nqn The NVMe qualified name of this subsystem.
+ * \param type Whether this subsystem is an I/O subsystem or a Discovery subsystem.
+ * \param num_ns The number of namespaces this subsystem contains.
+ *
+ * \return a pointer to a NVMe-oF subsystem on success, or NULL on failure.
+ */
+struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
+ const char *nqn,
+ enum spdk_nvmf_subtype type,
+ uint32_t num_ns);
+
+/**
+ * Destroy an NVMe-oF subsystem. A subsystem may only be destroyed when in
+ * the Inactive state. See spdk_nvmf_subsystem_stop().
+ *
+ * \param subsystem The NVMe-oF subsystem to destroy.
+ */
+void spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Function to be called once the subsystem has changed state.
+ *
+ * \param subsystem NVMe-oF subsystem that has changed state.
+ * \param cb_arg Argument passed to callback function.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_nvmf_subsystem_state_change_done)(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status);
+
+/**
+ * Transition an NVMe-oF subsystem from Inactive to Active state.
+ *
+ * \param subsystem The NVMe-oF subsystem.
+ * \param cb_fn A function that will be called once the subsystem has changed state.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return 0 on success, or negated errno on failure. The callback provided will only
+ * be called on success.
+ */
+int spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg);
+
+/**
+ * Transition an NVMe-oF subsystem from Active to Inactive state.
+ *
+ * \param subsystem The NVMe-oF subsystem.
+ * \param cb_fn A function that will be called once the subsystem has changed state.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return 0 on success, or negated errno on failure. The callback provided will only
+ * be called on success.
+ */
+int spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg);
+
+/**
+ * Transition an NVMe-oF subsystem from Active to Paused state.
+ *
+ * \param subsystem The NVMe-oF subsystem.
+ * \param cb_fn A function that will be called once the subsystem has changed state.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return 0 on success, or negated errno on failure. The callback provided will only
+ * be called on success.
+ */
+int spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg);
+
+/**
+ * Transition an NVMe-oF subsystem from Paused to Active state.
+ *
+ * \param subsystem The NVMe-oF subsystem.
+ * \param cb_fn A function that will be called once the subsystem has changed state.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return 0 on success, or negated errno on failure. The callback provided will only
+ * be called on success.
+ */
+int spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg);
+
+/**
+ * Search the target for a subsystem with the given NQN.
+ *
+ * \param tgt The NVMe-oF target to search from.
+ * \param subnqn NQN of the subsystem.
+ *
+ * \return a pointer to the NVMe-oF subsystem on success, or NULL on failure.
+ */
+struct spdk_nvmf_subsystem *spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt,
+ const char *subnqn);
+
+/**
+ * Begin iterating over all known subsystems. If no subsystems are present, return NULL.
+ *
+ * \param tgt The NVMe-oF target to iterate.
+ *
+ * \return a pointer to the first NVMe-oF subsystem on success, or NULL on failure.
+ */
+struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Continue iterating over all known subsystems. If no additional subsystems, return NULL.
+ *
+ * \param subsystem Previous subsystem returned from \ref spdk_nvmf_subsystem_get_first or
+ * \ref spdk_nvmf_subsystem_get_next.
+ *
+ * \return a pointer to the next NVMe-oF subsystem on success, or NULL on failure.
+ */
+struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Allow the given host NQN to connect to the given subsystem.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to add host to.
+ * \param hostnqn The NQN for the host.
+ *
+ * \return 0 on success, or negated errno value on failure.
+ */
+int spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem,
+ const char *hostnqn);
+
+/**
+ * Remove the given host NQN from the allowed hosts whitelist.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to remove host from.
+ * \param hostnqn The NQN for the host.
+ *
+ * \return 0 on success, or negated errno value on failure.
+ */
+int spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn);
+
+/**
+ * Set whether a subsystem should allow any host or only hosts in the allowed list.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to modify.
+ * \param allow_any_host true to allow any host to connect to this subsystem,
+ * or false to enforce the whitelist configured with spdk_nvmf_subsystem_add_host().
+ *
+ * \return 0 on success, or negated errno value on failure.
+ */
+int spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem,
+ bool allow_any_host);
+
+/**
+ * Check whether a subsystem should allow any host or only hosts in the allowed list.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return true if any host is allowed to connect to this subsystem, or false if
+ * connecting hosts must be in the whitelist configured with spdk_nvmf_subsystem_add_host().
+ */
+bool spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Check if the given host is allowed to connect to the subsystem.
+ *
+ * \param subsystem The subsystem to query.
+ * \param hostnqn The NQN of the host.
+ *
+ * \return true if allowed, false if not.
+ */
+bool spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn);
+
+/**
+ * Get the first allowed host in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return first allowed host in this subsystem, or NULL if none allowed.
+ */
+struct spdk_nvmf_host *spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get the next allowed host in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ * \param prev_host Previous host returned from this function.
+ *
+ * \return next allowed host in this subsystem, or NULL if prev_host was the last host.
+ */
+struct spdk_nvmf_host *spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_host *prev_host);
+
+/**
+ * Get a host's NQN.
+ *
+ * \param host Host to query.
+ *
+ * \return NQN of host.
+ */
+const char *spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host);
+
+/**
+ * Accept new connections on the address provided.
+ *
+ * This does not start the listener. Use spdk_nvmf_tgt_listen() for that.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to add listener to.
+ * \param trid The address to accept connections from.
+ * \param cb_fn A callback that will be called once the association is complete.
+ * \param cb_arg Argument passed to cb_fn.
+ */
+void spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvme_transport_id *trid,
+ spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * Remove the listener from subsystem.
+ *
+ * New connections to the address won't be propagated to the subsystem.
+ * However to stop listening at target level one must use the
+ * spdk_nvmf_tgt_stop_listen().
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to remove listener from.
+ * \param trid The address to no longer accept connections from.
+ *
+ * \return 0 on success, or negated errno value on failure.
+ */
+int spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid);
+
+/**
+ * Check if connections originated from the given address are allowed to connect
+ * to the subsystem.
+ *
+ * \param subsystem The subsystem to query.
+ * \param trid The listen address.
+ *
+ * \return true if allowed, or false if not.
+ */
+bool spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid);
+
+/**
+ * Get the first allowed listen address in the subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return first allowed listen address in this subsystem, or NULL if none allowed.
+ */
+struct spdk_nvmf_subsystem_listener *spdk_nvmf_subsystem_get_first_listener(
+ struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get the next allowed listen address in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ * \param prev_listener Previous listen address for this subsystem.
+ *
+ * \return next allowed listen address in this subsystem, or NULL if prev_listener
+ * was the last address.
+ */
+struct spdk_nvmf_subsystem_listener *spdk_nvmf_subsystem_get_next_listener(
+ struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_subsystem_listener *prev_listener);
+
+/**
+ * Get a listen address' transport ID
+ *
+ * \param listener This listener.
+ *
+ * \return the transport ID for this listener.
+ */
+const struct spdk_nvme_transport_id *spdk_nvmf_subsystem_listener_get_trid(
+ struct spdk_nvmf_subsystem_listener *listener);
+
+/**
+ * Set whether a subsystem should allow any listen address or only addresses in the allowed list.
+ *
+ * \param subsystem Subsystem to allow dynamic listener assignment.
+ * \param allow_any_listener true to allow dynamic listener assignment for
+ * this subsystem, or false to enforce the whitelist configured during
+ * subsystem setup.
+ */
+void spdk_nvmf_subsystem_allow_any_listener(
+ struct spdk_nvmf_subsystem *subsystem,
+ bool allow_any_listener);
+
+/**
+ * Check whether a subsystem allows any listen address or only addresses in the allowed list.
+ * NOTE(review): the function name is spelled "subsytem" (sic); callers must use this spelling.
+ * \param subsystem Subsystem to query.
+ *
+ * \return true if this subsystem allows dynamic management of the listen address list,
+ * or false if it only allows addresses in the whitelist configured during subsystem setup.
+ */
+bool spdk_nvmf_subsytem_any_listener_allowed(
+ struct spdk_nvmf_subsystem *subsystem);
+
+/** NVMe-oF target namespace creation options */
+struct spdk_nvmf_ns_opts {
+ /**
+ * Namespace ID
+ *
+ * Set to 0 to automatically assign a free NSID.
+ */
+ uint32_t nsid;
+
+ /**
+ * Namespace Globally Unique Identifier
+ *
+ * Fill with 0s if not specified.
+ */
+ uint8_t nguid[16];
+
+ /**
+ * IEEE Extended Unique Identifier
+ *
+ * Fill with 0s if not specified.
+ */
+ uint8_t eui64[8];
+
+ /**
+ * Namespace UUID
+ *
+ * Fill with 0s if not specified.
+ */
+ struct spdk_uuid uuid;
+};
+
+/**
+ * Get default namespace creation options.
+ *
+ * \param opts Namespace options to fill with defaults.
+ * \param opts_size sizeof(struct spdk_nvmf_ns_opts)
+ */
+void spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size);
+
+/**
+ * Add a namespace to a subsystem.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem to add namespace to.
+ * \param bdev Block device to add as a namespace.
+ * \param opts Namespace options, or NULL to use defaults.
+ * \param opts_size sizeof(*opts)
+ * \param ptpl_file Persist through power loss file path.
+ *
+ * \return newly added NSID on success, or 0 on failure.
+ */
+uint32_t spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev,
+ const struct spdk_nvmf_ns_opts *opts, size_t opts_size,
+ const char *ptpl_file);
+
+/**
+ * Remove a namespace from a subsystem.
+ *
+ * May only be performed on subsystems in the PAUSED or INACTIVE states.
+ *
+ * \param subsystem Subsystem the namespace belongs to.
+ * \param nsid Namespace ID to be removed.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid);
+
+/**
+ * Get the first allocated namespace in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return first allocated namespace in this subsystem, or NULL if this subsystem
+ * has no namespaces.
+ */
+struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get the next allocated namespace in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ * \param prev_ns Previous ns returned from this function.
+ *
+ * \return next allocated namespace in this subsystem, or NULL if prev_ns was the
+ * last namespace.
+ */
+struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ns *prev_ns);
+
+/**
+ * Get a namespace in a subsystem by NSID.
+ *
+ * \param subsystem Subsystem to search.
+ * \param nsid Namespace ID to find.
+ *
+ * \return namespace matching nsid, or NULL if nsid was not found.
+ */
+struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem,
+ uint32_t nsid);
+
+/**
+ * Get the maximum number of namespaces allowed in a subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return Maximum number of namespaces allowed in the subsystem, or 0 for unlimited.
+ */
+uint32_t spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get a namespace's NSID.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return NSID of ns.
+ */
+uint32_t spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns);
+
+/**
+ * Get a namespace's associated bdev.
+ *
+ * \param ns Namespace to query.
+ *
+ * \return backing bdev of ns.
+ */
+struct spdk_bdev *spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns);
+
+/**
+ * Get the options specified for a namespace.
+ *
+ * \param ns Namespace to query.
+ * \param opts Output parameter for options.
+ * \param opts_size sizeof(*opts)
+ */
+void spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
+ size_t opts_size);
+
+/**
+ * Get the serial number of the specified subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return serial number of the specified subsystem.
+ */
+const char *spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem);
+
+
+/**
+ * Set the serial number for the specified subsystem.
+ *
+ * \param subsystem Subsystem to set for.
+ * \param sn serial number to set.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn);
+
+/**
+ * Get the model number of the specified subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return model number of the specified subsystem.
+ */
+const char *spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem);
+
+
+/**
+ * Set the model number for the specified subsystem.
+ *
+ * \param subsystem Subsystem to set for.
+ * \param mn model number to set.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn);
+
+/**
+ * Get the NQN of the specified subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return NQN of the specified subsystem.
+ */
+const char *spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get the type of the specified subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return the type of the specified subsystem.
+ */
+enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Get maximum namespace id of the specified subsystem.
+ *
+ * \param subsystem Subsystem to query.
+ *
+ * \return maximum namespace id
+ */
+uint32_t spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem);
+
+/**
+ * Initialize transport options
+ *
+ * \param transport_name The transport type to create
+ * \param opts The transport options (e.g. max_io_size)
+ *
+ * \return bool. true if successful, false if transport type
+ * not found.
+ */
+bool
+spdk_nvmf_transport_opts_init(const char *transport_name,
+ struct spdk_nvmf_transport_opts *opts);
+
+/**
+ * Create a protocol transport
+ *
+ * \param transport_name The transport type to create
+ * \param opts The transport options (e.g. max_io_size)
+ *
+ * \return new transport or NULL if create fails
+ */
+struct spdk_nvmf_transport *spdk_nvmf_transport_create(const char *transport_name,
+ struct spdk_nvmf_transport_opts *opts);
+
+/**
+ * Destroy a protocol transport
+ *
+ * \param transport The transport to destroy
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport);
+
+/**
+ * Get an existing transport from the target
+ *
+ * \param tgt The NVMe-oF target
+ * \param transport_name The name of the transport type to get.
+ *
+ * \return the transport or NULL if not found
+ */
+struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
+ const char *transport_name);
+
+/**
+ * Get the first transport registered with the given target
+ *
+ * \param tgt The NVMe-oF target
+ *
+ * \return The first transport registered on the target
+ */
+struct spdk_nvmf_transport *spdk_nvmf_transport_get_first(struct spdk_nvmf_tgt *tgt);
+
+/**
+ * Get the next transport in a target's list.
+ *
+ * \param transport A handle to a transport object
+ *
+ * \return The next transport associated with the NVMe-oF target
+ */
+struct spdk_nvmf_transport *spdk_nvmf_transport_get_next(struct spdk_nvmf_transport *transport);
+
+/**
+ * Get the opts for a given transport.
+ *
+ * \param transport The transport to query
+ *
+ * \return The opts associated with the given transport
+ */
+const struct spdk_nvmf_transport_opts *spdk_nvmf_get_transport_opts(struct spdk_nvmf_transport
+ *transport);
+
+/**
+ * Get the transport type for a given transport.
+ *
+ * \param transport The transport to query
+ *
+ * \return the transport type for the given transport
+ */
+spdk_nvme_transport_type_t spdk_nvmf_get_transport_type(struct spdk_nvmf_transport *transport);
+
+/**
+ * Get the transport name for a given transport.
+ *
+ * \param transport The transport to query
+ *
+ * \return the transport name for the given transport
+ */
+const char *spdk_nvmf_get_transport_name(struct spdk_nvmf_transport *transport);
+
+/**
+ * Function to be called once transport add is complete
+ *
+ * \param cb_arg Callback argument passed to this function.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_nvmf_tgt_add_transport_done_fn)(void *cb_arg, int status);
+
+/**
+ * Add a transport to a target
+ *
+ * \param tgt The NVMe-oF target
+ * \param transport The transport to add
+ * \param cb_fn A callback that will be called once the transport add is complete
+ * \param cb_arg A context argument passed to cb_fn.
+ *
+ * \return void. The callback status argument will be 0 on success
+ * or a negated errno on failure.
+ */
+void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_transport *transport,
+ spdk_nvmf_tgt_add_transport_done_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * Add listener to transport and begin accepting new connections.
+ *
+ * \param transport The transport to add listener to
+ * \param trid Address to listen at
+ *
+ * \return int. 0 if it completed successfully, or negative errno if it failed.
+ */
+int
+spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+/**
+ * Remove listener from transport and stop accepting new connections.
+ *
+ * \param transport The transport to remove listener from
+ * \param trid Address to stop listening at
+ *
+ * \return int. 0 if it completed successfully, or negative errno if it failed.
+ */
+int
+spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+/**
+ * \brief Get current transport poll group statistics.
+ *
+ * This function allocates memory for statistics and returns it
+ * in \p stat parameter. Caller must free this memory with
+ * spdk_nvmf_transport_poll_group_free_stat() when it is not needed
+ * anymore.
+ *
+ * \param tgt The NVMf target.
+ * \param transport The NVMf transport.
+ * \param stat Output parameter that will contain pointer to allocated statistics structure.
+ *
+ * \return 0 upon success.
+ * \return -ENOTSUP if transport does not support statistics.
+ * \return -EINVAL if any of parameters is NULL.
+ * \return -ENOENT if transport poll group is not found.
+ * \return -ENOMEM if memory allocation failed.
+ */
+int
+spdk_nvmf_transport_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_transport_poll_group_stat **stat);
+
+/**
+ * Free statistics memory previously allocated with spdk_nvmf_transport_poll_group_get_stat().
+ *
+ * \param transport The NVMf transport.
+ * \param stat Pointer to transport poll group statistics structure.
+ */
+void
+spdk_nvmf_transport_poll_group_free_stat(struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_transport_poll_group_stat *stat);
+
+/**
+ * \brief Set the global hooks for the RDMA transport, if necessary.
+ *
+ * This call is optional and must be performed prior to probing for
+ * any devices. By default, the RDMA transport will use the ibverbs
+ * library to create protection domains and register memory. This
+ * is a mechanism to subvert that and use an existing registration.
+ *
+ * This function may only be called one time per process.
+ *
+ * \param hooks for initializing global hooks
+ */
+void spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvmf_cmd.h b/src/spdk/include/spdk/nvmf_cmd.h
new file mode 100644
index 000000000..6cbac7de0
--- /dev/null
+++ b/src/spdk/include/spdk/nvmf_cmd.h
@@ -0,0 +1,226 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVMF_CMD_H_
+#define SPDK_NVMF_CMD_H_
+
+#include "spdk/stdinc.h"
+#include "spdk/nvmf.h"
+#include "spdk/bdev.h"
+
+enum spdk_nvmf_request_exec_status {
+ SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE,
+ SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS,
+};
+
+/**
+ * Fills the identify controller attributes for the specified controller
+ *
+ * \param ctrlr The NVMe-oF controller
+ * \param cdata The filled in identify controller attributes
+ * \return \ref spdk_nvmf_request_exec_status
+ */
+int spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvme_ctrlr_data *cdata);
+
+/**
+ * Fills the identify namespace attributes for the specified controller
+ *
+ * \param ctrlr The NVMe-oF controller
+ * \param cmd The NVMe command
+ * \param rsp The NVMe command completion
+ * \param nsdata The filled in identify namespace attributes
+ * \return \ref spdk_nvmf_request_exec_status
+ */
+int spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvme_cmd *cmd,
+ struct spdk_nvme_cpl *rsp,
+ struct spdk_nvme_ns_data *nsdata);
+
+/**
+ * Callback function definition for a custom admin command handler.
+ *
+ * A function of this type is passed to \ref spdk_nvmf_set_custom_admin_cmd_hdlr.
+ * It is called for every admin command that is processed by the NVMe-oF subsystem.
+ * If the function handled the admin command then it must return a value from
+ * \ref spdk_nvmf_request_exec_status. If the function did not handle the
+ * admin command then it should return -1. In this case the SPDK default admin
+ * command processing is applied to the request.
+ *
+ * \param req The NVMe-oF request of the admin command that is currently
+ * processed
+ * \return \ref spdk_nvmf_request_exec_status if the command has been handled
+ * by the handler or -1 if the command wasn't handled
+ */
+typedef int (*spdk_nvmf_custom_cmd_hdlr)(struct spdk_nvmf_request *req);
+
+/**
+ * Installs a custom admin command handler.
+ *
+ * \param opc NVMe admin command OPC for which the handler should be installed.
+ * \param hdlr The handler function. See \ref spdk_nvmf_custom_cmd_hdlr.
+ */
+void spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr);
+
+/**
+ * Forward an NVMe admin command to a namespace
+ *
+ * This function forwards all NVMe admin commands of value opc to the specified
+ * namespace id.
+ * If forward_nsid is 0, the command is sent to the namespace that was specified in the
+ * original command.
+ *
+ * \param opc - NVMe admin command OPC
+ * \param forward_nsid - nsid or 0
+ */
+void spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid);
+
+/**
+ * Callback function that is called right before the admin command reply
+ * is sent back to the initiator.
+ *
+ * \param req The NVMe-oF request
+ */
+typedef void (*spdk_nvmf_nvme_passthru_cmd_cb)(struct spdk_nvmf_request *req);
+
+/**
+ * Submits the NVMe-oF request to a bdev.
+ *
+ * This function can be used in a custom admin handler to send the command contained
+ * in the req to a bdev. Once the bdev completes the command, the specified cb_fn
+ * is called (which can be NULL if not needed).
+ *
+ * \param bdev The \ref spdk_bdev
+ * \param desc The \ref spdk_bdev_desc
+ * \param ch The \ref spdk_io_channel
+ * \param req The \ref spdk_nvmf_request passed to the bdev for processing
+ * \param cb_fn A callback function (or NULL) that is called before the request
+ * is completed.
+ *
+ * \return A \ref spdk_nvmf_request_exec_status
+ */
+int spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req, spdk_nvmf_nvme_passthru_cmd_cb cb_fn);
+
+/**
+ * Attempts to abort a request in the specified bdev
+ *
+ * \param bdev Bdev that is processing req_to_abort
+ * \param desc Bdev desc
+ * \param ch Channel on which req_to_abort was originally submitted
+ * \param req Abort cmd req
+ * \param req_to_abort The request that should be aborted
+ */
+int spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+ struct spdk_nvmf_request *req_to_abort);
+
+/**
+ * Provide access to the underlying bdev that is associated with a namespace.
+ *
+ * This function can be used to communicate with the bdev. For example,
+ * a \ref spdk_nvmf_custom_cmd_hdlr can use \ref spdk_nvmf_bdev_ctrlr_nvme_passthru_admin
+ * to pass on a \ref spdk_nvmf_request to a NVMe bdev.
+ *
+ * \param nsid The namespace id of a namespace that is valid for the
+ * underlying subsystem
+ * \param req The NVMe-oF request that is being processed
+ * \param bdev Returns the \ref spdk_bdev corresponding to the namespace id
+ * \param desc Returns the \ref spdk_bdev_desc corresponding to the namespace id
+ * \param ch Returns the \ref spdk_io_channel corresponding to the namespace id
+ *
+ * \return 0 upon success
+ * \return -EINVAL if the namespace id can't be found
+ */
+int spdk_nvmf_request_get_bdev(uint32_t nsid,
+ struct spdk_nvmf_request *req,
+ struct spdk_bdev **bdev,
+ struct spdk_bdev_desc **desc,
+ struct spdk_io_channel **ch);
+
+/**
+ * Get the NVMe-oF controller associated with this request.
+ *
+ * \param req The NVMe-oF request
+ *
+ * \return The NVMe-oF controller
+ */
+struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req);
+
+/**
+ * Get the NVMe-oF subsystem associated with this request.
+ *
+ * \param req The NVMe-oF request
+ *
+ * \return The NVMe-oF subsystem
+ */
+struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req);
+
+/**
+ * Get the data and length associated with this request.
+ *
+ * \param req The NVMe-oF request
+ * \param data The data buffer associated with this request
+ * \param length The length of the data buffer
+ */
+void spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length);
+
+/**
+ * Get the NVMe-oF command associated with this request.
+ *
+ * \param req The NVMe-oF request
+ *
+ * \return The NVMe command
+ */
+struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req);
+
+/**
+ * Get the NVMe-oF completion associated with this request.
+ *
+ * \param req The NVMe-oF request
+ *
+ * \return The NVMe completion
+ */
+struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req);
+
+/**
+ * Get the request to abort that is associated with this request.
+ * The req to abort is only set if the request is processing a SPDK_NVME_OPC_ABORT cmd.
+ *
+ * \param req The NVMe-oF abort request
+ *
+ * \return The NVMe-oF request that is in the process of being aborted
+ */
+struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req);
+
+#endif /* SPDK_NVMF_CMD_H_ */
diff --git a/src/spdk/include/spdk/nvmf_fc_spec.h b/src/spdk/include/spdk/nvmf_fc_spec.h
new file mode 100644
index 000000000..0a3234249
--- /dev/null
+++ b/src/spdk/include/spdk/nvmf_fc_spec.h
@@ -0,0 +1,411 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018-2019 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_FC_SPEC_H__
+#define __NVMF_FC_SPEC_H__
+
+#include "spdk/env.h"
+#include "spdk/nvme.h"
+
+/*
+ * FC-NVMe Spec. Definitions
+ */
+
+#define FCNVME_R_CTL_CMD_REQ 0x06
+#define FCNVME_R_CTL_DATA_OUT 0x01
+#define FCNVME_R_CTL_CONFIRM 0x03
+#define FCNVME_R_CTL_STATUS 0x07
+#define FCNVME_R_CTL_ERSP_STATUS 0x08
+#define FCNVME_R_CTL_LS_REQUEST 0x32
+#define FCNVME_R_CTL_LS_RESPONSE 0x33
+#define FCNVME_R_CTL_BA_ABTS 0x81
+
+#define FCNVME_F_CTL_END_SEQ 0x080000
+#define FCNVME_F_CTL_SEQ_INIT 0x010000
+
+/* END_SEQ | LAST_SEQ | Exchange Responder | SEQ init */
+#define FCNVME_F_CTL_RSP 0x990000
+
+#define FCNVME_TYPE_BLS 0x0
+#define FCNVME_TYPE_FC_EXCHANGE 0x08
+#define FCNVME_TYPE_NVMF_DATA 0x28
+
+#define FCNVME_CMND_IU_FC_ID 0x28
+#define FCNVME_CMND_IU_SCSI_ID 0xFD
+
+#define FCNVME_CMND_IU_NODATA 0x00
+#define FCNVME_CMND_IU_READ 0x10
+#define FCNVME_CMND_IU_WRITE 0x01
+
+/* BLS reject error codes */
+#define FCNVME_BLS_REJECT_UNABLE_TO_PERFORM 0x09
+#define FCNVME_BLS_REJECT_EXP_NOINFO 0x00
+#define FCNVME_BLS_REJECT_EXP_INVALID_OXID 0x03
+
+/*
+ * FC NVMe Link Services (LS) constants
+ */
+#define FCNVME_MAX_LS_REQ_SIZE 1536
+#define FCNVME_MAX_LS_RSP_SIZE 64
+
+#define FCNVME_LS_CA_CMD_MIN_LEN 592
+#define FCNVME_LS_CA_DESC_LIST_MIN_LEN 584
+#define FCNVME_LS_CA_DESC_MIN_LEN 576
+
+/* this value needs to be in sync with low level driver buffer size */
+#define FCNVME_MAX_LS_BUFFER_SIZE 2048
+
+#define FCNVME_GOOD_RSP_LEN 12
+#define FCNVME_ASSOC_HOSTID_LEN 16
+
+
+typedef uint64_t FCNVME_BE64;
+typedef uint32_t FCNVME_BE32;
+typedef uint16_t FCNVME_BE16;
+
+/*
+ * FC-NVME LS Commands (the FCNVME_LS_xxx codes carried in spdk_nvmf_fc_ls_rqst_w0.ls_cmd)
+ */
+enum {
+ FCNVME_LS_RSVD = 0, /* reserved */
+ FCNVME_LS_RJT = 1, /* reject */
+ FCNVME_LS_ACC = 2, /* accept */
+ FCNVME_LS_CREATE_ASSOCIATION = 3,
+ FCNVME_LS_CREATE_CONNECTION = 4,
+ FCNVME_LS_DISCONNECT = 5,
+};
+
+/*
+ * FC-NVME Link Service Descriptors (the FCNVME_LSDESC_xxx tags carried in a descriptor's desc_tag)
+ */
+enum {
+ FCNVME_LSDESC_RSVD = 0x0,
+ FCNVME_LSDESC_RQST = 0x1,
+ FCNVME_LSDESC_RJT = 0x2,
+ FCNVME_LSDESC_CREATE_ASSOC_CMD = 0x3,
+ FCNVME_LSDESC_CREATE_CONN_CMD = 0x4,
+ FCNVME_LSDESC_DISCONN_CMD = 0x5,
+ FCNVME_LSDESC_CONN_ID = 0x6,
+ FCNVME_LSDESC_ASSOC_ID = 0x7,
+};
+
+/*
+ * LS Reject reason_codes
+ */
+enum fcnvme_ls_rjt_reason {
+ FCNVME_RJT_RC_NONE = 0, /* no reason - not to be sent */
+ FCNVME_RJT_RC_INVAL = 0x01, /* invalid NVMe_LS command code */
+ FCNVME_RJT_RC_LOGIC = 0x03, /* logical error */
+ FCNVME_RJT_RC_UNAB = 0x09, /* unable to perform request */
+ FCNVME_RJT_RC_UNSUP = 0x0b, /* command not supported */
+ FCNVME_RJT_RC_INPROG = 0x0e, /* command already in progress */
+ FCNVME_RJT_RC_INV_ASSOC = 0x40, /* invalid Association ID */
+ FCNVME_RJT_RC_INV_CONN = 0x41, /* invalid Connection ID */
+ FCNVME_RJT_RC_INV_PARAM = 0x42, /* invalid parameters */
+ FCNVME_RJT_RC_INSUFF_RES = 0x43, /* insufficient resources */
+ FCNVME_RJT_RC_INV_HOST = 0x44, /* invalid or rejected host */
+ FCNVME_RJT_RC_VENDOR = 0xff, /* vendor specific error */
+};
+
+/*
+ * LS Reject reason_explanation codes
+ */
+enum fcnvme_ls_rjt_explan {
+ FCNVME_RJT_EXP_NONE = 0x00, /* No additional explanation */
+ FCNVME_RJT_EXP_OXID_RXID = 0x17, /* invalid OX_ID-RX_ID combo */
+ FCNVME_RJT_EXP_UNAB_DATA = 0x2a, /* unable to supply data */
+ FCNVME_RJT_EXP_INV_LEN = 0x2d, /* invalid payload length */
+ FCNVME_RJT_EXP_INV_ESRP = 0x40, /* invalid ERSP ratio (cf. the ersp_ratio descriptor field) */
+ FCNVME_RJT_EXP_INV_CTL_ID = 0x41, /* invalid controller ID */
+ FCNVME_RJT_EXP_INV_Q_ID = 0x42, /* invalid queue ID */
+ FCNVME_RJT_EXP_SQ_SIZE = 0x43, /* invalid submission queue size */
+ FCNVME_RJT_EXP_INV_HOST_ID = 0x44, /* invalid or rejected host ID */
+ FCNVME_RJT_EXP_INV_HOSTNQN = 0x45, /* invalid or rejected host NQN */
+ FCNVME_RJT_EXP_INV_SUBNQN = 0x46, /* invalid or rejected subsys nqn */
+};
+
+/*
+ * NVMe over FC CMD IU
+ */
+struct spdk_nvmf_fc_cmnd_iu {
+ uint32_t scsi_id: 8,
+ fc_id: 8,
+ cmnd_iu_len: 16;
+ uint32_t rsvd0: 24,
+ flags: 8;
+ uint64_t conn_id;
+ uint32_t cmnd_seq_num;
+ uint32_t data_len;
+ struct spdk_nvme_cmd cmd;
+ uint32_t rsvd1[2];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_cmnd_iu) == 96, "size_mismatch");
+
+/*
+ * NVMe over FC Extended Response (ERSP) IU
+ */
+struct spdk_nvmf_fc_ersp_iu {
+ uint32_t status_code: 8,
+ rsvd0: 8,
+ ersp_len: 16;
+ uint32_t response_seq_no;
+ uint32_t transferred_data_len;
+ uint32_t rsvd1;
+ struct spdk_nvme_cpl rsp; /* NVMe completion; the 32-byte assert below leaves 16 bytes for it */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ersp_iu) == 32, "size_mismatch");
+
+/*
+ * Transfer ready IU
+ */
+struct spdk_nvmf_fc_xfer_rdy_iu {
+ uint32_t relative_offset;
+ uint32_t burst_len;
+ uint32_t rsvd;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_xfer_rdy_iu) == 12, "size_mismatch");
+
+/*
+ * FC NVME Frame Header
+ */
+struct spdk_nvmf_fc_frame_hdr {
+ FCNVME_BE32 r_ctl: 8,
+ d_id: 24;
+ FCNVME_BE32 cs_ctl: 8,
+ s_id: 24;
+ FCNVME_BE32 type: 8,
+ f_ctl: 24;
+ FCNVME_BE32 seq_id: 8,
+ df_ctl: 8,
+ seq_cnt: 16;
+ FCNVME_BE32 ox_id: 16,
+ rx_id: 16;
+ FCNVME_BE32 parameter;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_frame_hdr) == 24, "size_mismatch");
+
+/*
+ * Request payload word 0
+ */
+struct spdk_nvmf_fc_ls_rqst_w0 {
+ uint8_t ls_cmd; /* FCNVME_LS_xxx */
+ uint8_t zeros[3];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_rqst_w0) == 4, "size_mismatch");
+
+/*
+ * LS request information descriptor
+ */
+struct spdk_nvmf_fc_lsdesc_rqst {
+ FCNVME_BE32 desc_tag; /* FCNVME_LSDESC_xxx */
+ FCNVME_BE32 desc_len;
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 rsvd12;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_rqst) == 16, "size_mismatch");
+
+/*
+ * LS accept header
+ */
+struct spdk_nvmf_fc_ls_acc_hdr {
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 desc_list_len;
+ struct spdk_nvmf_fc_lsdesc_rqst rqst;
+ /* Followed by cmd-specific ACC descriptors, see next definitions */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_acc_hdr) == 24, "size_mismatch");
+
+/*
+ * LS descriptor connection id
+ */
+struct spdk_nvmf_fc_lsdesc_conn_id {
+ FCNVME_BE32 desc_tag;
+ FCNVME_BE32 desc_len;
+ FCNVME_BE64 connection_id;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id) == 16, "size_mismatch");
+
+/*
+ * LS descriptor association id
+ */
+struct spdk_nvmf_fc_lsdesc_assoc_id {
+ FCNVME_BE32 desc_tag;
+ FCNVME_BE32 desc_len;
+ FCNVME_BE64 association_id;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id) == 16, "size_mismatch");
+
+/*
+ * LS Create Association descriptor (1016 bytes; each rsvdNN name encodes that field's byte offset)
+ */
+struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd {
+ FCNVME_BE32 desc_tag; /* FCNVME_LSDESC_xxx */
+ FCNVME_BE32 desc_len;
+ FCNVME_BE16 ersp_ratio;
+ FCNVME_BE16 rsvd10;
+ FCNVME_BE32 rsvd12[9];
+ FCNVME_BE16 cntlid; /* byte offset 48 */
+ FCNVME_BE16 sqsize; /* byte offset 50 */
+ FCNVME_BE32 rsvd52;
+ uint8_t hostid[FCNVME_ASSOC_HOSTID_LEN]; /* byte offset 56, 16 bytes */
+ uint8_t hostnqn[SPDK_NVME_NQN_FIELD_SIZE]; /* byte offset 72 */
+ uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE]; /* ends at byte 584, so each NQN field is 256 bytes */
+ uint8_t rsvd584[432]; /* pads the descriptor from byte 584 up to 1016 */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd) == 1016, "size_mismatch");
+
+/*
+ * LS Create Association request payload
+ */
+struct spdk_nvmf_fc_ls_cr_assoc_rqst {
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 desc_list_len;
+ struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd assoc_cmd;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_assoc_rqst) == 1024, "size_mismatch");
+
+/*
+ * LS Create Association accept payload
+ */
+struct spdk_nvmf_fc_ls_cr_assoc_acc {
+ struct spdk_nvmf_fc_ls_acc_hdr hdr;
+ struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id;
+ struct spdk_nvmf_fc_lsdesc_conn_id conn_id;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_assoc_acc) == 56, "size_mismatch");
+
+/*
+ * LS Create IO Connection descriptor (56 bytes, asserted below; each rsvdNN name encodes that field's byte offset)
+ */
+struct spdk_nvmf_fc_lsdesc_cr_conn_cmd {
+ FCNVME_BE32 desc_tag; /* FCNVME_LSDESC_xxx */
+ FCNVME_BE32 desc_len;
+ FCNVME_BE16 ersp_ratio;
+ FCNVME_BE16 rsvd10;
+ FCNVME_BE32 rsvd12[9];
+ FCNVME_BE16 qid; /* queue ID, byte offset 48 */
+ FCNVME_BE16 sqsize; /* submission queue size, byte offset 50 */
+ FCNVME_BE32 rsvd52;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_cr_conn_cmd) == 56, "size_mismatch");
+
+/*
+ * LS Create IO Connection payload
+ */
+struct spdk_nvmf_fc_ls_cr_conn_rqst {
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 desc_list_len;
+ struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id;
+ struct spdk_nvmf_fc_lsdesc_cr_conn_cmd connect_cmd;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst) == 80, "size_mismatch");
+
+/*
+ * LS Create IO Connection accept payload
+ */
+struct spdk_nvmf_fc_ls_cr_conn_acc {
+ struct spdk_nvmf_fc_ls_acc_hdr hdr;
+ struct spdk_nvmf_fc_lsdesc_conn_id conn_id;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_conn_acc) == 40, "size_mismatch");
+
+/*
+ * LS Disconnect descriptor
+ */
+struct spdk_nvmf_fc_lsdesc_disconn_cmd {
+ FCNVME_BE32 desc_tag;
+ FCNVME_BE32 desc_len;
+ FCNVME_BE32 rsvd8;
+ FCNVME_BE32 rsvd12;
+ FCNVME_BE32 rsvd16;
+ FCNVME_BE32 rsvd20;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd) == 24, "size_mismatch");
+
+/*
+ * LS Disconnect payload
+ */
+struct spdk_nvmf_fc_ls_disconnect_rqst {
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 desc_list_len;
+ struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id;
+ struct spdk_nvmf_fc_lsdesc_disconn_cmd disconn_cmd;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst) == 48, "size_mismatch");
+
+/*
+ * LS Disconnect accept payload
+ */
+struct spdk_nvmf_fc_ls_disconnect_acc {
+ struct spdk_nvmf_fc_ls_acc_hdr hdr;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_disconnect_acc) == 24, "size_mismatch");
+
+/*
+ * LS Reject descriptor
+ */
+struct spdk_nvmf_fc_lsdesc_rjt {
+ FCNVME_BE32 desc_tag;
+ FCNVME_BE32 desc_len;
+ uint8_t rsvd8;
+
+ uint8_t reason_code;
+ uint8_t reason_explanation;
+
+ uint8_t vendor;
+ FCNVME_BE32 rsvd12;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_rjt) == 16, "size_mismatch");
+
+/*
+ * LS Reject payload
+ */
+struct spdk_nvmf_fc_ls_rjt {
+ struct spdk_nvmf_fc_ls_rqst_w0 w0;
+ FCNVME_BE32 desc_list_len;
+ struct spdk_nvmf_fc_lsdesc_rqst rqst;
+ struct spdk_nvmf_fc_lsdesc_rjt rjt;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_rjt) == 40, "size_mismatch");
+
+/*
+ * FC World Wide Name
+ */
+struct spdk_nvmf_fc_wwn {
+ union {
+ uint64_t wwn; /* World Wide Names consist of eight bytes */
+ uint8_t octets[sizeof(uint64_t)];
+ } u;
+};
+
+#endif /* __NVMF_FC_SPEC_H__ */
diff --git a/src/spdk/include/spdk/nvmf_spec.h b/src/spdk/include/spdk/nvmf_spec.h
new file mode 100644
index 000000000..de49feef9
--- /dev/null
+++ b/src/spdk/include/spdk/nvmf_spec.h
@@ -0,0 +1,733 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVMF_SPEC_H
+#define SPDK_NVMF_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/assert.h"
+#include "spdk/nvme_spec.h"
+
+/**
+ * \file
+ * NVMe over Fabrics specification definitions
+ */
+
+#pragma pack(push, 1)
+
+struct spdk_nvmf_capsule_cmd {
+ uint8_t opcode;
+ uint8_t reserved1;
+ uint16_t cid;
+ uint8_t fctype;
+ uint8_t reserved2[35];
+ uint8_t fabric_specific[24];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_capsule_cmd) == 64, "Incorrect size");
+
+/* Fabric Command Set */
+#define SPDK_NVME_OPC_FABRIC 0x7f
+
+enum spdk_nvmf_fabric_cmd_types {
+ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET = 0x00,
+ SPDK_NVMF_FABRIC_COMMAND_CONNECT = 0x01,
+ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET = 0x04,
+ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND = 0x05,
+ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV = 0x06,
+ SPDK_NVMF_FABRIC_COMMAND_START_VENDOR_SPECIFIC = 0xC0,
+};
+
+enum spdk_nvmf_fabric_cmd_status_code {
+ SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT = 0x80,
+ SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY = 0x81,
+ SPDK_NVMF_FABRIC_SC_INVALID_PARAM = 0x82,
+ SPDK_NVMF_FABRIC_SC_RESTART_DISCOVERY = 0x83,
+ SPDK_NVMF_FABRIC_SC_INVALID_HOST = 0x84,
+ SPDK_NVMF_FABRIC_SC_LOG_RESTART_DISCOVERY = 0x90,
+ SPDK_NVMF_FABRIC_SC_AUTH_REQUIRED = 0x91,
+};
+
+/**
+ * RDMA Queue Pair service types
+ */
+enum spdk_nvmf_rdma_qptype {
+ /** Reliable connected */
+ SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED = 0x1,
+
+ /** Reliable datagram */
+ SPDK_NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM = 0x2,
+};
+
+/**
+ * RDMA provider types
+ */
+enum spdk_nvmf_rdma_prtype {
+ /** No provider specified */
+ SPDK_NVMF_RDMA_PRTYPE_NONE = 0x1,
+
+ /** InfiniBand */
+ SPDK_NVMF_RDMA_PRTYPE_IB = 0x2,
+
+ /** RoCE v1 */
+ SPDK_NVMF_RDMA_PRTYPE_ROCE = 0x3,
+
+ /** RoCE v2 */
+ SPDK_NVMF_RDMA_PRTYPE_ROCE2 = 0x4,
+
+ /** iWARP */
+ SPDK_NVMF_RDMA_PRTYPE_IWARP = 0x5,
+};
+
+/**
+ * RDMA connection management service types
+ */
+enum spdk_nvmf_rdma_cms {
+ /** Sockets based endpoint addressing */
+ SPDK_NVMF_RDMA_CMS_RDMA_CM = 0x1,
+};
+
+/**
+ * NVMe over Fabrics transport types
+ */
+enum spdk_nvmf_trtype {
+ /** RDMA */
+ SPDK_NVMF_TRTYPE_RDMA = 0x1,
+
+ /** Fibre Channel */
+ SPDK_NVMF_TRTYPE_FC = 0x2,
+
+ /** TCP */
+ SPDK_NVMF_TRTYPE_TCP = 0x3,
+
+ /** Intra-host transport (loopback) */
+ SPDK_NVMF_TRTYPE_INTRA_HOST = 0xfe,
+};
+
+/**
+ * Address family types
+ */
+enum spdk_nvmf_adrfam {
+ /** IPv4 (AF_INET) */
+ SPDK_NVMF_ADRFAM_IPV4 = 0x1,
+
+ /** IPv6 (AF_INET6) */
+ SPDK_NVMF_ADRFAM_IPV6 = 0x2,
+
+ /** InfiniBand (AF_IB) */
+ SPDK_NVMF_ADRFAM_IB = 0x3,
+
+ /** Fibre Channel address family */
+ SPDK_NVMF_ADRFAM_FC = 0x4,
+
+ /** Intra-host transport (loopback) */
+ SPDK_NVMF_ADRFAM_INTRA_HOST = 0xfe,
+};
+
+/**
+ * NVM subsystem types
+ */
+enum spdk_nvmf_subtype {
+ /** Discovery type for NVM subsystem */
+ SPDK_NVMF_SUBTYPE_DISCOVERY = 0x1,
+
+ /** NVMe type for NVM subsystem */
+ SPDK_NVMF_SUBTYPE_NVME = 0x2,
+};
+
+/**
+ * Connections shall be made over a fabric secure channel
+ */
+enum spdk_nvmf_treq_secure_channel {
+ /** Not specified */
+ SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED = 0x0,
+
+ /** Required */
+ SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED = 0x1,
+
+ /** Not required */
+ SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED = 0x2,
+};
+
+struct spdk_nvmf_fabric_auth_recv_cmd {
+ uint8_t opcode;
+ uint8_t reserved1;
+ uint16_t cid;
+ uint8_t fctype; /* SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV (0x06) */
+ uint8_t reserved2[19];
+ struct spdk_nvme_sgl_descriptor sgl1;
+ uint8_t reserved3;
+ uint8_t spsp0;
+ uint8_t spsp1;
+ uint8_t secp;
+ uint32_t al;
+ uint8_t reserved4[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_recv_cmd) == 64, "Incorrect size");
+
+struct spdk_nvmf_fabric_auth_send_cmd {
+ uint8_t opcode;
+ uint8_t reserved1;
+ uint16_t cid;
+ uint8_t fctype; /* SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND (0x05) */
+ uint8_t reserved2[19];
+ struct spdk_nvme_sgl_descriptor sgl1;
+ uint8_t reserved3;
+ uint8_t spsp0;
+ uint8_t spsp1;
+ uint8_t secp;
+ uint32_t tl;
+ uint8_t reserved4[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_send_cmd) == 64, "Incorrect size");
+
+struct spdk_nvmf_fabric_connect_data {
+ uint8_t hostid[16];
+ uint16_t cntlid;
+ uint8_t reserved5[238];
+ uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE];
+ uint8_t hostnqn[SPDK_NVME_NQN_FIELD_SIZE];
+ uint8_t reserved6[256];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_data) == 1024, "Incorrect size");
+
+struct spdk_nvmf_fabric_connect_cmd {
+ uint8_t opcode;
+ uint8_t reserved1;
+ uint16_t cid;
+ uint8_t fctype;
+ uint8_t reserved2[19];
+ struct spdk_nvme_sgl_descriptor sgl1;
+ uint16_t recfmt; /* Connect Record Format */
+ uint16_t qid; /* Queue Identifier */
+ uint16_t sqsize; /* Submission Queue Size */
+ uint8_t cattr; /* queue attributes */
+ uint8_t reserved3;
+ uint32_t kato; /* keep alive timeout */
+ uint8_t reserved4[12];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_cmd) == 64, "Incorrect size");
+
+struct spdk_nvmf_fabric_connect_rsp {
+ union {
+ struct {
+ uint16_t cntlid;
+ uint16_t authreq;
+ } success;
+
+ struct {
+ uint16_t ipo;
+ uint8_t iattr;
+ uint8_t reserved;
+ } invalid;
+
+ uint32_t raw;
+ } status_code_specific;
+
+ uint32_t reserved0;
+ uint16_t sqhd;
+ uint16_t reserved1;
+ uint16_t cid;
+ struct spdk_nvme_status status;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_rsp) == 16, "Incorrect size");
+
+#define SPDK_NVMF_PROP_SIZE_4 0
+#define SPDK_NVMF_PROP_SIZE_8 1
+
+struct spdk_nvmf_fabric_prop_get_cmd {
+ uint8_t opcode;
+ uint8_t reserved1;
+ uint16_t cid;
+ uint8_t fctype;
+ uint8_t reserved2[35];
+ struct {
+ uint8_t size : 3;
+ uint8_t reserved : 5;
+ } attrib;
+ uint8_t reserved3[3];
+ uint32_t ofst;
+ uint8_t reserved4[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_cmd) == 64, "Incorrect size");
+
+struct spdk_nvmf_fabric_prop_get_rsp {
+ union {
+ uint64_t u64;
+ struct {
+ uint32_t low;
+ uint32_t high;
+ } u32;
+ } value;
+
+ uint16_t sqhd;
+ uint16_t reserved0;
+ uint16_t cid;
+ struct spdk_nvme_status status;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_rsp) == 16, "Incorrect size");
+
+struct spdk_nvmf_fabric_prop_set_cmd {
+ uint8_t opcode;
+ uint8_t reserved0;
+ uint16_t cid;
+ uint8_t fctype;
+ uint8_t reserved1[35];
+ struct {
+ uint8_t size : 3;
+ uint8_t reserved : 5;
+ } attrib;
+ uint8_t reserved2[3];
+ uint32_t ofst;
+
+ union {
+ uint64_t u64;
+ struct {
+ uint32_t low;
+ uint32_t high;
+ } u32;
+ } value;
+
+ uint8_t reserved4[8];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_set_cmd) == 64, "Incorrect size");
+
+#define SPDK_NVMF_NQN_MIN_LEN 11 /* The prefix in the spec is 11 characters */
+#define SPDK_NVMF_NQN_MAX_LEN 223
+#define SPDK_NVMF_NQN_UUID_PRE_LEN 32
+#define SPDK_NVMF_UUID_STRING_LEN 36
+#define SPDK_NVMF_NQN_UUID_PRE "nqn.2014-08.org.nvmexpress:uuid:"
+#define SPDK_NVMF_DISCOVERY_NQN "nqn.2014-08.org.nvmexpress.discovery"
+
+#define SPDK_DOMAIN_LABEL_MAX_LEN 63 /* RFC 1034 max domain label length */
+
+#define SPDK_NVMF_TRSTRING_MAX_LEN 32
+#define SPDK_NVMF_TRADDR_MAX_LEN 256
+#define SPDK_NVMF_TRSVCID_MAX_LEN 32
+
+/** RDMA transport-specific address subtype */
+struct spdk_nvmf_rdma_transport_specific_address_subtype {
+ /** RDMA QP service type (\ref spdk_nvmf_rdma_qptype) */
+ uint8_t rdma_qptype;
+
+ /** RDMA provider type (\ref spdk_nvmf_rdma_prtype) */
+ uint8_t rdma_prtype;
+
+ /** RDMA connection management service (\ref spdk_nvmf_rdma_cms) */
+ uint8_t rdma_cms;
+
+ uint8_t reserved0[5];
+
+ /** RDMA partition key for AF_IB */
+ uint16_t rdma_pkey;
+
+ uint8_t reserved2[246];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_transport_specific_address_subtype) == 256,
+ "Incorrect size");
+
+/** TCP Secure Socket Type */
+enum spdk_nvme_tcp_secure_socket_type {
+ /** No security */
+ SPDK_NVME_TCP_SECURITY_NONE = 0,
+
+ /** TLS (Secure Sockets) */
+ SPDK_NVME_TCP_SECURITY_TLS = 1,
+};
+
+/** TCP transport-specific address subtype */
+struct spdk_nvme_tcp_transport_specific_address_subtype {
+ /** Security type (\ref spdk_nvme_tcp_secure_socket_type) */
+ uint8_t sectype;
+
+ uint8_t reserved0[255];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_transport_specific_address_subtype) == 256,
+ "Incorrect size");
+
+/** Transport-specific address subtype */
+union spdk_nvmf_transport_specific_address_subtype {
+ uint8_t raw[256];
+
+ /** RDMA */
+ struct spdk_nvmf_rdma_transport_specific_address_subtype rdma;
+
+ /** TCP */
+ struct spdk_nvme_tcp_transport_specific_address_subtype tcp;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_transport_specific_address_subtype) == 256,
+ "Incorrect size");
+
+#define SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE 32
+
+/**
+ * Discovery Log Page entry
+ */
+struct spdk_nvmf_discovery_log_page_entry {
+ /** Transport type (\ref spdk_nvmf_trtype) */
+ uint8_t trtype;
+
+ /** Address family (\ref spdk_nvmf_adrfam) */
+ uint8_t adrfam;
+
+ /** Subsystem type (\ref spdk_nvmf_subtype) */
+ uint8_t subtype;
+
+ /** Transport requirements */
+ struct {
+ /** Secure channel requirements (\ref spdk_nvmf_treq_secure_channel) */
+ uint8_t secure_channel : 2;
+
+ uint8_t reserved : 6;
+ } treq;
+
+ /** NVM subsystem port ID */
+ uint16_t portid;
+
+ /** Controller ID */
+ uint16_t cntlid;
+
+ /** Admin max SQ size */
+ uint16_t asqsz;
+
+ uint8_t reserved0[22];
+
+ /** Transport service identifier */
+ uint8_t trsvcid[SPDK_NVMF_TRSVCID_MAX_LEN];
+
+ uint8_t reserved1[192];
+
+ /** NVM subsystem qualified name */
+ uint8_t subnqn[256];
+
+ /** Transport address */
+ uint8_t traddr[SPDK_NVMF_TRADDR_MAX_LEN];
+
+ /** Transport-specific address subtype */
+ union spdk_nvmf_transport_specific_address_subtype tsas;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page_entry) == 1024, "Incorrect size");
+
+struct spdk_nvmf_discovery_log_page {
+ uint64_t genctr;
+ uint64_t numrec;
+ uint16_t recfmt;
+ uint8_t reserved0[1006]; /* pads the 18 bytes of fields above to a 1024-byte header */
+ struct spdk_nvmf_discovery_log_page_entry entries[0]; /* zero-length trailing array: adds nothing to sizeof */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page) == 1024, "Incorrect size");
+
+/* RDMA Fabric specific definitions below */
+
+#define SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY 0xF
+
+struct spdk_nvmf_rdma_request_private_data {
+ uint16_t recfmt; /* record format */
+ uint16_t qid; /* queue id */
+ uint16_t hrqsize; /* host receive queue size */
+ uint16_t hsqsize; /* host send queue size */
+ uint16_t cntlid; /* controller id */
+ uint8_t reserved[22];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_request_private_data) == 32, "Incorrect size");
+
+struct spdk_nvmf_rdma_accept_private_data {
+ uint16_t recfmt; /* record format */
+ uint16_t crqsize; /* controller receive queue size */
+ uint8_t reserved[28];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_accept_private_data) == 32, "Incorrect size");
+
+struct spdk_nvmf_rdma_reject_private_data {
+ uint16_t recfmt; /* record format */
+ uint16_t sts; /* status */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_reject_private_data) == 4, "Incorrect size");
+
+union spdk_nvmf_rdma_private_data {
+ struct spdk_nvmf_rdma_request_private_data pd_request;
+ struct spdk_nvmf_rdma_accept_private_data pd_accept;
+ struct spdk_nvmf_rdma_reject_private_data pd_reject;
+};
+SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_rdma_private_data) == 32, "Incorrect size");
+
+enum spdk_nvmf_rdma_transport_error {
+ SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH = 0x1,
+ SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT = 0x2,
+ SPDK_NVMF_RDMA_ERROR_INVALID_QID = 0x3,
+ SPDK_NVMF_RDMA_ERROR_INVALID_HSQSIZE = 0x4,
+ SPDK_NVMF_RDMA_ERROR_INVALID_HRQSIZE = 0x5,
+ SPDK_NVMF_RDMA_ERROR_NO_RESOURCES = 0x6,
+ SPDK_NVMF_RDMA_ERROR_INVALID_IRD = 0x7,
+ SPDK_NVMF_RDMA_ERROR_INVALID_ORD = 0x8,
+};
+
+/* TCP transport specific definitions below */
+
+/** NVMe/TCP PDU type (value 0x08 is not assigned in this enum) */
+enum spdk_nvme_tcp_pdu_type {
+ /** Initialize Connection Request (ICReq) */
+ SPDK_NVME_TCP_PDU_TYPE_IC_REQ = 0x00,
+
+ /** Initialize Connection Response (ICResp) */
+ SPDK_NVME_TCP_PDU_TYPE_IC_RESP = 0x01,
+
+ /** Host To Controller Terminate Connection Request (H2CTermReq) */
+ SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ = 0x02,
+
+ /** Controller To Host Terminate Connection Request (C2HTermReq) */
+ SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ = 0x03,
+
+ /** Command Capsule (CapsuleCmd) */
+ SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD = 0x04,
+
+ /** Response Capsule (CapsuleRsp) */
+ SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP = 0x05,
+
+ /** Host To Controller Data (H2CData) */
+ SPDK_NVME_TCP_PDU_TYPE_H2C_DATA = 0x06,
+
+ /** Controller To Host Data (C2HData) */
+ SPDK_NVME_TCP_PDU_TYPE_C2H_DATA = 0x07,
+
+ /** Ready to Transfer (R2T) */
+ SPDK_NVME_TCP_PDU_TYPE_R2T = 0x09,
+};
+
+/** Common NVMe/TCP PDU header */
+struct spdk_nvme_tcp_common_pdu_hdr {
+ /** PDU type (\ref spdk_nvme_tcp_pdu_type) */
+ uint8_t pdu_type;
+
+ /** pdu_type-specific flags */
+ uint8_t flags;
+
+ /** Length of PDU header (not including the Header Digest) */
+ uint8_t hlen;
+
+ /** PDU Data Offset from the start of the PDU */
+ uint8_t pdo;
+
+ /** Total number of bytes in PDU, including pdu_hdr */
+ uint32_t plen;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_common_pdu_hdr) == 8, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type) == 0,
+ "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, flags) == 1, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen) == 2, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo) == 3, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen) == 4, "Incorrect offset");
+
+#define SPDK_NVME_TCP_CH_FLAGS_HDGSTF (1u << 0)
+#define SPDK_NVME_TCP_CH_FLAGS_DDGSTF (1u << 1)
+
+/**
+ * ICReq
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ
+ */
+struct spdk_nvme_tcp_ic_req {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t pfv;
+ /** Specifies the data alignment for all PDUs transferred from the controller to the host that contain data */
+ uint8_t hpda;
+ union {
+ uint8_t raw;
+ struct {
+ uint8_t hdgst_enable : 1;
+ uint8_t ddgst_enable : 1;
+ uint8_t reserved : 6;
+ } bits;
+ } dgst;
+ uint32_t maxr2t;
+ uint8_t reserved16[112];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_ic_req) == 128, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, pfv) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, hpda) == 10, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, maxr2t) == 12, "Incorrect offset");
+
+#define SPDK_NVME_TCP_CPDA_MAX 31
+#define SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET ((SPDK_NVME_TCP_CPDA_MAX + 1) << 2)
+
+/**
+ * ICResp
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP
+ */
+struct spdk_nvme_tcp_ic_resp {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t pfv;
+ /** Specifies the data alignment for all PDUs transferred from the host to the controller that contain data */
+ uint8_t cpda;
+ union {
+ uint8_t raw;
+ struct {
+ uint8_t hdgst_enable : 1;
+ uint8_t ddgst_enable : 1;
+ uint8_t reserved : 6;
+ } bits;
+ } dgst;
+ /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
+ uint32_t maxh2cdata;
+ uint8_t reserved16[112];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_ic_resp) == 128, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, pfv) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, cpda) == 10, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata) == 12, "Incorrect offset");
+
+/**
+ * TermReq
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ or SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ
+ */
+struct spdk_nvme_tcp_term_req_hdr {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t fes; /* presumably one of enum spdk_nvme_tcp_term_req_fes - confirm */
+ uint8_t fei[4];
+ uint8_t reserved14[10];
+};
+
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_term_req_hdr) == 24, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_term_req_hdr, fes) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_term_req_hdr, fei) == 10, "Incorrect offset");
+
+enum spdk_nvme_tcp_term_req_fes {
+ SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD = 0x01,
+ SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR = 0x02,
+ SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR = 0x03,
+ SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE = 0x04,
+ SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED = 0x05,
+ SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED = 0x05, /* same value as ..._DATA_TRANSFER_LIMIT_EXCEEDED */
+ SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER = 0x06,
+};
+
+/* Total length of term req PDU (including PDU header and DATA) in bytes shall not exceed a limit of 152 bytes. */
+#define SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE 128
+#define SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE (SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE + sizeof(struct spdk_nvme_tcp_term_req_hdr))
+
+/**
+ * CapsuleCmd
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD
+ */
+struct spdk_nvme_tcp_cmd {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ struct spdk_nvme_cmd ccsqe;
+ /**< icdoff hdgst padding + in-capsule data + ddgst (if enabled) */
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_cmd) == 72, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_cmd, ccsqe) == 8, "Incorrect offset");
+
+/**
+ * CapsuleResp
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP
+ */
+struct spdk_nvme_tcp_rsp {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ struct spdk_nvme_cpl rccqe;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_rsp) == 24, "incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_rsp, rccqe) == 8, "Incorrect offset");
+
+
+/**
+ * H2CData
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_DATA
+ */
+struct spdk_nvme_tcp_h2c_data_hdr {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t cccid;
+ uint16_t ttag;
+ uint32_t datao;
+ uint32_t datal;
+ uint8_t reserved20[4];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_h2c_data_hdr) == 24, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag) == 10, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, datao) == 12, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, datal) == 16, "Incorrect offset");
+
+#define SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU (1u << 2)
+#define SPDK_NVME_TCP_H2C_DATA_FLAGS_SUCCESS (1u << 3)
+#define SPDK_NVME_TCP_H2C_DATA_PDO_MULT 8u
+
+/**
+ * C2HData
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA
+ */
+struct spdk_nvme_tcp_c2h_data_hdr {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t cccid;
+ uint8_t reserved10[2];
+ uint32_t datao;
+ uint32_t datal;
+ uint8_t reserved20[4];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_c2h_data_hdr) == 24, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao) == 12, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal) == 16, "Incorrect offset");
+
+#define SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS (1u << 3)
+#define SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU (1u << 2)
+#define SPDK_NVME_TCP_C2H_DATA_PDO_MULT 8u
+
+/**
+ * R2T
+ *
+ * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_R2T
+ */
+struct spdk_nvme_tcp_r2t_hdr {
+ struct spdk_nvme_tcp_common_pdu_hdr common;
+ uint16_t cccid;
+ uint16_t ttag;
+ uint32_t r2to;
+ uint32_t r2tl;
+ uint8_t reserved20[4];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_r2t_hdr) == 24, "Incorrect size");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid) == 8, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, ttag) == 10, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to) == 12, "Incorrect offset");
+SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl) == 16, "Incorrect offset");
+
+#pragma pack(pop)
+
+#endif /* SPDK_NVMF_SPEC_H */
diff --git a/src/spdk/include/spdk/nvmf_transport.h b/src/spdk/include/spdk/nvmf_transport.h
new file mode 100644
index 000000000..ceb331856
--- /dev/null
+++ b/src/spdk/include/spdk/nvmf_transport.h
@@ -0,0 +1,495 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * NVMe-oF Target transport plugin API
+ */
+
+#ifndef SPDK_NVMF_TRANSPORT_H_
+#define SPDK_NVMF_TRANSPORT_H_
+
+#include "spdk/bdev.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_cmd.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/memory.h"
+
+#define SPDK_NVMF_MAX_SGL_ENTRIES 16
+
+/* The maximum number of buffers per request */
+#define NVMF_REQ_MAX_BUFFERS (SPDK_NVMF_MAX_SGL_ENTRIES * 2)
+
+/* AIO backend requires block size aligned data buffers,
+ * extra 4KiB aligned data buffer should work for most devices.
+ */
+#define NVMF_DATA_BUFFER_ALIGNMENT VALUE_4KB
+#define NVMF_DATA_BUFFER_MASK (NVMF_DATA_BUFFER_ALIGNMENT - 1LL)
+
+union nvmf_h2c_msg {
+ struct spdk_nvmf_capsule_cmd nvmf_cmd;
+ struct spdk_nvme_cmd nvme_cmd;
+ struct spdk_nvmf_fabric_prop_set_cmd prop_set_cmd;
+ struct spdk_nvmf_fabric_prop_get_cmd prop_get_cmd;
+ struct spdk_nvmf_fabric_connect_cmd connect_cmd;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size");
+
+union nvmf_c2h_msg {
+ struct spdk_nvme_cpl nvme_cpl;
+ struct spdk_nvmf_fabric_prop_get_rsp prop_get_rsp;
+ struct spdk_nvmf_fabric_connect_rsp connect_rsp;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size");
+
+struct spdk_nvmf_dif_info {
+ struct spdk_dif_ctx dif_ctx;
+ bool dif_insert_or_strip;
+ uint32_t elba_length;
+ uint32_t orig_length;
+};
+
+struct spdk_nvmf_request {
+ struct spdk_nvmf_qpair *qpair;
+ uint32_t length;
+ enum spdk_nvme_data_transfer xfer;
+ void *data;
+ union nvmf_h2c_msg *cmd;
+ union nvmf_c2h_msg *rsp;
+ void *buffers[NVMF_REQ_MAX_BUFFERS];
+ struct iovec iov[NVMF_REQ_MAX_BUFFERS];
+ uint32_t iovcnt;
+ bool data_from_pool;
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+ struct spdk_nvmf_dif_info dif;
+ spdk_nvmf_nvme_passthru_cmd_cb cmd_cb_fn;
+ struct spdk_nvmf_request *first_fused_req;
+ struct spdk_nvmf_request *req_to_abort;
+ struct spdk_poller *poller;
+ uint64_t timeout_tsc;
+
+ STAILQ_ENTRY(spdk_nvmf_request) buf_link;
+ TAILQ_ENTRY(spdk_nvmf_request) link;
+};
+
+enum spdk_nvmf_qpair_state {
+ SPDK_NVMF_QPAIR_UNINITIALIZED = 0,
+ SPDK_NVMF_QPAIR_ACTIVE,
+ SPDK_NVMF_QPAIR_DEACTIVATING,
+ SPDK_NVMF_QPAIR_ERROR,
+};
+
+typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status);
+
+struct spdk_nvmf_qpair {
+ enum spdk_nvmf_qpair_state state;
+ spdk_nvmf_state_change_done state_cb;
+ void *state_cb_arg;
+
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_poll_group *group;
+
+ uint16_t qid;
+ uint16_t sq_head;
+ uint16_t sq_head_max;
+
+ struct spdk_nvmf_request *first_fused_req;
+
+ TAILQ_HEAD(, spdk_nvmf_request) outstanding;
+ TAILQ_ENTRY(spdk_nvmf_qpair) link;
+};
+
+struct spdk_nvmf_transport_pg_cache_buf {
+ STAILQ_ENTRY(spdk_nvmf_transport_pg_cache_buf) link;
+};
+
+struct spdk_nvmf_transport_poll_group {
+ struct spdk_nvmf_transport *transport;
+ /* Requests that are waiting to obtain a data buffer */
+ STAILQ_HEAD(, spdk_nvmf_request) pending_buf_queue;
+ STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) buf_cache;
+ uint32_t buf_cache_count;
+ uint32_t buf_cache_size;
+ struct spdk_nvmf_poll_group *group;
+ TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link;
+};
+
+struct spdk_nvmf_poll_group {
+ struct spdk_thread *thread;
+ struct spdk_poller *poller;
+
+ TAILQ_HEAD(, spdk_nvmf_transport_poll_group) tgroups;
+
+ /* Array of poll groups indexed by subsystem id (sid) */
+ struct spdk_nvmf_subsystem_poll_group *sgroups;
+ uint32_t num_sgroups;
+
+ /* All of the queue pairs that belong to this poll group */
+ TAILQ_HEAD(, spdk_nvmf_qpair) qpairs;
+
+ /* Statistics */
+ struct spdk_nvmf_poll_group_stat stat;
+
+ spdk_nvmf_poll_group_destroy_done_fn destroy_cb_fn;
+ void *destroy_cb_arg;
+
+ TAILQ_ENTRY(spdk_nvmf_poll_group) link;
+};
+
+struct spdk_nvmf_listener {
+ struct spdk_nvme_transport_id trid;
+ uint32_t ref;
+
+ TAILQ_ENTRY(spdk_nvmf_listener) link;
+};
+
+/**
+ * A subset of struct spdk_nvme_ctrlr_data that are emulated by a fabrics device.
+ */
+struct spdk_nvmf_ctrlr_data {
+ uint16_t kas;
+ struct spdk_nvme_cdata_sgls sgls;
+ struct spdk_nvme_cdata_nvmf_specific nvmf_specific;
+};
+
+struct spdk_nvmf_transport {
+ struct spdk_nvmf_tgt *tgt;
+ const struct spdk_nvmf_transport_ops *ops;
+ struct spdk_nvmf_transport_opts opts;
+
+ /* A mempool for transport related data transfers */
+ struct spdk_mempool *data_buf_pool;
+
+ TAILQ_HEAD(, spdk_nvmf_listener) listeners;
+ TAILQ_ENTRY(spdk_nvmf_transport) link;
+};
+
+struct spdk_nvmf_transport_ops {
+ /**
+ * Transport name
+ */
+ char name[SPDK_NVMF_TRSTRING_MAX_LEN];
+
+ /**
+ * Transport type
+ */
+ enum spdk_nvme_transport_type type;
+
+ /**
+ * Initialize transport options to default value
+ */
+ void (*opts_init)(struct spdk_nvmf_transport_opts *opts);
+
+ /**
+ * Create a transport for the given transport opts
+ */
+ struct spdk_nvmf_transport *(*create)(struct spdk_nvmf_transport_opts *opts);
+
+ /**
+ * Destroy the transport
+ */
+ int (*destroy)(struct spdk_nvmf_transport *transport);
+
+ /**
+ * Instruct the transport to accept new connections at the address
+ * provided. This may be called multiple times.
+ */
+ int (*listen)(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Stop accepting new connections at the given address.
+ */
+ void (*stop_listen)(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+ /**
+ * A listener has been associated with a subsystem with the given NQN.
+ * This is only a notification. Most transports will not need to take any
+ * action here, as the enforcement of the association is done in the generic
+ * code.
+ *
+ * The association is not considered complete until cb_fn is called. New
+ * connections on the listener targeting this subsystem will be rejected
+ * until that time.
+ *
+ * Pass a negated errno code to `cb_fn` to block the association. 0 to allow.
+ */
+ void (*listen_associate)(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid,
+ spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
+ void *cb_arg);
+
+ /**
+ * Check for new connections on the transport.
+ */
+ uint32_t (*accept)(struct spdk_nvmf_transport *transport);
+
+ /**
+ * Initialize subset of identify controller data.
+ */
+ void (*cdata_init)(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr_data *cdata);
+
+ /**
+ * Fill out a discovery log entry for a specific listen address.
+ */
+ void (*listener_discover)(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry);
+
+ /**
+ * Create a new poll group
+ */
+ struct spdk_nvmf_transport_poll_group *(*poll_group_create)(struct spdk_nvmf_transport *transport);
+
+ /**
+ * Get the polling group of the queue pair optimal for the specific transport
+ */
+ struct spdk_nvmf_transport_poll_group *(*get_optimal_poll_group)(struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Destroy a poll group
+ */
+ void (*poll_group_destroy)(struct spdk_nvmf_transport_poll_group *group);
+
+ /**
+ * Add a qpair to a poll group
+ */
+ int (*poll_group_add)(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Remove a qpair from a poll group
+ */
+ int (*poll_group_remove)(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Poll the group to process I/O
+ */
+ int (*poll_group_poll)(struct spdk_nvmf_transport_poll_group *group);
+
+ /**
+ * Free the request without sending a response
+ * to the originator. Release memory tied to this request.
+ */
+ int (*req_free)(struct spdk_nvmf_request *req);
+
+ /**
+ * Signal request completion, which sends a response
+ * to the originator.
+ */
+ int (*req_complete)(struct spdk_nvmf_request *req);
+
+ /**
+ * Deinitialize a connection.
+ */
+ void (*qpair_fini)(struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Get the peer transport ID for the queue pair.
+ */
+ int (*qpair_get_peer_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Get the local transport ID for the queue pair.
+ */
+ int (*qpair_get_local_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Get the listener transport ID that accepted this qpair originally.
+ */
+ int (*qpair_get_listen_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Abort the request which the abort request specifies.
+ * This function can complete synchronously or asynchronously, but
+ * is expected to call spdk_nvmf_request_complete() in the end
+ * for both cases.
+ */
+ void (*qpair_abort_request)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req);
+
+ /**
+ * Get transport poll group statistics
+ */
+ int (*poll_group_get_stat)(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_transport_poll_group_stat **stat);
+
+ /**
+ * Free transport poll group statistics previously allocated with poll_group_get_stat()
+ */
+ void (*poll_group_free_stat)(struct spdk_nvmf_transport_poll_group_stat *stat);
+};
+
+/**
+ * Register the operations for a given transport type.
+ *
+ * This function should be invoked by referencing the
+ * SPDK_NVMF_TRANSPORT_REGISTER macro in the transport's .c file.
+ *
+ * \param ops The operations associated with an NVMe-oF transport.
+ */
+void spdk_nvmf_transport_register(const struct spdk_nvmf_transport_ops *ops);
+
+int spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req);
+
+/**
+ * Function to be called for each newly discovered qpair.
+ *
+ * \param tgt The nvmf target
+ * \param qpair The newly discovered qpair.
+ */
+void spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair);
+
+/**
+ * A subset of struct spdk_nvme_registers that are emulated by a fabrics device.
+ */
+struct spdk_nvmf_registers {
+ union spdk_nvme_cap_register cap;
+ union spdk_nvme_vs_register vs;
+ union spdk_nvme_cc_register cc;
+ union spdk_nvme_csts_register csts;
+ union spdk_nvme_aqa_register aqa;
+ uint64_t asq;
+ uint64_t acq;
+};
+
+const struct spdk_nvmf_registers *spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr);
+
+void spdk_nvmf_request_free_buffers(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport);
+int spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport,
+ uint32_t length);
+int spdk_nvmf_request_get_buffers_multi(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport,
+ uint32_t *lengths, uint32_t num_lengths);
+
+bool spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx);
+
+void spdk_nvmf_request_exec(struct spdk_nvmf_request *req);
+void spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req);
+int spdk_nvmf_request_free(struct spdk_nvmf_request *req);
+int spdk_nvmf_request_complete(struct spdk_nvmf_request *req);
+
+/**
+ * Remove the given qpair from the poll group.
+ *
+ * \param qpair The qpair to remove.
+ */
+void spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair);
+
+/**
+ * Get the NVMe-oF subsystem associated with this controller.
+ *
+ * \param ctrlr The NVMe-oF controller
+ *
+ * \return The NVMe-oF subsystem
+ */
+struct spdk_nvmf_subsystem *
+spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr);
+
+/**
+ * Get the NVMe-oF controller ID.
+ *
+ * \param ctrlr The NVMe-oF controller
+ *
+ * \return The NVMe-oF controller ID
+ */
+uint16_t
+spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr);
+
+static inline enum spdk_nvme_data_transfer
+spdk_nvmf_req_get_xfer(struct spdk_nvmf_request *req) {
+ enum spdk_nvme_data_transfer xfer;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
+
+ /* Figure out data transfer direction: from fctype for fabrics commands, from the opcode otherwise */
+ if (cmd->opc == SPDK_NVME_OPC_FABRIC)
+ {
+ xfer = spdk_nvme_opc_get_data_transfer(req->cmd->nvmf_cmd.fctype);
+ } else
+ {
+ xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
+ }
+
+ if (xfer == SPDK_NVME_DATA_NONE)
+ {
+ return xfer;
+ }
+
+ /* Commands that may transfer data can still specify a length of 0; report those as
+ * SPDK_NVME_DATA_NONE. SGL descriptor types not listed in the switch leave xfer unchanged.
+ */
+ switch (sgl->generic.type)
+ {
+ case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
+ case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
+ case SPDK_NVME_SGL_TYPE_SEGMENT:
+ case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
+ case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
+ if (sgl->unkeyed.length == 0) {
+ xfer = SPDK_NVME_DATA_NONE;
+ }
+ break;
+ case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
+ if (sgl->keyed.length == 0) {
+ xfer = SPDK_NVME_DATA_NONE;
+ }
+ break;
+ }
+
+ return xfer;
+}
+
+/*
+ * Macro used to register new transports: defines a constructor-attribute function
+ * named after `name` that passes transport_ops to spdk_nvmf_transport_register().
+ */
+#define SPDK_NVMF_TRANSPORT_REGISTER(name, transport_ops) \
+static void __attribute__((constructor)) _spdk_nvmf_transport_register_##name(void) \
+{ \
+ spdk_nvmf_transport_register(transport_ops); \
+}
+
+#endif
diff --git a/src/spdk/include/spdk/opal.h b/src/spdk/include/spdk/opal.h
new file mode 100644
index 000000000..270fcdd3c
--- /dev/null
+++ b/src/spdk/include/spdk/opal.h
@@ -0,0 +1,145 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_OPAL_H
+#define SPDK_OPAL_H
+
+#include "spdk/stdinc.h"
+#include "spdk/nvme.h"
+#include "spdk/log.h"
+#include "spdk/endian.h"
+#include "spdk/string.h"
+#include "spdk/opal_spec.h"
+
+struct spdk_opal_d0_features_info {
+ struct spdk_opal_d0_tper_feat tper;
+ struct spdk_opal_d0_locking_feat locking;
+ struct spdk_opal_d0_single_user_mode_feat single_user;
+ struct spdk_opal_d0_geo_feat geo;
+ struct spdk_opal_d0_datastore_feat datastore;
+ struct spdk_opal_d0_v100_feat v100;
+ struct spdk_opal_d0_v200_feat v200;
+};
+
+enum spdk_opal_lock_state {
+ OPAL_READONLY = 0x01,
+ OPAL_RWLOCK = 0x02,
+ OPAL_READWRITE = 0x04,
+};
+
+enum spdk_opal_user {
+ OPAL_ADMIN1 = 0x0,
+ OPAL_USER1 = 0x01,
+ OPAL_USER2 = 0x02,
+ OPAL_USER3 = 0x03,
+ OPAL_USER4 = 0x04,
+ OPAL_USER5 = 0x05,
+ OPAL_USER6 = 0x06,
+ OPAL_USER7 = 0x07,
+ OPAL_USER8 = 0x08,
+ OPAL_USER9 = 0x09,
+};
+
+enum spdk_opal_locking_range {
+ OPAL_LOCKING_RANGE_GLOBAL = 0x0,
+ OPAL_LOCKING_RANGE_1,
+ OPAL_LOCKING_RANGE_2,
+ OPAL_LOCKING_RANGE_3,
+ OPAL_LOCKING_RANGE_4,
+ OPAL_LOCKING_RANGE_5,
+ OPAL_LOCKING_RANGE_6,
+ OPAL_LOCKING_RANGE_7,
+ OPAL_LOCKING_RANGE_8,
+ OPAL_LOCKING_RANGE_9,
+ OPAL_LOCKING_RANGE_10,
+};
+
+struct spdk_opal_locking_range_info {
+ uint8_t locking_range_id;
+ uint8_t _padding[7];
+ uint64_t range_start;
+ uint64_t range_length;
+ bool read_lock_enabled;
+ bool write_lock_enabled;
+ bool read_locked;
+ bool write_locked;
+};
+
+struct spdk_opal_dev;
+
+struct spdk_opal_dev *spdk_opal_dev_construct(struct spdk_nvme_ctrlr *ctrlr);
+void spdk_opal_dev_destruct(struct spdk_opal_dev *dev);
+
+struct spdk_opal_d0_features_info *spdk_opal_get_d0_features_info(struct spdk_opal_dev *dev);
+
+__attribute__((__deprecated__)) bool spdk_opal_supported(struct spdk_opal_dev *dev);
+
+int spdk_opal_cmd_take_ownership(struct spdk_opal_dev *dev, char *new_passwd);
+
+/**
+ * synchronous function: send and then receive.
+ *
+ * Wait until response is received.
+ */
+int spdk_opal_cmd_revert_tper(struct spdk_opal_dev *dev, const char *passwd);
+
+int spdk_opal_cmd_activate_locking_sp(struct spdk_opal_dev *dev, const char *passwd);
+int spdk_opal_cmd_lock_unlock(struct spdk_opal_dev *dev, enum spdk_opal_user user,
+ enum spdk_opal_lock_state flag, enum spdk_opal_locking_range locking_range,
+ const char *passwd);
+int spdk_opal_cmd_setup_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user,
+ enum spdk_opal_locking_range locking_range_id, uint64_t range_start,
+ uint64_t range_length, const char *passwd);
+
+int spdk_opal_cmd_get_max_ranges(struct spdk_opal_dev *dev, const char *passwd);
+int spdk_opal_cmd_get_locking_range_info(struct spdk_opal_dev *dev, const char *passwd,
+ enum spdk_opal_user user_id,
+ enum spdk_opal_locking_range locking_range_id);
+int spdk_opal_cmd_enable_user(struct spdk_opal_dev *dev, enum spdk_opal_user user_id,
+ const char *passwd);
+int spdk_opal_cmd_add_user_to_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id,
+ enum spdk_opal_locking_range locking_range_id,
+ enum spdk_opal_lock_state lock_flag, const char *passwd);
+int spdk_opal_cmd_set_new_passwd(struct spdk_opal_dev *dev, enum spdk_opal_user user_id,
+ const char *new_passwd, const char *old_passwd, bool new_user);
+
+int spdk_opal_cmd_erase_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id,
+ enum spdk_opal_locking_range locking_range_id, const char *password);
+
+int spdk_opal_cmd_secure_erase_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id,
+ enum spdk_opal_locking_range locking_range_id, const char *password);
+
+struct spdk_opal_locking_range_info *spdk_opal_get_locking_range_info(struct spdk_opal_dev *dev,
+ enum spdk_opal_locking_range id);
+void spdk_opal_free_locking_range_info(struct spdk_opal_dev *dev, enum spdk_opal_locking_range id);
+#endif
diff --git a/src/spdk/include/spdk/opal_spec.h b/src/spdk/include/spdk/opal_spec.h
new file mode 100644
index 000000000..dae615cb0
--- /dev/null
+++ b/src/spdk/include/spdk/opal_spec.h
@@ -0,0 +1,379 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_OPAL_SPEC_H
+#define SPDK_OPAL_SPEC_H
+
+#include "spdk/stdinc.h"
+#include "spdk/assert.h"
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * 3.2.2.3 Tokens
+ */
+#define SPDK_TINY_ATOM_TYPE_MAX 0x7F
+#define SPDK_SHORT_ATOM_TYPE_MAX 0xBF
+#define SPDK_MEDIUM_ATOM_TYPE_MAX 0xDF
+#define SPDK_LONG_ATOM_TYPE_MAX 0xE3
+
+#define SPDK_TINY_ATOM_SIGN_FLAG 0x40
+
+#define SPDK_TINY_ATOM_DATA_MASK 0x3F
+
+#define SPDK_SHORT_ATOM_ID 0x80
+#define SPDK_SHORT_ATOM_BYTESTRING_FLAG 0x20
+#define SPDK_SHORT_ATOM_SIGN_FLAG 0x10
+#define SPDK_SHORT_ATOM_LEN_MASK 0x0F
+
+#define SPDK_MEDIUM_ATOM_ID 0xC0
+#define SPDK_MEDIUM_ATOM_BYTESTRING_FLAG 0x10
+
+#define SPDK_MEDIUM_ATOM_SIGN_FLAG 0x08
+#define SPDK_MEDIUM_ATOM_LEN_MASK 0x07
+
+#define SPDK_LONG_ATOM_ID 0xE0
+#define SPDK_LONG_ATOM_BYTESTRING_FLAG 0x02
+#define SPDK_LONG_ATOM_SIGN_FLAG 0x01
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * Table-26 ComID management
+ */
+#define LV0_DISCOVERY_COMID 0x01
+
+/*
+ * TCG Storage Opal v2.01 r1.00
+ * 5.2.3 Type Table Modification
+ */
+#define OPAL_MANUFACTURED_INACTIVE 0x08
+
+#define LOCKING_RANGE_NON_GLOBAL 0x03
+
+#define SPDK_OPAL_MAX_PASSWORD_SIZE 32 /* in bytes */
+
+#define SPDK_OPAL_MAX_LOCKING_RANGE 8 /* maximum 8 ranges defined by spec */
+
+/*
+ * Feature Code
+ */
+enum spdk_lv0_discovery_feature_code {
+ /*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * 3.3.6 Level 0 Discovery
+ */
+ FEATURECODE_TPER = 0x0001,
+ FEATURECODE_LOCKING = 0x0002,
+
+ /*
+ * Opal SSC 1.00 r3.00 Final
+ * 3.1.1.4 Opal SSC Feature
+ */
+ FEATURECODE_OPALV100 = 0x0200,
+
+ /*
+ * TCG Storage Opal v2.01 r1.00
+ * 3.1.1.4 Geometry Reporting Feature
+ * 3.1.1.5 Opal SSC V2.00 Feature
+ */
+ FEATURECODE_OPALV200 = 0x0203,
+ FEATURECODE_GEOMETRY = 0x0003,
+
+ /*
+ * TCG Storage Opal Feature Set Single User Mode v1.00 r2.00
+ * 4.2.1 Single User Mode Feature Descriptor
+ */
+ FEATURECODE_SINGLEUSER = 0x0201,
+
+ /*
+ * TCG Storage Opal Feature Set Additional DataStore Tables v1.00 r1.00
+ * 4.1.1 DataStore Table Feature Descriptor
+ */
+ FEATURECODE_DATASTORE = 0x0202,
+};
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * 5.1.4 Abstract Type
+ */
+enum spdk_opal_token {
+ /* boolean */
+ SPDK_OPAL_TRUE = 0x01,
+ SPDK_OPAL_FALSE = 0x00,
+
+ /* cell_block
+ * 5.1.4.2.3 */
+ SPDK_OPAL_TABLE = 0x00,
+ SPDK_OPAL_STARTROW = 0x01,
+ SPDK_OPAL_ENDROW = 0x02,
+ SPDK_OPAL_STARTCOLUMN = 0x03,
+ SPDK_OPAL_ENDCOLUMN = 0x04,
+ SPDK_OPAL_VALUES = 0x01,
+
+ /* C_PIN table
+ * 5.3.2.12 */
+ SPDK_OPAL_PIN = 0x03,
+
+ /* locking table
+ * 5.7.2.2 */
+ SPDK_OPAL_RANGESTART = 0x03,
+ SPDK_OPAL_RANGELENGTH = 0x04,
+ SPDK_OPAL_READLOCKENABLED = 0x05,
+ SPDK_OPAL_WRITELOCKENABLED = 0x06,
+ SPDK_OPAL_READLOCKED = 0x07,
+ SPDK_OPAL_WRITELOCKED = 0x08,
+ SPDK_OPAL_ACTIVEKEY = 0x0A,
+
+ /* locking info table */
+ SPDK_OPAL_MAXRANGES = 0x04,
+
+ /* mbr control */
+ SPDK_OPAL_MBRENABLE = 0x01,
+ SPDK_OPAL_MBRDONE = 0x02,
+
+ /* properties */
+ SPDK_OPAL_HOSTPROPERTIES = 0x00,
+
+ /* control tokens */
+ SPDK_OPAL_STARTLIST = 0xF0,
+ SPDK_OPAL_ENDLIST = 0xF1,
+ SPDK_OPAL_STARTNAME = 0xF2,
+ SPDK_OPAL_ENDNAME = 0xF3,
+ SPDK_OPAL_CALL = 0xF8,
+ SPDK_OPAL_ENDOFDATA = 0xF9,
+ SPDK_OPAL_ENDOFSESSION = 0xFA,
+ SPDK_OPAL_STARTTRANSACTON = 0xFB,
+ SPDK_OPAL_ENDTRANSACTON = 0xFC,
+ SPDK_OPAL_EMPTYATOM = 0xFF,
+ SPDK_OPAL_WHERE = 0x00,
+
+ /* life cycle */
+ SPDK_OPAL_LIFECYCLE = 0x06,
+
+ /* Authority table */
+ SPDK_OPAL_AUTH_ENABLE = 0x05,
+
+ /* ACE table */
+ SPDK_OPAL_BOOLEAN_EXPR = 0x03,
+};
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * Table-39 Level0 Discovery Header Format
+ */
+struct spdk_opal_d0_hdr {
+ uint32_t length;
+ uint32_t revision;
+ uint32_t reserved_0;
+ uint32_t reserved_1;
+ uint8_t vendor_specfic[32];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_hdr) == 48, "Incorrect size");
+
+/*
+ * Level 0 Discovery Feature Header
+ */
+struct spdk_opal_d0_feat_hdr {
+ uint16_t code;
+ uint8_t reserved : 4;
+ uint8_t version : 4;
+ uint8_t length;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_feat_hdr) == 4, "Incorrect size");
+
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * Table-42 TPer Feature Descriptor
+ */
+struct __attribute__((packed)) spdk_opal_d0_tper_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint8_t sync : 1;
+ uint8_t async : 1;
+ uint8_t acknack : 1;
+ uint8_t buffer_management : 1;
+ uint8_t streaming : 1;
+ uint8_t reserved_1 : 1;
+ uint8_t comid_management : 1;
+ uint8_t reserved_2 : 1;
+
+ uint8_t reserved_3[3];
+ uint32_t reserved_4;
+ uint32_t reserved_5;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_tper_feat) == 16, "Incorrect size");
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * Table-43 Locking Feature Descriptor
+ */
+struct __attribute__((packed)) spdk_opal_d0_locking_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint8_t locking_supported : 1;
+ uint8_t locking_enabled : 1;
+ uint8_t locked : 1;
+ uint8_t media_encryption : 1;
+ uint8_t mbr_enabled : 1;
+ uint8_t mbr_done : 1;
+ uint8_t reserved_1 : 1;
+ uint8_t reserved_2 : 1;
+
+ uint8_t reserved_3[3];
+ uint32_t reserved_4;
+ uint32_t reserved_5;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_locking_feat) == 16, "Incorrect size");
+
+/*
+ * TCG Storage Opal Feature Set Single User Mode v1.00 r2.00
+ * 4.2.1 Single User Mode Feature Descriptor
+ */
+struct __attribute__((packed)) spdk_opal_d0_single_user_mode_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint32_t num_locking_objects;
+ uint8_t any : 1;
+ uint8_t all : 1;
+ uint8_t policy : 1;
+ uint8_t reserved_1 : 5;
+
+ uint8_t reserved_2;
+ uint16_t reserved_3;
+ uint32_t reserved_4;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_single_user_mode_feat) == 16, "Incorrect size");
+
+/*
+ * TCG Storage Opal v2.01 r1.00
+ * 3.1.1.4 Geometry Reporting Feature
+ */
+struct __attribute__((packed)) spdk_opal_d0_geo_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint8_t align : 1;
+ uint8_t reserved_1 : 7;
+ uint8_t reserved_2[7];
+ uint32_t logical_block_size;
+ uint64_t alignment_granularity;
+ uint64_t lowest_aligned_lba;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_geo_feat) == 32, "Incorrect size");
+
+/*
+ * TCG Storage Opal Feature Set Additional DataStore Tables v1.00 r1.00
+ * 4.1.1 DataStore Table Feature Descriptor
+ */
+struct __attribute__((packed)) spdk_opal_d0_datastore_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint16_t reserved_1;
+ uint16_t max_tables;
+ uint32_t max_table_size;
+ uint32_t alignment;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_datastore_feat) == 16, "Incorrect size");
+
+/*
+ * Opal SSC 1.00 r3.00 Final
+ * 3.1.1.4 Opal SSC Feature
+ */
+struct __attribute__((packed)) spdk_opal_d0_v100_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint16_t base_comid;
+ uint16_t number_comids;
+ uint8_t range_crossing : 1;
+
+ uint8_t reserved_1 : 7;
+ uint8_t reserved_2;
+ uint16_t reserved_3;
+ uint32_t reserved_4;
+ uint32_t reserved_5;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_v100_feat) == 20, "Incorrect size");
+
+/*
+ * TCG Storage Opal v2.01 r1.00
+ * 3.1.1.4 Geometry Reporting Feature
+ * 3.1.1.5 Opal SSC V2.00 Feature
+ */
+struct __attribute__((packed)) spdk_opal_d0_v200_feat {
+ struct spdk_opal_d0_feat_hdr hdr;
+ uint16_t base_comid;
+ uint16_t num_comids;
+ uint8_t range_crossing : 1;
+ uint8_t reserved_1 : 7;
+ uint16_t num_locking_admin_auth; /* Number of Locking SP Admin Authorities Supported */
+ uint16_t num_locking_user_auth;
+ uint8_t initial_pin;
+ uint8_t reverted_pin;
+
+ uint8_t reserved_2;
+ uint32_t reserved_3;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_v200_feat) == 20, "Incorrect size");
+
+/*
+ * TCG Storage Architecture Core Spec v2.01 r1.00
+ * 3.2.3 ComPackets, Packets & Subpackets
+ */
+
+/* ComPacket header format
+ * (big-endian)
+ */
+struct __attribute__((packed)) spdk_opal_compacket {
+ uint32_t reserved;
+ uint8_t comid[2];
+ uint8_t extended_comid[2];
+ uint32_t outstanding_data;
+ uint32_t min_transfer;
+ uint32_t length;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_compacket) == 20, "Incorrect size");
+
+/* packet header format */
+struct __attribute__((packed)) spdk_opal_packet {
+ uint32_t session_tsn;
+ uint32_t session_hsn;
+ uint32_t seq_number;
+ uint16_t reserved;
+ uint16_t ack_type;
+ uint32_t acknowledgment;
+ uint32_t length;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_packet) == 24, "Incorrect size");
+
+/* data subpacket header */
+struct __attribute__((packed)) spdk_opal_data_subpacket {
+ uint8_t reserved[6];
+ uint16_t kind;
+ uint32_t length;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_data_subpacket) == 12, "Incorrect size");
+
+#endif
diff --git a/src/spdk/include/spdk/pci_ids.h b/src/spdk/include/spdk/pci_ids.h
new file mode 100644
index 000000000..816eb0a84
--- /dev/null
+++ b/src/spdk/include/spdk/pci_ids.h
@@ -0,0 +1,139 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * PCI device ID list
+ */
+
+#ifndef SPDK_PCI_IDS
+#define SPDK_PCI_IDS
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_PCI_ANY_ID 0xffff
+#define SPDK_PCI_VID_INTEL 0x8086
+#define SPDK_PCI_VID_MEMBLAZE 0x1c5f
+#define SPDK_PCI_VID_SAMSUNG 0x144d
+#define SPDK_PCI_VID_VIRTUALBOX 0x80ee
+#define SPDK_PCI_VID_VIRTIO 0x1af4
+#define SPDK_PCI_VID_CNEXLABS 0x1d1d
+#define SPDK_PCI_VID_VMWARE 0x15ad
+
+#define SPDK_PCI_CLASS_ANY_ID 0xffffff
+/**
+ * PCI class code for NVMe devices.
+ *
+ * Base class code 01h: mass storage
+ * Subclass code 08h: non-volatile memory
+ * Programming interface 02h: NVM Express
+ */
+#define SPDK_PCI_CLASS_NVME 0x010802
+
+#define PCI_DEVICE_ID_INTEL_IDXD 0x0b25
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0 0x6f50
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1 0x6f51
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021
+
+#define PCI_DEVICE_ID_INTEL_IOAT_ICX 0x0b00
+
+#define PCI_DEVICE_ID_VIRTIO_BLK_LEGACY 0x1001
+#define PCI_DEVICE_ID_VIRTIO_SCSI_LEGACY 0x1004
+#define PCI_DEVICE_ID_VIRTIO_BLK_MODERN 0x1042
+#define PCI_DEVICE_ID_VIRTIO_SCSI_MODERN 0x1048
+
+#define PCI_DEVICE_ID_VIRTIO_VHOST_USER 0x1017
+
+#define PCI_DEVICE_ID_INTEL_VMD 0x201d
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_PCI_IDS */
diff --git a/src/spdk/include/spdk/pipe.h b/src/spdk/include/spdk/pipe.h
new file mode 100644
index 000000000..36d7eb630
--- /dev/null
+++ b/src/spdk/include/spdk/pipe.h
@@ -0,0 +1,149 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * A pipe that is intended for buffering data between a source, such as
+ * a socket, and a sink, such as a parser, or vice versa. Any time data
+ * is received in units that differ from the units it is consumed
+ * in may benefit from using a pipe.
+ *
+ * The pipe is not thread safe. Only a single thread can act as both
+ * the producer (called the writer) and the consumer (called the reader).
+ */
+
+#ifndef SPDK_PIPE_H
+#define SPDK_PIPE_H
+
+#include "spdk/stdinc.h"
+
+struct spdk_pipe;
+
+/**
+ * Construct a pipe around the given memory buffer. The pipe treats the memory
+ * buffer as a circular ring of bytes.
+ *
+ * The available size for writing will be one less byte than provided. A single
+ * byte must be reserved to distinguish queue full from queue empty conditions.
+ *
+ * \param buf The data buffer that backs this pipe.
+ * \param sz The size of the data buffer.
+ *
+ * \return spdk_pipe. The new pipe.
+ */
+struct spdk_pipe *spdk_pipe_create(void *buf, uint32_t sz);
+
+/**
+ * Destroys the pipe. This does not release the buffer, but does
+ * make it safe for the user to release the buffer.
+ *
+ * \param pipe The pipe to operate on.
+ */
+void spdk_pipe_destroy(struct spdk_pipe *pipe);
+
+/**
+ * Acquire memory from the pipe for writing.
+ *
+ * This function will acquire up to sz bytes from the pipe to be used for
+ * writing. It may return fewer total bytes.
+ *
+ * The memory is only marked as consumed upon a call to spdk_pipe_writer_advance().
+ * Multiple calls to this function without calling advance return the same region
+ * of memory.
+ *
+ * \param pipe The pipe to operate on.
+ * \param sz The size requested.
+ * \param iovs A two element iovec array that will be populated with the requested memory.
+ *
+ * \return The total bytes obtained. May be 0.
+ */
+int spdk_pipe_writer_get_buffer(struct spdk_pipe *pipe, uint32_t sz, struct iovec *iovs);
+
+/**
+ * Advance the write pointer by the given number of bytes
+ *
+ * The user can obtain memory from the pipe using spdk_pipe_writer_get_buffer(),
+ * but only calling this function marks it as consumed. The user is not required
+ * to advance the same number of bytes as was obtained from spdk_pipe_writer_get_buffer().
+ * However, upon calling this function, the previous memory region is considered
+ * invalid and the user must call spdk_pipe_writer_get_buffer() again to obtain
+ * additional memory.
+ *
+ * The user cannot advance past the current read location.
+ *
+ * \param pipe The pipe to operate on.
+ * \param count The number of bytes to advance.
+ *
+ * \return On error, a negated errno. On success, 0.
+ */
+int spdk_pipe_writer_advance(struct spdk_pipe *pipe, uint32_t count);
+
+/**
+ * Get the number of bytes available to read from the pipe.
+ *
+ * \param pipe The pipe to operate on.
+ *
+ * \return The number of bytes available for reading.
+ */
+uint32_t spdk_pipe_reader_bytes_available(struct spdk_pipe *pipe);
+
+/**
+ * Obtain previously written memory from the pipe for reading.
+ *
+ * This call populates the two element iovec provided with a region
+ * of memory containing the next available data in the pipe. The size
+ * will be up to sz bytes, but may be less.
+ *
+ * Calling this function does not mark the memory as consumed. Calling this function
+ * twice without a call to spdk_pipe_reader_advance in between will return the same
+ * region of memory.
+ *
+ * \param pipe The pipe to operate on.
+ * \param sz The size requested.
+ * \param iovs A two element iovec array that will be populated with the requested memory.
+ *
+ * \return On error, a negated errno. On success, the total number of bytes available.
+ */
+int spdk_pipe_reader_get_buffer(struct spdk_pipe *pipe, uint32_t sz, struct iovec *iovs);
+
+/**
+ * Mark memory as read, making it available for writing. The user is not required
+ * to advance the same number of bytes as was obtained by a previous call to
+ * spdk_pipe_reader_get_buffer().
+ *
+ * \param pipe The pipe to operate on.
+ * \param count The number of bytes to advance.
+ *
+ * \return On error, a negated errno. On success, 0.
+ */
+int spdk_pipe_reader_advance(struct spdk_pipe *pipe, uint32_t count);
+
+#endif
diff --git a/src/spdk/include/spdk/queue.h b/src/spdk/include/spdk/queue.h
new file mode 100644
index 000000000..24e2e2e20
--- /dev/null
+++ b/src/spdk/include/spdk/queue.h
@@ -0,0 +1,79 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_QUEUE_H
+#define SPDK_QUEUE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/cdefs.h>
+#include <sys/queue.h>
+
+/*
+ * The SPDK NVMe driver was originally ported from FreeBSD, which makes
+ * use of features in FreeBSD's queue.h that do not exist on Linux.
+ * Include a header with these additional features on all non-FreeBSD platforms.
+ */
+#ifndef __FreeBSD__
+#include "spdk/queue_extras.h"
+#endif
+
+/*
+ * scan-build can't follow double pointers in queues and often assumes
+ * that removed elements are still on the list. For analyzer builds only,
+ * redefine TAILQ_REMOVE with a post-removal assert loop to silence it.
+ */
+#ifdef __clang_analyzer__
+#undef TAILQ_REMOVE
+#define TAILQ_REMOVE(head, elm, field) do { \
+ __typeof__(elm) _elm; \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+ /* make sure the removed elm is not on the list anymore */ \
+ TAILQ_FOREACH(_elm, head, field) { \
+ assert(_elm != (elm)); \
+ } \
+} while (0)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/queue_extras.h b/src/spdk/include/spdk/queue_extras.h
new file mode 100644
index 000000000..904625e4d
--- /dev/null
+++ b/src/spdk/include/spdk/queue_extras.h
@@ -0,0 +1,343 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ * $FreeBSD$
+ */
+
+#ifndef SPDK_QUEUE_EXTRAS_H
+#define SPDK_QUEUE_EXTRAS_H
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may be traversed in either direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ * SLIST LIST STAILQ TAILQ
+ * _HEAD + + + +
+ * _HEAD_INITIALIZER + + + +
+ * _ENTRY + + + +
+ * _INIT + + + +
+ * _EMPTY + + + +
+ * _FIRST + + + +
+ * _NEXT + + + +
+ * _PREV - + - +
+ * _LAST - - + +
+ * _FOREACH + + + +
+ * _FOREACH_FROM + + + +
+ * _FOREACH_SAFE + + + +
+ * _FOREACH_FROM_SAFE + + + +
+ * _FOREACH_REVERSE - - - +
+ * _FOREACH_REVERSE_FROM - - - +
+ * _FOREACH_REVERSE_SAFE - - - +
+ * _FOREACH_REVERSE_FROM_SAFE - - - +
+ * _INSERT_HEAD + + + +
+ * _INSERT_BEFORE - + - +
+ * _INSERT_AFTER + + + +
+ * _INSERT_TAIL - - + +
+ * _CONCAT - - + +
+ * _REMOVE_AFTER + - + -
+ * _REMOVE_HEAD + - + -
+ * _REMOVE + + + +
+ * _SWAP + + + +
+ *
+ */
+
+#include "spdk/util.h"
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define STAILQ_HEAD(name, type) \
+struct name { \
+ struct type *stqh_first;/* first element */ \
+ struct type **stqh_last;/* addr of last next element */ \
+}
+
+#define STAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).stqh_first }
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL)
+
+#define STAILQ_FIRST(head) ((head)->stqh_first)
+
+#define STAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \
+ (var); \
+ (var) = STAILQ_NEXT((var), field))
+
+#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = STAILQ_FIRST((head)); \
+ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \
+ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define STAILQ_LAST(head, type, field) \
+ (STAILQ_EMPTY((head)) ? NULL : \
+ SPDK_CONTAINEROF((head)->stqh_last, struct type, field.stqe_next))
+
+#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
+
+#define STAILQ_REMOVE_AFTER(head, elm, field) do { \
+ if ((STAILQ_NEXT(elm, field) = \
+ STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+
+#define STAILQ_SWAP(head1, head2, type) do { \
+ struct type *swap_first = STAILQ_FIRST(head1); \
+ struct type **swap_last = (head1)->stqh_last; \
+ STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_FIRST(head2) = swap_first; \
+ (head2)->stqh_last = swap_last; \
+ if (STAILQ_EMPTY(head1)) \
+ (head1)->stqh_last = &STAILQ_FIRST(head1); \
+ if (STAILQ_EMPTY(head2)) \
+ (head2)->stqh_last = &STAILQ_FIRST(head2); \
+} while (0)
+
+/*
+ * List declarations.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define QMD_LIST_CHECK_HEAD(head, field) do { \
+ if (LIST_FIRST((head)) != NULL && \
+ LIST_FIRST((head))->field.le_prev != \
+ &LIST_FIRST((head))) \
+ panic("Bad list head %p first->prev != head", (head)); \
+} while (0)
+
+#define QMD_LIST_CHECK_NEXT(elm, field) do { \
+ if (LIST_NEXT((elm), field) != NULL && \
+ LIST_NEXT((elm), field)->field.le_prev != \
+ &((elm)->field.le_next)) \
+ panic("Bad link elm %p next->prev != elm", (elm)); \
+} while (0)
+
+#define QMD_LIST_CHECK_PREV(elm, field) do { \
+ if (*(elm)->field.le_prev != (elm)) \
+ panic("Bad link elm %p prev->next != elm", (elm)); \
+} while (0)
+#else
+#define QMD_LIST_CHECK_HEAD(head, field)
+#define QMD_LIST_CHECK_NEXT(elm, field)
+#define QMD_LIST_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+
+#define LIST_FIRST(head) ((head)->lh_first)
+
+#define LIST_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : LIST_FIRST((head))); \
+ (var); \
+ (var) = LIST_NEXT((var), field))
+
+#define LIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = LIST_FIRST((head)); \
+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define LIST_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : LIST_FIRST((head))); \
+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_PREV(elm, head, type, field) \
+ ((elm)->field.le_prev == &LIST_FIRST((head)) ? NULL : \
+ SPDK_CONTAINEROF((elm)->field.le_prev, struct type, field.le_next))
+
+#define LIST_SWAP(head1, head2, type, field) do { \
+ struct type *swap_tmp = LIST_FIRST((head1)); \
+ LIST_FIRST((head1)) = LIST_FIRST((head2)); \
+ LIST_FIRST((head2)) = swap_tmp; \
+ if ((swap_tmp = LIST_FIRST((head1))) != NULL) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head1)); \
+ if ((swap_tmp = LIST_FIRST((head2))) != NULL) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head2)); \
+} while (0)
+
+/*
+ * Tail queue functions.
+ */
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define QMD_TAILQ_CHECK_HEAD(head, field) do { \
+ if (!TAILQ_EMPTY(head) && \
+ TAILQ_FIRST((head))->field.tqe_prev != \
+ &TAILQ_FIRST((head))) \
+ panic("Bad tailq head %p first->prev != head", (head)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_TAIL(head, field) do { \
+ if (*(head)->tqh_last != NULL) \
+ panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \
+ if (TAILQ_NEXT((elm), field) != NULL && \
+ TAILQ_NEXT((elm), field)->field.tqe_prev != \
+ &((elm)->field.tqe_next)) \
+ panic("Bad link elm %p next->prev != elm", (elm)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_PREV(elm, field) do { \
+ if (*(elm)->field.tqe_prev != (elm)) \
+ panic("Bad link elm %p prev->next != elm", (elm)); \
+} while (0)
+#else
+#define QMD_TAILQ_CHECK_HEAD(head, field)
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ struct type *swap_first = (head1)->tqh_first; \
+ struct type **swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ if ((swap_first = (head1)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ if ((swap_first = (head2)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
+
+#endif
diff --git a/src/spdk/include/spdk/reduce.h b/src/spdk/include/spdk/reduce.h
new file mode 100644
index 000000000..f67c484fb
--- /dev/null
+++ b/src/spdk/include/spdk/reduce.h
@@ -0,0 +1,253 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * SPDK block compression
+ */
+
+#ifndef SPDK_REDUCE_H_
+#define SPDK_REDUCE_H_
+
+#include "spdk/uuid.h"
+
+#define REDUCE_MAX_IOVECS 17
+
+/**
+ * Describes the parameters of an spdk_reduce_vol.
+ */
+struct spdk_reduce_vol_params {
+ struct spdk_uuid uuid;
+
+ /**
+ * Size in bytes of the IO unit for the backing device. This
+ * is the unit in which space is allocated from the backing
+ * device, and the unit in which data is read from or written
+ * to the backing device. Must be greater than 0.
+ */
+ uint32_t backing_io_unit_size;
+
+ /**
+ * Size in bytes of a logical block. This is the unit in
+ * which users read or write data to the compressed volume.
+ * Must be greater than 0.
+ */
+ uint32_t logical_block_size;
+
+ /**
+ * Size in bytes of a chunk on the compressed volume. This
+ * is the unit in which data is compressed. Must be an even
+ * multiple of backing_io_unit_size and logical_block_size.
+ * Must be greater than 0.
+ */
+ uint32_t chunk_size;
+
+ /**
+ * Total size in bytes of the compressed volume. During
+ * initialization, the size is calculated from the size of
+ * the backing device, so this must be set to 0 in the
+ * structure passed to spdk_reduce_vol_init(). After
+ * initialization, or a successful load, this field will
+ * contain the total size which will be an even multiple
+ * of the chunk size.
+ */
+ uint64_t vol_size;
+};
+
+struct spdk_reduce_vol;
+
+typedef void (*spdk_reduce_vol_op_complete)(void *ctx, int reduce_errno);
+typedef void (*spdk_reduce_vol_op_with_handle_complete)(void *ctx,
+ struct spdk_reduce_vol *vol,
+ int reduce_errno);
+
+/**
+ * Defines function type for callback functions called when backing_dev
+ * operations are complete.
+ *
+ * \param cb_arg Callback argument
+ * \param reduce_errno Completion status of backing_dev operation
+ * Negative values indicate negated errno value
+ * 0 indicates successful readv/writev/unmap operation
+ * Positive value indicates successful compress/decompress
+ * operations; number indicates number of bytes written to
+ * destination iovs
+ */
+typedef void (*spdk_reduce_dev_cpl)(void *cb_arg, int reduce_errno);
+
+struct spdk_reduce_vol_cb_args {
+ spdk_reduce_dev_cpl cb_fn;
+ void *cb_arg;
+};
+
+struct spdk_reduce_backing_dev {
+ void (*readv)(struct spdk_reduce_backing_dev *dev, struct iovec *iov, int iovcnt,
+ uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args);
+
+ void (*writev)(struct spdk_reduce_backing_dev *dev, struct iovec *iov, int iovcnt,
+ uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args);
+
+ void (*unmap)(struct spdk_reduce_backing_dev *dev,
+ uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args);
+
+ void (*compress)(struct spdk_reduce_backing_dev *dev,
+ struct iovec *src_iov, int src_iovcnt,
+ struct iovec *dst_iov, int dst_iovcnt,
+ struct spdk_reduce_vol_cb_args *args);
+
+ void (*decompress)(struct spdk_reduce_backing_dev *dev,
+ struct iovec *src_iov, int src_iovcnt,
+ struct iovec *dst_iov, int dst_iovcnt,
+ struct spdk_reduce_vol_cb_args *args);
+
+ uint64_t blockcnt;
+ uint32_t blocklen;
+};
+
+/**
+ * Get the UUID for a libreduce compressed volume.
+ *
+ * \param vol Previously loaded or initialized compressed volume.
+ * \return UUID for the compressed volume.
+ */
+const struct spdk_uuid *spdk_reduce_vol_get_uuid(struct spdk_reduce_vol *vol);
+
+/**
+ * Initialize a new libreduce compressed volume.
+ *
+ * \param params Parameters for the new volume.
+ * \param backing_dev Structure describing the backing device to use for the new volume.
+ * \param pm_file_dir Directory to use for creation of the persistent memory file to
+ * use for the new volume. This function will append the UUID as
+ * the filename to create in this directory.
+ * \param cb_fn Callback function to signal completion of the initialization process.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_init(struct spdk_reduce_vol_params *params,
+ struct spdk_reduce_backing_dev *backing_dev,
+ const char *pm_file_dir,
+ spdk_reduce_vol_op_with_handle_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Load an existing libreduce compressed volume.
+ *
+ * \param backing_dev Structure describing the backing device containing the compressed volume.
+ * \param cb_fn Callback function to signal completion of the loading process.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_load(struct spdk_reduce_backing_dev *backing_dev,
+ spdk_reduce_vol_op_with_handle_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Unload a previously initialized or loaded libreduce compressed volume.
+ *
+ * \param vol Volume to unload.
+ * \param cb_fn Callback function to signal completion of the unload process.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_unload(struct spdk_reduce_vol *vol,
+ spdk_reduce_vol_op_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Destroy an existing libreduce compressed volume.
+ *
+ * This will zero the metadata region on the backing device and delete the associated
+ * pm metadata file. If the backing device does not contain a compressed volume, the
+ * cb_fn will be called with error status without modifying the backing device nor
+ * deleting a pm file.
+ *
+ * \param backing_dev Structure describing the backing device containing the compressed volume.
+ * \param cb_fn Callback function to signal completion of the destruction process.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_destroy(struct spdk_reduce_backing_dev *backing_dev,
+ spdk_reduce_vol_op_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Read data from a libreduce compressed volume.
+ *
+ * This function will only read from logical blocks on the compressed volume that
+ * fall within the same chunk.
+ *
+ * \param vol Volume to read data.
+ * \param iov iovec array describing the data to be read
+ * \param iovcnt Number of elements in the iovec array
+ * \param offset Offset (in logical blocks) to read the data on the compressed volume
+ * \param length Length (in logical blocks) of the data to read
+ * \param cb_fn Callback function to signal completion of the readv operation.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_readv(struct spdk_reduce_vol *vol,
+ struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
+ spdk_reduce_vol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Write data to a libreduce compressed volume.
+ *
+ * This function will only write to logical blocks on the compressed volume that
+ * fall within the same chunk.
+ *
+ * \param vol Volume to write data.
+ * \param iov iovec array describing the data to be written
+ * \param iovcnt Number of elements in the iovec array
+ * \param offset Offset (in logical blocks) to write the data on the compressed volume
+ * \param length Length (in logical blocks) of the data to write
+ * \param cb_fn Callback function to signal completion of the writev operation.
+ * \param cb_arg Argument to pass to the callback function.
+ */
+void spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
+ struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
+ spdk_reduce_vol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Get the params structure for a libreduce compressed volume.
+ *
+ * This function will populate the given params structure for a given volume.
+ *
+ * \param vol Previously loaded or initialized compressed volume.
+ * \return params structure for the compressed volume.
+ */
+const struct spdk_reduce_vol_params *spdk_reduce_vol_get_params(struct spdk_reduce_vol *vol);
+
+/**
+ * Dump out key information for a libreduce compressed volume and its PMEM.
+ *
+ * This function will print key information for a given volume and its PMEM.
+ *
+ * \param vol Previously loaded or initialized compressed volume.
+ */
+void spdk_reduce_vol_print_info(struct spdk_reduce_vol *vol);
+#endif /* SPDK_REDUCE_H_ */
diff --git a/src/spdk/include/spdk/rpc.h b/src/spdk/include/spdk/rpc.h
new file mode 100644
index 000000000..b85606e43
--- /dev/null
+++ b/src/spdk/include/spdk/rpc.h
@@ -0,0 +1,155 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_RPC_CONFIG_H_
+#define SPDK_RPC_CONFIG_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/jsonrpc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Verify correctness of registered RPC methods and aliases.
+ *
+ * Incorrect registrations include:
+ * - multiple RPC methods registered with the same name
+ * - RPC alias registered with a method that does not exist
+ * - RPC alias registered that points to another alias
+ *
+ * \return true if registrations are all correct, false otherwise
+ */
+bool spdk_rpc_verify_methods(void);
+
+/**
+ * Start listening for RPC connections.
+ *
+ * \param listen_addr Listening address.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_rpc_listen(const char *listen_addr);
+
+/**
+ * Poll the RPC server.
+ */
+void spdk_rpc_accept(void);
+
+/**
+ * Stop listening for RPC connections.
+ */
+void spdk_rpc_close(void);
+
+/**
+ * Function signature for RPC request handlers.
+ *
+ * \param request RPC request to handle.
+ * \param params Parameters associated with the RPC request.
+ */
+typedef void (*spdk_rpc_method_handler)(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params);
+
+/**
+ * Register an RPC method.
+ *
+ * \param method Name for the registered method.
+ * \param func Function registered for this method to handle the RPC request.
+ * \param state_mask State mask of the registered method. If the bit of the state of
+ * the RPC server is set in the state_mask, the method is allowed. Otherwise, it is rejected.
+ */
+void spdk_rpc_register_method(const char *method, spdk_rpc_method_handler func,
+ uint32_t state_mask);
+
+/**
+ * Register a deprecated alias for an RPC method.
+ *
+ * \param method Name for the registered method.
+ * \param alias Alias for the registered method.
+ */
+void spdk_rpc_register_alias_deprecated(const char *method, const char *alias);
+
+/**
+ * Check if \c method is allowed for \c state_mask
+ *
+ * \param method Method name
+ * \param state_mask state mask to check against
+ * \return 0 if method is allowed or negative error code:
+ * -EPERM method is not allowed
+ * -ENOENT method not found
+ */
+int spdk_rpc_is_method_allowed(const char *method, uint32_t state_mask);
+
+#define SPDK_RPC_STARTUP 0x1
+#define SPDK_RPC_RUNTIME 0x2
+
+/* Give SPDK_RPC_REGISTER a higher execution priority than
+ * SPDK_RPC_REGISTER_ALIAS_DEPRECATED to ensure all of the RPCs are registered
+ * before we try registering any aliases. Some older versions of clang may
+ * otherwise execute the constructors in a different order than
+ * defined in the source file (see issue #892).
+ */
+#define SPDK_RPC_REGISTER(method, func, state_mask) \
+static void __attribute__((constructor(1000))) rpc_register_##func(void) \
+{ \
+ spdk_rpc_register_method(method, func, state_mask); \
+}
+
+#define SPDK_RPC_REGISTER_ALIAS_DEPRECATED(method, alias) \
+static void __attribute__((constructor(1001))) rpc_register_##alias(void) \
+{ \
+ spdk_rpc_register_alias_deprecated(#method, #alias); \
+}
+
+/**
+ * Set the state mask of the RPC server. Any RPC method whose state mask has
+ * the bit of the RPC server's state set is allowed.
+ *
+ * \param state_mask New state mask of the RPC server.
+ */
+void spdk_rpc_set_state(uint32_t state_mask);
+
+/**
+ * Get the current state of the RPC server.
+ *
+ * \return The current state of the RPC server.
+ */
+uint32_t spdk_rpc_get_state(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/scsi.h b/src/spdk/include/spdk/scsi.h
new file mode 100644
index 000000000..1b3f75577
--- /dev/null
+++ b/src/spdk/include/spdk/scsi.h
@@ -0,0 +1,571 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * SCSI to bdev translation layer
+ */
+
+#ifndef SPDK_SCSI_H
+#define SPDK_SCSI_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Defines for SPDK tracing framework */
+#define OWNER_SCSI_DEV 0x10
+#define OBJECT_SCSI_TASK 0x10
+#define TRACE_GROUP_SCSI 0x2
+#define TRACE_SCSI_TASK_DONE SPDK_TPOINT_ID(TRACE_GROUP_SCSI, 0x0)
+#define TRACE_SCSI_TASK_START SPDK_TPOINT_ID(TRACE_GROUP_SCSI, 0x1)
+
+#define SPDK_SCSI_MAX_DEVS 1024
+#define SPDK_SCSI_DEV_MAX_LUN 64
+#define SPDK_SCSI_DEV_MAX_PORTS 4
+#define SPDK_SCSI_DEV_MAX_NAME 255
+
+#define SPDK_SCSI_PORT_MAX_NAME_LENGTH 255
+#define SPDK_SCSI_MAX_TRANSPORT_ID_LENGTH 255
+
+enum spdk_scsi_data_dir {
+ SPDK_SCSI_DIR_NONE = 0,
+ SPDK_SCSI_DIR_TO_DEV = 1,
+ SPDK_SCSI_DIR_FROM_DEV = 2,
+};
+
+enum spdk_scsi_task_func {
+ SPDK_SCSI_TASK_FUNC_ABORT_TASK = 0,
+ SPDK_SCSI_TASK_FUNC_ABORT_TASK_SET,
+ SPDK_SCSI_TASK_FUNC_CLEAR_TASK_SET,
+ SPDK_SCSI_TASK_FUNC_LUN_RESET,
+};
+
+/*
+ * SAM does not define the value for these service responses. Each transport
+ * (e.g. SAS, FC, iSCSI) will map these values to transport-specific codes,
+ * and may add their own.
+ */
+enum spdk_scsi_task_mgmt_resp {
+ SPDK_SCSI_TASK_MGMT_RESP_COMPLETE,
+ SPDK_SCSI_TASK_MGMT_RESP_SUCCESS,
+ SPDK_SCSI_TASK_MGMT_RESP_REJECT,
+ SPDK_SCSI_TASK_MGMT_RESP_INVALID_LUN,
+ SPDK_SCSI_TASK_MGMT_RESP_TARGET_FAILURE,
+ SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED
+};
+
+struct spdk_scsi_task;
+typedef void (*spdk_scsi_task_cpl)(struct spdk_scsi_task *task);
+typedef void (*spdk_scsi_task_free)(struct spdk_scsi_task *task);
+
+struct spdk_scsi_task {
+ uint8_t status;
+ uint8_t function; /* task mgmt function */
+ uint8_t response; /* task mgmt response */
+
+ struct spdk_scsi_lun *lun;
+ struct spdk_scsi_port *target_port;
+ struct spdk_scsi_port *initiator_port;
+
+ spdk_scsi_task_cpl cpl_fn;
+ spdk_scsi_task_free free_fn;
+
+ uint32_t ref;
+ uint32_t transfer_len;
+ uint32_t dxfer_dir;
+ uint32_t length;
+
+ /**
+ * Amount of data actually transferred. Can be less than requested
+ * transfer_len - e.g. SCSI INQUIRY.
+ */
+ uint32_t data_transferred;
+
+ uint64_t offset;
+
+ uint8_t *cdb;
+
+ /**
+ * \internal
+ * Size of internal buffer or zero when iov.iov_base is not internally managed.
+ */
+ uint32_t alloc_len;
+ /**
+ * \internal
+ * iov is internal buffer. Use iovs to access elements of IO.
+ */
+ struct iovec iov;
+ struct iovec *iovs;
+ uint16_t iovcnt;
+
+ uint8_t sense_data[32];
+ size_t sense_data_len;
+
+ void *bdev_io;
+
+ TAILQ_ENTRY(spdk_scsi_task) scsi_link;
+
+ uint32_t abort_id;
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+struct spdk_scsi_port;
+struct spdk_scsi_dev;
+struct spdk_scsi_lun;
+struct spdk_scsi_lun_desc;
+
+typedef void (*spdk_scsi_lun_remove_cb_t)(struct spdk_scsi_lun *, void *);
+typedef void (*spdk_scsi_dev_destruct_cb_t)(void *cb_arg, int rc);
+
+/**
+ * Initialize SCSI layer.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_scsi_init(void);
+
+/**
+ * Stop and clean the SCSI layer.
+ */
+void spdk_scsi_fini(void);
+
+/**
+ * Get the LUN id of the given logical unit.
+ *
+ * \param lun Logical unit.
+ *
+ * \return LUN id of the logical unit.
+ */
+int spdk_scsi_lun_get_id(const struct spdk_scsi_lun *lun);
+
+/**
+ * Get the name of the bdev associated with the given logical unit.
+ *
+ * \param lun Logical unit.
+ *
+ * \return the name of the bdev associated with the logical unit.
+ */
+const char *spdk_scsi_lun_get_bdev_name(const struct spdk_scsi_lun *lun);
+
+/**
+ * Get the SCSI device associated with the given logical unit.
+ *
+ * \param lun Logical unit.
+ *
+ * \return the SCSI device associated with the logical unit.
+ */
+const struct spdk_scsi_dev *spdk_scsi_lun_get_dev(const struct spdk_scsi_lun *lun);
+
+/**
+ * Check if the logical unit is hot removing.
+ *
+ * \param lun Logical unit
+ *
+ * \return true if removing, false otherwise.
+ */
+bool spdk_scsi_lun_is_removing(const struct spdk_scsi_lun *lun);
+
+/**
+ * Get the name of the given SCSI device.
+ *
+ * \param dev SCSI device.
+ *
+ * \return the name of the SCSI device on success, or NULL on failure.
+ */
+const char *spdk_scsi_dev_get_name(const struct spdk_scsi_dev *dev);
+
+/**
+ * Get the id of the given SCSI device.
+ *
+ * \param dev SCSI device.
+ *
+ * \return the id of the SCSI device.
+ */
+int spdk_scsi_dev_get_id(const struct spdk_scsi_dev *dev);
+
+/**
+ * Get the logical unit of the given SCSI device whose id is lun_id.
+ *
+ * \param dev SCSI device.
+ * \param lun_id Id of the logical unit.
+ *
+ * \return the logical unit on success, or NULL on failure.
+ */
+struct spdk_scsi_lun *spdk_scsi_dev_get_lun(struct spdk_scsi_dev *dev, int lun_id);
+
+/**
+ * Check whether the SCSI device has any pending task.
+ *
+ * \param dev SCSI device.
+ * \param initiator_port Check tasks only from the initiator if specified, or
+ * all tasks otherwise.
+ *
+ * \return true if the SCSI device has any pending task, or false otherwise.
+ */
+bool spdk_scsi_dev_has_pending_tasks(const struct spdk_scsi_dev *dev,
+ const struct spdk_scsi_port *initiator_port);
+
+/**
+ * Destruct the SCSI device.
+ *
+ * \param dev SCSI device.
+ * \param cb_fn Callback function.
+ * \param cb_arg Argument to callback function.
+ */
+void spdk_scsi_dev_destruct(struct spdk_scsi_dev *dev,
+ spdk_scsi_dev_destruct_cb_t cb_fn, void *cb_arg);
+
+/**
+ * Execute the SCSI management task.
+ *
+ * The task can be constructed by the function spdk_scsi_task_construct().
+ * Code of task management function to be executed is set before calling this API.
+ *
+ * \param dev SCSI device.
+ * \param task SCSI task to be executed.
+ */
+void spdk_scsi_dev_queue_mgmt_task(struct spdk_scsi_dev *dev, struct spdk_scsi_task *task);
+
+/**
+ * Execute the SCSI task.
+ *
+ * The task can be constructed by the function spdk_scsi_task_construct().
+ *
+ * \param dev SCSI device.
+ * \param task Task to be executed.
+ */
+void spdk_scsi_dev_queue_task(struct spdk_scsi_dev *dev, struct spdk_scsi_task *task);
+
+/**
+ * Add a new port to the given SCSI device.
+ *
+ * \param dev SCSI device.
+ * \param id Port id.
+ * \param name Port name.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_scsi_dev_add_port(struct spdk_scsi_dev *dev, uint64_t id, const char *name);
+
+/**
+ * Delete a specified port of the given SCSI device.
+ *
+ * \param dev SCSI device.
+ * \param id Port id.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_scsi_dev_delete_port(struct spdk_scsi_dev *dev, uint64_t id);
+
+/**
+ * Get the port of the given SCSI device whose port ID is id.
+ *
+ * \param dev SCSI device.
+ * \param id Port id.
+ *
+ * \return the port of the SCSI device on success, or NULL on failure.
+ */
+struct spdk_scsi_port *spdk_scsi_dev_find_port_by_id(struct spdk_scsi_dev *dev, uint64_t id);
+
+/**
+ * Allocate I/O channels for all LUNs of the given SCSI device.
+ *
+ * \param dev SCSI device.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_scsi_dev_allocate_io_channels(struct spdk_scsi_dev *dev);
+
+/**
+ * Free I/O channels from all LUNs of the given SCSI device.
+ */
+void spdk_scsi_dev_free_io_channels(struct spdk_scsi_dev *dev);
+
+/**
+ * Construct a SCSI device object using the given parameters.
+ *
+ * \param name Name for the SCSI device.
+ * \param bdev_name_list List of bdev names to attach to the LUNs for this SCSI
+ * device.
+ * \param lun_id_list List of LUN IDs for the LUN in this SCSI device. Caller is
+ * responsible for managing the memory containing this list. lun_id_list[x] is
+ * the LUN ID for bdev_name_list[x].
+ * \param num_luns Number of entries in bdev_name_list and lun_id_list.
+ * \param protocol_id SCSI SPC protocol identifier to report in INQUIRY data
+ * \param hotremove_cb Callback to lun hotremoval. Will be called once hotremove
+ * is first triggered.
+ * \param hotremove_ctx Additional argument to hotremove_cb.
+ *
+ * \return the constructed spdk_scsi_dev object.
+ */
+struct spdk_scsi_dev *spdk_scsi_dev_construct(const char *name,
+ const char *bdev_name_list[],
+ int *lun_id_list,
+ int num_luns,
+ uint8_t protocol_id,
+ void (*hotremove_cb)(const struct spdk_scsi_lun *, void *),
+ void *hotremove_ctx);
+
+/**
+ * Delete a logical unit of the given SCSI device.
+ *
+ * \param dev SCSI device.
+ * \param lun Logical unit to delete.
+ */
+void spdk_scsi_dev_delete_lun(struct spdk_scsi_dev *dev, struct spdk_scsi_lun *lun);
+
+/**
+ * Add a new logical unit to the given SCSI device.
+ *
+ * \param dev SCSI device.
+ * \param bdev_name Name of the bdev attached to the logical unit.
+ * \param lun_id LUN id for the new logical unit.
+ * \param hotremove_cb Callback to lun hotremoval. Will be called once hotremove
+ * is first triggered.
+ * \param hotremove_ctx Additional argument to hotremove_cb.
+ */
+int spdk_scsi_dev_add_lun(struct spdk_scsi_dev *dev, const char *bdev_name, int lun_id,
+ void (*hotremove_cb)(const struct spdk_scsi_lun *, void *),
+ void *hotremove_ctx);
+
+/**
+ * Create a new SCSI port.
+ *
+ * \param id Port id.
+ * \param index Port index.
+ * \param name Port Name.
+ *
+ * \return a pointer to the created SCSI port on success, or NULL on failure.
+ */
+struct spdk_scsi_port *spdk_scsi_port_create(uint64_t id, uint16_t index, const char *name);
+
+/**
+ * Free the SCSI port.
+ *
+ * \param pport SCSI port to free.
+ */
+void spdk_scsi_port_free(struct spdk_scsi_port **pport);
+
+/**
+ * Get the name of the SCSI port.
+ *
+ * \param port SCSI port to query.
+ *
+ * \return the name of the SCSI port.
+ */
+const char *spdk_scsi_port_get_name(const struct spdk_scsi_port *port);
+
+/**
+ * Construct a new SCSI task.
+ *
+ * \param task SCSI task to construct.
+ * \param cpl_fn Called when the task is completed.
+ * \param free_fn Called when the task is freed
+ */
+void spdk_scsi_task_construct(struct spdk_scsi_task *task,
+ spdk_scsi_task_cpl cpl_fn,
+ spdk_scsi_task_free free_fn);
+
+/**
+ * Put the SCSI task.
+ *
+ * \param task SCSI task to put.
+ */
+void spdk_scsi_task_put(struct spdk_scsi_task *task);
+
+/**
+ * Set internal buffer to given one. Caller is owner of that buffer.
+ *
+ * \param task SCSI task.
+ * \param data Pointer to buffer.
+ * \param len Buffer length.
+ */
+void spdk_scsi_task_set_data(struct spdk_scsi_task *task, void *data, uint32_t len);
+
+/**
+ * Single buffer -> vector of buffers.
+ *
+ * \param task SCSI task.
+ * \param src A pointer to the data buffer read from.
+ * \param len Length of the data buffer read from.
+ *
+ * \return the total length of the vector of buffers written into on success, or
+ * -1 on failure.
+ */
+int spdk_scsi_task_scatter_data(struct spdk_scsi_task *task, const void *src, size_t len);
+
+/**
+ * Vector of buffers -> single buffer.
+ *
+ * \param task SCSI task.
+ * \param len Length of the buffer allocated and written into.
+ *
+ * \return a pointer to the buffer allocated and written into.
+ */
+void *spdk_scsi_task_gather_data(struct spdk_scsi_task *task, int *len);
+
+/**
+ * Build sense data for the SCSI task.
+ *
+ * \param task SCSI task.
+ * \param sk Sense key.
+ * \param asc Additional sense code.
+ * \param ascq Additional sense code qualifier.
+ */
+void spdk_scsi_task_build_sense_data(struct spdk_scsi_task *task, int sk, int asc,
+ int ascq);
+
+/**
+ * Set SCSI status code to the SCSI task. When the status code is CHECK CONDITION,
+ * sense data is built too.
+ *
+ * \param task SCSI task.
+ * \param sc SCSI status code.
+ * \param sk Sense key.
+ * \param asc Additional sense code.
+ * \param ascq Additional sense code qualifier.
+ */
+void spdk_scsi_task_set_status(struct spdk_scsi_task *task, int sc, int sk, int asc,
+ int ascq);
+
+/**
+ * Copy SCSI status.
+ *
+ * \param dst SCSI task whose status is written to.
+ * \param src SCSI task whose status is read from.
+ */
+void spdk_scsi_task_copy_status(struct spdk_scsi_task *dst, struct spdk_scsi_task *src);
+
+/**
+ * Process the SCSI task when no LUN is attached.
+ *
+ * \param task SCSI task.
+ */
+void spdk_scsi_task_process_null_lun(struct spdk_scsi_task *task);
+
+/**
+ * Process the aborted SCSI task.
+ *
+ * \param task SCSI task.
+ */
+void spdk_scsi_task_process_abort(struct spdk_scsi_task *task);
+
+/**
+ * Open a logical unit for I/O operations.
+ *
+ * The registered callback function must get all tasks from the upper layer
+ * (e.g. iSCSI) to the LUN done, free the IO channel of the LUN if allocated,
+ * and then close the LUN.
+ *
+ * \param lun Logical unit to open.
+ * \param hotremove_cb Callback function for hot removal of the logical unit.
+ * \param hotremove_ctx Param for hot removal callback function.
+ * \param desc Output parameter for the descriptor when operation is successful.
+ * \return 0 if operation is successful, suitable errno value otherwise
+ */
+int spdk_scsi_lun_open(struct spdk_scsi_lun *lun, spdk_scsi_lun_remove_cb_t hotremove_cb,
+ void *hotremove_ctx, struct spdk_scsi_lun_desc **desc);
+
+/**
+ * Close an opened logical unit.
+ *
+ * \param desc Descriptor of the logical unit.
+ */
+void spdk_scsi_lun_close(struct spdk_scsi_lun_desc *desc);
+
+/**
+ * Allocate I/O channel for the LUN
+ *
+ * \param desc Descriptor of the logical unit.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_scsi_lun_allocate_io_channel(struct spdk_scsi_lun_desc *desc);
+
+/**
+ * Free I/O channel from the logical unit
+ *
+ * \param desc Descriptor of the logical unit.
+ */
+void spdk_scsi_lun_free_io_channel(struct spdk_scsi_lun_desc *desc);
+
+/**
+ * Get DIF context for SCSI LUN and SCSI command.
+ *
+ * \param lun Logical unit.
+ * \param task SCSI task which has the payload.
+ * \param dif_ctx Output parameter which will contain initialized DIF context.
+ *
+ * \return true on success or false otherwise.
+ */
+bool spdk_scsi_lun_get_dif_ctx(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task,
+ struct spdk_dif_ctx *dif_ctx);
+
+/**
+ * Set iSCSI Initiator port TransportID
+ *
+ * \param port SCSI initiator port.
+ * \param iscsi_name Initiator name.
+ * \param isid Session ID.
+ */
+void spdk_scsi_port_set_iscsi_transport_id(struct spdk_scsi_port *port,
+ char *iscsi_name, uint64_t isid);
+
+/**
+ * Convert LUN ID from integer to LUN format
+ *
+ * \param lun_id Integer LUN ID
+ *
+ * \return LUN format of LUN ID
+ */
+uint64_t spdk_scsi_lun_id_int_to_fmt(int lun_id);
+
+/**
+ * Convert LUN ID from LUN format to integer
+ *
+ * \param fmt_lun LUN format of LUN ID
+ *
+ * \return integer LUN ID
+ */
+int spdk_scsi_lun_id_fmt_to_int(uint64_t fmt_lun);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_SCSI_H */
diff --git a/src/spdk/include/spdk/scsi_spec.h b/src/spdk/include/spdk/scsi_spec.h
new file mode 100644
index 000000000..2711c8ea4
--- /dev/null
+++ b/src/spdk/include/spdk/scsi_spec.h
@@ -0,0 +1,742 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * SCSI specification definitions
+ */
+
+#ifndef SPDK_SCSI_SPEC_H
+#define SPDK_SCSI_SPEC_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/assert.h"
+
+enum spdk_scsi_group_code {
+ SPDK_SCSI_6BYTE_CMD = 0x00,
+ SPDK_SCSI_10BYTE_CMD = 0x20,
+ SPDK_SCSI_10BYTE_CMD2 = 0x40,
+ SPDK_SCSI_16BYTE_CMD = 0x80,
+ SPDK_SCSI_12BYTE_CMD = 0xa0,
+};
+
+#define SPDK_SCSI_GROUP_MASK 0xe0
+#define SPDK_SCSI_OPCODE_MASK 0x1f
+
+enum spdk_scsi_status {
+ SPDK_SCSI_STATUS_GOOD = 0x00,
+ SPDK_SCSI_STATUS_CHECK_CONDITION = 0x02,
+ SPDK_SCSI_STATUS_CONDITION_MET = 0x04,
+ SPDK_SCSI_STATUS_BUSY = 0x08,
+ SPDK_SCSI_STATUS_INTERMEDIATE = 0x10,
+ SPDK_SCSI_STATUS_INTERMEDIATE_CONDITION_MET = 0x14,
+ SPDK_SCSI_STATUS_RESERVATION_CONFLICT = 0x18,
+ SPDK_SCSI_STATUS_Obsolete = 0x22,
+ SPDK_SCSI_STATUS_TASK_SET_FULL = 0x28,
+ SPDK_SCSI_STATUS_ACA_ACTIVE = 0x30,
+ SPDK_SCSI_STATUS_TASK_ABORTED = 0x40,
+};
+
+enum spdk_scsi_sense {
+ SPDK_SCSI_SENSE_NO_SENSE = 0x00,
+ SPDK_SCSI_SENSE_RECOVERED_ERROR = 0x01,
+ SPDK_SCSI_SENSE_NOT_READY = 0x02,
+ SPDK_SCSI_SENSE_MEDIUM_ERROR = 0x03,
+ SPDK_SCSI_SENSE_HARDWARE_ERROR = 0x04,
+ SPDK_SCSI_SENSE_ILLEGAL_REQUEST = 0x05,
+ SPDK_SCSI_SENSE_UNIT_ATTENTION = 0x06,
+ SPDK_SCSI_SENSE_DATA_PROTECT = 0x07,
+ SPDK_SCSI_SENSE_BLANK_CHECK = 0x08,
+ SPDK_SCSI_SENSE_VENDOR_SPECIFIC = 0x09,
+ SPDK_SCSI_SENSE_COPY_ABORTED = 0x0a,
+ SPDK_SCSI_SENSE_ABORTED_COMMAND = 0x0b,
+ SPDK_SCSI_SENSE_VOLUME_OVERFLOW = 0x0d,
+ SPDK_SCSI_SENSE_MISCOMPARE = 0x0e,
+};
+
+enum spdk_scsi_asc {
+ SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE = 0x00,
+ SPDK_SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03,
+ SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY = 0x04,
+ SPDK_SCSI_ASC_WARNING = 0x0b,
+ SPDK_SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x10,
+ SPDK_SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x10,
+ SPDK_SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x10,
+ SPDK_SCSI_ASC_UNRECOVERED_READ_ERROR = 0x11,
+ SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION = 0x1d,
+ SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE = 0x20,
+ SPDK_SCSI_ASC_ACCESS_DENIED = 0x20,
+ SPDK_SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE = 0x21,
+ SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB = 0x24,
+ SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED = 0x25,
+ SPDK_SCSI_ASC_WRITE_PROTECTED = 0x27,
+ SPDK_SCSI_ASC_FORMAT_COMMAND_FAILED = 0x31,
+ SPDK_SCSI_ASC_SAVING_PARAMETERS_NOT_SUPPORTED = 0x39,
+ SPDK_SCSI_ASC_INTERNAL_TARGET_FAILURE = 0x44,
+};
+
+enum spdk_scsi_ascq {
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE = 0x00,
+ SPDK_SCSI_ASCQ_BECOMING_READY = 0x01,
+ SPDK_SCSI_ASCQ_FORMAT_COMMAND_FAILED = 0x01,
+ SPDK_SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x01,
+ SPDK_SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x02,
+ SPDK_SCSI_ASCQ_NO_ACCESS_RIGHTS = 0x02,
+ SPDK_SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x03,
+ SPDK_SCSI_ASCQ_POWER_LOSS_EXPECTED = 0x08,
+ SPDK_SCSI_ASCQ_INVALID_LU_IDENTIFIER = 0x09,
+};
+
+enum spdk_spc_opcode {
+ /* SPC3 related */
+ SPDK_SPC_ACCESS_CONTROL_IN = 0x86,
+ SPDK_SPC_ACCESS_CONTROL_OUT = 0x87,
+ SPDK_SPC_EXTENDED_COPY = 0x83,
+ SPDK_SPC_INQUIRY = 0x12,
+ SPDK_SPC_LOG_SELECT = 0x4c,
+ SPDK_SPC_LOG_SENSE = 0x4d,
+ SPDK_SPC_MODE_SELECT_6 = 0x15,
+ SPDK_SPC_MODE_SELECT_10 = 0x55,
+ SPDK_SPC_MODE_SENSE_6 = 0x1a,
+ SPDK_SPC_MODE_SENSE_10 = 0x5a,
+ SPDK_SPC_PERSISTENT_RESERVE_IN = 0x5e,
+ SPDK_SPC_PERSISTENT_RESERVE_OUT = 0x5f,
+ SPDK_SPC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e,
+ SPDK_SPC_READ_ATTRIBUTE = 0x8c,
+ SPDK_SPC_READ_BUFFER = 0x3c,
+ SPDK_SPC_RECEIVE_COPY_RESULTS = 0x84,
+ SPDK_SPC_RECEIVE_DIAGNOSTIC_RESULTS = 0x1c,
+ SPDK_SPC_REPORT_LUNS = 0xa0,
+ SPDK_SPC_REQUEST_SENSE = 0x03,
+ SPDK_SPC_SEND_DIAGNOSTIC = 0x1d,
+ SPDK_SPC_TEST_UNIT_READY = 0x00,
+ SPDK_SPC_WRITE_ATTRIBUTE = 0x8d,
+ SPDK_SPC_WRITE_BUFFER = 0x3b,
+
+ SPDK_SPC_SERVICE_ACTION_IN_12 = 0xab,
+ SPDK_SPC_SERVICE_ACTION_OUT_12 = 0xa9,
+ SPDK_SPC_SERVICE_ACTION_IN_16 = 0x9e,
+ SPDK_SPC_SERVICE_ACTION_OUT_16 = 0x9f,
+
+ SPDK_SPC_VARIABLE_LENGTH = 0x7f,
+
+ SPDK_SPC_MO_CHANGE_ALIASES = 0x0b,
+ SPDK_SPC_MO_SET_DEVICE_IDENTIFIER = 0x06,
+ SPDK_SPC_MO_SET_PRIORITY = 0x0e,
+ SPDK_SPC_MO_SET_TARGET_PORT_GROUPS = 0x0a,
+ SPDK_SPC_MO_SET_TIMESTAMP = 0x0f,
+ SPDK_SPC_MI_REPORT_ALIASES = 0x0b,
+ SPDK_SPC_MI_REPORT_DEVICE_IDENTIFIER = 0x05,
+ SPDK_SPC_MI_REPORT_PRIORITY = 0x0e,
+ SPDK_SPC_MI_REPORT_SUPPORTED_OPERATION_CODES = 0x0c,
+ SPDK_SPC_MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS = 0x0d,
+ SPDK_SPC_MI_REPORT_TARGET_PORT_GROUPS = 0x0a,
+ SPDK_SPC_MI_REPORT_TIMESTAMP = 0x0f,
+
+ /* SPC2 related (Obsolete) */
+ SPDK_SPC2_RELEASE_6 = 0x17,
+ SPDK_SPC2_RELEASE_10 = 0x57,
+ SPDK_SPC2_RESERVE_6 = 0x16,
+ SPDK_SPC2_RESERVE_10 = 0x56,
+};
+
+enum spdk_scc_opcode {
+ SPDK_SCC_MAINTENANCE_IN = 0xa3,
+ SPDK_SCC_MAINTENANCE_OUT = 0xa4,
+};
+
+enum spdk_sbc_opcode {
+ SPDK_SBC_COMPARE_AND_WRITE = 0x89,
+ SPDK_SBC_FORMAT_UNIT = 0x04,
+ SPDK_SBC_GET_LBA_STATUS = 0x0012009e,
+ SPDK_SBC_ORWRITE_16 = 0x8b,
+ SPDK_SBC_PRE_FETCH_10 = 0x34,
+ SPDK_SBC_PRE_FETCH_16 = 0x90,
+ SPDK_SBC_READ_6 = 0x08,
+ SPDK_SBC_READ_10 = 0x28,
+ SPDK_SBC_READ_12 = 0xa8,
+ SPDK_SBC_READ_16 = 0x88,
+ SPDK_SBC_READ_ATTRIBUTE = 0x8c,
+ SPDK_SBC_READ_BUFFER = 0x3c,
+ SPDK_SBC_READ_CAPACITY_10 = 0x25,
+ SPDK_SBC_READ_DEFECT_DATA_10 = 0x37,
+ SPDK_SBC_READ_DEFECT_DATA_12 = 0xb7,
+ SPDK_SBC_READ_LONG_10 = 0x3e,
+ SPDK_SBC_REASSIGN_BLOCKS = 0x07,
+ SPDK_SBC_SANITIZE = 0x48,
+ SPDK_SBC_START_STOP_UNIT = 0x1b,
+ SPDK_SBC_SYNCHRONIZE_CACHE_10 = 0x35,
+ SPDK_SBC_SYNCHRONIZE_CACHE_16 = 0x91,
+ SPDK_SBC_UNMAP = 0x42,
+ SPDK_SBC_VERIFY_10 = 0x2f,
+ SPDK_SBC_VERIFY_12 = 0xaf,
+ SPDK_SBC_VERIFY_16 = 0x8f,
+ SPDK_SBC_WRITE_6 = 0x0a,
+ SPDK_SBC_WRITE_10 = 0x2a,
+ SPDK_SBC_WRITE_12 = 0xaa,
+ SPDK_SBC_WRITE_16 = 0x8a,
+ SPDK_SBC_WRITE_AND_VERIFY_10 = 0x2e,
+ SPDK_SBC_WRITE_AND_VERIFY_12 = 0xae,
+ SPDK_SBC_WRITE_AND_VERIFY_16 = 0x8e,
+ SPDK_SBC_WRITE_LONG_10 = 0x3f,
+ SPDK_SBC_WRITE_SAME_10 = 0x41,
+ SPDK_SBC_WRITE_SAME_16 = 0x93,
+ SPDK_SBC_XDREAD_10 = 0x52,
+ SPDK_SBC_XDWRITE_10 = 0x50,
+ SPDK_SBC_XDWRITEREAD_10 = 0x53,
+ SPDK_SBC_XPWRITE_10 = 0x51,
+
+ SPDK_SBC_SAI_READ_CAPACITY_16 = 0x10,
+ SPDK_SBC_SAI_READ_LONG_16 = 0x11,
+ SPDK_SBC_SAO_WRITE_LONG_16 = 0x11,
+
+ SPDK_SBC_VL_READ_32 = 0x0009,
+ SPDK_SBC_VL_VERIFY_32 = 0x000a,
+ SPDK_SBC_VL_WRITE_32 = 0x000b,
+ SPDK_SBC_VL_WRITE_AND_VERIFY_32 = 0x000c,
+ SPDK_SBC_VL_WRITE_SAME_32 = 0x000d,
+ SPDK_SBC_VL_XDREAD_32 = 0x0003,
+ SPDK_SBC_VL_XDWRITE_32 = 0x0004,
+ SPDK_SBC_VL_XDWRITEREAD_32 = 0x0007,
+ SPDK_SBC_VL_XPWRITE_32 = 0x0006,
+};
+
+#define SPDK_SBC_START_STOP_UNIT_START_BIT (1 << 0)
+
+enum spdk_mmc_opcode {
+ /* MMC6 */
+ SPDK_MMC_READ_DISC_STRUCTURE = 0xad,
+
+ /* MMC4 */
+ SPDK_MMC_BLANK = 0xa1,
+ SPDK_MMC_CLOSE_TRACK_SESSION = 0x5b,
+ SPDK_MMC_ERASE_10 = 0x2c,
+ SPDK_MMC_FORMAT_UNIT = 0x04,
+ SPDK_MMC_GET_CONFIGURATION = 0x46,
+ SPDK_MMC_GET_EVENT_STATUS_NOTIFICATION = 0x4a,
+ SPDK_MMC_GET_PERFORMANCE = 0xac,
+ SPDK_MMC_INQUIRY = 0x12,
+ SPDK_MMC_LOAD_UNLOAD_MEDIUM = 0xa6,
+ SPDK_MMC_MECHANISM_STATUS = 0xbd,
+ SPDK_MMC_MODE_SELECT_10 = 0x55,
+ SPDK_MMC_MODE_SENSE_10 = 0x5a,
+ SPDK_MMC_PAUSE_RESUME = 0x4b,
+ SPDK_MMC_PLAY_AUDIO_10 = 0x45,
+ SPDK_MMC_PLAY_AUDIO_12 = 0xa5,
+ SPDK_MMC_PLAY_AUDIO_MSF = 0x47,
+ SPDK_MMC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e,
+ SPDK_MMC_READ_10 = 0x28,
+ SPDK_MMC_READ_12 = 0xa8,
+ SPDK_MMC_READ_BUFFER = 0x3c,
+ SPDK_MMC_READ_BUFFER_CAPACITY = 0x5c,
+ SPDK_MMC_READ_CAPACITY = 0x25,
+ SPDK_MMC_READ_CD = 0xbe,
+ SPDK_MMC_READ_CD_MSF = 0xb9,
+ SPDK_MMC_READ_DISC_INFORMATION = 0x51,
+ SPDK_MMC_READ_DVD_STRUCTURE = 0xad,
+ SPDK_MMC_READ_FORMAT_CAPACITIES = 0x23,
+ SPDK_MMC_READ_SUB_CHANNEL = 0x42,
+ SPDK_MMC_READ_TOC_PMA_ATIP = 0x43,
+ SPDK_MMC_READ_TRACK_INFORMATION = 0x52,
+ SPDK_MMC_REPAIR_TRACK = 0x58,
+ SPDK_MMC_REPORT_KEY = 0xa4,
+ SPDK_MMC_REQUEST_SENSE = 0x03,
+ SPDK_MMC_RESERVE_TRACK = 0x53,
+ SPDK_MMC_SCAN = 0xba,
+ SPDK_MMC_SEEK_10 = 0x2b,
+ SPDK_MMC_SEND_CUE_SHEET = 0x5d,
+ SPDK_MMC_SEND_DVD_STRUCTURE = 0xbf,
+ SPDK_MMC_SEND_KEY = 0xa3,
+ SPDK_MMC_SEND_OPC_INFORMATION = 0x54,
+ SPDK_MMC_SET_CD_SPEED = 0xbb,
+ SPDK_MMC_SET_READ_AHEAD = 0xa7,
+ SPDK_MMC_SET_STREAMING = 0xb6,
+ SPDK_MMC_START_STOP_UNIT = 0x1b,
+ SPDK_MMC_STOP_PLAY_SCAN = 0x4e,
+ SPDK_MMC_SYNCHRONIZE_CACHE = 0x35,
+ SPDK_MMC_TEST_UNIT_READY = 0x00,
+ SPDK_MMC_VERIFY_10 = 0x2f,
+ SPDK_MMC_WRITE_10 = 0x2a, /* 10-byte CDB group (0x20); same opcode as SPDK_SBC_WRITE_10 */
+ SPDK_MMC_WRITE_12 = 0xaa,
+ SPDK_MMC_WRITE_AND_VERIFY_10 = 0x2e,
+ SPDK_MMC_WRITE_BUFFER = 0x3b,
+};
+
+enum spdk_ssc_opcode {
+ SPDK_SSC_ERASE_6 = 0x19,
+ SPDK_SSC_FORMAT_MEDIUM = 0x04,
+ SPDK_SSC_LOAD_UNLOAD = 0x1b,
+ SPDK_SSC_LOCATE_10 = 0x2b,
+ SPDK_SSC_LOCATE_16 = 0x92,
+ SPDK_SSC_MOVE_MEDIUM_ATTACHED = 0xa7,
+ SPDK_SSC_READ_6 = 0x08,
+ SPDK_SSC_READ_BLOCK_LIMITS = 0x05,
+ SPDK_SSC_READ_ELEMENT_STATUS_ATTACHED = 0xb4,
+ SPDK_SSC_READ_POSITION = 0x34,
+ SPDK_SSC_READ_REVERSE_6 = 0x0f,
+ SPDK_SSC_RECOVER_BUFFERED_DATA = 0x14,
+ SPDK_SSC_REPORT_DENSITY_SUPPORT = 0x44,
+ SPDK_SSC_REWIND = 0x01,
+ SPDK_SSC_SET_CAPACITY = 0x0b,
+ SPDK_SSC_SPACE_6 = 0x11,
+ SPDK_SSC_SPACE_16 = 0x91,
+ SPDK_SSC_VERIFY_6 = 0x13,
+ SPDK_SSC_WRITE_6 = 0x0a,
+ SPDK_SSC_WRITE_FILEMARKS_6 = 0x10,
+};
+
+enum spdk_spc_vpd {
+ SPDK_SPC_VPD_DEVICE_IDENTIFICATION = 0x83,
+ SPDK_SPC_VPD_EXTENDED_INQUIRY_DATA = 0x86,
+ SPDK_SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES = 0x85,
+ SPDK_SPC_VPD_MODE_PAGE_POLICY = 0x87,
+ SPDK_SPC_VPD_SCSI_PORTS = 0x88,
+ SPDK_SPC_VPD_SOFTWARE_INTERFACE_IDENTIFICATION = 0x84,
+ SPDK_SPC_VPD_SUPPORTED_VPD_PAGES = 0x00,
+ SPDK_SPC_VPD_UNIT_SERIAL_NUMBER = 0x80,
+ SPDK_SPC_VPD_BLOCK_LIMITS = 0xb0,
+ SPDK_SPC_VPD_BLOCK_DEV_CHARS = 0xb1,
+ SPDK_SPC_VPD_BLOCK_THIN_PROVISION = 0xb2,
+};
+
+enum spdk_spc_peripheral_qualifier {
+ SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED = 0,
+ SPDK_SPC_PERIPHERAL_QUALIFIER_NOT_CONNECTED = 1,
+ SPDK_SPC_PERIPHERAL_QUALIFIER_NOT_CAPABLE = 3,
+};
+
+enum {
+ SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DISK = 0x00,
+ SPDK_SPC_PERIPHERAL_DEVICE_TYPE_TAPE = 0x01,
+ SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DVD = 0x05,
+ SPDK_SPC_PERIPHERAL_DEVICE_TYPE_CHANGER = 0x08,
+
+ SPDK_SPC_VERSION_NONE = 0x00,
+ SPDK_SPC_VERSION_SPC = 0x03,
+ SPDK_SPC_VERSION_SPC2 = 0x04,
+ SPDK_SPC_VERSION_SPC3 = 0x05,
+ SPDK_SPC_VERSION_SPC4 = 0x06,
+
+ SPDK_SPC_PROTOCOL_IDENTIFIER_FC = 0x00,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_PSCSI = 0x01,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_SSA = 0x02,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_IEEE1394 = 0x03,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_RDMA = 0x04,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_ISCSI = 0x05,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_SAS = 0x06,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_ADT = 0x07,
+ SPDK_SPC_PROTOCOL_IDENTIFIER_ATA = 0x08,
+
+ SPDK_SPC_VPD_CODE_SET_BINARY = 0x01,
+ SPDK_SPC_VPD_CODE_SET_ASCII = 0x02,
+ SPDK_SPC_VPD_CODE_SET_UTF8 = 0x03,
+
+ SPDK_SPC_VPD_ASSOCIATION_LOGICAL_UNIT = 0x00,
+ SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT = 0x01,
+ SPDK_SPC_VPD_ASSOCIATION_TARGET_DEVICE = 0x02,
+
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_VENDOR_SPECIFIC = 0x00,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID = 0x01,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_EUI64 = 0x02,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_NAA = 0x03,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT = 0x04,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP = 0x05,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP = 0x06,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_MD5_LOGICAL_UNIT = 0x07,
+ SPDK_SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME = 0x08,
+};
+
+struct spdk_scsi_cdb_inquiry {
+ uint8_t opcode;
+ uint8_t evpd;
+ uint8_t page_code;
+ uint8_t alloc_len[2];
+ uint8_t control;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_cdb_inquiry) == 6, "incorrect CDB size");
+
+struct spdk_scsi_cdb_inquiry_data {
+ uint8_t peripheral_device_type : 5;
+ uint8_t peripheral_qualifier : 3;
+ uint8_t rmb;
+ uint8_t version;
+ uint8_t response;
+ uint8_t add_len;
+ uint8_t flags;
+ uint8_t flags2;
+ uint8_t flags3;
+ uint8_t t10_vendor_id[8];
+ uint8_t product_id[16];
+ uint8_t product_rev[4];
+ uint8_t vendor[20];
+ uint8_t ius;
+ uint8_t reserved;
+ uint8_t desc[];
+};
+
+struct spdk_scsi_vpd_page {
+ uint8_t peripheral_device_type : 5;
+ uint8_t peripheral_qualifier : 3;
+ uint8_t page_code;
+ uint8_t alloc_len[2];
+ uint8_t params[];
+};
+
+#define SPDK_SCSI_VEXT_REF_CHK 0x01
+#define SPDK_SCSI_VEXT_APP_CHK 0x02
+#define SPDK_SCSI_VEXT_GRD_CHK 0x04
+#define SPDK_SCSI_VEXT_SIMPSUP 0x01
+#define SPDK_SCSI_VEXT_ORDSUP 0x02
+#define SPDK_SCSI_VEXT_HEADSUP 0x04
+#define SPDK_SCSI_VEXT_PRIOR_SUP 0x08
+#define SPDK_SCSI_VEXT_GROUP_SUP 0x10
+#define SPDK_SCSI_VEXT_UASK_SUP 0x20
+#define SPDK_SCSI_VEXT_V_SUP 0x01
+#define SPDK_SCSI_VEXT_NV_SUP 0x02
+#define SPDK_SCSI_VEXT_CRD_SUP 0x04
+#define SPDK_SCSI_VEXT_WU_SUP 0x08
+
+struct spdk_scsi_vpd_ext_inquiry {
+ uint8_t peripheral;
+ uint8_t page_code;
+ uint8_t alloc_len[2];
+ uint8_t check;
+ uint8_t sup;
+ uint8_t sup2;
+ uint8_t luiclr;
+ uint8_t cbcs;
+ uint8_t micro_dl;
+ uint8_t reserved[54];
+};
+
+#define SPDK_SPC_VPD_DESIG_PIV 0x80
+
+/* designation descriptor */
+struct spdk_scsi_desig_desc {
+ uint8_t code_set : 4;
+ uint8_t protocol_id : 4;
+ uint8_t type : 4;
+ uint8_t association : 2;
+ uint8_t reserved0 : 1;
+ uint8_t piv : 1;
+ uint8_t reserved1;
+ uint8_t len;
+ uint8_t desig[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_desig_desc) == 4, "Invalid size");
+
+/* mode page policy descriptor */
+struct spdk_scsi_mpage_policy_desc {
+ uint8_t page_code;
+ uint8_t sub_page_code;
+ uint8_t policy;
+ uint8_t reserved;
+};
+
+/* target port descriptor */
+struct spdk_scsi_tgt_port_desc {
+ uint8_t code_set;
+ uint8_t desig_type;
+ uint8_t reserved;
+ uint8_t len;
+ uint8_t designator[];
+};
+
+/* SCSI port designation descriptor */
+struct spdk_scsi_port_desc {
+ uint16_t reserved;
+ uint16_t rel_port_id;
+ uint16_t reserved2;
+ uint16_t init_port_len;
+ uint16_t init_port_id;
+ uint16_t reserved3;
+ uint16_t tgt_desc_len;
+ uint8_t tgt_desc[];
+};
+
+/* iSCSI initiator port TransportID header */
+struct spdk_scsi_iscsi_transport_id {
+ uint8_t protocol_id : 4;
+ uint8_t reserved1 : 2;
+ uint8_t format : 2;
+ uint8_t reserved2;
+ uint16_t additional_len;
+ uint8_t name[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_iscsi_transport_id) == 4, "Incorrect size");
+
+/* SCSI UNMAP block descriptor */
+struct spdk_scsi_unmap_bdesc {
+ /* UNMAP LOGICAL BLOCK ADDRESS */
+ uint64_t lba;
+
+ /* NUMBER OF LOGICAL BLOCKS */
+ uint32_t block_count;
+
+ /* RESERVED */
+ uint32_t reserved;
+};
+
+/* SCSI Persistent Reserve In action codes */
+enum spdk_scsi_pr_in_action_code {
+ /* Read all registered reservation keys */
+ SPDK_SCSI_PR_IN_READ_KEYS = 0x00,
+ /* Read current persistent reservations */
+ SPDK_SCSI_PR_IN_READ_RESERVATION = 0x01,
+ /* Return capabilities information */
+ SPDK_SCSI_PR_IN_REPORT_CAPABILITIES = 0x02,
+ /* Read all registrations and persistent reservations */
+ SPDK_SCSI_PR_IN_READ_FULL_STATUS = 0x03,
+ /* 0x04 - 0x1f Reserved */
+};
+
+enum spdk_scsi_pr_scope_code {
+ /* Persistent reservation applies to full logical unit */
+ SPDK_SCSI_PR_LU_SCOPE = 0x00,
+};
+
+/* SCSI Persistent Reservation type codes */
+enum spdk_scsi_pr_type_code {
+ /* Write Exclusive */
+ SPDK_SCSI_PR_WRITE_EXCLUSIVE = 0x01,
+ /* Exclusive Access */
+ SPDK_SCSI_PR_EXCLUSIVE_ACCESS = 0x03,
+ /* Write Exclusive - Registrants Only */
+ SPDK_SCSI_PR_WRITE_EXCLUSIVE_REGS_ONLY = 0x05,
+ /* Exclusive Access - Registrants Only */
+ SPDK_SCSI_PR_EXCLUSIVE_ACCESS_REGS_ONLY = 0x06,
+ /* Write Exclusive - All Registrants */
+ SPDK_SCSI_PR_WRITE_EXCLUSIVE_ALL_REGS = 0x07,
+ /* Exclusive Access - All Registrants */
+ SPDK_SCSI_PR_EXCLUSIVE_ACCESS_ALL_REGS = 0x08,
+};
+
+/* SCSI Persistent Reserve In header for
+ * Read Keys, Read Reservation, Read Full Status
+ */
+struct spdk_scsi_pr_in_read_header {
+ /* persistent reservation generation */
+ uint32_t pr_generation;
+ uint32_t additional_len;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_header) == 8, "Incorrect size");
+
+/* SCSI Persistent Reserve In read keys data */
+struct spdk_scsi_pr_in_read_keys_data {
+ struct spdk_scsi_pr_in_read_header header;
+ /* reservation key list */
+ uint64_t rkeys[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_keys_data) == 8, "Incorrect size");
+
+/* SCSI Persistent Reserve In read reservations data */
+struct spdk_scsi_pr_in_read_reservations_data {
+ /* Fixed 0x10 with reservation and 0 for no reservation */
+ struct spdk_scsi_pr_in_read_header header;
+ /* reservation key */
+ uint64_t rkey;
+ uint32_t obsolete1;
+ uint8_t reserved;
+ uint8_t type : 4;
+ uint8_t scope : 4;
+ uint16_t obsolete2;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_reservations_data) == 24, "Incorrect size");
+
+/* SCSI Persistent Reserve In report capabilities data */
+struct spdk_scsi_pr_in_report_capabilities_data {
+ /* Fixed value 0x8 */
+ uint16_t length;
+
+ /* Persist through power loss capable */
+ uint8_t ptpl_c : 1;
+ uint8_t reserved1 : 1;
+ /* All target ports capable */
+ uint8_t atp_c : 1;
+ /* Specify initiator port capable */
+ uint8_t sip_c : 1;
+ /* Compatible reservation handling bit to indicate
+ * SPC-2 reserve/release is supported
+ */
+ uint8_t crh : 1;
+ uint8_t reserved2 : 3;
+ /* Persist through power loss activated */
+ uint8_t ptpl_a : 1;
+ uint8_t reserved3 : 6;
+ /* Type mask valid */
+ uint8_t tmv : 1;
+
+ /* Type mask format */
+ uint8_t reserved4 : 1;
+ /* Write Exclusive */
+ uint8_t wr_ex : 1;
+ uint8_t reserved5 : 1;
+ /* Exclusive Access */
+ uint8_t ex_ac : 1;
+ uint8_t reserved6 : 1;
+ /* Write Exclusive - Registrants Only */
+ uint8_t wr_ex_ro : 1;
+ /* Exclusive Access - Registrants Only */
+ uint8_t ex_ac_ro : 1;
+ /* Write Exclusive - All Registrants */
+ uint8_t wr_ex_ar : 1;
+ /* Exclusive Access - All Registrants */
+ uint8_t ex_ac_ar : 1;
+ uint8_t reserved7 : 7;
+
+ uint8_t reserved8[2];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_report_capabilities_data) == 8, "Incorrect size");
+
+/* SCSI Persistent Reserve In full status descriptor */
+struct spdk_scsi_pr_in_full_status_desc {
+ /* Reservation key */
+ uint64_t rkey;
+ uint8_t reserved1[4];
+
+ /* 0 - Registrant only
+ * 1 - Registrant and reservation holder
+ */
+ uint8_t r_holder : 1;
+ /* All target ports */
+ uint8_t all_tg_pt : 1;
+ uint8_t reserved2 : 6;
+
+ /* Reservation type */
+ uint8_t type : 4;
+ /* Set to LU_SCOPE */
+ uint8_t scope : 4;
+
+ uint8_t reserved3[4];
+ uint16_t relative_target_port_id;
+ /* Size of TransportID */
+ uint32_t desc_len;
+
+ uint8_t transport_id[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_full_status_desc) == 24, "Incorrect size");
+
+/* SCSI Persistent Reserve In full status data */
+struct spdk_scsi_pr_in_full_status_data {
+ struct spdk_scsi_pr_in_read_header header;
+ /* Full status descriptors */
+ struct spdk_scsi_pr_in_full_status_desc desc_list[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_full_status_data) == 8, "Incorrect size");
+
+/* SCSI Persistent Reserve Out service action codes */
+enum spdk_scsi_pr_out_service_action_code {
+ /* Register/unregister a reservation key */
+ SPDK_SCSI_PR_OUT_REGISTER = 0x00,
+ /* Create a persistent reservation */
+ SPDK_SCSI_PR_OUT_RESERVE = 0x01,
+ /* Release a persistent reservation */
+ SPDK_SCSI_PR_OUT_RELEASE = 0x02,
+ /* Clear all reservation keys and persistent reservations */
+ SPDK_SCSI_PR_OUT_CLEAR = 0x03,
+ /* Preempt persistent reservations and/or remove registrants */
+ SPDK_SCSI_PR_OUT_PREEMPT = 0x04,
+ /* Preempt persistent reservations and/or remove registrants
+ * and abort all tasks for all preempted I_T nexuses
+ */
+ SPDK_SCSI_PR_OUT_PREEMPT_AND_ABORT = 0x05,
+ /* Register/unregister a reservation key based on the ignore bit */
+ SPDK_SCSI_PR_OUT_REG_AND_IGNORE_KEY = 0x06,
+ /* Register a reservation key for another I_T nexus
+ * and move a persistent reservation to that I_T nexus
+ */
+ SPDK_SCSI_PR_OUT_REG_AND_MOVE = 0x07,
+ /* 0x08 - 0x1f Reserved */
+};
+
+/* SCSI Persistent Reserve Out parameter list */
+struct spdk_scsi_pr_out_param_list {
+ /* Reservation key */
+ uint64_t rkey;
+ /* Service action reservation key */
+ uint64_t sa_rkey;
+ uint8_t obsolete1[4];
+
+ /* Active persist through power loss */
+ uint8_t aptpl : 1;
+ uint8_t reserved1 : 1;
+ /* All target ports */
+ uint8_t all_tg_pt : 1;
+ /* Specify initiator ports */
+ uint8_t spec_i_pt : 1;
+ uint8_t reserved2 : 4;
+
+ uint8_t reserved3;
+ uint16_t obsolete2;
+
+ uint8_t param_data[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_out_param_list) == 24, "Incorrect size");
+
+struct spdk_scsi_pr_out_reg_and_move_param_list {
+ /* Reservation key */
+ uint64_t rkey;
+ /* Service action reservation key */
+ uint64_t sa_rkey;
+ uint8_t reserved1;
+
+ /* Active persist through power loss */
+ uint8_t aptpl : 1;
+ /* Unregister */
+ uint8_t unreg : 1;
+ uint8_t reserved2 : 6;
+
+ uint16_t relative_target_port_id;
+ /* TransportID parameter data length */
+ uint32_t transport_id_len;
+ uint8_t transport_id[];
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_out_reg_and_move_param_list) == 24, "Incorrect size");
+
+/*
+ * SPC-4
+ * Table-258 SECURITY PROTOCOL field in SECURITY PROTOCOL IN command
+ */
+#define SPDK_SCSI_SECP_INFO 0x00
+#define SPDK_SCSI_SECP_TCG 0x01
+
+#define SPDK_SCSI_UNMAP_LBPU (1 << 7)
+#define SPDK_SCSI_UNMAP_LBPWS (1 << 6)
+#define SPDK_SCSI_UNMAP_LBPWS10 (1 << 5)
+
+#define SPDK_SCSI_UNMAP_FULL_PROVISIONING 0x00
+#define SPDK_SCSI_UNMAP_RESOURCE_PROVISIONING 0x01
+#define SPDK_SCSI_UNMAP_THIN_PROVISIONING 0x02
+
+#endif /* SPDK_SCSI_SPEC_H */
diff --git a/src/spdk/include/spdk/sock.h b/src/spdk/include/spdk/sock.h
new file mode 100644
index 000000000..f70a2ac39
--- /dev/null
+++ b/src/spdk/include/spdk/sock.h
@@ -0,0 +1,475 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * TCP socket abstraction layer
+ */
+
+#ifndef SPDK_SOCK_H
+#define SPDK_SOCK_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/queue.h"
+#include "spdk/json.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_sock;
+struct spdk_sock_group;
+
+/**
+ * Anywhere this struct is used, an iovec array is assumed to
+ * immediately follow the last member in memory, without any
+ * padding.
+ *
+ * A simpler implementation would be to place a 0-length array
+ * of struct iovec at the end of this request. However, embedding
+ * a structure that ends with a variable length array inside of
+ * another structure is a GNU C extension and not standard.
+ */
+struct spdk_sock_request {
+ /* When the request is completed, this callback will be called.
+ * err will be 0 on success or a negated errno value on failure. */
+ void (*cb_fn)(void *cb_arg, int err);
+ void *cb_arg;
+
+ /**
+ * These fields are used by the socket layer and should not be modified
+ */
+ struct __sock_request_internal {
+ TAILQ_ENTRY(spdk_sock_request) link;
+ uint32_t offset;
+ } internal;
+
+ int iovcnt;
+ /* struct iovec iov[]; */
+};
+
+#define SPDK_SOCK_REQUEST_IOV(req, i) ((struct iovec *)(((uint8_t *)(req) + sizeof(struct spdk_sock_request)) + (sizeof(struct iovec) * (i))))
+
+/**
+ * SPDK socket implementation options.
+ *
+ * A pointer to this structure is used by spdk_sock_impl_get_opts() and spdk_sock_impl_set_opts()
+ * to allow the user to request options for the socket module implementation.
+ * Each socket module defines which options from this structure are applicable to the module.
+ */
+struct spdk_sock_impl_opts {
+ /**
+ * Size of sock receive buffer. Used by posix socket module.
+ */
+ uint32_t recv_buf_size;
+
+ /**
+ * Size of sock send buffer. Used by posix socket module.
+ */
+ uint32_t send_buf_size;
+
+ /**
+ * Enable or disable receive pipe. Used by posix socket module.
+ */
+ bool enable_recv_pipe;
+
+ /**
+ * Enable or disable use of zero copy flow on send. Used by posix socket module.
+ */
+ bool enable_zerocopy_send;
+};
+
+/**
+ * Spdk socket initialization options.
+ *
+ * A pointer to this structure will be used by spdk_sock_listen_ext() or spdk_sock_connect_ext() to
+ * allow the user to request non-default options on the socket.
+ */
+struct spdk_sock_opts {
+ /**
+ * The size of spdk_sock_opts according to the caller of this library is used for ABI
+ * compatibility. The library uses this field to know how many fields in this
+ * structure are valid. And the library will populate any remaining fields with default values.
+ */
+ size_t opts_size;
+
+ /**
+ * The priority on the socket and default value is zero.
+ */
+ int priority;
+};
+
+/**
+ * Initialize the default value of opts.
+ *
+ * \param opts Data structure where SPDK will initialize the default sock options.
+ * Users must set opts_size to sizeof(struct spdk_sock_opts). This will ensure that the
+ * library only tries to fill as many fields as allocated by the caller. This allows ABI
+ * compatibility with future versions of this library that may extend the spdk_sock_opts
+ * structure.
+ */
+void spdk_sock_get_default_opts(struct spdk_sock_opts *opts);
+
+/**
+ * Get client and server addresses of the given socket.
+ *
+ * \param sock Socket to get address.
+ * \param saddr A pointer to the buffer to hold the address of server.
+ * \param slen Length of the buffer 'saddr'.
+ * \param sport A pointer (may be NULL) to the buffer to hold the port info of server.
+ * \param caddr A pointer to the buffer to hold the address of client.
+ * \param clen Length of the buffer 'caddr'.
+ * \param cport A pointer (may be NULL) to the buffer to hold the port info of client.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_getaddr(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport,
+ char *caddr, int clen, uint16_t *cport);
+
+/**
+ * Create a socket using the specific sock implementation, connect the socket
+ * to the specified address and port (of the server), and then return the socket.
+ * This function is used by client.
+ *
+ * \param ip IP address of the server.
+ * \param port Port number of the server.
+ * \param impl_name The sock_implementation to use, such as "posix". If impl_name is
+ * specified, it will *only* try to connect on that impl. If it is NULL, it will try
+ * all the sock implementations in order and uses the first sock implementation which
+ * can connect. For example, it may try vpp first, then fall back to posix.
+ *
+ * \return a pointer to the connected socket on success, or NULL on failure.
+ */
+struct spdk_sock *spdk_sock_connect(const char *ip, int port, char *impl_name);
+
+/**
+ * Create a socket using the specific sock implementation, connect the socket
+ * to the specified address and port (of the server), and then return the socket.
+ * This function is used by client.
+ *
+ * \param ip IP address of the server.
+ * \param port Port number of the server.
+ * \param impl_name The sock_implementation to use, such as "posix". If impl_name is
+ * specified, it will *only* try to connect on that impl. If it is NULL, it will try
+ * all the sock implementations in order and uses the first sock implementation which
+ * can connect. For example, it may try vpp first, then fall back to posix.
+ * \param opts The sock option pointer provided by the user which should not be NULL pointer.
+ *
+ * \return a pointer to the connected socket on success, or NULL on failure.
+ */
+struct spdk_sock *spdk_sock_connect_ext(const char *ip, int port, char *impl_name,
+ struct spdk_sock_opts *opts);
+
+/**
+ * Create a socket using the specific sock implementation, bind the socket to
+ * the specified address and port and listen on the socket, and then return the socket.
+ * This function is used by server.
+ *
+ * \param ip IP address to listen on.
+ * \param port Port number.
+ * \param impl_name The sock_implementation to use, such as "posix". If impl_name is
+ * specified, it will *only* try to listen on that impl. If it is NULL, it will try
+ * all the sock implementations in order and uses the first sock implementation which
+ * can listen. For example, it may try vpp first, then fall back to posix.
+ *
+ * \return a pointer to the listened socket on success, or NULL on failure.
+ */
+struct spdk_sock *spdk_sock_listen(const char *ip, int port, char *impl_name);
+
+/**
+ * Create a socket using the specific sock implementation, bind the socket to
+ * the specified address and port and listen on the socket, and then return the socket.
+ * This function is used by server.
+ *
+ * \param ip IP address to listen on.
+ * \param port Port number.
+ * \param impl_name The sock_implementation to use, such as "posix". If impl_name is
+ * specified, it will *only* try to listen on that impl. If it is NULL, it will try
+ * all the sock implementations in order and uses the first sock implementation which
+ * can listen. For example, it may try vpp first, then fall back to posix.
+ * \param opts The sock option pointer provided by the user, which should not be a NULL pointer.
+ *
+ * \return a pointer to the listening socket on success, or NULL on failure.
+ */
+struct spdk_sock *spdk_sock_listen_ext(const char *ip, int port, char *impl_name,
+ struct spdk_sock_opts *opts);
+
+/**
+ * Accept a new connection from a client on the specified socket and return a
+ * socket structure which holds the connection.
+ *
+ * \param sock Listening socket.
+ *
+ * \return a pointer to the accepted socket on success, or NULL on failure.
+ */
+struct spdk_sock *spdk_sock_accept(struct spdk_sock *sock);
+
+/**
+ * Close a socket.
+ *
+ * \param sock Socket to close.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_close(struct spdk_sock **sock);
+
+/**
+ * Flush a socket from data gathered in previous writev_async calls.
+ *
+ * \param sock Socket to flush.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_flush(struct spdk_sock *sock);
+
+/**
+ * Receive a message from the given socket.
+ *
+ * \param sock Socket to receive message.
+ * \param buf Pointer to a buffer to hold the data.
+ * \param len Length of the buffer.
+ *
+ * \return the length of the received message on success, -1 on failure.
+ */
+ssize_t spdk_sock_recv(struct spdk_sock *sock, void *buf, size_t len);
+
+/**
+ * Write message to the given socket from the I/O vector array.
+ *
+ * \param sock Socket to write to.
+ * \param iov I/O vector.
+ * \param iovcnt Number of I/O vectors in the array.
+ *
+ * \return the length of written message on success, -1 on failure.
+ */
+ssize_t spdk_sock_writev(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+
+/**
+ * Write data to the given socket asynchronously, calling
+ * the provided callback when the data has been written.
+ *
+ * \param sock Socket to write to.
+ * \param req The write request to submit.
+ */
+void spdk_sock_writev_async(struct spdk_sock *sock, struct spdk_sock_request *req);
+
+/**
+ * Read message from the given socket to the I/O vector array.
+ *
+ * \param sock Socket to receive message.
+ * \param iov I/O vector.
+ * \param iovcnt Number of I/O vectors in the array.
+ *
+ * \return the length of the received message on success, -1 on failure.
+ */
+ssize_t spdk_sock_readv(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+
+/**
+ * Set the value used to specify the low water mark (in bytes) for this socket.
+ *
+ * \param sock Socket to set for.
+ * \param nbytes Value for recvlowat.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_set_recvlowat(struct spdk_sock *sock, int nbytes);
+
+/**
+ * Set receive buffer size for the given socket.
+ *
+ * \param sock Socket to set buffer size for.
+ * \param sz Buffer size in bytes.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_set_recvbuf(struct spdk_sock *sock, int sz);
+
+/**
+ * Set send buffer size for the given socket.
+ *
+ * \param sock Socket to set buffer size for.
+ * \param sz Buffer size in bytes.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_set_sendbuf(struct spdk_sock *sock, int sz);
+
+/**
+ * Check whether the address of socket is ipv6.
+ *
+ * \param sock Socket to check.
+ *
+ * \return true if the address of socket is ipv6, or false otherwise.
+ */
+bool spdk_sock_is_ipv6(struct spdk_sock *sock);
+
+/**
+ * Check whether the address of socket is ipv4.
+ *
+ * \param sock Socket to check.
+ *
+ * \return true if the address of socket is ipv4, or false otherwise.
+ */
+bool spdk_sock_is_ipv4(struct spdk_sock *sock);
+
+/**
+ * Check whether the socket is currently connected.
+ *
+ * \param sock Socket to check
+ *
+ * \return true if the socket is connected or false otherwise.
+ */
+bool spdk_sock_is_connected(struct spdk_sock *sock);
+
+/**
+ * Callback function for spdk_sock_group_add_sock().
+ *
+ * \param arg Argument for the callback function.
+ * \param group Socket group.
+ * \param sock Socket.
+ */
+typedef void (*spdk_sock_cb)(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock);
+
+/**
+ * Create a new socket group with user provided pointer
+ *
+ * \param ctx the context provided by user.
+ * \return a pointer to the created group on success, or NULL on failure.
+ */
+struct spdk_sock_group *spdk_sock_group_create(void *ctx);
+
+/**
+ * Get the ctx of the sock group
+ *
+ * \param sock_group Socket group.
+ * \return a pointer which is ctx of the sock_group.
+ */
+void *spdk_sock_group_get_ctx(struct spdk_sock_group *sock_group);
+
+
+/**
+ * Add a socket to the group.
+ *
+ * \param group Socket group.
+ * \param sock Socket to add.
+ * \param cb_fn Called when the operation completes.
+ * \param cb_arg Argument passed to the callback function.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_group_add_sock(struct spdk_sock_group *group, struct spdk_sock *sock,
+ spdk_sock_cb cb_fn, void *cb_arg);
+
+/**
+ * Remove a socket from the group.
+ *
+ * \param group Socket group.
+ * \param sock Socket to remove.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_group_remove_sock(struct spdk_sock_group *group, struct spdk_sock *sock);
+
+/**
+ * Poll incoming events for each registered socket.
+ *
+ * \param group Group to poll.
+ *
+ * \return the number of events on success, -1 on failure.
+ */
+int spdk_sock_group_poll(struct spdk_sock_group *group);
+
+/**
+ * Poll incoming events up to max_events for each registered socket.
+ *
+ * \param group Group to poll.
+ * \param max_events Maximum number of events to poll for each socket.
+ *
+ * \return the number of events on success, -1 on failure.
+ */
+int spdk_sock_group_poll_count(struct spdk_sock_group *group, int max_events);
+
+/**
+ * Close all registered sockets of the group and then remove the group.
+ *
+ * \param group Group to close.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+int spdk_sock_group_close(struct spdk_sock_group **group);
+
+/**
+ * Get the optimal sock group for this sock.
+ *
+ * \param sock The socket
+ * \param group Returns the optimal sock group. If there is no optimal sock group, returns NULL.
+ *
+ * \return 0 on success. Negated errno on failure.
+ */
+int spdk_sock_get_optimal_sock_group(struct spdk_sock *sock, struct spdk_sock_group **group);
+
+/**
+ * Get current socket implementation options.
+ *
+ * \param impl_name The socket implementation to use, such as "posix".
+ * \param opts Pointer to allocated spdk_sock_impl_opts structure that will be filled with actual values.
+ * \param len On input specifies size of passed opts structure. On return it is set to actual size that was filled with values.
+ *
+ * \return 0 on success, -1 on failure. errno is set to indicate the reason of failure.
+ */
+int spdk_sock_impl_get_opts(const char *impl_name, struct spdk_sock_impl_opts *opts, size_t *len);
+
+/**
+ * Set socket implementation options.
+ *
+ * \param impl_name The socket implementation to use, such as "posix".
+ * \param opts Pointer to allocated spdk_sock_impl_opts structure with new options values.
+ * \param len Size of passed opts structure.
+ *
+ * \return 0 on success, -1 on failure. errno is set to indicate the reason of failure.
+ */
+int spdk_sock_impl_set_opts(const char *impl_name, const struct spdk_sock_impl_opts *opts,
+ size_t len);
+
+/**
+ * Write socket subsystem configuration into provided JSON context.
+ *
+ * \param w JSON write context
+ */
+void spdk_sock_write_config_json(struct spdk_json_write_ctx *w);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_SOCK_H */
diff --git a/src/spdk/include/spdk/stdinc.h b/src/spdk/include/spdk/stdinc.h
new file mode 100644
index 000000000..65820d58e
--- /dev/null
+++ b/src/spdk/include/spdk/stdinc.h
@@ -0,0 +1,98 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Standard C headers
+ *
+ * This file is intended to be included first by all other SPDK files.
+ */
+
+#ifndef SPDK_STDINC_H
+#define SPDK_STDINC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Standard C */
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+
+/* POSIX */
+#include <arpa/inet.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <ifaddrs.h>
+#include <netdb.h>
+#include <poll.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <syslog.h>
+#include <termios.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <regex.h>
+
+/* GNU extension */
+#include <getopt.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_STDINC_H */
diff --git a/src/spdk/include/spdk/string.h b/src/spdk/include/spdk/string.h
new file mode 100644
index 000000000..041010e20
--- /dev/null
+++ b/src/spdk/include/spdk/string.h
@@ -0,0 +1,271 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * String utility functions
+ */
+
+#ifndef SPDK_STRING_H
+#define SPDK_STRING_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * sprintf with automatic buffer allocation.
+ *
+ * The return value is the formatted string, which should be passed to free()
+ * when no longer needed.
+ *
+ * \param format Format for the string to print.
+ *
+ * \return the formatted string on success, or NULL on failure.
+ */
+char *spdk_sprintf_alloc(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
+/**
+ * vsprintf with automatic buffer allocation.
+ *
+ * The return value is the formatted string, which should be passed to free()
+ * when no longer needed.
+ *
+ * \param format Format for the string to print.
+ * \param args A value that identifies a variable arguments list.
+ *
+ * \return the formatted string on success, or NULL on failure.
+ */
+char *spdk_vsprintf_alloc(const char *format, va_list args);
+
+/**
+ * Append string using vsprintf with automatic buffer re-allocation.
+ *
+ * The return value is the formatted string, in which the original string in
+ * buffer is unchanged and the specified formatted string is appended.
+ *
+ * The returned string should be passed to free() when no longer needed.
+ *
+ * If buffer is NULL, the call is equivalent to spdk_sprintf_alloc().
+ * If the call fails, the original buffer is left untouched.
+ *
+ * \param buffer Buffer which has a formatted string.
+ * \param format Format for the string to print.
+ *
+ * \return the formatted string on success, or NULL on failure.
+ */
+char *spdk_sprintf_append_realloc(char *buffer, const char *format, ...);
+
+/**
+ * Append string using vsprintf with automatic buffer re-allocation.
+ * The return value is the formatted string, in which the original string in
+ * buffer is unchanged and the specified formatted string is appended.
+ *
+ * The returned string should be passed to free() when no longer needed.
+ *
+ * If buffer is NULL, the call is equivalent to spdk_sprintf_alloc().
+ * If the call fails, the original buffer is left untouched.
+ *
+ * \param buffer Buffer which has a formatted string.
+ * \param format Format for the string to print.
+ * \param args A value that identifies a variable arguments list.
+ *
+ * \return the formatted string on success, or NULL on failure.
+ */
+char *spdk_vsprintf_append_realloc(char *buffer, const char *format, va_list args);
+
+/**
+ * Convert string to lowercase in place.
+ *
+ * \param s String to convert to lowercase.
+ *
+ * \return the converted string.
+ */
+char *spdk_strlwr(char *s);
+
+/**
+ * Parse a delimited string with quote handling.
+ *
+ * Note that the string will be modified in place to add the string terminator
+ * to each field.
+ *
+ * \param stringp Pointer to starting location in string. *stringp will be updated
+ * to point to the start of the next field, or NULL if the end of the string has
+ * been reached.
+ * \param delim Null-terminated string containing the list of accepted delimiters.
+ *
+ * \return a pointer to beginning of the current field.
+ */
+char *spdk_strsepq(char **stringp, const char *delim);
+
+/**
+ * Trim whitespace from a string in place.
+ *
+ * \param s String to trim.
+ *
+ * \return the trimmed string.
+ */
+char *spdk_str_trim(char *s);
+
+/**
+ * Copy the string version of an error into the user supplied buffer
+ *
+ * \param errnum Error code.
+ * \param buf Pointer to a buffer in which to place the error message.
+ * \param buflen The size of the buffer in bytes.
+ */
+void spdk_strerror_r(int errnum, char *buf, size_t buflen);
+
+/**
+ * Return the string version of an error from a static, thread-local buffer. This
+ * function is thread safe.
+ *
+ * \param errnum Error code.
+ *
+ * \return a pointer to buffer upon success.
+ */
+const char *spdk_strerror(int errnum);
+
+/**
+ * Remove trailing newlines from the end of a string in place.
+ *
+ * Any sequence of trailing \\r and \\n characters is removed from the end of the
+ * string.
+ *
+ * \param s String to remove newline from.
+ *
+ * \return the number of characters removed.
+ */
+size_t spdk_str_chomp(char *s);
+
+/**
+ * Copy a string into a fixed-size buffer, padding extra bytes with a specific
+ * character.
+ *
+ * If src is longer than size, only size bytes will be copied.
+ *
+ * \param dst Pointer to destination fixed-size buffer to fill.
+ * \param src Pointer to source null-terminated string to copy into dst.
+ * \param size Number of bytes to fill in dst.
+ * \param pad Character to pad extra space in dst beyond the size of src.
+ */
+void spdk_strcpy_pad(void *dst, const char *src, size_t size, int pad);
+
+/**
+ * Find the length of a string that has been padded with a specific byte.
+ *
+ * \param str Right-padded string to find the length of.
+ * \param size Size of the full string pointed to by str, including padding.
+ * \param pad Character that was used to pad str up to size.
+ *
+ * \return the length of the non-padded portion of str.
+ */
+size_t spdk_strlen_pad(const void *str, size_t size, int pad);
+
+/**
+ * Parse an IP address into its hostname and port components. This modifies the
+ * IP address in place.
+ *
+ * \param ip A null terminated IP address, including port. Both IPv4 and IPv6
+ * are supported.
+ * \param host Will point to the start of the hostname within ip. The string will
+ * be null terminated.
+ * \param port Will point to the start of the port within ip. The string will be
+ * null terminated.
+ *
+ * \return 0 on success. -EINVAL on failure.
+ */
+int spdk_parse_ip_addr(char *ip, char **host, char **port);
+
+/**
+ * Parse a string representing a number possibly followed by a binary prefix.
+ *
+ * The string can contain a trailing "B" (KB,MB,GB) but it's not necessary.
+ * "128K" = 128 * 1024; "2G" = 2 * 1024 * 1024 * 1024; "2GB" = 2 * 1024 * 1024 * 1024;
+ * Additionally, lowercase "k", "m", "g" are parsed as well. They are processed
+ * the same as their uppercase equivalents.
+ *
+ * \param cap_str Null terminated string.
+ * \param cap Pointer where the parsed capacity (in bytes) will be put.
+ * \param has_prefix Pointer to a flag that will be set to describe whether given
+ * string contains a binary prefix.
+ *
+ * \return 0 on success, or negative errno on failure.
+ */
+int spdk_parse_capacity(const char *cap_str, uint64_t *cap, bool *has_prefix);
+
+/**
+ * Check if a buffer is all zero (0x00) bytes or not.
+ *
+ * \param data Buffer to check.
+ * \param size Size of data in bytes.
+ *
+ * \return true if data consists entirely of zeroes, or false if any byte in data
+ * is not zero.
+ */
+bool spdk_mem_all_zero(const void *data, size_t size);
+
+/**
+ * Convert the string in nptr to a long integer value according to the given base.
+ *
+ * spdk_strtol() does additional error checking and allows only strings that
+ * contain only numbers and represent a positive number or zero. The caller only has to check
+ * if the return value is not negative.
+ *
+ * \param nptr String containing numbers.
+ * \param base Base which must be between 2 and 32 inclusive, or be the special value 0.
+ *
+ * \return positive number or zero on success, or negative errno on failure.
+ */
+long int spdk_strtol(const char *nptr, int base);
+
+/**
+ * Convert the string in nptr to a long long integer value according to the given base.
+ *
+ * spdk_strtoll() does additional error checking and allows only strings that
+ * contain only numbers and represent a positive number or zero. The caller only has to check
+ * if the return value is not negative.
+ *
+ * \param nptr String containing numbers.
+ * \param base Base which must be between 2 and 32 inclusive, or be the special value 0.
+ *
+ * \return positive number or zero on success, or negative errno on failure.
+ */
+long long int spdk_strtoll(const char *nptr, int base);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/thread.h b/src/spdk/include/spdk/thread.h
new file mode 100644
index 000000000..841cf39a8
--- /dev/null
+++ b/src/spdk/include/spdk/thread.h
@@ -0,0 +1,736 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Thread
+ */
+
+#ifndef SPDK_THREAD_H_
+#define SPDK_THREAD_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/cpuset.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum spdk_thread_poller_rc {
+ SPDK_POLLER_IDLE, /* 0; presumably "polled, no work found" as a spdk_poller_fn result - confirm */
+ SPDK_POLLER_BUSY, /* 1; presumably "polled, work was processed" as a spdk_poller_fn result - confirm */
+};
+
+/**
+ * A stackless, lightweight thread.
+ */
+struct spdk_thread;
+
+/**
+ * A function repeatedly called on the same spdk_thread.
+ */
+struct spdk_poller;
+
+struct spdk_io_channel_iter;
+
+/**
+ * A function that is called each time a new thread is created.
+ * The implementor of this function should frequently call
+ * spdk_thread_poll() on the thread provided.
+ *
+ * \param thread The new spdk_thread.
+ */
+typedef int (*spdk_new_thread_fn)(struct spdk_thread *thread);
+
+/**
+ * SPDK thread operation type.
+ */
+enum spdk_thread_op {
+ /* Called each time a new thread is created. The implementor of this operation
+ * should frequently call spdk_thread_poll() on the thread provided.
+ */
+ SPDK_THREAD_OP_NEW,
+
+ /* Called when the SPDK thread needs to be rescheduled (e.g., when the cpumask
+ * of the SPDK thread is updated).
+ */
+ SPDK_THREAD_OP_RESCHED,
+};
+
+/**
+ * Function to be called for SPDK thread operation.
+ */
+typedef int (*spdk_thread_op_fn)(struct spdk_thread *thread, enum spdk_thread_op op);
+
+/**
+ * Function to check whether the SPDK thread operation is supported.
+ */
+typedef bool (*spdk_thread_op_supported_fn)(enum spdk_thread_op op);
+
+/**
+ * A function that will be called on the target thread.
+ *
+ * \param ctx Context passed as arg to spdk_thread_pass_msg().
+ */
+typedef void (*spdk_msg_fn)(void *ctx);
+
+/**
+ * Function to be called to pass a message to a thread.
+ *
+ * \param fn Callback function for a thread.
+ * \param ctx Context passed to fn.
+ * \param thread_ctx Context for the thread.
+ */
+typedef void (*spdk_thread_pass_msg)(spdk_msg_fn fn, void *ctx,
+ void *thread_ctx);
+
+/**
+ * Callback function for a poller.
+ *
+ * \param ctx Context passed as arg to spdk_poller_register().
+ * \return 0 to indicate that polling took place but no events were found;
+ * positive to indicate that polling took place and some events were processed;
+ * negative if the poller does not provide spin-wait information.
+ */
+typedef int (*spdk_poller_fn)(void *ctx);
+
+/**
+ * Function to be called to start a poller for the thread.
+ *
+ * \param thread_ctx Context for the thread.
+ * \param fn Callback function for a poller.
+ * \param arg Argument passed to callback.
+ * \param period_microseconds Polling period in microseconds.
+ *
+ * \return a pointer to the poller on success, or NULL on failure.
+ */
+typedef struct spdk_poller *(*spdk_start_poller)(void *thread_ctx,
+ spdk_poller_fn fn,
+ void *arg,
+ uint64_t period_microseconds);
+
+/**
+ * Function to be called to stop a poller.
+ *
+ * \param poller Poller to stop.
+ * \param thread_ctx Context for the thread.
+ */
+typedef void (*spdk_stop_poller)(struct spdk_poller *poller, void *thread_ctx);
+
+/**
+ * I/O channel creation callback.
+ *
+ * \param io_device I/O device associated with this channel.
+ * \param ctx_buf Context for the I/O device.
+ */
+typedef int (*spdk_io_channel_create_cb)(void *io_device, void *ctx_buf);
+
+/**
+ * I/O channel destruction callback.
+ *
+ * \param io_device I/O device associated with this channel.
+ * \param ctx_buf Context for the I/O device.
+ */
+typedef void (*spdk_io_channel_destroy_cb)(void *io_device, void *ctx_buf);
+
+/**
+ * I/O device unregister callback.
+ *
+ * \param io_device Unregistered I/O device.
+ */
+typedef void (*spdk_io_device_unregister_cb)(void *io_device);
+
+/**
+ * Called on the appropriate thread for each channel associated with io_device.
+ *
+ * \param i I/O channel iterator.
+ */
+typedef void (*spdk_channel_msg)(struct spdk_io_channel_iter *i);
+
+/**
+ * spdk_for_each_channel() callback.
+ *
+ * \param i I/O channel iterator.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+typedef void (*spdk_channel_for_each_cpl)(struct spdk_io_channel_iter *i, int status);
+
+/**
+ * \brief Represents a per-thread channel for accessing an I/O device.
+ *
+ * An I/O device may be a physical entity (e.g. an NVMe controller) or a software
+ * entity (e.g. a blobstore).
+ *
+ * This structure is not part of the API - all accesses should be done through
+ * spdk_io_channel function calls.
+ */
+struct spdk_io_channel {
+ struct spdk_thread *thread;
+ struct io_device *dev;
+ uint32_t ref;
+ uint32_t destroy_ref;
+ TAILQ_ENTRY(spdk_io_channel) tailq;
+ spdk_io_channel_destroy_cb destroy_cb;
+
+ /*
+ * Modules will allocate extra memory off the end of this structure
+ * to store hardware-specific references (e.g. NVMe queue pairs) or
+ * references to child device spdk_io_channels (e.g. for virtual
+ * bdevs).
+ */
+};
+
+/**
+ * Initialize the threading library. Must be called once prior to allocating any threads.
+ *
+ * \param new_thread_fn Called each time a new SPDK thread is created. The implementor
+ * is expected to frequently call spdk_thread_poll() on the provided thread.
+ * \param ctx_sz For each thread allocated, an additional region of memory of
+ * size ctx_sz will also be allocated, for use by the thread scheduler. A pointer
+ * to this region may be obtained by calling spdk_thread_get_ctx().
+ *
+ * \return 0 on success. Negated errno on failure.
+ */
+int spdk_thread_lib_init(spdk_new_thread_fn new_thread_fn, size_t ctx_sz);
+
+/**
+ * Initialize the threading library. Must be called once prior to allocating any threads.
+ *
+ * Both thread_op_fn and thread_op_supported_fn have to be specified or not
+ * specified together.
+ *
+ * \param thread_op_fn Called for SPDK thread operation.
+ * \param thread_op_supported_fn Called to check whether the SPDK thread operation is supported.
+ * \param ctx_sz Size of an additional memory region allocated for each thread, for use by the thread scheduler. A pointer
+ * to this region may be obtained by calling spdk_thread_get_ctx().
+ *
+ * \return 0 on success. Negated errno on failure.
+ */
+int spdk_thread_lib_init_ext(spdk_thread_op_fn thread_op_fn,
+ spdk_thread_op_supported_fn thread_op_supported_fn,
+ size_t ctx_sz);
+
+/**
+ * Release all resources associated with this library.
+ */
+void spdk_thread_lib_fini(void);
+
+/**
+ * Creates a new SPDK thread object.
+ *
+ * \param name Human-readable name for the thread; can be retrieved with spdk_thread_get_name().
+ * The string is copied, so the pointed-to data only needs to be valid during the
+ * spdk_thread_create() call. May be NULL to specify no name.
+ * \param cpumask Optional mask of CPU cores on which to schedule this thread. This is only
+ * a suggestion to the scheduler. The value is copied, so cpumask may be released when
+ * this function returns. May be NULL if no mask is required.
+ *
+ * \return a pointer to the allocated thread on success or NULL on failure.
+ */
+struct spdk_thread *spdk_thread_create(const char *name, struct spdk_cpuset *cpumask);
+
+/**
+ * Force the current system thread to act as if executing the given SPDK thread.
+ *
+ * \param thread The thread to set.
+ */
+void spdk_set_thread(struct spdk_thread *thread);
+
+/**
+ * Mark the thread as exited, failing all future spdk_thread_send_msg(),
+ * spdk_poller_register(), and spdk_get_io_channel() calls. May only be called
+ * within an spdk poller or message.
+ *
+ * All I/O channel references associated with the thread must be released
+ * using spdk_put_io_channel(), and all active pollers associated with the thread
+ * should be unregistered using spdk_poller_unregister(), prior to calling
+ * this function. This function will complete this processing. The completion can
+ * be queried by spdk_thread_is_exited().
+ *
+ * \param thread The thread to mark as exited.
+ *
+ * \return always 0. (The return value is deprecated but kept for ABI compatibility.)
+ */
+int spdk_thread_exit(struct spdk_thread *thread);
+
+/**
+ * Returns whether the thread is marked as exited.
+ *
+ * \param thread The thread to query.
+ *
+ * \return true if marked as exited, false otherwise.
+ */
+bool spdk_thread_is_exited(struct spdk_thread *thread);
+
+/**
+ * Destroy a thread, releasing all of its resources. May only be called
+ * on a thread previously marked as exited.
+ *
+ * \param thread The thread to destroy.
+ *
+ */
+void spdk_thread_destroy(struct spdk_thread *thread);
+
+/**
+ * Return a pointer to this thread's context.
+ *
+ * \param thread The thread on which to get the context.
+ *
+ * \return a pointer to the per-thread context, or NULL if there is
+ * no per-thread context.
+ */
+void *spdk_thread_get_ctx(struct spdk_thread *thread);
+
+/**
+ * Get the thread's cpumask.
+ *
+ * \param thread The thread to get the cpumask for.
+ *
+ * \return cpuset pointer
+ */
+struct spdk_cpuset *spdk_thread_get_cpumask(struct spdk_thread *thread);
+
+/**
+ * Set the current thread's cpumask to the specified value. The thread may be
+ * rescheduled to one of the CPUs specified in the cpumask.
+ *
+ * This API requires that the SPDK thread operation SPDK_THREAD_OP_RESCHED is supported.
+ *
+ * \param cpumask The new cpumask for the thread.
+ *
+ * \return 0 on success, negated errno otherwise.
+ */
+int spdk_thread_set_cpumask(struct spdk_cpuset *cpumask);
+
+/**
+ * Return the thread object associated with the context handle previously
+ * obtained by calling spdk_thread_get_ctx().
+ *
+ * \param ctx A context previously obtained by calling spdk_thread_get_ctx()
+ *
+ * \return The associated thread.
+ */
+struct spdk_thread *spdk_thread_get_from_ctx(void *ctx);
+
+/**
+ * Perform one iteration worth of processing on the thread. This includes
+ * both expired and continuous pollers as well as messages. If the thread
+ * has exited, return immediately.
+ *
+ * \param thread The thread to process
+ * \param max_msgs The maximum number of messages that will be processed.
+ * Use 0 to process the default number of messages (8).
+ * \param now The current time, in ticks. Optional. If 0 is passed, this
+ * function will call spdk_get_ticks() to get the current time.
+ * The current time is used as start time and this function
+ * will call spdk_get_ticks() at its end to know end time to
+ * measure run time of this function.
+ *
+ * \return 1 if work was done. 0 if no work was done.
+ */
+int spdk_thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now);
+
+/**
+ * Return the number of ticks until the next timed poller
+ * would expire. Timed pollers are pollers for which
+ * period_microseconds is greater than 0.
+ *
+ * \param thread The thread to check poller expiration times on
+ *
+ * \return Number of ticks. If no timed pollers, return 0.
+ */
+uint64_t spdk_thread_next_poller_expiration(struct spdk_thread *thread);
+
+/**
+ * Returns whether there are any active pollers (pollers for which
+ * period_microseconds equals 0) registered to be run on the thread.
+ *
+ * \param thread The thread to check.
+ *
+ * \return 1 if there is at least one active poller, 0 otherwise.
+ */
+int spdk_thread_has_active_pollers(struct spdk_thread *thread);
+
+/**
+ * Returns whether there are any pollers registered to be run
+ * on the thread.
+ *
+ * \param thread The thread to check.
+ *
+ * \return true if there is any active poller, false otherwise.
+ */
+bool spdk_thread_has_pollers(struct spdk_thread *thread);
+
+/**
+ * Returns whether there are scheduled operations to be run on the thread.
+ *
+ * \param thread The thread to check.
+ *
+ * \return true if there are no scheduled operations, false otherwise.
+ */
+bool spdk_thread_is_idle(struct spdk_thread *thread);
+
+/**
+ * Get count of allocated threads.
+ */
+uint32_t spdk_thread_get_count(void);
+
+/**
+ * Get a handle to the current thread.
+ *
+ * This handle may be passed to other threads and used as the target of
+ * spdk_thread_send_msg().
+ *
+ * \sa spdk_io_channel_get_thread()
+ *
+ * \return a pointer to the current thread on success or NULL on failure.
+ */
+struct spdk_thread *spdk_get_thread(void);
+
+/**
+ * Get a thread's name.
+ *
+ * \param thread Thread to query.
+ *
+ * \return the name of the thread.
+ */
+const char *spdk_thread_get_name(const struct spdk_thread *thread);
+
+/**
+ * Get a thread's ID.
+ *
+ * \param thread Thread to query.
+ *
+ * \return the ID of the thread..
+ */
+uint64_t spdk_thread_get_id(const struct spdk_thread *thread);
+
+/**
+ * Get the thread by the ID.
+ *
+ * \param id ID of the thread.
+ * \return Thread whose ID matches or NULL otherwise.
+ */
+struct spdk_thread *spdk_thread_get_by_id(uint64_t id);
+
+struct spdk_thread_stats { /* cumulative counters copied out by spdk_thread_get_stats() */
+ uint64_t busy_tsc; /* presumably TSC ticks spent doing work - TODO confirm */
+ uint64_t idle_tsc; /* presumably TSC ticks spent idle - TODO confirm */
+};
+
+/**
+ * Get statistics about the current thread.
+ *
+ * Copy cumulative thread stats values to the provided thread stats structure.
+ *
+ * \param stats User's thread_stats structure.
+ */
+int spdk_thread_get_stats(struct spdk_thread_stats *stats);
+
+/**
+ * Return the TSC value from the end of the last time this thread was polled.
+ *
+ * \param thread Thread to query.
+ *
+ * \return TSC value from the end of the last time this thread was polled.
+ */
+uint64_t spdk_thread_get_last_tsc(struct spdk_thread *thread);
+
+/**
+ * Send a message to the given thread.
+ *
+ * The message will be sent asynchronously - i.e. spdk_thread_send_msg will always return
+ * prior to `fn` being called.
+ *
+ * \param thread The target thread.
+ * \param fn This function will be called on the given thread.
+ * \param ctx This context will be passed to fn when called.
+ *
+ * \return 0 on success
+ * \return -ENOMEM if the message could not be allocated
+ * \return -EIO if the message could not be sent to the destination thread
+ */
+int spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx);
+
+/**
+ * Send a message to the given thread. Only one critical message can be outstanding at the same
+ * time. It's intended to use this function in any cases that might interrupt the execution of the
+ * application, such as signal handlers.
+ *
+ * The message will be sent asynchronously - i.e. spdk_thread_send_critical_msg will always return
+ * prior to `fn` being called.
+ *
+ * \param thread The target thread.
+ * \param fn This function will be called on the given thread.
+ *
+ * \return 0 on success
+ * \return -EIO if the message could not be sent to the destination thread, due to an already
+ * outstanding critical message
+ */
+int spdk_thread_send_critical_msg(struct spdk_thread *thread, spdk_msg_fn fn);
+
+/**
+ * Send a message to each thread, serially.
+ *
+ * The message is sent asynchronously - i.e. spdk_for_each_thread will return
+ * prior to `fn` being called on each thread.
+ *
+ * \param fn This is the function that will be called on each thread.
+ * \param ctx This context will be passed to fn when called.
+ * \param cpl This will be called on the originating thread after `fn` has been
+ * called on each thread.
+ */
+void spdk_for_each_thread(spdk_msg_fn fn, void *ctx, spdk_msg_fn cpl);
+
+/**
+ * Register a poller on the current thread.
+ *
+ * The poller can be unregistered by calling spdk_poller_unregister().
+ *
+ * \param fn This function will be called every `period_microseconds`.
+ * \param arg Argument passed to fn.
+ * \param period_microseconds How often to call `fn`. If 0, call `fn` as often
+ * as possible.
+ *
+ * \return a pointer to the poller registered on the current thread on success
+ * or NULL on failure.
+ */
+struct spdk_poller *spdk_poller_register(spdk_poller_fn fn,
+ void *arg,
+ uint64_t period_microseconds);
+
+/**
+ * Register a poller on the current thread with arbitrary name.
+ *
+ * The poller can be unregistered by calling spdk_poller_unregister().
+ *
+ * \param fn This function will be called every `period_microseconds`.
+ * \param arg Argument passed to fn.
+ * \param period_microseconds How often to call `fn`. If 0, call `fn` as often
+ * as possible.
+ * \param name Human readable name for the poller. Pointer of the poller function
+ * name is set if NULL.
+ *
+ * \return a pointer to the poller registered on the current thread on success
+ * or NULL on failure.
+ */
+struct spdk_poller *spdk_poller_register_named(spdk_poller_fn fn,
+ void *arg,
+ uint64_t period_microseconds,
+ const char *name);
+
+/**
+ * \brief Register a poller on the current thread, naming it after the
+ * poller function (the stringified `fn` argument is passed as the name).
+ */
+#define SPDK_POLLER_REGISTER(fn, arg, period_microseconds) \
+ spdk_poller_register_named(fn, arg, period_microseconds, #fn)
+
+/**
+ * Unregister a poller on the current thread.
+ *
+ * \param ppoller The poller to unregister.
+ */
+void spdk_poller_unregister(struct spdk_poller **ppoller);
+
+/**
+ * Pause a poller on the current thread.
+ *
+ * The poller is not run until it is resumed with spdk_poller_resume(). It is
+ * perfectly fine to pause an already paused poller.
+ *
+ * \param poller The poller to pause.
+ */
+void spdk_poller_pause(struct spdk_poller *poller);
+
+/**
+ * Resume a poller on the current thread.
+ *
+ * Resumes a poller paused with spdk_poller_pause(). It is perfectly fine to
+ * resume an unpaused poller.
+ *
+ * \param poller The poller to resume.
+ */
+void spdk_poller_resume(struct spdk_poller *poller);
+
+/**
+ * Register the opaque io_device context as an I/O device.
+ *
+ * After an I/O device is registered, it can return I/O channels using the
+ * spdk_get_io_channel() function.
+ *
+ * \param io_device The pointer to io_device context.
+ * \param create_cb Callback function invoked to allocate any resources required
+ * for a new I/O channel.
+ * \param destroy_cb Callback function invoked to release the resources for an
+ * I/O channel.
+ * \param ctx_size The size of the context buffer allocated to store references
+ * to allocated I/O channel resources.
+ * \param name A string name for the device used only for debugging. Optional -
+ * may be NULL.
+ */
+void spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb,
+ spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size,
+ const char *name);
+
+/**
+ * Unregister the opaque io_device context as an I/O device.
+ *
+ * The actual unregistration might be deferred until all active I/O channels are
+ * destroyed.
+ *
+ * \param io_device The pointer to io_device context.
+ * \param unregister_cb An optional callback function invoked to release any
+ * references to this I/O device.
+ */
+void spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb);
+
+/**
+ * Get an I/O channel for the specified io_device to be used by the calling thread.
+ *
+ * The io_device context pointer specified must have previously been registered
+ * using spdk_io_device_register(). If an existing I/O channel does not exist
+ * yet for the given io_device on the calling thread, it will allocate an I/O
+ * channel and invoke the create_cb function pointer specified in spdk_io_device_register().
+ * If an I/O channel already exists for the given io_device on the calling thread,
+ * its reference is returned rather than creating a new I/O channel.
+ *
+ * \param io_device The pointer to io_device context.
+ *
+ * \return a pointer to the I/O channel for this device on success or NULL on failure.
+ */
+struct spdk_io_channel *spdk_get_io_channel(void *io_device);
+
+/**
+ * Release a reference to an I/O channel. This happens asynchronously.
+ *
+ * This must be called on the same thread that called spdk_get_io_channel()
+ * for the specified I/O channel. If this releases the last reference to the
+ * I/O channel, The destroy_cb function specified in spdk_io_device_register()
+ * will be invoked to release any associated resources.
+ *
+ * \param ch I/O channel to release a reference.
+ */
+void spdk_put_io_channel(struct spdk_io_channel *ch);
+
+/**
+ * Return the per-channel context buffer that belongs to an I/O channel.
+ *
+ * \param ch I/O channel.
+ *
+ * \return a pointer to the memory immediately following the channel structure.
+ */
+static inline void *
+spdk_io_channel_get_ctx(struct spdk_io_channel *ch)
+{
+	return (uint8_t *)(ch + 1);
+}
+
+/**
+ * Get I/O channel from the context buffer. This is the inverse of
+ * spdk_io_channel_get_ctx().
+ *
+ * \param ctx The pointer to the context buffer.
+ *
+ * \return a pointer to the I/O channel associated with the context buffer.
+ */
+struct spdk_io_channel *spdk_io_channel_from_ctx(void *ctx);
+
+/**
+ * Get the thread associated with an I/O channel.
+ *
+ * \param ch I/O channel.
+ *
+ * \return a pointer to the thread associated with the I/O channel
+ */
+struct spdk_thread *spdk_io_channel_get_thread(struct spdk_io_channel *ch);
+
+/**
+ * Call 'fn' on each channel associated with io_device.
+ *
+ * This happens asynchronously, so fn may be called after spdk_for_each_channel
+ * returns. 'fn' will be called for each channel serially, such that two calls
+ * to 'fn' will not overlap in time. After 'fn' has been called, call
+ * spdk_for_each_channel_continue() to continue iterating.
+ *
+ * \param io_device 'fn' will be called on each channel associated with this io_device.
+ * \param fn Called on the appropriate thread for each channel associated with io_device.
+ * \param ctx Context buffer registered to spdk_io_channel_iter that can be obatined
+ * form the function spdk_io_channel_iter_get_ctx().
+ * \param cpl Called on the thread that spdk_for_each_channel was initially called
+ * from when 'fn' has been called on each channel.
+ */
+void spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx,
+ spdk_channel_for_each_cpl cpl);
+
+/**
+ * Get io_device from the I/O channel iterator.
+ *
+ * \param i I/O channel iterator.
+ *
+ * \return a pointer to the io_device.
+ */
+void *spdk_io_channel_iter_get_io_device(struct spdk_io_channel_iter *i);
+
+/**
+ * Get I/O channel from the I/O channel iterator.
+ *
+ * \param i I/O channel iterator.
+ *
+ * \return a pointer to the I/O channel.
+ */
+struct spdk_io_channel *spdk_io_channel_iter_get_channel(struct spdk_io_channel_iter *i);
+
+/**
+ * Get context buffer from the I/O channel iterator.
+ *
+ * \param i I/O channel iterator.
+ *
+ * \return a pointer to the context buffer.
+ */
+void *spdk_io_channel_iter_get_ctx(struct spdk_io_channel_iter *i);
+
+/**
+ * Helper function to iterate all channels for spdk_for_each_channel().
+ *
+ * \param i I/O channel iterator.
+ * \param status Status for the I/O channel iterator.
+ */
+void spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_THREAD_H_ */
diff --git a/src/spdk/include/spdk/trace.h b/src/spdk/include/spdk/trace.h
new file mode 100644
index 000000000..1ed6c8b31
--- /dev/null
+++ b/src/spdk/include/spdk/trace.h
@@ -0,0 +1,404 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Tracepoint library
+ */
+
+#ifndef _SPDK_TRACE_H_
+#define _SPDK_TRACE_H_
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_DEFAULT_NUM_TRACE_ENTRIES (32 * 1024)
+
+struct spdk_trace_entry { /* element type of spdk_trace_history.entries */
+ uint64_t tsc; /* NOTE(review): fields appear to mirror the _spdk_trace_record() arguments */
+ uint16_t tpoint_id;
+ uint16_t poller_id;
+ uint32_t size;
+ uint64_t object_id;
+ uint64_t arg1;
+};
+
+/* Size of spdk_trace_flags.owner. If type changes from a uint8_t, change this value. */
+#define SPDK_TRACE_MAX_OWNER (UCHAR_MAX + 1)
+
+struct spdk_trace_owner {
+ uint8_t type; /* type of the trace owner, cf. spdk_trace_register_owner() */
+ char id_prefix; /* prefix of id for the trace owner */
+};
+
+/* Size of spdk_trace_flags.object. If type changes from a uint8_t, change this value. */
+#define SPDK_TRACE_MAX_OBJECT (UCHAR_MAX + 1)
+
+struct spdk_trace_object {
+ uint8_t type; /* type of the trace object, cf. spdk_trace_register_object() */
+ char id_prefix; /* prefix of id for the trace object */
+};
+
+#define SPDK_TRACE_MAX_GROUP_ID 16 /* number of tracepoint groups */
+#define SPDK_TRACE_MAX_TPOINT_ID (SPDK_TRACE_MAX_GROUP_ID * 64) /* 64 tracepoints per group */
+#define SPDK_TPOINT_ID(group, tpoint) (((group) * 64) + (tpoint)) /* id = group * 64 + tpoint */
+
+#define SPDK_TRACE_ARG_TYPE_INT 0 /* presumably values for spdk_trace_tpoint.arg1_type - TODO confirm */
+#define SPDK_TRACE_ARG_TYPE_PTR 1
+#define SPDK_TRACE_ARG_TYPE_STR 2
+
+struct spdk_trace_tpoint { /* cf. the parameters of spdk_trace_register_description() */
+ char name[24]; /* name for the tpoint */
+ uint16_t tpoint_id; /* id for the tpoint */
+ uint8_t owner_type; /* owner type for the tpoint */
+ uint8_t object_type; /* object type for the tpoint */
+ uint8_t new_object; /* new object for the tpoint */
+ uint8_t arg1_type; /* type of arg1 */
+ uint8_t reserved;
+ char arg1_name[8]; /* name of argument */
+};
+
+struct spdk_trace_history { /* per-lcore trace state */
+ /** Logical core number associated with this structure instance. */
+ int lcore;
+
+ /** Number of trace_entries contained in each trace_history. */
+ uint64_t num_entries;
+
+ /**
+ * Running count of number of occurrences of each tracepoint on this
+ * lcore. Debug tools can use this to easily count tracepoints such as
+ * number of SCSI tasks completed or PDUs read.
+ */
+ uint64_t tpoint_count[SPDK_TRACE_MAX_TPOINT_ID];
+
+ /** Index to next spdk_trace_entry to fill. */
+ uint64_t next_entry;
+
+ /**
+ * Circular buffer of spdk_trace_entry structures for tracing
+ * tpoints on this core. Debug tool spdk_trace reads this
+ * buffer from shared memory to post-process the tpoint entries and
+ * display them in a human-readable format.
+ */
+ struct spdk_trace_entry entries[0]; /* sized at run time, see spdk_get_trace_history_size() */
+};
+
+#define SPDK_TRACE_MAX_LCORE 128
+
+struct spdk_trace_flags { /* first member of struct spdk_trace_histories */
+	uint64_t tsc_rate;
+	uint64_t tpoint_mask[SPDK_TRACE_MAX_GROUP_ID]; /* one 64-bit enable mask per tpoint group */
+	struct spdk_trace_owner owner[SPDK_TRACE_MAX_OWNER]; /* named limit instead of UCHAR_MAX + 1 */
+	struct spdk_trace_object object[SPDK_TRACE_MAX_OBJECT];
+	struct spdk_trace_tpoint tpoint[SPDK_TRACE_MAX_TPOINT_ID];
+
+	/** Offset of each trace_history from the beginning of this data structure.
+	 * The last one is the offset of the file end.
+	 */
+	uint64_t lcore_history_offsets[SPDK_TRACE_MAX_LCORE + 1];
+};
+extern struct spdk_trace_flags *g_trace_flags;
+extern struct spdk_trace_histories *g_trace_histories;
+
+
+struct spdk_trace_histories {
+ struct spdk_trace_flags flags; /* holds lcore_history_offsets used to locate each history */
+
+ /**
+ * struct spdk_trace_history has a dynamic size determined by num_entries
+ * in spdk_trace_init. The array size of per_lcore_history is marked as 0
+ * in uint8_t as a reminder that each per-lcore history must be obtained
+ * through spdk_get_per_lcore_history() rather than by indexing this member.
+ */
+ uint8_t per_lcore_history[0];
+};
+
+/* Size in bytes of one trace history: its header plus num_entries trace entries. */
+static inline uint64_t spdk_get_trace_history_size(uint64_t num_entries)
+{
+	return sizeof(struct spdk_trace_entry) * num_entries + sizeof(struct spdk_trace_history);
+}
+
+/* Total size of the trace area: the extra offset slot that marks the file end. */
+static inline uint64_t spdk_get_trace_histories_size(struct spdk_trace_histories *trace_histories)
+{
+	return (trace_histories->flags).lcore_history_offsets[SPDK_TRACE_MAX_LCORE];
+}
+
+/* Locate the history of one lcore inside the trace area; NULL if lcore is out of range. */
+static inline struct spdk_trace_history *
+spdk_get_per_lcore_history(struct spdk_trace_histories *trace_histories, unsigned lcore)
+{
+	uint64_t offset;
+
+	if (lcore >= SPDK_TRACE_MAX_LCORE) {
+		return NULL;
+	}
+
+	/* The stored offset is applied to the start of the spdk_trace_histories structure. */
+	offset = trace_histories->flags.lcore_history_offsets[lcore];
+	return (struct spdk_trace_history *)((char *)trace_histories + offset);
+}
+
+void _spdk_trace_record(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id,
+ uint32_t size, uint64_t object_id, uint64_t arg1);
+
+/**
+ * Record a tracepoint, provided g_trace_histories is set and the tracepoint is
+ * enabled in the mask of its group. Debug tools can read the recorded entries
+ * from shared memory and display them in a human-readable format. The caller
+ * supplies no timestamp; this function will call spdk_get_ticks() to get the
+ * current tsc to save in the tracepoint.
+ *
+ * \param tpoint_id Tracepoint id to record.
+ * \param poller_id Poller id to record.
+ * \param size Size to record.
+ * \param object_id Object id to record.
+ * \param arg1 Argument to record.
+ */
+static inline
+void spdk_trace_record(uint16_t tpoint_id, uint16_t poller_id, uint32_t size,
+		       uint64_t object_id, uint64_t arg1)
+{
+	/* Bit of this tracepoint inside its group's 64-bit mask (low 6 bits of the id). */
+	const uint64_t tpoint_bit = 1ULL << (tpoint_id & 0x3F);
+
+	assert(tpoint_id < SPDK_TRACE_MAX_TPOINT_ID);
+
+	/* The bits above the low 6 select the group, and every group has its own mask. */
+	if (g_trace_histories != NULL &&
+	    (g_trace_histories->flags.tpoint_mask[tpoint_id >> 6] & tpoint_bit) != 0) {
+		/* tsc is passed as 0 here; this inline wrapper takes no timestamp itself. */
+		_spdk_trace_record(0, tpoint_id, poller_id, size, object_id, arg1);
+	}
+}
+
+/**
+ * Record a tracepoint with a caller-supplied timestamp, provided
+ * g_trace_histories is set and the tracepoint is enabled in the mask of its
+ * group. Debug tools can read the recorded entries from shared memory to
+ * post-process them and display them in a human-readable format.
+ *
+ * \param tsc Current tsc.
+ * \param tpoint_id Tracepoint id to record; must be smaller than
+ * SPDK_TRACE_MAX_TPOINT_ID (checked with assert()).
+ * \param poller_id Poller id to record.
+ * \param size Size to record.
+ * \param object_id Object id to record.
+ * \param arg1 Argument to record.
+ */
+static inline
+void spdk_trace_record_tsc(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id,
+			   uint32_t size, uint64_t object_id, uint64_t arg1)
+{
+	/* Bit of this tracepoint inside its group's 64-bit mask (low 6 bits of the id). */
+	const uint64_t tpoint_bit = 1ULL << (tpoint_id & 0x3F);
+
+	assert(tpoint_id < SPDK_TRACE_MAX_TPOINT_ID);
+
+	/* The bits above the low 6 select the group, and every group has its own mask. */
+	if (g_trace_histories != NULL &&
+	    (g_trace_histories->flags.tpoint_mask[tpoint_id >> 6] & tpoint_bit) != 0) {
+		_spdk_trace_record(tsc, tpoint_id, poller_id, size, object_id, arg1);
+	}
+}
+
+/**
+ * Get the current tpoint mask of the given tpoint group.
+ *
+ * \param group_id Tpoint group id associated with the tpoint mask.
+ *
+ * \return current tpoint mask.
+ */
+uint64_t spdk_trace_get_tpoint_mask(uint32_t group_id);
+
+/**
+ * Add the specified tpoints to the current tpoint mask for the given tpoint group.
+ *
+ * \param group_id Tpoint group id associated with the tpoint mask.
+ * \param tpoint_mask Tpoint mask which indicates which tpoints to add to the
+ * current tpoint mask.
+ */
+void spdk_trace_set_tpoints(uint32_t group_id, uint64_t tpoint_mask);
+
+/**
+ * Clear the specified tpoints from the current tpoint mask for the given tpoint group.
+ *
+ * \param group_id Tpoint group id associated with the tpoint mask.
+ * \param tpoint_mask Tpoint mask which indicates which tpoints to clear from
+ * the current tpoint mask.
+ */
+void spdk_trace_clear_tpoints(uint32_t group_id, uint64_t tpoint_mask);
+
+/**
+ * Get a mask of all tracepoint groups which have at least one tracepoint enabled.
+ *
+ * \return a mask of all tracepoint groups.
+ */
+uint64_t spdk_trace_get_tpoint_group_mask(void);
+
+/**
+ * For each tpoint group specified in the group mask, enable all of its tpoints.
+ *
+ * \param tpoint_group_mask Tpoint group mask that indicates which tpoints to enable.
+ */
+void spdk_trace_set_tpoint_group_mask(uint64_t tpoint_group_mask);
+
+/**
+ * For each tpoint group specified in the group mask, disable all of its tpoints.
+ *
+ * \param tpoint_group_mask Tpoint group mask that indicates which tpoints to disable.
+ */
+void spdk_trace_clear_tpoint_group_mask(uint64_t tpoint_group_mask);
+
+/**
+ * Initialize the trace environment. Debug tool can read the information from
+ * the given shared memory to post-process the tpoint entries and display in a
+ * human-readable format.
+ *
+ * \param shm_name Name of shared memory.
+ * \param num_entries Number of trace entries per lcore.
+ * \return 0 on success, else non-zero indicates a failure.
+ */
+int spdk_trace_init(const char *shm_name, uint64_t num_entries);
+
+/**
+ * Unmap global trace memory structs.
+ */
+void spdk_trace_cleanup(void);
+
+/**
+ * Initialize trace flags.
+ */
+void spdk_trace_flags_init(void);
+
+#define OWNER_NONE 0 /* presumably the owner type meaning "no owner" - TODO confirm */
+#define OBJECT_NONE 0 /* presumably the object type meaning "no object" - TODO confirm */
+
+/**
+ * Register the trace owner.
+ *
+ * \param type Type of the trace owner.
+ * \param id_prefix Prefix of id for the trace owner.
+ */
+void spdk_trace_register_owner(uint8_t type, char id_prefix);
+
+/**
+ * Register the trace object.
+ *
+ * \param type Type of the trace object.
+ * \param id_prefix Prefix of id for the trace object.
+ */
+void spdk_trace_register_object(uint8_t type, char id_prefix);
+
+/**
+ * Register the description for the tpoint.
+ *
+ * \param name Name for the tpoint.
+ * \param tpoint_id Id for the tpoint.
+ * \param owner_type Owner type for the tpoint.
+ * \param object_type Object type for the tpoint.
+ * \param new_object New object for the tpoint.
+ * \param arg1_type Type of arg1.
+ * \param arg1_name Name of argument.
+ */
+void spdk_trace_register_description(const char *name, uint16_t tpoint_id, uint8_t owner_type,
+ uint8_t object_type, uint8_t new_object,
+ uint8_t arg1_type, const char *arg1_name);
+
+struct spdk_trace_register_fn *spdk_trace_get_first_register_fn(void);
+
+struct spdk_trace_register_fn *spdk_trace_get_next_register_fn(struct spdk_trace_register_fn
+ *register_fn);
+
+/**
+ * Enable trace on specific tpoint group
+ *
+ * \param group_name Name of group to enable, "all" for enabling all groups.
+ * \return 0 on success, else non-zero indicates a failure.
+ */
+int spdk_trace_enable_tpoint_group(const char *group_name);
+
+/**
+ * Disable trace on specific tpoint group
+ *
+ * \param group_name Name of group to disable, "all" for disabling all groups.
+ * \return 0 on success, else non-zero indicates a failure.
+ */
+int spdk_trace_disable_tpoint_group(const char *group_name);
+
+/**
+ * Show trace mask and its usage.
+ *
+ * \param f File to hold the mask's information.
+ * \param tmask_arg Command line option to set the trace group mask.
+ */
+void spdk_trace_mask_usage(FILE *f, const char *tmask_arg);
+
+struct spdk_trace_register_fn { /* instantiated by SPDK_TRACE_REGISTER_FN() */
+ const char *name; /* name_str argument of the macro */
+ uint8_t tgroup_id; /* _tgroup_id argument of the macro */
+ void (*reg_fn)(void); /* the registration function declared by the macro */
+ struct spdk_trace_register_fn *next; /* NULL initially; presumably links the registered entries */
+};
+
+/**
+ * Add new trace register function.
+ *
+ * \param reg_fn Trace register function to add.
+ */
+void spdk_trace_add_register_fn(struct spdk_trace_register_fn *reg_fn);
+
+#define SPDK_TRACE_REGISTER_FN(fn, name_str, _tgroup_id) \
+ static void fn(void); /* forward declaration; the body follows the macro invocation */ \
+ struct spdk_trace_register_fn reg_ ## fn = { \
+ .name = name_str, \
+ .tgroup_id = _tgroup_id, \
+ .reg_fn = fn, \
+ .next = NULL, \
+ }; \
+ __attribute__((constructor)) static void _ ## fn(void) /* constructor: run before main() */ \
+ { \
+ spdk_trace_add_register_fn(&reg_ ## fn); \
+ } \
+ static void fn(void) /* header of fn; the user supplies the function body */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/util.h b/src/spdk/include/spdk/util.h
new file mode 100644
index 000000000..6358524fa
--- /dev/null
+++ b/src/spdk/include/spdk/util.h
@@ -0,0 +1,190 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * General utility functions
+ */
+
+#ifndef SPDK_UTIL_H
+#define SPDK_UTIL_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPDK_CACHE_LINE_SIZE 64
+
+/* Smaller/larger of two values; note that an argument may be evaluated twice. */
+#define spdk_min(a,b) (((a)<(b))?(a):(b))
+#define spdk_max(a,b) (((a)>(b))?(a):(b))
+/* Element count of an array, and pointer to the `type` object whose `member` is at `ptr`. */
+#define SPDK_COUNTOF(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define SPDK_CONTAINEROF(ptr, type, member) ((type *)((uintptr_t)(ptr) - offsetof(type, member)))
+
+#define SPDK_SEC_TO_USEC 1000000ULL /* microseconds per second */
+#define SPDK_SEC_TO_NSEC 1000000000ULL /* nanoseconds per second */
+
+/* Ceiling division of unsigned integers; y is expanded twice. */
+#define SPDK_CEIL_DIV(x,y) (((x)+(y)-1)/(y))
+
+/**
+ * Round a value down to a multiple of a power-of-two alignment. The result
+ * has the type of the first parameter (via the typeof extension) and is no
+ * bigger than the first parameter. Second parameter must be a power-of-two
+ * value.
+ */
+#define SPDK_ALIGN_FLOOR(val, align) \
+ (typeof(val))((val) & (~((typeof(val))((align) - 1))))
+/**
+ * Round a value up to a multiple of a power-of-two alignment. The result
+ * has the type of the first parameter and is no lower than the first
+ * parameter, unless the addition of (align - 1) wraps around. Second
+ * parameter must be a power-of-two value; it is expanded twice.
+ */
+#define SPDK_ALIGN_CEIL(val, align) \
+ SPDK_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)
+
+uint32_t spdk_u32log2(uint32_t x);
+
+/* Computes 1 << (1 + spdk_u32log2(x - 1)). NOTE(review): verify callers never pass 0. */
+static inline uint32_t spdk_align32pow2(uint32_t x)
+{
+	return 1u << (spdk_u32log2(x - 1) + 1);
+}
+
+uint64_t spdk_u64log2(uint64_t x);
+
+/* Computes 1 << (1 + spdk_u64log2(x - 1)). NOTE(review): verify callers never pass 0. */
+static inline uint64_t spdk_align64pow2(uint64_t x)
+{
+	return 1ULL << (spdk_u64log2(x - 1) + 1);
+}
+
+/**
+ * Check if a uint32_t is a power of 2.
+ *
+ * \param x Value to test.
+ *
+ * \return true if exactly one bit of x is set, false otherwise (including x == 0).
+ */
+static inline bool
+spdk_u32_is_pow2(uint32_t x)
+{
+	return x != 0 && (x & (x - 1)) == 0;
+}
+
+/* Ceiling of num / divisor, computed as (num + divisor - 1) / divisor in uint64_t. */
+static inline uint64_t spdk_divide_round_up(uint64_t num, uint64_t divisor)
+{
+	return (divisor - 1 + num) / divisor;
+}
+
+/**
+ * Copy the data described by the source iovec to the destination iovec.
+ *
+ * \return The number of bytes copied.
+ */
+size_t spdk_iovcpy(struct iovec *siov, size_t siovcnt, struct iovec *diov, size_t diovcnt);
+
+
+/**
+ * Scan build is really pessimistic and assumes that mempool functions can
+ * dequeue NULL buffers even if they return success. This is obviously a false
+ * positive, but the mempool dequeue can be done in a DPDK inline function that
+ * we can't decorate with usual assert(buf != NULL). Instead, we point all
+ * arr_size entries of arr at one static dummy buffer of buf_size bytes.
+ */
+#define SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(arr, arr_size, buf_size) \
+	do { \
+		static char dummy_buf[buf_size]; \
+		int i; \
+		for (i = 0; i < (arr_size); i++) { \
+			(arr)[i] = (void *)dummy_buf; \
+		} \
+	} while (0)
+
+/**
+ * Add two 32-bit sequence numbers.
+ *
+ * \param s1 First sequence number
+ * \param s2 Second sequence number
+ *
+ * \return Sum of s1 and s2 based on serial number arithmetic.
+ */
+static inline uint32_t
+spdk_sn32_add(uint32_t s1, uint32_t s2)
+{
+	return (uint32_t)(s2 + s1);
+}
+
+#define SPDK_SN32_CMPMAX (1U << (32 - 1)) /* 2^31, half of the 32-bit sequence space */
+
+/**
+ * Test whether sequence number s1 precedes s2 in serial number arithmetic.
+ *
+ * \param s1 First sequence number
+ * \param s2 Second sequence number
+ * \return true if s1 is less than s2, or false otherwise.
+ */
+static inline bool
+spdk_sn32_lt(uint32_t s1, uint32_t s2)
+{
+	if (s1 < s2) {
+		return s2 - s1 < SPDK_SN32_CMPMAX;
+	}
+	return s1 > s2 && s1 - s2 > SPDK_SN32_CMPMAX;
+}
+
+/**
+ * Test whether sequence number s1 follows s2 in serial number arithmetic.
+ *
+ * \param s1 First sequence number
+ * \param s2 Second sequence number
+ * \return true if s1 is greater than s2, or false otherwise.
+ */
+static inline bool
+spdk_sn32_gt(uint32_t s1, uint32_t s2)
+{
+	if (s1 < s2) {
+		return s2 - s1 > SPDK_SN32_CMPMAX;
+	}
+	return s1 > s2 && s1 - s2 < SPDK_SN32_CMPMAX;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/uuid.h b/src/spdk/include/spdk/uuid.h
new file mode 100644
index 000000000..820944e34
--- /dev/null
+++ b/src/spdk/include/spdk/uuid.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * UUID types and functions
+ */
+
+#ifndef SPDK_UUID_H
+#define SPDK_UUID_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct spdk_uuid { /* UUID value held as 16 raw bytes */
+ union {
+ uint8_t raw[16]; /* the only view of the UUID declared in this union */
+ } u;
+};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_uuid) == 16, "Incorrect size"); /* the struct must occupy exactly 16 bytes */
+
+#define SPDK_UUID_STRING_LEN 37 /* 36 characters + null terminator */
+
+/**
+ * Convert UUID in textual format into a spdk_uuid.
+ *
+ * \param[out] uuid User-provided UUID buffer.
+ * \param uuid_str UUID in textual format in C string.
+ *
+ * \return 0 on success, or negative errno on failure.
+ */
+int spdk_uuid_parse(struct spdk_uuid *uuid, const char *uuid_str);
+
+/**
+ * Convert UUID in spdk_uuid into lowercase textual format.
+ *
+ * \param uuid_str User-provided string buffer to write the textual format into.
+ * \param uuid_str_size Size of uuid_str buffer. Must be at least SPDK_UUID_STRING_LEN.
+ * \param uuid UUID to convert to textual format.
+ *
+ * \return 0 on success, or negative errno on failure.
+ */
+int spdk_uuid_fmt_lower(char *uuid_str, size_t uuid_str_size, const struct spdk_uuid *uuid);
+
+/**
+ * Compare two UUIDs.
+ *
+ * \param u1 UUID 1.
+ * \param u2 UUID 2.
+ *
+ * \return 0 if u1 == u2, less than 0 if u1 < u2, greater than 0 if u1 > u2.
+ */
+int spdk_uuid_compare(const struct spdk_uuid *u1, const struct spdk_uuid *u2);
+
+/**
+ * Generate a new UUID.
+ *
+ * \param[out] uuid User-provided UUID buffer to fill.
+ */
+void spdk_uuid_generate(struct spdk_uuid *uuid);
+
+/**
+ * Copy a UUID.
+ *
+ * \param src Source UUID to copy from.
+ * \param dst Destination UUID to store.
+ */
+void spdk_uuid_copy(struct spdk_uuid *dst, const struct spdk_uuid *src);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/version.h b/src/spdk/include/spdk/version.h
new file mode 100644
index 000000000..4b8b5d561
--- /dev/null
+++ b/src/spdk/include/spdk/version.h
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * SPDK version number definitions
+ */
+
+#ifndef SPDK_VERSION_H
+#define SPDK_VERSION_H
+
+/**
+ * Major version number (year of original release minus 2000).
+ */
+#define SPDK_VERSION_MAJOR 20
+
+/**
+ * Minor version number (month of original release).
+ */
+#define SPDK_VERSION_MINOR 7
+
+/**
+ * Patch level.
+ *
+ * Patch level is incremented on maintenance branch releases and reset to 0 for each
+ * new major.minor release.
+ */
+#define SPDK_VERSION_PATCH 0
+
+/**
+ * Version string suffix.
+ */
+#define SPDK_VERSION_SUFFIX ""
+
+/**
+ * Single numeric value representing a version number for compile-time comparisons.
+ *
+ * Example usage:
+ *
+ * \code
+ * #if SPDK_VERSION >= SPDK_VERSION_NUM(17, 7, 0)
+ * Use feature from SPDK v17.07
+ * #endif
+ * \endcode
+ */
+#define SPDK_VERSION_NUM(major, minor, patch) \
+ (((major) * 100 + (minor)) * 100 + (patch))
+
+/**
+ * Current version as a SPDK_VERSION_NUM.
+ */
+#define SPDK_VERSION SPDK_VERSION_NUM(SPDK_VERSION_MAJOR, SPDK_VERSION_MINOR, SPDK_VERSION_PATCH)
+
+#define SPDK_VERSION_STRINGIFY_x(x) #x
+#define SPDK_VERSION_STRINGIFY(x) SPDK_VERSION_STRINGIFY_x(x) /* extra level so a macro argument is expanded before it is stringified */
+
+#define SPDK_VERSION_MAJOR_STRING SPDK_VERSION_STRINGIFY(SPDK_VERSION_MAJOR)
+
+#if SPDK_VERSION_MINOR < 10 /* single-digit minor versions get a leading zero, e.g. ".07" */
+#define SPDK_VERSION_MINOR_STRING ".0" SPDK_VERSION_STRINGIFY(SPDK_VERSION_MINOR)
+#else
+#define SPDK_VERSION_MINOR_STRING "." SPDK_VERSION_STRINGIFY(SPDK_VERSION_MINOR)
+#endif
+
+#if SPDK_VERSION_PATCH != 0 /* the patch component is left out of the string when it is 0 */
+#define SPDK_VERSION_PATCH_STRING "." SPDK_VERSION_STRINGIFY(SPDK_VERSION_PATCH)
+#else
+#define SPDK_VERSION_PATCH_STRING ""
+#endif
+
+#ifdef SPDK_GIT_COMMIT /* commit text is only produced when SPDK_GIT_COMMIT is defined */
+#define SPDK_GIT_COMMIT_STRING SPDK_VERSION_STRINGIFY(SPDK_GIT_COMMIT)
+#define SPDK_GIT_COMMIT_STRING_SHA1 " git sha1 " SPDK_GIT_COMMIT_STRING
+#else
+#define SPDK_GIT_COMMIT_STRING ""
+#define SPDK_GIT_COMMIT_STRING_SHA1 ""
+#endif
+
+/**
+ * Human-readable version string.
+ */
+#define SPDK_VERSION_STRING \
+ "SPDK v" \
+ SPDK_VERSION_MAJOR_STRING \
+ SPDK_VERSION_MINOR_STRING \
+ SPDK_VERSION_PATCH_STRING \
+ SPDK_VERSION_SUFFIX \
+ SPDK_GIT_COMMIT_STRING_SHA1
+
+#endif
diff --git a/src/spdk/include/spdk/vhost.h b/src/spdk/include/spdk/vhost.h
new file mode 100644
index 000000000..211c2d337
--- /dev/null
+++ b/src/spdk/include/spdk/vhost.h
@@ -0,0 +1,337 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * SPDK vhost
+ */
+
+#ifndef SPDK_VHOST_H
+#define SPDK_VHOST_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/cpuset.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Callback for spdk_vhost_init().
+ *
+ * \param rc 0 on success, negative errno on failure
+ */
+typedef void (*spdk_vhost_init_cb)(int rc);
+
+/** Callback for spdk_vhost_fini(). */
+typedef void (*spdk_vhost_fini_cb)(void);
+
+/**
+ * Set the path to the directory where vhost sockets will be created.
+ *
+ * This function must be called before spdk_vhost_init().
+ *
+ * \param basename Path to vhost socket directory
+ *
+ * \return 0 on success, negative errno on error.
+ */
+int spdk_vhost_set_socket_path(const char *basename);
+
+/**
+ * Init vhost environment.
+ *
+ * \param init_cb Function to be called when the initialization is complete.
+ */
+void spdk_vhost_init(spdk_vhost_init_cb init_cb);
+
+/**
+ * Clean up the environment of vhost.
+ *
+ * \param fini_cb Function to be called when the cleanup is complete.
+ */
+void spdk_vhost_fini(spdk_vhost_fini_cb fini_cb);
+
+
+/**
+ * Write vhost subsystem configuration into provided JSON context.
+ *
+ * \param w JSON write context
+ */
+void spdk_vhost_config_json(struct spdk_json_write_ctx *w);
+
+/**
+ * Deinit vhost application. This is called once by SPDK app layer.
+ */
+void spdk_vhost_shutdown_cb(void);
+
+/**
+ * SPDK vhost device (vdev). An equivalent of Virtio device.
+ * Both virtio-blk and virtio-scsi devices are represented by this
+ * struct. For virtio-scsi a single vhost device (also called SCSI
+ * controller) may contain multiple SCSI targets (devices), each of
+ * which may contain multiple logical units (SCSI LUNs). For now
+ * only one LUN per target is available.
+ *
+ * All vdev-changing functions operate directly on this object.
+ * Note that \c spdk_vhost_dev cannot be acquired. This object is
+ * only accessible as a callback parameter via \c
+ * spdk_vhost_call_external_event and its derivatives. This ensures
+ * that all access to the vdev is piped through a single,
+ * thread-safe API.
+ */
+struct spdk_vhost_dev;
+
+/**
+ * Lock the global vhost mutex synchronizing all the vhost device accesses.
+ */
+void spdk_vhost_lock(void);
+
+/**
+ * Try to lock the global vhost mutex synchronizing all the vhost device accesses.
+ *
+ * \return 0 if the mutex could be locked immediately, negative errno otherwise.
+ */
+int spdk_vhost_trylock(void);
+
+/**
+ * Unlock the global vhost mutex.
+ */
+void spdk_vhost_unlock(void);
+
+/**
+ * Find a vhost device by name.
+ *
+ * \return vhost device or NULL
+ */
+struct spdk_vhost_dev *spdk_vhost_dev_find(const char *name);
+
+/**
+ * Get the next vhost device. If there are no more devices to iterate
+ * through, NULL will be returned.
+ *
+ * \param vdev vhost device. If NULL, this function will return the
+ * very first device.
+ * \return vdev vhost device or NULL
+ */
+struct spdk_vhost_dev *spdk_vhost_dev_next(struct spdk_vhost_dev *vdev);
+
+/**
+ * Synchronized vhost event used for user callbacks.
+ *
+ * \param vdev vhost device.
+ * \param arg user-provided parameter.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+typedef int (*spdk_vhost_event_fn)(struct spdk_vhost_dev *vdev, void *arg);
+
+/**
+ * Get the name of the vhost device. This is equal to the filename
+ * of socket file. The name is constant throughout the lifetime of
+ * a vdev.
+ *
+ * \param vdev vhost device.
+ *
+ * \return name of the vdev.
+ */
+const char *spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev);
+
+/**
+ * Get cpuset of the vhost device. The cpuset is constant throughout the lifetime
+ * of a vdev. It is a subset of SPDK app cpuset vhost was started with.
+ *
+ * \param vdev vhost device.
+ *
+ * \return cpuset of the vdev.
+ */
+const struct spdk_cpuset *spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev);
+
+/**
+ * By default, events are generated when asked, but for high queue depth and
+ * high IOPS this proves to be inefficient both for the guest kernel, which has
+ * to handle a lot more IO completions, and for SPDK vhost, which needs to make
+ * more syscalls. If enabled, limit the number of events (IRQs) sent to the
+ * initiator by SPDK vhost, effectively coalescing several completions. This of
+ * course introduces an IO latency penalty proportional to the event delay time.
+ *
+ * The actual event delay time is calculated according to the formula below:
+ * if (delay_base == 0 || IOPS < iops_threshold) {
+ * delay = 0;
+ * } else {
+ * delay = delay_base * (IOPS - iops_threshold) / iops_threshold;
+ * }
+ *
+ * \param vdev vhost device.
+ * \param delay_base_us Base delay time in microseconds. If 0, coalescing is disabled.
+ * \param iops_threshold IOPS threshold when coalescing is activated.
+ */
+int spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
+ uint32_t iops_threshold);
+
+/**
+ * Get coalescing parameters.
+ *
+ * \see spdk_vhost_set_coalescing
+ *
+ * \param vdev vhost device.
+ * \param delay_base_us Optional pointer to store base delay time.
+ * \param iops_threshold Optional pointer to store IOPS threshold.
+ */
+void spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
+ uint32_t *iops_threshold);
+
+/**
+ * Construct an empty vhost SCSI device. This will create a
+ * Unix domain socket together with a vhost-user slave server waiting
+ * for a connection on this socket. Creating the vdev does not
+ * start any I/O pollers and does not hog the CPU. I/O processing
+ * starts after receiving proper message on the created socket.
+ * See QEMU's vhost-user documentation for details.
+ * All physical devices have to be separately attached to this
+ * vdev via \c spdk_vhost_scsi_dev_add_tgt().
+ *
+ * This function is thread-safe.
+ *
+ * \param name name of the vhost device. The name will also be used
+ * for socket name, which is exactly \c socket_base_dir/name
+ * \param cpumask string containing cpumask in hex. The leading *0x*
+ * is allowed but not required. The mask itself can be constructed as:
+ * ((1 << cpu0) | (1 << cpu1) | ... | (1 << cpuN)).
+ *
+ * \return 0 on success, negative errno on error.
+ */
+int spdk_vhost_scsi_dev_construct(const char *name, const char *cpumask);
+
+/**
+ * Construct and attach new SCSI target to the vhost SCSI device
+ * on given (unoccupied) slot. The device will be created with a single
+ * LUN0 associated with given SPDK bdev. Currently only one LUN per
+ * device is supported.
+ *
+ * If the vhost SCSI device has an active connection and has negotiated
+ * \c VIRTIO_SCSI_F_HOTPLUG feature, the new SCSI target should be
+ * automatically detected by the other side.
+ *
+ * \param vdev vhost SCSI device.
+ * \param scsi_tgt_num slot to attach to or negative value to use first free.
+ * \param bdev_name name of the SPDK bdev to associate with SCSI LUN0.
+ *
+ * \return value >= 0 on success - the SCSI target ID, negative errno code:
+ * -EINVAL - one of the arguments is invalid:
+ * - vdev is not vhost SCSI device
+ * - SCSI target ID is out of range
+ * - bdev name is NULL
+ * - can't create SCSI LUN because of other errors e.g.: bdev does not exist
+ * -ENOSPC - scsi_tgt_num is -1 and maximum targets in vhost SCSI device reached
+ * -EEXIST - SCSI target ID already exists
+ */
+int spdk_vhost_scsi_dev_add_tgt(struct spdk_vhost_dev *vdev, int scsi_tgt_num,
+ const char *bdev_name);
+
+/**
+ * Get SCSI target from vhost SCSI device on given slot. Max
+ * number of available slots is defined by
+ * \c SPDK_VHOST_SCSI_CTRLR_MAX_DEVS.
+ *
+ * \param vdev vhost SCSI device.
+ * \param num slot id.
+ *
+ * \return SCSI device on given slot or NULL.
+ */
+struct spdk_scsi_dev *spdk_vhost_scsi_dev_get_tgt(struct spdk_vhost_dev *vdev, uint8_t num);
+
+/**
+ * Detach and destruct SCSI target from a vhost SCSI device.
+ *
+ * The device will be deleted after all pending I/O is finished.
+ * If the driver supports VIRTIO_SCSI_F_HOTPLUG, then a hotremove
+ * notification will be sent.
+ *
+ * \param vdev vhost SCSI device
+ * \param scsi_tgt_num slot id to delete target from
+ * \param cb_fn callback to be fired once target has been successfully
+ * deleted. The first parameter of callback function is the vhost SCSI
+ * device, the second is user provided argument *cb_arg*.
+ * \param cb_arg parameter to be passed to *cb_fn*.
+ *
+ * \return 0 on success, negative errno on error.
+ */
+int spdk_vhost_scsi_dev_remove_tgt(struct spdk_vhost_dev *vdev, unsigned scsi_tgt_num,
+ spdk_vhost_event_fn cb_fn, void *cb_arg);
+
+/**
+ * Construct a vhost blk device. This will create a Unix domain
+ * socket together with a vhost-user slave server waiting for a
+ * connection on this socket. Creating the vdev does not start
+ * any I/O pollers and does not hog the CPU. I/O processing starts
+ * after receiving proper message on the created socket.
+ * See QEMU's vhost-user documentation for details. Vhost blk
+ * device is tightly associated with given SPDK bdev. Given
+ * bdev can not be changed, unless it has been hotremoved. This
+ * would result in all I/O failing with virtio \c VIRTIO_BLK_S_IOERR
+ * error code.
+ *
+ * This function is thread-safe.
+ *
+ * \param name name of the vhost blk device. The name will also be
+ * used for socket name, which is exactly \c socket_base_dir/name
+ * \param cpumask string containing cpumask in hex. The leading *0x*
+ * is allowed but not required. The mask itself can be constructed as:
+ * ((1 << cpu0) | (1 << cpu1) | ... | (1 << cpuN)).
+ * \param dev_name bdev name to associate with this vhost device
+ * \param readonly if set, all writes to the device will fail with
+ * \c VIRTIO_BLK_S_IOERR error code.
+ * \param packed_ring this controller supports packed ring if set.
+ *
+ * \return 0 on success, negative errno on error.
+ */
+int spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
+ bool readonly, bool packed_ring);
+
+/**
+ * Remove a vhost device. The device must not have any open connections on its socket.
+ *
+ * \param vdev vhost blk device.
+ *
+ * \return 0 on success, negative errno on error.
+ */
+int spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_VHOST_H */
diff --git a/src/spdk/include/spdk/vmd.h b/src/spdk/include/spdk/vmd.h
new file mode 100644
index 000000000..100aa893c
--- /dev/null
+++ b/src/spdk/include/spdk/vmd.h
@@ -0,0 +1,116 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * VMD driver public interface
+ */
+
+#ifndef SPDK_VMD_H
+#define SPDK_VMD_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/config.h"
+#include "spdk/env.h"
+
+/* Maximum VMD devices - up to 6 per cpu */
+#define MAX_VMD_TARGET 24
+
+/**
+ * Enumerate VMD devices and hook them into the spdk pci subsystem
+ *
+ * \return 0 on success, -1 on failure
+ */
+int spdk_vmd_init(void);
+
+/**
+ * Release any resources allocated by the VMD library via spdk_vmd_init().
+ */
+void spdk_vmd_fini(void);
+
+/**
+ * Returns a list of nvme devices found on the given vmd pci BDF.
+ *
+ * \param vmd_addr pci BDF of the vmd device whose end device list is returned
+ * \param nvme_list buffer of exactly MAX_VMD_TARGET entries in which the spdk_pci_device array is returned.
+ *
+ * \return Count of nvme devices attached to the input VMD.
+ */
+int spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list);
+
+/** State of the LEDs */
+enum spdk_vmd_led_state {
+ SPDK_VMD_LED_STATE_OFF,
+ SPDK_VMD_LED_STATE_IDENTIFY,
+ SPDK_VMD_LED_STATE_FAULT,
+ SPDK_VMD_LED_STATE_REBUILD,
+ SPDK_VMD_LED_STATE_UNKNOWN,
+};
+
+/**
+ * Sets the state of the LED on specified PCI device. The device needs to be behind VMD.
+ *
+ * \param pci_device PCI device
+ * \param state LED state to set
+ *
+ * \return 0 on success, negative errno otherwise
+ */
+int spdk_vmd_set_led_state(struct spdk_pci_device *pci_device, enum spdk_vmd_led_state state);
+
+/**
+ * Retrieves the state of the LED on specified PCI device. The device needs to be behind VMD.
+ *
+ * \param pci_device PCI device
+ * \param state current LED state
+ *
+ * \return 0 on success, negative errno otherwise
+ */
+int spdk_vmd_get_led_state(struct spdk_pci_device *pci_device, enum spdk_vmd_led_state *state);
+
+/**
+ * Checks for hotplug/hotremove events of the devices behind the VMD. Needs to be called
+ * periodically to detect them.
+ *
+ * \return number of hotplug events detected or negative errno in case of errors
+ */
+int spdk_vmd_hotplug_monitor(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_VMD_H */
diff --git a/src/spdk/include/spdk_internal/accel_engine.h b/src/spdk/include/spdk_internal/accel_engine.h
new file mode 100644
index 000000000..9b78bc967
--- /dev/null
+++ b/src/spdk/include/spdk_internal/accel_engine.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ACCEL_ENGINE_H
+#define SPDK_INTERNAL_ACCEL_ENGINE_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/accel_engine.h"
+#include "spdk/queue.h"
+
+struct spdk_accel_task {
+ spdk_accel_completion_cb cb; /* completion callback associated with this task */
+ void *cb_arg; /* presumably the user argument handed to cb -- TODO confirm against the caller */
+ uint8_t offload_ctx[0]; /* zero-length trailing array; NOTE(review): looks like engine-private context, presumably sized via get_ctx_size() -- confirm */
+};
+
+struct spdk_accel_engine { /* table of function pointers describing one acceleration engine */
+ uint64_t (*get_capabilities)(void);
+ int (*copy)(struct spdk_io_channel *ch, void *dst, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ uint32_t (*batch_get_max)(void);
+ struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch);
+ int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst1, void *dst2, void *src, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
+ int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src,
+ uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ struct spdk_io_channel *(*get_io_channel)(void); /* returns the I/O channel that the per-operation callbacks above take as ch */
+};
+
+struct spdk_accel_module_if { /* interface descriptor for one acceleration module */
+ /** Initialization function for the module. Called by the spdk
+ * application during startup.
+ *
+ * Modules are required to define this function.
+ */
+ int (*module_init)(void);
+
+ /** Finish function for the module. Called by the spdk application
+ * before the spdk application exits to perform any necessary cleanup.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*module_fini)(void *ctx);
+
+ /** Function called to return a text string representing the
+ * module's configuration options for inclusion in an
+ * spdk configuration file.
+ */
+ void (*config_text)(FILE *fp);
+
+ /**
+ * Write Acceleration module configuration into provided JSON context.
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+
+ /**
+ * Returns the allocation size required for the modules to use for context.
+ */
+ size_t (*get_ctx_size)(void);
+
+ TAILQ_ENTRY(spdk_accel_module_if) tailq; /* list linkage; presumably used by spdk_accel_module_list_add() -- confirm */
+};
+
+void spdk_accel_hw_engine_register(struct spdk_accel_engine *accel_engine);
+void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module);
+
+#define SPDK_ACCEL_MODULE_REGISTER(init_fn, fini_fn, config_fn, config_json, ctx_size_fn) \
+ static struct spdk_accel_module_if init_fn ## _if = { /* file-local descriptor named <init_fn>_if */ \
+ .module_init = init_fn, \
+ .module_fini = fini_fn, \
+ .config_text = config_fn, \
+ .write_config_json = config_json, \
+ .get_ctx_size = ctx_size_fn, \
+ }; \
+ __attribute__((constructor)) static void init_fn ## _init(void) /* constructor named <init_fn>_init; hands the descriptor to the module list */ \
+ { \
+ spdk_accel_module_list_add(&init_fn ## _if); \
+ }
+
+#endif
diff --git a/src/spdk/include/spdk_internal/assert.h b/src/spdk/include/spdk_internal/assert.h
new file mode 100644
index 000000000..7e4c45070
--- /dev/null
+++ b/src/spdk/include/spdk_internal/assert.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ASSERT_H
+#define SPDK_INTERNAL_ASSERT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) /* non-DEBUG build and the compiler reports GCC >= 4.5 */
+#define SPDK_UNREACHABLE() __builtin_unreachable()
+#else /* DEBUG build, or the compiler does not report GCC >= 4.5: terminate instead */
+#define SPDK_UNREACHABLE() abort()
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_ASSERT_H */
diff --git a/src/spdk/include/spdk_internal/event.h b/src/spdk/include/spdk_internal/event.h
new file mode 100644
index 000000000..2d88d08ba
--- /dev/null
+++ b/src/spdk/include/spdk_internal/event.h
@@ -0,0 +1,197 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_EVENT_H
+#define SPDK_INTERNAL_EVENT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/event.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+
+struct spdk_event { /* A callback plus two opaque arguments, tagged with a core number. */
+ uint32_t lcore; /* NOTE(review): presumably the logical core the event is destined for - confirm */
+ spdk_event_fn fn; /* callback to invoke */
+ void *arg1; /* first opaque argument for fn */
+ void *arg2; /* second opaque argument for fn */
+};
+
+enum spdk_reactor_state { /* States a reactor can be in; values are assigned explicitly, 0 through 4. */
+ SPDK_REACTOR_STATE_UNINITIALIZED = 0, /* also what zero-filled storage reads as */
+ SPDK_REACTOR_STATE_INITIALIZED = 1,
+ SPDK_REACTOR_STATE_RUNNING = 2,
+ SPDK_REACTOR_STATE_EXITING = 3,
+ SPDK_REACTOR_STATE_SHUTDOWN = 4, /* NOTE(review): how EXITING and SHUTDOWN differ is not visible in this header */
+};
+
+struct spdk_lw_thread { /* Bookkeeping for one lightweight thread kept on a reactor's thread list. */
+ TAILQ_ENTRY(spdk_lw_thread) link; /* linkage for a TAILQ of spdk_lw_thread, such as spdk_reactor.threads */
+ bool resched; /* NOTE(review): presumably requests moving the thread to another reactor - confirm */
+ uint64_t tsc_start; /* NOTE(review): a timestamp-counter sample; where it is captured is not visible here */
+};
+
+struct spdk_reactor { /* State of one reactor; the type is aligned to SPDK_CACHE_LINE_SIZE (see closing line). */
+ /* Lightweight threads running on this reactor */
+ TAILQ_HEAD(, spdk_lw_thread) threads;
+ uint32_t thread_count; /* NOTE(review): presumably the number of entries on threads - confirm */
+
+ /* Logical core number for this reactor. */
+ uint32_t lcore;
+
+ struct {
+ uint32_t is_valid : 1; /* single-bit flag */
+ uint32_t reserved : 31; /* fills the remaining bits of the 32-bit unit */
+ } flags;
+
+ uint64_t tsc_last; /* NOTE(review): a timestamp-counter sample; when it is taken is not visible here */
+
+ struct spdk_ring *events; /* ring of pending events; NOTE(review): presumably struct spdk_event pointers - confirm */
+
+ /* The last known rusage values */
+ struct rusage rusage;
+ uint64_t last_rusage; /* NOTE(review): likely when rusage was last sampled - confirm meaning and units */
+
+ uint64_t busy_tsc; /* NOTE(review): busy time in TSC units judging by the name - confirm */
+ uint64_t idle_tsc; /* NOTE(review): idle time in TSC units judging by the name - confirm */
+} __attribute__((aligned(SPDK_CACHE_LINE_SIZE)));
+
+int spdk_reactors_init(void); /* returns an int status; the success/failure convention is defined by the implementation */
+void spdk_reactors_fini(void);
+
+void spdk_reactors_start(void);
+void spdk_reactors_stop(void *arg1); /* NOTE(review): how arg1 is used is not visible in this header */
+
+struct spdk_reactor *spdk_reactor_get(uint32_t lcore); /* NOTE(review): presumably looks up the reactor for a logical core - confirm */
+
+/**
+ * Allocate and pass an event to each reactor, serially.
+ *
+ * The allocated event is processed asynchronously - i.e. spdk_for_each_reactor
+ * will return prior to `fn` being called on each reactor.
+ *
+ * \param fn This is the function that will be called on each reactor.
+ * \param arg1 Argument will be passed to fn when called.
+ * \param arg2 Argument will be passed to fn when called.
+ * \param cpl This will be called on the originating reactor after `fn` has been
+ * called on each reactor.
+ */
+void spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl);
+
+struct spdk_subsystem { /* Descriptor for one subsystem: a name plus its lifecycle and configuration callbacks. */
+ const char *name; /* subsystem name */
+ /* User must call spdk_subsystem_init_next() when they are done with their initialization. */
+ void (*init)(void);
+ void (*fini)(void); /* NOTE(review): presumably expected to finish via spdk_subsystem_fini_next() - confirm */
+ void (*config)(FILE *fp); /* receives a stdio stream; NOTE(review): presumably writes text configuration to it - confirm */
+
+ /**
+ * Write JSON configuration handler.
+ *
+ * \param w JSON write context
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+ TAILQ_ENTRY(spdk_subsystem) tailq; /* list linkage */
+};
+
+struct spdk_subsystem *spdk_subsystem_find(const char *name); /* lookup by name; NOTE(review): the not-found result is not visible here */
+struct spdk_subsystem *spdk_subsystem_get_first(void); /* start of iteration over subsystems */
+struct spdk_subsystem *spdk_subsystem_get_next(struct spdk_subsystem *cur_subsystem); /* step of that iteration, from cur_subsystem */
+
+struct spdk_subsystem_depend { /* One dependency record: subsystem `name` depends on subsystem `depends_on`. */
+ const char *name; /* SPDK_SUBSYSTEM_DEPEND() stores the stringified _name here */
+ const char *depends_on; /* SPDK_SUBSYSTEM_DEPEND() stores the stringified _depends_on here */
+ TAILQ_ENTRY(spdk_subsystem_depend) tailq; /* list linkage */
+};
+
+struct spdk_subsystem_depend *spdk_subsystem_get_first_depend(void); /* start of iteration over dependency records */
+struct spdk_subsystem_depend *spdk_subsystem_get_next_depend(struct spdk_subsystem_depend
+ *cur_depend);
+
+void spdk_add_subsystem(struct spdk_subsystem *subsystem); /* invoked by the constructor that SPDK_SUBSYSTEM_REGISTER() emits */
+void spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend); /* invoked by the constructor that SPDK_SUBSYSTEM_DEPEND() emits */
+
+typedef void (*spdk_subsystem_init_fn)(int rc, void *ctx); /* completion callback: a status code plus a caller-supplied context */
+void spdk_subsystem_init(spdk_subsystem_init_fn cb_fn, void *cb_arg);
+void spdk_subsystem_fini(spdk_msg_fn cb_fn, void *cb_arg);
+void spdk_subsystem_init_next(int rc); /* per the note in struct spdk_subsystem: called by a subsystem once its init is done */
+void spdk_subsystem_fini_next(void);
+void spdk_subsystem_config(FILE *fp);
+void spdk_app_json_config_load(const char *json_config_file, const char *rpc_addr,
+ spdk_subsystem_init_fn cb_fn, void *cb_arg,
+ bool stop_on_error);
+
+/**
+ * Save pointed \c subsystem configuration to the JSON write context \c w. In case of
+ * error \c null is written to the JSON context.
+ *
+ * \param w JSON write context
+ * \param subsystem the subsystem to query
+ */
+void spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem);
+
+void spdk_rpc_initialize(const char *listen_addr); /* NOTE(review): listen_addr format is not visible in this header */
+void spdk_rpc_finish(void);
+
+/**
+ * \brief Register a new subsystem
+ *
+ * Expands to a static constructor function named <_name>_register that passes
+ * the address of the object called _name to spdk_add_subsystem().  _name must
+ * therefore name an object visible at the point of use whose address is
+ * acceptable as a struct spdk_subsystem pointer.
+ */
+#define SPDK_SUBSYSTEM_REGISTER(_name) \
+ __attribute__((constructor)) static void _name ## _register(void) \
+ { \
+ spdk_add_subsystem(&_name); \
+ }
+
+/**
+ * \brief Declare that a subsystem depends on another subsystem.
+ *
+ * Both arguments are bare identifiers.  They are stringified into a static
+ * struct spdk_subsystem_depend and also pasted into the names of that object
+ * and of the constructor function that hands it to spdk_add_subsystem_depend().
+ */
+#define SPDK_SUBSYSTEM_DEPEND(_name, _depends_on) \
+ static struct spdk_subsystem_depend __subsystem_ ## _name ## _depend_on ## _depends_on = { \
+ .name = #_name, \
+ .depends_on = #_depends_on, \
+ }; \
+ __attribute__((constructor)) static void _name ## _depend_on ## _depends_on(void) \
+ { \
+ spdk_add_subsystem_depend(&__subsystem_ ## _name ## _depend_on ## _depends_on); \
+ }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_EVENT_H */
diff --git a/src/spdk/include/spdk_internal/idxd.h b/src/spdk/include/spdk_internal/idxd.h
new file mode 100644
index 000000000..17db2405d
--- /dev/null
+++ b/src/spdk/include/spdk_internal/idxd.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IDXD_INTERNAL_H__
+#define __IDXD_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/idxd.h"
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+#include "spdk/bit_array.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IDXD_MAX_CONFIG_NUM 1 /* NOTE(review): presumably the number of supported device configurations - confirm */
+
+enum dsa_opcode { /* Operation codes named IDXD_OPCODE_*; values are explicit and not contiguous. */
+ IDXD_OPCODE_NOOP = 0,
+ IDXD_OPCODE_BATCH = 1,
+ IDXD_OPCODE_DRAIN = 2,
+ IDXD_OPCODE_MEMMOVE = 3,
+ IDXD_OPCODE_MEMFILL = 4,
+ IDXD_OPCODE_COMPARE = 5,
+ IDXD_OPCODE_COMPVAL = 6,
+ IDXD_OPCODE_CR_DELTA = 7,
+ IDXD_OPCODE_AP_DELTA = 8,
+ IDXD_OPCODE_DUALCAST = 9, /* values 10-15 are not assigned in this enum */
+ IDXD_OPCODE_CRC32C_GEN = 16,
+ IDXD_OPCODE_COPY_CRC = 17,
+ IDXD_OPCODE_DIF_CHECK = 18,
+ IDXD_OPCODE_DIF_INS = 19,
+ IDXD_OPCODE_DIF_STRP = 20,
+ IDXD_OPCODE_DIF_UPDT = 21, /* values 22-31 are not assigned in this enum */
+ IDXD_OPCODE_CFLUSH = 32,
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __IDXD_INTERNAL_H__ */
diff --git a/src/spdk/include/spdk_internal/log.h b/src/spdk/include/spdk_internal/log.h
new file mode 100644
index 000000000..0993d1016
--- /dev/null
+++ b/src/spdk/include/spdk_internal/log.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Logging interfaces
+ */
+
+#ifndef SPDK_INTERNAL_LOG_H
+#define SPDK_INTERNAL_LOG_H
+
+#include "spdk/log.h"
+#include "spdk/queue.h"
+
+extern enum spdk_log_level g_spdk_log_level; /* declarations only: these three globals are defined outside this header */
+extern enum spdk_log_level g_spdk_log_print_level; /* NOTE(review): which output each level governs is not visible here */
+extern enum spdk_log_level g_spdk_log_backtrace_level;
+
+struct spdk_log_flag { /* A named on/off switch tested by SPDK_INFOLOG, SPDK_DEBUGLOG and SPDK_LOGDUMP. */
+ TAILQ_ENTRY(spdk_log_flag) tailq; /* list linkage */
+ const char *name; /* SPDK_LOG_REGISTER_COMPONENT() initializes this from its str argument */
+ bool enabled; /* the logging macros emit only while true; SPDK_LOG_REGISTER_COMPONENT() starts it false */
+};
+
+void spdk_log_register_flag(const char *name, struct spdk_log_flag *flag); /* called from the constructor emitted by SPDK_LOG_REGISTER_COMPONENT() */
+
+struct spdk_log_flag *spdk_log_get_first_flag(void); /* start of iteration over flags */
+struct spdk_log_flag *spdk_log_get_next_flag(struct spdk_log_flag *flag); /* step of that iteration, from flag */
+
+#define SPDK_LOG_REGISTER_COMPONENT(str, flag) /* defines the flag object and registers it from a constructor */ \
+struct spdk_log_flag flag = { /* external linkage, so the block-scope externs in the logging macros can resolve to it */ \
+ .enabled = false, \
+ .name = str, \
+}; \
+__attribute__((constructor)) static void register_flag_##flag(void) \
+{ \
+ spdk_log_register_flag(str, &flag); /* str is expanded a second time here */ \
+}
+
+#define SPDK_INFOLOG(FLAG, ...) /* logs at SPDK_LOG_INFO, but only while FLAG.enabled is true */ \
+ do { \
+ extern struct spdk_log_flag FLAG; /* block-scope declaration: callers need no prior declaration of FLAG */ \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_INFO, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#ifdef DEBUG /* debug logging is compiled in only when DEBUG is defined */
+
+#define SPDK_DEBUGLOG(FLAG, ...) /* logs at SPDK_LOG_DEBUG while FLAG.enabled is true */ \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_DEBUG, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#define SPDK_LOGDUMP(FLAG, LABEL, BUF, LEN) /* dumps BUF to stderr when FLAG is enabled and LEN is non-zero */ \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if ((FLAG.enabled) && (LEN)) { \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); /* LEN is expanded twice: once in the test, once here */ \
+ } \
+ } while (0)
+
+#else
+#define SPDK_DEBUGLOG(...) do { } while (0) /* arguments are discarded and never evaluated */
+#define SPDK_LOGDUMP(...) do { } while (0) /* arguments are discarded and never evaluated */
+#endif
+
+#define SPDK_ERRLOGDUMP(LABEL, BUF, LEN) /* unconditional variant: no flag, available in every build */ \
+ do { \
+ if ((LEN)) { /* nothing is dumped for a zero length */ \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); /* LEN is expanded twice */ \
+ } \
+ } while (0)
+
+#endif /* SPDK_INTERNAL_LOG_H */
diff --git a/src/spdk/include/spdk_internal/lvolstore.h b/src/spdk/include/spdk_internal/lvolstore.h
new file mode 100644
index 000000000..f82157e53
--- /dev/null
+++ b/src/spdk/include/spdk_internal/lvolstore.h
@@ -0,0 +1,128 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_LVOLSTORE_H
+#define SPDK_INTERNAL_LVOLSTORE_H
+
+#include "spdk/blob.h"
+#include "spdk/lvol.h"
+#include "spdk/uuid.h"
+#include "spdk/bdev_module.h"
+
+/* Default size of blobstore cluster */
+#define SPDK_LVS_OPTS_CLUSTER_SZ (4 * 1024 * 1024) /* 4194304; NOTE(review): presumably bytes, i.e. 4 MiB - confirm */
+
+/* UUID + '_' + blobid (20 characters for uint64_t).
+ * Null terminator is already included in SPDK_UUID_STRING_LEN. */
+#define SPDK_LVOL_UNIQUE_ID_MAX (SPDK_UUID_STRING_LEN + 1 + 20) /* the +1 is the '_' separator, the +20 the blobid digits */
+
+struct spdk_lvs_req { /* Context for a lvol-store operation that completes through cb_fn. */
+ spdk_lvs_op_complete cb_fn; /* completion callback */
+ void *cb_arg; /* opaque argument for cb_fn */
+ struct spdk_lvol_store *lvol_store; /* store the request refers to */
+ int lvserrno; /* NOTE(review): presumably the status carried to the completion - confirm sign convention */
+};
+
+struct spdk_lvol_req { /* Context for a lvol operation that completes through cb_fn. */
+ spdk_lvol_op_complete cb_fn; /* completion callback */
+ void *cb_arg; /* opaque argument for cb_fn */
+ struct spdk_lvol *lvol; /* volume the request refers to */
+ size_t sz; /* NOTE(review): a size parameter of the operation; meaning and units are not visible here */
+ struct spdk_io_channel *channel;
+ char name[SPDK_LVOL_NAME_MAX]; /* same capacity as spdk_lvol.name */
+};
+
+struct spdk_lvs_with_handle_req { /* Context for a lvol-store operation whose completion callback is of the with-handle kind. */
+ spdk_lvs_op_with_handle_complete cb_fn; /* completion callback */
+ void *cb_arg; /* opaque argument for cb_fn */
+ struct spdk_lvol_store *lvol_store;
+ struct spdk_bs_dev *bs_dev; /* blobstore device involved in the request */
+ struct spdk_bdev *base_bdev; /* NOTE(review): presumably the bdev underneath bs_dev - confirm */
+ int lvserrno; /* NOTE(review): presumably the status carried to the completion - confirm */
+};
+
+struct spdk_lvs_destroy_req { /* Context for a lvol-store request of the destroy kind (per the type name). */
+ spdk_lvs_op_complete cb_fn; /* completion callback */
+ void *cb_arg; /* opaque argument for cb_fn */
+ struct spdk_lvol_store *lvs; /* store the request refers to */
+};
+
+struct spdk_lvol_with_handle_req { /* Context for a lvol operation whose completion callback is of the with-handle kind. */
+ spdk_lvol_op_with_handle_complete cb_fn; /* completion callback */
+ void *cb_arg; /* opaque argument for cb_fn */
+ struct spdk_lvol *lvol; /* volume the request refers to */
+};
+
+struct spdk_lvol_store { /* In-memory state of one logical volume store. */
+ struct spdk_bs_dev *bs_dev; /* blobstore device */
+ struct spdk_blob_store *blobstore; /* blobstore handle */
+ struct spdk_blob *super_blob; /* handle of the super blob */
+ spdk_blob_id super_blob_id; /* id of the super blob */
+ struct spdk_uuid uuid; /* UUID of the store */
+ int lvol_count; /* NOTE(review): presumably the number of volumes in the store - confirm */
+ int lvols_opened; /* NOTE(review): presumably the number of volumes currently open - confirm */
+ bool destruct; /* NOTE(review): exact meaning not visible here */
+ TAILQ_HEAD(, spdk_lvol) lvols; /* list of struct spdk_lvol, linked through spdk_lvol.link */
+ TAILQ_HEAD(, spdk_lvol) pending_lvols; /* second list of struct spdk_lvol; NOTE(review): membership rule not visible here */
+ bool on_list; /* NOTE(review): presumably whether this store is linked via `link` - confirm */
+ TAILQ_ENTRY(spdk_lvol_store) link; /* linkage for a list of stores */
+ char name[SPDK_LVS_NAME_MAX]; /* store name */
+ char new_name[SPDK_LVS_NAME_MAX]; /* NOTE(review): presumably the target name of a rename in progress - confirm */
+};
+
+struct spdk_lvol { /* In-memory state of one logical volume. */
+ struct spdk_lvol_store *lvol_store; /* store this volume is associated with */
+ struct spdk_blob *blob; /* blob handle */
+ spdk_blob_id blob_id; /* blob id */
+ char unique_id[SPDK_LVOL_UNIQUE_ID_MAX]; /* sized for UUID + '_' + blobid, per the SPDK_LVOL_UNIQUE_ID_MAX comment */
+ char name[SPDK_LVOL_NAME_MAX]; /* volume name */
+ struct spdk_uuid uuid; /* UUID of the volume */
+ char uuid_str[SPDK_UUID_STRING_LEN]; /* buffer sized for a UUID string; NOTE(review): presumably the text form of uuid - confirm */
+ bool thin_provision;
+ struct spdk_bdev *bdev; /* associated bdev */
+ int ref_count;
+ bool action_in_progress; /* NOTE(review): which actions set this is not visible here */
+ enum blob_clear_method clear_method;
+ TAILQ_ENTRY(spdk_lvol) link; /* linkage for spdk_lvol_store.lvols / pending_lvols */
+};
+
+struct lvol_store_bdev *vbdev_lvol_store_first(void); /* start of iteration over lvol_store_bdev objects */
+struct lvol_store_bdev *vbdev_lvol_store_next(struct lvol_store_bdev *prev); /* step of that iteration, from prev */
+
+void spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, /* completion is reported through cb_fn(cb_arg, ...) rather than a return value */
+ void *cb_arg); /* NOTE(review): units of sz are not visible in this header */
+
+void spdk_lvol_set_read_only(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, /* completion is reported through cb_fn */
+ void *cb_arg);
+
+#endif /* SPDK_INTERNAL_LVOLSTORE_H */
diff --git a/src/spdk/include/spdk_internal/mock.h b/src/spdk/include/spdk_internal/mock.h
new file mode 100644
index 000000000..8de44ae55
--- /dev/null
+++ b/src/spdk/include/spdk_internal/mock.h
@@ -0,0 +1,135 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_MOCK_H
+#define SPDK_INTERNAL_MOCK_H
+
+#include "spdk/stdinc.h"
+
+#define MOCK_STRUCT_INIT(...) /* wraps its arguments in braces to form a brace-enclosed initializer */ \
+ { __VA_ARGS__ }
+
+#define DEFINE_RETURN_MOCK(fn, ret) /* defines ut_<fn>_mocked (initially false) and ut_<fn> of type ret */ \
+ bool ut_ ## fn ## _mocked = false; \
+ ret ut_ ## fn /* no trailing semicolon: the user supplies it, optionally preceded by an initializer */
+
+/*
+ * For controlling mocked function behavior, setting
+ * and getting values from the stub, the _P macros are
+ * for mocking functions that return pointer values.
+ */
+#define MOCK_SET(fn, val) \
+ ut_ ## fn ## _mocked = true; \
+ ut_ ## fn = val
+
+#define MOCK_GET(fn) /* the stored mock value for fn, i.e. the variable ut_<fn> */ \
+ ut_ ## fn
+
+#define MOCK_CLEAR(fn) /* marks fn as not mocked; the stored value ut_<fn> is left as it was */ \
+ ut_ ## fn ## _mocked = false
+
+#define MOCK_CLEAR_P(fn) \
+ ut_ ## fn ## _mocked = false; \
+ ut_ ## fn = NULL
+
+/* for proving to *certain* static analysis tools that we didn't reset the mock function.
+ * Expands to a fatal assertion that ut_<fn>_mocked is false. */
+#define MOCK_CLEARED_ASSERT(fn) \
+ SPDK_CU_ASSERT_FATAL(ut_ ## fn ## _mocked == false)
+
+/* for declaring function prototypes for wrappers
+ *
+ * Declares the two mock-state variables for fn plus prototypes for
+ * __wrap_<fn> and __real_<fn>.  args is the parenthesized parameter list.
+ * The expansion has no trailing semicolon; the user supplies it.
+ */
+#define DECLARE_WRAPPER(fn, ret, args) \
+ extern bool ut_ ## fn ## _mocked; \
+ extern ret ut_ ## fn; \
+ ret __wrap_ ## fn args; ret __real_ ## fn args
+
+/* for defining the implementation of wrappers for syscalls
+ *
+ * dargs is the parenthesized parameter declaration list of the wrapper and
+ * pargs the parenthesized argument list used to forward to __real_<fn>.
+ * The mock starts out disabled (see DEFINE_RETURN_MOCK).
+ */
+#define DEFINE_WRAPPER(fn, ret, dargs, pargs) \
+ DEFINE_RETURN_MOCK(fn, ret); \
+ __attribute__((used)) ret __wrap_ ## fn dargs \
+ { \
+ if (!ut_ ## fn ## _mocked) { \
+ return __real_ ## fn pargs; /* not mocked: forward to the real function */ \
+ } else { \
+ return MOCK_GET(fn); /* mocked: hand back the stored value */ \
+ } \
+ }
+
+/* DEFINE_STUB is for defining the implementation of stubs for SPDK funcs.
+ *
+ * The stub starts out mocked (ut_<fn>_mocked is true) with val as its stored
+ * value, and always returns the stored value regardless of the flag.
+ */
+#define DEFINE_STUB(fn, ret, dargs, val) \
+ bool ut_ ## fn ## _mocked = true; \
+ ret ut_ ## fn = val; \
+ ret fn dargs; /* prototype emitted ahead of the definition */ \
+ ret fn dargs \
+ { \
+ return MOCK_GET(fn); \
+ }
+
+/* DEFINE_STUB_V macro is for stubs that don't have a return value.
+ * It emits a prototype followed by an empty function body; no mock-state
+ * variables are created. */
+#define DEFINE_STUB_V(fn, dargs) \
+ void fn dargs; \
+ void fn dargs \
+ { \
+ }
+
+#define HANDLE_RETURN_MOCK(fn) /* returns the stored value from the enclosing function while fn is mocked */ \
+ if (ut_ ## fn ## _mocked) { /* bare if: do not follow a use of this macro directly with an else */ \
+ return ut_ ## fn; \
+ }
+
+
+/* declare wrapper protos (alphabetically please) here.
+ * Each line declares the mock-state variables and the __wrap_ and __real_
+ * prototypes for one libc/pthread function. */
+DECLARE_WRAPPER(calloc, void *, (size_t nmemb, size_t size));
+
+DECLARE_WRAPPER(pthread_mutex_init, int,
+ (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(pthread_mutexattr_init, int,
+ (pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(recvmsg, ssize_t, (int sockfd, struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(sendmsg, ssize_t, (int sockfd, const struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt));
+
+/* unlink is done a bit differently: it is driven by the two globals below
+ * instead of the ut_<fn> mock-state variables. */
+extern char *g_unlink_path;
+extern void (*g_unlink_callback)(void);
+/* If g_unlink_path is NULL, __wrap_unlink will return ENOENT.
+ * If the __wrap_unlink() parameter does not match g_unlink_path, it will return ENOENT.
+ * If g_unlink_path does match, and g_unlink_callback has been set, g_unlink_callback will
+ * be called before returning 0.
+ */
+int __wrap_unlink(const char *path);
+
+#endif /* SPDK_INTERNAL_MOCK_H */
diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h
new file mode 100644
index 000000000..7065bc060
--- /dev/null
+++ b/src/spdk/include/spdk_internal/nvme_tcp.h
@@ -0,0 +1,633 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_NVME_TCP_H
+#define SPDK_INTERNAL_NVME_TCP_H
+
+#include "spdk/likely.h"
+#include "spdk/sock.h"
+#include "spdk/dif.h"
+
+#define SPDK_CRC32C_XOR 0xffffffffUL /* used in this header both as the initial CRC32C value and as the final XOR mask */
+#define SPDK_NVME_TCP_DIGEST_LEN 4 /* bytes in a header or data digest */
+#define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4 /* data is zero-padded to a multiple of this before the data digest is finalized */
+#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30 /* NOTE(review): units (presumably seconds) are not visible in this header */
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8 /* NOTE(review): what it multiplies is not visible in this header */
+
+/*
+ * Maximum number of SGL elements.
+ */
+#define NVME_TCP_MAX_SGL_DESCRIPTORS (16)
+
+#define MAKE_DIGEST_WORD(BUF, CRC32C) /* stores CRC32C into BUF[0..3], least significant byte first; both arguments are expanded four times */ \
+ ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
+ ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
+ ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
+ ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))
+
+#define MATCH_DIGEST_WORD(BUF, CRC32C) /* non-zero when the 4 bytes at BUF, read least significant byte first, equal CRC32C; BUF is expanded four times */ \
+ ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \
+ == (CRC32C))
+
+#define DGET32(B) /* reads 4 bytes at B as a uint32_t, least significant byte first; works for unaligned B; B is expanded four times */ \
+ ((( (uint32_t) *((uint8_t *)(B)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(B)+3)) << 24))
+
+#define DSET32(B,D) /* stores D into B[0..3], least significant byte first (same expansion as MAKE_DIGEST_WORD); arguments are expanded four times */ \
+ (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \
+ ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \
+ ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \
+ ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))
+
+typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg); /* completion callback type stored in nvme_tcp_pdu.cb_fn; receives nvme_tcp_pdu.cb_arg by convention - confirm at call sites */
+
+struct _nvme_tcp_sgl { /* Cursor over an iovec array, used both to fill the array and to walk it. */
+ struct iovec *iov; /* current iovec entry; advanced as entries are consumed */
+ int iovcnt; /* entries remaining from iov onward */
+ uint32_t iov_offset; /* bytes still to skip (when appending) or offset into *iov (when reading) */
+ uint32_t total_size; /* running sum of bytes mapped by the append helpers */
+};
+
+struct nvme_tcp_pdu { /* One NVMe/TCP PDU: header storage plus the bookkeeping needed to send or receive it. */
+ union {
+ /* to hold error pdu data */
+ uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
+ struct spdk_nvme_tcp_common_pdu_hdr common; /* the view used in this header to read hlen, plen and pdu_type */
+ struct spdk_nvme_tcp_ic_req ic_req;
+ struct spdk_nvme_tcp_term_req_hdr term_req;
+ struct spdk_nvme_tcp_cmd capsule_cmd;
+ struct spdk_nvme_tcp_h2c_data_hdr h2c_data;
+ struct spdk_nvme_tcp_ic_resp ic_resp;
+ struct spdk_nvme_tcp_rsp capsule_resp;
+ struct spdk_nvme_tcp_c2h_data_hdr c2h_data;
+ struct spdk_nvme_tcp_r2t_hdr r2t;
+
+ } hdr;
+
+ bool has_hdgst;
+ bool ddgst_enable;
+ uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN]; /* appended after the data segment by nvme_tcp_build_iovs() when the data digest applies */
+
+ uint8_t ch_valid_bytes; /* NOTE(review): presumably common-header bytes received so far - confirm */
+ uint8_t psh_valid_bytes; /* NOTE(review): presumably PDU-specific-header bytes received so far - confirm */
+ uint8_t psh_len; /* NOTE(review): presumably the expected PDU-specific-header length - confirm */
+
+ nvme_tcp_qpair_xfer_complete_cb cb_fn;
+ void *cb_arg;
+
+ /* The sock request ends with a 0 length iovec. Place the actual iovec immediately
+ * after it. There is a static assert below to check if the compiler inserted
+ * any unwanted padding */
+ struct spdk_sock_request sock_req;
+ struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];
+
+ struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; /* payload buffers; data_iovcnt entries are in use */
+ uint32_t data_iovcnt;
+ uint32_t data_len; /* payload length; zero means a header-only PDU in nvme_tcp_build_iovs() */
+
+ uint32_t readv_offset; /* starting offset handed to the SGL cursor by nvme_tcp_build_payload_iovs() */
+ TAILQ_ENTRY(nvme_tcp_pdu) tailq; /* list linkage */
+ uint32_t remaining;
+ uint32_t padding_len; /* added to the header length by nvme_tcp_build_iovs() when the PDU carries data */
+ struct _nvme_tcp_sgl sgl; /* scratch cursor re-initialized by each build-iovs call */
+
+ struct spdk_dif_ctx *dif_ctx; /* NULL on the common path; non-NULL selects the DIF-aware digest and iovec mapping */
+
+ void *req; /* data tied to a tcp request */
+ void *qpair;
+};
+SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
+ sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
+ "Compiler inserted padding between iov and sock_req");
+
+enum nvme_tcp_pdu_recv_state { /* Receive-side states for PDU processing; values are implicit, starting at 0. */
+ /* Ready to wait for PDU */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,
+
+ /* Active tqpair waiting for any PDU common header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,
+
+ /* Active tqpair waiting for any PDU specific header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,
+
+ /* Active tqpair waiting for a tcp request, only use in target side */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,
+
+ /* Active tqpair waiting for payload */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,
+
+ /* Active tqpair does not wait for payload */
+ NVME_TCP_PDU_RECV_STATE_ERROR, /* NOTE(review): the comment above does not say what triggers this state - confirm */
+};
+
+enum nvme_tcp_error_codes { /* Status codes: zero means still in progress, negative values are fatal conditions. */
+ NVME_TCP_PDU_IN_PROGRESS = 0,
+ NVME_TCP_CONNECTION_FATAL = -1,
+ NVME_TCP_PDU_FATAL = -2,
+};
+
+enum nvme_tcp_qpair_state { /* Queue-pair states; values are assigned explicitly, 0 through 4. */
+ NVME_TCP_QPAIR_STATE_INVALID = 0, /* also what zero-filled storage reads as */
+ NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
+ NVME_TCP_QPAIR_STATE_RUNNING = 2,
+ NVME_TCP_QPAIR_STATE_EXITING = 3,
+ NVME_TCP_QPAIR_STATE_EXITED = 4,
+};
+
+static const bool g_nvme_tcp_hdgst[] = { /* indexed by PDU type: true where a header digest is added when header digests are enabled */
+ [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_R2T] = true
+}; /* the array length is set by the largest designator above; larger indices are out of bounds */
+
+static const bool g_nvme_tcp_ddgst[] = { /* indexed by PDU type: true where a data digest is added when data digests are enabled */
+ [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = false,
+ [SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+ [SPDK_NVME_TCP_PDU_TYPE_R2T] = false
+}; /* the array length is set by the largest designator above; larger indices are out of bounds */
+
+/*
+ * Compute the header digest of a PDU: CRC32C over the first
+ * hdr.common.hlen bytes of the header, seeded with all ones and finished
+ * by XOR with SPDK_CRC32C_XOR.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t header_len = pdu->hdr.common.hlen;
+	uint32_t digest = spdk_crc32c_update(&pdu->hdr.raw, header_len, ~0);
+
+	digest ^= SPDK_CRC32C_XOR;
+
+	return digest;
+}
+
+/*
+ * Fold every buffer of an iovec array, in order, into a running CRC32C
+ * value and return the updated value.  Each entry is asserted to have a
+ * non-NULL base and a non-zero length.
+ */
+static uint32_t
+_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
+{
+	int idx = 0;
+
+	while (idx < iovcnt) {
+		const struct iovec *seg = &iov[idx];
+
+		assert(seg->iov_base != NULL);
+		assert(seg->iov_len != 0);
+		crc32c = spdk_crc32c_update(seg->iov_base, seg->iov_len, crc32c);
+		idx++;
+	}
+
+	return crc32c;
+}
+
+/*
+ * Compute the data digest of a PDU: CRC32C over the payload described by
+ * data_iov (through the DIF-aware helper when dif_ctx is set), then over
+ * enough zero bytes to round data_len up to SPDK_NVME_TCP_DIGEST_ALIGNMENT,
+ * finished by XOR with SPDK_CRC32C_XOR.  data_len is asserted non-zero.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t digest = SPDK_CRC32C_XOR;
+	uint32_t tail;
+
+	assert(pdu->data_len != 0);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		digest = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, digest);
+	} else {
+		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
+					      0, pdu->data_len, &digest, pdu->dif_ctx);
+	}
+
+	/* Bytes past the last aligned boundary; zero when already aligned. */
+	tail = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
+	if (tail != 0) {
+		uint8_t pad[3] = {0, 0, 0};
+		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - tail;
+
+		assert(pad_length > 0);
+		assert(pad_length <= sizeof(pad));
+		digest = spdk_crc32c_update(pad, pad_length, digest);
+	}
+
+	return digest ^ SPDK_CRC32C_XOR;
+}
+
+/*
+ * Point an SGL cursor at iov[0..iovcnt) with iov_offset bytes still to be
+ * skipped and nothing mapped yet.
+ */
+static inline void
+_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+		   uint32_t iov_offset)
+{
+	*s = (struct _nvme_tcp_sgl) {
+		.iov = iov,
+		.iovcnt = iovcnt,
+		.iov_offset = iov_offset,
+		.total_size = 0,
+	};
+}
+
+/*
+ * Move an SGL cursor forward by step bytes, stepping over every iovec
+ * entry that the new offset has fully passed.
+ */
+static inline void
+_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step)
+{
+	s->iov_offset += step;
+
+	/* Consume whole entries while the offset reaches past the current one. */
+	while (s->iovcnt > 0 && s->iov_offset >= s->iov->iov_len) {
+		s->iov_offset -= s->iov->iov_len;
+		s->iov++;
+		s->iovcnt--;
+	}
+}
+
+/*
+ * Report the current position of an SGL cursor.
+ *
+ * \param s SGL cursor; s->iov must point at a valid iovec entry.
+ * \param _buf if not NULL, receives the address iov_offset bytes into the
+ * current iovec entry.
+ * \param _buf_len if not NULL, receives the number of bytes left in the
+ * current iovec entry past iov_offset.
+ */
+static inline void
+_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len)
+{
+	if (_buf != NULL) {
+		/* iov_base is a void pointer; step through a byte pointer because
+		 * arithmetic on void pointers is a GNU extension, not ISO C. */
+		*_buf = (uint8_t *)s->iov->iov_base + s->iov_offset;
+	}
+	if (_buf_len != NULL) {
+		*_buf_len = s->iov->iov_len - s->iov_offset;
+	}
+}
+
+/*
+ * Append one buffer to the iovec array behind an SGL cursor.
+ *
+ * While the cursor still has bytes to skip that cover the whole buffer, the
+ * buffer is only counted against the skip and nothing is stored.  Otherwise
+ * the part of the buffer past the skip is stored in the next free entry.
+ *
+ * \return false when the entry just written was the last free one, true
+ * otherwise.
+ */
+static inline bool
+_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len)
+{
+	uint32_t usable;
+
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= data_len;
+		return true;
+	}
+
+	assert(s->iovcnt > 0);
+
+	usable = data_len - s->iov_offset;
+	s->iov->iov_base = data + s->iov_offset;
+	s->iov->iov_len = usable;
+	s->total_size += usable;
+	s->iov_offset = 0;
+	s->iov++;
+	s->iovcnt--;
+
+	return s->iovcnt != 0;
+}
+
+/*
+ * Append each buffer of iov[0..iovcnt) through _nvme_tcp_sgl_append(),
+ * stopping at the first append that reports false.
+ *
+ * \return false if an append reported false, true otherwise.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt)
+{
+	bool room_left = true;
+	int idx = 0;
+
+	while (room_left && idx < iovcnt) {
+		room_left = _nvme_tcp_sgl_append(s, iov[idx].iov_base, iov[idx].iov_len);
+		idx++;
+	}
+
+	return room_left;
+}
+
+/*
+ * Sum the iov_len of iov[0..iovcnt) into a 32-bit total.
+ */
+static inline uint32_t
+_get_iov_array_size(struct iovec *iov, int iovcnt)
+{
+	uint32_t total = 0;
+	int idx = 0;
+
+	while (idx < iovcnt) {
+		total += iov[idx].iov_len;
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * DIF-aware counterpart of _nvme_tcp_sgl_append_multi(): map data_len bytes
+ * of the buffers in iov[0..iovcnt) into the iovec array behind the cursor
+ * through spdk_dif_set_md_interleave_iovs().
+ *
+ * \return false when the mapping fails or when it used the last free entry,
+ * true otherwise.
+ *
+ * NOTE(review): the skip path tests iov_offset against data_len but then
+ * subtracts the total size of the iov array; if those two lengths can
+ * differ, iov_offset wraps around.  Confirm against the callers.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
+{
+	uint32_t mapped_len = 0;
+	int used;
+
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
+		return true;
+	}
+
+	used = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
+					       s->iov_offset, data_len - s->iov_offset,
+					       &mapped_len, dif_ctx);
+	if (used < 0) {
+		SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
+		return false;
+	}
+
+	s->total_size += mapped_len;
+	s->iov_offset = 0;
+	assert(s->iovcnt >= used);
+	s->iovcnt -= used;
+	s->iov += used;
+
+	return s->iovcnt != 0;
+}
+
+/*
+ * Fill iov with the pieces of a whole PDU: header (plus header digest and
+ * padding when present), data segment and data digest.
+ *
+ * pdu->sgl is (re)initialised over iov/iovcnt and used as the cursor. Building
+ * stops early, without error, as soon as the iovec array is full.
+ *
+ * \param iov Array to fill.
+ * \param iovcnt Number of entries in iov; 0 makes the function return 0 at once.
+ * \param pdu PDU to describe.
+ * \param hdgst_enable Add the header digest if the PDU type carries one.
+ * \param ddgst_enable Add the data digest if the PDU type carries one.
+ * \param _mapped_length If not NULL, receives the total number of bytes mapped.
+ * \return number of iovec entries that were filled.
+ */
+static int
+nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+ bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
+{
+ uint32_t hlen, plen;
+ struct _nvme_tcp_sgl *sgl;
+
+ if (iovcnt == 0) {
+ return 0;
+ }
+
+ sgl = &pdu->sgl;
+ _nvme_tcp_sgl_init(sgl, iov, iovcnt, 0);
+ hlen = pdu->hdr.common.hlen;
+
+ /* Header Digest */
+ if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+ hlen += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ plen = hlen;
+ if (!pdu->data_len) {
+ /* PDU header + possible header digest */
+ _nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen);
+ goto end;
+ }
+
+ /* Padding */
+ if (pdu->padding_len > 0) {
+ hlen += pdu->padding_len;
+ plen = hlen;
+ }
+
+ if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
+ goto end;
+ }
+
+ /* Data Segment */
+ plen += pdu->data_len;
+ if (spdk_likely(!pdu->dif_ctx)) {
+ if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+ goto end;
+ }
+ } else {
+ if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+ pdu->data_len, pdu->dif_ctx)) {
+ goto end;
+ }
+ }
+
+ /* Data Digest */
+ if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
+ plen += SPDK_NVME_TCP_DIGEST_LEN;
+ _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+ }
+
+ /* Only reached when every piece fitted: the computed length must match the header. */
+ assert(plen == pdu->hdr.common.plen);
+
+end:
+ if (_mapped_length != NULL) {
+ *_mapped_length = sgl->total_size;
+ }
+
+ return iovcnt - sgl->iovcnt;
+}
+
+/*
+ * Fill iov with the payload of a PDU (data segment and, if ddgst_enable is set,
+ * the data digest), starting pdu->readv_offset bytes into that payload.
+ *
+ * pdu->sgl is (re)initialised over iov/iovcnt and used as the cursor. Building
+ * stops early, without error, as soon as the iovec array is full.
+ *
+ * \param iov Array to fill.
+ * \param iovcnt Number of entries in iov; 0 makes the function return 0 at once.
+ * \param pdu PDU whose payload is described.
+ * \param ddgst_enable Append pdu->data_digest after the data.
+ * \param _mapped_length If not NULL, receives the total number of bytes mapped.
+ * \return number of iovec entries that were filled.
+ */
+static int
+nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+ bool ddgst_enable, uint32_t *_mapped_length)
+{
+ struct _nvme_tcp_sgl *sgl;
+
+ if (iovcnt == 0) {
+ return 0;
+ }
+
+ sgl = &pdu->sgl;
+ _nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset);
+
+ if (spdk_likely(!pdu->dif_ctx)) {
+ if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+ goto end;
+ }
+ } else {
+ if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+ pdu->data_len, pdu->dif_ctx)) {
+ goto end;
+ }
+ }
+
+ /* Data Digest */
+ if (ddgst_enable) {
+ _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+ }
+
+end:
+ if (_mapped_length != NULL) {
+ *_mapped_length = sgl->total_size;
+ }
+ return iovcnt - sgl->iovcnt;
+}
+
+/*
+ * Receive up to bytes bytes from sock into buf with spdk_sock_recv().
+ *
+ * \return the positive byte count reported by spdk_sock_recv(); 0 when it failed
+ * with EAGAIN or EWOULDBLOCK; NVME_TCP_CONNECTION_FATAL when it returned 0 or
+ * failed with any other errno.
+ */
+static int
+nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
+ void *buf)
+{
+ int ret;
+
+ ret = spdk_sock_recv(sock, buf, bytes);
+
+ if (ret > 0) {
+ return ret;
+ }
+
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ return 0;
+ }
+
+ /* For connect reset issue, do not output error log */
+ if (errno != ECONNRESET) {
+ SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
+ errno, spdk_strerror(errno));
+ }
+ }
+
+ /* connection closed */
+ return NVME_TCP_CONNECTION_FATAL;
+}
+
+/*
+ * Receive from sock into the buffers described by iov/iovcnt.
+ *
+ * A single iovec is delegated to nvme_tcp_read_data(); more than one goes
+ * through spdk_sock_readv().
+ *
+ * \return 0 when iov is NULL or iovcnt is 0; otherwise the positive byte count
+ * read, 0 when the read failed with EAGAIN or EWOULDBLOCK, or
+ * NVME_TCP_CONNECTION_FATAL when the read returned 0 or failed with any other errno.
+ */
+static int
+nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
+{
+ int ret;
+
+ assert(sock != NULL);
+ if (iov == NULL || iovcnt == 0) {
+ return 0;
+ }
+
+ if (iovcnt == 1) {
+ return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
+ }
+
+ ret = spdk_sock_readv(sock, iov, iovcnt);
+
+ if (ret > 0) {
+ return ret;
+ }
+
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ return 0;
+ }
+
+ /* For connect reset issue, do not output error log */
+ if (errno != ECONNRESET) {
+ SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
+ errno, spdk_strerror(errno));
+ }
+ }
+
+ /* connection closed */
+ return NVME_TCP_CONNECTION_FATAL;
+}
+
+
+/*
+ * Read the not-yet-received part of the PDU payload from sock.
+ *
+ * The iovecs are built on the stack by nvme_tcp_build_payload_iovs() (which
+ * starts at pdu->readv_offset and honours pdu->ddgst_enable) and then passed to
+ * nvme_tcp_readv_data(), whose return value is returned unchanged.
+ */
+static int
+nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
+{
+ /* One entry more than NVME_TCP_MAX_SGL_DESCRIPTORS - presumably room for the data digest; confirm. */
+ struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+ int iovcnt;
+
+ iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+ pdu->ddgst_enable, NULL);
+ assert(iovcnt >= 0);
+
+ return nvme_tcp_readv_data(sock, iov, iovcnt);
+}
+
+/*
+ * Make the PDU's data iovec list a single entry covering data/data_len.
+ * pdu->data_len itself is not touched.
+ */
+static void
+_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+ pdu->data_iov[0].iov_base = data;
+ pdu->data_iov[0].iov_len = data_len;
+ pdu->data_iovcnt = 1;
+}
+
+/*
+ * Attach one contiguous buffer as the PDU's data: sets the single data iovec
+ * via _nvme_tcp_pdu_set_data() and records data_len in pdu->data_len.
+ */
+static void
+nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+ _nvme_tcp_pdu_set_data(pdu, data, data_len);
+ pdu->data_len = data_len;
+}
+
+/*
+ * Point the PDU's data iovecs at a window of the caller's buffer.
+ *
+ * pdu->data_len is set to data_len. Without a DIF context the window is
+ * [data_offset, data_offset + data_len) of the buffer; with one, the DIF
+ * context's data offset is updated and spdk_dif_get_range_with_md() converts
+ * the window into the buffer offset/length that is actually mapped.
+ *
+ * \param pdu PDU to update; pdu->sgl is used as scratch state when iovcnt != 1.
+ * \param iov Caller's buffer, as an iovec array.
+ * \param iovcnt Number of entries in iov.
+ * \param data_offset Offset of the window.
+ * \param data_len Length of the window.
+ */
+static void
+nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
+			  struct iovec *iov, int iovcnt,
+			  uint32_t data_offset, uint32_t data_len)
+{
+	uint32_t buf_offset, buf_len, remain_len, len;
+	uint8_t *buf;
+	struct _nvme_tcp_sgl *pdu_sgl, buf_sgl;
+
+	pdu->data_len = data_len;
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		buf_offset = data_offset;
+		buf_len = data_len;
+	} else {
+		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
+		spdk_dif_get_range_with_md(data_offset, data_len,
+					   &buf_offset, &buf_len, pdu->dif_ctx);
+	}
+
+	if (iovcnt == 1) {
+		/*
+		 * Offset through a byte pointer rather than a uint64_t round trip, which
+		 * is only correct where pointers are exactly 64 bits wide.
+		 */
+		_nvme_tcp_pdu_set_data(pdu, (uint8_t *)iov[0].iov_base + buf_offset, buf_len);
+	} else {
+		pdu_sgl = &pdu->sgl;
+
+		_nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
+		_nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0);
+
+		/* Skip to the start of the window, then copy iovec pieces until it is covered. */
+		_nvme_tcp_sgl_advance(&buf_sgl, buf_offset);
+		remain_len = buf_len;
+
+		while (remain_len > 0) {
+			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
+			len = spdk_min(len, remain_len);
+
+			_nvme_tcp_sgl_advance(&buf_sgl, len);
+			remain_len -= len;
+
+			if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) {
+				break;
+			}
+		}
+
+		assert(remain_len == 0);
+		assert(pdu_sgl->total_size == buf_len);
+
+		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt;
+	}
+}
+
+/*
+ * Compute pdu->psh_len, the header length that follows the common PDU header.
+ *
+ * Starts from hdr.common.hlen. When a header digest applies to this PDU type
+ * and hdgst_enable is set, has_hdgst is set and the digest length is added;
+ * if plen shows that more bytes follow and hdr.common.pdo differs from the
+ * length so far, the length becomes pdo. Finally
+ * sizeof(struct spdk_nvme_tcp_common_pdu_hdr) is subtracted.
+ *
+ * NOTE(review): padding_len is a uint8_t, so pdo < psh_len wraps to a non-zero
+ * value and psh_len is then set to the smaller pdo - confirm pdo is validated
+ * before this is called.
+ * NOTE(review): pdo is only consulted inside the header-digest branch - confirm
+ * that is intended.
+ */
+static void
+nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
+{
+ uint8_t psh_len, pdo, padding_len;
+
+ psh_len = pdu->hdr.common.hlen;
+
+ if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+ pdu->has_hdgst = true;
+ psh_len += SPDK_NVME_TCP_DIGEST_LEN;
+ if (pdu->hdr.common.plen > psh_len) {
+ pdo = pdu->hdr.common.pdo;
+ padding_len = pdo - psh_len;
+ if (padding_len > 0) {
+ psh_len = pdo;
+ }
+ }
+ }
+
+ psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
+ pdu->psh_len = psh_len;
+}
+
+#endif /* SPDK_INTERNAL_NVME_TCP_H */
diff --git a/src/spdk/include/spdk_internal/rdma.h b/src/spdk/include/spdk_internal/rdma.h
new file mode 100644
index 000000000..4a6d5104b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/rdma.h
@@ -0,0 +1,117 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_RDMA_H
+#define SPDK_RDMA_H
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+
+/*
+ * Initialisation attributes handed to spdk_rdma_qp_create().
+ * NOTE(review): the fields look like the usual verbs QP init attributes plus a
+ * protection domain - confirm how each provider consumes them.
+ */
+struct spdk_rdma_qp_init_attr {
+ void *qp_context;
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ struct ibv_srq *srq;
+ struct ibv_qp_cap cap;
+ struct ibv_pd *pd;
+};
+
+/*
+ * Pointers to the first and last send Work Request of a list.
+ * Presumably the WRs in between are chained through ibv_send_wr.next - confirm
+ * in the implementation.
+ */
+struct spdk_rdma_send_wr_list {
+ struct ibv_send_wr *first;
+ struct ibv_send_wr *last;
+};
+
+/*
+ * SPDK wrapper around a verbs queue pair: the ibv_qp, the RDMA CM id it was
+ * created with, and the list of send Work Requests held for the qpair.
+ */
+struct spdk_rdma_qp {
+ struct ibv_qp *qp;
+ struct rdma_cm_id *cm_id;
+ struct spdk_rdma_send_wr_list send_wrs;
+};
+
+/**
+ * Create RDMA provider specific qpair
+ * \param cm_id Pointer to RDMACM cm_id
+ * \param qp_attr Pointer to qpair init attributes
+ * \return Pointer to a newly created qpair on success or NULL on failure
+ */
+struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id,
+ struct spdk_rdma_qp_init_attr *qp_attr);
+
+/**
+ * Accept a connection request. Called by the passive side (NVMEoF target)
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \param conn_param Optional information needed to establish the connection
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param);
+
+/**
+ * Complete the connection process, must be called by the active
+ * side (NVMEoF initiator) upon receipt of RDMA_CM_EVENT_CONNECT_RESPONSE
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Destroy RDMA provider specific qpair
+ * \param spdk_rdma_qp Pointer to qpair to be destroyed
+ */
+void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Disconnect a connection and transition the associated qpair to error state.
+ * Generates RDMA_CM_EVENT_DISCONNECTED on both connection sides
+ * \param spdk_rdma_qp Pointer to qpair to be disconnected
+ */
+int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Append the given send wr structure to the qpair's outstanding sends list.
+ * This function accepts either a single Work Request or the first WR in a linked list.
+ *
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param first Pointer to the first Work Request
+ * \return true if there were no outstanding WRs before, false otherwise
+ */
+bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first);
+
+/**
+ * Submit all queued Work Requests
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr);
+
+#endif /* SPDK_RDMA_H */
diff --git a/src/spdk/include/spdk_internal/sock.h b/src/spdk/include/spdk_internal/sock.h
new file mode 100644
index 000000000..d88d6bd03
--- /dev/null
+++ b/src/spdk/include/spdk_internal/sock.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * TCP network implementation abstraction layer
+ */
+
+#ifndef SPDK_INTERNAL_SOCK_H
+#define SPDK_INTERNAL_SOCK_H
+
+#include "spdk/stdinc.h"
+#include "spdk/sock.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_EVENTS_PER_POLL 32
+#define DEFAULT_SOCK_PRIORITY 0
+#define MIN_SOCK_PIPE_SIZE 1024
+
+/*
+ * Generic socket state shared by the net implementations.
+ */
+struct spdk_sock {
+ struct spdk_net_impl *net_impl;
+ struct spdk_sock_opts opts;
+ /* Raised while request callbacks are being invoked on this socket. */
+ int cb_cnt;
+ spdk_sock_cb cb_fn;
+ void *cb_arg;
+ struct spdk_sock_group_impl *group_impl;
+ TAILQ_ENTRY(spdk_sock) link;
+
+ int max_iovcnt;
+ /* Requests that have been queued but not yet moved to pending_reqs. */
+ TAILQ_HEAD(, spdk_sock_request) queued_reqs;
+ /* Requests moved off queued_reqs; their callback has not run yet. */
+ TAILQ_HEAD(, spdk_sock_request) pending_reqs;
+ /* Sum of iovcnt over the requests currently on queued_reqs. */
+ int queued_iovcnt;
+
+ struct {
+ /* Checked after callbacks to detect that the user closed the socket. */
+ uint8_t closed : 1;
+ uint8_t reserved : 7;
+ } flags;
+};
+
+/*
+ * A socket group: a list of per-implementation group objects plus an opaque
+ * context pointer.
+ */
+struct spdk_sock_group {
+ STAILQ_HEAD(, spdk_sock_group_impl) group_impls;
+ void *ctx;
+};
+
+/*
+ * Per-net-implementation part of a socket group: the sockets that belong to it
+ * and a record of sockets removed since the last poll.
+ */
+struct spdk_sock_group_impl {
+ struct spdk_net_impl *net_impl;
+ TAILQ_HEAD(, spdk_sock) socks;
+ STAILQ_ENTRY(spdk_sock_group_impl) link;
+ /* Number of entries used in removed_socks. Refreshed each time we poll the sock group. */
+ int num_removed_socks;
+ /* Unfortunately, we can't just keep a tailq of the sockets in case they are freed
+ * or added to another poll group later.
+ */
+ uintptr_t removed_socks[MAX_EVENTS_PER_POLL];
+};
+
+/*
+ * Operations table of one socket implementation. Instances are registered with
+ * spdk_net_impl_register(), normally through SPDK_NET_IMPL_REGISTER(), and are
+ * linked together through the link member.
+ */
+struct spdk_net_impl {
+ const char *name;
+ int priority;
+
+ /* Address query, connection setup and teardown, synchronous I/O. */
+ int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr,
+ int clen, uint16_t *cport);
+ struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*accept)(struct spdk_sock *sock);
+ int (*close)(struct spdk_sock *sock);
+ ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
+ ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+ ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+
+ /* Request-based writes. */
+ void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
+ int (*flush)(struct spdk_sock *sock);
+
+ /* Per-socket tuning. */
+ int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
+ int (*set_recvbuf)(struct spdk_sock *sock, int sz);
+ int (*set_sendbuf)(struct spdk_sock *sock, int sz);
+
+ /* Per-socket queries. */
+ bool (*is_ipv6)(struct spdk_sock *sock);
+ bool (*is_ipv4)(struct spdk_sock *sock);
+ bool (*is_connected)(struct spdk_sock *sock);
+
+ /* Poll-group support. */
+ int (*get_placement_id)(struct spdk_sock *sock, int *placement_id);
+ struct spdk_sock_group_impl *(*group_impl_create)(void);
+ int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
+ struct spdk_sock **socks);
+ int (*group_impl_close)(struct spdk_sock_group_impl *group);
+
+ /* Implementation-wide options. */
+ int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
+ int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);
+
+ STAILQ_ENTRY(spdk_net_impl) link;
+};
+
+void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);
+
+/*
+ * Define a constructor function named net_impl_register_<name> that calls
+ * spdk_net_impl_register(impl, priority); being a constructor, it runs
+ * automatically before main().
+ */
+#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
+static void __attribute__((constructor)) net_impl_register_##name(void) \
+{ \
+ spdk_net_impl_register(impl, priority); \
+}
+
+/*
+ * Put req at the tail of sock->queued_reqs and add its iovcnt to
+ * sock->queued_iovcnt.
+ */
+static inline void
+spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+ TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
+ sock->queued_iovcnt += req->iovcnt;
+}
+
+/*
+ * Move req from sock->queued_reqs to the tail of sock->pending_reqs and
+ * subtract its iovcnt from sock->queued_iovcnt.
+ */
+static inline void
+spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+ TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+ assert(sock->queued_iovcnt >= req->iovcnt);
+ sock->queued_iovcnt -= req->iovcnt;
+ TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
+}
+
+/*
+ * Complete a pending request: remove it from sock->pending_reqs, reset its
+ * internal offset and invoke its callback with err.
+ *
+ * cb_cnt is held raised for the duration of the callback. If the closed flag
+ * went from clear to set during the callback and no other callback is in
+ * progress, spdk_sock_close() is called here.
+ *
+ * \return 0 normally, -1 if the socket was closed as described above.
+ */
+static inline int
+spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
+{
+ bool closed;
+ int rc = 0;
+
+ TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+ req->internal.offset = 0;
+
+ /* Remember the flag so a close requested by the callback can be told apart. */
+ closed = sock->flags.closed;
+ sock->cb_cnt++;
+ req->cb_fn(req->cb_arg, err);
+ assert(sock->cb_cnt > 0);
+ sock->cb_cnt--;
+
+ if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+ /* The user closed the socket in response to a callback above. */
+ rc = -1;
+ spdk_sock_close(&sock);
+ }
+
+ return rc;
+}
+
+/*
+ * Fail every outstanding request on sock with -ECANCELED.
+ *
+ * pending_reqs is drained first, then queued_reqs (adjusting queued_iovcnt).
+ * Each list is re-read from its head after every callback rather than
+ * iterated, so it stays valid whatever the callback does to the list.
+ * cb_cnt is held raised across all callbacks. If the closed flag went from
+ * clear to set meanwhile and no other callback is in progress,
+ * spdk_sock_close() is called here.
+ *
+ * \return 0 normally, -1 if the socket was closed as described above.
+ */
+static inline int
+spdk_sock_abort_requests(struct spdk_sock *sock)
+{
+ struct spdk_sock_request *req;
+ bool closed;
+ int rc = 0;
+
+ closed = sock->flags.closed;
+ sock->cb_cnt++;
+
+ req = TAILQ_FIRST(&sock->pending_reqs);
+ while (req) {
+ TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+ req->cb_fn(req->cb_arg, -ECANCELED);
+
+ req = TAILQ_FIRST(&sock->pending_reqs);
+ }
+
+ req = TAILQ_FIRST(&sock->queued_reqs);
+ while (req) {
+ TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+
+ assert(sock->queued_iovcnt >= req->iovcnt);
+ sock->queued_iovcnt -= req->iovcnt;
+
+ req->cb_fn(req->cb_arg, -ECANCELED);
+
+ req = TAILQ_FIRST(&sock->queued_reqs);
+ }
+ assert(sock->cb_cnt > 0);
+ sock->cb_cnt--;
+
+ assert(TAILQ_EMPTY(&sock->queued_reqs));
+ assert(TAILQ_EMPTY(&sock->pending_reqs));
+
+ if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+ /* The user closed the socket in response to a callback above. */
+ rc = -1;
+ spdk_sock_close(&sock);
+ }
+
+ return rc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_SOCK_H */
diff --git a/src/spdk/include/spdk_internal/thread.h b/src/spdk/include/spdk_internal/thread.h
new file mode 100644
index 000000000..10bc4824c
--- /dev/null
+++ b/src/spdk/include/spdk_internal/thread.h
@@ -0,0 +1,136 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_THREAD_INTERNAL_H_
+#define SPDK_THREAD_INTERNAL_H_
+
+#include "spdk/stdinc.h"
+#include "spdk/thread.h"
+
+#define SPDK_MAX_POLLER_NAME_LEN 256
+#define SPDK_MAX_THREAD_NAME_LEN 256
+
+/* Life-cycle states of a poller; stored in spdk_poller.state. */
+enum spdk_poller_state {
+ /* The poller is registered with a thread but not currently executing its fn. */
+ SPDK_POLLER_STATE_WAITING,
+
+ /* The poller is currently running its fn. */
+ SPDK_POLLER_STATE_RUNNING,
+
+ /* The poller was unregistered during the execution of its fn. */
+ SPDK_POLLER_STATE_UNREGISTERED,
+
+ /* The poller is in the process of being paused. It will be paused
+ * the next time it is supposed to be executed.
+ */
+ SPDK_POLLER_STATE_PAUSING,
+
+ /* The poller is registered but currently paused. It's on the
+ * paused_pollers list.
+ */
+ SPDK_POLLER_STATE_PAUSED,
+};
+
+/*
+ * One poller: a function and argument bound to a thread, linked on one of that
+ * thread's poller lists through tailq.
+ */
+struct spdk_poller {
+ TAILQ_ENTRY(spdk_poller) tailq;
+
+ /* Current state of the poller; should only be accessed from the poller's thread. */
+ enum spdk_poller_state state;
+
+ /* Timing and statistics. NOTE(review): units presumably TSC ticks - confirm. */
+ uint64_t period_ticks;
+ uint64_t next_run_tick;
+ uint64_t run_count;
+ uint64_t busy_count;
+ spdk_poller_fn fn;
+ void *arg;
+ struct spdk_thread *thread;
+
+ /* Room for SPDK_MAX_POLLER_NAME_LEN characters plus a terminating NUL. */
+ char name[SPDK_MAX_POLLER_NAME_LEN + 1];
+};
+
+/* Life-cycle states of an SPDK thread; stored in spdk_thread.state. */
+enum spdk_thread_state {
+ /* The thread is processing pollers and messages by spdk_thread_poll(). */
+ SPDK_THREAD_STATE_RUNNING,
+
+ /* The thread is in the process of termination. It reaps unregistering
+ * pollers and is releasing I/O channels.
+ */
+ SPDK_THREAD_STATE_EXITING,
+
+ /* The thread is exited. It is ready to call spdk_thread_destroy(). */
+ SPDK_THREAD_STATE_EXITED,
+};
+
+/*
+ * State of one SPDK thread: its poller lists, message ring and cache,
+ * I/O channels, identity and a trailing user context area.
+ */
+struct spdk_thread {
+ uint64_t tsc_last;
+ struct spdk_thread_stats stats;
+ /*
+ * Contains pollers actively running on this thread. Pollers
+ * are run round-robin. The thread takes one poller from the head
+ * of the ring, executes it, then puts it back at the tail of
+ * the ring.
+ */
+ TAILQ_HEAD(active_pollers_head, spdk_poller) active_pollers;
+ /**
+ * Contains pollers running on this thread with a periodic timer.
+ */
+ TAILQ_HEAD(timed_pollers_head, spdk_poller) timed_pollers;
+ /*
+ * Contains paused pollers. Pollers on this queue are waiting until
+ * they are resumed (in which case they're put onto the active/timer
+ * queues) or unregistered.
+ */
+ TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers;
+ struct spdk_ring *messages;
+ SLIST_HEAD(, spdk_msg) msg_cache;
+ size_t msg_cache_count;
+ spdk_msg_fn critical_msg;
+ uint64_t id;
+ enum spdk_thread_state state;
+
+ TAILQ_HEAD(, spdk_io_channel) io_channels;
+ TAILQ_ENTRY(spdk_thread) tailq;
+
+ /* Room for SPDK_MAX_THREAD_NAME_LEN characters plus a terminating NUL. */
+ char name[SPDK_MAX_THREAD_NAME_LEN + 1];
+ struct spdk_cpuset cpumask;
+ uint64_t exit_timeout_tsc;
+
+ /* User context allocated at the end */
+ /* Zero-length array (GNU extension): adds nothing to sizeof(struct spdk_thread). */
+ uint8_t ctx[0];
+};
+
+const char *spdk_poller_state_str(enum spdk_poller_state state);
+
+const char *spdk_io_device_get_name(struct io_device *dev);
+
+#endif /* SPDK_THREAD_INTERNAL_H_ */
diff --git a/src/spdk/include/spdk_internal/uring.h b/src/spdk/include/spdk_internal/uring.h
new file mode 100644
index 000000000..ff22f11d4
--- /dev/null
+++ b/src/spdk/include/spdk_internal/uring.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_URING_H
+#define SPDK_INTERNAL_URING_H
+
+#include <liburing.h>
+
+#ifndef __NR_sys_io_uring_enter
+#define __NR_sys_io_uring_enter 426
+#endif
+
+/*
+ * Issue the io_uring_enter system call directly through syscall().
+ *
+ * The two trailing syscall arguments are always passed as NULL and 0.
+ * Declared inline so that translation units which include this header without
+ * calling the function do not get an unused-function warning.
+ *
+ * \param ring_fd File descriptor of the ring.
+ * \param to_submit Passed through as the to_submit argument.
+ * \param min_complete Passed through as the min_complete argument.
+ * \param flags Passed through as the flags argument.
+ * \return the value returned by syscall(), converted to int.
+ */
+static inline int
+spdk_io_uring_enter(int ring_fd, unsigned int to_submit,
+		    unsigned int min_complete, unsigned int flags)
+{
+	return syscall(__NR_sys_io_uring_enter, ring_fd, to_submit,
+		       min_complete, flags, NULL, 0);
+}
+
+#endif /* SPDK_INTERNAL_URING_H */
diff --git a/src/spdk/include/spdk_internal/utf.h b/src/spdk/include/spdk_internal/utf.h
new file mode 100644
index 000000000..b2b1c3c45
--- /dev/null
+++ b/src/spdk/include/spdk_internal/utf.h
@@ -0,0 +1,325 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_UTF_H_
+#define SPDK_UTF_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/endian.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+
+/* True when c is a UTF-8 continuation byte: 0x80..0xBF, i.e. binary 10xxxxxx. */
+static inline bool
+utf8_tail(uint8_t c)
+{
+	return (c >> 6) == 0x2;
+}
+
+/*
+ * Check for a valid UTF-8 encoding of a single codepoint.
+ *
+ * Bytes are read from start up to, but not including, end. Overlong forms
+ * (lead bytes C0/C1, E0 followed by less than A0, F0 followed by less than 90),
+ * encoded surrogates (ED A0..BF) and values above F4 8F BF BF are rejected.
+ *
+ * \return Length of valid UTF-8 byte sequence, 0 if start == end, or negative
+ * if invalid or truncated.
+ */
+static inline int
+utf8_valid(const uint8_t *start, const uint8_t *end)
+{
+ const uint8_t *p = start;
+ uint8_t b0, b1, b2, b3;
+
+ if (p == end) {
+ return 0;
+ }
+
+ b0 = *p;
+
+ if (b0 <= 0x7F) {
+ return 1;
+ }
+
+ if (b0 <= 0xC1) {
+ /* Invalid start byte */
+ return -1;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b1 = *p;
+
+ if (b0 <= 0xDF) {
+ /* C2..DF 80..BF */
+ if (!utf8_tail(b1)) {
+ return -1;
+ }
+ return 2;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b2 = *p;
+
+ if (b0 == 0xE0) {
+ /* E0 A0..BF 80..BF */
+ if (b1 < 0xA0 || b1 > 0xBF || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ } else if (b0 == 0xED && b1 >= 0xA0) {
+ /*
+ * UTF-16 surrogate pairs use U+D800..U+DFFF, which would be encoded as
+ * ED A0..BF 80..BF in UTF-8; however, surrogate pairs are not allowed in UTF-8.
+ */
+ return -1;
+ } else if (b0 <= 0xEF) {
+ /* E1..EF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b3 = *p;
+
+ if (b0 == 0xF0) {
+ /* F0 90..BF 80..BF 80..BF */
+ if (b1 < 0x90 || b1 > 0xBF || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 <= 0xF3) {
+ /* F1..F3 80..BF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 == 0xF4) {
+ /* F4 80..8F 80..BF 80..BF */
+ if (b1 < 0x80 || b1 > 0x8F || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ }
+
+ /* F5..FF can never start a valid sequence. */
+ return -1;
+}
+
+/* Decode a 1-byte UTF-8 sequence: the codepoint is the byte itself. No validation is done. */
+static inline uint32_t
+utf8_decode_unsafe_1(const uint8_t *data)
+{
+	return *data;
+}
+
+/*
+ * Decode a 2-byte UTF-8 sequence: 5 payload bits from data[0], 6 from data[1].
+ * The bytes are only masked, not validated.
+ */
+static inline uint32_t
+utf8_decode_unsafe_2(const uint8_t *data)
+{
+	uint32_t lead = data[0] & 0x1F;
+	uint32_t tail = data[1] & 0x3F;
+
+	return (lead << 6) | tail;
+}
+
+/*
+ * Decode a 3-byte UTF-8 sequence: 4 payload bits from data[0], 6 each from
+ * data[1] and data[2]. The bytes are only masked, not validated.
+ */
+static inline uint32_t
+utf8_decode_unsafe_3(const uint8_t *data)
+{
+	uint32_t lead = data[0] & 0x0F;
+	uint32_t mid = data[1] & 0x3F;
+	uint32_t tail = data[2] & 0x3F;
+
+	return (lead << 12) | (mid << 6) | tail;
+}
+
+/*
+ * Decode a 4-byte UTF-8 sequence: 3 payload bits from data[0], 6 each from
+ * data[1], data[2] and data[3]. The bytes are only masked, not validated.
+ */
+static inline uint32_t
+utf8_decode_unsafe_4(const uint8_t *data)
+{
+	uint32_t lead = data[0] & 0x07;
+	uint32_t second = data[1] & 0x3F;
+	uint32_t third = data[2] & 0x3F;
+	uint32_t tail = data[3] & 0x3F;
+
+	return (lead << 18) | (second << 12) | (third << 6) | tail;
+}
+
+/*
+ * Write the UTF-8 encoding of codepoint c to buf.
+ *
+ * "Unsafe" because no buffer length is taken: buf must have room for 4 bytes.
+ * Surrogates (U+D800..U+DFFF) and values above U+10FFFF are refused and leave
+ * buf untouched.
+ *
+ * \return Number of bytes written to buf (1..4), or negative if c cannot be encoded.
+ */
+static inline int
+utf8_encode_unsafe(uint8_t *buf, uint32_t c)
+{
+	if (c <= 0x7F) {
+		buf[0] = (uint8_t)c;
+		return 1;
+	}
+
+	if (c <= 0x7FF) {
+		buf[0] = (uint8_t)(0xC0 | (c >> 6));
+		buf[1] = (uint8_t)(0x80 | (c & 0x3F));
+		return 2;
+	}
+
+	if (c >= 0xD800 && c <= 0xDFFF) {
+		/* UTF-16 surrogate pairs - invalid in UTF-8 */
+		return -1;
+	}
+
+	if (c > 0x10FFFF) {
+		/* Beyond the last Unicode codepoint. */
+		return -1;
+	}
+
+	if (c <= 0xFFFF) {
+		buf[0] = (uint8_t)(0xE0 | (c >> 12));
+		buf[1] = (uint8_t)(0x80 | ((c >> 6) & 0x3F));
+		buf[2] = (uint8_t)(0x80 | (c & 0x3F));
+		return 3;
+	}
+
+	buf[0] = (uint8_t)(0xF0 | (c >> 18));
+	buf[1] = (uint8_t)(0x80 | ((c >> 12) & 0x3F));
+	buf[2] = (uint8_t)(0x80 | ((c >> 6) & 0x3F));
+	buf[3] = (uint8_t)(0x80 | (c & 0x3F));
+	return 4;
+}
+
+/*
+ * Number of bytes the UTF-8 encoding of codepoint c occupies.
+ *
+ * \return 1..4, or negative for surrogates (U+D800..U+DFFF) and values above U+10FFFF.
+ */
+static inline int
+utf8_codepoint_len(uint32_t c)
+{
+	if (c > 0x10FFFF) {
+		return -1;
+	}
+
+	if (c >= 0xD800 && c <= 0xDFFF) {
+		/* UTF-16 surrogate pairs - invalid in UTF-8 */
+		return -1;
+	}
+
+	if (c <= 0x7F) {
+		return 1;
+	}
+
+	if (c <= 0x7FF) {
+		return 2;
+	}
+
+	return (c <= 0xFFFF) ? 3 : 4;
+}
+
+/* True when val is a UTF-16 high (leading) surrogate, U+D800..U+DBFF. */
+static inline bool
+utf16_valid_surrogate_high(uint32_t val)
+{
+	/* Unsigned wrap-around makes anything below 0xD800 compare as a huge value. */
+	return (val - 0xD800u) <= 0x3FFu;
+}
+
+/* True when val is a UTF-16 low (trailing) surrogate, U+DC00..U+DFFF. */
+static inline bool
+utf16_valid_surrogate_low(uint32_t val)
+{
+	/* Unsigned wrap-around makes anything below 0xDC00 compare as a huge value. */
+	return (val - 0xDC00u) <= 0x3FFu;
+}
+
+/*
+ * Check for a valid UTF-16LE encoding of a single codepoint.
+ *
+ * Code units are read with from_le16() from start up to, but not including,
+ * end. A code unit outside the surrogate range is a complete codepoint; a high
+ * surrogate must be followed by a low surrogate.
+ *
+ * \return Length of valid UTF-16LE sequence in 16-bit code units, 0 if
+ * start == end, or negative if invalid or truncated.
+ */
+static inline int
+utf16le_valid(const uint16_t *start, const uint16_t *end)
+{
+ const uint16_t *p = start;
+ uint16_t high, low;
+
+ if (p == end) {
+ return 0;
+ }
+
+ high = from_le16(p);
+
+ if (high <= 0xD7FF || high >= 0xE000) {
+ /* Single code unit in BMP */
+ return 1;
+ }
+
+ if (high >= 0xDC00) {
+ /* Low surrogate in first code unit - invalid */
+ return -1;
+ }
+
+ /* Only D800..DBFF can remain at this point. */
+ assert(utf16_valid_surrogate_high(high));
+
+ if (++p == end) {
+ /* Not enough code units left */
+ return -1;
+ }
+ low = from_le16(p);
+
+ if (!utf16_valid_surrogate_low(low)) {
+ return -1;
+ }
+
+ /* Valid surrogate pair */
+ return 2;
+}
+
+/*
+ * Combine a UTF-16 surrogate pair into the codepoint it encodes.
+ *
+ * The low 10 bits of high become bits 10..19, the low 10 bits of low become
+ * bits 0..9, and 0x10000 is added. Both inputs are asserted to be surrogates of
+ * the right kind.
+ */
+static inline uint32_t
+utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
+{
+	uint32_t upper_bits, lower_bits;
+
+	assert(utf16_valid_surrogate_high(high));
+	assert(utf16_valid_surrogate_low(low));
+
+	upper_bits = (high & 0x3FF) << 10;
+	lower_bits = low & 0x3FF;
+
+	return (upper_bits | lower_bits) + 0x10000;
+}
+
+/*
+ * Split a codepoint in U+10000..U+10FFFF (asserted) into its UTF-16 surrogate
+ * pair: after subtracting 0x10000, the upper bits go into *high on top of
+ * 0xD800 and the low 10 bits into *low on top of 0xDC00.
+ */
+static inline void
+utf16_encode_surrogate_pair(uint32_t codepoint, uint16_t *high, uint16_t *low)
+{
+	uint32_t offset;
+
+	assert(codepoint >= 0x10000);
+	assert(codepoint <= 0x10FFFF);
+
+	offset = codepoint - 0x10000;
+	*high = 0xD800 | (offset >> 10);
+	*low = 0xDC00 | (offset & 0x3FF);
+
+	assert(utf16_valid_surrogate_high(*high));
+	assert(utf16_valid_surrogate_low(*low));
+}
+
+#endif
diff --git a/src/spdk/include/spdk_internal/vhost_user.h b/src/spdk/include/spdk_internal/vhost_user.h
new file mode 100644
index 000000000..92ed3b65b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/vhost_user.h
@@ -0,0 +1,140 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Structures defined in the vhost-user specification
+ */
+
+#ifndef SPDK_VHOST_USER_H
+#define SPDK_VHOST_USER_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/vhost.h>
+
+#ifndef VHOST_USER_MEMORY_MAX_NREGIONS /* each fallback below applies only if the name is not already defined */
+#define VHOST_USER_MEMORY_MAX_NREGIONS 8 /* capacity of vhost_memory_padded.regions[] */
+#endif
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE 256 /* size in bytes of vhost_user_config.region[] */
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_MQ
+#define VHOST_USER_PROTOCOL_F_MQ 0 /* NOTE(review): presumably a protocol-feature bit number - confirm against the vhost-user spec */
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG 9 /* NOTE(review): presumably a protocol-feature bit number - confirm */
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 /* NOTE(review): presumably a protocol-feature bit number - confirm */
+#endif
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30 /* NOTE(review): presumably a device feature bit number - confirm */
+#endif
+
+enum vhost_user_request { /* request codes; stored in vhost_user_msg.request */
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_NET_SET_MTU = 20,
+ VHOST_USER_SET_SLAVE_REQ_FD = 21,
+ VHOST_USER_IOTLB_MSG = 22,
+ VHOST_USER_GET_CONFIG = 24, /* value 23 is not assigned in this enum */
+ VHOST_USER_SET_CONFIG = 25,
+ VHOST_USER_CRYPTO_CREATE_SESS = 26,
+ VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+ VHOST_USER_POSTCOPY_ADVISE = 28,
+ VHOST_USER_POSTCOPY_LISTEN = 29,
+ VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_MAX /* implicit value 31: one past the last request code listed above */
+};
+
+/** Get/set config msg payload; carried as the cfg member of vhost_user_msg.payload */
+struct vhost_user_config {
+ uint32_t offset; /**< NOTE(review): presumably byte offset into the device config - confirm against the vhost-user spec */
+ uint32_t size; /**< NOTE(review): presumably number of valid bytes in region - confirm */
+ uint32_t flags;
+ uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; /**< fixed buffer of VHOST_USER_MAX_CONFIG_SIZE bytes */
+};
+
+/** Fixed-size vhost_memory struct; carried as the memory member of vhost_user_msg.payload */
+struct vhost_memory_padded {
+ uint32_t nregions; /**< NOTE(review): presumably the number of valid entries in regions - confirm */
+ uint32_t padding;
+ struct vhost_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS]; /**< fixed capacity of VHOST_USER_MEMORY_MAX_NREGIONS entries */
+};
+
+struct vhost_user_msg { /* header fields (request, flags, size) followed by a request-specific payload union */
+ enum vhost_user_request request; /**< request code, see enum vhost_user_request */
+
+#define VHOST_USER_VERSION_MASK 0x3 /* NOTE(review): presumably applied to flags (low two bits) - confirm */
+#define VHOST_USER_REPLY_MASK (0x1 << 2) /* NOTE(review): presumably applied to flags (bit 2) - confirm */
+ uint32_t flags;
+ uint32_t size; /**< the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK 0xff /* NOTE(review): presumably applied to u64 (low 8 bits) - confirm */
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) /* NOTE(review): presumably applied to u64 (bit 8) - confirm */
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_memory_padded memory;
+ struct vhost_user_config cfg;
+ } payload; /**< sized by the largest member of the union */
+} __attribute((packed)); /* packed: no padding is inserted between the members */
+
+#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) /* number of bytes preceding the payload union */
+#define VHOST_USER_PAYLOAD_SIZE \
+ (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) /* whole struct minus the header bytes */
+
+#endif /* SPDK_VHOST_USER_H */
diff --git a/src/spdk/include/spdk_internal/virtio.h b/src/spdk/include/spdk_internal/virtio.h
new file mode 100644
index 000000000..c30013efe
--- /dev/null
+++ b/src/spdk/include/spdk_internal/virtio.h
@@ -0,0 +1,486 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VIRTIO_H
+#define SPDK_VIRTIO_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_config.h>
+
+#include "spdk_internal/log.h"
+#include "spdk/likely.h"
+#include "spdk/queue.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/pci_ids.h"
+#include "spdk/env.h"
+
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100
+
+/* Extra status define for readability */
+#define VIRTIO_CONFIG_S_RESET 0
+
+struct virtio_dev_ops;
+
+struct virtio_dev {
+ struct virtqueue **vqs; /**< virtqueue pointers; NOTE(review): presumably one slot per queue index - confirm */
+
+ /** Name of this virtio dev set by backend */
+ char *name;
+
+ /** Fixed number of backend-specific non-I/O virtqueues. */
+ uint16_t fixed_queues_num;
+
+ /** Max number of virtqueues the host supports. */
+ uint16_t max_queues;
+
+ /** Common device & guest features. */
+ uint64_t negotiated_features;
+
+ int is_hw; /**< NOTE(review): presumably non-zero for hardware (PCI) backends - confirm */
+
+ /** Modern/legacy virtio device flag. */
+ uint8_t modern;
+
+ /** Mutex for asynchronous virtqueue-changing operations. */
+ pthread_mutex_t mutex;
+
+ /** Backend-specific callbacks. */
+ const struct virtio_dev_ops *backend_ops;
+
+ /** Context for the backend ops */
+ void *ctx;
+};
+
+struct virtio_dev_ops { /* backend callback table; a device refers to one through virtio_dev.backend_ops */
+ int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ void *dst, int len); /* NOTE(review): presumably backs virtio_dev_read_dev_config - confirm */
+ int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ const void *src, int len); /* NOTE(review): presumably backs virtio_dev_write_dev_config - confirm */
+ uint8_t (*get_status)(struct virtio_dev *hw);
+ void (*set_status)(struct virtio_dev *hw, uint8_t status);
+
+ /**
+ * Get device features. The features might be already
+ * negotiated with driver (guest) features.
+ */
+ uint64_t (*get_features)(struct virtio_dev *vdev);
+
+ /**
+ * Negotiate and set device features.
+ * The negotiation can fail with return code -1.
+ * This function should also set vdev->negotiated_features field.
+ */
+ int (*set_features)(struct virtio_dev *vdev, uint64_t features);
+
+ /** Destruct virtio device */
+ void (*destruct_dev)(struct virtio_dev *vdev);
+
+ uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
+ int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+
+ void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w); /* NOTE(review): presumably backs virtio_dev_dump_json_info - confirm */
+ void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
+};
+
+struct vq_desc_extra { /* element type of the virtqueue.vq_descx[] trailing array */
+ void *cookie; /**< NOTE(review): presumably the opaque cookie given to virtqueue_req_start - confirm */
+ uint16_t ndescs; /**< NOTE(review): presumably the number of descriptors used by the request - confirm */
+};
+
+struct virtqueue {
+ struct virtio_dev *vdev; /**< owner of this virtqueue */
+ struct vring vq_ring; /**< vring keeping desc, used and avail */
+ /**
+ * Last consumed descriptor in the used table,
+ * trails vq_ring.used->idx.
+ */
+ uint16_t vq_used_cons_idx;
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+ uint16_t vq_avail_idx; /**< sync until needed */
+
+ void *vq_ring_virt_mem; /**< virtual address of vring */
+ unsigned int vq_ring_size; /**< NOTE(review): presumably the size of the vring memory in bytes - confirm */
+
+ uint64_t vq_ring_mem; /**< physical address of vring */
+
+ /**
+ * Head of the free chain in the descriptor table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_head_idx;
+
+ /**
+ * Tail of the free chain in desc table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_tail_idx;
+ uint16_t vq_queue_index; /**< PCI queue index */
+ uint16_t *notify_addr; /**< NOTE(review): presumably the location written to notify the device - confirm */
+
+ /** Thread that's polling this queue. */
+ struct spdk_thread *owner_thread;
+
+ uint16_t req_start; /**< NOTE(review): req_start/req_end/reqs_finished presumably track the request being built - confirm */
+ uint16_t req_end;
+ uint16_t reqs_finished;
+
+ struct vq_desc_extra vq_descx[0]; /**< zero-length trailing array (GNU extension); entries live in memory allocated past the end of the struct */
+};
+
+enum spdk_virtio_desc_type { /* descriptor access type; taken as desc_type by virtqueue_req_add_iovs */
+ SPDK_VIRTIO_DESC_RO = 0, /**< Read only */
+ SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
+ /* TODO VIRTIO_DESC_INDIRECT */
+};
+
+/** Context for creating PCI virtio_devs */
+struct virtio_pci_ctx;
+
+/**
+ * Callback for creating virtio_dev from a PCI device.
+ * \param pci_ctx PCI context to be associated with a virtio_dev
+ * \param ctx context provided by the user
+ * \return 0 on success, -1 on error.
+ */
+typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);
+
+uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);
+
+/**
+ * Start a new request on the current vring head position and associate it
+ * with an opaque cookie object. The previous request in given vq will be
+ * made visible to the device in hopes it can be processed early, but there's
+ * no guarantee it will be until the device is notified with \c
+ * virtqueue_req_flush. This behavior is simply an optimization and virtqueues
+ * must always be flushed. Empty requests (with no descriptors added) will be
+ * ignored. The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ * \param cookie opaque object to associate with this request. Once the request
+ * is sent, processed and a response is received, the same object will be
+ * returned to the user after calling the virtio poll API.
+ * \param iovcnt number of required iovectors for the request. This can be
+ * higher than the actual number of iovectors to be added.
+ * \return 0 on success or negative errno otherwise. If the `iovcnt` is
+ * greater than virtqueue depth, -EINVAL is returned. If simply not enough
+ * iovectors are available, -ENOMEM is returned.
+ */
+int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);
+
+/**
+ * Flush a virtqueue. This will notify the device if it's required.
+ * The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_flush(struct virtqueue *vq);
+
+/**
+ * Abort the very last request in a virtqueue. This will restore virtqueue
+ * state to the point before the last request was created. Note that this
+ * is only effective if a queue hasn't been flushed yet. The device owning
+ * given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_abort(struct virtqueue *vq);
+
+/**
+ * Add iovec chain to the last created request. This call does not provide any
+ * error-checking. The caller has to ensure that he doesn't add more iovs than
+ * what was specified during request creation. The device owning given virtqueue
+ * must be started.
+ *
+ * \param vq virtio queue
+ * \param iovs iovec array
+ * \param iovcnt number of iovs in iovec array
+ * \param desc_type type of all given iovectors
+ */
+void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
+ enum spdk_virtio_desc_type desc_type);
+
+/**
+ * Construct a virtio device. The device will be in stopped state by default.
+ * Before doing any I/O, it has to be manually started via \c virtio_dev_start.
+ *
+ * \param vdev memory for virtio device, must be zeroed
+ * \param name name for the virtio device
+ * \param ops backend callbacks
+ * \param ops_ctx argument for the backend callbacks
+ * \return zero on success, or negative error code otherwise
+ */
+int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
+ const struct virtio_dev_ops *ops, void *ops_ctx);
+
+/**
+ * Reset the device and prepare it to be `virtio_dev_start`ed. This call
+ * will also renegotiate feature flags.
+ *
+ * \param vdev virtio device
+ * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
+ * flag will be automatically appended, as legacy devices are not supported.
+ */
+int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);
+
+/**
+ * Notify the host to start processing this virtio device. This is
+ * a blocking call that won't return until the host has started.
+ * This will also allocate virtqueues.
+ *
+ * \param vdev virtio device
+ * \param max_queues number of queues to allocate. The max number of
+ * usable I/O queues is also limited by the host device. `vdev` will be
+ * started successfully even if the host supports less queues than requested.
+ * \param fixed_queues_num number of queues preceding the first
+ * request queue. For Virtio-SCSI this is equal to 2, as there are
+ * additional event and control queues.
+ */
+int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
+ uint16_t fixed_queues_num);
+
+/**
+ * Stop the host from processing the device. This is a blocking call
+ * that won't return until all outstanding I/O has been processed on
+ * the host (virtio device) side. In order to re-start the device, it
+ * has to be `virtio_dev_reset` first.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_stop(struct virtio_dev *vdev);
+
+/**
+ * Destruct a virtio device. Note that it must be in the stopped state.
+ * The virtio_dev should be manually freed afterwards.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_destruct(struct virtio_dev *vdev);
+
+/**
+ * Bind a virtqueue with given index to the current thread;
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index virtqueue index
+ * \return 0 on success, -1 in case a virtqueue with given index either
+ * does not exist or is already acquired.
+ */
+int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Look for unused queue and bind it to the current thread. This will
+ * scan the queues in range from *start_index* (inclusive) up to
+ * vdev->max_queues (exclusive).
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param start_index virtqueue index to start looking from
+ * \return index of acquired queue or -1 in case no unused queue in given range
+ * has been found
+ */
+int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);
+
+/**
+ * Get thread that acquired given virtqueue.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return thread that acquired given virtqueue. If the queue is unused
+ * or doesn't exist a NULL is returned.
+ */
+struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Check if virtqueue with given index is acquired.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return virtqueue acquire status. in case of invalid index *false* is returned.
+ */
+bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Release previously acquired queue.
+ *
+ * This function must be called from the thread that acquired the queue.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue to release
+ */
+void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Get Virtio status flags.
+ *
+ * \param vdev virtio device
+ */
+uint8_t virtio_dev_get_status(struct virtio_dev *vdev);
+
+/**
+ * Set Virtio status flag. The flags have to be set in very specific order
+ * defined in the VIRTIO 1.0 spec section 3.1.1. To unset the flags, stop the
+ * device or set \c VIRTIO_CONFIG_S_RESET status flag. There is no way to
+ * unset only particular flags.
+ *
+ * \param vdev virtio device
+ * \param flag flag to set
+ */
+void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);
+
+/**
+ * Write raw data into the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param src pointer to data to copy from
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);
+
+/**
+ * Read raw data from the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param dst pointer to buffer to copy data into
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);
+
+/**
+ * Get backend-specific ops for given device.
+ *
+ * \param vdev virtio device
+ */
+const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);
+
+/**
+ * Check if the device has negotiated given feature bit.
+ *
+ * \param vdev virtio device
+ * \param bit feature bit number; valid numbers are 0-63
+ * \return true if the bit is set in vdev->negotiated_features; false if it is
+ * clear or if \c bit is not a valid bit number
+ */
+static inline bool
+virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
+{
+ /* Shifting a 64-bit value by 64 or more is undefined behaviour in C. */
+ if (bit >= 64) {
+ return false;
+ }
+
+ return !!(vdev->negotiated_features & (1ULL << bit));
+}
+
+/**
+ * Dump all device specific information into given json stream.
+ *
+ * \param vdev virtio device
+ * \param w json stream
+ */
+void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);
+
+/**
+ * Enumerate all PCI Virtio devices of given type on the system.
+ *
+ * \param enum_cb a function to be called for each valid PCI device.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `enum_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ */
+int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
+ uint16_t pci_device_id);
+
+/**
+ * Attach a PCI Virtio device of given type.
+ *
+ * \param create_cb callback to create a virtio_dev.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `create_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ * \param pci_addr PCI address of the device to attach
+ */
+int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
+ uint16_t pci_device_id, struct spdk_pci_addr *pci_addr);
+
+/**
+ * Connect to a vhost-user device and init corresponding virtio_dev struct.
+ * The virtio_dev will have to be freed with \c virtio_dev_destruct.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param path path to the Unix domain socket of the vhost-user device
+ * \param queue_size size of each of the queues
+ * \return 0 on success, non-zero on failure.
+ */
+int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
+ uint32_t queue_size);
+
+/**
+ * Initialize virtio_dev for a given PCI device.
+ * The virtio_dev has to be freed with \c virtio_dev_destruct.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param pci_ctx context of the PCI device
+ * \return 0 on success, -1 on error.
+ */
+int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
+ struct virtio_pci_ctx *pci_ctx);
+
+#endif /* SPDK_VIRTIO_H */