diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/include | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/include')
92 files changed, 32485 insertions, 0 deletions
diff --git a/src/spdk/include/Makefile b/src/spdk/include/Makefile new file mode 100644 index 000000000..5bf07bd0a --- /dev/null +++ b/src/spdk/include/Makefile @@ -0,0 +1,59 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +HEADERS := $(wildcard $(SPDK_ROOT_DIR)/include/spdk/*.h) +INSTALLED_HEADERS := $(patsubst $(SPDK_ROOT_DIR)/include%,$(DESTDIR)$(includedir)%,$(HEADERS)) + +$(INSTALLED_HEADERS): +ifeq ($(MAKECMDGOALS),install) + $(INSTALL_HEADER) +else + $(UNINSTALL_HEADER) +endif + +.PHONY: $(INSTALLED_HEADERS) + +all: + $(Q)cp $(SPDK_ROOT_DIR)/include/spdk/*.h $(SPDK_ROOT_DIR)/build/include/spdk/ + +clean: + @: + +install: $(INSTALLED_HEADERS) + +uninstall: $(INSTALLED_HEADERS) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/include/linux/virtio_blk.h b/src/spdk/include/linux/virtio_blk.h new file mode 100644 index 000000000..95c438312 --- /dev/null +++ b/src/spdk/include/linux/virtio_blk.h @@ -0,0 +1,201 @@ +#ifndef _LINUX_VIRTIO_BLK_H +#define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include <linux/types.h> +#include <linux/virtio_ids.h> +#include <linux/virtio_config.h> +#include <linux/virtio_types.h> + +/* Feature bits */ +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ +#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available */ +#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ + +/* Legacy feature bits */ +#ifndef VIRTIO_BLK_NO_LEGACY +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_FLUSH 9 /* Flush command supported */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ + +struct virtio_blk_config { + /* The capacity (in 512-byte sectors). 
*/ + __u64 capacity; + /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ + __u32 size_max; + /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ + __u32 seg_max; + /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ + struct virtio_blk_geometry { + __u16 cylinders; + __u8 heads; + __u8 sectors; + } geometry; + + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __u32 blk_size; + + /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ + /* exponent for physical block per logical block. */ + __u8 physical_block_exp; + /* alignment offset in logical blocks. */ + __u8 alignment_offset; + /* minimum I/O size without performance penalty in logical blocks. */ + __u16 min_io_size; + /* optimal sustained I/O size in logical blocks. */ + __u32 opt_io_size; + + /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ + __u8 wce; + __u8 unused; + + /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + __u16 num_queues; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ + /* + * The maximum discard sectors (in 512-byte sectors) for + * one segment. + */ + __u32 max_discard_sectors; + /* + * The maximum number of discard segments in a + * discard command. + */ + __u32 max_discard_seg; + /* Discard commands must be aligned to this number of sectors. */ + __u32 discard_sector_alignment; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ + /* + * The maximum number of write zeroes sectors (in 512-byte sectors) in + * one segment. + */ + __u32 max_write_zeroes_sectors; + /* + * The maximum number of segments in a write zeroes + * command. + */ + __u32 max_write_zeroes_seg; + /* + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the + * deallocation of one or more of the sectors. + */ + __u8 write_zeroes_may_unmap; + + __u8 unused1[3]; +} __attribute__((packed)); + +/* + * Command types + * + * Usage is a bit tricky as some bits are used as flags and some are not. 
+ * + * Rules: + * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or + * VIRTIO_BLK_T_BARRIER. VIRTIO_BLK_T_FLUSH is a command of its own + * and may not be combined with any of the other flags. + */ + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 + +#ifndef VIRTIO_BLK_NO_LEGACY +/* This bit says it's a scsi command, not an actual read or write. */ +#define VIRTIO_BLK_T_SCSI_CMD 2 +#endif /* VIRTIO_BLK_NO_LEGACY */ + +/* Cache flush command */ +#define VIRTIO_BLK_T_FLUSH 4 + +/* Get device ID command */ +#define VIRTIO_BLK_T_GET_ID 8 + +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeroes command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + +#ifndef VIRTIO_BLK_NO_LEGACY +/* Barrier before this op. */ +#define VIRTIO_BLK_T_BARRIER 0x80000000 +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +/* + * This comes first in the read scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, + * this is the first element of the read scatter-gather list. + */ +struct virtio_blk_outhdr { + /* VIRTIO_BLK_T* */ + __virtio32 type; + /* io priority. */ + __virtio32 ioprio; + /* Sector (ie. 512 byte offset) */ + __virtio64 sector; +}; + +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + +/* Discard/write zeroes range for each request. */ +struct virtio_blk_discard_write_zeroes { + /* discard/write zeroes start sector */ + __le64 sector; + /* number of discard/write zeroes sectors */ + __le32 num_sectors; + /* flags for this range */ + __le32 flags; +}; + +#ifndef VIRTIO_BLK_NO_LEGACY +struct virtio_scsi_inhdr { + __virtio32 errors; + __virtio32 data_len; + __virtio32 sense_len; + __virtio32 residual; +}; +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +/* And this is the final byte of the write scatter-gather list. 
*/ +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 +#endif /* _LINUX_VIRTIO_BLK_H */ diff --git a/src/spdk/include/linux/virtio_config.h b/src/spdk/include/linux/virtio_config.h new file mode 100644 index 000000000..6c8e43a70 --- /dev/null +++ b/src/spdk/include/linux/virtio_config.h @@ -0,0 +1,91 @@ +#ifndef _LINUX_VIRTIO_CONFIG_H +#define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +/* Virtio devices use a standardized configuration space to define their + * features and pass configuration information, but each implementation can + * store and access that space differently. */ +#include <linux/types.h> + +/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* Driver has finished configuring features */ +#define VIRTIO_CONFIG_S_FEATURES_OK 8 +/* Device entered invalid state, driver must reset it */ +#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80 + +/* + * Virtio feature bits VIRTIO_TRANSPORT_F_START through + * VIRTIO_TRANSPORT_F_END are reserved for the transport + * being used (e.g. virtio_ring, virtio_pci etc.), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 38 + +#ifndef VIRTIO_CONFIG_NO_LEGACY +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 + +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT 27 +#endif /* VIRTIO_CONFIG_NO_LEGACY */ + +/* v1.0 compliant. 
*/ +#define VIRTIO_F_VERSION_1 32 + +/* + * If clear - device has the IOMMU bypass quirk feature. + * If set - use platform tools to detect the IOMMU. + * + * Note the reverse polarity (compared to most other features), + * this is for compatibility with legacy systems. + */ +#define VIRTIO_F_IOMMU_PLATFORM 33 + +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + +/* + * Does the device support Single Root I/O Virtualization? + */ +#define VIRTIO_F_SR_IOV 37 +#endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/src/spdk/include/linux/virtio_pci.h b/src/spdk/include/linux/virtio_pci.h new file mode 100644 index 000000000..90007a1ab --- /dev/null +++ b/src/spdk/include/linux/virtio_pci.h @@ -0,0 +1,199 @@ +/* + * Virtio PCI driver + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_PCI_H +#define _LINUX_VIRTIO_PCI_H + +#include <linux/types.h> + +#ifndef VIRTIO_PCI_NO_LEGACY + +/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0 + +/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4 + +/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8 + +/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12 + +/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14 + +/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 + +/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18 + +/* An 8-bit r/o interrupt status register. Reading the value will return the + * current contents of the ISR and will also clear it. This is effectively + * a read-and-acknowledge. */ +#define VIRTIO_PCI_ISR 19 + +/* MSI-X registers: only enabled if MSI-X is enabled. 
*/ +/* A 16-bit vector for configuration changes. */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 +/* A 16-bit vector for selected queue notifications. */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 + +/* The remaining space is defined by each driver as the per-driver + * configuration space */ +#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) +/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) + +/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* VIRTIO_PCI_NO_LEGACY */ + +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + +#ifndef VIRTIO_PCI_NO_MODERN + +/* IDs for different capabilities. Must all exist. */ + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR access */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + __u8 cap_next; /* Generic PCI field: next ptr. */ + __u8 cap_len; /* Generic PCI field: capability length */ + __u8 cfg_type; /* Identifies the structure. */ + __u8 bar; /* Where to find it. */ + __u8 padding[3]; /* Pad to full dword. */ + __le32 offset; /* Offset within bar. */ + __le32 length; /* Length of the structure, in bytes. 
*/ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + __le32 device_feature_select; /* read-write */ + __le32 device_feature; /* read-only */ + __le32 guest_feature_select; /* read-write */ + __le32 guest_feature; /* read-write */ + __le16 msix_config; /* read-write */ + __le16 num_queues; /* read-only */ + __u8 device_status; /* read-write */ + __u8 config_generation; /* read-only */ + + /* About a specific virtqueue. */ + __le16 queue_select; /* read-write */ + __le16 queue_size; /* read-write, power of 2. */ + __le16 queue_msix_vector; /* read-write */ + __le16 queue_enable; /* read-write */ + __le16 queue_notify_off; /* read-only */ + __le32 queue_desc_lo; /* read-write */ + __le32 queue_desc_hi; /* read-write */ + __le32 queue_avail_lo; /* read-write */ + __le32 queue_avail_hi; /* read-write */ + __le32 queue_used_lo; /* read-write */ + __le32 queue_used_hi; /* read-write */ +}; + +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + __u8 pci_cfg_data[4]; /* Data for BAR access. */ +}; + +/* Macro versions of offsets for the Old Timers! 
*/ +#define VIRTIO_PCI_CAP_VNDR 0 +#define VIRTIO_PCI_CAP_NEXT 1 +#define VIRTIO_PCI_CAP_LEN 2 +#define VIRTIO_PCI_CAP_CFG_TYPE 3 +#define VIRTIO_PCI_CAP_BAR 4 +#define VIRTIO_PCI_CAP_OFFSET 8 +#define VIRTIO_PCI_CAP_LENGTH 12 + +#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 + +#define VIRTIO_PCI_COMMON_DFSELECT 0 +#define VIRTIO_PCI_COMMON_DF 4 +#define VIRTIO_PCI_COMMON_GFSELECT 8 +#define VIRTIO_PCI_COMMON_GF 12 +#define VIRTIO_PCI_COMMON_MSIX 16 +#define VIRTIO_PCI_COMMON_NUMQ 18 +#define VIRTIO_PCI_COMMON_STATUS 20 +#define VIRTIO_PCI_COMMON_CFGGENERATION 21 +#define VIRTIO_PCI_COMMON_Q_SELECT 22 +#define VIRTIO_PCI_COMMON_Q_SIZE 24 +#define VIRTIO_PCI_COMMON_Q_MSIX 26 +#define VIRTIO_PCI_COMMON_Q_ENABLE 28 +#define VIRTIO_PCI_COMMON_Q_NOFF 30 +#define VIRTIO_PCI_COMMON_Q_DESCLO 32 +#define VIRTIO_PCI_COMMON_Q_DESCHI 36 +#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 +#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 +#define VIRTIO_PCI_COMMON_Q_USEDLO 48 +#define VIRTIO_PCI_COMMON_Q_USEDHI 52 + +#endif /* VIRTIO_PCI_NO_MODERN */ + +#endif diff --git a/src/spdk/include/linux/virtio_ring.h b/src/spdk/include/linux/virtio_ring.h new file mode 100644 index 000000000..660138ffb --- /dev/null +++ b/src/spdk/include/linux/virtio_ring.h @@ -0,0 +1,218 @@ +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H +/* An interface for efficient virtio implementation, currently for use by KVM, + * but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS +'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ +#ifndef __KERNEL__ +#include <stdint.h> +#endif +#include <linux/types.h> +#include <linux/virtio_types.h> + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. 
Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + __virtio64 addr; + /* Length. */ + __virtio32 len; + /* The flags as indicated above. */ + __virtio16 flags; + /* We chain unused descriptors via this, too */ + __virtio16 next; +}; + +struct vring_avail { + __virtio16 flags; + __virtio16 idx; + __virtio16 ring[]; +}; + +/* u32 is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. 
*/ + __virtio32 id; + /* Total length of the descriptor chain which was used (written to) */ + __virtio32 len; +}; + +struct vring_used { + __virtio16 flags; + __virtio16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; +}; + +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * A ring of available descriptor heads with free-running index. + * __virtio16 avail_flags; + * __virtio16 avail_idx; + * __virtio16 available[num]; + * __virtio16 used_event_idx; + * + * Padding to the next align boundary. + * char pad[]; + * + * A ring of used descriptor heads with free-running index. + * __virtio16 used_flags; + * __virtio16 used_idx; + * struct vring_used_elem used[num]; + * __virtio16 avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. 
*/ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc + )); + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + + align - 1) & ~(align - 1)); +} + +static inline unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. */ + __le16 id; + /* The flags depending on descriptor type. 
*/ + __le16 flags; +}; + +#endif /* _LINUX_VIRTIO_RING_H */ diff --git a/src/spdk/include/linux/virtio_scsi.h b/src/spdk/include/linux/virtio_scsi.h new file mode 100644 index 000000000..cc18ef882 --- /dev/null +++ b/src/spdk/include/linux/virtio_scsi.h @@ -0,0 +1,172 @@ +/* + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _LINUX_VIRTIO_SCSI_H +#define _LINUX_VIRTIO_SCSI_H + +#include <linux/virtio_types.h> + +/* Default values of the CDB and sense data size configuration fields */ +#define VIRTIO_SCSI_CDB_DEFAULT_SIZE 32 +#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96 + +#ifndef VIRTIO_SCSI_CDB_SIZE +#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE +#endif +#ifndef VIRTIO_SCSI_SENSE_SIZE +#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE +#endif + +/* SCSI command request, followed by data-out */ +struct virtio_scsi_cmd_req { + __u8 lun[8]; /* Logical Unit Number */ + __virtio64 tag; /* Command identifier */ + __u8 task_attr; /* Task attribute */ + __u8 prio; /* SAM command priority field */ + __u8 crn; + __u8 cdb[VIRTIO_SCSI_CDB_SIZE]; +} __attribute__((packed)); + +/* SCSI command request, followed by protection information */ +struct virtio_scsi_cmd_req_pi { + __u8 lun[8]; /* Logical Unit Number */ + __virtio64 tag; /* Command identifier */ + __u8 task_attr; /* Task attribute */ + __u8 prio; /* SAM command priority field */ + __u8 crn; + __virtio32 pi_bytesout; /* DataOUT PI Number of bytes */ + __virtio32 pi_bytesin; /* DataIN PI Number of bytes */ + __u8 cdb[VIRTIO_SCSI_CDB_SIZE]; +} __attribute__((packed)); + +/* Response, followed by sense data and data-in */ +struct virtio_scsi_cmd_resp { + __virtio32 sense_len; /* Sense data length */ + __virtio32 resid; /* Residual bytes in data buffer */ + __virtio16 status_qualifier; /* Status qualifier */ + __u8 status; /* Command completion status */ + __u8 response; /* Response values */ + __u8 sense[VIRTIO_SCSI_SENSE_SIZE]; +} __attribute__((packed)); + +/* Task Management Request */ +struct virtio_scsi_ctrl_tmf_req { + __virtio32 type; + __virtio32 subtype; + __u8 lun[8]; + __virtio64 tag; +} __attribute__((packed)); + +struct virtio_scsi_ctrl_tmf_resp { + __u8 response; +} __attribute__((packed)); + +/* Asynchronous notification query/subscription */ +struct virtio_scsi_ctrl_an_req { + __virtio32 
type; + __u8 lun[8]; + __virtio32 event_requested; +} __attribute__((packed)); + +struct virtio_scsi_ctrl_an_resp { + __virtio32 event_actual; + __u8 response; +} __attribute__((packed)); + +struct virtio_scsi_event { + __virtio32 event; + __u8 lun[8]; + __virtio32 reason; +} __attribute__((packed)); + +struct virtio_scsi_config { + __u32 num_queues; + __u32 seg_max; + __u32 max_sectors; + __u32 cmd_per_lun; + __u32 event_info_size; + __u32 sense_size; + __u32 cdb_size; + __u16 max_channel; + __u16 max_target; + __u32 max_lun; +} __attribute__((packed)); + +/* Feature Bits */ +#define VIRTIO_SCSI_F_INOUT 0 +#define VIRTIO_SCSI_F_HOTPLUG 1 +#define VIRTIO_SCSI_F_CHANGE 2 +#define VIRTIO_SCSI_F_T10_PI 3 + +/* Response codes */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 + +/* Controlq type codes. */ +#define VIRTIO_SCSI_T_TMF 0 +#define VIRTIO_SCSI_T_AN_QUERY 1 +#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 + +/* Valid TMF subtypes. */ +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +/* Events. 
*/ +#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 +#define VIRTIO_SCSI_T_NO_EVENT 0 +#define VIRTIO_SCSI_T_TRANSPORT_RESET 1 +#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 +#define VIRTIO_SCSI_T_PARAM_CHANGE 3 + +/* Reasons of transport reset event */ +#define VIRTIO_SCSI_EVT_RESET_HARD 0 +#define VIRTIO_SCSI_EVT_RESET_RESCAN 1 +#define VIRTIO_SCSI_EVT_RESET_REMOVED 2 + +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 + + +#endif /* _LINUX_VIRTIO_SCSI_H */ diff --git a/src/spdk/include/linux/virtio_types.h b/src/spdk/include/linux/virtio_types.h new file mode 100644 index 000000000..6162bdf03 --- /dev/null +++ b/src/spdk/include/linux/virtio_types.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_VIRTIO_TYPES_H +#define _LINUX_VIRTIO_TYPES_H +/* Type definitions for virtio implementations. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) 2014 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + */ +#include <linux/types.h> + +/* + * __virtio{16,32,64} have the following meaning: + * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian + * - __le{16,32,64} for standard-compliant virtio devices + */ + +typedef __u16 __bitwise__ __virtio16; +typedef __u32 __bitwise__ __virtio32; +typedef __u64 __bitwise__ __virtio64; + +#endif /* _LINUX_VIRTIO_TYPES_H */ diff --git a/src/spdk/include/spdk/accel_engine.h b/src/spdk/include/spdk/accel_engine.h new file mode 100644 index 000000000..be48e2ce3 --- /dev/null +++ b/src/spdk/include/spdk/accel_engine.h @@ -0,0 +1,361 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Acceleration engine abstraction layer + */ + +#ifndef SPDK_ACCEL_ENGINE_H +#define SPDK_ACCEL_ENGINE_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum accel_capability { + ACCEL_COPY = 1 << 0, + ACCEL_FILL = 1 << 1, + ACCEL_DUALCAST = 1 << 2, + ACCEL_COMPARE = 1 << 3, + ACCEL_BATCH = 1 << 4, + ACCEL_CRC32C = 1 << 5, + ACCEL_DIF = 1 << 6, +}; + +/** + * Acceleration operation callback. + * + * \param ref 'accel_req' passed to the corresponding spdk_accel_submit* call. + * \param status 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_accel_completion_cb)(void *ref, int status); + +/** + * Acceleration engine finish callback. + * + * \param cb_arg Callback argument. + */ +typedef void (*spdk_accel_fini_cb)(void *cb_arg); + +struct spdk_io_channel; + +struct spdk_accel_batch; + +/** + * Initialize the acceleration engine. + * + * \return 0 on success. 
+ */ +int spdk_accel_engine_initialize(void); + +/** + * Close the acceleration engine. + * + * \param cb_fn Called when the close operation completes. + * \param cb_arg Argument passed to the callback function. + */ +void spdk_accel_engine_finish(spdk_accel_fini_cb cb_fn, void *cb_arg); + +/** + * Get the configuration for the acceleration engine. + * + * \param fp Pointer to the file that the configuration will be written to. + */ +void spdk_accel_engine_config_text(FILE *fp); + +/** + * Close the acceleration engine module and perform any necessary cleanup. + */ +void spdk_accel_engine_module_finish(void); + +/** + * Get the I/O channel registered on the acceleration engine. + * + * This I/O channel is used to submit copy requests. + * + * \return a pointer to the I/O channel on success, or NULL on failure. + */ +struct spdk_io_channel *spdk_accel_engine_get_io_channel(void); + +/** + * Retrieve accel engine capabilities. + * + * \param ch I/O channel associated with this call. + * + * \return bitmap of capabilities defined by enum accel_capability. + */ +uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch); + +/** + * Submit a copy request. + * + * \param ch I/O channel associated with this call. + * \param dst Destination to copy to. + * \param src Source to copy from. + * \param nbytes Length in bytes to copy. + * \param cb_fn Called when this copy operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to get batch size. This is the maximum number of + * descriptors that a batch can contain. Once this limit is reached the batch + * should be processed with spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * + * \return max number of descriptors per batch. 
+ */ +uint32_t spdk_accel_batch_get_max(struct spdk_io_channel *ch); + +/** + * Synchronous call to create a batch sequence. + * + * \param ch I/O channel associated with this call. + * + * \return handle to use for subsequent batch requests, NULL on failure. + */ +struct spdk_accel_batch *spdk_accel_batch_create(struct spdk_io_channel *ch); + +/** + * Asynchronous call to submit a batch sequence. + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param cb_fn Called when this operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to cancel a batch sequence. In some cases prepared commands will be + * processed if they cannot be cancelled. + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch); + +/** + * Synchronous call to prepare a copy request into a previously initialized batch + * created with spdk_accel_batch_create(). The callback will be called when the copy + * completes after the batch has been submitted by an asynchronous call to + * spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param dst Destination to copy to. + * \param src Source to copy from. + * \param nbytes Length in bytes to copy. + * \param cb_fn Called when this operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, + void *cb_arg); + +/** + * Synchronous call to prepare a dualcast request into a previously initialized batch + * created with spdk_accel_batch_create(). The callback will be called when the dualcast + * completes after the batch has been submitted by an asynchronous call to + * spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param dst1 First destination to copy to (must be 4K aligned). + * \param dst2 Second destination to copy to (must be 4K aligned). + * \param src Source to copy from. + * \param nbytes Length in bytes to copy. + * \param cb_fn Called when this operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst1, void *dst2, void *src, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Submit a dual cast copy request. + * + * \param ch I/O channel associated with this call. + * \param dst1 First destination to copy to (must be 4K aligned). + * \param dst2 Second destination to copy to (must be 4K aligned). + * \param src Source to copy from. + * \param nbytes Length in bytes to copy. + * \param cb_fn Called when this copy operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a compare request into a previously initialized batch + * created with spdk_accel_batch_create(). 
The callback will be called when the compare + * completes after the batch has been submitted by an asynchronous call to + * spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param src1 First location to perform compare on. + * \param src2 Second location to perform compare on. + * \param nbytes Length in bytes to compare. + * \param cb_fn Called when this operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, + void *cb_arg); + +/** + * Submit a compare request. + * + * \param ch I/O channel associated with this call. + * \param src1 First location to perform compare on. + * \param src2 Second location to perform compare on. + * \param nbytes Length in bytes to compare. + * \param cb_fn Called when this compare operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, any other value means there was a miscompare. + */ +int spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a fill request into a previously initialized batch + * created with spdk_accel_batch_create(). The callback will be called when the fill + * completes after the batch has been submitted by an asynchronous call to + * spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param dst Destination to fill. + * \param fill Constant byte to fill to the destination. + * \param nbytes Length in bytes to fill. + * \param cb_fn Called when this operation completes. 
+ * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, uint8_t fill, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Submit a fill request. + * + * This operation will fill the destination buffer with the specified value. + * + * \param ch I/O channel associated with this call. + * \param dst Destination to fill. + * \param fill Constant byte to fill to the destination. + * \param nbytes Length in bytes to fill. + * \param cb_fn Called when this fill operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a crc32c request into a previously initialized batch + * created with spdk_accel_batch_create(). The callback will be called when the crc32c + * completes after the batch has been submitted by an asynchronous call to + * spdk_accel_batch_submit(). + * + * \param ch I/O channel associated with this call. + * \param batch Handle provided when the batch was started with spdk_accel_batch_create(). + * \param dst Destination to write the CRC-32C to. + * \param src The source address for the data. + * \param seed Four byte seed value. + * \param nbytes Length in bytes. + * \param cb_fn Called when this operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + +/** + * Submit a CRC-32C calculation request. + * + * This operation will calculate the 4 byte CRC32-C for the given data. 
+ * + * \param ch I/O channel associated with this call. + * \param dst Destination to write the CRC-32C to. + * \param src The source address for the data. + * \param seed Four byte seed value. + * \param nbytes Length in bytes. + * \param cb_fn Called when this CRC-32C operation completes. + * \param cb_arg Callback argument. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, uint32_t seed, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + +struct spdk_json_write_ctx; + +/** + * Write Acceleration subsystem configuration into provided JSON context. + * + * \param w JSON write context + */ +void spdk_accel_write_config_json(struct spdk_json_write_ctx *w); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/assert.h b/src/spdk/include/spdk/assert.h new file mode 100644 index 000000000..67e674aac --- /dev/null +++ b/src/spdk/include/spdk/assert.h @@ -0,0 +1,65 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Runtime and compile-time assert macros + */ + +#ifndef SPDK_ASSERT_H +#define SPDK_ASSERT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef static_assert +#define SPDK_STATIC_ASSERT(cond, msg) static_assert(cond, msg) +#else +/** + * Compatibility wrapper for static_assert. + * + * This won't actually enforce the condition when compiled with an environment that doesn't support + * C11 static_assert; it is only intended to allow end users with old compilers to build the package. + * + * Developers should use a recent compiler that provides static_assert. + */ +#define SPDK_STATIC_ASSERT(cond, msg) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_ASSERT_H */ diff --git a/src/spdk/include/spdk/barrier.h b/src/spdk/include/spdk/barrier.h new file mode 100644 index 000000000..acae360c7 --- /dev/null +++ b/src/spdk/include/spdk/barrier.h @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * Copyright (c) 2017, IBM Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Memory barriers + */ + +#ifndef SPDK_BARRIER_H +#define SPDK_BARRIER_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Compiler memory barrier */ +#define spdk_compiler_barrier() __asm volatile("" ::: "memory") + +/** Read memory barrier */ +#define spdk_rmb() _spdk_rmb() + +/** Write memory barrier */ +#define spdk_wmb() _spdk_wmb() + +/** Full read/write memory barrier */ +#define spdk_mb() _spdk_mb() + +/** SMP read memory barrier. */ +#define spdk_smp_rmb() _spdk_smp_rmb() + +/** SMP write memory barrier. */ +#define spdk_smp_wmb() _spdk_smp_wmb() + +/** SMP read/write memory barrier. */ +#define spdk_smp_mb() _spdk_smp_mb() + +#ifdef __PPC64__ + +#define _spdk_rmb() __asm volatile("sync" ::: "memory") +#define _spdk_wmb() __asm volatile("sync" ::: "memory") +#define _spdk_mb() __asm volatile("sync" ::: "memory") +#define _spdk_smp_rmb() __asm volatile("lwsync" ::: "memory") +#define _spdk_smp_wmb() __asm volatile("lwsync" ::: "memory") +#define _spdk_smp_mb() spdk_mb() + +#elif defined(__aarch64__) + +#define _spdk_rmb() __asm volatile("dsb ld" ::: "memory") +#define _spdk_wmb() __asm volatile("dsb st" ::: "memory") +#define _spdk_mb() __asm volatile("dsb sy" ::: "memory") +#define _spdk_smp_rmb() __asm volatile("dmb ishld" ::: "memory") +#define _spdk_smp_wmb() __asm volatile("dmb ishst" ::: "memory") +#define _spdk_smp_mb() __asm volatile("dmb ish" ::: "memory") + +#elif defined(__i386__) || defined(__x86_64__) + +#define _spdk_rmb() __asm volatile("lfence" ::: "memory") +#define _spdk_wmb() __asm volatile("sfence" ::: "memory") +#define _spdk_mb() __asm volatile("mfence" ::: "memory") +#define _spdk_smp_rmb() spdk_compiler_barrier() +#define _spdk_smp_wmb() spdk_compiler_barrier() +#if defined(__x86_64__) +#define _spdk_smp_mb() __asm volatile("lock addl $0, -128(%%rsp); " ::: "memory"); +#elif defined(__i386__) +#define _spdk_smp_mb() __asm volatile("lock addl $0, -128(%%esp); " ::: "memory"); +#endif + 
+#else + +#define _spdk_rmb() +#define _spdk_wmb() +#define _spdk_mb() +#define _spdk_smp_rmb() +#define _spdk_smp_wmb() +#define _spdk_smp_mb() +#error Unknown architecture + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/base64.h b/src/spdk/include/spdk/base64.h new file mode 100644 index 000000000..86f41bba6 --- /dev/null +++ b/src/spdk/include/spdk/base64.h @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Base64 utility functions + */ + +#ifndef SPDK_BASE64_H +#define SPDK_BASE64_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Following the Base64 part in RFC4648: + * https://tools.ietf.org/html/rfc4648.html + */ + +/** + * Calculate strlen of encoded Base64 string based on raw buffer length. + * + * \param raw_len Length of raw buffer. + * \return Encoded Base64 string length, excluding the terminating null byte ('\0'). + */ +static inline size_t spdk_base64_get_encoded_strlen(size_t raw_len) +{ + return (raw_len + 2) / 3 * 4; +} + +/** + * Calculate length of raw buffer based on strlen of encoded Base64. + * + * This length will be the max possible decoded len. The exact decoded length could be + * shorter depending on whether there was padding in the Base64 string. + * + * \param encoded_strlen Length of encoded Base64 string, excluding terminating null + * byte ('\0'). + * \return Length of raw buffer. + */ +static inline size_t spdk_base64_get_decoded_len(size_t encoded_strlen) +{ + /* text_strlen and raw_len should be (4n,3n), (4n+2, 3n+1) or (4n+3, 3n+2) */ + return encoded_strlen / 4 * 3 + ((encoded_strlen % 4 + 1) / 2); +} + +/** + * Base 64 Encoding with Standard Base64 Alphabet defined in RFC4648. + * + * \param dst Buffer address of encoded Base64 string. 
Its length should be enough + * to contain Base64 string and the terminating null byte ('\0'), so it needs to be at + * least as long as 1 + spdk_base64_get_encoded_strlen(src_len). + * \param src Raw data buffer to be encoded. + * \param src_len Length of raw data buffer. + * + * \return 0 on success. + * \return -EINVAL if dst or src is NULL, or src_len is 0. + */ +int spdk_base64_encode(char *dst, const void *src, size_t src_len); + +/** + * Base 64 Encoding with URL and Filename Safe Alphabet. + * + * \param dst Buffer address of encoded Base64 string. Its length should be enough + * to contain Base64 string and the terminating null byte ('\0'), so it needs to be at + * least as long as 1 + spdk_base64_get_encoded_strlen(src_len). + * \param src Raw data buffer to be encoded. + * \param src_len Length of raw data buffer. + * + * \return 0 on success. + * \return -EINVAL if dst or src is NULL, or src_len is 0. + */ +int spdk_base64_urlsafe_encode(char *dst, const void *src, size_t src_len); + +/** + * Base 64 Decoding with Standard Base64 Alphabet defined in RFC4648. + * + * \param dst Buffer address of decoded raw data. Its length should be enough + * to contain decoded raw data, so it needs to be at least as long as + * spdk_base64_get_decoded_len(encoded_strlen). If NULL, only dst_len will be populated + * indicating the exact decoded length. + * \param dst_len Output parameter for the length of actual decoded raw data. + * If NULL, the actual decoded length won't be returned. + * \param src Data buffer for base64 string to be decoded. + * + * \return 0 on success. + * \return -EINVAL if src is NULL, or content of src is illegal. + */ +int spdk_base64_decode(void *dst, size_t *dst_len, const char *src); + +/** + * Base 64 Decoding with URL and Filename Safe Alphabet. + * + * \param dst Buffer address of decoded raw data. 
Its length should be enough + * to contain decoded raw data, so it needs to be at least as long as + * spdk_base64_get_decoded_len(encoded_strlen). If NULL, only dst_len will be populated + * indicating the exact decoded length. + * \param dst_len Output parameter for the length of actual decoded raw data. + * If NULL, the actual decoded length won't be returned. + * \param src Data buffer for base64 string to be decoded. + * + * \return 0 on success. + * \return -EINVAL if src is NULL, or content of src is illegal. + */ +int spdk_base64_urlsafe_decode(void *dst, size_t *dst_len, const char *src); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_BASE64_H */ diff --git a/src/spdk/include/spdk/bdev.h b/src/spdk/include/spdk/bdev.h new file mode 100644 index 000000000..0bb39c410 --- /dev/null +++ b/src/spdk/include/spdk/bdev.h @@ -0,0 +1,1705 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Block device abstraction layer + */ + +#ifndef SPDK_BDEV_H_ +#define SPDK_BDEV_H_ + +#include "spdk/stdinc.h" + +#include "spdk/scsi_spec.h" +#include "spdk/nvme_spec.h" +#include "spdk/json.h" +#include "spdk/queue.h" +#include "spdk/histogram_data.h" +#include "spdk/dif.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_BDEV_SMALL_BUF_MAX_SIZE 8192 +#define SPDK_BDEV_LARGE_BUF_MAX_SIZE (64 * 1024) + +/* Increase the buffer size to store interleaved metadata. Increment is the + * amount necessary to store metadata per data block. 16 byte metadata per + * 512 byte data block is the current maximum ratio of metadata per block. + */ +#define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16)) + +/** Asynchronous event type */ +enum spdk_bdev_event_type { + SPDK_BDEV_EVENT_REMOVE, + SPDK_BDEV_EVENT_RESIZE, + SPDK_BDEV_EVENT_MEDIA_MANAGEMENT, +}; + +/** Media management event details */ +struct spdk_bdev_media_event { + uint64_t offset; + uint64_t num_blocks; +}; + +/** + * \brief SPDK block device. + * + * This is a virtual representation of a block device that is exported by the backend. + */ +struct spdk_bdev; + +/** + * Block device remove callback. + * + * \param remove_ctx Context for the removed block device. + */ +typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx); + +/** + * Block device event callback. + * + * \param event Event details. 
+ * \param bdev Block device that triggered event. + * \param event_ctx Context for the block device event. + */ +typedef void (*spdk_bdev_event_cb_t)(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx); + +/** + * Block device I/O + * + * This is an I/O that is passed to an spdk_bdev. + */ +struct spdk_bdev_io; + +struct spdk_bdev_fn_table; +struct spdk_io_channel; +struct spdk_json_write_ctx; +struct spdk_uuid; + +/** bdev status */ +enum spdk_bdev_status { + SPDK_BDEV_STATUS_INVALID, + SPDK_BDEV_STATUS_READY, + SPDK_BDEV_STATUS_REMOVING, +}; + +/** + * \brief Handle to an opened SPDK block device. + */ +struct spdk_bdev_desc; + +/** bdev I/O type */ +enum spdk_bdev_io_type { + SPDK_BDEV_IO_TYPE_INVALID = 0, + SPDK_BDEV_IO_TYPE_READ, + SPDK_BDEV_IO_TYPE_WRITE, + SPDK_BDEV_IO_TYPE_UNMAP, + SPDK_BDEV_IO_TYPE_FLUSH, + SPDK_BDEV_IO_TYPE_RESET, + SPDK_BDEV_IO_TYPE_NVME_ADMIN, + SPDK_BDEV_IO_TYPE_NVME_IO, + SPDK_BDEV_IO_TYPE_NVME_IO_MD, + SPDK_BDEV_IO_TYPE_WRITE_ZEROES, + SPDK_BDEV_IO_TYPE_ZCOPY, + SPDK_BDEV_IO_TYPE_GET_ZONE_INFO, + SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT, + SPDK_BDEV_IO_TYPE_ZONE_APPEND, + SPDK_BDEV_IO_TYPE_COMPARE, + SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE, + SPDK_BDEV_IO_TYPE_ABORT, + SPDK_BDEV_NUM_IO_TYPES /* Keep last */ +}; + +/** bdev QoS rate limit type */ +enum spdk_bdev_qos_rate_limit_type { + /** IOPS rate limit for both read and write */ + SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT = 0, + /** Byte per second rate limit for both read and write */ + SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT, + /** Byte per second rate limit for read only */ + SPDK_BDEV_QOS_R_BPS_RATE_LIMIT, + /** Byte per second rate limit for write only */ + SPDK_BDEV_QOS_W_BPS_RATE_LIMIT, + /** Keep last */ + SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES +}; + +/** + * Block device completion callback. + * + * \param bdev_io Block device I/O that has completed. 
+ * \param success True if I/O completed successfully or false if it failed; + * additional error information may be retrieved from bdev_io by calling + * spdk_bdev_io_get_nvme_status() or spdk_bdev_io_get_scsi_status(). + * \param cb_arg Callback argument specified when bdev_io was submitted. + */ +typedef void (*spdk_bdev_io_completion_cb)(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg); + +struct spdk_bdev_io_stat { + uint64_t bytes_read; + uint64_t num_read_ops; + uint64_t bytes_written; + uint64_t num_write_ops; + uint64_t bytes_unmapped; + uint64_t num_unmap_ops; + uint64_t read_latency_ticks; + uint64_t write_latency_ticks; + uint64_t unmap_latency_ticks; + uint64_t ticks_rate; +}; + +struct spdk_bdev_opts { + uint32_t bdev_io_pool_size; + uint32_t bdev_io_cache_size; + bool bdev_auto_examine; +}; + +void spdk_bdev_get_opts(struct spdk_bdev_opts *opts); + +int spdk_bdev_set_opts(struct spdk_bdev_opts *opts); + +/** + * Block device initialization callback. + * + * \param cb_arg Callback argument. + * \param rc 0 if block device initialized successfully or negative errno if it failed. + */ +typedef void (*spdk_bdev_init_cb)(void *cb_arg, int rc); + +/** + * Block device finish callback. + * + * \param cb_arg Callback argument. + */ +typedef void (*spdk_bdev_fini_cb)(void *cb_arg); +typedef void (*spdk_bdev_get_device_stat_cb)(struct spdk_bdev *bdev, + struct spdk_bdev_io_stat *stat, void *cb_arg, int rc); + +/** + * Block device channel IO timeout callback + * + * \param cb_arg Callback argument + * \param bdev_io The IO that caused the timeout + */ +typedef void (*spdk_bdev_io_timeout_cb)(void *cb_arg, struct spdk_bdev_io *bdev_io); + +/** + * Initialize block device modules. + * + * \param cb_fn Called when the initialization is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg); + +/** + * Perform cleanup work to remove the registered block device modules.
+ * + * \param cb_fn Called when the removal is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg); + +/** + * Get the configuration options for the registered block device modules. + * + * \param fp Pointer to the file that the configuration options will be written to. + */ +void spdk_bdev_config_text(FILE *fp); + +/** + * Get the full configuration options for the registered block device modules and created bdevs. + * + * \param w pointer to a JSON write context where the configuration will be written. + */ +void spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w); + +/** + * Get block device by the block device name. + * + * \param bdev_name The name of the block device. + * \return Block device associated with the name or NULL if no block device with + * bdev_name is currently registered. + */ +struct spdk_bdev *spdk_bdev_get_by_name(const char *bdev_name); + +/** + * Get the first registered block device. + * + * \return The first registered block device. + */ +struct spdk_bdev *spdk_bdev_first(void); + +/** + * Get the next registered block device. + * + * \param prev The current block device. + * \return The next registered block device. + */ +struct spdk_bdev *spdk_bdev_next(struct spdk_bdev *prev); + +/** + * Get the first block device without virtual block devices on top. + * + * This function only traverses over block devices which have no virtual block + * devices on top of them, then get the first one. + * + * \return The first block device without virtual block devices on top. + */ +struct spdk_bdev *spdk_bdev_first_leaf(void); + +/** + * Get the next block device without virtual block devices on top. + * + * This function only traverses over block devices which have no virtual block + * devices on top of them, then get the next one. + * + * \param prev The current block device. + * \return The next block device without virtual block devices on top.
+ */ +struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); + +/** + * Open a block device for I/O operations (deprecated, please use spdk_bdev_open_ext). + * + * \param bdev Block device to open. + * \param write true if read/write access requested, false if read-only + * \param remove_cb notification callback to be called when the bdev gets + * hotremoved. This will always be called on the same thread that + * spdk_bdev_open() was called on. It can be NULL, in which case the upper + * layer won't be notified about the bdev hotremoval. The descriptor will + * have to be manually closed to make the bdev unregister proceed. + * \param remove_ctx param for remove_cb. + * \param desc output parameter for the descriptor when operation is successful + * \return 0 if operation is successful, suitable errno value otherwise + */ +int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, + void *remove_ctx, struct spdk_bdev_desc **desc); + +/** + * Open a block device for I/O operations. + * + * \param bdev_name Block device name to open. + * \param write true if read/write access requested, false if read-only + * \param event_cb notification callback to be called when the bdev triggers + * asynchronous event such as bdev removal. This will always be called on the + * same thread that spdk_bdev_open() was called on. In case of removal event + * the descriptor will have to be manually closed to make the bdev unregister + * proceed. + * \param event_ctx param for event_cb. + * \param desc output parameter for the descriptor when operation is successful + * \return 0 if operation is successful, suitable errno value otherwise + */ +int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb, + void *event_ctx, struct spdk_bdev_desc **desc); + +/** + * Close a previously opened block device. + * + * Must be called on the same thread that the spdk_bdev_open() + * was performed on.
+ * + * \param desc Block device descriptor to close. + */ +void spdk_bdev_close(struct spdk_bdev_desc *desc); + +/** + * Get the bdev associated with a bdev descriptor. + * + * \param desc Open block device descriptor + * \return bdev associated with the descriptor + */ +struct spdk_bdev *spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc); + +/** + * Set a time limit for the timeout IO of the bdev and timeout callback. + * We can use this function to enable/disable the timeout handler. If + * the timeout_in_sec > 0 then it means to enable the timeout IO handling + * or change the time limit. If the timeout_in_sec == 0 it means to + * disable the timeout IO handling. If you want to enable or change the + * timeout IO handle you need to specify the spdk_bdev_io_timeout_cb it + * means the upper user determines what to do if you meet the timeout IO, + * for example, you can reset the device or abort the IO. + * Note: This function must run in the desc's thread. + * + * \param desc Block device descriptor. + * \param timeout_in_sec Timeout value + * \param cb_fn Bdev IO timeout callback + * \param cb_arg Callback argument + * + * \return 0 on success, negated errno on failure. + */ +int spdk_bdev_set_timeout(struct spdk_bdev_desc *desc, uint64_t timeout_in_sec, + spdk_bdev_io_timeout_cb cb_fn, void *cb_arg); + +/** + * Check whether the block device supports the I/O type. + * + * \param bdev Block device to check. + * \param io_type The specific I/O type like read, write, flush, unmap. + * \return true if support, false otherwise. + */ +bool spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type); + +/** + * Output driver-specific information to a JSON stream. + * + * The JSON write context will be initialized with an open object, so the bdev + * driver should write a name(based on the driver name) followed by a JSON value + * (most likely another nested object). + * + * \param bdev Block device to query. + * \param w JSON write context.
It will store the driver-specific configuration context. + * \return 0 on success, negated errno on failure. + */ +int spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w); + +/** + * Get block device name. + * + * \param bdev Block device to query. + * \return Name of bdev as a null-terminated string. + */ +const char *spdk_bdev_get_name(const struct spdk_bdev *bdev); + +/** + * Get block device product name. + * + * \param bdev Block device to query. + * \return Product name of bdev as a null-terminated string. + */ +const char *spdk_bdev_get_product_name(const struct spdk_bdev *bdev); + +/** + * Get block device logical block size. + * + * \param bdev Block device to query. + * \return Size of logical block for this bdev in bytes. + */ +uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev); + +/** + * Get the write unit size for this bdev. + * + * Write unit size is required number of logical blocks to perform write + * operation on block device. + * + * Unit of write unit size is logical block and the minimum of write unit + * size is one. Write operations must be multiple of write unit size. + * + * \param bdev Block device to query. + * + * \return The write unit size in logical blocks. + */ +uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev); + +/** + * Get size of block device in logical blocks. + * + * \param bdev Block device to query. + * \return Size of bdev in logical blocks. + * + * Logical blocks are numbered from 0 to spdk_bdev_get_num_blocks(bdev) - 1, inclusive. + */ +uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev); + +/** + * Get the string of quality of service rate limit. + * + * \param type Type of rate limit to query. + * \return String of QoS type. + */ +const char *spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type); + +/** + * Get the quality of service rate limits on a bdev. + * + * \param bdev Block device to query. 
+ * \param limits Pointer to the QoS rate limits array that holds the limits. + * + * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum. + */ +void spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits); + +/** + * Set the quality of service rate limits on a bdev. + * + * \param bdev Block device. + * \param limits Pointer to the QoS rate limits array that holds the limits. + * \param cb_fn Callback function to be called when the QoS limit has been updated. + * \param cb_arg Argument to pass to cb_fn. + * + * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum. + */ +void spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits, + void (*cb_fn)(void *cb_arg, int status), void *cb_arg); + +/** + * Get minimum I/O buffer address alignment for a bdev. + * + * \param bdev Block device to query. + * \return Required alignment of I/O buffers in bytes. + */ +size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev); + +/** + * Get optimal I/O boundary for a bdev. + * + * \param bdev Block device to query. + * \return Optimal I/O boundary in blocks that should not be crossed for best performance, or 0 if + * no optimal boundary is reported. + */ +uint32_t spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev); + +/** + * Query whether block device has an enabled write cache. + * + * \param bdev Block device to query. + * \return true if block device has a volatile write cache enabled. + * + * If this function returns true, written data may not be persistent until a flush command + * is issued. + */ +bool spdk_bdev_has_write_cache(const struct spdk_bdev *bdev); + +/** + * Get a bdev's UUID. + * + * \param bdev Block device to query. + * \return Pointer to UUID. + * + * All bdevs will have a UUID, but not all UUIDs will be persistent across + * application runs.
+ */ +const struct spdk_uuid *spdk_bdev_get_uuid(const struct spdk_bdev *bdev); + +/** + * Get block device atomic compare and write unit. + * + * \param bdev Block device to query. + * \return Atomic compare and write unit for this bdev in blocks. + */ +uint16_t spdk_bdev_get_acwu(const struct spdk_bdev *bdev); + +/** + * Get block device metadata size. + * + * \param bdev Block device to query. + * \return Size of metadata for this bdev in bytes. + */ +uint32_t spdk_bdev_get_md_size(const struct spdk_bdev *bdev); + +/** + * Query whether metadata is interleaved with block data or separated + * from block data. + * + * \param bdev Block device to query. + * \return true if metadata is interleaved with block data or false + * if metadata is separated from block data. + * + * Note this function is valid only if there is metadata. + */ +bool spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev); + +/** + * Query whether metadata is interleaved with block data or separated + * from block data. + * + * \param bdev Block device to query. + * \return true if metadata is separated from block data, false + * otherwise. + * + * Note this function is valid only if there is metadata. + */ +bool spdk_bdev_is_md_separate(const struct spdk_bdev *bdev); + +/** + * Checks if bdev supports zoned namespace semantics. + * + * \param bdev Block device to query. + * \return true if device supports zoned namespace semantics. + */ +bool spdk_bdev_is_zoned(const struct spdk_bdev *bdev); + +/** + * Get block device data block size. + * + * Data block size is equal to block size if there is no metadata or + * metadata is separated from block data, or equal to block size minus + * metadata size if there is metadata and it is interleaved with + * block data. + * + * \param bdev Block device to query. + * \return Size of data block for this bdev in bytes. + */ +uint32_t spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev); + +/** + * Get DIF type of the block device.
+ * + * \param bdev Block device to query. + * \return DIF type of the block device. + */ +enum spdk_dif_type spdk_bdev_get_dif_type(const struct spdk_bdev *bdev); + +/** + * Check whether DIF is set in the first 8 bytes or the last 8 bytes of metadata. + * + * \param bdev Block device to query. + * \return true if DIF is set in the first 8 bytes of metadata, or false + * if DIF is set in the last 8 bytes of metadata. + * + * Note that this function is valid only if DIF type is not SPDK_DIF_DISABLE. + */ +bool spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev); + +/** + * Check whether the DIF check type is enabled. + * + * \param bdev Block device to query. + * \param check_type The specific DIF check type. + * \return true if enabled, false otherwise. + */ +bool spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev, + enum spdk_dif_check_type check_type); + +/** + * Get the most recently measured queue depth from a bdev. + * + * The reported queue depth is the aggregate of outstanding I/O + * across all open channels associated with this bdev. + * + * \param bdev Block device to query. + * + * \return The most recent queue depth measurement for the bdev. + * If tracking is not enabled, the function will return UINT64_MAX. + * It is also possible to receive UINT64_MAX after enabling tracking + * but before the first period has expired. + */ +uint64_t +spdk_bdev_get_qd(const struct spdk_bdev *bdev); + +/** + * Get the queue depth polling period. + * + * The return value of this function is only valid if the bdev's + * queue depth tracking status is set to true. + * + * \param bdev Block device to query. + * + * \return The period at which this bdev's queue depth is being refreshed. + */ +uint64_t +spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev); + +/** + * Enable or disable queue depth sampling for this bdev. + * + * Enables queue depth sampling when period is greater than 0. Disables it when the period + * is equal to zero.
The resulting queue depth is stored in the spdk_bdev object as + * measured_queue_depth. + * + * \param bdev Block device on which to enable queue depth tracking. + * \param period The period at which to poll this bdev's queue depth. If this is set + * to zero, polling will be disabled. + */ +void spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period); + +/** + * Get the time spent processing IO for this device. + * + * This value is dependent upon the queue depth sampling period and is + * incremented at sampling time by the sampling period only if the measured + * queue depth is greater than 0. + * + * The disk utilization can be calculated by the following formula: + * disk_util = (io_time_2 - io_time_1) / elapsed_time. + * The user is responsible for tracking the elapsed time between two measurements. + * + * \param bdev Block device to query. + * + * \return The io time for this device in microseconds. + */ +uint64_t spdk_bdev_get_io_time(const struct spdk_bdev *bdev); + +/** + * Get the weighted IO processing time for this bdev. + * + * This value is dependent upon the queue depth sampling period and is + * equal to the time spent reading from or writing to a device times + * the measured queue depth during each sampling period. + * + * The average queue depth can be calculated by the following formula: + * queue_depth = (weighted_io_time_2 - weighted_io_time_1) / elapsed_time. + * The user is responsible for tracking the elapsed time between two measurements. + * + * \param bdev Block device to query. + * + * \return The weighted io time for this device in microseconds. + */ +uint64_t spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev); + +/** + * Obtain an I/O channel for the block device opened by the specified + * descriptor. I/O channels are bound to threads, so the resulting I/O + * channel may only be used from the thread it was originally obtained + * from. + * + * \param desc Block device descriptor. 
+ * + * \return A handle to the I/O channel or NULL on failure. + */ +struct spdk_io_channel *spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc); + +/** + * \defgroup bdev_io_submit_functions bdev I/O Submit Functions + * + * These functions submit a new I/O request to a bdev. The I/O request will + * be represented by an spdk_bdev_io structure allocated from a global pool. + * These functions will return -ENOMEM if the spdk_bdev_io pool is empty. + */ + +/** + * Submit a read request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to read into. + * \param offset The offset, in bytes, from the start of the block device. + * \param nbytes The number of bytes to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or nbytes are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a read request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to read into. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. 
On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a read request to the bdev on the given channel. This function uses + * separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to read into. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, int64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. 
Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer into the buffers provided. In + * this case, the request may fail. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param offset The offset, in bytes, from the start of the block device. + * \param nbytes The number of bytes to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or nbytes are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer into the buffers provided. In + * this case, the request may fail. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. 
+ * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer into the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to write from. + * \param offset The offset, in bytes, from the start of the block device. + * \param nbytes The number of bytes to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or nbytes are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to write from. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel. This function uses + * separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to write from. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel.
This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer out of the buffers provided. In + * this case, the request may fail. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param offset The offset, in bytes, from the start of the block device. + * \param len The size of data to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or len are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset, uint64_t len, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel. This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer out of the buffers provided. In + * this case, the request may fail. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov.
+ * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write request to the bdev on the given channel. This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer out of the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). 
Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a compare request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to compare to. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to compare. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_compare_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a compare request to the bdev on the given channel. This function uses + * separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to compare to. 
+ * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to compare. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a compare request to the bdev on the given channel. This differs from + * spdk_bdev_compare by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer out of the buffers provided. In + * this case, the request may fail. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be compared to. + * \param iovcnt The number of elements in iov. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to compare. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_comparev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a compare request to the bdev on the given channel. This differs from + * spdk_bdev_compare by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer out of the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be compared to. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to compare. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an atomic compare-and-write request to the bdev on the given channel. + * For bdevs that do not natively support atomic compare-and-write, the bdev layer + * will quiesce I/O to the specified LBA range, before performing the read, + * compare and write operations. + * + * Currently this supports compare-and-write of only one block. + * + * The data buffers for both the compare and write operations are described in a + * scatter gather list. Some physical devices place memory alignment requirements on + * data and may not be able to directly transfer out of the buffers provided. In + * this case, the request may fail. + * + * spdk_bdev_io_get_nvme_fused_status() function should be called in callback function + * to get status for the individual operation. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param compare_iov A scatter gather list of buffers to be compared. + * \param compare_iovcnt The number of elements in compare_iov. + * \param write_iov A scatter gather list of buffers to be written if the compare is + * successful. + * \param write_iovcnt The number of elements in write_iov. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to compare-and-write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. 
On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *compare_iov, int compare_iovcnt, + struct iovec *write_iov, int write_iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a request to acquire a data buffer that represents the given + * range of blocks. The data buffer is placed in the spdk_bdev_io structure + * and can be obtained by calling spdk_bdev_io_get_iovec(). + * + * \param desc Block device descriptor + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks. + * \param populate Whether the data buffer should be populated with the + * data at the given blocks. Populating the data buffer can + * be skipped if the user writes new data to the entire buffer. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + */ +int spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + bool populate, + spdk_bdev_io_completion_cb cb, void *cb_arg); + + +/** + * Submit a request to release a data buffer representing a range of blocks. + * + * \param bdev_io I/O request returned in the completion callback of spdk_bdev_zcopy_start(). 
+ * \param commit Whether to commit the data in the buffers to the blocks before releasing. + * The data does not need to be committed if it was not modified. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + */ +int spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write zeroes request to the bdev on the given channel. This command + * ensures that all bytes in the specified range are set to 00h. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param offset The offset, in bytes, from the start of the block device. + * \param len The size of data to zero. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or len are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t len, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a write zeroes request to the bdev on the given channel. This command + * ensures that all bytes in the specified range are set to 00h. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to zero. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. This notifies the device that the data in the blocks described is no + * longer valid. Reading blocks that have been unmapped results in indeterminate data. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param offset The offset, in bytes, from the start of the block device. + * \param nbytes The number of bytes to unmap. Must be a multiple of the block size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset and/or nbytes are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. This notifies the device that the data in the blocks described is no + * longer valid. Reading blocks that have been unmapped results in indeterminate data. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to unmap. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a flush request to the bdev on the given channel. For devices with volatile + * caches, data is not guaranteed to be persistent until the completion of a flush + * request. Call spdk_bdev_has_write_cache() to check if the bdev has a volatile cache. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). 
+ * \param offset The offset, in bytes, from the start of the block device. + * \param length The number of bytes. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset and/or length are not aligned or out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t length, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a flush request to the bdev on the given channel. For devices with volatile + * caches, data is not guaranteed to be persistent until the completion of a flush + * request. Call spdk_bdev_has_write_cache() to check if the bdev has a volatile cache. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called.
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a reset request to the bdev on the given channel. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit abort requests to abort all I/Os which have bio_cb_arg as their callback + * context to the bdev on the given channel. + * + * This goes all the way down to the bdev driver module and attempts to abort all + * I/Os which have bio_cb_arg as their callback context if they exist. This is a best + * effort command. Upon completion of this, the status SPDK_BDEV_IO_STATUS_SUCCESS + * indicates all the I/Os were successfully aborted, or the status + * SPDK_BDEV_IO_STATUS_FAILED indicates an I/O failed to abort for any reason + * or no I/O which has bio_cb_arg as its callback context was found. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch The I/O channel which the I/Os to be aborted are associated with. + * \param bio_cb_arg Callback argument for the outstanding requests which this + * function attempts to abort.
+ * \param cb Called when the abort request is completed. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always be called (even if the + * request ultimately failed). Return negated errno on failure, in which case the + * callback will not be called. + * * -EINVAL - bio_cb_arg was not specified. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated. + * * -ENOTSUP - the bdev does not support abort. + */ +int spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *bio_cb_arg, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an NVMe Admin command to the bdev. This passes directly through + * the block layer to the device. Support for NVMe passthru is optional, + * indicated by calling spdk_bdev_io_type_supported(). + * + * The SGL/PRP will be automatically generated based on the given buffer, + * so that portion of the command may be left empty. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param cmd The raw NVMe command. Must be an admin command. + * \param buf Data buffer to be written from. + * \param nbytes The number of bytes to transfer. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, + void *buf, size_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an NVMe I/O command to the bdev.
This passes directly through + * the block layer to the device. Support for NVMe passthru is optional, + * indicated by calling spdk_bdev_io_type_supported(). + * + * \ingroup bdev_io_submit_functions + * + * The SGL/PRP will be automatically generated based on the given buffer, + * so that portion of the command may be left empty. Also, the namespace + * id (nsid) will be populated automatically. + * + * \param bdev_desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param cmd The raw NVMe command. Must be in the NVM command set. + * \param buf Data buffer to be written from. + * \param nbytes The number of bytes to transfer. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, + void *buf, size_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit an NVMe I/O command to the bdev. This passes directly through + * the block layer to the device. Support for NVMe passthru is optional, + * indicated by calling spdk_bdev_io_type_supported(). + * + * \ingroup bdev_io_submit_functions + * + * The SGL/PRP will be automatically generated based on the given buffer, + * so that portion of the command may be left empty. Also, the namespace + * id (nsid) will be populated automatically. + * + * \param bdev_desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param cmd The raw NVMe command. Must be in the NVM command set.
+ * \param buf Data buffer to be written from. + * \param nbytes The number of bytes to transfer. buf must be greater than or equal to this size. + * \param md_buf Metadata buffer to be written from. + * \param md_len md_buf size to transfer. md_buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, + void *buf, size_t nbytes, void *md_buf, size_t md_len, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Free an I/O request. This should only be called after the completion callback + * for the I/O has been called and notifies the bdev layer that memory may now + * be released. + * + * \param bdev_io I/O request. + */ +void spdk_bdev_free_io(struct spdk_bdev_io *bdev_io); + +/** + * Block device I/O wait callback + * + * Callback function to notify when an spdk_bdev_io structure is available + * to satisfy a call to one of the @ref bdev_io_submit_functions. + */ +typedef void (*spdk_bdev_io_wait_cb)(void *cb_arg); + +/** + * Structure to register a callback when an spdk_bdev_io becomes available. + */ +struct spdk_bdev_io_wait_entry { + struct spdk_bdev *bdev; + spdk_bdev_io_wait_cb cb_fn; + void *cb_arg; + TAILQ_ENTRY(spdk_bdev_io_wait_entry) link; +}; + +/** + * Add an entry into the calling thread's queue to be notified when an + * spdk_bdev_io becomes available. + * + * When one of the @ref bdev_io_submit_functions returns -ENOMEM, it means + * the spdk_bdev_io buffer pool has no available buffers.
This function may + * be called to register a callback to be notified when a buffer becomes + * available on the calling thread. + * + * The callback function will always be called on the same thread as this + * function was called. + * + * This function must only be called immediately after one of the + * @ref bdev_io_submit_functions returns -ENOMEM. + * + * \param bdev Block device. The block device that the caller will submit + * an I/O to when the callback is invoked. Must match the bdev + * member in the entry parameter. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param entry Data structure allocated by the caller specifying the callback + * function and argument. + * + * \return 0 on success. + * -EINVAL if bdev parameter does not match bdev member in entry + * -EINVAL if an spdk_bdev_io structure was available on this thread. + */ +int spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch, + struct spdk_bdev_io_wait_entry *entry); + +/** + * Return I/O statistics for this channel. + * + * \param bdev Block device. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param stat The per-channel statistics. + * + */ +void spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch, + struct spdk_bdev_io_stat *stat); + + +/** + * Return I/O statistics for this bdev. All the required information will be passed + * via the callback function. + * + * \param bdev Block device to query. + * \param stat Structure for aggregating collected statistics. Passed as argument to cb. + * \param cb Called when this operation completes. + * \param cb_arg Argument passed to callback function. + */ +void spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat, + spdk_bdev_get_device_stat_cb cb, void *cb_arg); + +/** + * Get the status of bdev_io as an NVMe status code and command specific + * completion queue value. 
+ * + * \param bdev_io I/O to get the status from. + * \param cdw0 Command specific completion queue value. + * \param sct Status Code Type return value, as defined by the NVMe specification. + * \param sc Status Code return value, as defined by the NVMe specification. + */ +void spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0, int *sct, + int *sc); + +/** + * Get the status of bdev_io as NVMe status codes and command specific + * completion queue value for fused operations such as compare-and-write. + * + * \param bdev_io I/O to get the status from. + * \param cdw0 Command specific completion queue value. + * \param first_sct Status Code Type return value for the first operation, as defined by the NVMe specification. + * \param first_sc Status Code return value for the first operation, as defined by the NVMe specification. + * \param second_sct Status Code Type return value for the second operation, as defined by the NVMe specification. + * \param second_sc Status Code return value for the second operation, as defined by the NVMe specification. + */ +void spdk_bdev_io_get_nvme_fused_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0, + int *first_sct, int *first_sc, int *second_sct, int *second_sc); + +/** + * Get the status of bdev_io as a SCSI status code. + * + * \param bdev_io I/O to get the status from. + * \param sc SCSI Status Code. + * \param sk SCSI Sense Key. + * \param asc SCSI Additional Sense Code. + * \param ascq SCSI Additional Sense Code Qualifier. + */ +void spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, + int *sc, int *sk, int *asc, int *ascq); + +/** + * Get the iovec describing the data buffer of a bdev_io. + * + * \param bdev_io I/O to describe with iovec. + * \param iovp Pointer to be filled with iovec. + * \param iovcntp Pointer to be filled with number of iovec entries.
+ */ +void spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp); + +/** + * Get metadata buffer. Only makes sense if the IO uses separate buffer for + * metadata transfer. + * + * \param bdev_io I/O to retrieve the buffer from. + * \return Pointer to metadata buffer, NULL if the IO doesn't use separate + * buffer for metadata transfer. + */ +void *spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io); + +/** + * Get the callback argument of bdev_io to abort it by spdk_bdev_abort. + * + * \param bdev_io I/O to get the callback argument from. + * \return Callback argument of bdev_io. + */ +void *spdk_bdev_io_get_cb_arg(struct spdk_bdev_io *bdev_io); + +typedef void (*spdk_bdev_histogram_status_cb)(void *cb_arg, int status); +typedef void (*spdk_bdev_histogram_data_cb)(void *cb_arg, int status, + struct spdk_histogram_data *histogram); + +/** + * Enable or disable collecting histogram data on a bdev. + * + * \param bdev Block device. + * \param cb_fn Callback function to be called when histograms are enabled. + * \param cb_arg Argument to pass to cb_fn. + * \param enable True to enable histogram collection, false to disable it. + */ +void spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn, + void *cb_arg, bool enable); + +/** + * Get aggregated histogram data from a bdev. Callback provides merged histogram + * for specified bdev. + * + * \param bdev Block device. + * \param histogram Histogram for aggregated data + * \param cb_fn Callback function to be called with data collected on bdev. + * \param cb_arg Argument to pass to cb_fn. + */ +void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram, + spdk_bdev_histogram_data_cb cb_fn, + void *cb_arg); + +/** + * Retrieves media events. Can only be called from the context of + * SPDK_BDEV_EVENT_MEDIA_MANAGEMENT event callback. These events are sent by + * devices exposing raw access to the physical medium (e.g. Open Channel SSD).
+ * + * \param bdev_desc Block device descriptor + * \param events Array of media management event descriptors + * \param max_events Size of the events array + * + * \return number of events retrieved + */ +size_t spdk_bdev_get_media_events(struct spdk_bdev_desc *bdev_desc, + struct spdk_bdev_media_event *events, size_t max_events); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_BDEV_H_ */ diff --git a/src/spdk/include/spdk/bdev_module.h b/src/spdk/include/spdk/bdev_module.h new file mode 100644 index 000000000..edf967897 --- /dev/null +++ b/src/spdk/include/spdk/bdev_module.h @@ -0,0 +1,1219 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Block Device Module Interface + * + * For information on how to write a bdev module, see @ref bdev_module. + */ + +#ifndef SPDK_BDEV_MODULE_H +#define SPDK_BDEV_MODULE_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/bdev_zone.h" +#include "spdk/queue.h" +#include "spdk/scsi_spec.h" +#include "spdk/thread.h" +#include "spdk/util.h" +#include "spdk/uuid.h" + +/** Block device module */ +struct spdk_bdev_module { + /** + * Initialization function for the module. Called by the spdk + * application during startup. + * + * Modules are required to define this function. + */ + int (*module_init)(void); + + /** + * Optional callback for modules that require notification of when + * the bdev subsystem has completed initialization. + * + * Modules are not required to define this function. + */ + void (*init_complete)(void); + + /** + * Optional callback for modules that require notification of when + * the bdev subsystem is starting the fini process. + * + * Modules are not required to define this function. + */ + void (*fini_start)(void); + + /** + * Finish function for the module. Called by the spdk application + * after all bdevs for all modules have been unregistered. This allows + * the module to do any final cleanup before the SPDK application exits. + * + * Modules are not required to define this function. 
+ */ + void (*module_fini)(void); + + /** + * Function called to return a text string representing the + * module's configuration options for inclusion in a configuration file. + */ + void (*config_text)(FILE *fp); + + /** + * Function called to return a text string representing the module-level + * JSON RPCs required to regenerate the current configuration. This will + * include module-level configuration options, or methods to construct + * bdevs when one RPC may generate multiple bdevs (for example, an NVMe + * controller with multiple namespaces). + * + * Per-bdev JSON RPCs (where one "construct" RPC always creates one bdev) + * may be implemented here, or by the bdev's write_config_json function - + * but not both. Bdev module implementers may choose which mechanism to + * use based on the module's design. + * + * \return 0 on success or Bdev specific negative error code. + */ + int (*config_json)(struct spdk_json_write_ctx *w); + + /** Name of the module being defined. */ + const char *name; + + /** + * Returns the allocation size required for the backend for uses such as local + * command structs, local SGL, iovecs, or other user context. + */ + int (*get_ctx_size)(void); + + /** + * First notification that a bdev should be examined by a virtual bdev module. + * Virtual bdev modules may use this to examine newly-added bdevs and automatically + * create their own vbdevs, but no I/O can be sent to the bdev at this point. + * Only vbdevs based on config files can be created here. This callback must make + * its decision to claim the bdev synchronously. + * It must also call spdk_bdev_module_examine_done() before returning. If the module + * needs to perform asynchronous operations such as I/O after claiming the bdev, + * it may define an examine_disk callback. The examine_disk callback will then + * be called immediately after the examine_config callback returns.
+ */ + void (*examine_config)(struct spdk_bdev *bdev); + + /** + * Second notification that a bdev should be examined by a virtual bdev module. + * Virtual bdev modules may use this to examine newly-added bdevs and automatically + * create their own vbdevs. This callback may use I/O operations and finish asynchronously. + */ + void (*examine_disk)(struct spdk_bdev *bdev); + + /** + * Denotes if the module_init function may complete asynchronously. If set to true, + * the module initialization has to be explicitly completed by calling + * spdk_bdev_module_init_done(). + */ + bool async_init; + + /** + * Denotes if the module_fini function may complete asynchronously. + * If set to true finishing has to be explicitly completed by calling + * spdk_bdev_module_finish_done(). + */ + bool async_fini; + + /** + * Fields that are used by the internal bdev subsystem. Bdev modules + * must not read or write to these fields. + */ + struct __bdev_module_internal_fields { + /** + * Count of bdev inits/examinations in progress. Used by generic bdev + * layer and must not be modified by bdev modules. + * + * \note Used internally by bdev subsystem, don't change this value in bdev module. + */ + uint32_t action_in_progress; + + TAILQ_ENTRY(spdk_bdev_module) tailq; + } internal; +}; + +typedef void (*spdk_bdev_unregister_cb)(void *cb_arg, int rc); + +/** + * Function table for a block device backend. + * + * The backend block device function table provides a set of APIs to allow + * communication with a backend. The main commands are read/write API + * calls for I/O via submit_request. + */ +struct spdk_bdev_fn_table { + /** Destroy the backend block device object */ + int (*destruct)(void *ctx); + + /** Process the IO. */ + void (*submit_request)(struct spdk_io_channel *ch, struct spdk_bdev_io *); + + /** Check if the block device supports a specific I/O type.
*/ + bool (*io_type_supported)(void *ctx, enum spdk_bdev_io_type); + + /** Get an I/O channel for the specific bdev for the calling thread. */ + struct spdk_io_channel *(*get_io_channel)(void *ctx); + + /** + * Output driver-specific information to a JSON stream. Optional - may be NULL. + * + * The JSON write context will be initialized with an open object, so the bdev + * driver should write a name (based on the driver name) followed by a JSON value + * (most likely another nested object). + */ + int (*dump_info_json)(void *ctx, struct spdk_json_write_ctx *w); + + /** + * Output bdev-specific RPC configuration to a JSON stream. Optional - may be NULL. + * + * This function should only be implemented for bdevs which can be configured + * independently of other bdevs. For example, RPCs to create a bdev for an NVMe + * namespace may not be generated by this function, since enumerating an NVMe + * namespace requires attaching to an NVMe controller, and that controller may + * contain multiple namespaces. The spdk_bdev_module's config_json function should + * be used instead for these cases. + * + * The JSON write context will be initialized with an open object, so the bdev + * driver should write all data necessary to recreate this bdev by invoking + * constructor method. No other data should be written. + */ + void (*write_config_json)(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w); + + /** Get spin-time per I/O channel in microseconds. + * Optional - may be NULL. + */ + uint64_t (*get_spin_time)(struct spdk_io_channel *ch); +}; + +/** bdev I/O completion status */ +enum spdk_bdev_io_status { + SPDK_BDEV_IO_STATUS_ABORTED = -7, + SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED = -6, + SPDK_BDEV_IO_STATUS_MISCOMPARE = -5, + /* + * NOMEM should be returned when a bdev module cannot start an I/O because of + * some lack of resources. It may not be returned for RESET I/O. 
I/O completed + * with NOMEM status will be retried after some I/O from the same channel have + * completed. + */ + SPDK_BDEV_IO_STATUS_NOMEM = -4, + SPDK_BDEV_IO_STATUS_SCSI_ERROR = -3, + SPDK_BDEV_IO_STATUS_NVME_ERROR = -2, + SPDK_BDEV_IO_STATUS_FAILED = -1, + SPDK_BDEV_IO_STATUS_PENDING = 0, + SPDK_BDEV_IO_STATUS_SUCCESS = 1, +}; + +struct spdk_bdev_alias { + char *alias; + TAILQ_ENTRY(spdk_bdev_alias) tailq; +}; + +typedef TAILQ_HEAD(, spdk_bdev_io) bdev_io_tailq_t; +typedef STAILQ_HEAD(, spdk_bdev_io) bdev_io_stailq_t; +typedef TAILQ_HEAD(, lba_range) lba_range_tailq_t; + +struct spdk_bdev { + /** User context passed in by the backend */ + void *ctxt; + + /** Unique name for this block device. */ + char *name; + + /** Unique aliases for this block device. */ + TAILQ_HEAD(spdk_bdev_aliases_list, spdk_bdev_alias) aliases; + + /** Unique product name for this kind of block device. */ + char *product_name; + + /** write cache enabled, not used at the moment */ + int write_cache; + + /** Size in bytes of a logical block for the backend */ + uint32_t blocklen; + + /** Number of blocks */ + uint64_t blockcnt; + + /** Number of blocks required for write */ + uint32_t write_unit_size; + + /** Atomic compare & write unit */ + uint16_t acwu; + + /** + * Specifies an alignment requirement for data buffers associated with an spdk_bdev_io. + * 0 = no alignment requirement + * >0 = alignment requirement is 2 ^ required_alignment. + * bdev layer will automatically double buffer any spdk_bdev_io that violates this + * alignment, before the spdk_bdev_io is submitted to the bdev module. + */ + uint8_t required_alignment; + + /** + * Specifies whether the optimal_io_boundary is mandatory or + * only advisory. If set to true, the bdev layer will split + * READ and WRITE I/O that span the optimal_io_boundary before + * submitting them to the bdev module. + * + * Note that this field cannot be used to force splitting of + * UNMAP, WRITE_ZEROES or FLUSH I/O. 
+ */ + bool split_on_optimal_io_boundary; + + /** + * Optimal I/O boundary in blocks, or 0 for no value reported. + */ + uint32_t optimal_io_boundary; + + /** + * UUID for this bdev. + * + * Fill with zeroes if no uuid is available. The bdev layer + * will automatically populate this if necessary. + */ + struct spdk_uuid uuid; + + /** Size in bytes of a metadata for the backend */ + uint32_t md_len; + + /** + * Specify metadata location and set to true if metadata is interleaved + * with block data or false if metadata is separated with block data. + * + * Note that this field is valid only if there is metadata. + */ + bool md_interleave; + + /** + * DIF type for this bdev. + * + * Note that this field is valid only if there is metadata. + */ + enum spdk_dif_type dif_type; + + /* + * DIF location. + * + * Set to true if DIF is set in the first 8 bytes of metadata or false + * if DIF is set in the last 8 bytes of metadata. + * + * Note that this field is valid only if DIF is enabled. + */ + bool dif_is_head_of_md; + + /** + * Specify whether each DIF check type is enabled. + */ + uint32_t dif_check_flags; + + /** + * Specify whether bdev is zoned device. + */ + bool zoned; + + /** + * Default size of each zone (in blocks). + */ + uint64_t zone_size; + + /** + * Maximum number of open zones. + */ + uint32_t max_open_zones; + + /** + * Optimal number of open zones. + */ + uint32_t optimal_open_zones; + + /** + * Specifies whether bdev supports media management events. + */ + bool media_events; + + /** + * Pointer to the bdev module that registered this bdev. + */ + struct spdk_bdev_module *module; + + /** function table for all LUN ops */ + const struct spdk_bdev_fn_table *fn_table; + + /** Fields that are used internally by the bdev subsystem. Bdev modules + * must not read or write to these fields. 
+ */ + struct __bdev_internal_fields { + /** Quality of service parameters */ + struct spdk_bdev_qos *qos; + + /** True if the state of the QoS is being modified */ + bool qos_mod_in_progress; + + /** Mutex protecting claimed */ + pthread_mutex_t mutex; + + /** The bdev status */ + enum spdk_bdev_status status; + + /** + * Pointer to the module that has claimed this bdev for purposes of creating virtual + * bdevs on top of it. Set to NULL if the bdev has not been claimed. + */ + struct spdk_bdev_module *claim_module; + + /** Callback function that will be called after bdev destruct is completed. */ + spdk_bdev_unregister_cb unregister_cb; + + /** Unregister call context */ + void *unregister_ctx; + + /** List of open descriptors for this block device. */ + TAILQ_HEAD(, spdk_bdev_desc) open_descs; + + TAILQ_ENTRY(spdk_bdev) link; + + /** points to a reset bdev_io if one is in progress. */ + struct spdk_bdev_io *reset_in_progress; + + /** poller for tracking the queue_depth of a device, NULL if not tracking */ + struct spdk_poller *qd_poller; + + /** period at which we poll for queue depth information */ + uint64_t period; + + /** used to aggregate queue depth while iterating across the bdev's open channels */ + uint64_t temporary_queue_depth; + + /** queue depth as calculated the last time the telemetry poller checked. */ + uint64_t measured_queue_depth; + + /** most recent value of ticks spent performing I/O. Used to calculate the weighted time doing I/O */ + uint64_t io_time; + + /** weighted time performing I/O. Equal to measured_queue_depth * period */ + uint64_t weighted_io_time; + + /** accumulated I/O statistics for previously deleted channels of this bdev */ + struct spdk_bdev_io_stat stat; + + /** histogram enabled on this bdev */ + bool histogram_enabled; + bool histogram_in_progress; + + /** Currently locked ranges for this bdev. Used to populate new channels. */ + lba_range_tailq_t locked_ranges; + + /** Pending locked ranges for this bdev. 
These ranges are not currently + * locked due to overlapping with another locked range. + */ + lba_range_tailq_t pending_locked_ranges; + } internal; +}; + +/** + * Callback when buffer is allocated for the bdev I/O. + * + * \param ch The I/O channel the bdev I/O was handled on. + * \param bdev_io The bdev I/O + * \param success True if buffer is allocated successfully or the bdev I/O has an SGL + * assigned already, or false if it failed. A possible reason for failure is that the size + * of the buffer to allocate is greater than the permitted maximum. + */ +typedef void (*spdk_bdev_io_get_buf_cb)(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, + bool success); + +/** + * Callback when an auxiliary buffer is allocated for the bdev I/O. + * + * \param ch The I/O channel the bdev I/O was handled on. + * \param bdev_io The bdev I/O + * \param aux_buf Pointer to the allocated buffer. NULL if there was a failure such as + * the size of the buffer to allocate is greater than the permitted maximum. + */ +typedef void (*spdk_bdev_io_get_aux_buf_cb)(struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io, void *aux_buf); + +/** Number of entries in the child_iov array of struct spdk_bdev_io. */ +#define BDEV_IO_NUM_CHILD_IOV 32 + +struct spdk_bdev_io { + /** The block device that this I/O belongs to. */ + struct spdk_bdev *bdev; + + /** Enumerated value representing the I/O type. */ + uint8_t type; + + /** Number of IO submission retries */ + uint16_t num_retries; + + /** A single iovec element for use by this bdev_io. */ + struct iovec iov; + + /** Array of iovecs used for I/O splitting. */ + struct iovec child_iov[BDEV_IO_NUM_CHILD_IOV]; + + union { + struct { + /** For SG buffer cases, array of iovecs to transfer. */ + struct iovec *iovs; + + /** For SG buffer cases, number of iovecs in iovec array. */ + int iovcnt; + + /** For fused operations such as COMPARE_AND_WRITE, array of iovecs + * for the second operation. + */ + struct iovec *fused_iovs; + + /** Number of iovecs in fused_iovs.
*/ + int fused_iovcnt; + + /* Metadata buffer */ + void *md_buf; + + /** Total size of data to be transferred. */ + uint64_t num_blocks; + + /** Starting offset (in blocks) of the bdev for this I/O. */ + uint64_t offset_blocks; + + /** stored user callback in case we split the I/O and use a temporary callback */ + spdk_bdev_io_completion_cb stored_user_cb; + + /** number of blocks remaining in a split i/o */ + uint64_t split_remaining_num_blocks; + + /** current offset of the split I/O in the bdev */ + uint64_t split_current_offset_blocks; + + /** count of outstanding batched split I/Os */ + uint32_t split_outstanding; + + struct { + /** Whether the buffer should be populated with the real data */ + uint8_t populate : 1; + + /** Whether the buffer should be committed back to disk */ + uint8_t commit : 1; + + /** True if this request is in the 'start' phase of zcopy. False if in 'end'. */ + uint8_t start : 1; + } zcopy; + + struct { + /** The callback argument for the outstanding request which this abort + * attempts to cancel. + */ + void *bio_cb_arg; + } abort; + } bdev; + struct { + /** Channel reference held while messages for this reset are in progress. */ + struct spdk_io_channel *ch_ref; + } reset; + struct { + /** The outstanding request matching bio_cb_arg which this abort attempts to cancel. 
*/ + struct spdk_bdev_io *bio_to_abort; + } abort; + struct { + /* The NVMe command to execute */ + struct spdk_nvme_cmd cmd; + + /* The data buffer to transfer */ + void *buf; + + /* The number of bytes to transfer */ + size_t nbytes; + + /* The meta data buffer to transfer */ + void *md_buf; + + /* meta data buffer size to transfer */ + size_t md_len; + } nvme_passthru; + struct { + /* First logical block of a zone */ + uint64_t zone_id; + + /* Number of zones */ + uint32_t num_zones; + + /* Used to change zoned device zone state */ + enum spdk_bdev_zone_action zone_action; + + /* The data buffer */ + void *buf; + } zone_mgmt; + } u; + + /** It may be used by modules to put the bdev_io into its own list. */ + TAILQ_ENTRY(spdk_bdev_io) module_link; + + /** + * Fields that are used internally by the bdev subsystem. Bdev modules + * must not read or write to these fields. + */ + struct __bdev_io_internal_fields { + /** The bdev I/O channel that this was handled on. */ + struct spdk_bdev_channel *ch; + + /** The bdev I/O channel that this was submitted on. */ + struct spdk_bdev_channel *io_submit_ch; + + /** The bdev descriptor that was used when submitting this I/O. */ + struct spdk_bdev_desc *desc; + + /** User function that will be called when this completes */ + spdk_bdev_io_completion_cb cb; + + /** Context that will be passed to the completion callback */ + void *caller_ctx; + + /** Current tsc at submit time. Used to calculate latency at completion. 
*/ + uint64_t submit_tsc; + + /** Error information from a device */ + union { + struct { + /** NVMe completion queue entry DW0 */ + uint32_t cdw0; + /** NVMe status code type */ + uint8_t sct; + /** NVMe status code */ + uint8_t sc; + } nvme; + /** Only valid when status is SPDK_BDEV_IO_STATUS_SCSI_ERROR */ + struct { + /** SCSI status code */ + uint8_t sc; + /** SCSI sense key */ + uint8_t sk; + /** SCSI additional sense code */ + uint8_t asc; + /** SCSI additional sense code qualifier */ + uint8_t ascq; + } scsi; + } error; + + /** + * Set to true while the bdev module submit_request function is in progress. + * + * This is used to decide whether spdk_bdev_io_complete() can complete the I/O directly + * or if completion must be deferred via an event. + */ + bool in_submit_request; + + /** Status for the IO */ + int8_t status; + + /** bdev allocated memory associated with this request */ + void *buf; + + /** requested size of the buffer associated with this I/O */ + uint64_t buf_len; + + /** if the request is double buffered, store original request iovs here */ + struct iovec bounce_iov; + struct iovec *orig_iovs; + int orig_iovcnt; + void *orig_md_buf; + + /** Callback for when the aux buf is allocated */ + spdk_bdev_io_get_aux_buf_cb get_aux_buf_cb; + + /** Callback for when buf is allocated */ + spdk_bdev_io_get_buf_cb get_buf_cb; + + /** Member used for linking child I/Os together. */ + TAILQ_ENTRY(spdk_bdev_io) link; + + /** Entry to the list need_buf of struct spdk_bdev. */ + STAILQ_ENTRY(spdk_bdev_io) buf_link; + + /** Entry to the list io_submitted of struct spdk_bdev_channel */ + TAILQ_ENTRY(spdk_bdev_io) ch_link; + + /** Enables queuing parent I/O when no bdev_ios available for split children. */ + struct spdk_bdev_io_wait_entry waitq_entry; + } internal; + + /** + * Per I/O context for use by the bdev module. + */ + uint8_t driver_ctx[0]; + + /* No members may be added after driver_ctx! */ +}; + +/** + * Register a new bdev. 
+ * + * \param bdev Block device to register. + * + * \return 0 on success. + * \return -EINVAL if the bdev name is NULL. + * \return -EEXIST if a bdev or bdev alias with the same name already exists. + */ +int spdk_bdev_register(struct spdk_bdev *bdev); + +/** + * Start unregistering a bdev. This will notify each currently open descriptor + * on this bdev about the hotremoval in hopes that the upper layers will stop + * using this bdev and manually close all the descriptors with spdk_bdev_close(). + * The actual bdev unregistration may be deferred until all descriptors are closed. + * + * \param bdev Block device to unregister. + * \param cb_fn Callback function to be called when the unregister is complete. + * \param cb_arg Argument to be supplied to cb_fn + */ +void spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg); + +/** + * Invokes the unregister callback of a bdev backing a virtual bdev. + * + * A Bdev with an asynchronous destruct path should return 1 from its + * destruct function and call this function at the conclusion of that path. + * Bdevs with synchronous destruct paths should return 0 from their destruct + * path. + * + * \param bdev Block device that was destroyed. + * \param bdeverrno Error code returned from bdev's destruct callback. + */ +void spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno); + +/** + * Register a virtual bdev. + * + * This function is deprecated. Users should call spdk_bdev_register instead. + * The bdev layer currently makes no use of the base_bdevs array, so switching + * to spdk_bdev_register results in no loss of functionality. + * + * \param vbdev Virtual bdev to register. + * \param base_bdevs Array of bdevs upon which this vbdev is based. + * \param base_bdev_count Number of bdevs in base_bdevs. + * + * \return 0 on success + * \return -EINVAL if the bdev name is NULL. + * \return -EEXIST if the bdev already exists. 
+ * \return -ENOMEM if allocation of the base_bdevs array or the base bdevs vbdevs array fails. + */ +int spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, + int base_bdev_count); + +/** + * Indicate to the bdev layer that the module is done examining a bdev. + * + * To be called synchronously or asynchronously in response to the + * module's examine function being called. + * + * \param module Pointer to the module completing the examination. + */ +void spdk_bdev_module_examine_done(struct spdk_bdev_module *module); + +/** + * Indicate to the bdev layer that the module is done initializing. + * + * To be called synchronously or asynchronously in response to the + * module_init function being called. + * + * \param module Pointer to the module completing the initialization. + */ +void spdk_bdev_module_init_done(struct spdk_bdev_module *module); + +/** + * Indicate to the bdev layer that the module is done cleaning up. + * + * To be called either synchronously or asynchronously + * in response to the module_fini function being called. + * + */ +void spdk_bdev_module_finish_done(void); + +/** + * Called by a bdev module to lay exclusive write claim to a bdev. + * + * Also upgrades that bdev's descriptor to have write access. + * + * \param bdev Block device to be claimed. + * \param desc Descriptor for the above block device. + * \param module Bdev module attempting to claim bdev. + * + * \return 0 on success + * \return -EPERM if the bdev is already claimed by another module. + */ +int spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_bdev_module *module); + +/** + * Called to release a write claim on a block device. + * + * \param bdev Block device to be released. + */ +void spdk_bdev_module_release_bdev(struct spdk_bdev *bdev); + +/** + * Add alias to block device names list. + * Aliases can be added only to a registered bdev. + * + * \param bdev Block device to query.
+ * \param alias Alias to be added to list. + * + * \return 0 on success + * \return -EEXIST if alias already exists as name or alias on any bdev + * \return -ENOMEM if memory cannot be allocated to store alias + * \return -EINVAL if passed alias is empty + */ +int spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias); + +/** + * Removes name from block device names list. + * + * \param bdev Block device to query. + * \param alias Alias to be deleted from list. + * \return 0 on success + * \return -ENOENT if alias does not exist + */ +int spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias); + +/** + * Removes all aliases from the block device alias list. + * + * \param bdev Block device to operate on. + */ +void spdk_bdev_alias_del_all(struct spdk_bdev *bdev); + +/** + * Get pointer to block device aliases list. + * + * \param bdev Block device to query. + * \return Pointer to bdev aliases list. + */ +const struct spdk_bdev_aliases_list *spdk_bdev_get_aliases(const struct spdk_bdev *bdev); + +/** + * Allocate a buffer for given bdev_io. Allocation will happen + * only if the bdev_io has no assigned SGL yet or SGL is not + * aligned to \c bdev->required_alignment. If SGL is not aligned, + * this call will cause copy from SGL to bounce buffer on write + * path or copy from bounce buffer to SGL before completion + * callback on read path. The buffer will be freed automatically + * on \c spdk_bdev_free_io() call. This call will never fail. + * In case of lack of memory given callback \c cb will be deferred + * until enough memory is freed. + * + * \param bdev_io I/O to allocate buffer for. + * \param cb callback to be called when the buffer is allocated + * or the bdev_io has an SGL assigned already. + * \param len size of the buffer to allocate. In case the bdev_io + * doesn't have an SGL assigned this field must be no bigger than + * \c SPDK_BDEV_LARGE_BUF_MAX_SIZE.
+ */ +void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len); + +/** + * Allocate an auxiliary buffer for given bdev_io. The length of the + * buffer will be the same size as the bdev_io primary buffer. The buffer + * must be freed using \c spdk_bdev_io_put_aux_buf() before completing + * the associated bdev_io. This call will never fail. In case of lack of + * memory given callback \c cb will be deferred until enough memory is freed. + * + * \param bdev_io I/O to allocate buffer for. + * \param cb callback to be called when the buffer is allocated + */ +void spdk_bdev_io_get_aux_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_aux_buf_cb cb); + +/** + * Free an auxiliary buffer previously allocated by \c spdk_bdev_io_get_aux_buf(). + * + * \param bdev_io bdev_io specified when the aux_buf was allocated. + * \param aux_buf auxiliary buffer to free + */ +void spdk_bdev_io_put_aux_buf(struct spdk_bdev_io *bdev_io, void *aux_buf); + +/** + * Set the given buffer as the data buffer described by this bdev_io. + * + * The portion of the buffer used may be adjusted for memory alignment + * purposes. + * + * \param bdev_io I/O to set the buffer on. + * \param buf The buffer to set as the active data buffer. + * \param len The length of the buffer. + * + */ +void spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len); + +/** + * Set the given buffer as metadata buffer described by this bdev_io. + * + * \param bdev_io I/O to set the buffer on. + * \param md_buf The buffer to set as the active metadata buffer. + * \param len The length of the metadata buffer. + */ +void spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len); + +/** + * Complete a bdev_io + * + * \param bdev_io I/O to complete. + * \param status The I/O completion status.
+ */ +void spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, + enum spdk_bdev_io_status status); + +/** + * Complete a bdev_io with an NVMe status code and DW0 completion queue entry + * + * \param bdev_io I/O to complete. + * \param cdw0 NVMe Completion Queue DW0 value (set to 0 if not applicable) + * \param sct NVMe Status Code Type. + * \param sc NVMe Status Code. + */ +void spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, uint32_t cdw0, int sct, + int sc); + +/** + * Complete a bdev_io with a SCSI status code. + * + * \param bdev_io I/O to complete. + * \param sc SCSI Status Code. + * \param sk SCSI Sense Key. + * \param asc SCSI Additional Sense Code. + * \param ascq SCSI Additional Sense Code Qualifier. + */ +void spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc, + enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq); + +/** + * Get a thread that given bdev_io was submitted on. + * + * \param bdev_io I/O + * \return thread that submitted the I/O + */ +struct spdk_thread *spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io); + +/** + * Get the bdev module's I/O channel that the given bdev_io was submitted on. + * + * \param bdev_io I/O + * \return the bdev module's I/O channel that the given bdev_io was submitted on. + */ +struct spdk_io_channel *spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io); + +/** + * Resize for a bdev. + * + * Change number of blocks for provided block device. + * It can only be called on a registered bdev. + * + * \param bdev Block device to change. + * \param size New size of bdev. + * \return 0 on success, negated errno on failure. + */ +int spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size); + +/** + * Translates NVMe status codes to SCSI status information. + * + * The codes are stored in the user supplied integers. + * + * \param bdev_io I/O containing status codes to translate. + * \param sc SCSI Status Code will be stored here. 
+ * \param sk SCSI Sense Key will be stored here. + * \param asc SCSI Additional Sense Code will be stored here. + * \param ascq SCSI Additional Sense Code Qualifier will be stored here. + */ +void spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io, + int *sc, int *sk, int *asc, int *ascq); + +/** + * Add the given module to the list of registered modules. + * This function should be invoked by referencing the macro + * SPDK_BDEV_MODULE_REGISTER in the module c file. + * + * \param bdev_module Module to be added. + */ +void spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module); + +/** + * Find registered module with name pointed to by \c name. + * + * \param name name of module to be searched for. + * \return pointer to module or NULL if no module with \c name exists + */ +struct spdk_bdev_module *spdk_bdev_module_list_find(const char *name); + +/** + * Get the spdk_bdev_io that embeds the given per-I/O driver context. + * + * \param ctx Pointer handed to SPDK_CONTAINEROF as the address of the driver_ctx + * member; presumably it must be the driver_ctx of a struct spdk_bdev_io (confirm + * against callers). + * + * \return The struct spdk_bdev_io computed by SPDK_CONTAINEROF from ctx. + */ +static inline struct spdk_bdev_io * +spdk_bdev_io_from_ctx(void *ctx) +{ + return SPDK_CONTAINEROF(ctx, struct spdk_bdev_io, driver_ctx); +} + +struct spdk_bdev_part_base; + +/** + * Returns a pointer to the spdk_bdev associated with an spdk_bdev_part_base + * + * \param part_base A pointer to an spdk_bdev_part_base object. + * + * \return A pointer to the base's spdk_bdev struct. + */ +struct spdk_bdev *spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base); + +/** + * Returns a spdk_bdev name of the corresponding spdk_bdev_part_base + * + * \param part_base A pointer to an spdk_bdev_part_base object. + * + * \return A text string representing the name of the base bdev. + */ +const char *spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base); + +/** + * Returns a pointer to the spdk_bdev_descriptor associated with an spdk_bdev_part_base + * + * \param part_base A pointer to an spdk_bdev_part_base object. + * + * \return A pointer to the base's spdk_bdev_desc struct.
+ */ +struct spdk_bdev_desc *spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base); + +/** + * Returns a pointer to the tailq associated with an spdk_bdev_part_base + * + * \param part_base A pointer to an spdk_bdev_part_base object. + * + * \return The head of a tailq of spdk_bdev_part structs registered to the base's module. + */ +struct bdev_part_tailq *spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base); + +/** + * Returns a pointer to the module level context associated with an spdk_bdev_part_base + * + * \param part_base A pointer to an spdk_bdev_part_base object. + * + * \return A pointer to the module level context registered with the base in spdk_bdev_part_base_construct. + */ +void *spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base); + +typedef void (*spdk_bdev_part_base_free_fn)(void *ctx); + +struct spdk_bdev_part { + /* Entry into the module's global list of bdev parts */ + TAILQ_ENTRY(spdk_bdev_part) tailq; + + /** + * Fields that are used internally by part.c These fields should only + * be accessed from a module using any pertinent get and set methods. + */ + struct bdev_part_internal_fields { + + /* This part's corresponding bdev object. Not to be confused with the base bdev */ + struct spdk_bdev bdev; + + /* The base to which this part belongs */ + struct spdk_bdev_part_base *base; + + /* number of blocks from the start of the base bdev to the start of this part */ + uint64_t offset_blocks; + } internal; +}; + +struct spdk_bdev_part_channel { + struct spdk_bdev_part *part; + struct spdk_io_channel *base_ch; +}; + +typedef TAILQ_HEAD(bdev_part_tailq, spdk_bdev_part) SPDK_BDEV_PART_TAILQ; + +/** + * Free the base corresponding to one or more spdk_bdev_part. + * + * \param base The base to free. + */ +void spdk_bdev_part_base_free(struct spdk_bdev_part_base *base); + +/** + * Free an spdk_bdev_part context. + * + * \param part The part to free. + * + * \return 1 always. 
To indicate that the operation is asynchronous. + */ +int spdk_bdev_part_free(struct spdk_bdev_part *part); + +/** + * Calls spdk_bdev_unregister on the bdev for each part associated with base_bdev. + * + * \param part_base The part base object built on top of an spdk_bdev + * \param tailq The list of spdk_bdev_part bdevs associated with this base bdev. + */ +void spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, + struct bdev_part_tailq *tailq); + +/** + * Construct a new spdk_bdev_part_base on top of the provided bdev. + * + * \param bdev The spdk_bdev upon which this base will be built. + * \param remove_cb Function to be called upon hotremove of the bdev. + * \param module The module to which this bdev base belongs. + * \param fn_table Function table for communicating with the bdev backend. + * \param tailq The head of the list of all spdk_bdev_part structures registered to this base's module. + * \param free_fn User provided function to free base related context upon bdev removal or shutdown. + * \param ctx Module specific context for this bdev part base. + * \param channel_size Channel size in bytes. + * \param ch_create_cb Called after a new channel is allocated. + * \param ch_destroy_cb Called upon channel deletion. + * + * \return A pointer to the newly constructed spdk_bdev_part_base on success. + * \return presumably NULL if the underlying bdev cannot be opened (NOTE(review): confirm against part.c). + */ +struct spdk_bdev_part_base *spdk_bdev_part_base_construct(struct spdk_bdev *bdev, + spdk_bdev_remove_cb_t remove_cb, + struct spdk_bdev_module *module, + struct spdk_bdev_fn_table *fn_table, + struct bdev_part_tailq *tailq, + spdk_bdev_part_base_free_fn free_fn, + void *ctx, + uint32_t channel_size, + spdk_io_channel_create_cb ch_create_cb, + spdk_io_channel_destroy_cb ch_destroy_cb); + +/** + * Create a logical spdk_bdev_part on top of a base. + * + * \param part The part object allocated by the user. + * \param base The base from which to create the part. + * \param name The name of the new spdk_bdev_part. 
+ * \param offset_blocks The offset into the base bdev at which this part begins. + * \param num_blocks The number of blocks that this part will span. + * \param product_name Unique name for this type of block device. + * + * \return 0 on success. + * \return -1 if the bases underlying bdev cannot be claimed by the current module. + */ +int spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, + char *name, uint64_t offset_blocks, uint64_t num_blocks, + char *product_name); + +/** + * Forwards I/O from an spdk_bdev_part to the underlying base bdev. + * + * This function will apply the offset_blocks the user provided to + * spdk_bdev_part_construct to the I/O. The user should not manually + * apply this offset before submitting any I/O through this function. + * + * \param ch The I/O channel associated with the spdk_bdev_part. + * \param bdev_io The I/O to be submitted to the underlying bdev. + * \return 0 on success or non-zero if submit request failed. + */ +int spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io); + +/** + * Return a pointer to this part's spdk_bdev. + * + * \param part An spdk_bdev_part object. + * + * \return A pointer to this part's spdk_bdev object. + */ +struct spdk_bdev *spdk_bdev_part_get_bdev(struct spdk_bdev_part *part); + +/** + * Return a pointer to this part's base. + * + * \param part An spdk_bdev_part object. + * + * \return A pointer to this part's spdk_bdev_part_base object. + */ +struct spdk_bdev_part_base *spdk_bdev_part_get_base(struct spdk_bdev_part *part); + +/** + * Return a pointer to this part's base bdev. + * + * The return value of this function is equivalent to calling + * spdk_bdev_part_base_get_bdev on this part's base. + * + * \param part An spdk_bdev_part object. + * + * \return A pointer to the bdev belonging to this part's base. 
+ */ +struct spdk_bdev *spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part); + +/** + * Return this part's offset from the beginning of the base bdev. + * + * This function should not be called in the I/O path. Any block + * translations to I/O will be handled in spdk_bdev_part_submit_request. + * + * \param part An spdk_bdev_part object. + * + * \return the block offset of this part from its underlying bdev. + */ +uint64_t spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part); + +/** + * Push media management events. To send the notification that new events are + * available, spdk_bdev_notify_media_management needs to be called. + * + * \param bdev Block device + * \param events Array of media events + * \param num_events Size of the events array + * + * \return number of events pushed or negative errno in case of failure + */ +int spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_media_event *events, + size_t num_events); + +/** + * Send SPDK_BDEV_EVENT_MEDIA_MANAGEMENT to all open descriptors that have + * pending media events. + * + * \param bdev Block device + */ +void spdk_bdev_notify_media_management(struct spdk_bdev *bdev); + +/* + * Macro used to register module for later initialization. + */ +#define SPDK_BDEV_MODULE_REGISTER(name, module) \ +static void __attribute__((constructor)) _spdk_bdev_module_register_##name(void) \ +{ \ + spdk_bdev_module_list_add(module); \ +} \ + +#endif /* SPDK_BDEV_MODULE_H */ diff --git a/src/spdk/include/spdk/bdev_zone.h b/src/spdk/include/spdk/bdev_zone.h new file mode 100644 index 000000000..9306256b8 --- /dev/null +++ b/src/spdk/include/spdk/bdev_zone.h @@ -0,0 +1,259 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Zoned device public interface + */ + +#ifndef SPDK_BDEV_ZONE_H +#define SPDK_BDEV_ZONE_H + +#include "spdk/stdinc.h" +#include "spdk/bdev.h" + +/** + * \brief SPDK block device. + * + * This is a virtual representation of a block device that is exported by the backend. 
+ */ + +struct spdk_bdev; + +enum spdk_bdev_zone_action { + SPDK_BDEV_ZONE_CLOSE, + SPDK_BDEV_ZONE_FINISH, + SPDK_BDEV_ZONE_OPEN, + SPDK_BDEV_ZONE_RESET +}; + +enum spdk_bdev_zone_state { + SPDK_BDEV_ZONE_STATE_EMPTY, + SPDK_BDEV_ZONE_STATE_OPEN, + SPDK_BDEV_ZONE_STATE_FULL, + SPDK_BDEV_ZONE_STATE_CLOSED, + SPDK_BDEV_ZONE_STATE_READ_ONLY, + SPDK_BDEV_ZONE_STATE_OFFLINE +}; + +struct spdk_bdev_zone_info { + uint64_t zone_id; + uint64_t write_pointer; + uint64_t capacity; + enum spdk_bdev_zone_state state; +}; + +/** + * Get device zone size in logical blocks. + * + * \param bdev Block device to query. + * \return Size of zone for this zoned device in logical blocks. + */ +uint64_t spdk_bdev_get_zone_size(const struct spdk_bdev *bdev); + +/** + * Get device maximum number of open zones. + * + * If this value is 0, there is no limit. + * + * \param bdev Block device to query. + * \return Maximum number of open zones for this zoned device. + */ +uint32_t spdk_bdev_get_max_open_zones(const struct spdk_bdev *bdev); + +/** + * Get device optimal number of open zones. + * + * \param bdev Block device to query. + * \return Optimal number of open zones for this zoned device. + */ +uint32_t spdk_bdev_get_optimal_open_zones(const struct spdk_bdev *bdev); + +/** + * Submit a get_zone_info request to the bdev. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param zone_id First logical block of a zone. + * \param num_zones Number of consecutive zones info to retrieve. + * \param info Pointer to array capable of storing num_zones elements. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_get_zone_info(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t zone_id, size_t num_zones, struct spdk_bdev_zone_info *info, + spdk_bdev_io_completion_cb cb, void *cb_arg); + + +/** + * Submit a zone_management request to the bdev. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param zone_id First logical block of a zone. + * \param action Action to perform on a zone (open, close, reset, finish). + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_zone_management(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t zone_id, enum spdk_bdev_zone_action action, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a zone_append request to the bdev. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to be written from. + * \param zone_id First logical block of a zone. + * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). + * Appended logical block address can be obtained with spdk_bdev_io_get_append_location(). + * Return negated errno on failure, in which case the callback will not be called. 
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_zone_append(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t zone_id, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a zone_append request to the bdev. This differs from + * spdk_bdev_zone_append by allowing the data buffer to be described in a scatter + * gather list. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param zone_id First logical block of a zone. + * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). + * Appended logical block address can be obtained with spdk_bdev_io_get_append_location(). + * Return negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_zone_appendv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t zone_id, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a zone_append request with metadata to the bdev. + * + * This function uses separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to be written from. + * \param md Metadata buffer. + * \param zone_id First logical block of a zone. 
+ * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). + * Appended logical block address can be obtained with spdk_bdev_io_get_append_location(). + * Return negated errno on failure, in which case the callback will not be called. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_zone_append_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, uint64_t zone_id, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/** + * Submit a zone_append request with metadata to the bdev. This differs from + * spdk_bdev_zone_append by allowing the data buffer to be described in a scatter + * gather list. + * + * This function uses separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param zone_id First logical block of a zone. + * \param num_blocks The number of blocks to write. buf must be greater than or equal to this size. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). + * Appended logical block address can be obtained with spdk_bdev_io_get_append_location(). + * Return negated errno on failure, in which case the callback will not be called. 
+ * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_zone_appendv_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, uint64_t zone_id, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, + void *cb_arg); + +/** + * Get append location (offset in blocks of the bdev) for this I/O. + * + * \param bdev_io I/O to get append location from. + */ +uint64_t spdk_bdev_io_get_append_location(struct spdk_bdev_io *bdev_io); + +#endif /* SPDK_BDEV_ZONE_H */ diff --git a/src/spdk/include/spdk/bit_array.h b/src/spdk/include/spdk/bit_array.h new file mode 100644 index 000000000..3019f9f17 --- /dev/null +++ b/src/spdk/include/spdk/bit_array.h @@ -0,0 +1,203 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Bit array data structure + */ + +#ifndef SPDK_BIT_ARRAY_H +#define SPDK_BIT_ARRAY_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Variable-length bit array. + */ +struct spdk_bit_array; + +/** + * Return the number of bits that a bit array is currently sized to hold. + * + * \param ba Bit array to query. + * + * \return the number of bits. + */ +uint32_t spdk_bit_array_capacity(const struct spdk_bit_array *ba); + +/** + * Create a bit array. + * + * \param num_bits Number of bits that the bit array is sized to hold. + * + * All bits in the array will be cleared. + * + * \return a pointer to the new bit array. + */ +struct spdk_bit_array *spdk_bit_array_create(uint32_t num_bits); + +/** + * Free a bit array and set the pointer to NULL. + * + * \param bap Bit array to free. + */ +void spdk_bit_array_free(struct spdk_bit_array **bap); + +/** + * Create or resize a bit array. + * + * To create a new bit array, pass a pointer to a spdk_bit_array pointer that is + * NULL for bap. + * + * The bit array will be sized to hold at least num_bits. + * + * If num_bits is smaller than the previous size of the bit array, + * any data beyond the new num_bits size will be cleared. + * + * If num_bits is larger than the previous size of the bit array, + * any data beyond the old num_bits size will be cleared. + * + * \param bap Bit array to create/resize. 
+ * \param num_bits Number of bits that the bit array is sized to hold. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_bit_array_resize(struct spdk_bit_array **bap, uint32_t num_bits); + +/** + * Get the value of a bit from the bit array. + * + * If bit_index is beyond the end of the current size of the bit array, this + * function will return false (i.e. bits beyond the end of the array are implicitly 0). + * + * \param ba Bit array to query. + * \param bit_index The index of a bit to query. + * + * \return the value of a bit from the bit array on success, or false on failure. + */ +bool spdk_bit_array_get(const struct spdk_bit_array *ba, uint32_t bit_index); + +/** + * Set (to 1) a bit in the bit array. + * + * If bit_index is beyond the end of the bit array, this function will return -EINVAL. + * + * \param ba Bit array to set a bit. + * \param bit_index The index of a bit to set. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_bit_array_set(struct spdk_bit_array *ba, uint32_t bit_index); + +/** + * Clear (to 0) a bit in the bit array. + * + * If bit_index is beyond the end of the bit array, no action is taken. Bits + * beyond the end of the bit array are implicitly 0. + * + * \param ba Bit array to clear a bit. + * \param bit_index The index of a bit to clear. + */ +void spdk_bit_array_clear(struct spdk_bit_array *ba, uint32_t bit_index); + +/** + * Find the index of the first set bit in the array. + * + * \param ba The bit array to search. + * \param start_bit_index The bit index from which to start searching (0 to start + * from the beginning of the array). + * + * \return the index of the first set bit. If no bits are set, returns UINT32_MAX. + */ +uint32_t spdk_bit_array_find_first_set(const struct spdk_bit_array *ba, uint32_t start_bit_index); + +/** + * Find the index of the first cleared bit in the array. + * + * \param ba The bit array to search. 
+ * \param start_bit_index The bit index from which to start searching (0 to start + * from the beginning of the array). + * + * \return the index of the first cleared bit. If no bits are cleared, returns UINT32_MAX. + */ +uint32_t spdk_bit_array_find_first_clear(const struct spdk_bit_array *ba, uint32_t start_bit_index); + +/** + * Count the number of set bits in the array. + * + * \param ba The bit array to search. + * + * \return the number of bits set in the array. + */ +uint32_t spdk_bit_array_count_set(const struct spdk_bit_array *ba); + +/** + * Count the number of cleared bits in the array. + * + * \param ba The bit array to search. + * + * \return the number of bits cleared in the array. + */ +uint32_t spdk_bit_array_count_clear(const struct spdk_bit_array *ba); + +/** + * Store bitmask from bit array. + * + * \param ba Bit array. + * \param mask Destination mask. Mask and bit array capacity must be equal. + */ +void spdk_bit_array_store_mask(const struct spdk_bit_array *ba, void *mask); + +/** + * Load bitmask to bit array. + * + * \param ba Bit array. + * \param mask Source mask. Mask and bit array capacity must be equal. + */ +void spdk_bit_array_load_mask(struct spdk_bit_array *ba, const void *mask); + +/** + * Clear (to 0) bit array bitmask. + * + * \param ba Bit array. + */ +void spdk_bit_array_clear_mask(struct spdk_bit_array *ba); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/blob.h b/src/spdk/include/spdk/blob.h new file mode 100644 index 000000000..fbc2728ee --- /dev/null +++ b/src/spdk/include/spdk/blob.h @@ -0,0 +1,897 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Blob Storage System + * + * The blob storage system, or the blobstore for short, is a low level + * library for placing opaque blobs of data onto a storage device such + * that scattered physical blocks on the storage device appear as a + * single, contiguous storage region. These blobs are also persistent, + * which means they are rediscoverable after reboot or power loss. + * + * The blobstore is designed to be very high performance, and thus has + * a few general rules regarding thread safety to avoid taking locks + * in the I/O path. This is primarily done by only allowing most + * functions to be called on the metadata thread. The metadata thread is + * the thread which called spdk_bs_init() or spdk_bs_load(). 
+ * + * Functions starting with the prefix "spdk_blob_io" are passed a channel + * as an argument, and channels may only be used from the thread they were + * created on. See \ref spdk_bs_alloc_io_channel. These are the only + * functions that may be called from a thread other than the metadata + * thread. + * + * The blobstore returns errors using negated POSIX errno values, either + * returned in the callback or as a return value. An errno value of 0 means + * success. + */ + +#ifndef SPDK_BLOB_H +#define SPDK_BLOB_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint64_t spdk_blob_id; +#define SPDK_BLOBID_INVALID (uint64_t)-1 +#define SPDK_BLOBSTORE_TYPE_LENGTH 16 + +enum blob_clear_method { + BLOB_CLEAR_WITH_DEFAULT, + BLOB_CLEAR_WITH_NONE, + BLOB_CLEAR_WITH_UNMAP, + BLOB_CLEAR_WITH_WRITE_ZEROES, +}; + +enum bs_clear_method { + BS_CLEAR_WITH_UNMAP, + BS_CLEAR_WITH_WRITE_ZEROES, + BS_CLEAR_WITH_NONE, +}; + +struct spdk_blob_store; +struct spdk_io_channel; +struct spdk_blob; +struct spdk_xattr_names; + +/** + * Blobstore operation completion callback. + * + * \param cb_arg Callback argument. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_bs_op_complete)(void *cb_arg, int bserrno); + +/** + * Blobstore operation completion callback with handle. + * + * \param cb_arg Callback argument. + * \param bs Handle to a blobstore. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_bs_op_with_handle_complete)(void *cb_arg, struct spdk_blob_store *bs, + int bserrno); + +/** + * Blob operation completion callback. + * + * \param cb_arg Callback argument. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_blob_op_complete)(void *cb_arg, int bserrno); + +/** + * Blob operation completion callback with blob ID. + * + * \param cb_arg Callback argument. 
+ * \param blobid Blob ID. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_blob_op_with_id_complete)(void *cb_arg, spdk_blob_id blobid, int bserrno); + +/** + * Blob operation completion callback with handle. + * + * \param cb_arg Callback argument. + * \param blb Handle to a blob. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_blob_op_with_handle_complete)(void *cb_arg, struct spdk_blob *blb, int bserrno); + +/** + * Blobstore device completion callback. + * + * \param channel I/O channel the operation was initiated on. + * \param cb_arg Callback argument. + * \param bserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_bs_dev_cpl)(struct spdk_io_channel *channel, + void *cb_arg, int bserrno); + +struct spdk_bs_dev_cb_args { + spdk_bs_dev_cpl cb_fn; + struct spdk_io_channel *channel; + void *cb_arg; +}; + +struct spdk_bs_dev { + /* Create a new channel which is a software construct that is used + * to submit I/O. */ + struct spdk_io_channel *(*create_channel)(struct spdk_bs_dev *dev); + + /* Destroy a previously created channel */ + void (*destroy_channel)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel); + + /* Destroy this blobstore device. Applications must not destroy the blobstore device, + * rather the blobstore will destroy it using this function pointer once all + * references to it during unload callback context have been completed. 
+ */ + void (*destroy)(struct spdk_bs_dev *dev); + + void (*read)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + void (*write)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + void (*readv)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + void (*writev)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + void (*flush)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct spdk_bs_dev_cb_args *cb_args); + + void (*write_zeroes)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args); + + uint64_t blockcnt; + uint32_t blocklen; /* In bytes */ +}; + +struct spdk_bs_type { + char bstype[SPDK_BLOBSTORE_TYPE_LENGTH]; +}; + +struct spdk_bs_opts { + /** Size of cluster in bytes. Must be multiple of 4KiB page size. */ + uint32_t cluster_sz; + + /** Count of the number of pages reserved for metadata */ + uint32_t num_md_pages; + + /** Maximum simultaneous metadata operations */ + uint32_t max_md_ops; + + /** Maximum simultaneous operations per channel */ + uint32_t max_channel_ops; + + /** Clear method */ + enum bs_clear_method clear_method; + + /** Blobstore type */ + struct spdk_bs_type bstype; + + /** Callback function to invoke for each blob. */ + spdk_blob_op_with_handle_complete iter_cb_fn; + + /** Argument passed to iter_cb_fn for each blob. 
*/ + void *iter_cb_arg; +}; + +/** + * Initialize a spdk_bs_opts structure to the default blobstore option values. + * + * \param opts The spdk_bs_opts structure to be initialized. + */ +void spdk_bs_opts_init(struct spdk_bs_opts *opts); + +/** + * Load a blobstore from the given device. + * + * \param dev Blobstore block device. + * \param opts The structure which contains the option values for the blobstore. + * \param cb_fn Called when the loading is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Initialize a blobstore on the given device. + * + * \param dev Blobstore block device. + * \param opts The structure which contains the option values for the blobstore. + * \param cb_fn Called when the initialization is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg); + +typedef void (*spdk_bs_dump_print_xattr)(FILE *fp, const char *bstype, const char *name, + const void *value, size_t value_length); + +/** + * Dump a blobstore's metadata to a given FILE in human-readable format. + * + * \param dev Blobstore block device. + * \param fp FILE pointer to dump the metadata contents. + * \param print_xattr_fn Callback function to interpret external xattrs. + * \param cb_fn Called when the dump is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn, + spdk_bs_op_complete cb_fn, void *cb_arg); +/** + * Destroy the blobstore. + * + * It will destroy the blobstore by zeroing the super block. + * + * \param bs blobstore to destroy. + * \param cb_fn Called when the destruction is complete. + * \param cb_arg Argument passed to function cb_fn. 
+ */ +void spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, + void *cb_arg); + +/** + * Unload the blobstore. + * + * It will flush all volatile data to disk. + * + * \param bs blobstore to unload. + * \param cb_fn Called when the unloading is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg); + +/** + * Set a super blob on the given blobstore. + * + * This will be retrievable immediately after spdk_bs_load() on the next initialization. + * + * \param bs blobstore. + * \param blobid The id of the blob which will be set as the super blob. + * \param cb_fn Called when the setting is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_bs_op_complete cb_fn, void *cb_arg); + +/** + * Get the super blob. The obtained blob id will be passed to the callback function. + * + * \param bs blobstore. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_get_super(struct spdk_blob_store *bs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg); + +/** + * Get the cluster size in bytes. + * + * \param bs blobstore to query. + * + * \return cluster size. + */ +uint64_t spdk_bs_get_cluster_size(struct spdk_blob_store *bs); + +/** + * Get the page size in bytes. This is the write and read granularity of blobs. + * + * \param bs blobstore to query. + * + * \return page size. + */ +uint64_t spdk_bs_get_page_size(struct spdk_blob_store *bs); + +/** + * Get the io unit size in bytes. + * + * \param bs blobstore to query. + * + * \return io unit size. + */ +uint64_t spdk_bs_get_io_unit_size(struct spdk_blob_store *bs); + +/** + * Get the number of free clusters. + * + * \param bs blobstore to query. + * + * \return the number of free clusters. 
+ */ +uint64_t spdk_bs_free_cluster_count(struct spdk_blob_store *bs); + +/** + * Get the total number of clusters accessible by user. + * + * \param bs blobstore to query. + * + * \return the total number of clusters accessible by user. + */ +uint64_t spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs); + +/** + * Get the blob id. + * + * \param blob Blob struct to query. + * + * \return blob id. + */ +spdk_blob_id spdk_blob_get_id(struct spdk_blob *blob); + +/** + * Get the number of pages allocated to the blob. + * + * \param blob Blob struct to query. + * + * \return the number of pages. + */ +uint64_t spdk_blob_get_num_pages(struct spdk_blob *blob); + +/** + * Get the number of io_units allocated to the blob. + * + * \param blob Blob struct to query. + * + * \return the number of io_units. + */ +uint64_t spdk_blob_get_num_io_units(struct spdk_blob *blob); + +/** + * Get the number of clusters allocated to the blob. + * + * \param blob Blob struct to query. + * + * \return the number of clusters. + */ +uint64_t spdk_blob_get_num_clusters(struct spdk_blob *blob); + +struct spdk_blob_xattr_opts { + /* Number of attributes */ + size_t count; + /* Array of attribute names. Caller should free this array after use. */ + char **names; + /* User context passed to get_xattr_value function */ + void *ctx; + /* Callback that will return value for each attribute name. */ + void (*get_value)(void *xattr_ctx, const char *name, + const void **value, size_t *value_len); +}; + +struct spdk_blob_opts { + uint64_t num_clusters; + bool thin_provision; + enum blob_clear_method clear_method; + struct spdk_blob_xattr_opts xattrs; + + /** Enable separate extent pages in metadata */ + bool use_extent_table; +}; + +/** + * Initialize a spdk_blob_opts structure to the default blob option values. + * + * \param opts spdk_blob_opts structure to initialize. + */ +void spdk_blob_opts_init(struct spdk_blob_opts *opts); + +/** + * Create a new blob with options on the given blobstore. 
The new blob id will + * be passed to the callback function. + * + * \param bs blobstore. + * \param opts The structure which contains the option values for the new blob. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg); + +/** + * Create a new blob with default option values on the given blobstore. + * The new blob id will be passed to the callback function. + * + * \param bs blobstore. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_create_blob(struct spdk_blob_store *bs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg); + +/** + * Create a read-only snapshot of specified blob with provided options. + * This will automatically sync specified blob. + * + * When operation is done, original blob is converted to the thin-provisioned + * blob with a newly created read-only snapshot set as a backing blob. + * Structure snapshot_xattrs as well as anything it references (like e.g. names + * array) must be valid until the completion is called. + * + * \param bs blobstore. + * \param blobid Id of the source blob used to create a snapshot. + * \param snapshot_xattrs xattrs specified for snapshot. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid, + const struct spdk_blob_xattr_opts *snapshot_xattrs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg); + +/** + * Create a clone of specified read-only blob. + * + * Structure clone_xattrs as well as anything it references (like e.g. names + * array) must be valid until the completion is called. + * + * \param bs blobstore. + * \param blobid Id of the read only blob used as a snapshot for new clone. 
+ * \param clone_xattrs xattrs specified for clone. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid, + const struct spdk_blob_xattr_opts *clone_xattrs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg); + +/** + * Provide a table with the blob ids of the clones that depend on the specified snapshot. + * + * Ids array should be allocated and the count parameter set to the number of + * ids it can store, before calling this function. + * + * If ids is NULL or count parameter is not sufficient to handle ids of all + * clones, -ENOMEM error is returned and count parameter is updated to the + * total number of clones. + * + * \param bs blobstore. + * \param blobid Snapshot's blob id. + * \param ids Array of the clone ids or NULL to get required size in count. + * \param count Size of ids. After call it is updated to the number of clones. + * + * \return -ENOMEM if count is not sufficient to store all clones. + */ +int spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids, + size_t *count); + +/** + * Get the blob id for the parent snapshot of this blob. + * + * \param bs blobstore. + * \param blobid Blob id. + * + * \return blob id of parent blob or SPDK_BLOBID_INVALID if the blob has no parent + */ +spdk_blob_id spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid); + +/** + * Check if blob is read only. + * + * \param blob Blob. + * + * \return true if blob is read only. + */ +bool spdk_blob_is_read_only(struct spdk_blob *blob); + +/** + * Check if blob is a snapshot. + * + * \param blob Blob. + * + * \return true if blob is a snapshot. + */ +bool spdk_blob_is_snapshot(struct spdk_blob *blob); + +/** + * Check if blob is a clone. + * + * \param blob Blob. + * + * \return true if blob is a clone. 
+ */ +bool spdk_blob_is_clone(struct spdk_blob *blob); + +/** + * Check if blob is thin-provisioned. + * + * \param blob Blob. + * + * \return true if blob is thin-provisioned. + */ +bool spdk_blob_is_thin_provisioned(struct spdk_blob *blob); + +/** + * Delete an existing blob from the given blobstore. + * + * \param bs blobstore. + * \param blobid The id of the blob to delete. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Allocate all clusters in this blob. Data for allocated clusters is copied + * from backing blob(s) if they exist. + * + * This call removes all dependencies on any backing blobs. + * + * \param bs blobstore. + * \param channel IO channel used to inflate blob. + * \param blobid The id of the blob to inflate. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel, + spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Remove dependency on parent blob. + * + * This call allocates and copies data for any clusters that are allocated in + * the parent blob, and decouples parent updating dependencies of blob to + * its ancestor. + * + * If the blob has no parent, an -EINVAL error is reported. + * + * \param bs blobstore. + * \param channel IO channel used to inflate blob. + * \param blobid The id of the blob. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. 
+ */ +void spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel, + spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg); + +struct spdk_blob_open_opts { + enum blob_clear_method clear_method; +}; + +/** + * Initialize a spdk_blob_open_opts structure to the default blob option values. + * + * \param opts spdk_blob_open_opts structure to initialize. + */ +void spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts); + +/** + * Open a blob from the given blobstore. + * + * \param bs blobstore. + * \param blobid The id of the blob to open. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Open a blob from the given blobstore with additional options. + * + * \param bs blobstore. + * \param blobid The id of the blob to open. + * \param opts The structure which contains the option values for the blob. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid, + struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Resize a blob to 'sz' clusters. These changes are not persisted to disk until + * spdk_blob_sync_md() is called. + * If called before a previous resize finishes, it will fail with errno -EBUSY. + * + * \param blob Blob to resize. + * \param sz The new number of clusters. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + * + */ +void spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, + void *cb_arg); + +/** + * Set blob as read only. + * + * These changes do not take effect until spdk_blob_sync_md() is called. + * + * \param blob Blob to set. 
+ */ +int spdk_blob_set_read_only(struct spdk_blob *blob); + +/** + * Sync a blob. + * + * Make a blob persistent. This applies to open, resize, set xattr, and remove + * xattr. These operations will not be persistent until the blob has been synced. + * + * \param blob Blob to sync. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Close a blob. This will automatically sync. + * + * \param blob Blob to close. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Allocate an I/O channel for the given blobstore. + * + * \param bs blobstore. + * \return a pointer to the allocated I/O channel. + */ +struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs); + +/** + * Free the I/O channel. + * + * \param channel I/O channel to free. + */ +void spdk_bs_free_io_channel(struct spdk_io_channel *channel); + +/** + * Write data to a blob. + * + * \param blob Blob to write. + * \param channel The I/O channel used to submit requests. + * \param payload The specified buffer which should contain the data to be written. + * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of data in io units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Read data from a blob. + * + * \param blob Blob to read. + * \param channel The I/O channel used to submit requests. + * \param payload The specified buffer which will store the obtained data. 
+ * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of data in io units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Write the data described by 'iov' to 'length' io_units beginning at 'offset' io_units + * into the blob. + * + * \param blob Blob to write. + * \param channel I/O channel used to submit requests. + * \param iov The pointer points to an array of iovec structures. + * \param iovcnt The number of buffers. + * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of data in io units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Read 'length' io_units starting at 'offset' io_units into the blob into the memory + * described by 'iov'. + * + * \param blob Blob to read. + * \param channel I/O channel used to submit requests. + * \param iov The pointer points to an array of iovec structures. + * \param iovcnt The number of buffers. + * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of data in io units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Unmap 'length' io_units beginning at 'offset' io_units on the blob as unused. 
Unmapped + * io_units may allow the underlying storage media to behave more efficiently. + * + * \param blob Blob to unmap. + * \param channel I/O channel used to submit requests. + * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of unmap area in io_units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel, + uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Write zeros into area of a blob. + * + * \param blob Blob to write. + * \param channel I/O channel used to submit requests. + * \param offset Offset is in io units from the beginning of the blob. + * \param length Size of data in io units. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel, + uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg); + +/** + * Get the first blob of the blobstore. The obtained blob will be passed to + * the callback function. + * + * \param bs blobstore to traverse. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_iter_first(struct spdk_blob_store *bs, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Get the next blob by using the current blob. The obtained blob will be passed + * to the callback function. + * + * \param bs blobstore to traverse. + * \param blob The current blob. + * \param cb_fn Called when the operation is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Set an extended attribute for the given blob. 
+ * + * \param blob Blob to set attribute. + * \param name Name of the extended attribute. + * \param value Value of the extended attribute. + * \param value_len Length of the value. + * + * \return 0 on success, -1 on failure. + */ +int spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len); + +/** + * Remove the extended attribute from the given blob. + * + * \param blob Blob to remove attribute. + * \param name Name of the extended attribute. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name); + +/** + * Get the value of the specified extended attribute. The obtained value and its + * size will be stored in value and value_len. + * + * \param blob Blob to query. + * \param name Name of the extended attribute. + * \param value Parameter as output. + * \param value_len Parameter as output. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len); + +/** + * Iterate through all extended attributes of the blob. Get the names of all extended + * attributes that will be stored in names. + * + * \param blob Blob to query. + * \param names Parameter as output. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names); + +/** + * Get the number of extended attributes. + * + * \param names Names of total extended attributes of the blob. + * + * \return the number of extended attributes. + */ +uint32_t spdk_xattr_names_get_count(struct spdk_xattr_names *names); + +/** + * Get the attribute name specified by the index. + * + * \param names Names of total extended attributes of the blob. + * \param index Index position of the specified attribute. + * + * \return attribute name. 
+ */ +const char *spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index); + +/** + * Free the attribute names. + * + * \param names Names of total extended attributes of the blob. + */ +void spdk_xattr_names_free(struct spdk_xattr_names *names); + +/** + * Get blobstore type of the given device. + * + * \param bs blobstore to query. + * + * \return blobstore type. + */ +struct spdk_bs_type spdk_bs_get_bstype(struct spdk_blob_store *bs); + +/** + * Set blobstore type to the given device. + * + * \param bs blobstore to set to. + * \param bstype Type label to set. + */ +void spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_BLOB_H_ */ diff --git a/src/spdk/include/spdk/blob_bdev.h b/src/spdk/include/spdk/blob_bdev.h new file mode 100644 index 000000000..1867c464c --- /dev/null +++ b/src/spdk/include/spdk/blob_bdev.h @@ -0,0 +1,88 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Helper library to use spdk_bdev as the backing device for a blobstore + */ + +#ifndef SPDK_BLOB_BDEV_H +#define SPDK_BLOB_BDEV_H + +#include "spdk/stdinc.h" +#include "spdk/bdev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_bs_dev; +struct spdk_bdev; +struct spdk_bdev_module; + +/** + * Create a blobstore block device from a bdev. (deprecated, please use spdk_bdev_create_bs_dev_from_desc, + * together with spdk_bdev_open_ext). + * + * \param bdev Bdev to use. + * \param remove_cb Called when the block device is removed. + * \param remove_ctx Argument passed to function remove_cb. + * + * \return a pointer to the blobstore block device on success or NULL otherwise. + */ +struct spdk_bs_dev *spdk_bdev_create_bs_dev(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb, + void *remove_ctx); + +/** + * Create a blobstore block device from the descriptor of a bdev. + * + * \param desc Descriptor of a bdev. spdk_bdev_open_ext() is recommended to get the desc. + * + * \return a pointer to the blobstore block device on success or NULL otherwise. 
+ */ +struct spdk_bs_dev *spdk_bdev_create_bs_dev_from_desc(struct spdk_bdev_desc *desc); + +/** + * Claim the bdev module for the given blobstore. + * + * \param bs_dev Blobstore block device. + * \param module Bdev module to claim. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_bs_bdev_claim(struct spdk_bs_dev *bs_dev, struct spdk_bdev_module *module); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/blobfs.h b/src/spdk/include/spdk/blobfs.h new file mode 100644 index 000000000..2a4342ded --- /dev/null +++ b/src/spdk/include/spdk/blobfs.h @@ -0,0 +1,599 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * SPDK Filesystem + */ + +#ifndef SPDK_FS_H +#define SPDK_FS_H + +#include "spdk/stdinc.h" + +#include "spdk/blob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_FILE_NAME_MAX 255 + +struct spdk_file; +struct spdk_filesystem; + +typedef struct spdk_file *spdk_fs_iter; + +struct spdk_blobfs_opts { + uint32_t cluster_sz; +}; + +struct spdk_file_stat { + spdk_blob_id blobid; + uint64_t size; +}; + +/** + * Filesystem operation completion callback with handle. + * + * \param ctx Context for the operation. + * \param fs Handle to a blobfs. + * \param fserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_fs_op_with_handle_complete)(void *ctx, struct spdk_filesystem *fs, + int fserrno); + +/** + * File operation completion callback with handle. + * + * \param ctx Context for the operation. + * \param f Handle to a file. + * \param fserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_file_op_with_handle_complete)(void *ctx, struct spdk_file *f, int fserrno); +typedef spdk_bs_op_complete spdk_fs_op_complete; + +/** + * File operation completion callback. + * + * \param ctx Context for the operation. + * \param fserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_file_op_complete)(void *ctx, int fserrno); + +/** + * File stat operation completion callback. 
+ * + * \param ctx Context for the operation. + * \param stat Handle to the stat about the file. + * \param fserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_file_stat_op_complete)(void *ctx, struct spdk_file_stat *stat, int fserrno); + +/** + * Function for a request of file system. + * + * \param arg Argument to the request function. + */ +typedef void (*fs_request_fn)(void *arg); + +/** + * Function for sending request. + * + * This function will be invoked any time when the filesystem wants to pass a + * message to the main dispatch thread. + * + * \param fs_request_fn A pointer to the request function. + * \param arg Argument to the request function. + */ +typedef void (*fs_send_request_fn)(fs_request_fn, void *arg); + +/** + * Initialize a spdk_blobfs_opts structure to the default option values. + * + * \param opts spdk_blobfs_opts structure to initialize. + */ +void spdk_fs_opts_init(struct spdk_blobfs_opts *opts); + +/** + * Initialize blobstore filesystem. + * + * Initialize the blobstore filesystem on the blobstore block device which has + * been created by the function spdk_bdev_create_bs_dev() in the blob_bdev.h. + * The obtained blobstore filesystem will be passed to the callback function. + * + * \param dev Blobstore block device used by this blobstore filesystem. + * \param opt Initialization options used for this blobstore filesystem. + * \param send_request_fn The function for sending request. This function will + * be invoked any time when the blobstore filesystem wants to pass a message to + * the main dispatch thread. + * \param cb_fn Called when the initialization is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_fs_init(struct spdk_bs_dev *dev, struct spdk_blobfs_opts *opt, + fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Load blobstore filesystem from the given blobstore block device. 
+ * + * The obtained blobstore filesystem will be passed to the callback function. + * + * \param dev Blobstore block device used by this blobstore filesystem. + * \param send_request_fn The function for sending request. This function will + * be invoked any time when the blobstore filesystem wants to pass a message to + * the main dispatch thread. + * \param cb_fn Called when the loading is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Unload blobstore filesystem. + * + * \param fs Blobstore filesystem to unload. + * \param cb_fn Called when the unloading is complete. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg); + +/** + * Allocate an I/O channel for asynchronous operations. + * + * \param fs Blobstore filesystem to allocate I/O channel. + * + * \return a pointer to the I/O channel on success or NULL otherwise. + */ +struct spdk_io_channel *spdk_fs_alloc_io_channel(struct spdk_filesystem *fs); + +/** + * Free I/O channel. + * + * This function will decrease the references of this I/O channel. If the reference + * is reduced to 0, the I/O channel will be freed. + * + * \param channel I/O channel to free. + */ +void spdk_fs_free_io_channel(struct spdk_io_channel *channel); + +/** + * Allocate a context for synchronous operations. + * + * \param fs Blobstore filesystem for this context. + * + * \return a pointer to the context on success or NULL otherwise. + */ +struct spdk_fs_thread_ctx *spdk_fs_alloc_thread_ctx(struct spdk_filesystem *fs); + +/** + * Free thread context. + * + * \param ctx Thread context to free. + */ +void spdk_fs_free_thread_ctx(struct spdk_fs_thread_ctx *ctx); + +/** + * Get statistics about the file including the underlying blob id and the file size. 
+ * + * \param fs Blobstore filesystem. + * \param ctx The thread context for this operation + * \param name The file name used to look up the matched file in the blobstore filesystem. + * \param stat Caller allocated structure to store the obtained information about + * this file. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, + const char *name, struct spdk_file_stat *stat); + +#define SPDK_BLOBFS_OPEN_CREATE (1ULL << 0) + +/** + * Create a new file on the given blobstore filesystem. + * + * \param fs Blobstore filesystem. + * \param ctx The thread context for this operation + * \param name The file name for this new file. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, + const char *name); + +/** + * Open the file. + * + * \param fs Blobstore filesystem. + * \param ctx The thread context for this operation + * \param name The file name used to look up the matched file in the blobstore filesystem. + * \param flags These flags will be used to control the open mode. + * \param file It will point to the open file if successful or NULL otherwise. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, + const char *name, uint32_t flags, struct spdk_file **file); + +/** + * Close the file. + * + * \param file File to close. + * \param ctx The thread context for this operation + * + * \return 0 on success, negative errno on failure. + */ +int spdk_file_close(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx); + +/** + * Change the file name. + * + * This operation will overwrite an existing file if there is a file with the + * same name. + * + * \param fs Blobstore filesystem. + * \param ctx The thread context for this operation + * \param old_name Old name of the file. 
+ * \param new_name New name of the file. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, + const char *old_name, const char *new_name); + +/** + * Delete the file. + * + * \param fs Blobstore filesystem. + * \param ctx The thread context for this operation + * \param name The name of the file to be deleted. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, + const char *name); + +/** + * Get the first file in the blobstore filesystem. + * + * \param fs Blobstore filesystem to traverse. + * + * \return an iterator which points to the first file in the blobstore filesystem. + */ +spdk_fs_iter spdk_fs_iter_first(struct spdk_filesystem *fs); + +/** + * Get the next file in the blobstore filesystem by using the input iterator. + * + * \param iter The iterator which points to the current file struct. + * + * \return an iterator which points to the next file in the blobstore filesystem. + */ +spdk_fs_iter spdk_fs_iter_next(spdk_fs_iter iter); + +#define spdk_fs_iter_get_file(iter) ((struct spdk_file *)(iter)) + +/** + * Truncate the file. + * + * \param file File to truncate. + * \param ctx The thread context for this operation + * \param length New size in bytes of the file. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_file_truncate(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx, + uint64_t length); + +/** + * Get file name. + * + * \param file File to query. + * + * \return the name of the file. + */ +const char *spdk_file_get_name(struct spdk_file *file); + +/** + * Obtain the size of the file. + * + * \param file File to query. + * + * \return the size in bytes of the file. + */ +uint64_t spdk_file_get_length(struct spdk_file *file); + +/** + * Write data to the given file. + * + * \param file File to write. 
+ * \param ctx The thread context for this operation + * \param payload The specified buffer which should contain the data to be transmitted. + * \param offset The beginning position to write data. + * \param length The size in bytes of data to write. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_file_write(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx, + void *payload, uint64_t offset, uint64_t length); + +/** + * Read data to user buffer from the given file. + * + * \param file File to read. + * \param ctx The thread context for this operation + * \param payload The specified buffer which will store the obtained data. + * \param offset The beginning position to read. + * \param length The size in bytes of data to read. + * + * \return the end position of this read operation on success, negated errno on failure. + */ +int64_t spdk_file_read(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx, + void *payload, uint64_t offset, uint64_t length); + +/** + * Set cache size for the blobstore filesystem. + * + * \param size_in_mb Cache size in megabytes. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_fs_set_cache_size(uint64_t size_in_mb); + +/** + * Obtain the cache size. + * + * \return cache size in megabytes. + */ +uint64_t spdk_fs_get_cache_size(void); + +#define SPDK_FILE_PRIORITY_LOW 0 /* default */ +#define SPDK_FILE_PRIORITY_HIGH 1 + +/** + * Set priority for the file. + * + * \param file File to set priority. + * \param priority Priority level (SPDK_FILE_PRIORITY_LOW or SPDK_FILE_PRIORITY_HIGH). + */ +void spdk_file_set_priority(struct spdk_file *file, uint32_t priority); + +/** + * Synchronize the data from the cache to the disk. + * + * \param file File to sync. + * \param ctx The thread context for this operation + * + * \return 0 on success. + */ +int spdk_file_sync(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx); + +/** + * Get the unique ID for the file. 
+ * + * \param file File to get the ID. + * \param id ID buffer. + * \param size Size of the ID buffer. + * + * \return the length of ID on success. + */ +int spdk_file_get_id(struct spdk_file *file, void *id, size_t size); + +/** + * Read data to user buffer from the given file. + * + * \param file File to read. + * \param channel I/O channel for asynchronous operations. + * \param iovs A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iovs. + * \param offset The beginning position to read. + * \param length The size in bytes of data to read. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_readv_async(struct spdk_file *file, struct spdk_io_channel *channel, + struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Write data to the given file. + * + * \param file File to write. + * \param channel I/O channel for asynchronous operations. + * \param iovs A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iovs. + * \param offset The beginning position to write. + * \param length The size in bytes of data to write. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_writev_async(struct spdk_file *file, struct spdk_io_channel *channel, + struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Get statistics about the file including the underlying blob id and the file size. + * + * \param fs Blobstore filesystem. + * \param name The file name used to look up the matched file in the blobstore filesystem. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. 
+ */ +void spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name, + spdk_file_stat_op_complete cb_fn, void *cb_arg); + +/** + * Create a new file on the given blobstore filesystem. + * + * \param fs Blobstore filesystem. + * \param name The file name for this new file. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Open the file. + * + * \param fs Blobstore filesystem. + * \param name The file name used to look up the matched file in the blobstore filesystem. + * \param flags These flags will be used to control the open mode. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags, + spdk_file_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Close the file. + * + * \param file File to close. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg); + + +/** + * Change the file name. + * + * This operation will overwrite an existing file if there is a file with the + * same name. + * + * \param fs Blobstore filesystem. + * \param old_name Old name of the file. + * \param new_name New name of the file. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_fs_rename_file_async(struct spdk_filesystem *fs, const char *old_name, + const char *new_name, spdk_fs_op_complete cb_fn, + void *cb_arg); + +/** + * Delete the file. + * + * \param fs Blobstore filesystem. + * \param name The name of the file to be deleted. 
+ * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + * + */ +void spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Truncate the file. + * + * \param file File to truncate. + * \param length New size in bytes of the file. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_truncate_async(struct spdk_file *file, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Write data to the given file. + * + * \param file File to write. + * \param channel I/O channel for asynchronous operations. + * \param payload The specified buffer which should contain the data to be transmitted. + * \param offset The beginning position to write data. + * \param length The size in bytes of data to write. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Read data to user buffer from the given file. + * + * \param file File to read. + * \param channel I/O channel for asynchronous operations. + * \param payload The specified buffer which will store the obtained data. + * \param offset The beginning position to read. + * \param length The size in bytes of data to read. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/** + * Sync all dirty cache buffers to the backing block device. 
For async + * usage models, completion of the sync indicates only that data written + * when the sync command was issued have been flushed to disk - it does + * not guarantee any writes submitted after the sync have been flushed, + * even if those writes are completed before the sync. + * + * \param file File to sync. + * \param channel I/O channel for asynchronous operations. + * \param cb_fn Called when the request is complete. + * \param cb_arg Argument passed to cb_fn. + * + * \return None. + */ +void spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *channel, + spdk_file_op_complete cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_FS_H_ */ diff --git a/src/spdk/include/spdk/blobfs_bdev.h b/src/spdk/include/spdk/blobfs_bdev.h new file mode 100644 index 000000000..e915b18f8 --- /dev/null +++ b/src/spdk/include/spdk/blobfs_bdev.h @@ -0,0 +1,98 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Operations on blobfs whose backing device is spdk_bdev + */ + +#ifndef SPDK_BLOBFS_BDEV_H +#define SPDK_BLOBFS_BDEV_H + +#include "spdk/stdinc.h" +#include "spdk/bdev.h" +#include "spdk/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * blobfs on bdev operation completion callback. + * + * \param cb_arg Callback argument. + * \param fserrno 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_blobfs_bdev_op_complete)(void *cb_arg, int fserrno); + +/** + * Detect whether blobfs exists on the given device. + * + * \param bdev_name Name of block device. + * \param cb_fn Called when the detecting is complete. fserrno is -EILSEQ if no blobfs exists. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blobfs_bdev_detect(const char *bdev_name, + spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg); + +/** + * Create a blobfs on the given device. + * + * \param bdev_name Name of block device. + * \param cluster_sz Size of cluster in bytes. Must be multiple of 4KiB page size. + * \param cb_fn Called when the creation is complete. + * \param cb_arg Argument passed to function cb_fn. 
+ */ +void spdk_blobfs_bdev_create(const char *bdev_name, uint32_t cluster_sz, + spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg); + +#ifdef SPDK_CONFIG_FUSE +/** + * Mount a blobfs on given device to a host path by FUSE + * + * A new thread is created dedicatedly for one mountpoint to handle FUSE request + * by blobfs API. + * + * \param bdev_name Name of block device. + * \param mountpoint Host path to mount blobfs. + * \param cb_fn Called when mount operation is complete. fserrno is -EILSEQ if no blobfs exists. + * \param cb_arg Argument passed to function cb_fn. + */ +void spdk_blobfs_bdev_mount(const char *bdev_name, const char *mountpoint, + spdk_blobfs_bdev_op_complete cb_fn, void *cb_arg); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_BLOBFS_BDEV_H */ diff --git a/src/spdk/include/spdk/conf.h b/src/spdk/include/spdk/conf.h new file mode 100644 index 000000000..4a5292d32 --- /dev/null +++ b/src/spdk/include/spdk/conf.h @@ -0,0 +1,215 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>. + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Configuration file parser + */ + +#ifndef SPDK_CONF_H +#define SPDK_CONF_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_conf_value; +struct spdk_conf_item; +struct spdk_conf_section; +struct spdk_conf; + +/** + * Allocate a configuration struct used for the initialization of SPDK app. + * + * \return a pointer to the allocated configuration struct. + */ +struct spdk_conf *spdk_conf_allocate(void); + +/** + * Free the configuration struct. + * + * \param cp Configuration struct to free. + */ +void spdk_conf_free(struct spdk_conf *cp); + +/** + * Read configuration file for spdk_conf struct. + * + * \param cp Configuration struct used for the initialization of SPDK app. + * \param file File to read that is created by user to configure SPDK app. + * + * \return 0 on success, -1 on failure. + */ +int spdk_conf_read(struct spdk_conf *cp, const char *file); + +/** + * Find the specified section of the configuration. + * + * \param cp Configuration struct used for the initialization of SPDK app. + * \param name Name of section to find. + * + * \return a pointer to the requested section on success or NULL otherwise. 
+ */ +struct spdk_conf_section *spdk_conf_find_section(struct spdk_conf *cp, const char *name); + +/** + * Get the first section of the configuration. + * + * \param cp Configuration struct used for the initialization of SPDK app. + * + * \return a pointer to the requested section on success or NULL otherwise. + */ +struct spdk_conf_section *spdk_conf_first_section(struct spdk_conf *cp); + +/** + * Get the next section of the configuration. + * + * \param sp The current section of the configuration. + * + * \return a pointer to the requested section on success or NULL otherwise. + */ +struct spdk_conf_section *spdk_conf_next_section(struct spdk_conf_section *sp); + +/** + * Match prefix of the name of section. + * + * \param sp The section of the configuration. + * \param name_prefix Prefix name to match. + * + * \return true on success, false on failure. + */ +bool spdk_conf_section_match_prefix(const struct spdk_conf_section *sp, const char *name_prefix); + +/** + * Get the name of the section. + * + * \param sp The section of the configuration. + * + * \return the name of the section. + */ +const char *spdk_conf_section_get_name(const struct spdk_conf_section *sp); + +/** + * Get the number of the section. + * + * \param sp The section of the configuration. + * + * \return the number of the section. + */ +int spdk_conf_section_get_num(const struct spdk_conf_section *sp); + +/** + * Get the value of the item with name 'key' in the section. + * + * If key appears multiple times, idx1 will control which version to retrieve. + * Indices will start from the top of the configuration file at 0 and increment + * by one for each new appearance. If the configuration key contains multiple + * whitespace delimited values, idx2 controls which value is returned. The index + * begins at 0. + * + * + * \param sp The section of the configuration. + * \param key Name of item. + * \param idx1 The index into the item list for the key. 
+ * \param idx2 The index into the value list for the item. + * + * \return the requested value on success or NULL otherwise. + */ +char *spdk_conf_section_get_nmval(struct spdk_conf_section *sp, const char *key, + int idx1, int idx2); + +/** + * Get the first value of the item with name 'key' in the section. + * + * \param sp The section of the configuration. + * \param key Name of item. + * \param idx The index into the value list for the item. + * + * \return the requested value on success or NULL otherwise. + */ +char *spdk_conf_section_get_nval(struct spdk_conf_section *sp, const char *key, int idx); + +/** + * Get the first value of the first item with name 'key' in the section. + * + * \param sp The section of the configuration. + * \param key Name of item. + * + * \return the requested value on success or NULL otherwise. + */ +char *spdk_conf_section_get_val(struct spdk_conf_section *sp, const char *key); + +/** + * Get the first value of the first item with name 'key' in the section as an integer. + * + * \param sp The section of the configuration. + * \param key Name of item. + * + * \return the requested integer value on success; the error value is defined by the implementation (it cannot be NULL). + */ +int spdk_conf_section_get_intval(struct spdk_conf_section *sp, const char *key); + +/** + * Get the bool value of the item with name 'key' in the section. + * + * This is used to check whether the service is enabled. + * + * \param sp The section of the configuration. + * \param key Name of item. + * \param default_val Default value. + * + * \return true if matching 'Yes/Y/True', false if matching 'No/N/False', default value otherwise. + */ +bool spdk_conf_section_get_boolval(struct spdk_conf_section *sp, const char *key, bool default_val); + +/** + * Set the configuration as the default. + * + * \param cp Configuration to set. 
+ */ +void spdk_conf_set_as_default(struct spdk_conf *cp); + +/** + * Disable sections merging during 'spdk_conf_read()' + * + * \param cp Configuration to be read + */ +void spdk_conf_disable_sections_merge(struct spdk_conf *cp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/cpuset.h b/src/spdk/include/spdk/cpuset.h new file mode 100644 index 000000000..43f2e7343 --- /dev/null +++ b/src/spdk/include/spdk/cpuset.h @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * CPU set management functions + */ + +#ifndef SPDK_CPUSET_H +#define SPDK_CPUSET_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_CPUSET_SIZE 1024 + +/** + * List of CPUs. + */ +struct spdk_cpuset { + char str[SPDK_CPUSET_SIZE / 4 + 1]; + uint8_t cpus[SPDK_CPUSET_SIZE / 8]; +}; + +/** + * Allocate CPU set object. + * + * \return a pointer to the allocated zeroed cpuset on success, or NULL on failure. + */ +struct spdk_cpuset *spdk_cpuset_alloc(void); + +/** + * Free allocated CPU set. + * + * \param set CPU set to be freed. + */ +void spdk_cpuset_free(struct spdk_cpuset *set); + +/** + * Compare two CPU sets. + * + * \param set1 CPU set1. + * \param set2 CPU set2. + * + * \return true if both CPU sets are equal. + */ +bool spdk_cpuset_equal(const struct spdk_cpuset *set1, const struct spdk_cpuset *set2); + +/** + * Copy the content of CPU set to another. + * + * \param dst Destination CPU set + * \param src Source CPU set + */ +void spdk_cpuset_copy(struct spdk_cpuset *dst, const struct spdk_cpuset *src); + +/** + * Perform AND operation on two CPU sets. The result is stored in dst. + * + * \param dst First argument of operation. This value also stores the result of operation. + * \param src Second argument of operation. + */ +void spdk_cpuset_and(struct spdk_cpuset *dst, const struct spdk_cpuset *src); + +/** + * Perform OR operation on two CPU sets. 
The result is stored in dst. + * + * \param dst First argument of operation. This value also stores the result of operation. + * \param src Second argument of operation. + */ +void spdk_cpuset_or(struct spdk_cpuset *dst, const struct spdk_cpuset *src); + +/** + * Perform XOR operation on two CPU sets. The result is stored in dst. + * + * \param dst First argument of operation. This value also stores the result of operation. + * \param src Second argument of operation. + */ +void spdk_cpuset_xor(struct spdk_cpuset *dst, const struct spdk_cpuset *src); + +/** + * Negate all CPUs in CPU set. + * + * \param set CPU set to be negated. This value also stores the result of operation. + */ +void spdk_cpuset_negate(struct spdk_cpuset *set); + +/** + * Clear all CPUs in CPU set. + * + * \param set CPU set to be cleared. + */ +void spdk_cpuset_zero(struct spdk_cpuset *set); + +/** + * Set or clear CPU state in CPU set. + * + * \param set CPU set object. + * \param cpu CPU index to be set or cleared. + * \param state *true* to set cpu, *false* to clear. + */ +void spdk_cpuset_set_cpu(struct spdk_cpuset *set, uint32_t cpu, bool state); + +/** + * Get the state of CPU in CPU set. + * + * \param set CPU set object. + * \param cpu CPU index. + * + * \return the state of selected CPU. + */ +bool spdk_cpuset_get_cpu(const struct spdk_cpuset *set, uint32_t cpu); + +/** + * Get the number of CPUs that are set in CPU set. + * + * \param set CPU set object. + * + * \return the number of CPUs. + */ +uint32_t spdk_cpuset_count(const struct spdk_cpuset *set); + +/** + * Convert a CPU set to hex string. + * + * \param set CPU set. + * + * \return a pointer to hexadecimal representation of CPU set. Buffer to store a + * string is dynamically allocated internally and freed with CPU set object. + * Memory returned by this function might be changed after subsequent calls to + * this function so string should be copied by user. 
+ */ +const char *spdk_cpuset_fmt(struct spdk_cpuset *set); + +/** + * Convert a string containing a CPU core mask into a CPU set. + * + * \param set CPU set. + * \param mask String defining CPU set. By default hexadecimal value is used or + * as CPU list enclosed in square brackets defined as: 'c1[-c2][,c3[-c4],...]'. + * + * \return zero if success, non zero if fails. + */ +int spdk_cpuset_parse(struct spdk_cpuset *set, const char *mask); + +#ifdef __cplusplus +} +#endif +#endif /* SPDK_CPUSET_H */ diff --git a/src/spdk/include/spdk/crc16.h b/src/spdk/include/spdk/crc16.h new file mode 100644 index 000000000..053fbd5e4 --- /dev/null +++ b/src/spdk/include/spdk/crc16.h @@ -0,0 +1,78 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * CRC-16 utility functions + */ + +#ifndef SPDK_CRC16_H +#define SPDK_CRC16_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * T10-DIF CRC-16 polynomial + */ +#define SPDK_T10DIF_CRC16_POLYNOMIAL 0x8bb7u + +/** + * Calculate T10-DIF CRC-16 checksum. + * + * \param init_crc Initial CRC-16 value. + * \param buf Data buffer to checksum. + * \param len Length of buf in bytes. + * \return CRC-16 value. + */ +uint16_t spdk_crc16_t10dif(uint16_t init_crc, const void *buf, size_t len); + +/** + * Calculate T10-DIF CRC-16 checksum and copy data. + * + * \param init_crc Initial CRC-16 value. + * \param dst Destination data buffer for copy. + * \param src Source data buffer for CRC calculation and copy. + * \param len Length of buffer in bytes. + * \return CRC-16 value. + */ +uint16_t spdk_crc16_t10dif_copy(uint16_t init_crc, uint8_t *dst, uint8_t *src, + size_t len); +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_CRC16_H */ diff --git a/src/spdk/include/spdk/crc32.h b/src/spdk/include/spdk/crc32.h new file mode 100644 index 000000000..a2032a25e --- /dev/null +++ b/src/spdk/include/spdk/crc32.h @@ -0,0 +1,73 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * CRC-32 utility functions + */ + +#ifndef SPDK_CRC32_H +#define SPDK_CRC32_H + +#include "spdk/stdinc.h" +#include "spdk/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Calculate a partial CRC-32 IEEE checksum. + * + * \param buf Data buffer to checksum. + * \param len Length of buf in bytes. + * \param crc Previous CRC-32 value. + * \return Updated CRC-32 value. 
+ */ +uint32_t spdk_crc32_ieee_update(const void *buf, size_t len, uint32_t crc); + +/** + * Calculate a partial CRC-32C checksum. + * + * \param buf Data buffer to checksum. + * \param len Length of buf in bytes. + * \param crc Previous CRC-32C value. + * \return Updated CRC-32C value. + */ +uint32_t spdk_crc32c_update(const void *buf, size_t len, uint32_t crc); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_CRC32_H */ diff --git a/src/spdk/include/spdk/dif.h b/src/spdk/include/spdk/dif.h new file mode 100644 index 000000000..7d4006dab --- /dev/null +++ b/src/spdk/include/spdk/dif.h @@ -0,0 +1,457 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_DIF_H +#define SPDK_DIF_H + +#include "spdk/stdinc.h" +#include "spdk/assert.h" + +#define SPDK_DIF_FLAGS_REFTAG_CHECK (1U << 26) +#define SPDK_DIF_FLAGS_APPTAG_CHECK (1U << 27) +#define SPDK_DIF_FLAGS_GUARD_CHECK (1U << 28) + +#define SPDK_DIF_REFTAG_ERROR 0x1 +#define SPDK_DIF_APPTAG_ERROR 0x2 +#define SPDK_DIF_GUARD_ERROR 0x4 +#define SPDK_DIF_DATA_ERROR 0x8 + +enum spdk_dif_type { + SPDK_DIF_DISABLE = 0, + SPDK_DIF_TYPE1 = 1, + SPDK_DIF_TYPE2 = 2, + SPDK_DIF_TYPE3 = 3, +}; + +enum spdk_dif_check_type { + SPDK_DIF_CHECK_TYPE_REFTAG = 1, + SPDK_DIF_CHECK_TYPE_APPTAG = 2, + SPDK_DIF_CHECK_TYPE_GUARD = 3, +}; + +struct spdk_dif { + uint16_t guard; + uint16_t app_tag; + uint32_t ref_tag; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_dif) == 8, "Incorrect size"); + +/** DIF context information */ +struct spdk_dif_ctx { + /** Block size */ + uint32_t block_size; + + /** Metadata size */ + uint32_t md_size; + + /** Metadata location */ + bool md_interleave; + + /** Interval for guard computation for DIF */ + uint32_t guard_interval; + + /** DIF type */ + enum spdk_dif_type dif_type; + + /* Flags to specify the DIF action */ + uint32_t dif_flags; + + /* Initial reference tag */ + uint32_t init_ref_tag; + + /** Application tag */ + uint16_t app_tag; + + /* Application tag mask */ + uint16_t apptag_mask; + + /* Byte offset from the start of the whole data buffer. 
*/ + uint32_t data_offset; + + /* Offset to initial reference tag */ + uint32_t ref_tag_offset; + + /** Guard value of the last data block. + * + * Interim guard value is set if the last data block is partial, or + * seed value is set otherwise. + */ + uint16_t last_guard; + + /* Seed value for guard computation */ + uint16_t guard_seed; + + /* Remapped initial reference tag. */ + uint32_t remapped_init_ref_tag; +}; + +/** DIF error information */ +struct spdk_dif_error { + /** Error type */ + uint8_t err_type; + + /** Expected value */ + uint32_t expected; + + /** Actual value */ + uint32_t actual; + + /** Offset the error occurred at, block based */ + uint32_t err_offset; +}; + +/** + * Initialize DIF context. + * + * \param ctx DIF context. + * \param block_size Block size in a block. + * \param md_size Metadata size in a block. + * \param md_interleave If true, metadata is interleaved with block data. + * If false, metadata is separated from block data. + * \param dif_loc DIF location. If true, DIF is set in the first 8 bytes of metadata. + * If false, DIF is in the last 8 bytes of metadata. + * \param dif_type Type of DIF. + * \param dif_flags Flag to specify the DIF action. + * \param init_ref_tag Initial reference tag. For type 1, this is the + * starting block address. + * \param apptag_mask Application tag mask. + * \param app_tag Application tag. + * \param data_offset Byte offset from the start of the whole data buffer. + * \param guard_seed Seed value for guard computation. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_ctx_init(struct spdk_dif_ctx *ctx, uint32_t block_size, uint32_t md_size, + bool md_interleave, bool dif_loc, enum spdk_dif_type dif_type, uint32_t dif_flags, + uint32_t init_ref_tag, uint16_t apptag_mask, uint16_t app_tag, + uint32_t data_offset, uint16_t guard_seed); + +/** + * Update data offset of DIF context. + * + * \param ctx DIF context. 
+ * \param data_offset Byte offset from the start of the whole data buffer. + */ +void spdk_dif_ctx_set_data_offset(struct spdk_dif_ctx *ctx, uint32_t data_offset); + +/** + * Set remapped initial reference tag of DIF context. + * + * \param ctx DIF context. + * \param remapped_init_ref_tag Remapped initial reference tag. For type 1, this is the + * starting block address. + */ +void spdk_dif_ctx_set_remapped_init_ref_tag(struct spdk_dif_ctx *ctx, + uint32_t remapped_init_ref_tag); + +/** + * Generate DIF for extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param num_blocks Number of blocks of the payload. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_generate(struct iovec *iovs, int iovcnt, uint32_t num_blocks, + const struct spdk_dif_ctx *ctx); + +/** + * Verify DIF for extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param num_blocks Number of blocks of the payload. + * \param ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_verify(struct iovec *iovs, int iovcnt, uint32_t num_blocks, + const struct spdk_dif_ctx *ctx, struct spdk_dif_error *err_blk); + +/** + * Calculate CRC-32C checksum for extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param num_blocks Number of blocks of the payload. + * \param crc32c Initial and updated CRC-32C value. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. 
+ */ +int spdk_dif_update_crc32c(struct iovec *iovs, int iovcnt, uint32_t num_blocks, + uint32_t *crc32c, const struct spdk_dif_ctx *ctx); + +/** + * Copy data and generate DIF for extended LBA payload. + * + * \param iovs iovec array describing the LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param bounce_iov A contiguous buffer forming extended LBA payload. + * \param num_blocks Number of blocks of the LBA payload. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_generate_copy(struct iovec *iovs, int iovcnt, struct iovec *bounce_iov, + uint32_t num_blocks, const struct spdk_dif_ctx *ctx); + +/** + * Verify DIF and copy data for extended LBA payload. + * + * \param iovs iovec array describing the LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param bounce_iov A contiguous buffer forming extended LBA payload. + * \param num_blocks Number of blocks of the LBA payload. + * \param ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_verify_copy(struct iovec *iovs, int iovcnt, struct iovec *bounce_iov, + uint32_t num_blocks, const struct spdk_dif_ctx *ctx, + struct spdk_dif_error *err_blk); + +/** + * Inject bit flip error to extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param num_blocks Number of blocks of the payload. + * \param ctx DIF context. + * \param inject_flags Flags to specify the action of error injection. + * \param inject_offset Offset, in blocks, to which error is injected. + * If multiple error is injected, only the last injection is stored. + * + * \return 0 on success and negated errno otherwise including no metadata. 
+ */ +int spdk_dif_inject_error(struct iovec *iovs, int iovcnt, uint32_t num_blocks, + const struct spdk_dif_ctx *ctx, uint32_t inject_flags, + uint32_t *inject_offset); + +/** + * Generate DIF for separate metadata payload. + * + * \param iovs iovec array describing the LBA payload. + * \param iovcnt Number of elements in iovs. + * \param md_iov A contiguous buffer for metadata. + * \param num_blocks Number of blocks of the separate metadata payload. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dix_generate(struct iovec *iovs, int iovcnt, struct iovec *md_iov, + uint32_t num_blocks, const struct spdk_dif_ctx *ctx); + +/** + * Verify DIF for separate metadata payload. + * + * \param iovs iovec array describing the LBA payload. + * \param iovcnt Number of elements in iovs. + * \param md_iov A contiguous buffer for metadata. + * \param num_blocks Number of blocks of the separate metadata payload. + * \param ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dix_verify(struct iovec *iovs, int iovcnt, struct iovec *md_iov, + uint32_t num_blocks, const struct spdk_dif_ctx *ctx, + struct spdk_dif_error *err_blk); + +/** + * Inject bit flip error to separate metadata payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param md_iov A contiguous buffer for metadata. + * \param num_blocks Number of blocks of the payload. + * \param ctx DIF context. + * \param inject_flags Flag to specify the action of error injection. + * \param inject_offset Offset, in blocks, to which error is injected. + * If multiple errors are injected, only the last injection is stored. + * + * \return 0 on success and negated errno otherwise including no metadata. 
+ */ +int spdk_dix_inject_error(struct iovec *iovs, int iovcnt, struct iovec *md_iov, + uint32_t num_blocks, const struct spdk_dif_ctx *ctx, + uint32_t inject_flags, uint32_t *inject_offset); + +/** + * Setup iovec array to leave a space for metadata for each block. + * + * This function is used to leave a space for metadata for each block when + * the network socket reads data, or to make the network socket ignore a + * space for metadata for each block when the network socket writes data. + * This function removes the necessity of data copy in the SPDK application + * during DIF insertion and strip. + * + * When the extended LBA payload is split into multiple data segments, + * start of each data segment is passed through the DIF context. data_offset + * and data_len are within a data segment. + * + * \param iovs iovec array set by this function. + * \param iovcnt Number of elements in the iovec array. + * \param buf_iovs SGL for the buffer to create extended LBA payload. + * \param buf_iovcnt Size of the SGL for the buffer to create extended LBA payload. + * \param data_offset Offset to store the next incoming data in the current data segment. + * \param data_len Expected length of the newly read data in the current data segment of + * the extended LBA payload. + * \param mapped_len Output parameter that will contain data length mapped by + * the iovec array. + * \param ctx DIF context. + * + * \return Number of used elements in the iovec array on success or negated + * errno otherwise. + */ +int spdk_dif_set_md_interleave_iovs(struct iovec *iovs, int iovcnt, + struct iovec *buf_iovs, int buf_iovcnt, + uint32_t data_offset, uint32_t data_len, + uint32_t *mapped_len, + const struct spdk_dif_ctx *ctx); + +/** + * Generate and insert DIF into metadata space for newly read data block. + * + * When the extended LBA payload is split into multiple data segments, + * start of each data segment is passed through the DIF context. 
data_offset + * and data_len are within a data segment. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param data_offset Offset to the newly read data in the current data segment of + * the extended LBA payload. + * \param data_len Length of the newly read data in the current data segment of + * the extended LBA payload. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_generate_stream(struct iovec *iovs, int iovcnt, + uint32_t data_offset, uint32_t data_len, + struct spdk_dif_ctx *ctx); + +/** + * Verify DIF for the to-be-written block of the extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param data_offset Offset to the to-be-written data in the extended LBA payload. + * \param data_len Length of the to-be-written data in the extended LBA payload. + * \param ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_verify_stream(struct iovec *iovs, int iovcnt, + uint32_t data_offset, uint32_t data_len, + struct spdk_dif_ctx *ctx, + struct spdk_dif_error *err_blk); + +/** + * Calculate CRC-32C checksum of the specified range in the extended LBA payload. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param data_offset Offset to the range. + * \param data_len Length of the range. + * \param crc32c Initial and updated CRC-32C value. + * \param ctx DIF context. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_update_crc32c_stream(struct iovec *iovs, int iovcnt, + uint32_t data_offset, uint32_t data_len, + uint32_t *crc32c, const struct spdk_dif_ctx *ctx); +/** + * Convert offset and size from LBA based to extended LBA based. 
+ * + * \param data_offset Data offset + * \param data_len Data length + * \param buf_offset Buffer offset converted from data offset. + * \param buf_len Buffer length converted from data length + * \param ctx DIF context. + */ +void spdk_dif_get_range_with_md(uint32_t data_offset, uint32_t data_len, + uint32_t *buf_offset, uint32_t *buf_len, + const struct spdk_dif_ctx *ctx); + +/** + * Convert length from LBA based to extended LBA based. + * + * \param data_len Data length + * \param ctx DIF context. + * + * \return Extended LBA based data length. + */ +uint32_t spdk_dif_get_length_with_md(uint32_t data_len, const struct spdk_dif_ctx *ctx); + +/** + * Remap reference tag for extended LBA payload. + * + * When using stacked virtual bdev (e.g. split virtual bdev), block address space for I/O + * will be remapped during I/O processing and so reference tag will have to be remapped + * accordingly. This function is for that case. + * + * \param iovs iovec array describing the extended LBA payload. + * \param iovcnt Number of elements in the iovec array. + * \param num_blocks Number of blocks of the payload. + * \param dif_ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. + * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dif_remap_ref_tag(struct iovec *iovs, int iovcnt, uint32_t num_blocks, + const struct spdk_dif_ctx *dif_ctx, + struct spdk_dif_error *err_blk); + +/** + * Remap reference tag for separate metadata payload. + * + * When using stacked virtual bdev (e.g. split virtual bdev), block address space for I/O + * will be remapped during I/O processing and so reference tag will have to be remapped + * accordingly. This function is for that case. + * + * \param md_iov A contiguous buffer for metadata. + * \param num_blocks Number of blocks of the payload. + * \param dif_ctx DIF context. + * \param err_blk Error information of the block in which DIF error is found. 
+ * + * \return 0 on success and negated errno otherwise. + */ +int spdk_dix_remap_ref_tag(struct iovec *md_iov, uint32_t num_blocks, + const struct spdk_dif_ctx *dif_ctx, + struct spdk_dif_error *err_blk); +#endif /* SPDK_DIF_H */ diff --git a/src/spdk/include/spdk/endian.h b/src/spdk/include/spdk/endian.h new file mode 100644 index 000000000..116b7fb9c --- /dev/null +++ b/src/spdk/include/spdk/endian.h @@ -0,0 +1,178 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Endian conversion functions + */ + +#ifndef SPDK_ENDIAN_H +#define SPDK_ENDIAN_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif +/** Decode a big-endian 16-bit value from the 2 bytes at ptr, read one byte at a time (ptr[0] is the most significant byte). */ +static inline uint16_t +from_be16(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint16_t)tmp[0] << 8) | tmp[1]); +} +/** Store the 16-bit value `in` into the 2 bytes at out in big-endian order (most significant byte first). */ +static inline void +to_be16(void *out, uint16_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[0] = (in >> 8) & 0xFF; + tmp[1] = in & 0xFF; +} +/** Decode a big-endian 32-bit value from the 4 bytes at ptr, read one byte at a time (ptr[0] is the most significant byte). */ +static inline uint32_t +from_be32(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint32_t)tmp[0] << 24) | + ((uint32_t)tmp[1] << 16) | + ((uint32_t)tmp[2] << 8) | + ((uint32_t)tmp[3])); +} +/** Store the 32-bit value `in` into the 4 bytes at out in big-endian order (most significant byte first). */ +static inline void +to_be32(void *out, uint32_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[0] = (in >> 24) & 0xFF; + tmp[1] = (in >> 16) & 0xFF; + tmp[2] = (in >> 8) & 0xFF; + tmp[3] = in & 0xFF; +} +/** Decode a big-endian 64-bit value from the 8 bytes at ptr, read one byte at a time (ptr[0] is the most significant byte). */ +static inline uint64_t +from_be64(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint64_t)tmp[0] << 56) | + ((uint64_t)tmp[1] << 48) | + ((uint64_t)tmp[2] << 40) | + ((uint64_t)tmp[3] << 32) | + ((uint64_t)tmp[4] << 24) | + ((uint64_t)tmp[5] << 16) | + ((uint64_t)tmp[6] << 8) | + ((uint64_t)tmp[7])); +} +/** Store the 64-bit value `in` into the 8 bytes at out in big-endian order (most significant byte first). */ +static inline void +to_be64(void *out, uint64_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[0] = (in >> 56) & 0xFF; + tmp[1] = (in >> 48) & 0xFF; + tmp[2] = (in >> 40) & 0xFF; + tmp[3] = (in >> 32) & 0xFF; + 
tmp[4] = (in >> 24) & 0xFF; + tmp[5] = (in >> 16) & 0xFF; + tmp[6] = (in >> 8) & 0xFF; + tmp[7] = in & 0xFF; +} +/** Decode a little-endian 16-bit value from the 2 bytes at ptr, read one byte at a time (ptr[0] is the least significant byte). */ +static inline uint16_t +from_le16(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint16_t)tmp[1] << 8) | tmp[0]); +} +/** Store the 16-bit value `in` into the 2 bytes at out in little-endian order (least significant byte first). */ +static inline void +to_le16(void *out, uint16_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[1] = (in >> 8) & 0xFF; + tmp[0] = in & 0xFF; +} +/** Decode a little-endian 32-bit value from the 4 bytes at ptr, read one byte at a time (ptr[0] is the least significant byte). */ +static inline uint32_t +from_le32(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint32_t)tmp[3] << 24) | + ((uint32_t)tmp[2] << 16) | + ((uint32_t)tmp[1] << 8) | + ((uint32_t)tmp[0])); +} +/** Store the 32-bit value `in` into the 4 bytes at out in little-endian order (least significant byte first). */ +static inline void +to_le32(void *out, uint32_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[3] = (in >> 24) & 0xFF; + tmp[2] = (in >> 16) & 0xFF; + tmp[1] = (in >> 8) & 0xFF; + tmp[0] = in & 0xFF; +} +/** Decode a little-endian 64-bit value from the 8 bytes at ptr, read one byte at a time (ptr[0] is the least significant byte). */ +static inline uint64_t +from_le64(const void *ptr) +{ + const uint8_t *tmp = (const uint8_t *)ptr; + return (((uint64_t)tmp[7] << 56) | + ((uint64_t)tmp[6] << 48) | + ((uint64_t)tmp[5] << 40) | + ((uint64_t)tmp[4] << 32) | + ((uint64_t)tmp[3] << 24) | + ((uint64_t)tmp[2] << 16) | + ((uint64_t)tmp[1] << 8) | + ((uint64_t)tmp[0])); +} +/** Store the 64-bit value `in` into the 8 bytes at out in little-endian order (least significant byte first). */ +static inline void +to_le64(void *out, uint64_t in) +{ + uint8_t *tmp = (uint8_t *)out; + tmp[7] = (in >> 56) & 0xFF; + tmp[6] = (in >> 48) & 0xFF; + tmp[5] = (in >> 40) & 0xFF; + tmp[4] = (in >> 32) & 0xFF; + tmp[3] = (in >> 24) & 0xFF; + tmp[2] = (in >> 16) & 0xFF; + tmp[1] = (in >> 8) & 0xFF; + tmp[0] = in & 0xFF; +} + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_ENDIAN_H */ diff --git a/src/spdk/include/spdk/env.h b/src/spdk/include/spdk/env.h new file mode 100644 index 000000000..3e2018ac8 --- /dev/null +++ b/src/spdk/include/spdk/env.h @@ -0,0 +1,1301 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * Copyright (c) NetApp, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Encapsulated third-party dependencies + */ + +#ifndef SPDK_ENV_H +#define SPDK_ENV_H + +#include "spdk/stdinc.h" +#include "spdk/queue.h" +#include "spdk/pci_ids.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_ENV_SOCKET_ID_ANY (-1) +#define SPDK_ENV_LCORE_ID_ANY (UINT32_MAX) + +/** + * Memory is dma-safe. + */ +#define SPDK_MALLOC_DMA 0x01 + +/** + * Memory is sharable across process boundaries. 
+ */ +#define SPDK_MALLOC_SHARE 0x02 + +#define SPDK_MAX_MEMZONE_NAME_LEN 32 +#define SPDK_MAX_MEMPOOL_NAME_LEN 29 + +/** + * Memzone flags + */ +#define SPDK_MEMZONE_NO_IOVA_CONTIG 0x00100000 /**< no iova contiguity */ + +/** + * \brief Environment initialization options + */ +struct spdk_env_opts { + const char *name; + const char *core_mask; + int shm_id; + int mem_channel; + int master_core; + int mem_size; + bool no_pci; + bool hugepage_single_segments; + bool unlink_hugepage; + size_t num_pci_addr; + const char *hugedir; + struct spdk_pci_addr *pci_blacklist; + struct spdk_pci_addr *pci_whitelist; + const char *iova_mode; + uint64_t base_virtaddr; + + /** Opaque context for use of the env implementation. */ + void *env_context; +}; + +/** + * Allocate dma/sharable memory based on a given dma_flg. It is a memory buffer + * with the given size, alignment and socket id. + * + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr **Deprecated**. Please use spdk_vtophys() for retrieving physical + * addresses. A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * \param flags Combination of SPDK_MALLOC flags (\ref SPDK_MALLOC_DMA, \ref SPDK_MALLOC_SHARE). + * At least one flag must be specified. + * + * \return a pointer to the allocated memory buffer. + */ +void *spdk_malloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags); + +/** + * Allocate dma/sharable memory based on a given dma_flg. It is a memory buffer + * with the given size, alignment and socket id. Also, the buffer will be zeroed. + * + * \param size Size in bytes. 
+ * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr **Deprecated**. Please use spdk_vtophys() for retrieving physical + * addresses. A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * \param flags Combination of SPDK_MALLOC flags (\ref SPDK_MALLOC_DMA, \ref SPDK_MALLOC_SHARE). + * + * \return a pointer to the allocated memory buffer. + */ +void *spdk_zmalloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags); + +/** + * Resize a dma/sharable memory buffer with the given new size and alignment. + * Existing contents are preserved. + * + * \param buf Buffer to resize. + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * + * \return a pointer to the resized memory buffer. + */ +void *spdk_realloc(void *buf, size_t size, size_t align); + +/** + * Free buffer memory that was previously allocated with spdk_malloc() or spdk_zmalloc(). + * + * \param buf Buffer to free. + */ +void spdk_free(void *buf); + +/** + * Initialize the default value of opts. + * + * \param opts Data structure where SPDK will initialize the default options. + */ +void spdk_env_opts_init(struct spdk_env_opts *opts); + +/** + * Initialize or reinitialize the environment library. + * For initialization, this must be called prior to using any other functions + * in this library. 
For reinitialization, the parameter `opts` must be set to + * NULL and this must be called after the environment library was finished by + * spdk_env_fini() within the same process. + * + * \param opts Environment initialization options. + * \return 0 on success, or negative errno on failure. + */ +int spdk_env_init(const struct spdk_env_opts *opts); + +/** + * Release any resources of the environment library that were allocated with + * spdk_env_init(). After this call, no SPDK env function calls may be made. + * It is expected that common usage of this function is to call it just before + * terminating the process or before reinitializing the environment library + * within the same process. + */ +void spdk_env_fini(void); + +/** + * Allocate a pinned memory buffer with the given size and alignment. + * + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * + * \return a pointer to the allocated memory buffer. + */ +void *spdk_dma_malloc(size_t size, size_t align, uint64_t *phys_addr); + +/** + * Allocate a pinned, memory buffer with the given size, alignment and socket id. + * + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * + * \return a pointer to the allocated memory buffer. 
+ */ +void *spdk_dma_malloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id); + +/** + * Allocate a pinned memory buffer with the given size and alignment. The buffer + * will be zeroed. + * + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * + * \return a pointer to the allocated memory buffer. + */ +void *spdk_dma_zmalloc(size_t size, size_t align, uint64_t *phys_addr); + +/** + * Allocate a pinned memory buffer with the given size, alignment and socket id. + * The buffer will be zeroed. + * + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. + * \param phys_addr A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * + * \return a pointer to the allocated memory buffer. + */ +void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id); + +/** + * Resize the allocated and pinned memory buffer with the given new size and + * alignment. Existing contents are preserved. + * + * \param buf Buffer to resize. + * \param size Size in bytes. + * \param align If non-zero, the allocated buffer is aligned to a multiple of + * align. In this case, it must be a power of two. The returned buffer is always + * aligned to at least cache line size. 
+ * \param phys_addr A pointer to the variable to hold the physical address of + * the allocated buffer is passed. If NULL, the physical address is not returned. + * + * \return a pointer to the resized memory buffer. + */ +void *spdk_dma_realloc(void *buf, size_t size, size_t align, uint64_t *phys_addr); + +/** + * Free a memory buffer previously allocated, for example from spdk_dma_zmalloc(). + * This call is never made from the performance path. + * + * \param buf Buffer to free. + */ +void spdk_dma_free(void *buf); + +/** + * Reserve a named, process shared memory zone with the given size, socket_id + * and flags. Unless `SPDK_MEMZONE_NO_IOVA_CONTIG` flag is provided, the returned + * memory will be IOVA contiguous. + * + * \param name Name to set for this memory zone. + * \param len Length in bytes. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * \param flags Flags to set for this memory zone. + * + * \return a pointer to the allocated memory address on success, or NULL on failure. + */ +void *spdk_memzone_reserve(const char *name, size_t len, int socket_id, unsigned flags); + +/** + * Reserve a named, process shared memory zone with the given size, socket_id, + * flags and alignment. Unless `SPDK_MEMZONE_NO_IOVA_CONTIG` flag is provided, + * the returned memory will be IOVA contiguous. + * + * \param name Name to set for this memory zone. + * \param len Length in bytes. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * \param flags Flags to set for this memory zone. + * \param align Alignment for resulting memzone. Must be a power of 2. + * + * \return a pointer to the allocated memory address on success, or NULL on failure. + */ +void *spdk_memzone_reserve_aligned(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align); + +/** + * Lookup the memory zone identified by the given name. + * + * \param name Name of the memory zone. 
+ * + * \return a pointer to the reserved memory address on success, or NULL on failure. + */ +void *spdk_memzone_lookup(const char *name); + +/** + * Free the memory zone identified by the given name. + * + * \return 0 on success, -1 on failure. + */ +int spdk_memzone_free(const char *name); + +/** + * Dump debug information about all memzones. + * + * \param f File to write debug information to. + */ +void spdk_memzone_dump(FILE *f); + +struct spdk_mempool; + +#define SPDK_MEMPOOL_DEFAULT_CACHE_SIZE SIZE_MAX + +/** + * Create a thread-safe memory pool. + * + * \param name Name for the memory pool. + * \param count Count of elements. + * \param ele_size Element size in bytes. + * \param cache_size How many elements may be cached in per-core caches. Use + * SPDK_MEMPOOL_DEFAULT_CACHE_SIZE for a reasonable default, or 0 for no per-core cache. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * + * \return a pointer to the created memory pool. + */ +struct spdk_mempool *spdk_mempool_create(const char *name, size_t count, + size_t ele_size, size_t cache_size, int socket_id); + +/** + * An object callback function for memory pool. + * + * Used by spdk_mempool_create_ctor(). + */ +typedef void (spdk_mempool_obj_cb_t)(struct spdk_mempool *mp, + void *opaque, void *obj, unsigned obj_idx); + +/** + * Create a thread-safe memory pool with user provided initialization function + * and argument. + * + * \param name Name for the memory pool. + * \param count Count of elements. + * \param ele_size Element size in bytes. + * \param cache_size How many elements may be cached in per-core caches. Use + * SPDK_MEMPOOL_DEFAULT_CACHE_SIZE for a reasonable default, or 0 for no per-core cache. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * \param obj_init User provided object callback initialization function.
+ * \param obj_init_arg User provided callback initialization function argument. + * + * \return a pointer to the created memory pool. + */ +struct spdk_mempool *spdk_mempool_create_ctor(const char *name, size_t count, + size_t ele_size, size_t cache_size, int socket_id, + spdk_mempool_obj_cb_t *obj_init, void *obj_init_arg); + +/** + * Get the name of a memory pool. + * + * \param mp Memory pool to query. + * + * \return the name of the memory pool. + */ +char *spdk_mempool_get_name(struct spdk_mempool *mp); + +/** + * Free a memory pool. + */ +void spdk_mempool_free(struct spdk_mempool *mp); + +/** + * Get an element from a memory pool. If no elements remain, return NULL. + * + * \param mp Memory pool to query. + * + * \return a pointer to the element. + */ +void *spdk_mempool_get(struct spdk_mempool *mp); + +/** + * Get multiple elements from a memory pool. + * + * \param mp Memory pool to get multiple elements from. + * \param ele_arr Array of the elements to fill. + * \param count Count of elements to get. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_mempool_get_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count); + +/** + * Put an element back into the memory pool. + * + * \param mp Memory pool to put element back into. + * \param ele Element to put. + */ +void spdk_mempool_put(struct spdk_mempool *mp, void *ele); + +/** + * Put multiple elements back into the memory pool. + * + * \param mp Memory pool to put multiple elements back into. + * \param ele_arr Array of the elements to put. + * \param count Count of elements to put. + */ +void spdk_mempool_put_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count); + +/** + * Get the number of entries in the memory pool. + * + * \param pool Memory pool to query. + * + * \return the number of entries in the memory pool. + */ +size_t spdk_mempool_count(const struct spdk_mempool *pool); + +/** + * Iterate through all elements of the pool and call a function on each one. 
+ * + * \param mp Memory pool to iterate on. + * \param obj_cb Function to call on each element. + * \param obj_cb_arg Opaque pointer passed to the callback function. + * + * \return Number of elements iterated. + */ +uint32_t spdk_mempool_obj_iter(struct spdk_mempool *mp, spdk_mempool_obj_cb_t obj_cb, + void *obj_cb_arg); + +/** + * Lookup the memory pool identified by the given name. + * + * \param name Name of the memory pool. + * + * \return a pointer to the memory pool on success, or NULL on failure. + */ +struct spdk_mempool *spdk_mempool_lookup(const char *name); + +/** + * Get the number of dedicated CPU cores utilized by this env abstraction. + * + * \return the number of dedicated CPU cores. + */ +uint32_t spdk_env_get_core_count(void); + +/** + * Get the CPU core index of the current thread. + * + * This will only function when called from threads set up by + * this environment abstraction. For any other threads \c SPDK_ENV_LCORE_ID_ANY + * will be returned. + * + * \return the CPU core index of the current thread. + */ +uint32_t spdk_env_get_current_core(void); + +/** + * Get the index of the first dedicated CPU core for this application. + * + * \return the index of the first dedicated CPU core. + */ +uint32_t spdk_env_get_first_core(void); + +/** + * Get the index of the last dedicated CPU core for this application. + * + * \return the index of the last dedicated CPU core. + */ +uint32_t spdk_env_get_last_core(void); + +/** + * Get the index of the next dedicated CPU core for this application. + * + * If there is no next core, return UINT32_MAX. + * + * \param prev_core Index of previous core. + * + * \return the index of the next dedicated CPU core. + */ +uint32_t spdk_env_get_next_core(uint32_t prev_core); + +#define SPDK_ENV_FOREACH_CORE(i) \ + for (i = spdk_env_get_first_core(); \ + i < UINT32_MAX; \ + i = spdk_env_get_next_core(i)) + +/** + * Get the socket ID for the given core. + * + * \param core CPU core to query. 
+ * + * \return the socket ID for the given core. + */ +uint32_t spdk_env_get_socket_id(uint32_t core); + +typedef int (*thread_start_fn)(void *); + +/** + * Launch a thread pinned to the given core. Only a single pinned thread may be + * launched per core. Subsequent attempts to launch pinned threads on that core + * will fail. + * + * \param core The core to pin the thread to. + * \param fn Entry point on the new thread. + * \param arg Argument passed to thread_start_fn + * + * \return 0 on success, negative errno on failure. + */ +int spdk_env_thread_launch_pinned(uint32_t core, thread_start_fn fn, void *arg); + +/** + * Wait for all threads to exit before returning. + */ +void spdk_env_thread_wait_all(void); + +/** + * Check whether the calling process is primary process. + * + * \return true if the calling process is primary process, or false otherwise. + */ +bool spdk_process_is_primary(void); + +/** + * Get a monotonic timestamp counter. + * + * \return the monotonic timestamp counter. + */ +uint64_t spdk_get_ticks(void); + +/** + * Get the tick rate of spdk_get_ticks() per second. + * + * \return the tick rate of spdk_get_ticks() per second. + */ +uint64_t spdk_get_ticks_hz(void); + +/** + * Delay the given number of microseconds. + * + * \param us Number of microseconds. + */ +void spdk_delay_us(unsigned int us); + +/** + * Pause CPU execution for a short while + */ +void spdk_pause(void); + +struct spdk_ring; + +enum spdk_ring_type { + SPDK_RING_TYPE_SP_SC, /* Single-producer, single-consumer */ + SPDK_RING_TYPE_MP_SC, /* Multi-producer, single-consumer */ + SPDK_RING_TYPE_MP_MC, /* Multi-producer, multi-consumer */ +}; + +/** + * Create a ring. + * + * \param type Type for the ring. (SPDK_RING_TYPE_SP_SC, SPDK_RING_TYPE_MP_SC or SPDK_RING_TYPE_MP_MC). + * \param count Size of the ring in elements. + * \param socket_id Socket ID to allocate memory on, or SPDK_ENV_SOCKET_ID_ANY + * for any socket. + * + * \return a pointer to the created ring.
+ */ +struct spdk_ring *spdk_ring_create(enum spdk_ring_type type, size_t count, int socket_id); + +/** + * Free the ring. + * + * \param ring Ring to free. + */ +void spdk_ring_free(struct spdk_ring *ring); + +/** + * Get the number of objects in the ring. + * + * \param ring the ring. + * + * \return the number of objects in the ring. + */ +size_t spdk_ring_count(struct spdk_ring *ring); + +/** + * Queue the array of objects (with length count) on the ring. + * + * \param ring A pointer to the ring. + * \param objs A pointer to the array to be queued. + * \param count Length count of the array of objects. + * \param free_space If non-NULL, amount of free space after the enqueue has finished. + * + * \return the number of objects enqueued. + */ +size_t spdk_ring_enqueue(struct spdk_ring *ring, void **objs, size_t count, + size_t *free_space); + +/** + * Dequeue count objects from the ring into the array objs. + * + * \param ring A pointer to the ring. + * \param objs A pointer to the array to be dequeued. + * \param count Maximum number of elements to be dequeued. + * + * \return the number of objects dequeued, which is no greater than 'count'. + */ +size_t spdk_ring_dequeue(struct spdk_ring *ring, void **objs, size_t count); + +/** + * Reports whether the SPDK application is using the IOMMU for DMA + * + * \return True if we are using the IOMMU, false otherwise. + */ +bool spdk_iommu_is_enabled(void); + +#define SPDK_VTOPHYS_ERROR (0xFFFFFFFFFFFFFFFFULL) + +/** + * Get the physical address of a buffer. + * + * \param buf A pointer to a buffer. + * \param size Contains the size of the memory region pointed to by buf. + * If buf is successfully translated, then this is updated with the size of + * the memory region for which the translation is valid. + * + * \return the physical address of this buffer on success, or SPDK_VTOPHYS_ERROR + * on failure.
+ */ +uint64_t spdk_vtophys(void *buf, uint64_t *size); + +struct spdk_pci_addr { + uint32_t domain; + uint8_t bus; + uint8_t dev; + uint8_t func; +}; + +struct spdk_pci_id { + uint32_t class_id; /**< Class ID or SPDK_PCI_CLASS_ANY_ID. */ + uint16_t vendor_id; /**< Vendor ID or SPDK_PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or SPDK_PCI_ANY_ID. */ + uint16_t subvendor_id; /**< Subsystem vendor ID or SPDK_PCI_ANY_ID. */ + uint16_t subdevice_id; /**< Subsystem device ID or SPDK_PCI_ANY_ID. */ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define SPDK_PCI_DRIVER_NEED_MAPPING 0x0001 +/** Device needs PCI BAR mapping with enabled write combining (wc) */ +#define SPDK_PCI_DRIVER_WC_ACTIVATE 0x0002 + +void spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags); + +struct spdk_pci_device { + struct spdk_pci_device *parent; + void *dev_handle; + struct spdk_pci_addr addr; + struct spdk_pci_id id; + int socket_id; + const char *type; + + int (*map_bar)(struct spdk_pci_device *dev, uint32_t bar, + void **mapped_addr, uint64_t *phys_addr, uint64_t *size); + int (*unmap_bar)(struct spdk_pci_device *dev, uint32_t bar, + void *addr); + int (*cfg_read)(struct spdk_pci_device *dev, void *value, + uint32_t len, uint32_t offset); + int (*cfg_write)(struct spdk_pci_device *dev, void *value, + uint32_t len, uint32_t offset); + + struct _spdk_pci_device_internal { + struct spdk_pci_driver *driver; + bool attached; + /* optional fd for exclusive access to this device on this process */ + int claim_fd; + bool pending_removal; + /* The device was successfully removed on a DPDK interrupt thread, + * but to prevent data races we couldn't remove it from the global + * device list right away. It'll be removed as soon as possible + * on a regular thread when any public pci function is called. 
+ */ + bool removed; + TAILQ_ENTRY(spdk_pci_device) tailq; + } internal; +}; + +typedef int (*spdk_pci_enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev); + +#define SPDK_PCI_DEVICE(vend, dev) \ + .class_id = SPDK_PCI_CLASS_ANY_ID, \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subvendor_id = SPDK_PCI_ANY_ID, \ + .subdevice_id = SPDK_PCI_ANY_ID + +#define SPDK_PCI_DRIVER_REGISTER(name, id_table, flags) \ +__attribute__((constructor)) static void pci_drv ## _register(void) \ +{ \ + spdk_pci_driver_register(name, id_table, flags); \ +} + +/** + * Get the VMD PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_vmd_get_driver(void); + +/** + * Get the I/OAT PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_ioat_get_driver(void); + +/** + * Get the IDXD PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_idxd_get_driver(void); + +/** + * Get the Virtio PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_virtio_get_driver(void); + +/** + * Get PCI driver by name (e.g. "nvme", "vmd", "ioat"). + */ +struct spdk_pci_driver *spdk_pci_get_driver(const char *name); + +/** + * Get the NVMe PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_nvme_get_driver(void); + +/** + * Enumerate all PCI devices supported by the provided driver and try to + * attach those that weren't attached yet. The provided callback will be + * called for each such device and its return code will decide whether that + * device is attached or not. Attached devices have to be manually detached + * with spdk_pci_device_detach() to be attach-able again. + * + * \param driver Driver for a specific device type. + * \param enum_cb Callback to be called for each non-attached PCI device. 
+ * The return code can be as follows: + * -1 - device was not attached, the enumeration is stopped + * 0 - device attached successfully, enumeration continues + * 1 - device was not attached, enumeration continues + * \param enum_ctx Additional context passed to the callback function. + * + * \return -1 if an internal error occurred or the provided callback returned -1, + * 0 otherwise + */ +int spdk_pci_enumerate(struct spdk_pci_driver *driver, spdk_pci_enum_cb enum_cb, void *enum_ctx); + +/** + * Begin iterating over enumerated PCI devices by calling this function to get + * the first PCI device. If there are no PCI devices enumerated, return NULL. + * + * \return a pointer to a PCI device on success, NULL otherwise. + */ +struct spdk_pci_device *spdk_pci_get_first_device(void); + +/** + * Continue iterating over enumerated PCI devices. + * If there are no additional PCI devices, return NULL. + * + * \param prev Previous PCI device returned from \ref spdk_pci_get_first_device + * or \ref spdk_pci_get_next_device + * + * \return a pointer to the next PCI device on success, NULL otherwise. + */ +struct spdk_pci_device *spdk_pci_get_next_device(struct spdk_pci_device *prev); + +/** + * Map a PCI BAR in the current process. + * + * \param dev PCI device. + * \param bar BAR number. + * \param mapped_addr A variable to store the virtual address of the mapping. + * \param phys_addr A variable to store the physical address of the mapping. + * \param size A variable to store the size of the bar (in bytes). + * + * \return 0 on success. + */ +int spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, + void **mapped_addr, uint64_t *phys_addr, uint64_t *size); + +/** + * Unmap a PCI BAR from the current process. This happens automatically when + * the PCI device is detached. + * + * \param dev PCI device. + * \param bar BAR number. + * \param mapped_addr Virtual address of the bar. + * + * \return 0 on success.
+ */ +int spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, + void *mapped_addr); + +/** + * Get the domain of a PCI device. + * + * \param dev PCI device. + * + * \return PCI device domain. + */ +uint32_t spdk_pci_device_get_domain(struct spdk_pci_device *dev); + +/** + * Get the bus number of a PCI device. + * + * \param dev PCI device. + * + * \return PCI bus number. + */ +uint8_t spdk_pci_device_get_bus(struct spdk_pci_device *dev); + +/** + * Get the device number within the PCI bus the device is on. + * + * \param dev PCI device. + * + * \return PCI device number. + */ +uint8_t spdk_pci_device_get_dev(struct spdk_pci_device *dev); + +/** + * Get the particular function number represented by struct spdk_pci_device. + * + * \param dev PCI device. + * + * \return PCI function number. + */ +uint8_t spdk_pci_device_get_func(struct spdk_pci_device *dev); + +/** + * Get the full DomainBDF address of a PCI device. + * + * \param dev PCI device. + * + * \return PCI address. + */ +struct spdk_pci_addr spdk_pci_device_get_addr(struct spdk_pci_device *dev); + +/** + * Get the vendor ID of a PCI device. + * + * \param dev PCI device. + * + * \return vendor ID. + */ +uint16_t spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev); + +/** + * Get the device ID of a PCI device. + * + * \param dev PCI device. + * + * \return device ID. + */ +uint16_t spdk_pci_device_get_device_id(struct spdk_pci_device *dev); + +/** + * Get the subvendor ID of a PCI device. + * + * \param dev PCI device. + * + * \return subvendor ID. + */ +uint16_t spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev); + +/** + * Get the subdevice ID of a PCI device. + * + * \param dev PCI device. + * + * \return subdevice ID. + */ +uint16_t spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev); + +/** + * Get the PCI ID of a PCI device. + * + * \param dev PCI device. + * + * \return PCI ID. 
+ */ +struct spdk_pci_id spdk_pci_device_get_id(struct spdk_pci_device *dev); + +/** + * Get the NUMA node the PCI device is on. + * + * \param dev PCI device. + * + * \return NUMA node index (>= 0). + */ +int spdk_pci_device_get_socket_id(struct spdk_pci_device *dev); + +/** + * Serialize the PCIe Device Serial Number into the provided buffer. + * The buffer will contain a 16-character-long serial number followed by + * a NUL terminator. + * + * \param dev PCI device. + * \param sn Buffer to store the serial number in. + * \param len Length of buffer. Must be at least 17. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len); + +/** + * Claim a PCI device for exclusive SPDK userspace access. + * + * Uses F_SETLK on a shared memory file with the PCI address embedded in its name. + * As long as this file remains open with the lock acquired, other processes will + * not be able to successfully call this function on the same PCI device. + * + * The device can be un-claimed by the owning process with spdk_pci_device_unclaim(). + * It will also be unclaimed automatically when detached. + * + * \param dev PCI device to claim. + * + * \return -EACCES if the device has already been claimed, + * negative errno on unexpected errors, + * 0 on success. + */ +int spdk_pci_device_claim(struct spdk_pci_device *dev); + +/** + * Undo spdk_pci_device_claim(). + * + * \param dev PCI device to unclaim. + */ +void spdk_pci_device_unclaim(struct spdk_pci_device *dev); + +/** + * Release all resources associated with the given device and detach it. As long + * as the PCI device is physically available, it will be attachable again. + * + * \param device PCI device. + */ +void spdk_pci_device_detach(struct spdk_pci_device *device); + +/** + * Attach a PCI device. This will bypass all blacklist rules and explicitly + * attach a device at the provided address.
The return code of the provided + * callback will decide whether that device is attached or not. Attached + * devices have to be manually detached with spdk_pci_device_detach() to be + * attach-able again. + * + * \param driver Driver for a specific device type. The device will only be + * attached if it's supported by this driver. + * \param enum_cb Callback to be called for the PCI device once it's found. + * The return code can be as follows: + * -1, 1 - an error occurred, fail the attach request entirely + * 0 - device attached successfully + * \param enum_ctx Additional context passed to the callback function. + * \param pci_address Address of the device to attach. + * + * \return -1 if a device at the provided PCI address couldn't be found, + * -1 if an internal error happened or the provided callback returned non-zero, + * 0 otherwise + */ +int spdk_pci_device_attach(struct spdk_pci_driver *driver, spdk_pci_enum_cb enum_cb, + void *enum_ctx, struct spdk_pci_addr *pci_address); + +/** + * Read \c len bytes from the PCI configuration space. + * + * \param dev PCI device. + * \param buf A buffer to copy the data into. + * \param len Number of bytes to read. + * \param offset Offset (in bytes) in the PCI config space to start reading from. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *buf, uint32_t len, + uint32_t offset); + +/** + * Write \c len bytes into the PCI configuration space. + * + * \param dev PCI device. + * \param buf A buffer to copy the data from. + * \param len Number of bytes to write. + * \param offset Offset (in bytes) in the PCI config space to start writing to. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *buf, uint32_t len, + uint32_t offset); + +/** + * Read 1 byte from the PCI configuration space. + * + * \param dev PCI device. + * \param value A buffer to copy the data into. 
+ * \param offset Offset (in bytes) in the PCI config space to start reading from. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset); + +/** + * Write 1 byte into the PCI configuration space. + * + * \param dev PCI device. + * \param value A value to write. + * \param offset Offset (in bytes) in the PCI config space to start writing to. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset); + +/** + * Read 2 bytes from the PCI configuration space. + * + * \param dev PCI device. + * \param value A buffer to copy the data into. + * \param offset Offset (in bytes) in the PCI config space to start reading from. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset); + +/** + * Write 2 bytes into the PCI configuration space. + * + * \param dev PCI device. + * \param value A value to write. + * \param offset Offset (in bytes) in the PCI config space to start writing to. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset); + +/** + * Read 4 bytes from the PCI configuration space. + * + * \param dev PCI device. + * \param value A buffer to copy the data into. + * \param offset Offset (in bytes) in the PCI config space to start reading from. + * + * \return 0 on success, -1 on failure. + */ +int spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset); + +/** + * Write 4 bytes into the PCI configuration space. + * + * \param dev PCI device. + * \param value A value to write. + * \param offset Offset (in bytes) in the PCI config space to start writing to. + * + * \return 0 on success, -1 on failure. 
+ */ +int spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset); + +/** + * Check if device was requested to be removed from the process. This can be + * caused either by physical device hotremoval or OS-triggered removal. In the + * latter case, the device may continue to function properly even if this + * function returns \c true . The upper-layer driver may check this function + * periodically and eventually detach the device. + * + * \param dev PCI device. + * + * \return true if the device was requested to be removed, false otherwise. + */ +bool spdk_pci_device_is_removed(struct spdk_pci_device *dev); + +/** + * Compare two PCI addresses. + * + * \param a1 PCI address 1. + * \param a2 PCI address 2. + * + * \return 0 if a1 == a2, less than 0 if a1 < a2, greater than 0 if a1 > a2 + */ +int spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2); + +/** + * Convert a string representation of a PCI address into a struct spdk_pci_addr. + * + * \param addr PCI address output on success. + * \param bdf PCI address in domain:bus:device.function format or + * domain.bus.device.function format. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf); + +/** + * Convert a struct spdk_pci_addr to a string. + * + * \param bdf String into which a string will be output in the format + * domain:bus:device.function. The string must be at least 14 characters in size. + * \param sz Size of bdf in bytes. Must be at least 14. + * \param addr PCI address. + * + * \return 0 on success, or a negated errno on failure. + */ +int spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr); + +/** + * Hook a custom PCI device into the PCI layer. The device will be attachable, + * enumerable, and will call provided callbacks on each PCI resource access + * request.
+ * + * \param drv driver that will be able to attach the device + * \param dev fully initialized PCI device struct + */ +void spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev); + +/** + * Un-hook a custom PCI device from the PCI layer. The device must not be attached. + * + * \param dev fully initialized PCI device struct + */ +void spdk_pci_unhook_device(struct spdk_pci_device *dev); + +/** + * Return the type of the PCI device. + * + * \param dev PCI device + * + * \return string representing the type of the device + */ +const char *spdk_pci_device_get_type(const struct spdk_pci_device *dev); + +/** + * Remove any CPU affinity from the current thread. + */ +void spdk_unaffinitize_thread(void); + +/** + * Call a function with CPU affinity unset. + * + * This can be used to run a function that creates other threads without inheriting the calling + * thread's CPU affinity. + * + * \param cb Function to call + * \param arg Parameter to the function cb(). + * + * \return the return value of cb(). + */ +void *spdk_call_unaffinitized(void *cb(void *arg), void *arg); + +/** + * Page-granularity memory address translation table. + */ +struct spdk_mem_map; + +enum spdk_mem_map_notify_action { + SPDK_MEM_MAP_NOTIFY_REGISTER, + SPDK_MEM_MAP_NOTIFY_UNREGISTER, +}; + +typedef int (*spdk_mem_map_notify_cb)(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size); + +typedef int (*spdk_mem_map_contiguous_translations)(uint64_t addr_1, uint64_t addr_2); + +/** + * A function table to be implemented by each memory map. + */ +struct spdk_mem_map_ops { + spdk_mem_map_notify_cb notify_cb; + spdk_mem_map_contiguous_translations are_contiguous; +}; + +/** + * Allocate a virtual memory address translation map. + * + * \param default_translation Default translation for the map. + * \param ops Table of callback functions for map operations. + * \param cb_ctx Argument passed to the callback function. 
+ * + * \return a pointer to the allocated virtual memory address translation map. + */ +struct spdk_mem_map *spdk_mem_map_alloc(uint64_t default_translation, + const struct spdk_mem_map_ops *ops, void *cb_ctx); + +/** + * Free a memory map previously allocated by spdk_mem_map_alloc(). + * + * \param pmap Memory map to free. + */ +void spdk_mem_map_free(struct spdk_mem_map **pmap); + +/** + * Register an address translation for a range of virtual memory. + * + * \param map Memory map. + * \param vaddr Virtual address of the region to register - must be 2 MB aligned. + * \param size Size of the region in bytes - must be multiple of 2 MB in the + * current implementation. + * \param translation Translation to store in the map for this address range. + * + * \sa spdk_mem_map_clear_translation(). + * + * \return 0 on success, negative errno on failure. + */ +int spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size, + uint64_t translation); + +/** + * Unregister an address translation. + * + * \param map Memory map. + * \param vaddr Virtual address of the region to unregister - must be 2 MB aligned. + * \param size Size of the region in bytes - must be multiple of 2 MB in the + * current implementation. + * + * \sa spdk_mem_map_set_translation(). + * + * \return 0 on success, negative errno on failure. + */ +int spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size); + +/** + * Look up the translation of a virtual address in a memory map. + * + * \param map Memory map. + * \param vaddr Virtual address. + * \param size Contains the size of the memory region pointed to by vaddr. + * If vaddr is successfully translated, then this is updated with the size of + * the memory region for which the translation is valid. + * + * \return the translation of vaddr stored in the map, or default_translation + * as specified in spdk_mem_map_alloc() if vaddr is not present in the map. 
+ */ +uint64_t spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size); + +/** + * Register the specified memory region for address translation. + * + * The memory region must map to pinned huge pages (2MB or greater). + * + * \param vaddr Virtual address to register. + * \param len Length in bytes of the vaddr. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_mem_register(void *vaddr, size_t len); + +/** + * Unregister the specified memory region from vtophys address translation. + * + * The caller must ensure all in-flight DMA operations to this memory region + * are completed or cancelled before calling this function. + * + * \param vaddr Virtual address to unregister. + * \param len Length in bytes of the vaddr. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_mem_unregister(void *vaddr, size_t len); + +/** + * Reserve the address space specified in all memory maps. + * + * This pre-allocates the necessary space in the memory maps such that + * future calls to spdk_mem_register() on that region require no + * internal memory allocations. + * + * \param vaddr Virtual address to reserve + * \param len Length in bytes of vaddr + * + * \return 0 on success, negated errno on failure. + */ +int spdk_mem_reserve(void *vaddr, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/env_dpdk.h b/src/spdk/include/spdk/env_dpdk.h new file mode 100644 index 000000000..6716f323c --- /dev/null +++ b/src/spdk/include/spdk/env_dpdk.h @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Encapsulated DPDK specific dependencies + */ + +#include "spdk/stdinc.h" + +#ifndef SPDK_ENV_DPDK_H +#define SPDK_ENV_DPDK_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Initialize the environment library after DPDK env is already initialized. + * If DPDK's rte_eal_init is already called, this function must be called + * instead of spdk_env_init, prior to using any other functions in SPDK + * env library. + * + * \param legacy_mem Indicates whether DPDK was initialized with --legacy-mem + * eal parameter. + * \return 0 on success, or negative errno on failure. + */ +int spdk_env_dpdk_post_init(bool legacy_mem); + +/** + * Release any resources of the environment library that were allocated with + * spdk_env_dpdk_post_init(). 
After this call, no DPDK function calls may + * be made. It is expected that common usage of this function is to call it + * just before terminating the process. + */ +void spdk_env_dpdk_post_fini(void); + +/** + * Check if DPDK was initialized external to the SPDK env_dpdk library. + * + * \return true if DPDK was initialized external to the SPDK env_dpdk library. + * \return false otherwise + */ +bool spdk_env_dpdk_external_init(void); + +/** + * Dump the env allocated memory to the given file. + * + * \param file The file object to write to. + */ +void spdk_env_dpdk_dump_mem_stats(FILE *file); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/event.h b/src/spdk/include/spdk/event.h new file mode 100644 index 000000000..ea870fe9f --- /dev/null +++ b/src/spdk/include/spdk/event.h @@ -0,0 +1,318 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Event framework public API. + * + * See @ref event_components for an overview of the SPDK event framework API. + */ + +#ifndef SPDK_EVENT_H +#define SPDK_EVENT_H + +#include "spdk/stdinc.h" + +#include "spdk/cpuset.h" +#include "spdk/queue.h" +#include "spdk/log.h" +#include "spdk/thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Event handler function. + * + * \param arg1 Argument 1. + * \param arg2 Argument 2. + */ +typedef void (*spdk_event_fn)(void *arg1, void *arg2); + +/** + * \brief An event is a function that is passed to and called on an lcore. + */ +struct spdk_event; + +/** + * \brief A poller is a function that is repeatedly called on an lcore. + */ +struct spdk_poller; + +/** + * Callback function for customized shutdown handling of application. + */ +typedef void (*spdk_app_shutdown_cb)(void); + +/** + * Signal handler function. + * + * \param signal Signal number. 
+ */ +typedef void (*spdk_sighandler_t)(int signal); + +#define SPDK_DEFAULT_RPC_ADDR "/var/tmp/spdk.sock" + +/** + * \brief Event framework initialization options + */ +struct spdk_app_opts { + const char *name; + const char *config_file; + const char *json_config_file; + bool json_config_ignore_errors; + const char *rpc_addr; /* Can be UNIX domain socket path or IP address + TCP port */ + const char *reactor_mask; + const char *tpoint_group_mask; + + int shm_id; + + spdk_app_shutdown_cb shutdown_cb; + spdk_sighandler_t usr1_handler; + + bool enable_coredump; + int mem_channel; + int master_core; + int mem_size; + bool no_pci; + bool hugepage_single_segments; + bool unlink_hugepage; + const char *hugedir; + enum spdk_log_level print_level; + size_t num_pci_addr; + struct spdk_pci_addr *pci_blacklist; + struct spdk_pci_addr *pci_whitelist; + const char *iova_mode; + + /* DEPRECATED. No longer has any effect. + * + * The maximum latency allowed when passing an event + * from one core to another. A value of 0 + * means all cores continually poll. This is + * specified in microseconds. + */ + uint64_t max_delay_us; + + /* Wait for the associated RPC before initializing subsystems + * when this flag is enabled. + */ + bool delay_subsystem_init; + + /* Number of trace entries allocated for each core */ + uint64_t num_entries; + + /** Opaque context for use of the env implementation. */ + void *env_context; + + /** + * for passing user-provided log call + */ + logfunc *log; + + uint64_t base_virtaddr; +}; + +/** + * Initialize the default value of opts + * + * \param opts Data structure where SPDK will initialize the default options. + */ +void spdk_app_opts_init(struct spdk_app_opts *opts); + +/** + * Start the framework. + * + * Before calling this function, opts must be initialized by + * spdk_app_opts_init(). Once started, the framework will call start_fn on + * an spdk_thread running on the current system thread with the + * argument provided. 
+ * + * If opts->delay_subsystem_init is set + * (e.g. through --wait-for-rpc flag in spdk_app_parse_args()) + * this function will only start a limited RPC server accepting + * only a few RPC commands - mostly related to pre-initialization. + * With this option, the framework won't be started and start_fn + * won't be called until the user sends an `rpc_framework_start_init` + * RPC command, which marks the pre-initialization complete and + * allows start_fn to be finally called. + * + * This call will block until spdk_app_stop() is called. If an error + * condition occurs during the initialization code within spdk_app_start(), + * this function will immediately return before invoking start_fn. + * + * \param opts Initialization options used for this application. + * \param start_fn Entry point that will execute on an internally created thread + * once the framework has been started. + * \param ctx Argument passed to function start_fn. + * + * \return 0 on success or non-zero on failure. + */ +int spdk_app_start(struct spdk_app_opts *opts, spdk_msg_fn start_fn, + void *ctx); + +/** + * Perform final shutdown operations on an application using the event framework. + */ +void spdk_app_fini(void); + +/** + * Start shutting down the framework. + * + * Typically this function is not called directly, and the shutdown process is + * started implicitly by a process signal. But in applications that are using + * SPDK for a subset of its process threads, this function can be called in lieu + * of a signal. + */ +void spdk_app_start_shutdown(void); + +/** + * Stop the framework. + * + * This does not wait for all threads to exit. Instead, it kicks off the shutdown + * process and returns. Once the shutdown process is complete, spdk_app_start() + * will return. + * + * \param rc The rc value specified here will be returned to caller of spdk_app_start(). + */ +void spdk_app_stop(int rc); + +/** + * Generate a configuration file that corresponds to the current running state. 
+ * + * \param config_str Values obtained from the generated configuration file. + * \param name Prefix for name of temporary configuration file to save the current config. + * + * \return 0 on success, -1 on failure. + */ +int spdk_app_get_running_config(char **config_str, char *name); + +/** + * Return the shared memory id for this application. + * + * \return shared memory id. + */ +int spdk_app_get_shm_id(void); + +/** + * Convert a string containing a CPU core mask into a bitmask + * + * \param mask String containing a CPU core mask. + * \param cpumask Bitmask of CPU cores. + * + * \return 0 on success, -1 on failure. + */ +int spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask); + +/** + * Get the mask of the CPU cores active for this application + * + * \return the bitmask of the active CPU cores. + */ +struct spdk_cpuset *spdk_app_get_core_mask(void); + +#define SPDK_APP_GETOPT_STRING "c:de:ghi:m:n:p:r:s:uvB:L:RW:" + +enum spdk_app_parse_args_rvals { + SPDK_APP_PARSE_ARGS_HELP = 0, + SPDK_APP_PARSE_ARGS_SUCCESS = 1, + SPDK_APP_PARSE_ARGS_FAIL = 2 +}; +typedef enum spdk_app_parse_args_rvals spdk_app_parse_args_rvals_t; + +/** + * Helper function for parsing arguments and printing usage messages. + * + * \param argc Count of arguments in argv parameter array. + * \param argv Array of command line arguments. + * \param opts Default options for the application. + * \param getopt_str String representing the app-specific command line parameters. + * Characters in this string must not conflict with characters in SPDK_APP_GETOPT_STRING. + * \param app_long_opts Array of full-name parameters. Can be NULL. + * \param parse Function pointer to call if an argument in getopt_str is found. + * \param usage Function pointer to print usage messages for app-specific command + * line parameters. + * \return SPDK_APP_PARSE_ARGS_FAIL on failure, SPDK_APP_PARSE_ARGS_SUCCESS on + * success, SPDK_APP_PARSE_ARGS_HELP if '-h' passed as an option. 
+ */ +spdk_app_parse_args_rvals_t spdk_app_parse_args(int argc, char **argv, + struct spdk_app_opts *opts, const char *getopt_str, + struct option *app_long_opts, int (*parse)(int ch, char *arg), + void (*usage)(void)); + +/** + * Print usage strings for common SPDK command line options. + * + * May only be called after spdk_app_parse_args(). + */ +void spdk_app_usage(void); + +/** + * Allocate an event to be passed to spdk_event_call(). + * + * \param lcore Lcore to run this event. + * \param fn Function used to execute event. + * \param arg1 Argument passed to function fn. + * \param arg2 Argument passed to function fn. + * + * \return a pointer to the allocated event. + */ +struct spdk_event *spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, + void *arg1, void *arg2); + +/** + * Pass the given event to the associated lcore and call the function. + * + * \param event Event to execute. + */ +void spdk_event_call(struct spdk_event *event); + +/** + * Enable or disable monitoring of context switches. + * + * \param enabled True to enable, false to disable. + */ +void spdk_framework_enable_context_switch_monitor(bool enabled); + +/** + * Return whether context switch monitoring is enabled. + * + * \return true if enabled or false otherwise. + */ +bool spdk_framework_context_switch_monitor_enabled(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/fd.h b/src/spdk/include/spdk/fd.h new file mode 100644 index 000000000..8da7f2cd7 --- /dev/null +++ b/src/spdk/include/spdk/fd.h @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * OS filesystem utility functions + */ + +#ifndef SPDK_FD_H +#define SPDK_FD_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Get the file size. + * + * \param fd File descriptor. + * + * \return File size. + */ +uint64_t spdk_fd_get_size(int fd); + +/** + * Get the block size of the file. + * + * \param fd File descriptor. + * + * \return Block size. + */ +uint32_t spdk_fd_get_blocklen(int fd); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/file.h b/src/spdk/include/spdk/file.h new file mode 100644 index 000000000..59cab4e27 --- /dev/null +++ b/src/spdk/include/spdk/file.h @@ -0,0 +1,61 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * file operation functions + */ + +#ifndef SPDK_FILE_H +#define SPDK_FILE_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Load the input file content into a data buffer. + * + * \param file File handle. + * \param size Size of bytes read from the file. + * + * \return data contains the content on success, NULL on failure. 
+ */ +void *spdk_posix_file_load(FILE *file, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/ftl.h b/src/spdk/include/spdk/ftl.h new file mode 100644 index 000000000..6f85ab371 --- /dev/null +++ b/src/spdk/include/spdk/ftl.h @@ -0,0 +1,251 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_FTL_H +#define SPDK_FTL_H + +#include "spdk/stdinc.h" +#include "spdk/uuid.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_ftl_dev; + +/* Limit thresholds */ +enum { + SPDK_FTL_LIMIT_CRIT, + SPDK_FTL_LIMIT_HIGH, + SPDK_FTL_LIMIT_LOW, + SPDK_FTL_LIMIT_START, + SPDK_FTL_LIMIT_MAX +}; + +struct spdk_ftl_limit { + /* Threshold from which the limiting starts */ + size_t thld; + + /* Limit percentage */ + size_t limit; +}; + +struct spdk_ftl_conf { + /* Number of reserved addresses not exposed to the user */ + size_t lba_rsvd; + + /* Size of the per-io_channel write buffer */ + size_t write_buffer_size; + + /* Threshold for opening new band */ + size_t band_thld; + + /* Maximum IO depth per band relocate */ + size_t max_reloc_qdepth; + + /* Maximum active band relocates */ + size_t max_active_relocs; + + /* IO pool size per user thread */ + size_t user_io_pool_size; + + /* Lowest percentage of invalid blocks for a band to be defragged */ + size_t invalid_thld; + + /* User writes limits */ + struct spdk_ftl_limit limits[SPDK_FTL_LIMIT_MAX]; + + /* Allow for partial recovery from open bands instead of returning error */ + bool allow_open_bands; + + /* Use append instead of write */ + bool use_append; + + /* Maximum supported number of IO channels */ + uint32_t max_io_channels; + + struct { + /* Maximum number of concurrent requests */ + size_t max_request_cnt; + /* Maximum number of blocks per one request */ + size_t max_request_size; + } nv_cache; + + /* Create l2p table on l2p_path persistent memory file or device instead of in DRAM */ + const char *l2p_path; +}; + +enum spdk_ftl_mode { + /* Create new device */ + SPDK_FTL_MODE_CREATE = (1 << 0), +}; + +struct spdk_ftl_dev_init_opts { + /* Underlying device */ + const char *base_bdev; + /* Write buffer cache */ + const char *cache_bdev; + + /* Thread responsible for core tasks execution */ + struct spdk_thread *core_thread; + + /* 
Device's config */ + const struct spdk_ftl_conf *conf; + /* Device's name */ + const char *name; + /* Mode flags */ + unsigned int mode; + /* Device UUID (valid when restoring device from disk) */ + struct spdk_uuid uuid; +}; + +struct spdk_ftl_attrs { + /* Device's UUID */ + struct spdk_uuid uuid; + /* Number of logical blocks */ + uint64_t num_blocks; + /* Logical block size */ + size_t block_size; + /* Underlying device */ + const char *base_bdev; + /* Write buffer cache */ + const char *cache_bdev; + /* Number of zones per parallel unit in the underlying device (including any offline ones) */ + size_t num_zones; + /* Number of logical blocks per zone */ + size_t zone_size; + /* Device specific configuration */ + struct spdk_ftl_conf conf; +}; + +typedef void (*spdk_ftl_fn)(void *, int); +typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int); + +/** + * Initialize the FTL on given NVMe device and parallel unit range. + * + * Covers the following: + * - retrieve zone device information, + * - allocate buffers and resources, + * - initialize internal structures, + * - initialize internal thread(s), + * - restore or create L2P table. + * + * \param opts configuration for new device + * \param cb callback function to call when the device is created + * \param cb_arg callback's argument + * + * \return 0 if initialization was started successfully, negative errno otherwise. + */ +int spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *opts, spdk_ftl_init_fn cb, void *cb_arg); + +/** + * Deinitialize and free given device. + * + * \param dev device + * \param cb callback function to call when the device is freed + * \param cb_arg callback's argument + * + * \return 0 if successfully scheduled free, negative errno otherwise. + */ +int spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb, void *cb_arg); + +/** + * Initialize FTL configuration structure with default values. 
+ * + * \param conf FTL configuration to initialize + */ +void spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf); + +/** + * Retrieve device’s attributes. + * + * \param dev device + * \param attr Attribute structure to fill + */ +void spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attr); + +/** + * Submits a read to the specified device. + * + * \param dev Device + * \param ch I/O channel + * \param lba Starting LBA to read the data + * \param lba_cnt Number of sectors to read + * \param iov Single IO vector or pointer to IO vector table + * \param iov_cnt Number of IO vectors + * \param cb_fn Callback function to invoke when the I/O is completed + * \param cb_arg Argument to pass to the callback function + * + * \return 0 if successfully submitted, negative errno otherwise. + */ +int spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, + size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg); + +/** + * Submits a write to the specified device. + * + * \param dev Device + * \param ch I/O channel + * \param lba Starting LBA to write the data + * \param lba_cnt Number of sectors to write + * \param iov Single IO vector or pointer to IO vector table + * \param iov_cnt Number of IO vectors + * \param cb_fn Callback function to invoke when the I/O is completed + * \param cb_arg Argument to pass to the callback function + * + * \return 0 if successfully submitted, negative errno otherwise. + */ +int spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, + size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg); + +/** + * Submits a flush request to the specified device. + * + * \param dev device + * \param cb_fn Callback function to invoke when all prior IOs have been completed + * \param cb_arg Argument to pass to the callback function + * + * \return 0 if successfully submitted, negative errno otherwise. 
+ */ +int spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_FTL_H */ diff --git a/src/spdk/include/spdk/gpt_spec.h b/src/spdk/include/spdk/gpt_spec.h new file mode 100644 index 000000000..c67eb572e --- /dev/null +++ b/src/spdk/include/spdk/gpt_spec.h @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * GUID Partition Table (GPT) specification definitions + */ + +#ifndef SPDK_GPT_SPEC_H +#define SPDK_GPT_SPEC_H + +#include "spdk/stdinc.h" + +#include "spdk/assert.h" + +#pragma pack(push, 1) + +#define SPDK_MBR_SIGNATURE 0xAA55 + +#define SPDK_MBR_OS_TYPE_GPT_PROTECTIVE 0xEE +#define SPDK_MBR_OS_TYPE_EFI_SYSTEM_PARTITION 0xEF + +struct spdk_mbr_chs { + uint8_t head; + uint16_t sector : 6; + uint16_t cylinder : 10; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr_chs) == 3, "size incorrect"); + +struct spdk_mbr_partition_entry { + uint8_t reserved : 7; + uint8_t bootable : 1; + + struct spdk_mbr_chs start_chs; + + uint8_t os_type; + + struct spdk_mbr_chs end_chs; + + uint32_t start_lba; + uint32_t size_lba; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr_partition_entry) == 16, "size incorrect"); + +struct spdk_mbr { + uint8_t boot_code[440]; + uint32_t disk_signature; + uint16_t reserved_444; + struct spdk_mbr_partition_entry partitions[4]; + uint16_t mbr_signature; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_mbr) == 512, "size incorrect"); + +#define SPDK_GPT_SIGNATURE "EFI PART" + +#define SPDK_GPT_REVISION_1_0 0x00010000u + +struct spdk_gpt_guid { + uint8_t raw[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_guid) == 16, "size incorrect"); + +#define SPDK_GPT_GUID(a, b, c, d, e) \ + (struct spdk_gpt_guid){{ \ + (uint8_t)(a), (uint8_t)(((uint32_t)(a)) >> 8), \ + (uint8_t)(((uint32_t)(a)) >> 16), (uint8_t)(((uint32_t)(a)) >> 24), \ + (uint8_t)(b), (uint8_t)(((uint16_t)(b)) >> 8), \ + (uint8_t)(c), (uint8_t)(((uint16_t)(c)) >> 8), \ + (uint8_t)(((uint16_t)(d)) >> 8), (uint8_t)(d), \ + (uint8_t)(((uint64_t)(e)) >> 40), (uint8_t)(((uint64_t)(e)) >> 32), (uint8_t)(((uint64_t)(e)) >> 24), \ + (uint8_t)(((uint64_t)(e)) >> 16), (uint8_t)(((uint64_t)(e)) >> 8), (uint8_t)(e) \ + }} + +#define SPDK_GPT_PART_TYPE_UNUSED SPDK_GPT_GUID(0x00000000, 0x0000, 0x0000, 0x0000, 0x000000000000) +#define SPDK_GPT_PART_TYPE_EFI_SYSTEM_PARTITION SPDK_GPT_GUID(0xC12A7328, 0xF81F, 
0x11D2, 0xBA4B, 0x00A0C93EC93B) +#define SPDK_GPT_PART_TYPE_LEGACY_MBR SPDK_GPT_GUID(0x024DEE41, 0x33E7, 0x11D3, 0x9D69, 0x0008C781F39F) + +struct spdk_gpt_header { + char gpt_signature[8]; + uint32_t revision; + uint32_t header_size; + uint32_t header_crc32; + uint32_t reserved; + uint64_t my_lba; + uint64_t alternate_lba; + uint64_t first_usable_lba; + uint64_t last_usable_lba; + struct spdk_gpt_guid disk_guid; + uint64_t partition_entry_lba; + uint32_t num_partition_entries; + uint32_t size_of_partition_entry; + uint32_t partition_entry_array_crc32; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_header) == 92, "size incorrect"); + +struct spdk_gpt_partition_entry { + struct spdk_gpt_guid part_type_guid; + struct spdk_gpt_guid unique_partition_guid; + uint64_t starting_lba; + uint64_t ending_lba; + struct { + uint64_t required : 1; + uint64_t no_block_io_proto : 1; + uint64_t legacy_bios_bootable : 1; + uint64_t reserved_uefi : 45; + uint64_t guid_specific : 16; + } attr; + uint16_t partition_name[36]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_gpt_partition_entry) == 128, "size incorrect"); + +#pragma pack(pop) + +#endif diff --git a/src/spdk/include/spdk/histogram_data.h b/src/spdk/include/spdk/histogram_data.h new file mode 100644 index 000000000..5f114fe69 --- /dev/null +++ b/src/spdk/include/spdk/histogram_data.h @@ -0,0 +1,264 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Generic histogram library + */ + +#ifndef _SPDK_HISTOGRAM_DATA_H_ +#define _SPDK_HISTOGRAM_DATA_H_ + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Geometry helpers. Each takes a pointer h to a struct spdk_histogram_data and + * derives its value from h->bucket_shift: + * BUCKET_SHIFT - log2 of the number of buckets per range + * BUCKET_LSB - 64 minus the bucket shift + * NUM_BUCKETS_PER_RANGE - 1 << bucket shift + * BUCKET_MASK - mask selecting a bucket index within a range + * NUM_BUCKET_RANGES - BUCKET_LSB + 1 + * NUM_BUCKETS - total number of counters in h->bucket + * The parameter is parenthesized so that any pointer expression may be passed. + */ +#define SPDK_HISTOGRAM_BUCKET_SHIFT_DEFAULT 7 +#define SPDK_HISTOGRAM_BUCKET_SHIFT(h) ((h)->bucket_shift) +#define SPDK_HISTOGRAM_BUCKET_LSB(h) (64 - SPDK_HISTOGRAM_BUCKET_SHIFT(h)) +#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) (1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) +#define SPDK_HISTOGRAM_BUCKET_MASK(h) (SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) - 1) +#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES(h) (SPDK_HISTOGRAM_BUCKET_LSB(h) + 1) +#define SPDK_HISTOGRAM_NUM_BUCKETS(h) (SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(h) * \ + SPDK_HISTOGRAM_NUM_BUCKET_RANGES(h)) + +/* + * SPDK histograms are implemented using ranges of bucket arrays. The most common usage + * model is using TSC datapoints to capture an I/O latency histogram. 
For this usage model, + * the histogram tracks only TSC deltas - any translation to microseconds is done by the + * histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform + * the translations. + * + * Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE, + * which is 128 for the default bucket shift of 7 (the table below assumes that default). + * The buckets in ranges 0 and 1 each map to one specific datapoint value. + * The buckets in each subsequent range map to twice as many datapoint values as the buckets + * in the range before it: + * + * Range 0: 1 value each - 128 buckets cover 0 to 127 (2^7-1) + * Range 1: 1 value each - 128 buckets cover 128 to 255 (2^8-1) + * Range 2: 2 values each - 128 buckets cover 256 to 511 (2^9-1) + * Range 3: 4 values each - 128 buckets cover 512 to 1023 (2^10-1) + * Range 4: 8 values each - 128 buckets cover 1024 to 2047 (2^11-1) + * Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1) + * ... + * Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1 + * Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1 + * Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1 + * + * On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet + * spot for Intel Optane SSD latency testing). + * + * Buckets can be made more granular by passing a larger bucket shift to + * spdk_histogram_data_alloc_sized(). This comes at the cost of additional storage per + * namespace context to store the bucket data. 
+ */ + +struct spdk_histogram_data { + + uint32_t bucket_shift; + uint64_t *bucket; + +}; + +static inline void +__spdk_histogram_increment(struct spdk_histogram_data *h, uint32_t range, uint32_t index) +{ + uint64_t *count; + + count = &h->bucket[(range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index]; + (*count)++; +} + +static inline uint64_t +__spdk_histogram_get_count(const struct spdk_histogram_data *h, uint32_t range, uint32_t index) +{ + return h->bucket[(range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index]; +} + +static inline uint64_t * +__spdk_histogram_get_bucket(const struct spdk_histogram_data *h, uint32_t range, uint32_t index) +{ + return &h->bucket[(range << SPDK_HISTOGRAM_BUCKET_SHIFT(h)) + index]; +} + +static inline void +spdk_histogram_data_reset(struct spdk_histogram_data *histogram) +{ + memset(histogram->bucket, 0, SPDK_HISTOGRAM_NUM_BUCKETS(histogram) * sizeof(uint64_t)); +} + +static inline uint32_t +__spdk_histogram_data_get_bucket_range(struct spdk_histogram_data *h, uint64_t datapoint) +{ + uint32_t clz, range; + + assert(datapoint != 0); + + clz = __builtin_clzll(datapoint); + + if (clz <= SPDK_HISTOGRAM_BUCKET_LSB(h)) { + range = SPDK_HISTOGRAM_BUCKET_LSB(h) - clz; + } else { + range = 0; + } + + return range; +} + +static inline uint32_t +__spdk_histogram_data_get_bucket_index(struct spdk_histogram_data *h, uint64_t datapoint, + uint32_t range) +{ + uint32_t shift; + + if (range == 0) { + shift = 0; + } else { + shift = range - 1; + } + + return (datapoint >> shift) & SPDK_HISTOGRAM_BUCKET_MASK(h); +} + +static inline void +spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint) +{ + uint32_t range = __spdk_histogram_data_get_bucket_range(histogram, datapoint); + uint32_t index = __spdk_histogram_data_get_bucket_index(histogram, datapoint, range); + + __spdk_histogram_increment(histogram, range, index); +} + +static inline uint64_t +__spdk_histogram_data_get_bucket_start(const struct spdk_histogram_data *h, 
uint32_t range, + uint32_t index) +{ + uint64_t bucket; + + /* Work with the following bucket so the result is the boundary after (range, index). */ + index += 1; + if (range > 0) { + bucket = 1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT(h) - 1); + bucket += (uint64_t)index << (range - 1); + } else { + bucket = index; + } + + return bucket; +} + +/** + * Callback invoked by spdk_histogram_data_iterate() once per bucket. + * + * \param ctx Context pointer passed to spdk_histogram_data_iterate(). + * \param start Boundary at which this bucket starts (0 for the first bucket). + * \param end Boundary at which the following bucket starts. + * \param count Counter value of this bucket. + * \param total Sum of the counters of all buckets. + * \param so_far Sum of the counters up to and including this bucket. + */ +typedef void (*spdk_histogram_data_fn)(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far); + +/** + * Walk every bucket in range order and report it through fn. + * + * \param histogram Histogram to walk. + * \param fn Callback invoked once per bucket. + * \param ctx Opaque value passed back as the ctx parameter of fn. + */ +static inline void +spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram, + spdk_histogram_data_fn fn, void *ctx) +{ + uint64_t i, j, count, so_far, total; + uint64_t bucket, last_bucket; + + total = 0; + + /* First pass: the callback receives the grand total with every bucket. */ + for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES(histogram); i++) { + for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(histogram); j++) { + total += __spdk_histogram_get_count(histogram, i, j); + } + } + + so_far = 0; + bucket = 0; + + for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES(histogram); i++) { + for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE(histogram); j++) { + count = __spdk_histogram_get_count(histogram, i, j); + so_far += count; + last_bucket = bucket; + bucket = __spdk_histogram_data_get_bucket_start(histogram, i, j); + fn(ctx, last_bucket, bucket, count, total, so_far); + } + } +} + +/** + * Add every bucket counter of src to the matching counter of dst. + * + * Nothing is merged when the two histograms use different bucket_shift values. + * This function cannot report that case, so callers that need to detect it + * must compare bucket_shift themselves. + * + * \param dst Histogram whose counters are increased. + * \param src Histogram whose counters are added; it is not modified. + */ +static inline void +spdk_histogram_data_merge(const struct spdk_histogram_data *dst, + const struct spdk_histogram_data *src) +{ + uint64_t i; + + /* + * Histograms with different bucket_shift values have bucket arrays of + * different lengths whose buckets cover different datapoint ranges, so + * summing them element by element would read past the shorter array. + */ + if (dst->bucket_shift != src->bucket_shift) { + return; + } + + for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKETS(dst); i++) { + dst->bucket[i] += src->bucket[i]; + } +} + +/** + * Allocate a histogram with all counters zeroed. + * + * \param bucket_shift log2 of the number of buckets per range. + * + * \return the new histogram, or NULL if either allocation fails. Release it + * with spdk_histogram_data_free(). + */ +static inline struct spdk_histogram_data * +spdk_histogram_data_alloc_sized(uint32_t bucket_shift) +{ + struct spdk_histogram_data *h; + + h = (struct spdk_histogram_data *)calloc(1, sizeof(*h)); + if (h == NULL) { + return NULL; + } + + h->bucket_shift = bucket_shift; + h->bucket = (uint64_t *)calloc(SPDK_HISTOGRAM_NUM_BUCKETS(h), sizeof(uint64_t)); + if (h->bucket == NULL) { + free(h); + return NULL; + } + + return h; +} + +/** + * Allocate a histogram that uses SPDK_HISTOGRAM_BUCKET_SHIFT_DEFAULT. + * + * \return the new histogram, or NULL if allocation fails. + */ +static inline struct spdk_histogram_data * +spdk_histogram_data_alloc(void) +{ + 
return spdk_histogram_data_alloc_sized(SPDK_HISTOGRAM_BUCKET_SHIFT_DEFAULT); +} + +/** + * Release a histogram together with its bucket array. + * + * \param h Histogram to free; NULL is accepted and ignored. + */ +static inline void +spdk_histogram_data_free(struct spdk_histogram_data *h) +{ + if (h == NULL) { + return; + } + + free(h->bucket); + free(h); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/idxd.h b/src/spdk/include/spdk/idxd.h new file mode 100644 index 000000000..cb9ebe8b8 --- /dev/null +++ b/src/spdk/include/spdk/idxd.h @@ -0,0 +1,418 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * IDXD driver public interface + */ + +#ifndef SPDK_IDXD_H +#define SPDK_IDXD_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/env.h" + +/** + * Opaque handle for a single IDXD channel. + */ +struct spdk_idxd_io_channel; + +/** + * Opaque handle for a single IDXD device. + */ +struct spdk_idxd_device; + +/** + * Opaque handle for batching. + */ +struct idxd_batch; + +/** + * Configure an IDXD channel. + * + * \param chan IDXD channel to be configured. + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); + +/** + * Reconfigures this channel based on how many current channels there are. + * + * \param chan IDXD channel to be set. + * \param num_channels total number of channels in use. + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels); + +/** + * Signature for callback function invoked when a request is completed. + * + * \param arg User-specified opaque value corresponding to cb_arg from the + * request submission. + * \param status 0 on success, negative errno on failure. + */ +typedef void (*spdk_idxd_req_cb)(void *arg, int status); + +/** + * Callback for spdk_idxd_probe() enumeration. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe(). 
+ * \param pci_dev PCI device that is being probed. + * + * \return true to attach to this device. + */ +typedef bool (*spdk_idxd_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev); + +/** + * Callback for spdk_idxd_probe() to report a device that has been attached to + * the userspace IDXD driver. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe(). + * \param pci_dev PCI device that was attached to the driver. + * \param idxd IDXD device that was attached to the driver. + */ +typedef void (*spdk_idxd_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev, + struct spdk_idxd_device *idxd); + +/** + * Enumerate the IDXD devices attached to the system and attach the userspace + * IDXD driver to them if desired. + * + * If called more than once, only devices that are not already attached to the + * SPDK IDXD driver will be reported. + * + * To stop using the controller and release its associated resources, call + * spdk_idxd_detach() with the IDXD device handle that was passed to attach_cb. + * + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of + * the callbacks. + * \param probe_cb will be called once per IDXD device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true + * once the IDXD controller has been attached to the userspace driver. + * + * \return 0 on success, -1 on failure. + */ +int spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb); + +/** + * Detach specified device returned by spdk_idxd_probe() from the IDXD driver. + * + * \param idxd IDXD device to detach from the driver. + */ +void spdk_idxd_detach(struct spdk_idxd_device *idxd); + +/** + * Sets the IDXD configuration. + * + * \param config_number the configuration number for a valid IDXD config. + */ +void spdk_idxd_set_config(uint32_t config_number); + +/** + * Return the max number of descriptors per batch for IDXD. 
+ * + * \return max number of descriptors per batch. + */ +uint32_t spdk_idxd_batch_get_max(void); + +/** + * Create a batch sequence. + * + * \param chan IDXD channel to submit request. + * + * \return handle to use for subsequent batch requests, NULL on failure. + */ +struct idxd_batch *spdk_idxd_batch_create(struct spdk_idxd_io_channel *chan); + +/** + * Submit a batch sequence. + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_batch_submit(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Cancel a batch sequence. + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_batch_cancel(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch); + +/** + * Synchronous call to prepare a copy request into a previously initialized batch + * created with spdk_idxd_batch_create(). The callback will be called when the copy + * completes after the batch has been submitted by an asynchronous call to + * spdk_idxd_batch_submit(). + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param dst Destination virtual address. + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. 
+ * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_batch_prep_copy(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + void *dst, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a dualcast request into a previously initialized batch + * created with spdk_idxd_batch_create(). The callback will be called when the dualcast + * completes after the batch has been submitted by an asynchronous call to + * spdk_idxd_batch_submit(). + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param dst1 First destination virtual address (must be 4K aligned). + * \param dst2 Second destination virtual address (must be 4K aligned). + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_batch_prep_dualcast(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + void *dst1, void *dst2, const void *src, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Build and submit an idxd memory copy request. + * + * This function will build the copy descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst Destination virtual address. + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, + void *dst, const void *src, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Build and submit an idxd dualcast request. + * + * This function will build the dual cast descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst1 First destination virtual address (must be 4K aligned). + * \param dst2 Second destination virtual address (must be 4K aligned). + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, + void *dst1, void *dst2, const void *src, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a compare request into a previously initialized batch + * created with spdk_idxd_batch_create(). The callback will be called when the compare + * completes after the batch has been submitted by an asynchronous call to + * spdk_idxd_batch_submit(). + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param src1 First source to compare. + * \param src2 Second source to compare. + * \param nbytes Number of bytes to compare. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. + * + * NOTE(review): src2 is declared as plain void * here, whereas + * spdk_idxd_submit_compare() declares its src2 as const void * - confirm + * whether the difference is intentional. 
+ */ +int spdk_idxd_batch_prep_compare(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + void *src1, void *src2, uint64_t nbytes, spdk_idxd_req_cb cb_fn, + void *cb_arg); + +/** + * Build and submit a memory compare request. + * + * This function will build the compare descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param src1 First source to compare. + * \param src2 Second source to compare. + * \param nbytes Number of bytes to compare. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan, + void *src1, const void *src2, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a fill request into a previously initialized batch + * created with spdk_idxd_batch_create(). The callback will be called when the fill + * completes after the batch has been submitted by an asynchronous call to + * spdk_idxd_batch_submit(). + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param dst Destination virtual address. + * \param fill_pattern Repeating eight-byte pattern to use for memory fill. + * \param nbytes Number of bytes to fill. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_idxd_batch_prep_fill(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + void *dst, uint64_t fill_pattern, uint64_t nbytes, spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Build and submit an idxd memory fill request. + * + * This function will build the fill descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst Destination virtual address. + * \param fill_pattern Repeating eight-byte pattern to use for memory fill. + * \param nbytes Number of bytes to fill. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter + * in the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, + void *dst, uint64_t fill_pattern, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Synchronous call to prepare a crc32c request into a previously initialized batch + * created with spdk_idxd_batch_create(). The callback will be called when the crc32c + * completes after the batch has been submitted by an asynchronous call to + * spdk_idxd_batch_submit(). + * + * \param chan IDXD channel to submit request. + * \param batch Handle provided when the batch was started with spdk_idxd_batch_create(). + * \param dst Resulting calculation. + * \param src Source virtual address. + * \param seed Four byte CRC-32C seed value. + * \param nbytes Number of bytes to calculate on. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_idxd_batch_prep_crc32c(struct spdk_idxd_io_channel *chan, struct idxd_batch *batch, + uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Build and submit a memory CRC32-C request. + * + * This function will build the CRC-32C descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst Resulting calculation. + * \param src Source virtual address. + * \param seed Four byte CRC-32C seed value. + * \param nbytes Number of bytes to calculate on. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter + * in the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan, uint32_t *dst, void *src, + uint32_t seed, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Check for completed requests on an IDXD channel. + * + * \param chan IDXD channel to check for completions. + */ +void spdk_idxd_process_events(struct spdk_idxd_io_channel *chan); + +/** + * Returns an IDXD channel for a given IDXD device. + * + * \param idxd IDXD device to get a channel for. + * + * \return pointer to an IDXD channel. + */ +struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd); + +/** + * Free an IDXD channel. + * + * \param chan IDXD channel to free. + */ +void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/ioat.h b/src/spdk/include/spdk/ioat.h new file mode 100644 index 000000000..c4e66be3b --- /dev/null +++ b/src/spdk/include/spdk/ioat.h @@ -0,0 +1,244 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * I/OAT DMA engine driver public interface + */ + +#ifndef SPDK_IOAT_H +#define SPDK_IOAT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/env.h" + +/** + * Opaque handle for a single I/OAT channel, reported through the attach + * callback of \ref spdk_ioat_probe(). + */ +struct spdk_ioat_chan; + +/** + * Signature for callback function invoked when a request is completed. 
+ * + * \param arg User-specified opaque value corresponding to cb_arg from the + * request submission. + */ +typedef void (*spdk_ioat_req_cb)(void *arg); + +/** + * Callback for spdk_ioat_probe() enumeration. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_ioat_probe(). + * \param pci_dev PCI device that is being probed. + * + * \return true to attach to this device. + */ +typedef bool (*spdk_ioat_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev); + +/** + * Callback for spdk_ioat_probe() to report a device that has been attached to + * the userspace I/OAT driver. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_ioat_probe(). + * \param pci_dev PCI device that was attached to the driver. + * \param ioat I/OAT channel that was attached to the driver. + */ +typedef void (*spdk_ioat_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev, + struct spdk_ioat_chan *ioat); + +/** + * Enumerate the I/OAT devices attached to the system and attach the userspace + * I/OAT driver to them if desired. + * + * If called more than once, only devices that are not already attached to the + * SPDK I/OAT driver will be reported. + * + * To stop using the controller and release its associated resources, call + * spdk_ioat_detach() with the I/OAT channel handle that was passed to attach_cb. + * + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of + * the callbacks. + * \param probe_cb will be called once per I/OAT device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true + * once the I/OAT controller has been attached to the userspace driver. + * + * \return 0 on success, -1 on failure. + */ +int spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb); + +/** + * Detach specified device returned by spdk_ioat_probe() from the I/OAT driver. + * + * \param ioat I/OAT channel to detach from the driver. 
+ */ +void spdk_ioat_detach(struct spdk_ioat_chan *ioat); + +/** + * Get the maximum number of descriptors supported by the library. + * + * \param chan I/OAT channel + * + * \return maximum number of descriptors. + */ +uint32_t spdk_ioat_get_max_descriptors(struct spdk_ioat_chan *chan); + +/** + * Build a DMA engine memory copy request. + * + * This function will build the descriptor in the channel's ring. The + * caller must also explicitly call spdk_ioat_flush to submit the + * descriptor, possibly after building additional descriptors. + * + * \param chan I/OAT channel to build request. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * \param cb_fn Callback function which will be called when the request is complete. + * \param dst Destination virtual address. + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_ioat_build_copy(struct spdk_ioat_chan *chan, + void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, const void *src, uint64_t nbytes); + +/** + * Build and submit a DMA engine memory copy request. + * + * This function will build the descriptor in the channel's ring and then + * immediately submit it by writing the channel's doorbell. Calling this + * function does not require a subsequent call to spdk_ioat_flush. + * + * \param chan I/OAT channel to submit request. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. + * \param cb_fn Callback function which will be called when the request is complete. + * \param dst Destination virtual address. + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_ioat_submit_copy(struct spdk_ioat_chan *chan, + void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, const void *src, uint64_t nbytes); + +/** + * Build a DMA engine memory fill request. + * + * This function will build the descriptor in the channel's ring. The + * caller must also explicitly call spdk_ioat_flush to submit the + * descriptor, possibly after building additional descriptors. + * + * \param chan I/OAT channel to build request. + * \param cb_arg Opaque value which will be passed back as the cb_arg parameter + * in the completion callback. + * \param cb_fn Callback function which will be called when the request is complete. + * \param dst Destination virtual address. + * \param fill_pattern Repeating eight-byte pattern to use for memory fill. + * \param nbytes Number of bytes to fill. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_ioat_build_fill(struct spdk_ioat_chan *chan, + void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, uint64_t fill_pattern, uint64_t nbytes); + +/** + * Build and submit a DMA engine memory fill request. + * + * This function will build the descriptor in the channel's ring and then + * immediately submit it by writing the channel's doorbell. Calling this + * function does not require a subsequent call to spdk_ioat_flush. + * + * \param chan I/OAT channel to submit request. + * \param cb_arg Opaque value which will be passed back as the cb_arg parameter + * in the completion callback. + * \param cb_fn Callback function which will be called when the request is complete. + * \param dst Destination virtual address. + * \param fill_pattern Repeating eight-byte pattern to use for memory fill. + * \param nbytes Number of bytes to fill. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_ioat_submit_fill(struct spdk_ioat_chan *chan, + void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, uint64_t fill_pattern, uint64_t nbytes); + +/** + * Flush previously built descriptors. 
+ * + * Descriptors are flushed by writing the channel's dmacount doorbell + * register. This function enables batching multiple descriptors followed by + * a single doorbell write. + * + * \param chan I/OAT channel to flush. + */ +void spdk_ioat_flush(struct spdk_ioat_chan *chan); + +/** + * Check for completed requests on an I/OAT channel. + * + * \param chan I/OAT channel to check for completions. + * + * \return number of events handled on success, negative errno on failure. + */ +int spdk_ioat_process_events(struct spdk_ioat_chan *chan); + +/** + * DMA engine capability flags + */ +enum spdk_ioat_dma_capability_flags { + SPDK_IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */ + SPDK_IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */ +}; + +/** + * Get the DMA engine capabilities. + * + * \param chan I/OAT channel to query. + * + * \return a combination of flags from enum spdk_ioat_dma_capability_flags. + */ +uint32_t spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *chan); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/ioat_spec.h b/src/spdk/include/spdk/ioat_spec.h new file mode 100644 index 000000000..12202701a --- /dev/null +++ b/src/spdk/include/spdk/ioat_spec.h @@ -0,0 +1,330 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * I/OAT specification definitions + */ + +#ifndef SPDK_IOAT_SPEC_H +#define SPDK_IOAT_SPEC_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" + +#define SPDK_IOAT_PCI_CHANERR_INT_OFFSET 0x180 + +#define SPDK_IOAT_INTRCTRL_MASTER_INT_EN 0x01 + +#define SPDK_IOAT_VER_3_0 0x30 +#define SPDK_IOAT_VER_3_3 0x33 + +/* DMA Channel Registers */ +#define SPDK_IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define SPDK_IOAT_CHANCTRL_COMPL_DCA_EN 0x0200 +#define SPDK_IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 +#define SPDK_IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 +#define SPDK_IOAT_CHANCTRL_ERR_INT_EN 0x0010 +#define SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 +#define SPDK_IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 +#define SPDK_IOAT_CHANCTRL_INT_REARM 0x0001 + +/* DMA Channel Capabilities */ +#define SPDK_IOAT_DMACAP_PB (1 << 0) +#define SPDK_IOAT_DMACAP_DCA (1 << 4) +#define SPDK_IOAT_DMACAP_BFILL (1 << 6) +#define SPDK_IOAT_DMACAP_XOR (1 << 8) +#define SPDK_IOAT_DMACAP_PQ (1 << 9) +#define SPDK_IOAT_DMACAP_DMA_DIF (1 << 10) + +struct spdk_ioat_registers { + uint8_t chancnt; + uint8_t xfercap; + uint8_t genctrl; + uint8_t intrctrl; + uint32_t attnstatus; + uint8_t cbver; /* 0x08 */ + uint8_t reserved4[0x3]; /* 0x09 */ + uint16_t intrdelay; /* 0x0C */ + uint16_t cs_status; /* 0x0E */ + uint32_t dmacapability; /* 0x10 */ + uint8_t reserved5[0x6C]; /* 0x14 */ + uint16_t chanctrl; /* 0x80 */ + uint8_t reserved6[0x2]; /* 0x82 */ + uint8_t chancmd; /* 0x84 */ + uint8_t reserved3[1]; /* 0x85 */ + uint16_t dmacount; /* 0x86 */ + uint64_t chansts; /* 0x88 */ + uint64_t chainaddr; /* 0x90 */ + uint64_t chancmp; /* 0x98 */ + uint8_t reserved2[0x8]; /* 0xA0 */ + uint32_t chanerr; /* 0xA8 */ + uint32_t chanerrmask; /* 0xAC */ +} __attribute__((packed)) __attribute__((aligned)); + +#define SPDK_IOAT_CHANCMD_RESET 0x20 +#define SPDK_IOAT_CHANCMD_SUSPEND 0x04 + +#define SPDK_IOAT_CHANSTS_STATUS 0x7ULL +#define 
SPDK_IOAT_CHANSTS_ACTIVE 0x0 +#define SPDK_IOAT_CHANSTS_IDLE 0x1 +#define SPDK_IOAT_CHANSTS_SUSPENDED 0x2 +#define SPDK_IOAT_CHANSTS_HALTED 0x3 +#define SPDK_IOAT_CHANSTS_ARMED 0x4 + +#define SPDK_IOAT_CHANSTS_UNAFFILIATED_ERROR 0x8ULL +#define SPDK_IOAT_CHANSTS_SOFT_ERROR 0x10ULL + +#define SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK (~0x3FULL) + +#define SPDK_IOAT_CHANCMP_ALIGN 8 /* CHANCMP address must be 64-bit aligned */ + +struct spdk_ioat_generic_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t src_snoop_disable: 1; + uint32_t dest_snoop_disable: 1; + uint32_t completion_update: 1; + uint32_t fence: 1; + uint32_t reserved2: 1; + uint32_t src_page_break: 1; + uint32_t dest_page_break: 1; + uint32_t bundle: 1; + uint32_t dest_dca: 1; + uint32_t hint: 1; + uint32_t reserved: 13; + uint32_t op: 8; + } control; + } u; + uint64_t src_addr; + uint64_t dest_addr; + uint64_t next; + uint64_t op_specific[4]; +}; + +struct spdk_ioat_dma_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t src_snoop_disable: 1; + uint32_t dest_snoop_disable: 1; + uint32_t completion_update: 1; + uint32_t fence: 1; + uint32_t null: 1; + uint32_t src_page_break: 1; + uint32_t dest_page_break: 1; + uint32_t bundle: 1; + uint32_t dest_dca: 1; + uint32_t hint: 1; + uint32_t reserved: 13; +#define SPDK_IOAT_OP_COPY 0x00 + uint32_t op: 8; + } control; + } u; + uint64_t src_addr; + uint64_t dest_addr; + uint64_t next; + uint64_t reserved; + uint64_t reserved2; + uint64_t user1; + uint64_t user2; +}; + +struct spdk_ioat_fill_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t reserved: 1; + uint32_t dest_snoop_disable: 1; + uint32_t completion_update: 1; + uint32_t fence: 1; + uint32_t reserved2: 2; + uint32_t dest_page_break: 1; + uint32_t bundle: 1; + uint32_t reserved3: 15; +#define SPDK_IOAT_OP_FILL 0x01 + uint32_t op: 8; 
+ } control; + } u; + uint64_t src_data; + uint64_t dest_addr; + uint64_t next; + uint64_t reserved; + uint64_t next_dest_addr; + uint64_t user1; + uint64_t user2; +}; + +struct spdk_ioat_xor_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t src_snoop_disable: 1; + uint32_t dest_snoop_disable: 1; + uint32_t completion_update: 1; + uint32_t fence: 1; + uint32_t src_count: 3; + uint32_t bundle: 1; + uint32_t dest_dca: 1; + uint32_t hint: 1; + uint32_t reserved: 13; +#define SPDK_IOAT_OP_XOR 0x87 +#define SPDK_IOAT_OP_XOR_VAL 0x88 + uint32_t op: 8; + } control; + } u; + uint64_t src_addr; + uint64_t dest_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct spdk_ioat_xor_ext_hw_desc { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t reserved[4]; +}; + +struct spdk_ioat_pq_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t src_snoop_disable: 1; + uint32_t dest_snoop_disable: 1; + uint32_t completion_update: 1; + uint32_t fence: 1; + uint32_t src_count: 3; + uint32_t bundle: 1; + uint32_t dest_dca: 1; + uint32_t hint: 1; + uint32_t p_disable: 1; + uint32_t q_disable: 1; + uint32_t reserved: 11; +#define SPDK_IOAT_OP_PQ 0x89 +#define SPDK_IOAT_OP_PQ_VAL 0x8a + uint32_t op: 8; + } control; + } u; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct spdk_ioat_pq_ext_hw_desc { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t reserved[2]; +}; + +struct spdk_ioat_pq_update_hw_desc { + uint32_t size; + union { + uint32_t control_raw; + struct { + uint32_t int_enable: 1; + uint32_t src_snoop_disable: 1; + uint32_t dest_snoop_disable: 1; + uint32_t 
completion_update: 1; + uint32_t fence: 1; + uint32_t src_cnt: 3; + uint32_t bundle: 1; + uint32_t dest_dca: 1; + uint32_t hint: 1; + uint32_t p_disable: 1; + uint32_t q_disable: 1; + uint32_t reserved: 3; + uint32_t coef: 8; +#define SPDK_IOAT_OP_PQ_UP 0x8b + uint32_t op: 8; + } control; + } u; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct spdk_ioat_raw_hw_desc { + uint64_t field[8]; +}; + +union spdk_ioat_hw_desc { + struct spdk_ioat_raw_hw_desc raw; + struct spdk_ioat_generic_hw_desc generic; + struct spdk_ioat_dma_hw_desc dma; + struct spdk_ioat_fill_hw_desc fill; + struct spdk_ioat_xor_hw_desc xor_desc; + struct spdk_ioat_xor_ext_hw_desc xor_ext; + struct spdk_ioat_pq_hw_desc pq; + struct spdk_ioat_pq_ext_hw_desc pq_ext; + struct spdk_ioat_pq_update_hw_desc pq_update; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_ioat_hw_desc) == 64, "incorrect spdk_ioat_hw_desc layout"); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_IOAT_SPEC_H */ diff --git a/src/spdk/include/spdk/iscsi_spec.h b/src/spdk/include/spdk/iscsi_spec.h new file mode 100644 index 000000000..06e567865 --- /dev/null +++ b/src/spdk/include/spdk/iscsi_spec.h @@ -0,0 +1,567 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>. + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * iSCSI specification definitions + */ + +#ifndef SPDK_ISCSI_SPEC_H +#define SPDK_ISCSI_SPEC_H + +#include "spdk/stdinc.h" + +#include "spdk/assert.h" + +#define ISCSI_BHS_LEN 48 +#define ISCSI_DIGEST_LEN 4 +#define ISCSI_ALIGNMENT 4 + +/** support version - RFC3720(10.12.4) */ +#define ISCSI_VERSION 0x00 + +#define ISCSI_ALIGN(SIZE) \ + (((SIZE) + (ISCSI_ALIGNMENT - 1)) & ~(ISCSI_ALIGNMENT - 1)) + +/** for authentication key (non encoded 1024bytes) RFC3720(5.1/11.1.4) */ +#define ISCSI_TEXT_MAX_VAL_LEN 8192 + +/** + * RFC 3720 5.1 + * If not otherwise specified, the maximum length of a simple-value + * (not its encoded representation) is 255 bytes, not including the delimiter + * (comma or zero byte). 
+ */ +#define ISCSI_TEXT_MAX_SIMPLE_VAL_LEN 255 + +#define ISCSI_TEXT_MAX_KEY_LEN 63 + +enum iscsi_op { + /* Initiator opcodes */ + ISCSI_OP_NOPOUT = 0x00, + ISCSI_OP_SCSI = 0x01, + ISCSI_OP_TASK = 0x02, + ISCSI_OP_LOGIN = 0x03, + ISCSI_OP_TEXT = 0x04, + ISCSI_OP_SCSI_DATAOUT = 0x05, + ISCSI_OP_LOGOUT = 0x06, + ISCSI_OP_SNACK = 0x10, + ISCSI_OP_VENDOR_1C = 0x1c, + ISCSI_OP_VENDOR_1D = 0x1d, + ISCSI_OP_VENDOR_1E = 0x1e, + + /* Target opcodes */ + ISCSI_OP_NOPIN = 0x20, + ISCSI_OP_SCSI_RSP = 0x21, + ISCSI_OP_TASK_RSP = 0x22, + ISCSI_OP_LOGIN_RSP = 0x23, + ISCSI_OP_TEXT_RSP = 0x24, + ISCSI_OP_SCSI_DATAIN = 0x25, + ISCSI_OP_LOGOUT_RSP = 0x26, + ISCSI_OP_R2T = 0x31, + ISCSI_OP_ASYNC = 0x32, + ISCSI_OP_VENDOR_3C = 0x3c, + ISCSI_OP_VENDOR_3D = 0x3d, + ISCSI_OP_VENDOR_3E = 0x3e, + ISCSI_OP_REJECT = 0x3f, +}; + +enum iscsi_task_func { + ISCSI_TASK_FUNC_ABORT_TASK = 1, + ISCSI_TASK_FUNC_ABORT_TASK_SET = 2, + ISCSI_TASK_FUNC_CLEAR_ACA = 3, + ISCSI_TASK_FUNC_CLEAR_TASK_SET = 4, + ISCSI_TASK_FUNC_LOGICAL_UNIT_RESET = 5, + ISCSI_TASK_FUNC_TARGET_WARM_RESET = 6, + ISCSI_TASK_FUNC_TARGET_COLD_RESET = 7, + ISCSI_TASK_FUNC_TASK_REASSIGN = 8, +}; + +enum iscsi_task_func_resp { + ISCSI_TASK_FUNC_RESP_COMPLETE = 0, + ISCSI_TASK_FUNC_RESP_TASK_NOT_EXIST = 1, + ISCSI_TASK_FUNC_RESP_LUN_NOT_EXIST = 2, + ISCSI_TASK_FUNC_RESP_TASK_STILL_ALLEGIANT = 3, + ISCSI_TASK_FUNC_RESP_REASSIGNMENT_NOT_SUPPORTED = 4, + ISCSI_TASK_FUNC_RESP_FUNC_NOT_SUPPORTED = 5, + ISCSI_TASK_FUNC_RESP_AUTHORIZATION_FAILED = 6, + ISCSI_TASK_FUNC_REJECTED = 255 +}; + +struct iscsi_bhs { + uint8_t opcode : 6; + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t flags; + uint8_t rsv[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t stat_sn; + uint32_t exp_stat_sn; + uint32_t max_stat_sn; + uint8_t res3[12]; +}; +SPDK_STATIC_ASSERT(sizeof(struct iscsi_bhs) == ISCSI_BHS_LEN, "ISCSI_BHS_LEN mismatch"); + +struct iscsi_bhs_async { + uint8_t opcode : 6; 
/* opcode = 0x32 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res[2]; + + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + + uint64_t lun; + uint32_t ffffffff; + uint32_t res3; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint8_t async_event; + uint8_t async_vcode; + uint16_t param1; + uint16_t param2; + uint16_t param3; + uint8_t res4[4]; +}; + +struct iscsi_bhs_login_req { + uint8_t opcode : 6; /* opcode = 0x03 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t flags; + uint8_t version_max; + uint8_t version_min; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t isid[6]; + uint16_t tsih; + uint32_t itt; + uint16_t cid; + uint16_t res2; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint8_t res3[16]; +}; + +struct iscsi_bhs_login_rsp { + uint8_t opcode : 6; /* opcode = 0x23 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t version_max; + uint8_t version_act; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t isid[6]; + uint16_t tsih; + uint32_t itt; + uint32_t res2; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint8_t status_class; + uint8_t status_detail; + uint8_t res3[10]; +}; + +struct iscsi_bhs_logout_req { + uint8_t opcode : 6; /* opcode = 0x06 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t reason : 7; + uint8_t reason_1 : 1; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t res2[8]; + uint32_t itt; + uint16_t cid; + uint16_t res3; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint8_t res4[16]; +}; + +struct iscsi_bhs_logout_resp { + uint8_t opcode : 6; /* opcode = 0x26 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t response; + uint8_t res; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t res2[8]; + uint32_t itt; + uint32_t res3; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint32_t res4; + uint16_t time_2_wait; + uint16_t time_2_retain; + uint32_t res5; +}; + 
+struct iscsi_bhs_nop_in { + uint8_t opcode : 6; /* opcode = 0x20 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint8_t res3[12]; +}; + +struct iscsi_bhs_nop_out { + uint8_t opcode : 6; /* opcode = 0x00 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint8_t res4[16]; +}; + +struct iscsi_bhs_r2t { + uint8_t opcode : 6; /* opcode = 0x31 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t rsv[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint32_t r2t_sn; + uint32_t buffer_offset; + uint32_t desired_xfer_len; +}; + +struct iscsi_bhs_reject { + uint8_t opcode : 6; /* opcode = 0x3f */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t reason; + uint8_t res; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t res2[8]; + uint32_t ffffffff; + uint32_t res3; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint32_t data_sn; + uint8_t res4[8]; +}; + +struct iscsi_bhs_scsi_req { + uint8_t opcode : 6; /* opcode = 0x01 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t attribute : 3; + uint8_t reserved2 : 2; + uint8_t write_bit : 1; + uint8_t read_bit : 1; + uint8_t final_bit : 1; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t expected_data_xfer_len; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint8_t cdb[16]; +}; + +struct iscsi_bhs_scsi_resp { + uint8_t opcode : 6; /* opcode = 0x21 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t response; + uint8_t status; + 
uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t res4[8]; + uint32_t itt; + uint32_t snacktag; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint32_t exp_data_sn; + uint32_t bi_read_res_cnt; + uint32_t res_cnt; +}; + +struct iscsi_bhs_data_in { + uint8_t opcode : 6; /* opcode = 0x25 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res; + uint8_t status; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint32_t data_sn; + uint32_t buffer_offset; + uint32_t res_cnt; +}; + +struct iscsi_bhs_data_out { + uint8_t opcode : 6; /* opcode = 0x05 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t res3; + uint32_t exp_stat_sn; + uint32_t res4; + uint32_t data_sn; + uint32_t buffer_offset; + uint32_t res5; +}; + +struct iscsi_bhs_snack_req { + uint8_t opcode : 6; /* opcode = 0x10 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t res5; + uint32_t exp_stat_sn; + uint8_t res6[8]; + uint32_t beg_run; + uint32_t run_len; +}; + +struct iscsi_bhs_task_req { + uint8_t opcode : 6; /* opcode = 0x02 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ref_task_tag; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint32_t ref_cmd_sn; + uint32_t exp_data_sn; + uint8_t res5[8]; +}; + +struct iscsi_bhs_task_resp { + uint8_t opcode : 6; /* opcode = 0x22 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t response; + uint8_t res; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint8_t res2[8]; + uint32_t itt; + uint32_t res3; + uint32_t
stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint8_t res4[12]; +}; + +struct iscsi_bhs_text_req { + uint8_t opcode : 6; /* opcode = 0x04 */ + uint8_t immediate : 1; + uint8_t reserved : 1; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t cmd_sn; + uint32_t exp_stat_sn; + uint8_t res3[16]; +}; + +struct iscsi_bhs_text_resp { + uint8_t opcode : 6; /* opcode = 0x24 */ + uint8_t reserved : 2; + uint8_t flags; + uint8_t res[2]; + uint8_t total_ahs_len; + uint8_t data_segment_len[3]; + uint64_t lun; + uint32_t itt; + uint32_t ttt; + uint32_t stat_sn; + uint32_t exp_cmd_sn; + uint32_t max_cmd_sn; + uint8_t res4[12]; +}; + +/* generic flags */ +#define ISCSI_FLAG_FINAL 0x80 + +/* login flags */ +#define ISCSI_LOGIN_TRANSIT 0x80 +#define ISCSI_LOGIN_CONTINUE 0x40 +#define ISCSI_LOGIN_CURRENT_STAGE_MASK 0x0c +#define ISCSI_LOGIN_CURRENT_STAGE_0 0x04 +#define ISCSI_LOGIN_CURRENT_STAGE_1 0x08 +#define ISCSI_LOGIN_CURRENT_STAGE_3 0x0c +#define ISCSI_LOGIN_NEXT_STAGE_MASK 0x03 +#define ISCSI_LOGIN_NEXT_STAGE_0 0x01 +#define ISCSI_LOGIN_NEXT_STAGE_1 0x02 +#define ISCSI_LOGIN_NEXT_STAGE_3 0x03 + +/* text flags */ +#define ISCSI_TEXT_CONTINUE 0x40 + +/* datain flags */ +#define ISCSI_DATAIN_ACKNOLWEDGE 0x40 +#define ISCSI_DATAIN_OVERFLOW 0x04 +#define ISCSI_DATAIN_UNDERFLOW 0x02 +#define ISCSI_DATAIN_STATUS 0x01 + +/* SCSI resp flags */ +#define ISCSI_SCSI_BIDI_OVERFLOW 0x10 +#define ISCSI_SCSI_BIDI_UNDERFLOW 0x08 +#define ISCSI_SCSI_OVERFLOW 0x04 +#define ISCSI_SCSI_UNDERFLOW 0x02 + +/* SCSI task flags */ +#define ISCSI_TASK_FUNCTION_MASK 0x7f + +/* Reason for Reject */ +#define ISCSI_REASON_RESERVED 0x1 +#define ISCSI_REASON_DATA_DIGEST_ERROR 0x2 +#define ISCSI_REASON_DATA_SNACK_REJECT 0x3 +#define ISCSI_REASON_PROTOCOL_ERROR 0x4 +#define ISCSI_REASON_CMD_NOT_SUPPORTED 0x5 +#define ISCSI_REASON_IMM_CMD_REJECT 0x6 +#define ISCSI_REASON_TASK_IN_PROGRESS 0x7 +#define 
ISCSI_REASON_INVALID_SNACK 0x8 +#define ISCSI_REASON_INVALID_PDU_FIELD 0x9 +#define ISCSI_REASON_LONG_OPERATION_REJECT 0xa +#define ISCSI_REASON_NEGOTIATION_RESET 0xb +#define ISCSI_REASON_WAIT_FOR_RESET 0xc + +#define ISCSI_FLAG_SNACK_TYPE_DATA 0 +#define ISCSI_FLAG_SNACK_TYPE_R2T 0 +#define ISCSI_FLAG_SNACK_TYPE_STATUS 1 +#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK 2 +#define ISCSI_FLAG_SNACK_TYPE_RDATA 3 +#define ISCSI_FLAG_SNACK_TYPE_MASK 0x0F /* 4 bits */ + +struct iscsi_ahs { + /* 0-3 */ + uint8_t ahs_len[2]; + uint8_t ahs_type; + uint8_t ahs_specific1; + /* 4-x */ + uint8_t ahs_specific2[]; +}; + +#define ISCSI_BHS_LOGIN_GET_TBIT(X) (!!(X & ISCSI_LOGIN_TRANSIT)) +#define ISCSI_BHS_LOGIN_GET_CBIT(X) (!!(X & ISCSI_LOGIN_CONTINUE)) +#define ISCSI_BHS_LOGIN_GET_CSG(X) ((X & ISCSI_LOGIN_CURRENT_STAGE_MASK) >> 2) +#define ISCSI_BHS_LOGIN_GET_NSG(X) (X & ISCSI_LOGIN_NEXT_STAGE_MASK) + +#define ISCSI_CLASS_SUCCESS 0x00 +#define ISCSI_CLASS_REDIRECT 0x01 +#define ISCSI_CLASS_INITIATOR_ERROR 0x02 +#define ISCSI_CLASS_TARGET_ERROR 0x03 + +/* Class (Success) detailed info: 0 */ +#define ISCSI_LOGIN_ACCEPT 0x00 + +/* Class (Redirection) detailed info: 1 */ +#define ISCSI_LOGIN_TARGET_TEMPORARILY_MOVED 0x01 +#define ISCSI_LOGIN_TARGET_PERMANENTLY_MOVED 0x02 + +/* Class (Initiator Error) detailed info: 2 */ +#define ISCSI_LOGIN_INITIATOR_ERROR 0x00 +#define ISCSI_LOGIN_AUTHENT_FAIL 0x01 +#define ISCSI_LOGIN_AUTHORIZATION_FAIL 0x02 +#define ISCSI_LOGIN_TARGET_NOT_FOUND 0x03 +#define ISCSI_LOGIN_TARGET_REMOVED 0x04 +#define ISCSI_LOGIN_UNSUPPORTED_VERSION 0x05 +#define ISCSI_LOGIN_TOO_MANY_CONNECTIONS 0x06 +#define ISCSI_LOGIN_MISSING_PARMS 0x07 +#define ISCSI_LOGIN_CONN_ADD_FAIL 0x08 +#define ISCSI_LOGIN_NOT_SUPPORTED_SESSION_TYPE 0x09 +#define ISCSI_LOGIN_NO_SESSION 0x0a +#define ISCSI_LOGIN_INVALID_LOGIN_REQUEST 0x0b + +/* Class (Target Error) detailed info: 3 */ +#define ISCSI_LOGIN_STATUS_TARGET_ERROR 0x00 +#define ISCSI_LOGIN_STATUS_SERVICE_UNAVAILABLE 0x01 +#define 
ISCSI_LOGIN_STATUS_NO_RESOURCES 0x02 + +#endif /* SPDK_ISCSI_SPEC_H */ diff --git a/src/spdk/include/spdk/json.h b/src/spdk/include/spdk/json.h new file mode 100644 index 000000000..8109e5188 --- /dev/null +++ b/src/spdk/include/spdk/json.h @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * JSON parsing and encoding + */ + +#ifndef SPDK_JSON_H_ +#define SPDK_JSON_H_ + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum spdk_json_val_type { + SPDK_JSON_VAL_INVALID = 0, +#define SPDK_JSON_VAL_ANY SPDK_JSON_VAL_INVALID + SPDK_JSON_VAL_NULL = 1U << 1, + SPDK_JSON_VAL_TRUE = 1U << 2, + SPDK_JSON_VAL_FALSE = 1U << 3, + SPDK_JSON_VAL_NUMBER = 1U << 4, + SPDK_JSON_VAL_STRING = 1U << 5, + SPDK_JSON_VAL_ARRAY_BEGIN = 1U << 6, + SPDK_JSON_VAL_ARRAY_END = 1U << 7, + SPDK_JSON_VAL_OBJECT_BEGIN = 1U << 8, + SPDK_JSON_VAL_OBJECT_END = 1U << 9, + SPDK_JSON_VAL_NAME = 1U << 10, +}; + +struct spdk_json_val { + /** + * Pointer to the location of the value within the parsed JSON input. + * + * For SPDK_JSON_VAL_STRING and SPDK_JSON_VAL_NAME, + * this points to the beginning of the decoded UTF-8 string without quotes. + * + * For SPDK_JSON_VAL_NUMBER, this points to the beginning of the number as represented in + * the original JSON (text representation, not converted to a numeric value). + */ + void *start; + + /** + * Length of value. + * + * For SPDK_JSON_VAL_STRING, SPDK_JSON_VAL_NUMBER, and SPDK_JSON_VAL_NAME, + * this is the length in bytes of the value starting at \ref start. + * + * For SPDK_JSON_VAL_ARRAY_BEGIN and SPDK_JSON_VAL_OBJECT_BEGIN, + * this is the number of values contained within the array or object (including + * nested objects and arrays, but not including the _END value). The array or object _END + * value can be found by advancing len values from the _BEGIN value. + */ + uint32_t len; + + /** + * Type of value. + */ + enum spdk_json_val_type type; +}; + +/** + * Invalid JSON syntax. + */ +#define SPDK_JSON_PARSE_INVALID -1 + +/** + * JSON was valid up to the end of the current buffer, but did not represent a complete JSON value. + */ +#define SPDK_JSON_PARSE_INCOMPLETE -2 + +#define SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED -3 + +/** + * Decode JSON strings and names in place (modify the input buffer). 
+ */ +#define SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE 0x000000001 + +/** + * Allow parsing of comments. + * + * Comments are not allowed by the JSON RFC, so this is not enabled by default. + */ +#define SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS 0x000000002 + +/* + * Parse JSON data. + * + * \param json Raw JSON data; must be encoded in UTF-8. + * Note that the data may be modified to perform in-place string decoding. + * + * \param size Size of data in bytes. + * + * \param end If non-NULL, this will be filled with a pointer to the byte just beyond the end + * of the valid JSON. + * + * \return Number of values parsed, or negative on failure: + * SPDK_JSON_PARSE_INVALID if the provided data was not valid JSON, or + * SPDK_JSON_PARSE_INCOMPLETE if the provided data was not a complete JSON value. + */ +ssize_t spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values, + void **end, uint32_t flags); + +typedef int (*spdk_json_decode_fn)(const struct spdk_json_val *val, void *out); + +struct spdk_json_object_decoder { + const char *name; + size_t offset; + spdk_json_decode_fn decode_func; + bool optional; +}; + +int spdk_json_decode_object(const struct spdk_json_val *values, + const struct spdk_json_object_decoder *decoders, size_t num_decoders, void *out); +int spdk_json_decode_array(const struct spdk_json_val *values, spdk_json_decode_fn decode_func, + void *out, size_t max_size, size_t *out_size, size_t stride); + +int spdk_json_decode_bool(const struct spdk_json_val *val, void *out); +int spdk_json_decode_uint16(const struct spdk_json_val *val, void *out); +int spdk_json_decode_int32(const struct spdk_json_val *val, void *out); +int spdk_json_decode_uint32(const struct spdk_json_val *val, void *out); +int spdk_json_decode_uint64(const struct spdk_json_val *val, void *out); +int spdk_json_decode_string(const struct spdk_json_val *val, void *out); + +/** + * Get length of a value in number of values.
+ * + * This can be used to skip over a value while interpreting parse results. + * + * For SPDK_JSON_VAL_ARRAY_BEGIN and SPDK_JSON_VAL_OBJECT_BEGIN, + * this returns the number of values contained within this value, plus the _BEGIN and _END values. + * + * For all other values, this returns 1. + */ +size_t spdk_json_val_len(const struct spdk_json_val *val); + +/** + * Compare JSON string with null terminated C string. + * + * \return true if strings are equal or false if not + */ +bool spdk_json_strequal(const struct spdk_json_val *val, const char *str); + +/** + * Equivalent of strdup() for JSON string values. + * + * If val is not representable as a C string (contains embedded '\0' characters), + * returns NULL. + * + * Caller is responsible for passing the result to free() when it is no longer needed. + */ +char *spdk_json_strdup(const struct spdk_json_val *val); + +int spdk_json_number_to_uint16(const struct spdk_json_val *val, uint16_t *num); +int spdk_json_number_to_int32(const struct spdk_json_val *val, int32_t *num); +int spdk_json_number_to_uint32(const struct spdk_json_val *val, uint32_t *num); +int spdk_json_number_to_uint64(const struct spdk_json_val *val, uint64_t *num); + +struct spdk_json_write_ctx; + +#define SPDK_JSON_WRITE_FLAG_FORMATTED 0x00000001 + +typedef int (*spdk_json_write_cb)(void *cb_ctx, const void *data, size_t size); + +struct spdk_json_write_ctx *spdk_json_write_begin(spdk_json_write_cb write_cb, void *cb_ctx, + uint32_t flags); +int spdk_json_write_end(struct spdk_json_write_ctx *w); +int spdk_json_write_null(struct spdk_json_write_ctx *w); +int spdk_json_write_bool(struct spdk_json_write_ctx *w, bool val); +int spdk_json_write_int32(struct spdk_json_write_ctx *w, int32_t val); +int spdk_json_write_uint32(struct spdk_json_write_ctx *w, uint32_t val); +int spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val); +int spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val); +int spdk_json_write_string(struct 
spdk_json_write_ctx *w, const char *val); +int spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len); + +/** + * Write null-terminated UTF-16LE string. + * + * \param w JSON write context. + * \param val UTF-16LE string; must be null terminated. + * \return 0 on success or negative on failure. + */ +int spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val); + +/** + * Write UTF-16LE string. + * + * \param w JSON write context. + * \param val UTF-16LE string; may contain embedded null characters. + * \param len Length of val in 16-bit code units (i.e. size of string in bytes divided by 2). + * \return 0 on success or negative on failure. + */ +int spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, + size_t len); + +int spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt, + ...) __attribute__((__format__(__printf__, 2, 3))); +int spdk_json_write_string_fmt_v(struct spdk_json_write_ctx *w, const char *fmt, va_list args); + +int spdk_json_write_array_begin(struct spdk_json_write_ctx *w); +int spdk_json_write_array_end(struct spdk_json_write_ctx *w); +int spdk_json_write_object_begin(struct spdk_json_write_ctx *w); +int spdk_json_write_object_end(struct spdk_json_write_ctx *w); +int spdk_json_write_name(struct spdk_json_write_ctx *w, const char *name); +int spdk_json_write_name_raw(struct spdk_json_write_ctx *w, const char *name, size_t len); + +int spdk_json_write_val(struct spdk_json_write_ctx *w, const struct spdk_json_val *val); + +/* + * Append bytes directly to the output stream without validation. + * + * Can be used to write values with specific encodings that differ from the JSON writer output. 
+ */ +int spdk_json_write_val_raw(struct spdk_json_write_ctx *w, const void *data, size_t len); + +/* Utility functions */ +int spdk_json_write_named_null(struct spdk_json_write_ctx *w, const char *name); +int spdk_json_write_named_bool(struct spdk_json_write_ctx *w, const char *name, bool val); +int spdk_json_write_named_int32(struct spdk_json_write_ctx *w, const char *name, int32_t val); +int spdk_json_write_named_uint32(struct spdk_json_write_ctx *w, const char *name, uint32_t val); +int spdk_json_write_named_uint64(struct spdk_json_write_ctx *w, const char *name, uint64_t val); +int spdk_json_write_named_int64(struct spdk_json_write_ctx *w, const char *name, int64_t val); +int spdk_json_write_named_string(struct spdk_json_write_ctx *w, const char *name, const char *val); +int spdk_json_write_named_string_fmt(struct spdk_json_write_ctx *w, const char *name, + const char *fmt, ...) __attribute__((__format__(__printf__, 3, 4))); +int spdk_json_write_named_string_fmt_v(struct spdk_json_write_ctx *w, const char *name, + const char *fmt, va_list args); + +int spdk_json_write_named_array_begin(struct spdk_json_write_ctx *w, const char *name); +int spdk_json_write_named_object_begin(struct spdk_json_write_ctx *w, const char *name); + +/** + * Return JSON value associated with key \c key_name. Subobjects won't be searched. + * + * \param object JSON object to be examined + * \param key_name name of the key + * \param key optional, will be set with found key + * \param val optional, will be set with value of the key + * \param type search for specific value type. Pass SPDK_JSON_VAL_ANY to match any type. + * \return 0 if found or negative error code: + * -EINVAL - json object is invalid + * -ENOENT - key not found + * -EDOM - key exists but value type mismatch. 
+ */ +int spdk_json_find(struct spdk_json_val *object, const char *key_name, struct spdk_json_val **key, + struct spdk_json_val **val, enum spdk_json_val_type type); + +/** + * The same as calling \c spdk_json_find() function with \c type set to \c SPDK_JSON_VAL_STRING + * + * \param object JSON object to be examined + * \param key_name name of the key + * \param key optional, will be set with found key + * \param val optional, will be set with value of the key + * \return See \c spdk_json_find + */ + +int spdk_json_find_string(struct spdk_json_val *object, const char *key_name, + struct spdk_json_val **key, struct spdk_json_val **val); + +/** + * The same as calling \c spdk_json_find() function with \c type set to \c SPDK_JSON_VAL_ARRAY_BEGIN + * + * \param object JSON object to be examined + * \param key_name name of the key + * \param key optional, will be set with found key + * \param value optional, will be set with key value + * \return See \c spdk_json_find + */ +int spdk_json_find_array(struct spdk_json_val *object, const char *key_name, + struct spdk_json_val **key, struct spdk_json_val **value); + +/** + * Return first JSON value in given JSON object. + * + * \param object pointer to JSON object begin + * \return Pointer to first object or NULL if object is empty or is not a JSON object + */ +struct spdk_json_val *spdk_json_object_first(struct spdk_json_val *object); + +/** + * Return first JSON value in array. + * + * \param array_begin pointer to JSON array begin + * \return Pointer to first JSON value or NULL if array is empty or is not a JSON array. + */ + +struct spdk_json_val *spdk_json_array_first(struct spdk_json_val *array_begin); + +/** + * Advance to the next JSON value in JSON object or array. + * + * \warning if \c pos is not JSON key or JSON array element behaviour is undefined. 
+ * + * \param pos pointer to JSON key if iterating over JSON object or array element + * \return next JSON value or NULL if there is no more objects or array elements + */ +struct spdk_json_val *spdk_json_next(struct spdk_json_val *pos); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/jsonrpc.h b/src/spdk/include/spdk/jsonrpc.h new file mode 100644 index 000000000..650a06ff4 --- /dev/null +++ b/src/spdk/include/spdk/jsonrpc.h @@ -0,0 +1,352 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * JSON-RPC 2.0 server implementation + */ + +#ifndef SPDK_JSONRPC_H_ +#define SPDK_JSONRPC_H_ + +#include "spdk/stdinc.h" + +#include "spdk/json.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Defined error codes in JSON-RPC specification 2.0 */ +#define SPDK_JSONRPC_ERROR_PARSE_ERROR -32700 +#define SPDK_JSONRPC_ERROR_INVALID_REQUEST -32600 +#define SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND -32601 +#define SPDK_JSONRPC_ERROR_INVALID_PARAMS -32602 +#define SPDK_JSONRPC_ERROR_INTERNAL_ERROR -32603 + +/* Custom error codes in SPDK + + * Error codes from and including -32768 to -32000 are reserved for + * predefined errors, hence custom error codes must be outside of the range. + */ +#define SPDK_JSONRPC_ERROR_INVALID_STATE -1 + +struct spdk_jsonrpc_server; +struct spdk_jsonrpc_request; + +struct spdk_jsonrpc_client; +struct spdk_jsonrpc_client_request; + +struct spdk_jsonrpc_client_response { + struct spdk_json_val *version; + struct spdk_json_val *id; + struct spdk_json_val *result; + struct spdk_json_val *error; +}; + +/** + * User callback to handle a single JSON-RPC request. + * + * The user should respond by calling one of spdk_jsonrpc_begin_result() or + * spdk_jsonrpc_send_error_response(). + * + * \param request JSON-RPC request to handle. + * \param method Function to handle the request. + * \param params Parameters passed to the function 'method'. 
+ */ +typedef void (*spdk_jsonrpc_handle_request_fn)( + struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, + const struct spdk_json_val *params); + +struct spdk_jsonrpc_server_conn; + +typedef void (*spdk_jsonrpc_conn_closed_fn)(struct spdk_jsonrpc_server_conn *conn, void *arg); + +/** + * Function for specific RPC method response parsing handlers. + * + * \param parser_ctx context where the analysis results are put. + * \param result JSON values returned in response to this method. + * + * \return 0 on success. + * SPDK_JSON_PARSE_INVALID on failure. + */ +typedef int (*spdk_jsonrpc_client_response_parser)( + void *parser_ctx, + const struct spdk_json_val *result); + +/** + * Create a JSON-RPC server listening on the required address. + * + * \param domain Socket family. + * \param protocol Protocol. + * \param listen_addr Listening address. + * \param addrlen Length of address. + * \param handle_request User callback to handle a JSON-RPC request. + * + * \return a pointer to the JSON-RPC server. + */ +struct spdk_jsonrpc_server *spdk_jsonrpc_server_listen(int domain, int protocol, + struct sockaddr *listen_addr, socklen_t addrlen, spdk_jsonrpc_handle_request_fn handle_request); + +/** + * Poll the requests to the JSON-RPC server. + * + * This function does accept, receive, handle the requests and reply to them. + * + * \param server JSON-RPC server. + * + * \return 0 on success. + */ +int spdk_jsonrpc_server_poll(struct spdk_jsonrpc_server *server); + +/** + * Shutdown the JSON-RPC server. + * + * \param server JSON-RPC server. + */ +void spdk_jsonrpc_server_shutdown(struct spdk_jsonrpc_server *server); + +/** + * Return connection associated to \c request + * + * \param request JSON-RPC request + * \return JSON RPC server connection + */ +struct spdk_jsonrpc_server_conn *spdk_jsonrpc_get_conn(struct spdk_jsonrpc_request *request); + +/** + * Add callback called when connection is closed. Pair of \c cb and \c ctx must be unique or error is returned. 
+ * Registered callback is called only once and there is no need to call \c spdk_jsonrpc_conn_del_close_cb + * from inside \c cb. + * + * \note Current implementation allows only one close callback per connection. + * + * \param conn JSON RPC server connection + * \param cb callback function + * \param ctx argument for \c cb + * + * \return 0 on success, or negated errno code: + * -EEXIST \c cb and \c ctx is already registered + * -ENOTCONN Callback can't be added because connection is closed. + * -ENOSPC no more space to register callback. + */ +int spdk_jsonrpc_conn_add_close_cb(struct spdk_jsonrpc_server_conn *conn, + spdk_jsonrpc_conn_closed_fn cb, void *ctx); + +/** + * Remove registered close callback. + * + * \param conn JSON RPC server connection + * \param cb callback function + * \param ctx argument for \c cb + * + * \return 0 on success, or negated errno code: + * -ENOENT \c cb and \c ctx pair is not registered + */ +int spdk_jsonrpc_conn_del_close_cb(struct spdk_jsonrpc_server_conn *conn, + spdk_jsonrpc_conn_closed_fn cb, void *ctx); + +/** + * Begin building a response to a JSON-RPC request. + * + * If this function returns non-NULL, the user must call spdk_jsonrpc_end_result() + * on the request after writing the desired response object to the spdk_json_write_ctx. + * + * \param request JSON-RPC request to respond to. + + * \return Non-NULL pointer to JSON write context to write the response object to. + */ +struct spdk_json_write_ctx *spdk_jsonrpc_begin_result(struct spdk_jsonrpc_request *request); + +/** + * Complete and send a JSON-RPC response. + * + * \param request Request to complete the response for. + * \param w JSON write context returned from spdk_jsonrpc_begin_result(). + */ +void spdk_jsonrpc_end_result(struct spdk_jsonrpc_request *request, struct spdk_json_write_ctx *w); + +/** + * Send an error response to a JSON-RPC request. + * + * This is shorthand for spdk_jsonrpc_begin_result() + spdk_jsonrpc_end_result() + * with an error object. 
+ * + * \param request JSON-RPC request to respond to. + * \param error_code Integer error code to return (may be one of the + * SPDK_JSONRPC_ERROR_ errors, or a custom error code). + * \param msg String error message to return. + */ +void spdk_jsonrpc_send_error_response(struct spdk_jsonrpc_request *request, + int error_code, const char *msg); + +/** + * Send an error response to a JSON-RPC request. + * + * This is shorthand for printf() + spdk_jsonrpc_send_error_response(). + * + * \param request JSON-RPC request to respond to. + * \param error_code Integer error code to return (may be one of the + * SPDK_JSONRPC_ERROR_ errors, or a custom error code). + * \param fmt Printf-like format string. + */ +void spdk_jsonrpc_send_error_response_fmt(struct spdk_jsonrpc_request *request, + int error_code, const char *fmt, ...) __attribute__((format(printf, 3, 4))); + +/** + * Begin building a JSON-RPC request. + * + * If this function returns non-NULL, the user must call spdk_jsonrpc_end_request() + * on the request after writing the desired request object to the spdk_json_write_ctx. + * + * \param request JSON-RPC request. + * \param id ID index for the request. If < 0 skip ID. + * \param method Name of the RPC method. If NULL caller will have to create "method" key. + * + * \return JSON write context or NULL in case of error. + */ +struct spdk_json_write_ctx * +spdk_jsonrpc_begin_request(struct spdk_jsonrpc_client_request *request, int32_t id, + const char *method); + +/** + * Complete a JSON-RPC request. + * + * \param request JSON-RPC request. + * \param w JSON write context returned from spdk_jsonrpc_begin_request(). + */ +void spdk_jsonrpc_end_request(struct spdk_jsonrpc_client_request *request, + struct spdk_json_write_ctx *w); + +/** + * Connect to the specified RPC server. + * + * \param addr RPC socket address. + * \param addr_family Protocol families of address. 
+ * + * \return JSON-RPC client on success, NULL on failure and errno set to indicate + * the cause of the error. + */ +struct spdk_jsonrpc_client *spdk_jsonrpc_client_connect(const char *addr, int addr_family); + +/** + * Close JSON-RPC connection and free \c client object. + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using the \c client object. + * + * \param client JSON-RPC client. + */ +void spdk_jsonrpc_client_close(struct spdk_jsonrpc_client *client); + +/** + * Create one JSON-RPC request. Returned request must be passed to + * \c spdk_jsonrpc_client_send_request when done or to \c spdk_jsonrpc_client_free_request + * if discarded. + * + * \return pointer to JSON-RPC request object. + */ +struct spdk_jsonrpc_client_request *spdk_jsonrpc_client_create_request(void); + +/** + * Free one JSON-RPC request. + * + * \param req pointer to JSON-RPC request object. + */ +void spdk_jsonrpc_client_free_request(struct spdk_jsonrpc_client_request *req); + +/** + * Send the JSON-RPC request in JSON-RPC client. Library takes ownership of the + * request object and will free it when done. + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using the \c client object. + * + * \param client JSON-RPC client. + * \param req JSON-RPC request. + * + * \return 0 on success or negative error code. + * -ENOSPC - no space left to queue another request. Try again later. + */ +int spdk_jsonrpc_client_send_request(struct spdk_jsonrpc_client *client, + struct spdk_jsonrpc_client_request *req); + +/** + * Poll the JSON-RPC client. When any response is available use + * \c spdk_jsonrpc_client_get_response to retrieve it. + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using the \c client object. + * + * \param client JSON-RPC client. 
+ * \param timeout Time in milliseconds this function will block. -1 block forever, 0 don't block. + * + * \return If no error occurred, this function returns a non-negative number indicating how + * many ready responses can be retrieved. If an error occurred, this function returns one of + * the following negated errno values: + * -ENOTCONN - not connected yet. Try again later. + * -EINVAL - response is detected to be invalid. Client connection should be terminated. + * -ENOSPC - no space to receive another response. User needs to retrieve waiting responses. + * -EIO - connection terminated (or other critical error). Client connection should be terminated. + * -ENOMEM - out of memory + */ +int spdk_jsonrpc_client_poll(struct spdk_jsonrpc_client *client, int timeout); + +/** + * Return JSON RPC response object representing next available response from client connection. + * Returned pointer must be freed using \c spdk_jsonrpc_client_free_response + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using the \c client object. + * + * \param client + * \return pointer to JSON RPC response object or NULL if no response available. + */ +struct spdk_jsonrpc_client_response *spdk_jsonrpc_client_get_response(struct spdk_jsonrpc_client + *client); + +/** + * Free response object obtained from \c spdk_jsonrpc_client_get_response + * + * \param resp pointer to JSON RPC response object. If NULL no operation is performed. + */ +void spdk_jsonrpc_client_free_response(struct spdk_jsonrpc_client_response *resp); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/likely.h b/src/spdk/include/spdk/likely.h new file mode 100644 index 000000000..034a9b98b --- /dev/null +++ b/src/spdk/include/spdk/likely.h @@ -0,0 +1,46 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Likely/unlikely branch prediction macros + */ + +#ifndef SPDK_LIKELY_H +#define SPDK_LIKELY_H + +#include "spdk/stdinc.h" + +#define spdk_unlikely(cond) __builtin_expect((cond), 0) +#define spdk_likely(cond) __builtin_expect(!!(cond), 1) + +#endif diff --git a/src/spdk/include/spdk/log.h b/src/spdk/include/spdk/log.h new file mode 100644 index 000000000..92c899ff1 --- /dev/null +++ b/src/spdk/include/spdk/log.h @@ -0,0 +1,224 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Logging interfaces + */ + +#ifndef SPDK_LOG_H +#define SPDK_LOG_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * for passing user-provided log call + * + * \param level Log level threshold. + * \param file Name of the current source file. + * \param line Current source file line. + * \param func Current source function name. + * \param format Format string to the message. + * \param args Additional arguments for format string. + */ +typedef void logfunc(int level, const char *file, const int line, + const char *func, const char *format, va_list args); + +/** + * Initialize the logging module. Messages prior + * to this call will be dropped. + */ +void spdk_log_open(logfunc *logf); + +/** + * Close the currently active log. Messages after this call + * will be dropped. + */ +void spdk_log_close(void); + +enum spdk_log_level { + /** All messages will be suppressed. */ + SPDK_LOG_DISABLED = -1, + SPDK_LOG_ERROR, + SPDK_LOG_WARN, + SPDK_LOG_NOTICE, + SPDK_LOG_INFO, + SPDK_LOG_DEBUG, +}; + +/** + * Set the log level threshold to log messages. Messages with a higher + * level than this are ignored. + * + * \param level Log level threshold to set to log messages. + */ +void spdk_log_set_level(enum spdk_log_level level); + +/** + * Get the current log level threshold. + * + * \return the current log level threshold. 
+ */ +enum spdk_log_level spdk_log_get_level(void); + +/** + * Set the log level threshold to include stack trace in log messages. + * Messages with a higher level than this will not contain stack trace. You + * can use \c SPDK_LOG_DISABLED to completely disable stack trace printing + * even if it is supported. + * + * \note This function has no effect if SPDK is built without stack trace + * printing support. + * + * \param level Log level threshold for stacktrace. + */ +void spdk_log_set_backtrace_level(enum spdk_log_level level); + +/** + * Get the current log level threshold for showing stack trace in log message. + * + * \return the current log level threshold for stack trace. + */ +enum spdk_log_level spdk_log_get_backtrace_level(void); + +/** + * Set the current log level threshold for printing to stderr. + * Messages with a level less than or equal to this level + * are also printed to stderr. You can use \c SPDK_LOG_DISABLED to completely + * suppress log printing. + * + * \param level Log level threshold for printing to stderr. + */ +void spdk_log_set_print_level(enum spdk_log_level level); + +/** + * Get the current log level print threshold. + * + * \return the current log level print threshold. + */ +enum spdk_log_level spdk_log_get_print_level(void); + +#ifdef DEBUG +#define SPDK_DEBUGLOG_FLAG_ENABLED(name) spdk_log_get_flag(name) +#else +#define SPDK_DEBUGLOG_FLAG_ENABLED(name) false +#endif + +#define SPDK_NOTICELOG(...) \ + spdk_log(SPDK_LOG_NOTICE, __FILE__, __LINE__, __func__, __VA_ARGS__) +#define SPDK_WARNLOG(...) \ + spdk_log(SPDK_LOG_WARN, __FILE__, __LINE__, __func__, __VA_ARGS__) +#define SPDK_ERRLOG(...) \ + spdk_log(SPDK_LOG_ERROR, __FILE__, __LINE__, __func__, __VA_ARGS__) +#define SPDK_PRINTF(...) \ + spdk_log(SPDK_LOG_NOTICE, NULL, -1, NULL, __VA_ARGS__) + +/** + * Write messages to the log file. If \c level is set to \c SPDK_LOG_DISABLED, + * this log message won't be written. + * + * \param level Log level threshold. 
+ * \param file Name of the current source file. + * \param line Current source line number. + * \param func Current source function name. + * \param format Format string to the message. + */ +void spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func, + const char *format, ...) __attribute__((__format__(__printf__, 5, 6))); + +/** + * Same as spdk_log except that instead of being called with variable number of + * arguments it is called with an argument list as defined in stdarg.h + * + * \param level Log level threshold. + * \param file Name of the current source file. + * \param line Current source line number. + * \param func Current source function name. + * \param format Format string to the message. + * \param ap printf arguments + */ +void spdk_vlog(enum spdk_log_level level, const char *file, const int line, const char *func, + const char *format, va_list ap); + +/** + * Log the contents of a raw buffer to a file. + * + * \param fp File to hold the log. + * \param label Label to print to the file. + * \param buf Buffer that holds the log information. + * \param len Length of buffer to dump. + */ +void spdk_log_dump(FILE *fp, const char *label, const void *buf, size_t len); + +/** + * Check whether the log flag exists and is enabled. + * + * \return true if enabled, or false otherwise. + */ +bool spdk_log_get_flag(const char *flag); + +/** + * Enable the log flag. + * + * \param flag Log flag to be enabled. + * + * \return 0 on success, -1 on failure. + */ +int spdk_log_set_flag(const char *flag); + +/** + * Clear a log flag. + * + * \param flag Log flag to clear. + * + * \return 0 on success, -1 on failure. + */ +int spdk_log_clear_flag(const char *flag); + +/** + * Show all the log flags and their usage. + * + * \param f File to hold all the flags' information. + * \param log_arg Command line option to set/enable the log flag. 
+ */ +void spdk_log_usage(FILE *f, const char *log_arg); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_LOG_H */ diff --git a/src/spdk/include/spdk/lvol.h b/src/spdk/include/spdk/lvol.h new file mode 100644 index 000000000..ca271a638 --- /dev/null +++ b/src/spdk/include/spdk/lvol.h @@ -0,0 +1,299 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Logical Volume Interface + */ + +#ifndef SPDK_LVOL_H +#define SPDK_LVOL_H + +#include "spdk/stdinc.h" +#include "spdk/blob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_bs_dev; +struct spdk_lvol_store; +struct spdk_lvol; + +enum lvol_clear_method { + LVOL_CLEAR_WITH_DEFAULT = BLOB_CLEAR_WITH_DEFAULT, + LVOL_CLEAR_WITH_NONE = BLOB_CLEAR_WITH_NONE, + LVOL_CLEAR_WITH_UNMAP = BLOB_CLEAR_WITH_UNMAP, + LVOL_CLEAR_WITH_WRITE_ZEROES = BLOB_CLEAR_WITH_WRITE_ZEROES, +}; + +enum lvs_clear_method { + LVS_CLEAR_WITH_UNMAP = BS_CLEAR_WITH_UNMAP, + LVS_CLEAR_WITH_WRITE_ZEROES = BS_CLEAR_WITH_WRITE_ZEROES, + LVS_CLEAR_WITH_NONE = BS_CLEAR_WITH_NONE, +}; + +/* Must include null terminator. */ +#define SPDK_LVS_NAME_MAX 64 +#define SPDK_LVOL_NAME_MAX 64 + +/** + * Parameters for lvolstore initialization. + */ +struct spdk_lvs_opts { + uint32_t cluster_sz; + enum lvs_clear_method clear_method; + char name[SPDK_LVS_NAME_MAX]; +}; + +/** + * Initialize an spdk_lvs_opts structure to the defaults. + * + * \param opts Pointer to the spdk_lvs_opts structure to initialize. + */ +void spdk_lvs_opts_init(struct spdk_lvs_opts *opts); + +/** + * Callback definition for lvolstore operations, including handle to lvs. + * + * \param cb_arg Custom arguments + * \param lvol_store Handle to lvol_store or NULL when lvserrno is set + * \param lvserrno Error + */ +typedef void (*spdk_lvs_op_with_handle_complete)(void *cb_arg, struct spdk_lvol_store *lvol_store, + int lvserrno); + +/** + * Callback definition for lvolstore operations without handle. + * + * \param cb_arg Custom arguments + * \param lvserrno Error + */ +typedef void (*spdk_lvs_op_complete)(void *cb_arg, int lvserrno); + + +/** + * Callback definition for lvol operations with handle to lvol. 
+ * + * \param cb_arg Custom arguments + * \param lvol Handle to lvol or NULL when lvserrno is set + * \param lvolerrno Error + */ +typedef void (*spdk_lvol_op_with_handle_complete)(void *cb_arg, struct spdk_lvol *lvol, + int lvolerrno); + +/** + * Callback definition for lvol operations without handle to lvol. + * + * \param cb_arg Custom arguments + * \param lvolerrno Error + */ +typedef void (*spdk_lvol_op_complete)(void *cb_arg, int lvolerrno); + +/** + * Initialize lvolstore on given bs_bdev. + * + * \param bs_dev This is created on the given bdev by using spdk_bdev_create_bs_dev() + * beforehand. + * \param o Options for lvolstore. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_lvs_init(struct spdk_bs_dev *bs_dev, struct spdk_lvs_opts *o, + spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Rename the given lvolstore. + * + * \param lvs Pointer to lvolstore. + * \param new_name New name of lvs. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvs_rename(struct spdk_lvol_store *lvs, const char *new_name, + spdk_lvs_op_complete cb_fn, void *cb_arg); + +/** + * Unload lvolstore. + * + * All lvols have to be closed beforehand, when doing unload. + * + * \param lvol_store Handle to lvolstore. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_lvs_unload(struct spdk_lvol_store *lvol_store, + spdk_lvs_op_complete cb_fn, void *cb_arg); + +/** + * Destroy lvolstore. + * + * All lvols have to be closed beforehand, when doing destroy. + * + * \param lvol_store Handle to lvolstore. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + * + * \return 0 on success, negative errno on failure. 
+ */ +int spdk_lvs_destroy(struct spdk_lvol_store *lvol_store, + spdk_lvs_op_complete cb_fn, void *cb_arg); + +/** + * Create lvol on given lvolstore with specified size. + * + * \param lvs Handle to lvolstore. + * \param name Name of lvol. + * \param sz size of lvol in bytes. + * \param thin_provisioned Enables thin provisioning. + * \param clear_method Changes default data clusters clear method + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, + bool thin_provisioned, enum lvol_clear_method clear_method, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); +/** + * Create snapshot of given lvol. + * + * \param lvol Handle to lvol. + * \param snapshot_name Name of created snapshot. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Create clone of given snapshot. + * + * \param lvol Handle to lvol snapshot. + * \param clone_name Name of created clone. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Rename lvol with new_name. + * + * \param lvol Handle to lvol. + * \param new_name new name for lvol. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void +spdk_lvol_rename(struct spdk_lvol *lvol, const char *new_name, + spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * \brief Returns whether it is possible to delete an lvol (i.e. lvol is not a snapshot that has at least one clone). 
+ * \param lvol Handle to lvol + */ +bool spdk_lvol_deletable(struct spdk_lvol *lvol); + +/** + * Close lvol and remove information about lvol from its lvolstore. + * + * \param lvol Handle to lvol. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * Close lvol, but information is kept on lvolstore. + * + * \param lvol Handle to lvol. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvol_close(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * Get I/O channel of bdev associated with specified lvol. + * + * \param lvol Handle to lvol. + * + * \return a pointer to the I/O channel. + */ +struct spdk_io_channel *spdk_lvol_get_io_channel(struct spdk_lvol *lvol); + +/** + * Load lvolstore from the given blobstore device. + * + * \param bs_dev Pointer to the blobstore device. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. + */ +void spdk_lvs_load(struct spdk_bs_dev *bs_dev, spdk_lvs_op_with_handle_complete cb_fn, + void *cb_arg); + +/** + * Open a lvol. + * + * \param lvol Handle to lvol. + * \param cb_fn Completion callback. + * \param cb_arg Completion callback custom arguments. 
+ */ +void spdk_lvol_open(struct spdk_lvol *lvol, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * Inflate lvol + * + * \param lvol Handle to lvol + * \param cb_fn Completion callback + * \param cb_arg Completion callback custom arguments + */ +void spdk_lvol_inflate(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * Decouple parent of lvol + * + * \param lvol Handle to lvol + * \param cb_fn Completion callback + * \param cb_arg Completion callback custom arguments + */ +void spdk_lvol_decouple_parent(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_LVOL_H */ diff --git a/src/spdk/include/spdk/memory.h b/src/spdk/include/spdk/memory.h new file mode 100644 index 000000000..a2cb19669 --- /dev/null +++ b/src/spdk/include/spdk/memory.h @@ -0,0 +1,60 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_MEMORY_H +#define SPDK_MEMORY_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SHIFT_2MB 21 /* (1 << 21) == 2MB */ +#define VALUE_2MB (1ULL << SHIFT_2MB) +#define MASK_2MB (VALUE_2MB - 1) + +#define SHIFT_4KB 12 /* (1 << 12) == 4KB */ +#define VALUE_4KB (1ULL << SHIFT_4KB) +#define MASK_4KB (VALUE_4KB - 1) + +#define _2MB_OFFSET(ptr) (((uintptr_t)(ptr)) & MASK_2MB) +#define _2MB_PAGE(ptr) FLOOR_2MB((uintptr_t)(ptr)) +#define FLOOR_2MB(x) (((uintptr_t)(x)) & ~MASK_2MB) +#define CEIL_2MB(x) FLOOR_2MB(((uintptr_t)(x)) + VALUE_2MB - 1) + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_MEMORY_H */ diff --git a/src/spdk/include/spdk/mmio.h b/src/spdk/include/spdk/mmio.h new file mode 100644 index 000000000..68b16605f --- /dev/null +++ b/src/spdk/include/spdk/mmio.h @@ -0,0 +1,139 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Memory-mapped I/O utility functions + */ + +#ifndef SPDK_MMIO_H +#define SPDK_MMIO_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/barrier.h" + +#ifdef __x86_64__ +#define SPDK_MMIO_64BIT 1 /* Can do atomic 64-bit memory read/write (over PCIe) */ +#else +#define SPDK_MMIO_64BIT 0 +#endif + +static inline uint8_t +spdk_mmio_read_1(const volatile uint8_t *addr) +{ + spdk_compiler_barrier(); + return *addr; +} + +static inline void +spdk_mmio_write_1(volatile uint8_t *addr, uint8_t val) +{ + spdk_compiler_barrier(); + *addr = val; +} + +static inline uint16_t +spdk_mmio_read_2(const volatile uint16_t *addr) +{ + spdk_compiler_barrier(); + return *addr; +} + +static inline void +spdk_mmio_write_2(volatile uint16_t *addr, uint16_t val) +{ + spdk_compiler_barrier(); + *addr = val; +} + +static inline uint32_t +spdk_mmio_read_4(const volatile uint32_t *addr) +{ + spdk_compiler_barrier(); + return *addr; +} + +static inline void +spdk_mmio_write_4(volatile uint32_t *addr, uint32_t val) +{ + spdk_compiler_barrier(); + *addr = val; +} + +static inline uint64_t +spdk_mmio_read_8(volatile uint64_t *addr) +{ + uint64_t val; + volatile uint32_t *addr32 = (volatile uint32_t *)addr; + + spdk_compiler_barrier(); + + if (SPDK_MMIO_64BIT) { + val = *addr; + } else { + /* + * Read lower 4 bytes before upper 4 bytes. + * This particular order is required by I/OAT. + * If the other order is required, use a pair of spdk_mmio_read_4() calls. 
+ */ + val = addr32[0]; + val |= (uint64_t)addr32[1] << 32; + } + + return val; +} + +static inline void +spdk_mmio_write_8(volatile uint64_t *addr, uint64_t val) +{ + volatile uint32_t *addr32 = (volatile uint32_t *)addr; + + spdk_compiler_barrier(); + + if (SPDK_MMIO_64BIT) { + *addr = val; + } else { + addr32[0] = (uint32_t)val; + addr32[1] = (uint32_t)(val >> 32); + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nbd.h b/src/spdk/include/spdk/nbd.h new file mode 100644 index 000000000..be57c09cd --- /dev/null +++ b/src/spdk/include/spdk/nbd.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Network block device layer + */ + +#ifndef SPDK_NBD_H_ +#define SPDK_NBD_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_bdev; +struct spdk_nbd_disk; +struct spdk_json_write_ctx; + +/** + * Initialize the network block device layer. + * + * \return 0 on success. + */ +int spdk_nbd_init(void); + +/** + * Stop and close all the running network block devices. + */ +void spdk_nbd_fini(void); + +/** + * Called when an NBD device has been started. + * On success, rc is assigned 0; On failure, rc is assigned negated errno. + */ +typedef void (*spdk_nbd_start_cb)(void *cb_arg, struct spdk_nbd_disk *nbd, + int rc); + +/** + * Start a network block device backed by the bdev. + * + * \param bdev_name Name of bdev exposed as a network block device. + * \param nbd_path Path to the registered network block device. + * \param cb_fn Callback to be always called. + * \param cb_arg Passed to cb_fn. + */ +void spdk_nbd_start(const char *bdev_name, const char *nbd_path, + spdk_nbd_start_cb cb_fn, void *cb_arg); + +/** + * Stop the running network block device safely. + * + * \param nbd A pointer to the network block device to stop. + */ +void spdk_nbd_stop(struct spdk_nbd_disk *nbd); + +/** + * Get the local filesystem path used for the network block device. + */ +const char *spdk_nbd_get_path(struct spdk_nbd_disk *nbd); + +/** + * Write NBD subsystem configuration into provided JSON context. 
+ * + * \param w JSON write context + */ +void spdk_nbd_write_config_json(struct spdk_json_write_ctx *w); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/net.h b/src/spdk/include/spdk/net.h new file mode 100644 index 000000000..e49322302 --- /dev/null +++ b/src/spdk/include/spdk/net.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Net framework abstraction layer + */ + +#ifndef SPDK_NET_H +#define SPDK_NET_H + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_sock; + +struct spdk_net_framework { + const char *name; + + void (*init)(void); + void (*fini)(void); + + STAILQ_ENTRY(spdk_net_framework) link; +}; + +/** + * Register a net framework. + * + * \param frame Net framework to register. + */ +void spdk_net_framework_register(struct spdk_net_framework *frame); + +#define SPDK_NET_FRAMEWORK_REGISTER(name, frame) \ +static void __attribute__((constructor)) net_framework_register_##name(void) \ +{ \ + spdk_net_framework_register(frame); \ +} + +/** + * Initialize the network interfaces by getting information through netlink socket. + * + * \return 0 on success, 1 on failure. + */ +int spdk_interface_init(void); + +/** + * Destroy the network interfaces. + */ +void spdk_interface_destroy(void); + +/** + * Net framework initialization callback. + * + * \param cb_arg Callback argument. + * \param rc 0 if net framework initialized successfully or negative errno if it failed. + */ +typedef void (*spdk_net_init_cb)(void *cb_arg, int rc); + +/** + * Net framework finish callback. + * + * \param cb_arg Callback argument. + */ +typedef void (*spdk_net_fini_cb)(void *cb_arg); + +void spdk_net_framework_init_next(int rc); + +/** + * Start all registered frameworks. + * + * \param cb_fn Net framework initialization callback. + * \param cb_arg Callback argument. + */ +void spdk_net_framework_start(spdk_net_init_cb cb_fn, void *cb_arg); + +void spdk_net_framework_fini_next(void); + +/** + * Stop all registered frameworks. 
+ */ +void spdk_net_framework_fini(spdk_net_fini_cb cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_NET_H */ diff --git a/src/spdk/include/spdk/notify.h b/src/spdk/include/spdk/notify.h new file mode 100644 index 000000000..fa9746503 --- /dev/null +++ b/src/spdk/include/spdk/notify.h @@ -0,0 +1,126 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_NOTIFY_H +#define SPDK_NOTIFY_H + +#include "spdk/stdinc.h" +#include "spdk/json.h" +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Opaque event type. + */ +struct spdk_notify_type; + +typedef int (*spdk_notify_foreach_type_cb)(const struct spdk_notify_type *type, void *ctx); + +#define SPDK_NOTIFY_MAX_NAME_SIZE 128 +#define SPDK_NOTIFY_MAX_CTX_SIZE 128 + +struct spdk_notify_event { + char type[SPDK_NOTIFY_MAX_NAME_SIZE]; + char ctx[SPDK_NOTIFY_MAX_CTX_SIZE]; +}; + +/** + * Callback type for event enumeration. + * + * \param idx Event index + * \param event Event data + * \param ctx User context + * \return Non zero to break iteration. + */ +typedef int (*spdk_notify_foreach_event_cb)(uint64_t idx, const struct spdk_notify_event *event, + void *ctx); + +/** + * Register \c type as new notification type. + * + * \note This function is thread safe. + * + * \param type New notification type to register. + * \return registered notification type or NULL on failure. + */ +struct spdk_notify_type *spdk_notify_type_register(const char *type); + +/** + * Return name of the notification type. + * + * \param type Notification type we are talking about. + * \return Name of notification type. + */ +const char *spdk_notify_type_get_name(const struct spdk_notify_type *type); + +/** + * Call cb_fn for all event types. + * + * \note Whole function call is under lock so user callback should not sleep. + * \param cb_fn + * \param ctx + */ +void spdk_notify_foreach_type(spdk_notify_foreach_type_cb cb_fn, void *ctx); + +/** + * Send given notification. + * + * \param type Notification type + * \param ctx Notification context + * + * \return Event index. + */ +uint64_t spdk_notify_send(const char *type, const char *ctx); + +/** + * Call cb_fn with events from given range. + * + * \note Whole function call is under lock so user callback should not sleep. 
+ * + * \param start_idx First event index + * \param cb_fn User callback function. Return non-zero to break iteration. + * \param max Maximum number of invocations of user callback function. + * \param ctx User context + * \return Number of user callback invocations + */ +uint64_t spdk_notify_foreach_event(uint64_t start_idx, uint64_t max, + spdk_notify_foreach_event_cb cb_fn, void *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_NOTIFY_H */ diff --git a/src/spdk/include/spdk/nvme.h b/src/spdk/include/spdk/nvme.h new file mode 100644 index 000000000..3f28f9e24 --- /dev/null +++ b/src/spdk/include/spdk/nvme.h @@ -0,0 +1,3236 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * NVMe driver public API + */ + +#ifndef SPDK_NVME_H +#define SPDK_NVME_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/env.h" +#include "spdk/nvme_spec.h" +#include "spdk/nvmf_spec.h" + +#define SPDK_NVME_TRANSPORT_NAME_FC "FC" +#define SPDK_NVME_TRANSPORT_NAME_PCIE "PCIE" +#define SPDK_NVME_TRANSPORT_NAME_RDMA "RDMA" +#define SPDK_NVME_TRANSPORT_NAME_TCP "TCP" + +#define SPDK_NVMF_PRIORITY_MAX_LEN 4 + +/** + * Opaque handle to a controller. Returned by spdk_nvme_probe()'s attach_cb. + */ +struct spdk_nvme_ctrlr; + +/** + * NVMe controller initialization options. + * + * A pointer to this structure will be provided for each probe callback from spdk_nvme_probe() to + * allow the user to request non-default options, and the actual options enabled on the controller + * will be provided during the attach callback. + */ +struct spdk_nvme_ctrlr_opts { + /** + * Number of I/O queues to request (used to set Number of Queues feature) + */ + uint32_t num_io_queues; + + /** + * Enable submission queue in controller memory buffer + */ + bool use_cmb_sqs; + + /** + * Don't initiate shutdown processing + */ + bool no_shn_notification; + + /** + * Type of arbitration mechanism + */ + enum spdk_nvme_cc_ams arb_mechanism; + + /** + * Maximum number of commands that the controller may launch at one time. 
The + * value is expressed as a power of two, valid values are from 0-7, and 7 means + * unlimited. + */ + uint8_t arbitration_burst; + + /** + * Number of commands that may be executed from the low priority queue in each + * arbitration round. This field is only valid when arb_mechanism is set to + * SPDK_NVME_CC_AMS_WRR (weighted round robin). + */ + uint8_t low_priority_weight; + + /** + * Number of commands that may be executed from the medium priority queue in each + * arbitration round. This field is only valid when arb_mechanism is set to + * SPDK_NVME_CC_AMS_WRR (weighted round robin). + */ + uint8_t medium_priority_weight; + + /** + * Number of commands that may be executed from the high priority queue in each + * arbitration round. This field is only valid when arb_mechanism is set to + * SPDK_NVME_CC_AMS_WRR (weighted round robin). + */ + uint8_t high_priority_weight; + + /** + * Keep alive timeout in milliseconds (0 = disabled). + * + * The NVMe library will set the Keep Alive Timer feature to this value and automatically + * send Keep Alive commands as needed. The library user must call + * spdk_nvme_ctrlr_process_admin_completions() periodically to ensure Keep Alive commands + * are sent. + */ + uint32_t keep_alive_timeout_ms; + + /** + * Specify the retry number when there is issue with the transport + */ + uint8_t transport_retry_count; + + /** + * The queue depth of each NVMe I/O queue. + */ + uint32_t io_queue_size; + + /** + * The host NQN to use when connecting to NVMe over Fabrics controllers. + * + * Unused for local PCIe-attached NVMe devices. + */ + char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + + /** + * The number of requests to allocate for each NVMe I/O queue. + * + * This should be at least as large as io_queue_size. + * + * A single I/O may allocate more than one request, since splitting may be necessary to + * conform to the device's maximum transfer size, PRP list compatibility requirements, + * or driver-assisted striping. 
+ */ + uint32_t io_queue_requests; + + /** + * Source address for NVMe-oF connections. + * Set src_addr and src_svcid to empty strings if no source address should be + * specified. + */ + char src_addr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; + + /** + * Source service ID (port) for NVMe-oF connections. + * Set src_addr and src_svcid to empty strings if no source address should be + * specified. + */ + char src_svcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1]; + + /** + * The host identifier to use when connecting to controllers with 64-bit host ID support. + * + * Set to all zeroes to specify that no host ID should be provided to the controller. + */ + uint8_t host_id[8]; + + /** + * The host identifier to use when connecting to controllers with extended (128-bit) host ID support. + * + * Set to all zeroes to specify that no host ID should be provided to the controller. + */ + uint8_t extended_host_id[16]; + + /** + * The I/O command set to select. + * + * If the requested command set is not supported, the controller + * initialization process will not proceed. By default, the NVM + * command set is used. + */ + enum spdk_nvme_cc_css command_set; + + /** + * Admin commands timeout in milliseconds (0 = no timeout). + * + * The timeout value is used for admin commands submitted internally + * by the nvme driver during initialization, before the user is able + * to call spdk_nvme_ctrlr_register_timeout_callback(). By default, + * this is set to 120 seconds, users can change it in the probing + * callback. + */ + uint32_t admin_timeout_ms; + + /** + * It is used for TCP transport. + * + * Set to true, means having header digest for the header in the NVMe/TCP PDU + */ + bool header_digest; + + /** + * It is used for TCP transport. + * + * Set to true, means having data digest for the data in the NVMe/TCP PDU + */ + bool data_digest; + + /** + * Disable logging of requests that are completed with error status. + * + * Defaults to 'false' (errors are logged). 
+ */ + bool disable_error_logging; + + /** + * It is used for RDMA transport + * Specify the transport ACK timeout. The value should be in range 0-31 where 0 means + * use driver-specific default value. The value is applied to each RDMA qpair + * and affects the time that qpair waits for transport layer acknowledgement + * until it retransmits a packet. The value should be chosen empirically + * to meet the needs of a particular application. A low value means less time + * the qpair waits for ACK which can increase the number of retransmissions. + * A large value can increase the time the connection is closed. + * The value of ACK timeout is calculated according to the formula + * 4.096 * 2^(transport_ack_timeout) usec. + */ + uint8_t transport_ack_timeout; + + /** + * The queue depth of NVMe Admin queue. + */ + uint16_t admin_queue_size; + + /** + * The size of spdk_nvme_ctrlr_opts according to the caller of this library is used for ABI + * compatibility. The library uses this field to know how many fields in this + * structure are valid. And the library will populate any remaining fields with default values. + */ + size_t opts_size; +}; + +/** + * Indicate whether a ctrlr handle is associated with a Discovery controller. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return true if a discovery controller, else false. + */ +bool spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the default options for the creation of a specific NVMe controller. + * + * \param[out] opts Will be filled with the default option. + * \param opts_size Must be set to sizeof(struct spdk_nvme_ctrlr_opts). + */ +void spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, + size_t opts_size); + +/** + * Reason for qpair disconnect at the transport layer. + * + * NONE implies that the qpair is still connected while UNKNOWN means that the + * qpair is disconnected, but the cause was not apparent. 
+ */ +enum spdk_nvme_qp_failure_reason { + SPDK_NVME_QPAIR_FAILURE_NONE = 0, + SPDK_NVME_QPAIR_FAILURE_LOCAL, + SPDK_NVME_QPAIR_FAILURE_REMOTE, + SPDK_NVME_QPAIR_FAILURE_UNKNOWN, +}; + +typedef enum spdk_nvme_qp_failure_reason spdk_nvme_qp_failure_reason; + +/** + * NVMe library transports + * + * NOTE: These are mapped directly to the NVMe over Fabrics TRTYPE values, except for PCIe, + * which is a special case since NVMe over Fabrics does not define a TRTYPE for local PCIe. + * + * Currently, this uses 256 for PCIe which is intentionally outside of the 8-bit range of TRTYPE. + * If the NVMe-oF specification ever defines a PCIe TRTYPE, this should be updated. + */ +enum spdk_nvme_transport_type { + /** + * PCIe Transport (locally attached devices) + */ + SPDK_NVME_TRANSPORT_PCIE = 256, + + /** + * RDMA Transport (RoCE, iWARP, etc.) + */ + SPDK_NVME_TRANSPORT_RDMA = SPDK_NVMF_TRTYPE_RDMA, + + /** + * Fibre Channel (FC) Transport + */ + SPDK_NVME_TRANSPORT_FC = SPDK_NVMF_TRTYPE_FC, + + /** + * TCP Transport + */ + SPDK_NVME_TRANSPORT_TCP = SPDK_NVMF_TRTYPE_TCP, + + /** + * Custom Transport (Not spec defined) + */ + SPDK_NVME_TRANSPORT_CUSTOM = 4096, +}; + +/* typedef added for coding style reasons */ +typedef enum spdk_nvme_transport_type spdk_nvme_transport_type_t; + +/** + * NVMe transport identifier. + * + * This identifies a unique endpoint on an NVMe fabric. + * + * A string representation of a transport ID may be converted to this type using + * spdk_nvme_transport_id_parse(). + */ +struct spdk_nvme_transport_id { + /** + * NVMe transport string. + */ + char trstring[SPDK_NVMF_TRSTRING_MAX_LEN + 1]; + + /** + * NVMe transport type. + */ + enum spdk_nvme_transport_type trtype; + + /** + * Address family of the transport address. + * + * For PCIe, this value is ignored. + */ + enum spdk_nvmf_adrfam adrfam; + + /** + * Transport address of the NVMe-oF endpoint. For transports which use IP + * addressing (e.g. RDMA), this should be an IP address. 
For PCIe, this + * can either be a zero length string (the whole bus) or a PCI address + * in the format DDDD:BB:DD.FF or DDDD.BB.DD.FF. For FC the string is + * formatted as: nn-0xWWNN:pn-0xWWPN” where WWNN is the Node_Name of the + * target NVMe_Port and WWPN is the N_Port_Name of the target NVMe_Port. + */ + char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; + + /** + * Transport service id of the NVMe-oF endpoint. For transports which use + * IP addressing (e.g. RDMA), this field should be the port number. For PCIe + * and FC this is always a zero length string. + */ + char trsvcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1]; + + /** + * Subsystem NQN of the NVMe over Fabrics endpoint. May be a zero length string. + */ + char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + + /** + * The Transport connection priority of the NVMe-oF endpoint. Currently this is + * only supported by the posix based sock implementation on the Kernel TCP stack. More + * information on this field can be found in the socket(7) man page. + */ + int priority; +}; + +/** + * NVMe host identifier + * + * Used for defining the host identity for an NVMe-oF connection. + * + * In terms of configuration, this object can be considered a subtype of TransportID. + * Please see etc/spdk/nvmf.conf.in for more details. + * + * A string representation of this type may be converted to this type using + * spdk_nvme_host_id_parse(). + */ +struct spdk_nvme_host_id { + /** + * Transport address to be used by the host when connecting to the NVMe-oF endpoint. + * May be an IP address or a zero length string for transports which + * use IP addressing (e.g. RDMA). + * For PCIe and FC this is always a zero length string. + */ + char hostaddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; + + /** + * Transport service ID used by the host when connecting to the NVMe. + * May be a port number or a zero length string for transports which + * use IP addressing (e.g. RDMA). + * For PCIe and FC this is always a zero length string.
+ */ + char hostsvcid[SPDK_NVMF_TRSVCID_MAX_LEN + 1]; +}; + +/* + * Controller support flags + * + * Used for identifying if the controller supports these flags. + */ +enum spdk_nvme_ctrlr_flags { + SPDK_NVME_CTRLR_SGL_SUPPORTED = 0x1, /**< SGL is supported */ + SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED = 0x2, /**< security send/receive is supported */ + SPDK_NVME_CTRLR_WRR_SUPPORTED = 0x4, /**< Weighted Round Robin is supported */ + SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED = 0x8, /**< Compare and write fused operations supported */ + SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT = 0x10, /**< Dword alignment is required for SGL */ +}; + +/** + * Parse the string representation of a transport ID. + * + * \param trid Output transport ID structure (must be allocated and initialized by caller). + * \param str Input string representation of a transport ID to parse. + * + * str must be a zero-terminated C string containing one or more key:value pairs + * separated by whitespace. + * + * Key | Value + * ------------ | ----- + * trtype | Transport type (e.g. PCIe, RDMA) + * adrfam | Address family (e.g. IPv4, IPv6) + * traddr | Transport address (e.g. 0000:04:00.0 for PCIe, 192.168.100.8 for RDMA, or WWN for FC) + * trsvcid | Transport service identifier (e.g. 4420) + * subnqn | Subsystem NQN + * + * Unspecified fields of trid are left unmodified, so the caller must initialize + * trid (for example, memset() to 0) before calling this function. + * + * \return 0 if parsing was successful and trid is filled out, or negated errno + * values on failure. + */ +int spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str); + + +/** + * Fill in the trtype and trstring fields of this trid based on a known transport type. + * + * \param trid The trid to fill out. + * \param trtype The transport type to use for filling the trid fields. Only valid for + * transport types referenced in the NVMe-oF spec. 
+ */ +void spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid, + enum spdk_nvme_transport_type trtype); + +/** + * Parse the string representation of a host ID. + * + * \param hostid Output host ID structure (must be allocated and initialized by caller). + * \param str Input string representation of a transport ID to parse (hostid is a sub-configuration). + * + * str must be a zero-terminated C string containing one or more key:value pairs + * separated by whitespace. + * + * Key | Value + * -------------- | ----- + * hostaddr | Transport address (e.g. 192.168.100.8 for RDMA) + * hostsvcid | Transport service identifier (e.g. 4420) + * + * Unspecified fields of hostid are left unmodified, so the caller must initialize + * hostid (for example, memset() to 0) before calling this function. + * + * This function should not be used with Fibre Channel or PCIe as these transports + * do not require host information for connections. + * + * \return 0 if parsing was successful and hostid is filled out, or negated errno + * values on failure. + */ +int spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str); + +/** + * Parse the string representation of a transport ID transport type into the trid struct. + * + * \param trid The trid to write to. + * \param trstring Input string representation of transport type (e.g. "PCIe", "RDMA"). + * + * \return 0 if parsing was successful and trtype is filled out, or negated errno + * values if the provided string was an invalid transport string. + */ +int spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, + const char *trstring); + +/** + * Parse the string representation of a transport ID transport type. + * + * \param trtype Output transport type (allocated by caller). + * \param str Input string representation of transport type (e.g. "PCIe", "RDMA"). + * + * \return 0 if parsing was successful and trtype is filled out, or negated errno + * values on failure.
+ */ +int spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str); + +/** + * Look up the string representation of a transport ID transport type. + * + * \param trtype Transport type to convert. + * + * \return static string constant describing trtype, or NULL if trtype not found. + */ +const char *spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype); + +/** + * Look up the string representation of a transport ID address family. + * + * \param adrfam Address family to convert. + * + * \return static string constant describing adrfam, or NULL if adrfam not found. + */ +const char *spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam); + +/** + * Parse the string representation of a transport ID address family. + * + * \param adrfam Output address family (allocated by caller). + * \param str Input string representation of address family (e.g. "IPv4", "IPv6"). + * + * \return 0 if parsing was successful and adrfam is filled out, or negated errno + * values on failure. + */ +int spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str); + +/** + * Compare two transport IDs. + * + * The result of this function may be used to sort transport IDs in a consistent + * order; however, the comparison result is not guaranteed to be consistent across + * library versions. + * + * This function uses a case-insensitive comparison for string fields, but it does + * not otherwise normalize the transport ID. It is the caller's responsibility to + * provide the transport IDs in a consistent format. + * + * \param trid1 First transport ID to compare. + * \param trid2 Second transport ID to compare. + * + * \return 0 if trid1 == trid2, less than 0 if trid1 < trid2, greater than 0 if + * trid1 > trid2.
+ */ +int spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1, + const struct spdk_nvme_transport_id *trid2); + +/** + * Parse the string representation of PI check settings (prchk:guard|reftag) + * + * \param prchk_flags Output PI check flags. + * \param str Input string representation of PI check settings. + * + * \return 0 if parsing was successful and prchk_flags is set, or negated errno + * values on failure. + */ +int spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str); + +/** + * Look up the string representation of PI check settings (prchk:guard|reftag) + * + * \param prchk_flags PI check flags to convert. + * + * \return static string constant describing PI check settings. If prchk_flags is 0, + * NULL is returned. + */ +const char *spdk_nvme_prchk_flags_str(uint32_t prchk_flags); + +/** + * Determine whether the NVMe library can handle a specific NVMe over Fabrics + * transport type. + * + * \param trtype NVMe over Fabrics transport type to check. + * + * \return true if trtype is supported or false if it is not supported or if + * SPDK_NVME_TRANSPORT_CUSTOM is supplied as trtype since it can represent multiple + * transports. + */ +bool spdk_nvme_transport_available(enum spdk_nvme_transport_type trtype); + +/** + * Determine whether the NVMe library can handle a specific NVMe over Fabrics + * transport type. + * + * \param transport_name Name of the NVMe over Fabrics transport type to check. + * + * \return true if transport_name is supported or false if it is not supported. + */ +bool spdk_nvme_transport_available_by_name(const char *transport_name); + +/** + * Callback for spdk_nvme_probe() enumeration. + * + * \param cb_ctx Opaque value passed to spdk_nvme_probe(). + * \param trid NVMe transport identifier. + * \param opts NVMe controller initialization options. This structure will be + * populated with the default values on entry, and the user callback may update + * any options to request a different value. 
The controller may not support all + * requested parameters, so the final values will be provided during the attach + * callback. + * + * \return true to attach to this device. + */ +typedef bool (*spdk_nvme_probe_cb)(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts); + +/** + * Callback for spdk_nvme_attach() to report a device that has been attached to + * the userspace NVMe driver. + * + * \param cb_ctx Opaque value passed to spdk_nvme_attach_cb(). + * \param trid NVMe transport identifier. + * \param ctrlr Opaque handle to NVMe controller. + * \param opts NVMe controller initialization options that were actually used. + * Options may differ from the requested options from the attach call depending + * on what the controller supports. + */ +typedef void (*spdk_nvme_attach_cb)(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_ctrlr_opts *opts); + +/** + * Callback for spdk_nvme_remove() to report that a device attached to the userspace + * NVMe driver has been removed from the system. + * + * The controller will remain in a failed state (any new I/O submitted will fail). + * + * The controller must be detached from the userspace driver by calling spdk_nvme_detach() + * once the controller is no longer in use. It is up to the library user to ensure + * that no other threads are using the controller before calling spdk_nvme_detach(). + * + * \param cb_ctx Opaque value passed to spdk_nvme_remove_cb(). + * \param ctrlr NVMe controller instance that was removed. + */ +typedef void (*spdk_nvme_remove_cb)(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); + +/** + * Enumerate the bus indicated by the transport ID and attach the userspace NVMe + * driver to each device found if desired. + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using any NVMe devices.
+ * + * If called from a secondary process, only devices that have been attached to + * the userspace driver in the primary process will be probed. + * + * If called more than once, only devices that are not already attached to the + * SPDK NVMe driver will be reported. + * + * To stop using the controller and release its associated resources, + * call spdk_nvme_detach() with the spdk_nvme_ctrlr instance from the attach_cb() + * function. + * + * \param trid The transport ID indicating which bus to enumerate. If the trtype + * is PCIe or trid is NULL, this will scan the local PCIe bus. If the trtype is + * RDMA, the traddr and trsvcid must point at the location of an NVMe-oF discovery + * service. + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of + * the callbacks. + * \param probe_cb will be called once per NVMe device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true + * once that NVMe controller has been attached to the userspace driver. + * \param remove_cb will be called for devices that were attached in a previous + * spdk_nvme_probe() call but are no longer attached to the system. Optional; + * specify NULL if removal notices are not desired. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb); + +/** + * Connect the NVMe driver to the device located at the given transport ID. + * + * This function is not thread safe and should only be called from one thread at + * a time while no other threads are actively using this NVMe device. + * + * If called from a secondary process, only the device that has been attached to + * the userspace driver in the primary process will be connected.
+ * + * If connecting to multiple controllers, it is suggested to use spdk_nvme_probe() + * and filter the requested controllers with the probe callback. For PCIe controllers, + * spdk_nvme_probe() will be more efficient since the controller resets will happen + * in parallel. + * + * To stop using the controller and release its associated resources, call + * spdk_nvme_detach() with the spdk_nvme_ctrlr instance returned by this function. + * + * \param trid The transport ID indicating which device to connect. If the trtype + * is PCIe, this will connect the local PCIe bus. If the trtype is RDMA, the traddr + * and trsvcid must point at the location of an NVMe-oF service. + * \param opts NVMe controller initialization options. Default values will be used + * if the user does not specify the options. The controller may not support all + * requested parameters. + * \param opts_size Must be set to sizeof(struct spdk_nvme_ctrlr_opts), or 0 if + * opts is NULL. + * + * \return pointer to the connected NVMe controller or NULL if there is any failure. + * + */ +struct spdk_nvme_ctrlr *spdk_nvme_connect(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + size_t opts_size); + +struct spdk_nvme_probe_ctx; + +/** + * Connect the NVMe driver to the device located at the given transport ID. + * + * The function will return a probe context on success, controller associates with + * the context is not ready for use, user must call spdk_nvme_probe_poll_async() + * until spdk_nvme_probe_poll_async() returns 0. + * + * \param trid The transport ID indicating which device to connect. If the trtype + * is PCIe, this will connect the local PCIe bus. If the trtype is RDMA, the traddr + * and trsvcid must point at the location of an NVMe-oF service. + * \param opts NVMe controller initialization options. Default values will be used + * if the user does not specify the options. The controller may not support all + * requested parameters.
+ * \param attach_cb will be called once the NVMe controller has been attached + * to the userspace driver. + * + * \return probe context on success, NULL on failure. + * + */ +struct spdk_nvme_probe_ctx *spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + spdk_nvme_attach_cb attach_cb); + +/** + * Probe and add controllers to the probe context list. + * + * Users must call spdk_nvme_probe_poll_async() to initialize + * controllers in the probe context list to the READY state. + * + * \param trid The transport ID indicating which bus to enumerate. If the trtype + * is PCIe or trid is NULL, this will scan the local PCIe bus. If the trtype is + * RDMA, the traddr and trsvcid must point at the location of an NVMe-oF discovery + * service. + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of + * the callbacks. + * \param probe_cb will be called once per NVMe device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true + * once that NVMe controller has been attached to the userspace driver. + * \param remove_cb will be called for devices that were attached in a previous + * spdk_nvme_probe() call but are no longer attached to the system. Optional; + * specify NULL if removal notices are not desired. + * + * \return probe context on success, NULL on failure. + */ +struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb); + +/** + * Start controllers in the context list. + * + * Users should keep calling this function for as long as it returns -EAGAIN. + * + * \param probe_ctx Context used to track probe actions. + * + * \return 0 if all probe operations are complete; the probe_ctx + * is also freed and no longer valid.
+ * \return -EAGAIN if there are still pending probe operations; user must call + * spdk_nvme_probe_poll_async again to continue progress. + * \return any other value if a probe error occurred with one of the controllers. + */ +int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx); + +/** + * Detach specified device returned by spdk_nvme_probe()'s attach_cb from the + * NVMe driver. + * + * On success, the spdk_nvme_ctrlr handle is no longer valid. + * + * This function should be called from a single thread while no other threads + * are actively using the NVMe device. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Update the transport ID for a given controller. + * + * This function allows the user to set a new trid for a controller only if the + * controller is failed. The controller's failed state can be obtained from + * spdk_nvme_ctrlr_is_failed(). The controller can also be forced to the failed + * state using spdk_nvme_ctrlr_fail(). + * + * This function also requires that the transport type and subnqn of the new trid + * be the same as the old trid. + * + * \param ctrlr Opaque handle to an NVMe controller. + * \param trid The new transport ID. + * + * \return 0 on success, -EINVAL if the trid is invalid, + * -EPERM if the ctrlr is not failed. + */ +int spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid); + +/** + * Perform a full hardware reset of the NVMe controller. + * + * This function should be called from a single thread while no other threads + * are actively using the NVMe device. + * + * Any pointers returned from spdk_nvme_ctrlr_get_ns() and spdk_nvme_ns_get_data() + * may be invalidated by calling this function. The number of namespaces as returned + * by spdk_nvme_ctrlr_get_num_ns() may also change. + * + * \param ctrlr Opaque handle to NVMe controller.
+ * + * \return 0 on success, -1 on failure. + */ +int spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Fail the given NVMe controller. + * + * This function gives the application the opportunity to fail a controller + * at will. When a controller is failed, any calls to process completions or + * submit I/O on qpairs associated with that controller will fail with an error + * code of -ENXIO. + * The controller can only be taken from the failed state by + * calling spdk_nvme_ctrlr_reset. After the controller has been successfully + * reset, any I/O pending when the controller was moved to failed will be + * aborted back to the application and can be resubmitted. I/O can then resume. + * + * \param ctrlr Opaque handle to an NVMe controller. + */ +void spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr); + +/** + * This function returns the failed status of a given controller. + * + * \param ctrlr Opaque handle to an NVMe controller. + * + * \return True if the controller is failed, false otherwise. + */ +bool spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the identify controller data as defined by the NVMe specification. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return pointer to the identify controller data. + */ +const struct spdk_nvme_ctrlr_data *spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the NVMe controller CSTS (Status) register. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the NVMe controller CSTS (Status) register. + */ +union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the NVMe controller CAP (Capabilities) register. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the NVMe controller CAP (Capabilities) register. 
+ */ +union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the NVMe controller VS (Version) register. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the NVMe controller VS (Version) register. + */ +union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the NVMe controller CMBSZ (Controller Memory Buffer Size) register + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the NVMe controller CMBSZ (Controller Memory Buffer Size) register. + */ +union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the number of namespaces for the given NVMe controller. + * + * This function is thread safe and can be called at any point while the + * controller is attached to the SPDK NVMe driver. + * + * This is equivalent to calling spdk_nvme_ctrlr_get_data() to get the + * spdk_nvme_ctrlr_data and then reading the nn field. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the number of namespaces. + */ +uint32_t spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the PCI device of a given NVMe controller. + * + * This only works for local (PCIe-attached) NVMe controllers; other transports + * will return NULL. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return PCI device of the NVMe controller, or NULL if not available. + */ +struct spdk_pci_device *spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the maximum data transfer size of a given NVMe controller. + * + * \return Maximum data transfer size of the NVMe controller in bytes. + * + * The I/O command helper functions, such as spdk_nvme_ns_cmd_read(), will split + * large I/Os automatically; however, it is up to the user to obey this limit for + * commands submitted with the raw command functions, such as spdk_nvme_ctrlr_cmd_io_raw(). 
+ */ +uint32_t spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr); + +/** + * Check whether the nsid is an active ns for the given NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param nsid Namespace id. + * + * \return true if nsid is an active ns, or false otherwise. + */ +bool spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + +/** + * Get the nsid of the first active namespace. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return the nsid of the first active namespace, 0 if there are no active namespaces. + */ +uint32_t spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the next active namespace given the previous nsid. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param prev_nsid Namespace id. + * + * \return the next active namespace given the previous nsid, 0 when there are no + * more active namespaces. + */ +uint32_t spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid); + +/** + * Determine if a particular log page is supported by the given NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \sa spdk_nvme_ctrlr_cmd_get_log_page(). + * + * \param ctrlr Opaque handle to NVMe controller. + * \param log_page Log page to query. + * + * \return true if supported, or false otherwise.
+ */ +bool spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page); + +/** + * Determine if a particular feature is supported by the given NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \sa spdk_nvme_ctrlr_cmd_get_feature(). + * + * \param ctrlr Opaque handle to NVMe controller. + * \param feature_code Feature to query. + * + * \return true if supported, or false otherwise. + */ +bool spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code); + +/** + * Signature for callback function invoked when a command is completed. + * + * \param spdk_nvme_cpl Completion queue entry that contains the completion status. + */ +typedef void (*spdk_nvme_cmd_cb)(void *, const struct spdk_nvme_cpl *); + +/** + * Signature for callback function invoked when an asynchronous event request + * command is completed. + * + * \param aer_cb_arg Context specified by spdk_nvme_ctrlr_register_aer_callback(). + * \param spdk_nvme_cpl Completion queue entry that contains the completion status + * of the asynchronous event request that was completed. + */ +typedef void (*spdk_nvme_aer_cb)(void *aer_cb_arg, + const struct spdk_nvme_cpl *); + +/** + * Register callback function invoked when an AER command is completed for the + * given NVMe controller. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param aer_cb_fn Callback function invoked when an asynchronous event request + * command is completed. + * \param aer_cb_arg Argument passed to callback function. + */ +void spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_aer_cb aer_cb_fn, + void *aer_cb_arg); + +/** + * Opaque handle to a queue pair. + * + * I/O queue pairs may be allocated using spdk_nvme_ctrlr_alloc_io_qpair().
+ */ +struct spdk_nvme_qpair; + +/** + * Signature for the callback function invoked when a timeout is detected on a + * request. + * + * For timeouts detected on the admin queue pair, the qpair returned here will + * be NULL. If the controller has a serious error condition and is unable to + * communicate with the driver via the completion queue, the controller can set the Controller + * Fatal Status field to 1; a reset is then required to recover from such an error. + * Users may detect Controller Fatal Status when a timeout happens. + * + * \param cb_arg Argument passed to callback function. + * \param ctrlr Opaque handle to NVMe controller. + * \param qpair Opaque handle to a queue pair. + * \param cid Command ID. + */ +typedef void (*spdk_nvme_timeout_cb)(void *cb_arg, + struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint16_t cid); + +/** + * Register for timeout callback on a controller. + * + * The application can choose to register for timeout callback or not register + * for timeout callback. + * + * \param ctrlr NVMe controller on which to monitor for timeout. + * \param timeout_us Timeout value in microseconds. + * \param cb_fn A function pointer that points to the callback function. + * \param cb_arg Argument to the callback function. + */ +void spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, + uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg); + +/** + * NVMe I/O queue pair initialization options. + * + * These options may be passed to spdk_nvme_ctrlr_alloc_io_qpair() to configure queue pair + * options at queue creation time. + * + * The user may retrieve the default I/O queue pair creation options for a controller using + * spdk_nvme_ctrlr_get_default_io_qpair_opts(). + */ +struct spdk_nvme_io_qpair_opts { + /** + * Queue priority for weighted round robin arbitration. If a different arbitration + * method is in use, pass 0. + */ + enum spdk_nvme_qprio qprio; + + /** + * The queue depth of this NVMe I/O queue.
Overrides spdk_nvme_ctrlr_opts::io_queue_size. + */ + uint32_t io_queue_size; + + /** + * The number of requests to allocate for this NVMe I/O queue. + * + * Overrides spdk_nvme_ctrlr_opts::io_queue_requests. + * + * This should be at least as large as io_queue_size. + * + * A single I/O may allocate more than one request, since splitting may be + * necessary to conform to the device's maximum transfer size, PRP list + * compatibility requirements, or driver-assisted striping. + */ + uint32_t io_queue_requests; + + /** + * When submitting I/O via spdk_nvme_ns_read/write and similar functions, + * don't immediately submit it to hardware. Instead, queue up new commands + * and submit them to the hardware inside spdk_nvme_qpair_process_completions(). + * + * This results in better batching of I/O commands. Often, it is more efficient + * to submit batches of commands to the underlying hardware than each command + * individually. + * + * This only applies to PCIe and RDMA transports. + * + * The flag was originally named delay_pcie_doorbell. To allow backward compatibility + * both names are kept in unnamed union. + */ + union { + bool delay_cmd_submit; + bool delay_pcie_doorbell; + }; + + /** + * These fields allow specifying the memory buffers for the submission and/or + * completion queues. + * By default, vaddr is set to NULL meaning SPDK will allocate the memory to be used. + * If vaddr is NULL then paddr must be set to 0. + * If vaddr is non-NULL, and paddr is zero, SPDK derives the physical + * address for the NVMe device, in this case the memory must be registered. + * If a paddr value is non-zero, SPDK uses the vaddr and paddr as passed + * SPDK assumes that the memory passed is both virtually and physically + * contiguous. + * If these fields are used, SPDK will NOT impose any restriction + * on the number of elements in the queues. 
+ * The buffer sizes are in number of bytes, and are used to confirm + * that the buffers are large enough to contain the appropriate queue. + * These fields are only used by PCIe attached NVMe devices. They + * are presently ignored for other transports. + */ + struct { + struct spdk_nvme_cmd *vaddr; + uint64_t paddr; + uint64_t buffer_size; + } sq; + struct { + struct spdk_nvme_cpl *vaddr; + uint64_t paddr; + uint64_t buffer_size; + } cq; + + /** + * This flag indicates to the alloc_io_qpair function that it should not perform + * the connect portion on this qpair. This allows the user to add the qpair to a + * poll group and then connect it later. + */ + bool create_only; +}; + +/** + * Get the default options for I/O qpair creation for a specific NVMe controller. + * + * \param ctrlr NVMe controller to retrieve the defaults from. + * \param[out] opts Will be filled with the default options for + * spdk_nvme_ctrlr_alloc_io_qpair(). + * \param opts_size Must be set to sizeof(struct spdk_nvme_io_qpair_opts). + */ +void spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_io_qpair_opts *opts, + size_t opts_size); + +/** + * Allocate an I/O queue pair (submission and completion queue). + * + * This function by default also performs any connection activities required for + * a newly created qpair. To avoid that behavior, the user should set the create_only + * flag in the opts structure to true. + * + * Each queue pair should only be used from a single thread at a time (mutual + * exclusion must be enforced by the user). + * + * \param ctrlr NVMe controller for which to allocate the I/O queue pair. + * \param opts I/O qpair creation options, or NULL to use the defaults as returned + * by spdk_nvme_ctrlr_get_default_io_qpair_opts(). + * \param opts_size Must be set to sizeof(struct spdk_nvme_io_qpair_opts), or 0 + * if opts is NULL. + * + * \return a pointer to the allocated I/O queue pair. 
+ */ +struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_io_qpair_opts *opts, + size_t opts_size); + +/** + * Connect a newly created I/O qpair. + * + * This function does any connection activities required for a newly created qpair. + * It should be called after spdk_nvme_ctrlr_alloc_io_qpair has been called with the + * create_only flag set to true in the spdk_nvme_io_qpair_opts structure. + * + * This call will fail if performed on a qpair that is already connected. + * For reconnecting qpairs, see spdk_nvme_ctrlr_reconnect_io_qpair. + * + * For fabrics like TCP and RDMA, this function actually sends the commands over the wire + * that connect the qpair. For PCIe, this function performs some internal state machine operations. + * + * \param ctrlr NVMe controller for which the I/O queue pair was allocated. + * \param qpair Opaque handle to the qpair to connect. + * + * \return 0 on success or negated errno on failure. Specifically -EISCONN if the qpair is already connected. + * + */ +int spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); + +/** + * Disconnect the given I/O qpair. + * + * This function must be called from the same thread as spdk_nvme_qpair_process_completions + * and the spdk_nvme_ns_cmd_* functions. + * + * After disconnect, calling spdk_nvme_qpair_process_completions or one of the + * spdk_nvme_ns_cmd* on a qpair will result in a return value of -ENXIO. A + * disconnected qpair may be reconnected with either the spdk_nvme_ctrlr_connect_io_qpair + * or spdk_nvme_ctrlr_reconnect_io_qpair APIs. + * + * \param qpair The qpair to disconnect. + */ +void spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair); + +/** + * Attempt to reconnect the given qpair. 
+ * + * This function is intended to be called on qpairs that have already been connected, + * but have since entered a failed state as indicated by a return value of -ENXIO from + * either spdk_nvme_qpair_process_completions or one of the spdk_nvme_ns_cmd_* functions. + * This function must be called from the same thread as spdk_nvme_qpair_process_completions + * and the spdk_nvme_ns_cmd_* functions. + * + * Calling this function has the same effect as calling spdk_nvme_ctrlr_disconnect_io_qpair + * followed by spdk_nvme_ctrlr_connect_io_qpair. + * + * This function may be called on newly created qpairs, but it does extra checks and attempts + * to disconnect the qpair before connecting it. The recommended API for newly created qpairs + * is spdk_nvme_ctrlr_connect_io_qpair. + * + * \param qpair The qpair to reconnect. + * + * \return 0 on success, or if the qpair was already connected. + * -EAGAIN if the driver was unable to reconnect during this call, + * but the controller is still connected and is either resetting or enabled. + * -ENODEV if the controller is removed. In this case, the controller cannot be recovered + * and the application will have to destroy it and the associated qpairs. + * -ENXIO if the controller is in a failed state but is not yet resetting. In this case, + * the application should call spdk_nvme_ctrlr_reset to reset the entire controller. + */ +int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair); + +/** + * Returns the reason the admin qpair for a given controller is disconnected. + * + * \param ctrlr The controller to check. + * + * \return a valid spdk_nvme_qp_failure_reason. + */ +spdk_nvme_qp_failure_reason spdk_nvme_ctrlr_get_admin_qp_failure_reason( + struct spdk_nvme_ctrlr *ctrlr); + +/** + * Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * + * \param qpair I/O queue pair to free. + * + * \return 0 on success, -1 on failure. 
+ */ +int spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair); + +/** + * Send the given NVM I/O command, I/O buffers, lists and all to the NVMe controller. + * + * This is a low level interface for submitting I/O commands directly. + * + * This function allows a caller to submit an I/O request that is + * COMPLETELY pre-defined, right down to the "physical" memory buffers. + * It is intended for testing hardware, specifying exact buffer location, + * alignment, and offset. It also allows for specific choice of PRP + * and SGLs. + * + * The driver sets the CID. EVERYTHING else is assumed set by the caller. + * Needless to say, this is potentially extremely dangerous for both the host + * (accidental/malicious storage usage/corruption), and the device. + * Thus its intent is for very specific hardware testing and environment + * reproduction. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * This function can only be used on PCIe controllers and qpairs. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param qpair I/O qpair to submit command. + * \param cmd NVM I/O command to submit. + * \param cb_fn Callback function invoked when the I/O command completes. + * \param cb_arg Argument passed to callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ + +int spdk_nvme_ctrlr_io_cmd_raw_no_payload_build(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Send the given NVM I/O command to the NVMe controller. + * + * This is a low level interface for submitting I/O commands directly. Prefer + * the spdk_nvme_ns_cmd_* functions instead. 
The validity of the command will + * not be checked! + * + * When constructing the nvme_command it is not necessary to fill out the PRP + * list/SGL or the CID. The driver will handle both of those for you. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param qpair I/O qpair to submit command. + * \param cmd NVM I/O command to submit. + * \param buf Virtual memory address of a single physically contiguous buffer. + * \param len Size of buffer. + * \param cb_fn Callback function invoked when the I/O command completes. + * \param cb_arg Argument passed to callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ctrlr_cmd_io_raw(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Send the given NVM I/O command with metadata to the NVMe controller. + * + * This is a low level interface for submitting I/O commands directly. Prefer + * the spdk_nvme_ns_cmd_* functions instead. The validity of the command will + * not be checked! + * + * When constructing the nvme_command it is not necessary to fill out the PRP + * list/SGL or the CID. The driver will handle both of those for you. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ctrlr Opaque handle to NVMe controller. 
+ * \param qpair I/O qpair to submit command. + * \param cmd NVM I/O command to submit. + * \param buf Virtual memory address of a single physically contiguous buffer. + * \param len Size of buffer. + * \param md_buf Virtual memory address of a single physically contiguous metadata + * buffer. + * \param cb_fn Callback function invoked when the I/O command completes. + * \param cb_arg Argument passed to callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, void *md_buf, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Process any outstanding completions for I/O submitted on a queue pair. + * + * This call is non-blocking, i.e. it only processes completions that are ready + * at the time of this function call. It does not wait for outstanding commands + * to finish. + * + * For each completed command, the request's callback function will be called if + * specified as non-NULL when the request was submitted. + * + * The caller must ensure that each queue pair is only used from one thread at a + * time. + * + * This function may be called at any point while the controller is attached to + * the SPDK NVMe driver. + * + * \sa spdk_nvme_cmd_cb + * + * \param qpair Queue pair to check for completions. + * \param max_completions Limit the number of completions to be processed in one + * call, or 0 for unlimited. + * + * \return number of completions processed (may be 0) or negated on error. -ENXIO + * in the special case that the qpair is failed at the transport layer. 
+ */ +int32_t spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, + uint32_t max_completions); + +/** + * Returns the reason the qpair is disconnected. + * + * \param qpair The qpair to check. + * + * \return a valid spdk_nvme_qp_failure_reason. + */ +spdk_nvme_qp_failure_reason spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair); + +/** + * Send the given admin command to the NVMe controller. + * + * This is a low level interface for submitting admin commands directly. Prefer + * the spdk_nvme_ctrlr_cmd_* functions instead. The validity of the command will + * not be checked! + * + * When constructing the nvme_command it is not necessary to fill out the PRP + * list/SGL or the CID. The driver will handle both of those for you. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion + * of commands submitted through this function. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param cmd NVM admin command to submit. + * \param buf Virtual memory address of a single physically contiguous buffer. + * \param len Size of buffer. + * \param cb_fn Callback function invoked when the admin command completes. + * \param cb_arg Argument passed to callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_admin_raw(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Process any outstanding completions for admin commands. + * + * This will process completions for admin commands submitted on any thread. + * + * This call is non-blocking, i.e. it only processes completions that are ready + * at the time of this function call. 
It does not wait for outstanding commands + * to finish. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return number of completions processed (may be 0) or negated errno on error. -ENXIO + * in the special case that the admin qpair is failed at the transport layer. + */ +int32_t spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr); + + +/** + * Opaque handle to a namespace. Obtained by calling spdk_nvme_ctrlr_get_ns(). + */ +struct spdk_nvme_ns; + +/** + * Get a handle to a namespace for the given controller. + * + * Namespaces are numbered from 1 to the total number of namespaces. There will + * never be any gaps in the numbering. The number of namespaces is obtained by + * calling spdk_nvme_ctrlr_get_num_ns(). + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param ns_id Namespace id. + * + * \return a pointer to the namespace. + */ +struct spdk_nvme_ns *spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t ns_id); + +/** + * Get a specific log page from the NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \sa spdk_nvme_ctrlr_is_log_page_supported() + * + * \param ctrlr Opaque handle to NVMe controller. + * \param log_page The log page identifier. + * \param nsid Depending on the log page, this may be 0, a namespace identifier, + * or SPDK_NVME_GLOBAL_NS_TAG. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. 
+ * \param offset Offset in bytes within the log page to start retrieving log page + * data. May only be non-zero if the controller supports extended data for Get Log + * Page as reported in the controller data log page attributes. + * \param cb_fn Callback function to invoke when the log page has been retrieved. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_get_log_page(struct spdk_nvme_ctrlr *ctrlr, + uint8_t log_page, uint32_t nsid, + void *payload, uint32_t payload_size, + uint64_t offset, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Get a specific log page from the NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * This function allows specifying extra fields in cdw10 and cdw11 such as + * Retain Asynchronous Event and Log Specific Field. + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \sa spdk_nvme_ctrlr_is_log_page_supported() + * + * \param ctrlr Opaque handle to NVMe controller. + * \param log_page The log page identifier. + * \param nsid Depending on the log page, this may be 0, a namespace identifier, + * or SPDK_NVME_GLOBAL_NS_TAG. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param offset Offset in bytes within the log page to start retrieving log page + * data. May only be non-zero if the controller supports extended data for Get Log + * Page as reported in the controller data log page attributes. + * \param cdw10 Value to specify for cdw10. Specify 0 for numdl - it will be + * set by this function based on the payload_size parameter. 
Specify 0 for lid - + * it will be set by this function based on the log_page parameter. + * \param cdw11 Value to specify for cdw11. Specify 0 for numdu - it will be + * set by this function based on the payload_size. + * \param cdw14 Value to specify for cdw14. + * \param cb_fn Callback function to invoke when the log page has been retrieved. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_get_log_page_ext(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, + uint32_t nsid, void *payload, uint32_t payload_size, + uint64_t offset, uint32_t cdw10, uint32_t cdw11, + uint32_t cdw14, spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Abort a specific previously-submitted NVMe command. + * + * \sa spdk_nvme_ctrlr_register_timeout_callback() + * + * \param ctrlr NVMe controller to which the command was submitted. + * \param qpair NVMe queue pair to which the command was submitted. For admin + * commands, pass NULL for the qpair. + * \param cid Command ID of the command to abort. + * \param cb_fn Callback function to invoke when the abort has completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_abort(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint16_t cid, + spdk_nvme_cmd_cb cb_fn, + void *cb_arg); + +/** + * Abort previously submitted commands which have cmd_cb_arg as its callback argument. + * + * \param ctrlr NVMe controller to which the commands were submitted. + * \param qpair NVMe queue pair to which the commands were submitted. For admin + * commands, pass NULL for the qpair. 
+ * \param cmd_cb_arg Callback argument for the NVMe commands which this function + * attempts to abort. + * \param cb_fn Callback function to invoke when this function has completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno otherwise. + */ +int spdk_nvme_ctrlr_cmd_abort_ext(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + void *cmd_cb_arg, + spdk_nvme_cmd_cb cb_fn, + void *cb_arg); + +/** + * Set specific feature for the given NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \sa spdk_nvme_ctrlr_cmd_get_feature(). + * + * \param ctrlr NVMe controller to manipulate. + * \param feature The feature identifier. + * \param cdw11 as defined by the specification for this command. + * \param cdw12 as defined by the specification for this command. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the feature has been set. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_set_feature(struct spdk_nvme_ctrlr *ctrlr, + uint8_t feature, uint32_t cdw11, uint32_t cdw12, + void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Get specific feature from given NVMe controller. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. 
+ * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \sa spdk_nvme_ctrlr_cmd_set_feature() + * + * \param ctrlr NVMe controller to query. + * \param feature The feature identifier. + * \param cdw11 as defined by the specification for this command. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the feature has been retrieved. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated + * for this request, -ENXIO if the admin qpair is failed at the transport layer. + */ +int spdk_nvme_ctrlr_cmd_get_feature(struct spdk_nvme_ctrlr *ctrlr, + uint8_t feature, uint32_t cdw11, + void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Get specific feature from given NVMe controller. + * + * \param ctrlr NVMe controller to query. + * \param feature The feature identifier. + * \param cdw11 as defined by the specification for this command. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the feature has been retrieved. + * \param cb_arg Argument to pass to the callback function. + * \param ns_id The namespace identifier. + * + * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated + * for this request, -ENXIO if the admin qpair is failed at the transport layer. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion + * of commands submitted through this function. 
+ * + * \sa spdk_nvme_ctrlr_cmd_set_feature_ns() + */ +int spdk_nvme_ctrlr_cmd_get_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t ns_id); + +/** + * Set specific feature for the given NVMe controller and namespace ID. + * + * \param ctrlr NVMe controller to manipulate. + * \param feature The feature identifier. + * \param cdw11 as defined by the specification for this command. + * \param cdw12 as defined by the specification for this command. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the feature has been set. + * \param cb_arg Argument to pass to the callback function. + * \param ns_id The namespace identifier. + * + * \return 0 if successfully submitted, -ENOMEM if resources could not be allocated + * for this request, -ENXIO if the admin qpair is failed at the transport layer. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion + * of commands submitted through this function. + * + * \sa spdk_nvme_ctrlr_cmd_get_feature_ns() + */ +int spdk_nvme_ctrlr_cmd_set_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, uint32_t cdw12, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t ns_id); + +/** + * Receive security protocol data from controller. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * \param ctrlr NVMe controller to use for security receive command submission. + * \param secp Security Protocol that is used. + * \param spsp Security Protocol Specific field. + * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security + * Protocol EAh. 
+ * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the command has been completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be allocated + * for this request. + */ +int spdk_nvme_ctrlr_cmd_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, + uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Send security protocol data to controller. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * \param ctrlr NVMe controller to use for security send command submission. + * \param secp Security Protocol that is used. + * \param spsp Security Protocol Specific field. + * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security + * Protocol EAh. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. + * \param cb_fn Callback function to invoke when the command has been completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errno if resources could not be allocated + * for this request. + */ +int spdk_nvme_ctrlr_cmd_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Receive security protocol data from controller. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \param ctrlr NVMe controller to use for security receive command submission. + * \param secp Security Protocol that is used. 
+ * \param spsp Security Protocol Specific field. + * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security + * Protocol EAh. + * \param payload The pointer to the payload buffer. + * \param size The size of payload buffer. + * + * \return 0 if successfully submitted, negated errno if resources could not be allocated + * for this request. + */ +int spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, size_t size); + +/** + * Send security protocol data to controller. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \param ctrlr NVMe controller to use for security send command submission. + * \param secp Security Protocol that is used. + * \param spsp Security Protocol Specific field. + * \param nssf NVMe Security Specific field. Indicate RPMB target when using Security + * Protocol EAh. + * \param payload The pointer to the payload buffer. + * \param size The size of payload buffer. + * + * \return 0 if successfully submitted, negated errno if resources could not be allocated + * for this request. + */ +int spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, size_t size); + +/** + * Get supported flags of the controller. + * + * \param ctrlr NVMe controller to get flags. + * + * \return supported flags of this controller. + */ +uint64_t spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Attach the specified namespace to controllers. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. 
+ * + * \param ctrlr NVMe controller to use for command submission. + * \param nsid Namespace identifier for namespace to attach. + * \param payload The pointer to the controller list. + * + * \return 0 if successfully submitted, ENOMEM if resources could not be allocated + * for this request. + */ +int spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload); + +/** + * Detach the specified namespace from controllers. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \param ctrlr NVMe controller to use for command submission. + * \param nsid Namespace ID to detach. + * \param payload The pointer to the controller list. + * + * \return 0 if successfully submitted, ENOMEM if resources could not be allocated + * for this request + */ +int spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload); + +/** + * Create a namespace. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * \param ctrlr NVMe controller to create namespace on. + * \param payload The pointer to the NVMe namespace data. + * + * \return Namespace ID (>= 1) if successfully created, or 0 if the request failed. + */ +uint32_t spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_ns_data *payload); + +/** + * Delete a namespace. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * Call spdk_nvme_ctrlr_process_admin_completions() to poll for completion of + * commands submitted through this function. + * + * \param ctrlr NVMe controller to delete namespace from. + * \param nsid The namespace identifier. 
+ * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated + * for this request + */ +int spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + +/** + * Format NVM. + * + * This function requests a low-level format of the media. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * \param ctrlr NVMe controller to format. + * \param nsid The namespace identifier. May be SPDK_NVME_GLOBAL_NS_TAG to format + * all namespaces. + * \param format The format information for the command. + * + * \return 0 if successfully submitted, negated errno if resources could not be + * allocated for this request + */ +int spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_format *format); + +/** + * Download a new firmware image. + * + * This function is thread safe and can be called at any point after spdk_nvme_probe(). + * + * \param ctrlr NVMe controller to perform firmware operation on. + * \param payload The data buffer for the firmware image. + * \param size The data size will be downloaded. + * \param slot The slot that the firmware image will be committed to. + * \param commit_action The action to perform when firmware is committed. + * \param completion_status output parameter. Contains the completion status of + * the firmware commit operation. + * + * \return 0 if successfully submitted, ENOMEM if resources could not be allocated + * for this request, -1 if the size is not multiple of 4. + */ +int spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, + int slot, enum spdk_nvme_fw_commit_action commit_action, + struct spdk_nvme_status *completion_status); + +/** + * Return virtual address of PCIe NVM I/O registers + * + * This function returns a pointer to the PCIe I/O registers for a controller + * or NULL if unsupported for this transport. 
+ * + * \param ctrlr Controller whose registers are to be accessed. + * + * \return Pointer to virtual address of register bank, or NULL. + */ +volatile struct spdk_nvme_registers *spdk_nvme_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Reserve the controller memory buffer for data transfer use. + * + * This function reserves the full size of the controller memory buffer + * for use in data transfers. If submission queues or completion queues are + * already placed in the controller memory buffer, this call will fail. + * + * \param ctrlr Controller from which to allocate memory buffer + * + * \return The size of the controller memory buffer on success. Negated errno + * on failure. + */ +int spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Map a previously reserved controller memory buffer so that its data is + * visible from the CPU. This operation is not always possible. + * + * \param ctrlr Controller that contains the memory buffer + * \param size Size of buffer that was mapped. + * + * \return Pointer to controller memory buffer, or NULL on failure. + */ +void *spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size); + +/** + * Free a controller memory I/O buffer. + * + * \param ctrlr Controller from which to unmap the memory buffer. + */ +void spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Get the transport ID for a given NVMe controller. + * + * \param ctrlr Controller to get the transport ID. + * \return Pointer to the controller's transport ID. + */ +const struct spdk_nvme_transport_id *spdk_nvme_ctrlr_get_transport_id( + struct spdk_nvme_ctrlr *ctrlr); + +/** + * Opaque handle for a poll group. A poll group is a collection of spdk_nvme_qpair + * objects that are polled for completions as a unit. + * + * Returned by spdk_nvme_poll_group_create(). 
+ */ +struct spdk_nvme_poll_group; + + +/** + * This function alerts the user to disconnected qpairs when calling + * spdk_nvme_poll_group_process_completions. + */ +typedef void (*spdk_nvme_disconnected_qpair_cb)(struct spdk_nvme_qpair *qpair, + void *poll_group_ctx); + +/** + * Create a new poll group. + * + * \param ctx A user supplied context that can be retrieved later with spdk_nvme_poll_group_get_ctx + * + * \return Pointer to the new poll group, or NULL on error. + */ +struct spdk_nvme_poll_group *spdk_nvme_poll_group_create(void *ctx); + +/** + * Add an spdk_nvme_qpair to a poll group. qpairs may only be added to + * a poll group if they are in the disconnected state; i.e. either they were + * just allocated and not yet connected or they have been disconnected with a call + * to spdk_nvme_ctrlr_disconnect_io_qpair. + * + * \param group The group to which the qpair will be added. + * \param qpair The qpair to add to the poll group. + * + * \return 0 on success, -EINVAL if the qpair is not in the disconnected state, -ENODEV if the transport + * doesn't exist, -ENOMEM on memory allocation failures, or -EPROTO on a protocol (transport) specific failure. + */ +int spdk_nvme_poll_group_add(struct spdk_nvme_poll_group *group, struct spdk_nvme_qpair *qpair); + +/** + * Remove an spdk_nvme_qpair from a poll group. + * + * \param group The group from which to remove the qpair. + * \param qpair The qpair to remove from the poll group. + * + * \return 0 on success, -ENOENT if the qpair is not found in the group, or -EPROTO on a protocol (transport) specific failure. + */ +int spdk_nvme_poll_group_remove(struct spdk_nvme_poll_group *group, struct spdk_nvme_qpair *qpair); + +/** + * Destroy an empty poll group. + * + * \param group The group to destroy. + * + * \return 0 on success, -EBUSY if the poll group is not empty. + */ +int spdk_nvme_poll_group_destroy(struct spdk_nvme_poll_group *group); + +/** + * Poll for completions on all qpairs in this poll group.
+ * + * The disconnected_qpair_cb will be called for all disconnected qpairs in the poll group + * including qpairs which fail within the context of this call. + * The user is responsible for trying to reconnect or destroy those qpairs. + * + * \param group The group on which to poll for completions. + * \param completions_per_qpair The maximum number of completions per qpair. + * \param disconnected_qpair_cb A callback function of type spdk_nvme_disconnected_qpair_cb. Must be non-NULL. + * + * \return The number of completions across all qpairs, -EINVAL if no disconnected_qpair_cb is passed, or + * -EIO if the shared completion queue cannot be polled for the RDMA transport. + */ +int64_t spdk_nvme_poll_group_process_completions(struct spdk_nvme_poll_group *group, + uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); + +/** + * Retrieve the user context for this specific poll group. + * + * \param group The poll group from which to retrieve the context. + * + * \return A pointer to the user provided poll group context. + */ +void *spdk_nvme_poll_group_get_ctx(struct spdk_nvme_poll_group *group); + +/** + * Get the identify namespace data as defined by the NVMe specification. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace. + * + * \return a pointer to the namespace data. + */ +const struct spdk_nvme_ns_data *spdk_nvme_ns_get_data(struct spdk_nvme_ns *ns); + +/** + * Get the namespace id (index number) from the given namespace handle. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace. + * + * \return namespace id. + */ +uint32_t spdk_nvme_ns_get_id(struct spdk_nvme_ns *ns); + +/** + * Get the controller with which this namespace is associated.
+ * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace. + * + * \return a pointer to the controller. + */ +struct spdk_nvme_ctrlr *spdk_nvme_ns_get_ctrlr(struct spdk_nvme_ns *ns); + +/** + * Determine whether a namespace is active. + * + * Inactive namespaces cannot be the target of I/O commands. + * + * \param ns Namespace to query. + * + * \return true if active, or false if inactive. + */ +bool spdk_nvme_ns_is_active(struct spdk_nvme_ns *ns); + +/** + * Get the maximum transfer size, in bytes, for an I/O sent to the given namespace. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the maximum transfer size in bytes. + */ +uint32_t spdk_nvme_ns_get_max_io_xfer_size(struct spdk_nvme_ns *ns); + +/** + * Get the sector size, in bytes, of the given namespace. + * + * This function returns the size of the data sector only. It does not + * include metadata size. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the sector size in bytes. + */ +uint32_t spdk_nvme_ns_get_sector_size(struct spdk_nvme_ns *ns); + +/** + * Get the extended sector size, in bytes, of the given namespace. + * + * This function returns the size of the data sector plus metadata. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the extended sector size in bytes. + */ +uint32_t spdk_nvme_ns_get_extended_sector_size(struct spdk_nvme_ns *ns); + +/** + * Get the number of sectors for the given namespace.
+ * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the number of sectors. + */ +uint64_t spdk_nvme_ns_get_num_sectors(struct spdk_nvme_ns *ns); + +/** + * Get the size, in bytes, of the given namespace. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the size of the given namespace in bytes. + */ +uint64_t spdk_nvme_ns_get_size(struct spdk_nvme_ns *ns); + +/** + * Get the end-to-end data protection information type of the given namespace. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the end-to-end data protection information type. + */ +enum spdk_nvme_pi_type spdk_nvme_ns_get_pi_type(struct spdk_nvme_ns *ns); + +/** + * Get the metadata size, in bytes, of the given namespace. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the metadata size of the given namespace in bytes. + */ +uint32_t spdk_nvme_ns_get_md_size(struct spdk_nvme_ns *ns); + +/** + * Check whether the namespace can support extended LBA when end-to-end data + * protection is enabled. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return true if the namespace can support extended LBA when end-to-end data + * protection is enabled, or false otherwise.
+ */ +bool spdk_nvme_ns_supports_extended_lba(struct spdk_nvme_ns *ns); + +/** + * Check whether the namespace supports the compare operation. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return true if the namespace supports the compare operation, or false otherwise. + */ +bool spdk_nvme_ns_supports_compare(struct spdk_nvme_ns *ns); + +/** + * Determine the value returned when reading deallocated blocks. + * + * If deallocated blocks return 0, the deallocate command can be used as a more + * efficient alternative to the write_zeroes command, especially for large requests. + * + * \param ns Namespace. + * + * \return the logical block read value. + */ +enum spdk_nvme_dealloc_logical_block_read_value spdk_nvme_ns_get_dealloc_logical_block_read_value( + struct spdk_nvme_ns *ns); + +/** + * Get the optimal I/O boundary, in blocks, for the given namespace. + * + * Read and write commands should not cross the optimal I/O boundary for best + * performance. + * + * \param ns Namespace to query. + * + * \return Optimal granularity of I/O commands, in blocks, or 0 if no optimal + * granularity is reported. + */ +uint32_t spdk_nvme_ns_get_optimal_io_boundary(struct spdk_nvme_ns *ns); + +/** + * Get the UUID for the given namespace. + * + * \param ns Namespace to query. + * + * \return a pointer to namespace UUID, or NULL if ns does not have a UUID. + */ +const struct spdk_uuid *spdk_nvme_ns_get_uuid(const struct spdk_nvme_ns *ns); + +/** + * \brief Namespace command support flags.
+ */ +enum spdk_nvme_ns_flags { + SPDK_NVME_NS_DEALLOCATE_SUPPORTED = 0x1, /**< The deallocate command is supported */ + SPDK_NVME_NS_FLUSH_SUPPORTED = 0x2, /**< The flush command is supported */ + SPDK_NVME_NS_RESERVATION_SUPPORTED = 0x4, /**< The reservation command is supported */ + SPDK_NVME_NS_WRITE_ZEROES_SUPPORTED = 0x8, /**< The write zeroes command is supported */ + SPDK_NVME_NS_DPS_PI_SUPPORTED = 0x10, /**< The end-to-end data protection is supported */ + SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED = 0x20, /**< The extended lba format is supported, + metadata is transferred as a contiguous + part of the logical block that it is associated with */ + SPDK_NVME_NS_WRITE_UNCORRECTABLE_SUPPORTED = 0x40, /**< The write uncorrectable command is supported */ + SPDK_NVME_NS_COMPARE_SUPPORTED = 0x80, /**< The compare command is supported */ +}; + +/** + * Get the flags for the given namespace. + * + * See spdk_nvme_ns_flags for the possible flags returned. + * + * This function is thread safe and can be called at any point while the controller + * is attached to the SPDK NVMe driver. + * + * \param ns Namespace to query. + * + * \return the flags for the given namespace. + */ +uint32_t spdk_nvme_ns_get_flags(struct spdk_nvme_ns *ns); + +/** + * Restart the SGL walk to the specified offset when the command has scattered payloads. + * + * \param cb_arg Argument passed to readv/writev. + * \param offset Offset for SGL. + */ +typedef void (*spdk_nvme_req_reset_sgl_cb)(void *cb_arg, uint32_t offset); + +/** + * Fill out *address and *length with the current SGL entry and advance to the next + * entry for the next time the callback is invoked. + * + * The described segment must be physically contiguous. + * + * \param cb_arg Argument passed to readv/writev. + * \param address Virtual address of this segment, a value of UINT64_MAX + * means the segment should be described via Bit Bucket SGL. + * \param length Length of this physical segment. 
+ */ +typedef int (*spdk_nvme_req_next_sge_cb)(void *cb_arg, void **address, uint32_t *length); + +/** + * Submit a write I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write I/O. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the data payload. + * \param lba Starting LBA to write the data. + * \param lba_count Length (in sectors) for the write operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in + * spdk/nvme_spec.h, for this I/O. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags); + +/** + * Submit a write I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA to write the data. + * \param lba_count Length (in sectors) for the write operation. 
+ * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. + * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); + +/** + * Submit a write I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. 
+ * + * \param ns NVMe namespace to submit the write I/O + * \param qpair I/O queue pair to submit the request + * \param lba starting LBA to write the data + * \param lba_count length (in sectors) for the write operation + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined in nvme_spec.h, for this I/O + * \param reset_sgl_fn callback function to reset scattered payload + * \param next_sge_fn callback function to iterate each scattered + * payload memory segment + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param apptag_mask application tag mask. + * \param apptag application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag); + +/** + * Submit a write I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write I/O. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the data payload. 
+ * \param metadata Virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size(). + * \param lba Starting LBA to write the data. + * \param lba_count Length (in sectors) for the write operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in + * spdk/nvme_spec.h, for this I/O. + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *payload, void *metadata, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag); + +/** + * Submit a write zeroes I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write zeroes I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA for this command. + * \param lba_count Length (in sectors) for the write zero operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. 
+ * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in + * spdk/nvme_spec.h, for this I/O. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * Submit a write uncorrectable I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write uncorrectable I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA for this command. + * \param lba_count Length (in sectors) for the write uncorrectable operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_write_uncorrectable(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * \brief Submits a read I/O to the specified NVMe namespace. 
+ * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the read I/O. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the data payload. + * \param lba Starting LBA to read the data. + * \param lba_count Length (in sectors) for the read operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags); + +/** + * Submit a read I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the read I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA to read the data. + * \param lba_count Length (in sectors) for the read operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. 
+ * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); + +/** + * Submit a read I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any given time. + * + * \param ns NVMe namespace to submit the read I/O + * \param qpair I/O queue pair to submit the request + * \param lba starting LBA to read the data + * \param lba_count length (in sectors) for the read operation + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined in nvme_spec.h, for this I/O + * \param reset_sgl_fn callback function to reset scattered payload + * \param next_sge_fn callback function to iterate each scattered + * payload memory segment + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param apptag_mask application tag mask. + * \param apptag application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. 
+ * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag); + +/** + * Submits a read I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the read I/O + * \param qpair I/O queue pair to submit the request + * \param payload virtual address pointer to the data payload + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size(). + * \param lba starting LBA to read the data. + * \param lba_count Length (in sectors) for the read operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. 
+ */ +int spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *payload, void *metadata, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag); + +/** + * Submit a data set management request to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * This is a convenience wrapper that will automatically allocate and construct + * the correct data buffers. Therefore, ranges does not need to be allocated from + * pinned memory and can be placed on the stack. If a higher performance, zero-copy + * version of DSM is required, simply build and submit a raw command using + * spdk_nvme_ctrlr_cmd_io_raw(). + * + * \param ns NVMe namespace to submit the DSM request + * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute. + * \param qpair I/O queue pair to submit the request + * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs + * to operate on. + * \param num_ranges The number of elements in the ranges array. + * \param cb_fn Callback function to invoke when the I/O is completed + * \param cb_arg Argument to pass to the callback function + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ +int spdk_nvme_ns_cmd_dataset_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t type, + const struct spdk_nvme_dsm_range *ranges, + uint16_t num_ranges, + spdk_nvme_cmd_cb cb_fn, + void *cb_arg); + +/** + * Submit a flush request to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). 
+ * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the flush request. + * \param qpair I/O queue pair to submit the request. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ +int spdk_nvme_ns_cmd_flush(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Submit a reservation register to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the reservation register request. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the reservation register data. + * \param ignore_key '1' the current reservation key check is disabled. + * \param action Specifies the registration action. + * \param cptpl Change the Persist Through Power Loss state. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. 
+ */ +int spdk_nvme_ns_cmd_reservation_register(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_register_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_register_action action, + enum spdk_nvme_reservation_register_cptpl cptpl, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Submits a reservation release to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the reservation release request. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to current reservation key. + * \param ignore_key '1' the current reservation key check is disabled. + * \param action Specifies the reservation release action. + * \param type Reservation type for the namespace. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_reservation_release(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_key_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_release_action action, + enum spdk_nvme_reservation_type type, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Submits a reservation acquire to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). 
+ * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the reservation acquire request. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to reservation acquire data. + * \param ignore_key '1' the current reservation key check is disabled. + * \param action Specifies the reservation acquire action. + * \param type Reservation type for the namespace. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_reservation_acquire(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_acquire_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_acquire_action action, + enum spdk_nvme_reservation_type type, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Submit a reservation report to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the reservation report request. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer for reservation status data. + * \param len Length bytes for reservation status data structure. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. 
+ * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *payload, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * Submit a compare I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the compare I/O. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the data payload. + * \param lba Starting LBA to compare the data. + * \param lba_count Length (in sectors) for the compare operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_compare(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags); + +/** + * Submit a compare I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). 
+ * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the compare I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA to compare the data. + * \param lba_count Length (in sectors) for the compare operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. + * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_comparev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); + +/** + * Submit a compare I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the compare I/O. + * \param qpair I/O queue pair to submit the request. + * \param lba Starting LBA to compare the data. + * \param lba_count Length (in sectors) for the compare operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. 
+ * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. + * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * \param metadata Virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int +spdk_nvme_ns_cmd_comparev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag); + +/** + * Submit a compare I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the compare I/O. + * \param qpair I/O queue pair to submit the request. + * \param payload Virtual address pointer to the data payload. + * \param metadata Virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size(). + * \param lba Starting LBA to compare the data. + * \param lba_count Length (in sectors) for the compare operation. + * \param cb_fn Callback function to invoke when the I/O is completed. 
+ * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_compare_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *payload, void *metadata, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag); + +/** + * \brief Inject an error for the next request with a given opcode. + * + * \param ctrlr NVMe controller. + * \param qpair I/O queue pair to add the error command, + * NULL for Admin queue pair. + * \param opc Opcode for Admin or I/O commands. + * \param do_not_submit True if matching requests should not be submitted + * to the controller, but instead completed manually + * after timeout_in_us has expired. False if matching + * requests should be submitted to the controller and + * have their completion status modified after the + * controller completes the request. + * \param timeout_in_us Wait specified microseconds when do_not_submit is true. + * \param err_count Number of matching requests to inject errors. + * \param sct Status code type. + * \param sc Status code. + * + * \return 0 if successfully enabled, ENOMEM if an error command + * structure cannot be allocated. + * + * The function can be called multiple times to inject errors for different + * commands. If the opcode matches an existing entry, the existing entry + * will be updated with the values specified. 
+ */ +int spdk_nvme_qpair_add_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint8_t opc, + bool do_not_submit, + uint64_t timeout_in_us, + uint32_t err_count, + uint8_t sct, uint8_t sc); + +/** + * \brief Clear the specified NVMe command with error status. + * + * \param ctrlr NVMe controller. + * \param qpair I/O queue pair to remove the error command, + * NULL for Admin queue pair. + * \param opc Opcode for Admin or I/O commands. + * + * The function will remove the specified command from the error list. + */ +void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint8_t opc); + +/** + * \brief Given NVMe status, return ASCII string for that error. + * + * \param status Status from NVMe completion queue element. + * \return Returns status as an ASCII string. + */ +const char *spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status); + +/** + * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe submission queue entry (command). + * + * \param qpair Pointer to the NVMe queue pair - used to determine admin versus I/O queue. + * \param cmd Pointer to the submission queue command to be formatted. + */ +void spdk_nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd); + +/** + * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe completion queue entry. + * + * \param qpair Pointer to the NVMe queue pair - presently unused. + * \param cpl Pointer to the completion queue element to be formatted. + */ +void spdk_nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cpl *cpl); + +/** + * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe submission queue entry (command). + * + * \param qid Queue identifier. + * \param cmd Pointer to the submission queue command to be formatted.
+ */ +void spdk_nvme_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd); + +/** + * \brief Prints (SPDK_NOTICELOG) the contents of an NVMe completion queue entry. + * + * \param qid Queue identifier. + * \param cpl Pointer to the completion queue element to be formatted. + */ +void spdk_nvme_print_completion(uint16_t qid, struct spdk_nvme_cpl *cpl); + +struct ibv_context; +struct ibv_pd; +struct ibv_mr; + +/** + * RDMA Transport Hooks + */ +struct spdk_nvme_rdma_hooks { + /** + * \brief Get an InfiniBand Verbs protection domain. + * + * \param trid the transport id + * \param verbs Infiniband verbs context + * + * \return pd of the nvme ctrlr + */ + struct ibv_pd *(*get_ibv_pd)(const struct spdk_nvme_transport_id *trid, + struct ibv_context *verbs); + + /** + * \brief Get an InfiniBand Verbs memory region for a buffer. + * + * \param pd The protection domain returned from get_ibv_pd + * \param buf Memory buffer for which an rkey should be returned. + * \param size size of buf + * + * \return Infiniband remote key (rkey) for this buf + */ + uint64_t (*get_rkey)(struct ibv_pd *pd, void *buf, size_t size); + + /** + * \brief Put back keys got from get_rkey. + * + * \param key The Infiniband remote key (rkey) got from get_rkey + * + */ + void (*put_rkey)(uint64_t key); +}; + +/** + * \brief Set the global hooks for the RDMA transport, if necessary. + * + * This call is optional and must be performed prior to probing for + * any devices. By default, the RDMA transport will use the ibverbs + * library to create protection domains and register memory. This + * is a mechanism to subvert that and use an existing registration. + * + * This function may only be called one time per process. + * + * \param hooks for initializing global hooks + */ +void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks); + +/** + * Get name of cuse device associated with NVMe controller. + * + * \param ctrlr Opaque handle to NVMe controller. 
+ * \param name Buffer to be filled with cuse device name. + * \param size Size of name buffer. + * + * \return 0 on success. Negated errno on the following error conditions: + * -ENODEV: No cuse device registered for the controller. + * -ENOSPC: Too small buffer size passed. Value of size pointer changed to the required length. + */ +int spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size); + +/** + * Get name of cuse device associated with NVMe namespace. + * + * \param ctrlr Opaque handle to NVMe controller. + * \param nsid Namespace id. + * \param name Buffer to be filled with cuse device name. + * \param size Size of name buffer. + * + * \return 0 on success. Negated errno on the following error conditions: + * -ENODEV: No cuse device registered for the namespace. + * -ENOSPC: Too small buffer size passed. Value of size pointer changed to the required length. + */ +int spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + char *name, size_t *size); + +/** + * Create a character device at the path specified (Experimental) + * + * The character device can handle ioctls and is compatible with a standard + * Linux kernel NVMe device. Tools such as nvme-cli can be used to configure + * SPDK devices through this interface. + * + * The user is expected to be polling the admin qpair for this controller periodically + * for the CUSE device to function. + * + * \param ctrlr Opaque handle to the NVMe controller. + * + * \return 0 on success. Negated errno on failure. + */ +int spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Remove a previously created character device (Experimental) + * + * \param ctrlr Opaque handle to the NVMe controller. + * + * \return 0 on success. Negated errno on failure.
+ */ +int spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr); + +int spdk_nvme_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, + uint32_t len, size_t mps, + void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len)); + +/** + * Opaque handle for a transport poll group. Used by the transport function table. + */ +struct spdk_nvme_transport_poll_group; + +/** + * Update and populate namespace CUSE devices (Experimental) + * + * \param ctrlr Opaque handle to the NVMe controller. + * + */ +void spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr); + +struct nvme_request; + +struct spdk_nvme_transport; + +struct spdk_nvme_transport_ops { + char name[SPDK_NVMF_TRSTRING_MAX_LEN + 1]; + + enum spdk_nvme_transport_type type; + + struct spdk_nvme_ctrlr *(*ctrlr_construct)(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + void *devhandle); + + int (*ctrlr_scan)(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect); + + int (*ctrlr_destruct)(struct spdk_nvme_ctrlr *ctrlr); + + int (*ctrlr_enable)(struct spdk_nvme_ctrlr *ctrlr); + + int (*ctrlr_set_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); + + int (*ctrlr_set_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); + + int (*ctrlr_get_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value); + + int (*ctrlr_get_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value); + + uint32_t (*ctrlr_get_max_xfer_size)(struct spdk_nvme_ctrlr *ctrlr); + + uint16_t (*ctrlr_get_max_sges)(struct spdk_nvme_ctrlr *ctrlr); + + int (*ctrlr_reserve_cmb)(struct spdk_nvme_ctrlr *ctrlr); + + void *(*ctrlr_map_cmb)(struct spdk_nvme_ctrlr *ctrlr, size_t *size); + + int (*ctrlr_unmap_cmb)(struct spdk_nvme_ctrlr *ctrlr); + + struct spdk_nvme_qpair *(*ctrlr_create_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts); + + int 
(*ctrlr_delete_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); + + int (*ctrlr_connect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); + + void (*ctrlr_disconnect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); + + void (*qpair_abort_reqs)(struct spdk_nvme_qpair *qpair, uint32_t dnr); + + int (*qpair_reset)(struct spdk_nvme_qpair *qpair); + + int (*qpair_submit_request)(struct spdk_nvme_qpair *qpair, struct nvme_request *req); + + int32_t (*qpair_process_completions)(struct spdk_nvme_qpair *qpair, uint32_t max_completions); + + int (*qpair_iterate_requests)(struct spdk_nvme_qpair *qpair, + int (*iter_fn)(struct nvme_request *req, void *arg), + void *arg); + + void (*admin_qpair_abort_aers)(struct spdk_nvme_qpair *qpair); + + struct spdk_nvme_transport_poll_group *(*poll_group_create)(void); + + int (*poll_group_add)(struct spdk_nvme_transport_poll_group *tgroup, struct spdk_nvme_qpair *qpair); + + int (*poll_group_remove)(struct spdk_nvme_transport_poll_group *tgroup, + struct spdk_nvme_qpair *qpair); + + int (*poll_group_connect_qpair)(struct spdk_nvme_qpair *qpair); + + int (*poll_group_disconnect_qpair)(struct spdk_nvme_qpair *qpair); + + int64_t (*poll_group_process_completions)(struct spdk_nvme_transport_poll_group *tgroup, + uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); + + int (*poll_group_destroy)(struct spdk_nvme_transport_poll_group *tgroup); +}; + +/** + * Register the operations for a given transport type. + * + * This function should be invoked by referencing the + * SPDK_NVME_TRANSPORT_REGISTER macro in the transport's .c file. + * + * \param ops The operations associated with an NVMe-oF transport. + */ +void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops); + +/* + * Macro used to register new transports.
+ */ +#define SPDK_NVME_TRANSPORT_REGISTER(name, transport_ops) \ +static void __attribute__((constructor)) _spdk_nvme_transport_register_##name(void) \ +{ \ + spdk_nvme_transport_register(transport_ops); \ +}\ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvme_intel.h b/src/spdk/include/spdk/nvme_intel.h new file mode 100644 index 000000000..2814e2a7c --- /dev/null +++ b/src/spdk/include/spdk/nvme_intel.h @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Intel NVMe vendor-specific definitions + * + * Reference: + * http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ssd-dc-p3700-spec.pdf + */ + +#ifndef SPDK_NVME_INTEL_H +#define SPDK_NVME_INTEL_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" + +enum spdk_nvme_intel_feat { + SPDK_NVME_INTEL_FEAT_MAX_LBA = 0xC1, + SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA = 0xC2, + SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING = 0xC6, + SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS = 0xC8, + SPDK_NVME_INTEL_FEAT_LED_PATTERN = 0xC9, + SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS = 0xD5, + SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING = 0xE2, +}; + +enum spdk_nvme_intel_set_max_lba_command_status_code { + SPDK_NVME_INTEL_EXCEEDS_AVAILABLE_CAPACITY = 0xC0, + SPDK_NVME_INTEL_SMALLER_THAN_MIN_LIMIT = 0xC1, + SPDK_NVME_INTEL_SMALLER_THAN_NS_REQUIREMENTS = 0xC2, +}; + +enum spdk_nvme_intel_log_page { + SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY = 0xC0, + SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY = 0xC1, + SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY = 0xC2, + SPDK_NVME_INTEL_LOG_TEMPERATURE = 0xC5, + SPDK_NVME_INTEL_LOG_SMART = 0xCA, + SPDK_NVME_INTEL_MARKETING_DESCRIPTION = 0xDD, +}; + +enum spdk_nvme_intel_smart_attribute_code { + SPDK_NVME_INTEL_SMART_PROGRAM_FAIL_COUNT = 0xAB, + SPDK_NVME_INTEL_SMART_ERASE_FAIL_COUNT = 0xAC, + SPDK_NVME_INTEL_SMART_WEAR_LEVELING_COUNT = 0xAD, + 
SPDK_NVME_INTEL_SMART_E2E_ERROR_COUNT = 0xB8, + SPDK_NVME_INTEL_SMART_CRC_ERROR_COUNT = 0xC7, + SPDK_NVME_INTEL_SMART_MEDIA_WEAR = 0xE2, + SPDK_NVME_INTEL_SMART_HOST_READ_PERCENTAGE = 0xE3, + SPDK_NVME_INTEL_SMART_TIMER = 0xE4, + SPDK_NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS = 0xEA, + SPDK_NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER = 0xF0, + SPDK_NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT = 0xF3, + SPDK_NVME_INTEL_SMART_NAND_BYTES_WRITTEN = 0xF4, + SPDK_NVME_INTEL_SMART_HOST_BYTES_WRITTEN = 0xF5, +}; + +struct spdk_nvme_intel_log_page_directory { + uint8_t version[2]; + uint8_t reserved[384]; + uint8_t read_latency_log_len; + uint8_t reserved2; + uint8_t write_latency_log_len; + uint8_t reserved3[5]; + uint8_t temperature_statistics_log_len; + uint8_t reserved4[9]; + uint8_t smart_log_len; + uint8_t reserved5[37]; + uint8_t marketing_description_log_len; + uint8_t reserved6[69]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_log_page_directory) == 512, "Incorrect size"); + +struct spdk_nvme_intel_rw_latency_page { + uint16_t major_revison; + uint16_t minor_revison; + uint32_t buckets_32us[32]; + uint32_t buckets_1ms[31]; + uint32_t buckets_32ms[31]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_rw_latency_page) == 380, "Incorrect size"); + +struct spdk_nvme_intel_temperature_page { + uint64_t current_temperature; + uint64_t shutdown_flag_last; + uint64_t shutdown_flag_life; + uint64_t highest_temperature; + uint64_t lowest_temperature; + uint64_t reserved[5]; + uint64_t specified_max_op_temperature; + uint64_t reserved2; + uint64_t specified_min_op_temperature; + uint64_t estimated_offset; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_temperature_page) == 112, "Incorrect size"); + +struct spdk_nvme_intel_smart_attribute { + uint8_t code; + uint8_t reserved[2]; + uint8_t normalized_value; + uint8_t reserved2; + uint8_t raw_value[6]; + uint8_t reserved3; +}; + +struct __attribute__((packed)) spdk_nvme_intel_smart_information_page { + struct 
spdk_nvme_intel_smart_attribute attributes[13]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_smart_information_page) == 156, "Incorrect size"); + +union spdk_nvme_intel_feat_power_governor { + uint32_t raw; + struct { + /** power governor setting : 00h = 25W 01h = 20W 02h = 10W */ + uint32_t power_governor_setting : 8; + uint32_t reserved : 24; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_power_governor) == 4, "Incorrect size"); + +union spdk_nvme_intel_feat_smbus_address { + uint32_t raw; + struct { + uint32_t reserved : 1; + uint32_t smbus_controller_address : 8; + uint32_t reserved2 : 23; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_smbus_address) == 4, "Incorrect size"); + +union spdk_nvme_intel_feat_led_pattern { + uint32_t raw; + struct { + uint32_t feature_options : 24; + uint32_t value : 8; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_led_pattern) == 4, "Incorrect size"); + +union spdk_nvme_intel_feat_reset_timed_workload_counters { + uint32_t raw; + struct { + /** + * Write Usage: 00 = NOP, 1 = Reset E2, E3,E4 counters; + * Read Usage: Not Supported + */ + uint32_t reset : 1; + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_reset_timed_workload_counters) == 4, + "Incorrect size"); + +union spdk_nvme_intel_feat_latency_tracking { + uint32_t raw; + struct { + /** + * Write Usage: + * 00h = Disable Latency Tracking (Default) + * 01h = Enable Latency Tracking + */ + uint32_t enable : 32; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_intel_feat_latency_tracking) == 4, "Incorrect size"); + +struct spdk_nvme_intel_marketing_description_page { + uint8_t marketing_product[512]; + /* Spec says this log page will only write 512 bytes, but there are some older FW + * versions that accidentally write 516 instead. 
So just pad this out to 4096 bytes + * to make sure users of this structure never end up overwriting unintended parts of + * memory. + */ + uint8_t reserved[3584]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_intel_marketing_description_page) == 4096, + "Incorrect size"); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvme_ocssd.h b/src/spdk/include/spdk/nvme_ocssd.h new file mode 100644 index 000000000..7ebb07991 --- /dev/null +++ b/src/spdk/include/spdk/nvme_ocssd.h @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * NVMe driver public API extension for Open-Channel + */ + +#ifndef SPDK_NVME_OCSSD_H +#define SPDK_NVME_OCSSD_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/nvme.h" +#include "spdk/nvme_ocssd_spec.h" + +/** + * \brief Determine if OpenChannel is supported by the given NVMe controller. + * \param ctrlr NVMe controller to check. + * + * \return true if OpenChannel is supported + */ +bool spdk_nvme_ctrlr_is_ocssd_supported(struct spdk_nvme_ctrlr *ctrlr); + +/** + * \brief Identify geometry of the given namespace. + * \param ctrlr NVMe controller to query. + * \param nsid Id of the given namespace. + * \param payload The pointer to the payload buffer. + * \param payload_size The size of payload buffer. Shall be a multiple of 4K. + * \param cb_fn Callback function to invoke when the feature has been retrieved. + * \param cb_arg Argument to pass to the callback function. + * + * \return 0 if successfully submitted, ENOMEM if resources could not be + * allocated for this request, EINVAL if wrong payload size. + * + */ +int spdk_nvme_ocssd_ctrlr_cmd_geometry(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * \brief Submits a vector reset command to the specified NVMe namespace.
+ * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param lba_list an array of LBAs for processing. + * LBAs must correspond to the start of chunks to reset. + * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in lba_list + * \param chunk_info an array of chunk info on DMA-able memory + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_reset(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + uint64_t *lba_list, uint32_t num_lbas, + struct spdk_ocssd_chunk_information_entry *chunk_info, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); + +/** + * \brief Submits a vector write command to the specified NVMe namespace. + * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param buffer virtual address pointer to the data payload + * \param lba_list an array of LBAs for processing. + * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in lba_list + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries + * in spdk/nvme_ocssd_spec.h, for this I/O. 
+ * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_write(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * \brief Submits a vector write command to the specified NVMe namespace. + * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param buffer virtual address pointer to the data payload + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param lba_list an array of LBAs for processing. + * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in lba_list + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries + * in spdk/nvme_ocssd_spec.h, for this I/O. + * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_write_with_md(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * \brief Submits a vector read command to the specified NVMe namespace. + * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param buffer virtual address pointer to the data payload + * \param lba_list an array of LBAs for processing. 
+ * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in lba_list + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries + * in spdk/nvme_ocssd_spec.h, for this I/O. + * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_read(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * \brief Submits a vector read command to the specified NVMe namespace. + * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param buffer virtual address pointer to the data payload + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param lba_list an array of LBAs for processing. + * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in lba_list + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries + * in spdk/nvme_ocssd_spec.h, for this I/O. + * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_read_with_md(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * \brief Submits a vector copy command to the specified NVMe namespace. 
+ * + * \param ns NVMe namespace to submit the command + * \param qpair I/O queue pair to submit the request + * \param dst_lba_list an array of destination LBAs for processing. + * Must be allocated through spdk_dma_malloc() or its variants + * \param src_lba_list an array of source LBAs for processing. + * Must be allocated through spdk_dma_malloc() or its variants + * \param num_lbas number of LBAs stored in src_lba_list and dst_lba_list + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined by the SPDK_OCSSD_IO_FLAGS_* entries + * in spdk/nvme_ocssd_spec.h, for this I/O. + * + * \return 0 if successfully submitted, ENOMEM if an nvme_request + * structure cannot be allocated for the I/O request + */ +int spdk_nvme_ocssd_ns_cmd_vector_copy(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + uint64_t *dst_lba_list, uint64_t *src_lba_list, + uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvme_ocssd_spec.h b/src/spdk/include/spdk/nvme_ocssd_spec.h new file mode 100644 index 000000000..21e9bcefc --- /dev/null +++ b/src/spdk/include/spdk/nvme_ocssd_spec.h @@ -0,0 +1,414 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * Open-Channel specification definitions + */ + +#ifndef SPDK_NVME_OCSSD_SPEC_H +#define SPDK_NVME_OCSSD_SPEC_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" +#include "spdk/nvme_spec.h" + +/** Maximum number of LBAs that can be issued by a vector I/O command */ +#define SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES 64 + +struct spdk_ocssd_dev_lba_fmt { + /** Contiguous number of bits assigned to Group addressing */ + uint8_t grp_len; + + /** Contiguous number of bits assigned to PU addressing */ + uint8_t pu_len; + + /** Contiguous number of bits assigned to Chunk addressing */ + uint8_t chk_len; + + /** Contiguous number of bits assigned to logical blocks within Chunk */ + uint8_t lbk_len; + + uint8_t reserved[4]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_dev_lba_fmt) == 8, "Incorrect size"); + +struct spdk_ocssd_geometry_data { + /** Major Version Number */ + uint8_t mjr; + + /** Minor Version Number */ + uint8_t mnr; + + uint8_t reserved1[6]; + + /** LBA format */ + struct spdk_ocssd_dev_lba_fmt lbaf; + + /** Media and Controller Capabilities */ + struct { + /** Supports the Vector Chunk Copy I/O Command */ + uint32_t vec_chk_cpy : 1; + + /** Supports multiple resets when a chunk is in its free state */ + uint32_t multi_reset : 1; + + uint32_t reserved : 30; + } mccap; + + uint8_t reserved2[12]; + + /** Wear-level Index Delta Threshold */ + uint8_t wit; + + uint8_t reserved3[31]; + + /** Number of Groups */ + uint16_t num_grp; + + /** Number of parallel units per group */ + uint16_t num_pu; + + /** Number of chunks per parallel unit */ + uint32_t num_chk; + + /** Chunk Size */ + uint32_t clba; + + uint8_t reserved4[52]; + + /** Minimum Write Size */ + uint32_t ws_min; + + /** Optimal Write Size */ + uint32_t ws_opt; + + /** Cache Minimum Write Size Units */ + uint32_t mw_cunits; + + /** Maximum Open Chunks */ + uint32_t maxoc; + + /** Maximum Open Chunks per PU */ + uint32_t maxocpu; + +
uint8_t reserved5[44]; + + /** tRD Typical */ + uint32_t trdt; + + /** tRD Max */ + uint32_t trdm; + + /** tWR Typical */ + uint32_t twrt; + + /** tWR Max */ + uint32_t twrm; + + /** tCRS Typical */ + uint32_t tcrst; + + /** tCRS Max */ + uint32_t tcrsm; + + /** bytes 216-255: reserved for performance related metrics */ + uint8_t reserved6[40]; + + uint8_t reserved7[3071 - 255]; + + /** bytes 3072-4095: Vendor Specific */ + uint8_t vs[4095 - 3071]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_geometry_data) == 4096, "Incorrect size"); + +struct spdk_ocssd_chunk_information_entry { + /** Chunk State */ + struct { + /** if set to 1 chunk is free */ + uint8_t free : 1; + + /** if set to 1 chunk is closed */ + uint8_t closed : 1; + + /** if set to 1 chunk is open */ + uint8_t open : 1; + + /** if set to 1 chunk is offline */ + uint8_t offline : 1; + + uint8_t reserved : 4; + } cs; + + /** Chunk Type */ + struct { + /** If set to 1 chunk must be written sequentially */ + uint8_t seq_write : 1; + + /** If set to 1 chunk allows random writes */ + uint8_t rnd_write : 1; + + uint8_t reserved1 : 2; + + /** + * If set to 1 chunk deviates from the chunk size reported + * in identify geometry command. + */ + uint8_t size_deviate : 1; + + uint8_t reserved2 : 3; + } ct; + + /** Wear-level Index */ + uint8_t wli; + + uint8_t reserved[5]; + + /** Starting LBA */ + uint64_t slba; + + /** Number of blocks in chunk */ + uint64_t cnlb; + + /** Write Pointer */ + uint64_t wp; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_chunk_information_entry) == 32, "Incorrect size"); + +struct spdk_ocssd_chunk_notification_entry { + + /** + * This is a 64-bit incrementing notification count, indicating a + * unique identifier for this notification. 
The counter begins at 1h + * and is incremented for each unique event + */ + uint64_t nc; + + /** This field points to the chunk that has its state updated */ + uint64_t lba; + + /** + * This field indicates the namespace id that the event is associated + * with + */ + uint32_t nsid; + + /** Field that indicates the state of the chunk */ + struct { + + /** + * If set to 1, then the error rate of the chunk has been + * changed to low + */ + uint8_t error_rate_low : 1; + + /** + * If set to 1, then the error rate of the chunk has been + * changed to medium + */ + uint8_t error_rate_medium : 1; + + /** + * If set to 1, then the error rate of the chunk has been + * changed to high + */ + uint8_t error_rate_high : 1; + + /** + * If set to 1, then the error rate of the chunk has been + * changed to unrecoverable + */ + uint8_t unrecoverable : 1; + + /** + * If set to 1, then the chunk has been refreshed by the + * device + */ + uint8_t refreshed : 1; + + uint8_t rsvd : 3; + + /** + * If set to 1 then the chunk's wear-level index is outside + * the average wear-level index threshold defined by the + * controller + */ + uint8_t wit_exceeded : 1; + + uint8_t rsvd2 : 7; + } state; + + /** + * The address provided covers either a logical block, a chunk, or + * a parallel unit + */ + struct { + + /** If set to 1, the LBA covers the logical block */ + uint8_t lblk : 1; + + /** If set to 1, the LBA covers the respective chunk */ + uint8_t chunk : 1; + + /** + * If set to 1, the LBA covers the respective parallel unit + * including all chunks + */ + uint8_t pu : 1; + + uint8_t rsvd : 5; + } mask; + + uint8_t rsvd[9]; + + /** + * This field indicates the number of logical blocks to be written. + * This is a 0's based value. This field is only valid if mask bit 0 is + * set. The number of blocks addressed shall not be outside the boundary + * of the specified chunk.
+ */ + uint16_t nlb; + + uint8_t rsvd2[30]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_chunk_notification_entry) == 64, "Incorrect size"); + +/** + * Vector completion queue entry + */ +struct spdk_ocssd_vector_cpl { + /* dword 0,1 */ + uint64_t lba_status; /* completion status bit array */ + + /* dword 2 */ + uint16_t sqhd; /* submission queue head pointer */ + uint16_t sqid; /* submission queue identifier */ + + /* dword 3 */ + uint16_t cid; /* command identifier */ + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_ocssd_vector_cpl) == 16, "Incorrect size"); + +/** + * OCSSD admin command set opcodes + */ +enum spdk_ocssd_admin_opcode { + SPDK_OCSSD_OPC_GEOMETRY = 0xE2 +}; + +/** + * OCSSD I/O command set opcodes + */ +enum spdk_ocssd_io_opcode { + SPDK_OCSSD_OPC_VECTOR_RESET = 0x90, + SPDK_OCSSD_OPC_VECTOR_WRITE = 0x91, + SPDK_OCSSD_OPC_VECTOR_READ = 0x92, + SPDK_OCSSD_OPC_VECTOR_COPY = 0x93 +}; + +/** + * Log page identifiers for SPDK_NVME_OPC_GET_LOG_PAGE + */ +enum spdk_ocssd_log_page { + /** Chunk Information */ + SPDK_OCSSD_LOG_CHUNK_INFO = 0xCA, + + /** Chunk Notification Log */ + SPDK_OCSSD_LOG_CHUNK_NOTIFICATION = 0xD0, +}; + +/** + * OCSSD feature identifiers + * Defines OCSSD specific features that may be configured with Set Features and + * retrieved with Get Features. + */ +enum spdk_ocssd_feat { + /** Media Feedback feature identifier */ + SPDK_OCSSD_FEAT_MEDIA_FEEDBACK = 0xCA +}; + +/** + * OCSSD media error status codes extension. + * Additional error codes for status code type “2h” (media errors) + */ +enum spdk_ocssd_media_error_status_code { + /** + * The chunk was either marked offline by the reset or the state + * of the chunk is already offline. + */ + SPDK_OCSSD_SC_OFFLINE_CHUNK = 0xC0, + + /** + * Invalid reset if chunk state is either “Free” or “Open” + */ + SPDK_OCSSD_SC_INVALID_RESET = 0xC1, + + /** + * Write failed, chunk remains open. + * Host should proceed to write to next write unit. 
+ */ + SPDK_OCSSD_SC_WRITE_FAIL_WRITE_NEXT_UNIT = 0xF0, + + /** + * The writes ended prematurely. The chunk state is set to closed. + * The host can read up to the value of the write pointer. + */ + SPDK_OCSSD_SC_WRITE_FAIL_CHUNK_EARLY_CLOSE = 0xF1, + + /** + * The write corresponds to a write out of order within an open + * chunk or the write is to a closed or offline chunk. + */ + SPDK_OCSSD_SC_OUT_OF_ORDER_WRITE = 0xF2, + + /** + * The data retrieved is nearing its limit for reading. + * The limit is vendor specific, and only provides a hint + * to the host that should refresh its data in the future. + */ + SPDK_OCSSD_SC_READ_HIGH_ECC = 0xD0, +}; + +#define SPDK_OCSSD_IO_FLAGS_LIMITED_RETRY (1U << 31) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvme_spec.h b/src/spdk/include/spdk/nvme_spec.h new file mode 100644 index 000000000..281ac500b --- /dev/null +++ b/src/spdk/include/spdk/nvme_spec.h @@ -0,0 +1,2945 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * NVMe specification definitions + */ + +#ifndef SPDK_NVME_SPEC_H +#define SPDK_NVME_SPEC_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" + +/** + * Use to mark a command to apply to all namespaces, or to retrieve global + * log pages. + */ +#define SPDK_NVME_GLOBAL_NS_TAG ((uint32_t)0xFFFFFFFF) + +#define SPDK_NVME_MAX_IO_QUEUES (65535) + +#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES 2 +#define SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES 4096 + +#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES 2 +#define SPDK_NVME_IO_QUEUE_MAX_ENTRIES 65536 + +/** + * Indicates the maximum number of range sets that may be specified + * in the dataset management command. + */ +#define SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES 256 + +/** + * Maximum number of blocks that may be specified in a single dataset management range. 
+ */ +#define SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS 0xFFFFFFFFu + +union spdk_nvme_cap_register { + uint64_t raw; + struct { + /** maximum queue entries supported */ + uint32_t mqes : 16; + + /** contiguous queues required */ + uint32_t cqr : 1; + + /** arbitration mechanism supported */ + uint32_t ams : 2; + + uint32_t reserved1 : 5; + + /** timeout */ + uint32_t to : 8; + + /** doorbell stride */ + uint32_t dstrd : 4; + + /** NVM subsystem reset supported */ + uint32_t nssrs : 1; + + /** command sets supported */ + uint32_t css : 8; + + /** boot partition support */ + uint32_t bps : 1; + + uint32_t reserved2 : 2; + + /** memory page size minimum */ + uint32_t mpsmin : 4; + + /** memory page size maximum */ + uint32_t mpsmax : 4; + + uint32_t reserved3 : 8; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cap_register) == 8, "Incorrect size"); + +/** + * I/O Command Set Selected + * + * Only a single command set is defined as of NVMe 1.3 (NVM). Later, it became + * possible to disable I/O Command Sets, that is, configuring it to only use the + * Admin Command Set. With 1.4c and Namespace Types, additional I/O Command Sets + * are available. 
+ */ +enum spdk_nvme_cc_css { + SPDK_NVME_CC_CSS_NVM = 0x0, /**< NVM command set */ + SPDK_NVME_CC_CSS_IOCS = 0x6, /**< One or more I/O command sets */ + SPDK_NVME_CC_CSS_NOIO = 0x7, /**< No I/O, only admin */ +}; + +#define SPDK_NVME_CAP_CSS_NVM (1u << SPDK_NVME_CC_CSS_NVM) /**< NVM command set supported */ +#define SPDK_NVME_CAP_CSS_IOCS (1u << SPDK_NVME_CC_CSS_IOCS) /**< One or more I/O Command sets supported */ +#define SPDK_NVME_CAP_CSS_NOIO (1u << SPDK_NVME_CC_CSS_NOIO) /**< No I/O, only admin */ + +union spdk_nvme_cc_register { + uint32_t raw; + struct { + /** enable */ + uint32_t en : 1; + + uint32_t reserved1 : 3; + + /** i/o command set selected */ + uint32_t css : 3; + + /** memory page size */ + uint32_t mps : 4; + + /** arbitration mechanism selected */ + uint32_t ams : 3; + + /** shutdown notification */ + uint32_t shn : 2; + + /** i/o submission queue entry size */ + uint32_t iosqes : 4; + + /** i/o completion queue entry size */ + uint32_t iocqes : 4; + + uint32_t reserved2 : 8; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cc_register) == 4, "Incorrect size"); + +enum spdk_nvme_shn_value { + SPDK_NVME_SHN_NORMAL = 0x1, + SPDK_NVME_SHN_ABRUPT = 0x2, +}; + +union spdk_nvme_csts_register { + uint32_t raw; + struct { + /** ready */ + uint32_t rdy : 1; + + /** controller fatal status */ + uint32_t cfs : 1; + + /** shutdown status */ + uint32_t shst : 2; + + /** NVM subsystem reset occurred */ + uint32_t nssro : 1; + + /** Processing paused */ + uint32_t pp : 1; + + uint32_t reserved1 : 26; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_csts_register) == 4, "Incorrect size"); + +enum spdk_nvme_shst_value { + SPDK_NVME_SHST_NORMAL = 0x0, + SPDK_NVME_SHST_OCCURRING = 0x1, + SPDK_NVME_SHST_COMPLETE = 0x2, +}; + +union spdk_nvme_aqa_register { + uint32_t raw; + struct { + /** admin submission queue size */ + uint32_t asqs : 12; + + uint32_t reserved1 : 4; + + /** admin completion queue size */ + uint32_t acqs : 12; + + uint32_t reserved2 
: 4; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_aqa_register) == 4, "Incorrect size"); + +union spdk_nvme_vs_register { + uint32_t raw; + struct { + /** indicates the tertiary version */ + uint32_t ter : 8; + /** indicates the minor version */ + uint32_t mnr : 8; + /** indicates the major version */ + uint32_t mjr : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_vs_register) == 4, "Incorrect size"); + +/** Generate raw version in the same format as \ref spdk_nvme_vs_register for comparison. */ +#define SPDK_NVME_VERSION(mjr, mnr, ter) \ + (((uint32_t)(mjr) << 16) | \ + ((uint32_t)(mnr) << 8) | \ + (uint32_t)(ter)) + +/* Test that the shifts are correct */ +SPDK_STATIC_ASSERT(SPDK_NVME_VERSION(1, 0, 0) == 0x00010000, "version macro error"); +SPDK_STATIC_ASSERT(SPDK_NVME_VERSION(1, 2, 1) == 0x00010201, "version macro error"); + +union spdk_nvme_cmbloc_register { + uint32_t raw; + struct { + /** indicator of the BAR which contains the controller memory buffer (CMB) */ + uint32_t bir : 3; + uint32_t reserved1 : 9; + /** offset of CMB in multiples of the size unit */ + uint32_t ofst : 20; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbloc_register) == 4, "Incorrect size"); + +union spdk_nvme_cmbsz_register { + uint32_t raw; + struct { + /** support submission queues in CMB */ + uint32_t sqs : 1; + /** support completion queues in CMB */ + uint32_t cqs : 1; + /** support PRP and SGL lists in CMB */ + uint32_t lists : 1; + /** support read data and metadata in CMB */ + uint32_t rds : 1; + /** support write data and metadata in CMB */ + uint32_t wds : 1; + uint32_t reserved1 : 3; + /** indicates the granularity of the size unit */ + uint32_t szu : 4; + /** size of CMB in multiples of the size unit */ + uint32_t sz : 20; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbsz_register) == 4, "Incorrect size"); + +union spdk_nvme_cmbmsc_register { + uint64_t raw; + struct { + /** capability registers enabled */ + uint64_t cre : 1; + +
/** controller memory space enable */ + uint64_t cmse : 1; + + uint64_t reserved : 10; + + /** controller base address */ + uint64_t cba : 52; + } bits; + +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbmsc_register) == 8, "Incorrect size"); + +union spdk_nvme_cmbsts_register { + uint32_t raw; + struct { + /** controller base address invalid */ + uint32_t cbai : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmbsts_register) == 4, "Incorrect size"); + +/** Boot partition information */ +union spdk_nvme_bpinfo_register { + uint32_t raw; + struct { + /** Boot partition size in 128KB multiples */ + uint32_t bpsz : 15; + + uint32_t reserved1 : 9; + + /** + * Boot read status + * 00b: No Boot Partition read operation requested + * 01b: Boot Partition read in progress + * 10b: Boot Partition read completed successfully + * 11b: Error completing Boot Partition read + */ + uint32_t brs : 2; + + uint32_t reserved2 : 5; + + /** Active Boot Partition ID */ + uint32_t abpid : 1; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_bpinfo_register) == 4, "Incorrect size"); + +/** Boot partition read select */ +union spdk_nvme_bprsel_register { + uint32_t raw; + struct { + /** Boot partition read size in multiples of 4KB */ + uint32_t bprsz : 10; + + /** Boot partition read offset in multiples of 4KB */ + uint32_t bprof : 20; + + uint32_t reserved : 1; + + /** Boot Partition Identifier */ + uint32_t bpid : 1; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_bprsel_register) == 4, "Incorrect size"); + +/** Value to write to NSSR to indicate a NVM subsystem reset ("NVMe") */ +#define SPDK_NVME_NSSR_VALUE 0x4E564D65 + +struct spdk_nvme_registers { + /** controller capabilities */ + union spdk_nvme_cap_register cap; + + /** version of NVMe specification */ + union spdk_nvme_vs_register vs; + uint32_t intms; /* interrupt mask set */ + uint32_t intmc; /* interrupt mask clear */ + + /** controller configuration */ + union 
spdk_nvme_cc_register cc; + + uint32_t reserved1; + union spdk_nvme_csts_register csts; /* controller status */ + uint32_t nssr; /* NVM subsystem reset */ + + /** admin queue attributes */ + union spdk_nvme_aqa_register aqa; + + uint64_t asq; /* admin submission queue base addr */ + uint64_t acq; /* admin completion queue base addr */ + /** controller memory buffer location */ + union spdk_nvme_cmbloc_register cmbloc; + /** controller memory buffer size */ + union spdk_nvme_cmbsz_register cmbsz; + + /** boot partition information */ + union spdk_nvme_bpinfo_register bpinfo; + + /** boot partition read select */ + union spdk_nvme_bprsel_register bprsel; + + /** boot partition memory buffer location (must be 4KB aligned) */ + uint64_t bpmbl; + + /** controller memory buffer memory space control */ + union spdk_nvme_cmbmsc_register cmbmsc; + + /** controller memory buffer status */ + union spdk_nvme_cmbsts_register cmbsts; + + uint32_t reserved3[0x3e9]; + + struct { + uint32_t sq_tdbl; /* submission queue tail doorbell */ + uint32_t cq_hdbl; /* completion queue head doorbell */ + } doorbell[1]; +}; + +/* NVMe controller register space offsets */ +SPDK_STATIC_ASSERT(0x00 == offsetof(struct spdk_nvme_registers, cap), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x08 == offsetof(struct spdk_nvme_registers, vs), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x0C == offsetof(struct spdk_nvme_registers, intms), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x10 == offsetof(struct spdk_nvme_registers, intmc), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x14 == offsetof(struct spdk_nvme_registers, cc), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x1C == offsetof(struct spdk_nvme_registers, csts), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x20 == offsetof(struct spdk_nvme_registers, nssr), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x24 == offsetof(struct spdk_nvme_registers, aqa), "Incorrect register offset"); 
+SPDK_STATIC_ASSERT(0x28 == offsetof(struct spdk_nvme_registers, asq), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x30 == offsetof(struct spdk_nvme_registers, acq), "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x38 == offsetof(struct spdk_nvme_registers, cmbloc), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x3C == offsetof(struct spdk_nvme_registers, cmbsz), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x40 == offsetof(struct spdk_nvme_registers, bpinfo), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x44 == offsetof(struct spdk_nvme_registers, bprsel), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x48 == offsetof(struct spdk_nvme_registers, bpmbl), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x50 == offsetof(struct spdk_nvme_registers, cmbmsc), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(0x58 == offsetof(struct spdk_nvme_registers, cmbsts), + "Incorrect register offset"); + +enum spdk_nvme_sgl_descriptor_type { + SPDK_NVME_SGL_TYPE_DATA_BLOCK = 0x0, + SPDK_NVME_SGL_TYPE_BIT_BUCKET = 0x1, + SPDK_NVME_SGL_TYPE_SEGMENT = 0x2, + SPDK_NVME_SGL_TYPE_LAST_SEGMENT = 0x3, + SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK = 0x4, + SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK = 0x5, + /* 0x6 - 0xE reserved */ + SPDK_NVME_SGL_TYPE_VENDOR_SPECIFIC = 0xF +}; + +enum spdk_nvme_sgl_descriptor_subtype { + SPDK_NVME_SGL_SUBTYPE_ADDRESS = 0x0, + SPDK_NVME_SGL_SUBTYPE_OFFSET = 0x1, + SPDK_NVME_SGL_SUBTYPE_TRANSPORT = 0xa, +}; + +struct __attribute__((packed)) spdk_nvme_sgl_descriptor { + uint64_t address; + union { + struct { + uint8_t reserved[7]; + uint8_t subtype : 4; + uint8_t type : 4; + } generic; + + struct { + uint32_t length; + uint8_t reserved[3]; + uint8_t subtype : 4; + uint8_t type : 4; + } unkeyed; + + struct { + uint64_t length : 24; + uint64_t key : 32; + uint64_t subtype : 4; + uint64_t type : 4; + } keyed; + }; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sgl_descriptor) == 16, "Incorrect size"); + +enum spdk_nvme_psdt_value 
{ + SPDK_NVME_PSDT_PRP = 0x0, + SPDK_NVME_PSDT_SGL_MPTR_CONTIG = 0x1, + SPDK_NVME_PSDT_SGL_MPTR_SGL = 0x2, + SPDK_NVME_PSDT_RESERVED = 0x3 +}; + +/** + * Submission queue priority values for Create I/O Submission Queue Command. + * + * Only valid for weighted round robin arbitration method. + */ +enum spdk_nvme_qprio { + SPDK_NVME_QPRIO_URGENT = 0x0, + SPDK_NVME_QPRIO_HIGH = 0x1, + SPDK_NVME_QPRIO_MEDIUM = 0x2, + SPDK_NVME_QPRIO_LOW = 0x3 +}; + +#define SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK 0x3 + +/** + * Optional Arbitration Mechanisms Supported by the controller. + * + * Each of the two bits of the CAP.AMS (18:17) field is set to '1' when the controller supports the corresponding mechanism. + * There is no bit for AMS_RR because all controllers support it; it is selected by 0x0, the default. + */ +enum spdk_nvme_cap_ams { + SPDK_NVME_CAP_AMS_WRR = 0x1, /**< weighted round robin */ + SPDK_NVME_CAP_AMS_VS = 0x2, /**< vendor specific */ +}; + +/** + * Arbitration Mechanism Selected for the controller. + * + * Values 0x2 to 0x6 are reserved. + */ +enum spdk_nvme_cc_ams { + SPDK_NVME_CC_AMS_RR = 0x0, /**< default round robin */ + SPDK_NVME_CC_AMS_WRR = 0x1, /**< weighted round robin */ + SPDK_NVME_CC_AMS_VS = 0x7, /**< vendor specific */ +}; + +/** + * Fused Operation + */ +enum spdk_nvme_cmd_fuse { + SPDK_NVME_CMD_FUSE_NONE = 0x0, /**< normal operation */ + SPDK_NVME_CMD_FUSE_FIRST = 0x1, /**< fused operation, first command */ + SPDK_NVME_CMD_FUSE_SECOND = 0x2, /**< fused operation, second command */ + SPDK_NVME_CMD_FUSE_MASK = 0x3, /**< fused operation flags mask */ +}; + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_ARBITRATION + */ +union spdk_nvme_feat_arbitration { + uint32_t raw; + struct { + /** Arbitration Burst */ + uint32_t ab : 3; + + uint32_t reserved : 5; + + /** Low Priority Weight */ + uint32_t lpw : 8; + + /** Medium Priority Weight */ + uint32_t mpw : 8; + + /** High Priority Weight */ + uint32_t hpw : 8; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_arbitration) == 4,
"Incorrect size"); + +#define SPDK_NVME_ARBITRATION_BURST_UNLIMITED 0x7 + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_POWER_MANAGEMENT + */ +union spdk_nvme_feat_power_management { + uint32_t raw; + struct { + /** Power State */ + uint32_t ps : 5; + + /** Workload Hint */ + uint32_t wh : 3; + + uint32_t reserved : 24; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_power_management) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_LBA_RANGE_TYPE + */ +union spdk_nvme_feat_lba_range_type { + uint32_t raw; + struct { + /** Number of LBA Ranges */ + uint32_t num : 6; + + uint32_t reserved : 26; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_lba_range_type) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD + */ +union spdk_nvme_feat_temperature_threshold { + uint32_t raw; + struct { + /** Temperature Threshold */ + uint32_t tmpth : 16; + + /** Threshold Temperature Select */ + uint32_t tmpsel : 4; + + /** Threshold Type Select */ + uint32_t thsel : 2; + + uint32_t reserved : 10; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_temperature_threshold) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_ERROR_RECOVERY + */ +union spdk_nvme_feat_error_recovery { + uint32_t raw; + struct { + /** Time Limited Error Recovery */ + uint32_t tler : 16; + + /** Deallocated or Unwritten Logical Block Error Enable */ + uint32_t dulbe : 1; + + uint32_t reserved : 15; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_error_recovery) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE + */ +union spdk_nvme_feat_volatile_write_cache { + uint32_t raw; + struct { + /** Volatile Write Cache Enable */ + uint32_t wce : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union 
spdk_nvme_feat_volatile_write_cache) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_NUMBER_OF_QUEUES + */ +union spdk_nvme_feat_number_of_queues { + uint32_t raw; + struct { + /** Number of I/O Submission Queues Requested */ + uint32_t nsqr : 16; + + /** Number of I/O Completion Queues Requested */ + uint32_t ncqr : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_number_of_queues) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_INTERRUPT_COALESCING + */ +union spdk_nvme_feat_interrupt_coalescing { + uint32_t raw; + struct { + /** Aggregation Threshold */ + uint32_t thr : 8; + + /** Aggregation Time */ + uint32_t time : 8; + + uint32_t reserved : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_interrupt_coalescing) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION + */ +union spdk_nvme_feat_interrupt_vector_configuration { + uint32_t raw; + struct { + /** Interrupt Vector */ + uint32_t iv : 16; + + /** Coalescing Disable */ + uint32_t cd : 1; + + uint32_t reserved : 15; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_interrupt_vector_configuration) == 4, + "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_WRITE_ATOMICITY + */ +union spdk_nvme_feat_write_atomicity { + uint32_t raw; + struct { + /** Disable Normal */ + uint32_t dn : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_write_atomicity) == 4, "Incorrect size"); + +union spdk_nvme_critical_warning_state { + uint8_t raw; + + struct { + uint8_t available_spare : 1; + uint8_t temperature : 1; + uint8_t device_reliability : 1; + uint8_t read_only : 1; + uint8_t volatile_memory_backup : 1; + uint8_t reserved : 3; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_critical_warning_state) == 1, "Incorrect
size"); + +/** + * Data used by Set Features / Get Features \ref SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION + */ +union spdk_nvme_feat_async_event_configuration { + uint32_t raw; + struct { + union spdk_nvme_critical_warning_state crit_warn; + uint32_t ns_attr_notice : 1; + uint32_t fw_activation_notice : 1; + uint32_t telemetry_log_notice : 1; + uint32_t reserved : 21; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_async_event_configuration) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION + */ +union spdk_nvme_feat_autonomous_power_state_transition { + uint32_t raw; + struct { + /** Autonomous Power State Transition Enable */ + uint32_t apste : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_autonomous_power_state_transition) == 4, + "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_MEM_BUFFER + */ +union spdk_nvme_feat_host_mem_buffer { + uint32_t raw; + struct { + /** Enable Host Memory */ + uint32_t ehm : 1; + + /** Memory Return */ + uint32_t mr : 1; + + uint32_t reserved : 30; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_host_mem_buffer) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_KEEP_ALIVE_TIMER + */ +union spdk_nvme_feat_keep_alive_timer { + uint32_t raw; + struct { + /** Keep Alive Timeout */ + uint32_t kato : 32; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_keep_alive_timer) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT + */ +union spdk_nvme_feat_host_controlled_thermal_management { + uint32_t raw; + struct { + /** Thermal Management Temperature 2 */ + uint32_t tmt2 : 16; + + /** Thermal Management Temperature 1 */ + uint32_t tmt1 : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union 
spdk_nvme_feat_host_controlled_thermal_management) == 4, + "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG + */ +union spdk_nvme_feat_non_operational_power_state_config { + uint32_t raw; + struct { + /** Non-Operational Power State Permissive Mode Enable */ + uint32_t noppme : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_non_operational_power_state_config) == 4, + "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER + */ +union spdk_nvme_feat_software_progress_marker { + uint32_t raw; + struct { + /** Pre-boot Software Load Count */ + uint32_t pbslc : 8; + + uint32_t reserved : 24; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_software_progress_marker) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_IDENTIFIER + */ +union spdk_nvme_feat_host_identifier { + uint32_t raw; + struct { + /** Enable Extended Host Identifier */ + uint32_t exhid : 1; + + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_host_identifier) == 4, "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_RESERVE_MASK + */ +union spdk_nvme_feat_reservation_notification_mask { + uint32_t raw; + struct { + uint32_t reserved1 : 1; + /* Mask Registration Preempted Notification */ + uint32_t regpre : 1; + /* Mask Reservation Released Notification */ + uint32_t resrel : 1; + /* Mask Reservation Preempted Notification */ + uint32_t respre : 1; + uint32_t reserved2 : 28; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_reservation_notification_mask) == 4, + "Incorrect size"); + +/** + * Data used by Set Features/Get Features \ref SPDK_NVME_FEAT_HOST_RESERVE_PERSIST + */ +union spdk_nvme_feat_reservation_persistence { + uint32_t raw; + struct { + /* Persist Through Power 
Loss */ + uint32_t ptpl : 1; + uint32_t reserved : 31; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_feat_reservation_persistence) == 4, "Incorrect size"); + +union spdk_nvme_cmd_cdw10 { + uint32_t raw; + struct { + /* Controller or Namespace Structure */ + uint32_t cns : 8; + uint32_t reserved : 8; + /* Controller Identifier */ + uint32_t cntid : 16; + } identify; + + struct { + /* Log Page Identifier */ + uint32_t lid : 8; + /* Log Specific Field */ + uint32_t lsp : 4; + uint32_t reserved : 3; + /* Retain Asynchronous Event */ + uint32_t rae : 1; + /* Number of Dwords Lower */ + uint32_t numdl : 16; + } get_log_page; + + struct { + /* Submission Queue Identifier */ + uint32_t sqid : 16; + /* Command Identifier */ + uint32_t cid : 16; + } abort; + + struct { + /* NVMe Security Specific Field */ + uint32_t nssf : 8; + /* SP Specific 0 */ + uint32_t spsp0 : 8; + /* SP Specific 1 */ + uint32_t spsp1 : 8; + /* Security Protocol */ + uint32_t secp : 8; + } sec_send_recv; + + struct { + /* Queue Identifier */ + uint32_t qid : 16; + /* Queue Size */ + uint32_t qsize : 16; + } create_io_q; + + struct { + /* Queue Identifier */ + uint32_t qid : 16; + uint32_t reserved : 16; + } delete_io_q; + + struct { + /* Feature Identifier */ + uint32_t fid : 8; + /* Select */ + uint32_t sel : 3; + uint32_t reserved : 21; + } get_features; + + struct { + /* Feature Identifier */ + uint32_t fid : 8; + uint32_t reserved : 23; + /* Save */ + uint32_t sv : 1; + } set_features; + + struct { + /* Select */ + uint32_t sel : 4; + uint32_t reserved : 28; + } ns_attach; + + struct { + /* Select */ + uint32_t sel : 4; + uint32_t reserved : 28; + } ns_manage; + + struct { + /* Number of Ranges */ + uint32_t nr : 8; + uint32_t reserved : 24; + } dsm; + + struct { + /* Reservation Register Action */ + uint32_t rrega : 3; + /* Ignore Existing Key */ + uint32_t iekey : 1; + uint32_t reserved : 26; + /* Change Persist Through Power Loss State */ + uint32_t cptpl : 2; + } resv_register; + + 
struct { + /* Reservation Release Action */ + uint32_t rrela : 3; + /* Ignore Existing Key */ + uint32_t iekey : 1; + uint32_t reserved1 : 4; + /* Reservation Type */ + uint32_t rtype : 8; + uint32_t reserved2 : 16; + } resv_release; + + struct { + /* Reservation Acquire Action */ + uint32_t racqa : 3; + /* Ignore Existing Key */ + uint32_t iekey : 1; + uint32_t reserved1 : 4; + /* Reservation Type */ + uint32_t rtype : 8; + uint32_t reserved2 : 16; + } resv_acquire; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmd_cdw10) == 4, "Incorrect size"); + +union spdk_nvme_cmd_cdw11 { + uint32_t raw; + + struct { + /* Physically Contiguous */ + uint32_t pc : 1; + /* Queue Priority */ + uint32_t qprio : 2; + uint32_t reserved : 13; + /* Completion Queue Identifier */ + uint32_t cqid : 16; + } create_io_sq; + + struct { + /* Physically Contiguous */ + uint32_t pc : 1; + /* Interrupts Enabled */ + uint32_t ien : 1; + uint32_t reserved : 14; + /* Interrupt Vector */ + uint32_t iv : 16; + } create_io_cq; + + struct { + /* Number of Dwords Upper */ + uint32_t numdu : 16; + /* Log Specific Identifier */ + uint32_t lsid : 16; + } get_log_page; + + struct { + /* Extended Data Structure */ + uint32_t eds : 1; + uint32_t reserved : 31; + } resv_report; + + union spdk_nvme_feat_arbitration feat_arbitration; + union spdk_nvme_feat_power_management feat_power_management; + union spdk_nvme_feat_lba_range_type feat_lba_range_type; + union spdk_nvme_feat_temperature_threshold feat_temp_threshold; + union spdk_nvme_feat_error_recovery feat_error_recovery; + union spdk_nvme_feat_volatile_write_cache feat_volatile_write_cache; + union spdk_nvme_feat_number_of_queues feat_num_of_queues; + union spdk_nvme_feat_interrupt_coalescing feat_interrupt_coalescing; + union spdk_nvme_feat_interrupt_vector_configuration feat_interrupt_vector_configuration; + union spdk_nvme_feat_write_atomicity feat_write_atomicity; + union spdk_nvme_feat_async_event_configuration feat_async_event_cfg; + union 
spdk_nvme_feat_keep_alive_timer feat_keep_alive_timer; + union spdk_nvme_feat_host_identifier feat_host_identifier; + union spdk_nvme_feat_reservation_notification_mask feat_rsv_notification_mask; + union spdk_nvme_feat_reservation_persistence feat_rsv_persistence; + + struct { + /* Attribute – Integral Dataset for Read */ + uint32_t idr : 1; + /* Attribute – Integral Dataset for Write */ + uint32_t idw : 1; + /* Attribute – Deallocate */ + uint32_t ad : 1; + uint32_t reserved : 29; + } dsm; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_cmd_cdw11) == 4, "Incorrect size"); + +struct spdk_nvme_cmd { + /* dword 0 */ + uint16_t opc : 8; /* opcode */ + uint16_t fuse : 2; /* fused operation */ + uint16_t rsvd1 : 4; + uint16_t psdt : 2; + uint16_t cid; /* command identifier */ + + /* dword 1 */ + uint32_t nsid; /* namespace identifier */ + + /* dword 2-3 */ + uint32_t rsvd2; + uint32_t rsvd3; + + /* dword 4-5 */ + uint64_t mptr; /* metadata pointer */ + + /* dword 6-9: data pointer */ + union { + struct { + uint64_t prp1; /* prp entry 1 */ + uint64_t prp2; /* prp entry 2 */ + } prp; + + struct spdk_nvme_sgl_descriptor sgl1; + } dptr; + + /* command-specific */ + union { + uint32_t cdw10; + union spdk_nvme_cmd_cdw10 cdw10_bits; + }; + /* command-specific */ + union { + uint32_t cdw11; + union spdk_nvme_cmd_cdw11 cdw11_bits; + }; + /* dword 12-15 */ + uint32_t cdw12; /* command-specific */ + uint32_t cdw13; /* command-specific */ + uint32_t cdw14; /* command-specific */ + uint32_t cdw15; /* command-specific */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cmd) == 64, "Incorrect size"); + +struct spdk_nvme_status { + uint16_t p : 1; /* phase tag */ + uint16_t sc : 8; /* status code */ + uint16_t sct : 3; /* status code type */ + uint16_t rsvd2 : 2; + uint16_t m : 1; /* more */ + uint16_t dnr : 1; /* do not retry */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_status) == 2, "Incorrect size"); + +/** + * Completion queue entry + */ +struct spdk_nvme_cpl { + /* dword 0 */ 
+ uint32_t cdw0; /* command-specific */ + + /* dword 1 */ + uint32_t rsvd1; + + /* dword 2 */ + uint16_t sqhd; /* submission queue head pointer */ + uint16_t sqid; /* submission queue identifier */ + + /* dword 3 */ + uint16_t cid; /* command identifier */ + union { + uint16_t status_raw; + struct spdk_nvme_status status; + }; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cpl) == 16, "Incorrect size"); + +/** + * Dataset Management range + */ +struct spdk_nvme_dsm_range { + union { + struct { + uint32_t af : 4; /**< access frequency */ + uint32_t al : 2; /**< access latency */ + uint32_t reserved0 : 2; + + uint32_t sr : 1; /**< sequential read range */ + uint32_t sw : 1; /**< sequential write range */ + uint32_t wp : 1; /**< write prepare */ + uint32_t reserved1 : 13; + + uint32_t access_size : 8; /**< command access size */ + } bits; + + uint32_t raw; + } attributes; + + uint32_t length; + uint64_t starting_lba; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_dsm_range) == 16, "Incorrect size"); + +/** + * Status code types + */ +enum spdk_nvme_status_code_type { + SPDK_NVME_SCT_GENERIC = 0x0, + SPDK_NVME_SCT_COMMAND_SPECIFIC = 0x1, + SPDK_NVME_SCT_MEDIA_ERROR = 0x2, + SPDK_NVME_SCT_PATH = 0x3, + /* 0x4-0x6 - reserved */ + SPDK_NVME_SCT_VENDOR_SPECIFIC = 0x7, +}; + +/** + * Generic command status codes + */ +enum spdk_nvme_generic_command_status_code { + SPDK_NVME_SC_SUCCESS = 0x00, + SPDK_NVME_SC_INVALID_OPCODE = 0x01, + SPDK_NVME_SC_INVALID_FIELD = 0x02, + SPDK_NVME_SC_COMMAND_ID_CONFLICT = 0x03, + SPDK_NVME_SC_DATA_TRANSFER_ERROR = 0x04, + SPDK_NVME_SC_ABORTED_POWER_LOSS = 0x05, + SPDK_NVME_SC_INTERNAL_DEVICE_ERROR = 0x06, + SPDK_NVME_SC_ABORTED_BY_REQUEST = 0x07, + SPDK_NVME_SC_ABORTED_SQ_DELETION = 0x08, + SPDK_NVME_SC_ABORTED_FAILED_FUSED = 0x09, + SPDK_NVME_SC_ABORTED_MISSING_FUSED = 0x0a, + SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b, + SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c, + SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR = 0x0d, + 
SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS = 0x0e, + SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID = 0x0f, + SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID = 0x10, + SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID = 0x11, + SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF = 0x12, + SPDK_NVME_SC_INVALID_PRP_OFFSET = 0x13, + SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED = 0x14, + SPDK_NVME_SC_OPERATION_DENIED = 0x15, + SPDK_NVME_SC_INVALID_SGL_OFFSET = 0x16, + /* 0x17 - reserved */ + SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT = 0x18, + SPDK_NVME_SC_KEEP_ALIVE_EXPIRED = 0x19, + SPDK_NVME_SC_KEEP_ALIVE_INVALID = 0x1a, + SPDK_NVME_SC_ABORTED_PREEMPT = 0x1b, + SPDK_NVME_SC_SANITIZE_FAILED = 0x1c, + SPDK_NVME_SC_SANITIZE_IN_PROGRESS = 0x1d, + SPDK_NVME_SC_SGL_DATA_BLOCK_GRANULARITY_INVALID = 0x1e, + SPDK_NVME_SC_COMMAND_INVALID_IN_CMB = 0x1f, + + SPDK_NVME_SC_LBA_OUT_OF_RANGE = 0x80, + SPDK_NVME_SC_CAPACITY_EXCEEDED = 0x81, + SPDK_NVME_SC_NAMESPACE_NOT_READY = 0x82, + SPDK_NVME_SC_RESERVATION_CONFLICT = 0x83, + SPDK_NVME_SC_FORMAT_IN_PROGRESS = 0x84, +}; + +/** + * Command specific status codes + */ +enum spdk_nvme_command_specific_status_code { + SPDK_NVME_SC_COMPLETION_QUEUE_INVALID = 0x00, + SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01, + SPDK_NVME_SC_INVALID_QUEUE_SIZE = 0x02, + SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03, + /* 0x04 - reserved */ + SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05, + SPDK_NVME_SC_INVALID_FIRMWARE_SLOT = 0x06, + SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07, + SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08, + SPDK_NVME_SC_INVALID_LOG_PAGE = 0x09, + SPDK_NVME_SC_INVALID_FORMAT = 0x0a, + SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET = 0x0b, + SPDK_NVME_SC_INVALID_QUEUE_DELETION = 0x0c, + SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE = 0x0d, + SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE = 0x0e, + SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC = 0x0f, + SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET = 0x10, + SPDK_NVME_SC_FIRMWARE_REQ_RESET = 0x11, + SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION = 0x12, + 
SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED = 0x13, + SPDK_NVME_SC_OVERLAPPING_RANGE = 0x14, + SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY = 0x15, + SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE = 0x16, + /* 0x17 - reserved */ + SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED = 0x18, + SPDK_NVME_SC_NAMESPACE_IS_PRIVATE = 0x19, + SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED = 0x1a, + SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED = 0x1b, + SPDK_NVME_SC_CONTROLLER_LIST_INVALID = 0x1c, + SPDK_NVME_SC_DEVICE_SELF_TEST_IN_PROGRESS = 0x1d, + SPDK_NVME_SC_BOOT_PARTITION_WRITE_PROHIBITED = 0x1e, + SPDK_NVME_SC_INVALID_CTRLR_ID = 0x1f, + SPDK_NVME_SC_INVALID_SECONDARY_CTRLR_STATE = 0x20, + SPDK_NVME_SC_INVALID_NUM_CTRLR_RESOURCES = 0x21, + SPDK_NVME_SC_INVALID_RESOURCE_ID = 0x22, + + SPDK_NVME_SC_IOCS_NOT_SUPPORTED = 0x29, + SPDK_NVME_SC_IOCS_NOT_ENABLED = 0x2a, + SPDK_NVME_SC_IOCS_COMBINATION_REJECTED = 0x2b, + SPDK_NVME_SC_INVALID_IOCS = 0x2c, + + SPDK_NVME_SC_CONFLICTING_ATTRIBUTES = 0x80, + SPDK_NVME_SC_INVALID_PROTECTION_INFO = 0x81, + SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_RANGE = 0x82, +}; + +/** + * Media error status codes + */ +enum spdk_nvme_media_error_status_code { + SPDK_NVME_SC_WRITE_FAULTS = 0x80, + SPDK_NVME_SC_UNRECOVERED_READ_ERROR = 0x81, + SPDK_NVME_SC_GUARD_CHECK_ERROR = 0x82, + SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83, + SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84, + SPDK_NVME_SC_COMPARE_FAILURE = 0x85, + SPDK_NVME_SC_ACCESS_DENIED = 0x86, + SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK = 0x87, +}; + +/** + * Path related status codes + */ +enum spdk_nvme_path_status_code { + SPDK_NVME_SC_INTERNAL_PATH_ERROR = 0x00, + + SPDK_NVME_SC_CONTROLLER_PATH_ERROR = 0x60, + + SPDK_NVME_SC_HOST_PATH_ERROR = 0x70, + SPDK_NVME_SC_ABORTED_BY_HOST = 0x71, +}; + +#define SPDK_NVME_MAX_OPC 0xff + +/** + * Admin opcodes + */ +enum spdk_nvme_admin_opcode { + SPDK_NVME_OPC_DELETE_IO_SQ = 0x00, + SPDK_NVME_OPC_CREATE_IO_SQ = 0x01, + SPDK_NVME_OPC_GET_LOG_PAGE = 0x02, + /* 0x03 - reserved */ + 
SPDK_NVME_OPC_DELETE_IO_CQ = 0x04, + SPDK_NVME_OPC_CREATE_IO_CQ = 0x05, + SPDK_NVME_OPC_IDENTIFY = 0x06, + /* 0x07 - reserved */ + SPDK_NVME_OPC_ABORT = 0x08, + SPDK_NVME_OPC_SET_FEATURES = 0x09, + SPDK_NVME_OPC_GET_FEATURES = 0x0a, + /* 0x0b - reserved */ + SPDK_NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c, + SPDK_NVME_OPC_NS_MANAGEMENT = 0x0d, + /* 0x0e-0x0f - reserved */ + SPDK_NVME_OPC_FIRMWARE_COMMIT = 0x10, + SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11, + + SPDK_NVME_OPC_DEVICE_SELF_TEST = 0x14, + SPDK_NVME_OPC_NS_ATTACHMENT = 0x15, + + SPDK_NVME_OPC_KEEP_ALIVE = 0x18, + SPDK_NVME_OPC_DIRECTIVE_SEND = 0x19, + SPDK_NVME_OPC_DIRECTIVE_RECEIVE = 0x1a, + + SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c, + SPDK_NVME_OPC_NVME_MI_SEND = 0x1d, + SPDK_NVME_OPC_NVME_MI_RECEIVE = 0x1e, + + SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c, + + SPDK_NVME_OPC_FORMAT_NVM = 0x80, + SPDK_NVME_OPC_SECURITY_SEND = 0x81, + SPDK_NVME_OPC_SECURITY_RECEIVE = 0x82, + + SPDK_NVME_OPC_SANITIZE = 0x84, + + SPDK_NVME_OPC_GET_LBA_STATUS = 0x86, +}; + +/** + * NVM command set opcodes + */ +enum spdk_nvme_nvm_opcode { + SPDK_NVME_OPC_FLUSH = 0x00, + SPDK_NVME_OPC_WRITE = 0x01, + SPDK_NVME_OPC_READ = 0x02, + /* 0x03 - reserved */ + SPDK_NVME_OPC_WRITE_UNCORRECTABLE = 0x04, + SPDK_NVME_OPC_COMPARE = 0x05, + /* 0x06-0x07 - reserved */ + SPDK_NVME_OPC_WRITE_ZEROES = 0x08, + SPDK_NVME_OPC_DATASET_MANAGEMENT = 0x09, + + SPDK_NVME_OPC_RESERVATION_REGISTER = 0x0d, + SPDK_NVME_OPC_RESERVATION_REPORT = 0x0e, + + SPDK_NVME_OPC_RESERVATION_ACQUIRE = 0x11, + SPDK_NVME_OPC_RESERVATION_RELEASE = 0x15, +}; + +/** + * Data transfer (bits 1:0) of an NVMe opcode. + * + * \sa spdk_nvme_opc_get_data_transfer + */ +enum spdk_nvme_data_transfer { + /** Opcode does not transfer data */ + SPDK_NVME_DATA_NONE = 0, + /** Opcode transfers data from host to controller (e.g. Write) */ + SPDK_NVME_DATA_HOST_TO_CONTROLLER = 1, + /** Opcode transfers data from controller to host (e.g. 
Read) */ + SPDK_NVME_DATA_CONTROLLER_TO_HOST = 2, + /** Opcode transfers data both directions */ + SPDK_NVME_DATA_BIDIRECTIONAL = 3 +}; + +/** + * Extract the Data Transfer bits from an NVMe opcode. + * + * This determines whether a command requires a data buffer and + * which direction (host to controller or controller to host) it is + * transferred. + */ +static inline enum spdk_nvme_data_transfer spdk_nvme_opc_get_data_transfer(uint8_t opc) +{ + return (enum spdk_nvme_data_transfer)(opc & 3); +} + +enum spdk_nvme_feat { + /* 0x00 - reserved */ + + /** cdw11 layout defined by \ref spdk_nvme_feat_arbitration */ + SPDK_NVME_FEAT_ARBITRATION = 0x01, + /** cdw11 layout defined by \ref spdk_nvme_feat_power_management */ + SPDK_NVME_FEAT_POWER_MANAGEMENT = 0x02, + /** cdw11 layout defined by \ref spdk_nvme_feat_lba_range_type */ + SPDK_NVME_FEAT_LBA_RANGE_TYPE = 0x03, + /** cdw11 layout defined by \ref spdk_nvme_feat_temperature_threshold */ + SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04, + /** cdw11 layout defined by \ref spdk_nvme_feat_error_recovery */ + SPDK_NVME_FEAT_ERROR_RECOVERY = 0x05, + /** cdw11 layout defined by \ref spdk_nvme_feat_volatile_write_cache */ + SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06, + /** cdw11 layout defined by \ref spdk_nvme_feat_number_of_queues */ + SPDK_NVME_FEAT_NUMBER_OF_QUEUES = 0x07, + /** cdw11 layout defined by \ref spdk_nvme_feat_interrupt_coalescing */ + SPDK_NVME_FEAT_INTERRUPT_COALESCING = 0x08, + /** cdw11 layout defined by \ref spdk_nvme_feat_interrupt_vector_configuration */ + SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09, + /** cdw11 layout defined by \ref spdk_nvme_feat_write_atomicity */ + SPDK_NVME_FEAT_WRITE_ATOMICITY = 0x0A, + /** cdw11 layout defined by \ref spdk_nvme_feat_async_event_configuration */ + SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B, + /** cdw11 layout defined by \ref spdk_nvme_feat_autonomous_power_state_transition */ + SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C, + /** cdw11 
layout defined by \ref spdk_nvme_feat_host_mem_buffer */ + SPDK_NVME_FEAT_HOST_MEM_BUFFER = 0x0D, + SPDK_NVME_FEAT_TIMESTAMP = 0x0E, + /** cdw11 layout defined by \ref spdk_nvme_feat_keep_alive_timer */ + SPDK_NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F, + /** cdw11 layout defined by \ref spdk_nvme_feat_host_controlled_thermal_management */ + SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT = 0x10, + /** cdw11 layout defined by \ref spdk_nvme_feat_non_operational_power_state_config */ + SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG = 0x11, + + /* 0x12-0x77 - reserved */ + + /* 0x78-0x7F - NVMe-MI features */ + + /** cdw11 layout defined by \ref spdk_nvme_feat_software_progress_marker */ + SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80, + + /** cdw11 layout defined by \ref spdk_nvme_feat_host_identifier */ + SPDK_NVME_FEAT_HOST_IDENTIFIER = 0x81, + /** cdw11 layout defined by \ref spdk_nvme_feat_reservation_notification_mask */ + SPDK_NVME_FEAT_HOST_RESERVE_MASK = 0x82, + /** cdw11 layout defined by \ref spdk_nvme_feat_reservation_persistence */ + SPDK_NVME_FEAT_HOST_RESERVE_PERSIST = 0x83, + + /* 0x84-0xBF - command set specific (reserved) */ + + /* 0xC0-0xFF - vendor specific */ +}; + +/** Bit set of attributes for DATASET MANAGEMENT commands. 
*/ +enum spdk_nvme_dsm_attribute { + SPDK_NVME_DSM_ATTR_INTEGRAL_READ = 0x1, + SPDK_NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2, + SPDK_NVME_DSM_ATTR_DEALLOCATE = 0x4, +}; + +struct spdk_nvme_power_state { + uint16_t mp; /* bits 15:00: maximum power */ + + uint8_t reserved1; + + uint8_t mps : 1; /* bit 24: max power scale */ + uint8_t nops : 1; /* bit 25: non-operational state */ + uint8_t reserved2 : 6; + + uint32_t enlat; /* bits 63:32: entry latency in microseconds */ + uint32_t exlat; /* bits 95:64: exit latency in microseconds */ + + uint8_t rrt : 5; /* bits 100:96: relative read throughput */ + uint8_t reserved3 : 3; + + uint8_t rrl : 5; /* bits 108:104: relative read latency */ + uint8_t reserved4 : 3; + + uint8_t rwt : 5; /* bits 116:112: relative write throughput */ + uint8_t reserved5 : 3; + + uint8_t rwl : 5; /* bits 124:120: relative write latency */ + uint8_t reserved6 : 3; + + uint8_t reserved7[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_power_state) == 32, "Incorrect size"); + +/** Identify command CNS (Controller or Namespace Structure) values */ +enum spdk_nvme_identify_cns { + /** Identify namespace indicated in CDW1.NSID */ + SPDK_NVME_IDENTIFY_NS = 0x00, + + /** Identify controller */ + SPDK_NVME_IDENTIFY_CTRLR = 0x01, + + /** List active NSIDs greater than CDW1.NSID */ + SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST = 0x02, + + /** List namespace identification descriptors */ + SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST = 0x03, + + /** Identify namespace indicated in CDW1.NSID, specific to CDW11.CSI */ + SPDK_NVME_IDENTIFY_NS_IOCS = 0x05, + + /** Identify controller, specific to CDW11.CSI */ + SPDK_NVME_IDENTIFY_CTRLR_IOCS = 0x06, + + /** List active NSIDs greater than CDW1.NSID, specific to CDW11.CSI */ + SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST_IOCS = 0x07, + + /** List allocated NSIDs greater than CDW1.NSID */ + SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST = 0x10, + + /** Identify namespace if CDW1.NSID is allocated */ + SPDK_NVME_IDENTIFY_NS_ALLOCATED = 0x11, + + /** Get list of controllers starting at 
CDW10.CNTID that are attached to CDW1.NSID */ + SPDK_NVME_IDENTIFY_NS_ATTACHED_CTRLR_LIST = 0x12, + + /** Get list of controllers starting at CDW10.CNTID */ + SPDK_NVME_IDENTIFY_CTRLR_LIST = 0x13, + + /** Get primary controller capabilities structure */ + SPDK_NVME_IDENTIFY_PRIMARY_CTRLR_CAP = 0x14, + + /** Get secondary controller list */ + SPDK_NVME_IDENTIFY_SECONDARY_CTRLR_LIST = 0x15, + + /** List allocated NSIDs greater than CDW1.NSID, specific to CDW11.CSI */ + SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST_IOCS = 0x1a, + + /** Identify namespace if CDW1.NSID is allocated, specific to CDW11.CSI */ + SPDK_NVME_IDENTIFY_NS_ALLOCATED_IOCS = 0x1b, + + /** Identify I/O Command Sets */ + SPDK_NVME_IDENTIFY_IOCS = 0x1c, +}; + +/** NVMe over Fabrics controller model */ +enum spdk_nvmf_ctrlr_model { + /** NVM subsystem uses dynamic controller model */ + SPDK_NVMF_CTRLR_MODEL_DYNAMIC = 0, + + /** NVM subsystem uses static controller model */ + SPDK_NVMF_CTRLR_MODEL_STATIC = 1, +}; + +#define SPDK_NVME_CTRLR_SN_LEN 20 +#define SPDK_NVME_CTRLR_MN_LEN 40 +#define SPDK_NVME_CTRLR_FR_LEN 8 + +/** Identify Controller data sgls.supported values */ +enum spdk_nvme_sgls_supported { + /** SGLs are not supported */ + SPDK_NVME_SGLS_NOT_SUPPORTED = 0, + + /** SGLs are supported with no alignment or granularity requirement. */ + SPDK_NVME_SGLS_SUPPORTED = 1, + + /** SGLs are supported with a DWORD alignment and granularity requirement. */ + SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED = 2, +}; + +/** Identify Controller data vwc.flush_broadcast values */ +enum spdk_nvme_flush_broadcast { + /** Support for NSID=FFFFFFFFh with Flush is not indicated. */ + SPDK_NVME_FLUSH_BROADCAST_NOT_INDICATED = 0, + + /* 01b: Reserved */ + + /** Flush does not support NSID set to FFFFFFFFh. */ + SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED = 2, + + /** Flush supports NSID set to FFFFFFFFh. 
*/ + SPDK_NVME_FLUSH_BROADCAST_SUPPORTED = 3 +}; + +#define SPDK_NVME_NQN_FIELD_SIZE 256 + +/** Identify Controller data NVMe over Fabrics-specific fields */ +struct spdk_nvme_cdata_nvmf_specific { + /** I/O queue command capsule supported size (16-byte units) */ + uint32_t ioccsz; + + /** I/O queue response capsule supported size (16-byte units) */ + uint32_t iorcsz; + + /** In-capsule data offset (16-byte units) */ + uint16_t icdoff; + + /** Controller attributes */ + struct { + /** Controller model: \ref spdk_nvmf_ctrlr_model */ + uint8_t ctrlr_model : 1; + uint8_t reserved : 7; + } ctrattr; + + /** Maximum SGL block descriptors (0 = no limit) */ + uint8_t msdbd; + + uint8_t reserved[244]; +}; + +/** Identify Controller data SGL support */ +struct spdk_nvme_cdata_sgls { + uint32_t supported : 2; + uint32_t keyed_sgl : 1; + uint32_t reserved1 : 13; + uint32_t bit_bucket_descriptor : 1; + uint32_t metadata_pointer : 1; + uint32_t oversized_sgl : 1; + uint32_t metadata_address : 1; + uint32_t sgl_offset : 1; + uint32_t transport_sgl : 1; + uint32_t reserved2 : 10; +}; + +struct __attribute__((packed)) __attribute__((aligned)) spdk_nvme_ctrlr_data { + /* bytes 0-255: controller capabilities and features */ + + /** pci vendor id */ + uint16_t vid; + + /** pci subsystem vendor id */ + uint16_t ssvid; + + /** serial number */ + int8_t sn[SPDK_NVME_CTRLR_SN_LEN]; + + /** model number */ + int8_t mn[SPDK_NVME_CTRLR_MN_LEN]; + + /** firmware revision */ + uint8_t fr[SPDK_NVME_CTRLR_FR_LEN]; + + /** recommended arbitration burst */ + uint8_t rab; + + /** ieee oui identifier */ + uint8_t ieee[3]; + + /** controller multi-path I/O and namespace sharing capabilities */ + struct { + uint8_t multi_port : 1; + uint8_t multi_host : 1; + uint8_t sr_iov : 1; + uint8_t reserved : 5; + } cmic; + + /** maximum data transfer size */ + uint8_t mdts; + + /** controller id */ + uint16_t cntlid; + + /** version */ + union spdk_nvme_vs_register ver; + + /** RTD3 resume latency */ + uint32_t 
rtd3r; + + /** RTD3 entry latency */ + uint32_t rtd3e; + + /** optional asynchronous events supported */ + struct { + uint32_t reserved1 : 8; + + /** Supports sending Namespace Attribute Notices. */ + uint32_t ns_attribute_notices : 1; + + /** Supports sending Firmware Activation Notices. */ + uint32_t fw_activation_notices : 1; + + uint32_t reserved2 : 22; + } oaes; + + /** controller attributes */ + struct { + /** Supports 128-bit host identifier */ + uint32_t host_id_exhid_supported: 1; + + /** Supports non-operational power state permissive mode */ + uint32_t non_operational_power_state_permissive_mode: 1; + + uint32_t reserved: 30; + } ctratt; + + uint8_t reserved_100[12]; + + /** FRU globally unique identifier */ + uint8_t fguid[16]; + + uint8_t reserved_128[128]; + + /* bytes 256-511: admin command set attributes */ + + /** optional admin command support */ + struct { + /* supports security send/receive commands */ + uint16_t security : 1; + + /* supports format nvm command */ + uint16_t format : 1; + + /* supports firmware activate/download commands */ + uint16_t firmware : 1; + + /* supports ns manage/ns attach commands */ + uint16_t ns_manage : 1; + + /** Supports device self-test command (SPDK_NVME_OPC_DEVICE_SELF_TEST) */ + uint16_t device_self_test : 1; + + /** Supports SPDK_NVME_OPC_DIRECTIVE_SEND and SPDK_NVME_OPC_DIRECTIVE_RECEIVE */ + uint16_t directives : 1; + + /** Supports NVMe-MI (SPDK_NVME_OPC_NVME_MI_SEND, SPDK_NVME_OPC_NVME_MI_RECEIVE) */ + uint16_t nvme_mi : 1; + + /** Supports SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT */ + uint16_t virtualization_management : 1; + + /** Supports SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG */ + uint16_t doorbell_buffer_config : 1; + + /** Supports SPDK_NVME_OPC_GET_LBA_STATUS */ + uint16_t get_lba_status : 1; + + uint16_t oacs_rsvd : 6; + } oacs; + + /** abort command limit */ + uint8_t acl; + + /** asynchronous event request limit */ + uint8_t aerl; + + /** firmware updates */ + struct { + /* first slot is 
read-only */ + uint8_t slot1_ro : 1; + + /* number of firmware slots */ + uint8_t num_slots : 3; + + /* support activation without reset */ + uint8_t activation_without_reset : 1; + + uint8_t frmw_rsvd : 3; + } frmw; + + /** log page attributes */ + struct { + /* per namespace smart/health log page */ + uint8_t ns_smart : 1; + /* command effects log page */ + uint8_t celp : 1; + /* extended data for get log page */ + uint8_t edlp: 1; + /** telemetry log pages and notices */ + uint8_t telemetry : 1; + uint8_t lpa_rsvd : 4; + } lpa; + + /** error log page entries */ + uint8_t elpe; + + /** number of power states supported */ + uint8_t npss; + + /** admin vendor specific command configuration */ + struct { + /* admin vendor specific commands use disk format */ + uint8_t spec_format : 1; + + uint8_t avscc_rsvd : 7; + } avscc; + + /** autonomous power state transition attributes */ + struct { + /** controller supports autonomous power state transitions */ + uint8_t supported : 1; + + uint8_t apsta_rsvd : 7; + } apsta; + + /** warning composite temperature threshold */ + uint16_t wctemp; + + /** critical composite temperature threshold */ + uint16_t cctemp; + + /** maximum time for firmware activation */ + uint16_t mtfa; + + /** host memory buffer preferred size */ + uint32_t hmpre; + + /** host memory buffer minimum size */ + uint32_t hmmin; + + /** total NVM capacity */ + uint64_t tnvmcap[2]; + + /** unallocated NVM capacity */ + uint64_t unvmcap[2]; + + /** replay protected memory block support */ + struct { + uint8_t num_rpmb_units : 3; + uint8_t auth_method : 3; + uint8_t reserved1 : 2; + + uint8_t reserved2; + + uint8_t total_size; + uint8_t access_size; + } rpmbs; + + /** extended device self-test time (in minutes) */ + uint16_t edstt; + + /** device self-test options */ + union { + uint8_t raw; + struct { + /** Device supports only one device self-test operation at a time */ + uint8_t one_only : 1; + + uint8_t reserved : 7; + } bits; + } dsto; + + /** + * 
Firmware update granularity + * + * 4KB units + * 0x00 = no information provided + * 0xFF = no restriction + */ + uint8_t fwug; + + /** + * Keep Alive Support + * + * Granularity of keep alive timer in 100 ms units + * 0 = keep alive not supported + */ + uint16_t kas; + + /** Host controlled thermal management attributes */ + union { + uint16_t raw; + struct { + uint16_t supported : 1; + uint16_t reserved : 15; + } bits; + } hctma; + + /** Minimum thermal management temperature */ + uint16_t mntmt; + + /** Maximum thermal management temperature */ + uint16_t mxtmt; + + /** Sanitize capabilities */ + union { + uint32_t raw; + struct { + uint32_t crypto_erase : 1; + uint32_t block_erase : 1; + uint32_t overwrite : 1; + uint32_t reserved : 29; + } bits; + } sanicap; + + uint8_t reserved3[180]; + + /* bytes 512-703: nvm command set attributes */ + + /** submission queue entry size */ + struct { + uint8_t min : 4; + uint8_t max : 4; + } sqes; + + /** completion queue entry size */ + struct { + uint8_t min : 4; + uint8_t max : 4; + } cqes; + + uint16_t maxcmd; + + /** number of namespaces */ + uint32_t nn; + + /** optional nvm command support */ + struct { + uint16_t compare : 1; + uint16_t write_unc : 1; + uint16_t dsm: 1; + uint16_t write_zeroes: 1; + uint16_t set_features_save: 1; + uint16_t reservations: 1; + uint16_t timestamp: 1; + uint16_t reserved: 9; + } oncs; + + /** fused operation support */ + struct { + uint16_t compare_and_write : 1; + uint16_t reserved : 15; + } fuses; + + /** format nvm attributes */ + struct { + uint8_t format_all_ns: 1; + uint8_t erase_all_ns: 1; + uint8_t crypto_erase_supported: 1; + uint8_t reserved: 5; + } fna; + + /** volatile write cache */ + struct { + uint8_t present : 1; + uint8_t flush_broadcast : 2; + uint8_t reserved : 5; + } vwc; + + /** atomic write unit normal */ + uint16_t awun; + + /** atomic write unit power fail */ + uint16_t awupf; + + /** NVM vendor specific command configuration */ + uint8_t nvscc; + + uint8_t 
reserved531; + + /** atomic compare & write unit */ + uint16_t acwu; + + uint16_t reserved534; + + struct spdk_nvme_cdata_sgls sgls; + + uint8_t reserved4[228]; + + uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE]; + + uint8_t reserved5[768]; + + struct spdk_nvme_cdata_nvmf_specific nvmf_specific; + + /* bytes 2048-3071: power state descriptors */ + struct spdk_nvme_power_state psd[32]; + + /* bytes 3072-4095: vendor specific */ + uint8_t vs[1024]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_data) == 4096, "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_primary_ctrl_capabilities { + /** controller id */ + uint16_t cntlid; + /** port identifier */ + uint16_t portid; + /** controller resource types */ + struct { + uint8_t vq_supported : 1; + uint8_t vi_supported : 1; + uint8_t reserved : 6; + } crt; + uint8_t reserved[27]; + /** total number of VQ flexible resources */ + uint32_t vqfrt; + /** total number of VQ flexible resources assigned to secondary controllers */ + uint32_t vqrfa; + /** total number of VQ flexible resources allocated to primary controller */ + uint16_t vqrfap; + /** total number of VQ Private resources for the primary controller */ + uint16_t vqprt; + /** max number of VQ flexible Resources that may be assigned to a secondary controller */ + uint16_t vqfrsm; + /** preferred granularity of assigning and removing VQ Flexible Resources */ + uint16_t vqgran; + uint8_t reserved1[16]; + /** total number of VI flexible resources for the primary and its secondary controllers */ + uint32_t vifrt; + /** total number of VI flexible resources assigned to the secondary controllers */ + uint32_t virfa; + /** total number of VI flexible resources currently allocated to the primary controller */ + uint16_t virfap; + /** total number of VI private resources for the primary controller */ + uint16_t viprt; + /** max number of VI flexible resources that may be assigned to a secondary controller */ + uint16_t vifrsm; + /** preferred granularity of 
assigning and removing VI flexible resources */ + uint16_t vigran; + uint8_t reserved2[4016]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_primary_ctrl_capabilities) == 4096, "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_secondary_ctrl_entry { + /** controller identifier of the secondary controller */ + uint16_t scid; + /** controller identifier of the associated primary controller */ + uint16_t pcid; + /** indicates the state of the secondary controller */ + struct { + uint8_t is_online : 1; + uint8_t reserved : 7; + } scs; + uint8_t reserved[3]; + /** VF number if the secondary controller is an SR-IOV VF */ + uint16_t vfn; + /** number of VQ flexible resources assigned to the indicated secondary controller */ + uint16_t nvq; + /** number of VI flexible resources assigned to the indicated secondary controller */ + uint16_t nvi; + uint8_t reserved1[18]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_secondary_ctrl_entry) == 32, "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_secondary_ctrl_list { + /** number of Secondary controller entries in the list */ + uint8_t number; + uint8_t reserved[31]; + struct spdk_nvme_secondary_ctrl_entry entries[127]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_secondary_ctrl_list) == 4096, "Incorrect size"); + +struct spdk_nvme_ns_data { + /** namespace size */ + uint64_t nsze; + + /** namespace capacity */ + uint64_t ncap; + + /** namespace utilization */ + uint64_t nuse; + + /** namespace features */ + struct { + /** thin provisioning */ + uint8_t thin_prov : 1; + + /** NAWUN, NAWUPF, and NACWU are defined for this namespace */ + uint8_t ns_atomic_write_unit : 1; + + /** Supports Deallocated or Unwritten LBA error for this namespace */ + uint8_t dealloc_or_unwritten_error : 1; + + /** Non-zero NGUID and EUI64 for namespace are never reused */ + uint8_t guid_never_reused : 1; + + uint8_t reserved1 : 4; + } nsfeat; + + /** number of lba formats */ + uint8_t nlbaf; + + /** formatted lba size */ 
+ struct { + uint8_t format : 4; + uint8_t extended : 1; + uint8_t reserved2 : 3; + } flbas; + + /** metadata capabilities */ + struct { + /** metadata can be transferred as part of data prp list */ + uint8_t extended : 1; + + /** metadata can be transferred with separate metadata pointer */ + uint8_t pointer : 1; + + /** reserved */ + uint8_t reserved3 : 6; + } mc; + + /** end-to-end data protection capabilities */ + struct { + /** protection information type 1 */ + uint8_t pit1 : 1; + + /** protection information type 2 */ + uint8_t pit2 : 1; + + /** protection information type 3 */ + uint8_t pit3 : 1; + + /** first eight bytes of metadata */ + uint8_t md_start : 1; + + /** last eight bytes of metadata */ + uint8_t md_end : 1; + } dpc; + + /** end-to-end data protection type settings */ + struct { + /** protection information type */ + uint8_t pit : 3; + + /** 1 == protection info transferred at start of metadata */ + /** 0 == protection info transferred at end of metadata */ + uint8_t md_start : 1; + + uint8_t reserved4 : 4; + } dps; + + /** namespace multi-path I/O and namespace sharing capabilities */ + struct { + uint8_t can_share : 1; + uint8_t reserved : 7; + } nmic; + + /** reservation capabilities */ + union { + struct { + /** supports persist through power loss */ + uint8_t persist : 1; + + /** supports write exclusive */ + uint8_t write_exclusive : 1; + + /** supports exclusive access */ + uint8_t exclusive_access : 1; + + /** supports write exclusive - registrants only */ + uint8_t write_exclusive_reg_only : 1; + + /** supports exclusive access - registrants only */ + uint8_t exclusive_access_reg_only : 1; + + /** supports write exclusive - all registrants */ + uint8_t write_exclusive_all_reg : 1; + + /** supports exclusive access - all registrants */ + uint8_t exclusive_access_all_reg : 1; + + /** supports ignore existing key */ + uint8_t ignore_existing_key : 1; + } rescap; + uint8_t raw; + } nsrescap; + /** format progress indicator */ + struct { + 
uint8_t percentage_remaining : 7; + uint8_t fpi_supported : 1; + } fpi; + + /** deallocate logical features */ + union { + uint8_t raw; + struct { + /** + * Value read from deallocated blocks + * + * 000b = not reported + * 001b = all bytes 0x00 + * 010b = all bytes 0xFF + * + * \ref spdk_nvme_dealloc_logical_block_read_value + */ + uint8_t read_value : 3; + + /** Supports Deallocate bit in Write Zeroes */ + uint8_t write_zero_deallocate : 1; + + /** + * Guard field behavior for deallocated logical blocks + * 0: contains 0xFFFF + * 1: contains CRC for read value + */ + uint8_t guard_value : 1; + + uint8_t reserved : 3; + } bits; + } dlfeat; + + /** namespace atomic write unit normal */ + uint16_t nawun; + + /** namespace atomic write unit power fail */ + uint16_t nawupf; + + /** namespace atomic compare & write unit */ + uint16_t nacwu; + + /** namespace atomic boundary size normal */ + uint16_t nabsn; + + /** namespace atomic boundary offset */ + uint16_t nabo; + + /** namespace atomic boundary size power fail */ + uint16_t nabspf; + + /** namespace optimal I/O boundary in logical blocks */ + uint16_t noiob; + + /** NVM capacity */ + uint64_t nvmcap[2]; + + uint8_t reserved64[40]; + + /** namespace globally unique identifier */ + uint8_t nguid[16]; + + /** IEEE extended unique identifier */ + uint64_t eui64; + + /** lba format support */ + struct { + /** metadata size */ + uint32_t ms : 16; + + /** lba data size */ + uint32_t lbads : 8; + + /** relative performance */ + uint32_t rp : 2; + + uint32_t reserved6 : 6; + } lbaf[16]; + + uint8_t reserved6[192]; + + uint8_t vendor_specific[3712]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_data) == 4096, "Incorrect size"); + +/** + * Deallocated logical block features - read value + */ +enum spdk_nvme_dealloc_logical_block_read_value { + /** Not reported */ + SPDK_NVME_DEALLOC_NOT_REPORTED = 0, + + /** Deallocated blocks read 0x00 */ + SPDK_NVME_DEALLOC_READ_00 = 1, + + /** Deallocated blocks read 0xFF */ + 
SPDK_NVME_DEALLOC_READ_FF = 2, +}; + +/** + * Reservation Type Encoding + */ +enum spdk_nvme_reservation_type { + /* 0x00 - reserved */ + + /* Write Exclusive Reservation */ + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE = 0x1, + + /* Exclusive Access Reservation */ + SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS = 0x2, + + /* Write Exclusive - Registrants Only Reservation */ + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY = 0x3, + + /* Exclusive Access - Registrants Only Reservation */ + SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY = 0x4, + + /* Write Exclusive - All Registrants Reservation */ + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS = 0x5, + + /* Exclusive Access - All Registrants Reservation */ + SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS = 0x6, + + /* 0x7-0xFF - Reserved */ +}; + +struct spdk_nvme_reservation_acquire_data { + /** current reservation key */ + uint64_t crkey; + /** preempt reservation key */ + uint64_t prkey; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_acquire_data) == 16, "Incorrect size"); + +/** + * Reservation Acquire action + */ +enum spdk_nvme_reservation_acquire_action { + SPDK_NVME_RESERVE_ACQUIRE = 0x0, + SPDK_NVME_RESERVE_PREEMPT = 0x1, + SPDK_NVME_RESERVE_PREEMPT_ABORT = 0x2, +}; + +struct __attribute__((packed)) spdk_nvme_reservation_status_data { + /** reservation action generation counter */ + uint32_t gen; + /** reservation type */ + uint8_t rtype; + /** number of registered controllers */ + uint16_t regctl; + uint16_t reserved1; + /** persist through power loss state */ + uint8_t ptpls; + uint8_t reserved[14]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_status_data) == 24, "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_reservation_status_extended_data { + struct spdk_nvme_reservation_status_data data; + uint8_t reserved[40]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_status_extended_data) == 64, + "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_registered_ctrlr_data { + 
/** controller id */ + uint16_t cntlid; + /** reservation status */ + struct { + uint8_t status : 1; + uint8_t reserved1 : 7; + } rcsts; + uint8_t reserved2[5]; + /** 64-bit host identifier */ + uint64_t hostid; + /** reservation key */ + uint64_t rkey; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_registered_ctrlr_data) == 24, "Incorrect size"); + +struct __attribute__((packed)) spdk_nvme_registered_ctrlr_extended_data { + /** controller id */ + uint16_t cntlid; + /** reservation status */ + struct { + uint8_t status : 1; + uint8_t reserved1 : 7; + } rcsts; + uint8_t reserved2[5]; + /** reservation key */ + uint64_t rkey; + /** 128-bit host identifier */ + uint8_t hostid[16]; + uint8_t reserved3[32]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_registered_ctrlr_extended_data) == 64, "Incorrect size"); + +/** + * Change persist through power loss state for + * Reservation Register command + */ +enum spdk_nvme_reservation_register_cptpl { + SPDK_NVME_RESERVE_PTPL_NO_CHANGES = 0x0, + SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON = 0x2, + SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS = 0x3, +}; + +/** + * Registration action for Reservation Register command + */ +enum spdk_nvme_reservation_register_action { + SPDK_NVME_RESERVE_REGISTER_KEY = 0x0, + SPDK_NVME_RESERVE_UNREGISTER_KEY = 0x1, + SPDK_NVME_RESERVE_REPLACE_KEY = 0x2, +}; + +struct spdk_nvme_reservation_register_data { + /** current reservation key */ + uint64_t crkey; + /** new reservation key */ + uint64_t nrkey; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_register_data) == 16, "Incorrect size"); + +struct spdk_nvme_reservation_key_data { + /** current reservation key */ + uint64_t crkey; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_key_data) == 8, "Incorrect size"); + +/** + * Reservation Release action + */ +enum spdk_nvme_reservation_release_action { + SPDK_NVME_RESERVE_RELEASE = 0x0, + SPDK_NVME_RESERVE_CLEAR = 0x1, +}; + +/** + * Reservation notification log page type + */ +enum 
spdk_nvme_reservation_notification_log_page_type { + SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY = 0x0, + SPDK_NVME_REGISTRATION_PREEMPTED = 0x1, + SPDK_NVME_RESERVATION_RELEASED = 0x2, + SPDK_NVME_RESERVATION_PREEMPTED = 0x3, +}; + +/** + * Reservation notification log + */ +struct spdk_nvme_reservation_notification_log { + /** 64-bit incrementing reservation notification log page count */ + uint64_t log_page_count; + /** Reservation notification log page type */ + uint8_t type; + /** Number of additional available reservation notification log pages */ + uint8_t num_avail_log_pages; + uint8_t reserved[2]; + uint32_t nsid; + uint8_t reserved1[48]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_reservation_notification_log) == 64, "Incorrect size"); + +/* Mask Registration Preempted Notification */ +#define SPDK_NVME_REGISTRATION_PREEMPTED_MASK (1U << 1) +/* Mask Reservation Released Notification */ +#define SPDK_NVME_RESERVATION_RELEASED_MASK (1U << 2) +/* Mask Reservation Preempted Notification */ +#define SPDK_NVME_RESERVATION_PREEMPTED_MASK (1U << 3) + +/** + * Log page identifiers for SPDK_NVME_OPC_GET_LOG_PAGE + */ +enum spdk_nvme_log_page { + /* 0x00 - reserved */ + + /** Error information (mandatory) - \ref spdk_nvme_error_information_entry */ + SPDK_NVME_LOG_ERROR = 0x01, + + /** SMART / health information (mandatory) - \ref spdk_nvme_health_information_page */ + SPDK_NVME_LOG_HEALTH_INFORMATION = 0x02, + + /** Firmware slot information (mandatory) - \ref spdk_nvme_firmware_page */ + SPDK_NVME_LOG_FIRMWARE_SLOT = 0x03, + + /** Changed namespace list (optional) */ + SPDK_NVME_LOG_CHANGED_NS_LIST = 0x04, + + /** Command effects log (optional) */ + SPDK_NVME_LOG_COMMAND_EFFECTS_LOG = 0x05, + + /** Device self test (optional) */ + SPDK_NVME_LOG_DEVICE_SELF_TEST = 0x06, + + /** Host initiated telemetry log (optional) */ + SPDK_NVME_LOG_TELEMETRY_HOST_INITIATED = 0x07, + + /** Controller initiated telemetry log (optional) */ + SPDK_NVME_LOG_TELEMETRY_CTRLR_INITIATED = 
0x08, + + /* 0x09-0x6F - reserved */ + + /** Discovery(refer to the NVMe over Fabrics specification) */ + SPDK_NVME_LOG_DISCOVERY = 0x70, + + /* 0x71-0x7f - reserved for NVMe over Fabrics */ + + /** Reservation notification (optional) */ + SPDK_NVME_LOG_RESERVATION_NOTIFICATION = 0x80, + + /** Sanitize status (optional) */ + SPDK_NVME_LOG_SANITIZE_STATUS = 0x81, + + /* 0x81-0xBF - I/O command set specific */ + + /* 0xC0-0xFF - vendor specific */ +}; + +/** + * Error information log page (\ref SPDK_NVME_LOG_ERROR) + */ +struct spdk_nvme_error_information_entry { + uint64_t error_count; + uint16_t sqid; + uint16_t cid; + struct spdk_nvme_status status; + uint16_t error_location; + uint64_t lba; + uint32_t nsid; + uint8_t vendor_specific; + uint8_t trtype; + uint8_t reserved30[2]; + uint64_t command_specific; + uint16_t trtype_specific; + uint8_t reserved42[22]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_error_information_entry) == 64, "Incorrect size"); + +/** + * SMART / health information page (\ref SPDK_NVME_LOG_HEALTH_INFORMATION) + */ +struct __attribute__((packed)) __attribute__((aligned)) spdk_nvme_health_information_page { + union spdk_nvme_critical_warning_state critical_warning; + + uint16_t temperature; + uint8_t available_spare; + uint8_t available_spare_threshold; + uint8_t percentage_used; + + uint8_t reserved[26]; + + /* + * Note that the following are 128-bit values, but are + * defined as an array of 2 64-bit values. + */ + /* Data Units Read is always in 512-byte units. */ + uint64_t data_units_read[2]; + /* Data Units Written is always in 512-byte units. */ + uint64_t data_units_written[2]; + /* For NVM command set, this includes Compare commands. */ + uint64_t host_read_commands[2]; + uint64_t host_write_commands[2]; + /* Controller Busy Time is reported in minutes. 
*/ + uint64_t controller_busy_time[2]; + uint64_t power_cycles[2]; + uint64_t power_on_hours[2]; + uint64_t unsafe_shutdowns[2]; + uint64_t media_errors[2]; + uint64_t num_error_info_log_entries[2]; + /* Controller temperature related. */ + uint32_t warning_temp_time; + uint32_t critical_temp_time; + uint16_t temp_sensor[8]; + + uint8_t reserved2[296]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_health_information_page) == 512, "Incorrect size"); + +/* Commands Supported and Effects Data Structure */ +struct spdk_nvme_cmds_and_effect_entry { + /** Command Supported */ + uint16_t csupp : 1; + + /** Logical Block Content Change */ + uint16_t lbcc : 1; + + /** Namespace Capability Change */ + uint16_t ncc : 1; + + /** Namespace Inventory Change */ + uint16_t nic : 1; + + /** Controller Capability Change */ + uint16_t ccc : 1; + + uint16_t reserved1 : 11; + + /* Command Submission and Execution recommendation + * 000 - No command submission or execution restriction + * 001 - Submitted when there is no outstanding command to same NS + * 010 - Submitted when there is no outstanding command to any NS + * others - Reserved + * \ref command_submission_and_execution in section 5.14.1.5 NVMe Revision 1.3 + */ + uint16_t cse : 3; + + uint16_t reserved2 : 13; +}; + +/* Commands Supported and Effects Log Page */ +struct spdk_nvme_cmds_and_effect_log_page { + /** Commands Supported and Effects Data Structure for the Admin Commands */ + struct spdk_nvme_cmds_and_effect_entry admin_cmds_supported[256]; + + /** Commands Supported and Effects Data Structure for the IO Commands */ + struct spdk_nvme_cmds_and_effect_entry io_cmds_supported[256]; + + uint8_t reserved0[2048]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cmds_and_effect_log_page) == 4096, "Incorrect size"); + +/* + * Get Log Page – Telemetry Host/Controller Initiated Log (Log Identifiers 07h/08h) + */ +struct spdk_nvme_telemetry_log_page_hdr { + /* Log page identifier */ + uint8_t lpi; + uint8_t rsvd[4]; + uint8_t 
ieee_oui[3]; + /* Data area 1 last block */ + uint16_t dalb1; + /* Data area 2 last block */ + uint16_t dalb2; + /* Data area 3 last block */ + uint16_t dalb3; + uint8_t rsvd1[368]; + /* Controller initiated data avail */ + uint8_t ctrlr_avail; + /* Controller initiated telemetry data generation */ + uint8_t ctrlr_gen; + /* Reason identifier */ + uint8_t rsnident[128]; + uint8_t telemetry_datablock[0]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_telemetry_log_page_hdr) == 512, "Incorrect size"); + +/** + * Sanitize Status Type + */ +enum spdk_nvme_sanitize_status_type { + SPDK_NVME_NEVER_BEEN_SANITIZED = 0x0, + SPDK_NVME_RECENT_SANITIZE_SUCCESSFUL = 0x1, + SPDK_NVME_SANITIZE_IN_PROGRESS = 0x2, + SPDK_NVME_SANITIZE_FAILED = 0x3, +}; + +/** + * Sanitize status sstat field + */ +struct spdk_nvme_sanitize_status_sstat { + uint16_t status : 3; + uint16_t complete_pass : 5; + uint16_t global_data_erase : 1; + uint16_t reserved : 7; +}; + +/** + * Sanitize log page + */ +struct spdk_nvme_sanitize_status_log_page { + /* Sanitize progress */ + uint16_t sprog; + /* Sanitize status */ + struct spdk_nvme_sanitize_status_sstat sstat; + /* CDW10 of sanitize command */ + uint32_t scdw10; + /* Estimated overwrite time in seconds */ + uint32_t et_overwrite; + /* Estimated block erase time in seconds */ + uint32_t et_block_erase; + /* Estimated crypto erase time in seconds */ + uint32_t et_crypto_erase; + uint8_t reserved[492]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sanitize_status_log_page) == 512, "Incorrect size"); + +/** + * Asynchronous Event Type + */ +enum spdk_nvme_async_event_type { + /* Error Status */ + SPDK_NVME_ASYNC_EVENT_TYPE_ERROR = 0x0, + /* SMART/Health Status */ + SPDK_NVME_ASYNC_EVENT_TYPE_SMART = 0x1, + /* Notice */ + SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE = 0x2, + /* 0x3 - 0x5 Reserved */ + + /* I/O Command Set Specific Status */ + SPDK_NVME_ASYNC_EVENT_TYPE_IO = 0x6, + /* Vendor Specific */ + SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR = 0x7, +}; + +/** + * 
Asynchronous Event Information for Error Status + */ +enum spdk_nvme_async_event_info_error { + /* Write to Invalid Doorbell Register */ + SPDK_NVME_ASYNC_EVENT_WRITE_INVALID_DB = 0x0, + /* Invalid Doorbell Register Write Value */ + SPDK_NVME_ASYNC_EVENT_INVALID_DB_WRITE = 0x1, + /* Diagnostic Failure */ + SPDK_NVME_ASYNC_EVENT_DIAGNOSTIC_FAILURE = 0x2, + /* Persistent Internal Error */ + SPDK_NVME_ASYNC_EVENT_PERSISTENT_INTERNAL = 0x3, + /* Transient Internal Error */ + SPDK_NVME_ASYNC_EVENT_TRANSIENT_INTERNAL = 0x4, + /* Firmware Image Load Error */ + SPDK_NVME_ASYNC_EVENT_FW_IMAGE_LOAD = 0x5, + + /* 0x6 - 0xFF Reserved */ +}; + +/** + * Asynchronous Event Information for SMART/Health Status + */ +enum spdk_nvme_async_event_info_smart { + /* NVM Subsystem Reliability */ + SPDK_NVME_ASYNC_EVENT_SUBSYSTEM_RELIABILITY = 0x0, + /* Temperature Threshold */ + SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD = 0x1, + /* Spare Below Threshold */ + SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD = 0x2, + + /* 0x3 - 0xFF Reserved */ +}; + +/** + * Asynchronous Event Information for Notice + */ +enum spdk_nvme_async_event_info_notice { + /* Namespace Attribute Changed */ + SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED = 0x0, + /* Firmware Activation Starting */ + SPDK_NVME_ASYNC_EVENT_FW_ACTIVATION_START = 0x1, + /* Telemetry Log Changed */ + SPDK_NVME_ASYNC_EVENT_TELEMETRY_LOG_CHANGED = 0x2, + + /* 0x3 - 0xFF Reserved */ +}; + +/** + * Asynchronous Event Information for NVM Command Set Specific Status + */ +enum spdk_nvme_async_event_info_nvm_command_set { + /* Reservation Log Page Available */ + SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL = 0x0, + /* Sanitize Operation Completed */ + SPDK_NVME_ASYNC_EVENT_SANITIZE_COMPLETED = 0x1, + + /* 0x2 - 0xFF Reserved */ +}; + +/** + * Asynchronous Event Request Completion + */ +union spdk_nvme_async_event_completion { + uint32_t raw; + struct { + uint32_t async_event_type : 3; + uint32_t reserved1 : 5; + uint32_t async_event_info : 8; + uint32_t 
log_page_identifier : 8; + uint32_t reserved2 : 8; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvme_async_event_completion) == 4, "Incorrect size"); + +/** + * Firmware slot information page (\ref SPDK_NVME_LOG_FIRMWARE_SLOT) + */ +struct spdk_nvme_firmware_page { + struct { + uint8_t active_slot : 3; /**< Slot for current FW */ + uint8_t reserved3 : 1; + uint8_t next_reset_slot : 3; /**< Slot that will be active at next controller reset */ + uint8_t reserved7 : 1; + } afi; + + uint8_t reserved[7]; + uint8_t revision[7][8]; /** Revisions for 7 slots (ASCII strings) */ + uint8_t reserved2[448]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_firmware_page) == 512, "Incorrect size"); + +/** + * Namespace attachment Type Encoding + */ +enum spdk_nvme_ns_attach_type { + /* Controller attach */ + SPDK_NVME_NS_CTRLR_ATTACH = 0x0, + + /* Controller detach */ + SPDK_NVME_NS_CTRLR_DETACH = 0x1, + + /* 0x2-0xF - Reserved */ +}; + +/** + * Namespace management Type Encoding + */ +enum spdk_nvme_ns_management_type { + /* Create */ + SPDK_NVME_NS_MANAGEMENT_CREATE = 0x0, + + /* Delete */ + SPDK_NVME_NS_MANAGEMENT_DELETE = 0x1, + + /* 0x2-0xF - Reserved */ +}; + +struct spdk_nvme_ns_list { + uint32_t ns_list[1024]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_list) == 4096, "Incorrect size"); + +/** + * Namespace identification descriptor type + * + * \sa spdk_nvme_ns_id_desc + */ +enum spdk_nvme_nidt { + /** IEEE Extended Unique Identifier */ + SPDK_NVME_NIDT_EUI64 = 0x01, + + /** Namespace GUID */ + SPDK_NVME_NIDT_NGUID = 0x02, + + /** Namespace UUID */ + SPDK_NVME_NIDT_UUID = 0x03, +}; + +struct spdk_nvme_ns_id_desc { + /** Namespace identifier type */ + uint8_t nidt; + + /** Namespace identifier length (length of nid field) */ + uint8_t nidl; + + uint8_t reserved2; + uint8_t reserved3; + + /** Namespace identifier */ + uint8_t nid[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ns_id_desc) == 4, "Incorrect size"); + +struct spdk_nvme_ctrlr_list { + uint16_t 
ctrlr_count; + uint16_t ctrlr_list[2047]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_list) == 4096, "Incorrect size"); + +enum spdk_nvme_secure_erase_setting { + SPDK_NVME_FMT_NVM_SES_NO_SECURE_ERASE = 0x0, + SPDK_NVME_FMT_NVM_SES_USER_DATA_ERASE = 0x1, + SPDK_NVME_FMT_NVM_SES_CRYPTO_ERASE = 0x2, +}; + +enum spdk_nvme_pi_location { + SPDK_NVME_FMT_NVM_PROTECTION_AT_TAIL = 0x0, + SPDK_NVME_FMT_NVM_PROTECTION_AT_HEAD = 0x1, +}; + +enum spdk_nvme_pi_type { + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE = 0x0, + SPDK_NVME_FMT_NVM_PROTECTION_TYPE1 = 0x1, + SPDK_NVME_FMT_NVM_PROTECTION_TYPE2 = 0x2, + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3 = 0x3, +}; + +enum spdk_nvme_metadata_setting { + SPDK_NVME_FMT_NVM_METADATA_TRANSFER_AS_BUFFER = 0x0, + SPDK_NVME_FMT_NVM_METADATA_TRANSFER_AS_LBA = 0x1, +}; + +struct spdk_nvme_format { + uint32_t lbaf : 4; + uint32_t ms : 1; + uint32_t pi : 3; + uint32_t pil : 1; + uint32_t ses : 3; + uint32_t reserved : 20; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_format) == 4, "Incorrect size"); + +struct spdk_nvme_protection_info { + uint16_t guard; + uint16_t app_tag; + uint32_t ref_tag; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_protection_info) == 8, "Incorrect size"); + +/* Data structures for sanitize command */ +/* Sanitize - Command Dword 10 */ +struct spdk_nvme_sanitize { + /* Sanitize Action (SANACT) */ + uint32_t sanact : 3; + /* Allow Unrestricted Sanitize Exit (AUSE) */ + uint32_t ause : 1; + /* Overwrite Pass Count (OWPASS) */ + uint32_t owpass : 4; + /* Overwrite Invert Pattern Between Passes */ + uint32_t oipbp : 1; + /* No Deallocate after sanitize (NDAS) */ + uint32_t ndas : 1; + /* reserved */ + uint32_t reserved : 22; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_sanitize) == 4, "Incorrect size"); + +/* Sanitize Action */ +enum spdk_sanitize_action { + /* Exit Failure Mode */ + SPDK_NVME_SANITIZE_EXIT_FAILURE_MODE = 0x1, + /* Start a Block Erase sanitize operation */ + SPDK_NVME_SANITIZE_BLOCK_ERASE = 0x2, + /* 
Start an Overwrite sanitize operation */ + SPDK_NVME_SANITIZE_OVERWRITE = 0x3, + /* Start a Crypto Erase sanitize operation */ + SPDK_NVME_SANITIZE_CRYPTO_ERASE = 0x4, +}; + +/** Parameters for SPDK_NVME_OPC_FIRMWARE_COMMIT cdw10: commit action */ +enum spdk_nvme_fw_commit_action { + /** + * Downloaded image replaces the image specified by + * the Firmware Slot field. This image is not activated. + */ + SPDK_NVME_FW_COMMIT_REPLACE_IMG = 0x0, + /** + * Downloaded image replaces the image specified by + * the Firmware Slot field. This image is activated at the next reset. + */ + SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG = 0x1, + /** + * The image specified by the Firmware Slot field is + * activated at the next reset. + */ + SPDK_NVME_FW_COMMIT_ENABLE_IMG = 0x2, + /** + * The image specified by the Firmware Slot field is + * requested to be activated immediately without reset. + */ + SPDK_NVME_FW_COMMIT_RUN_IMG = 0x3, +}; + +/** Parameters for SPDK_NVME_OPC_FIRMWARE_COMMIT cdw10 */ +struct spdk_nvme_fw_commit { + /** + * Firmware Slot. Specifies the firmware slot that shall be used for the + * Commit Action. The controller shall choose the firmware slot (slot 1 - 7) + * to use for the operation if the value specified is 0h. + */ + uint32_t fs : 3; + /** + * Commit Action. Specifies the action that is taken on the image downloaded + * with the Firmware Image Download command or on a previously downloaded and + * placed image. 
+ */ + uint32_t ca : 3; + uint32_t reserved : 26; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_fw_commit) == 4, "Incorrect size"); + +#define spdk_nvme_cpl_is_error(cpl) \ + ((cpl)->status.sc != SPDK_NVME_SC_SUCCESS || \ + (cpl)->status.sct != SPDK_NVME_SCT_GENERIC) + +#define spdk_nvme_cpl_is_success(cpl) (!spdk_nvme_cpl_is_error(cpl)) + +#define spdk_nvme_cpl_is_pi_error(cpl) \ + ((cpl)->status.sct == SPDK_NVME_SCT_MEDIA_ERROR && \ + ((cpl)->status.sc == SPDK_NVME_SC_GUARD_CHECK_ERROR || \ + (cpl)->status.sc == SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR || \ + (cpl)->status.sc == SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR)) + +#define spdk_nvme_cpl_is_abort_success(cpl) \ + (spdk_nvme_cpl_is_success(cpl) && !((cpl)->cdw0 & 1U)) + +/** Set fused operation */ +#define SPDK_NVME_IO_FLAGS_FUSE_FIRST (SPDK_NVME_CMD_FUSE_FIRST << 0) +#define SPDK_NVME_IO_FLAGS_FUSE_SECOND (SPDK_NVME_CMD_FUSE_SECOND << 0) +#define SPDK_NVME_IO_FLAGS_FUSE_MASK (SPDK_NVME_CMD_FUSE_MASK << 0) +/** Enable protection information checking of the Logical Block Reference Tag field */ +#define SPDK_NVME_IO_FLAGS_PRCHK_REFTAG (1U << 26) +/** Enable protection information checking of the Application Tag field */ +#define SPDK_NVME_IO_FLAGS_PRCHK_APPTAG (1U << 27) +/** Enable protection information checking of the Guard field */ +#define SPDK_NVME_IO_FLAGS_PRCHK_GUARD (1U << 28) +/** The protection information is stripped or inserted when set this bit */ +#define SPDK_NVME_IO_FLAGS_PRACT (1U << 29) +#define SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS (1U << 30) +#define SPDK_NVME_IO_FLAGS_LIMITED_RETRY (1U << 31) + +/** Mask of valid io flags mask */ +#define SPDK_NVME_IO_FLAGS_VALID_MASK 0xFFFF0003 +#define SPDK_NVME_IO_FLAGS_CDW12_MASK 0xFFFF0000 + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvmf.h b/src/spdk/include/spdk/nvmf.h new file mode 100644 index 000000000..86ca574f6 --- /dev/null +++ b/src/spdk/include/spdk/nvmf.h @@ -0,0 +1,1048 @@ +/*- + * BSD LICENSE + * + * 
Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2018-2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * NVMe over Fabrics target public API + */ + +#ifndef SPDK_NVMF_H +#define SPDK_NVMF_H + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/nvme.h" +#include "spdk/nvmf_spec.h" +#include "spdk/queue.h" +#include "spdk/uuid.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NVMF_TGT_NAME_MAX_LENGTH 256 + +struct spdk_nvmf_tgt; +struct spdk_nvmf_subsystem; +struct spdk_nvmf_ctrlr; +struct spdk_nvmf_qpair; +struct spdk_nvmf_request; +struct spdk_bdev; +struct spdk_nvmf_request; +struct spdk_nvmf_host; +struct spdk_nvmf_subsystem_listener; +struct spdk_nvmf_poll_group; +struct spdk_json_write_ctx; +struct spdk_nvmf_transport; + +struct spdk_nvmf_target_opts { + char name[NVMF_TGT_NAME_MAX_LENGTH]; + uint32_t max_subsystems; +}; + +struct spdk_nvmf_transport_opts { + uint16_t max_queue_depth; + uint16_t max_qpairs_per_ctrlr; + uint32_t in_capsule_data_size; + uint32_t max_io_size; + uint32_t io_unit_size; + uint32_t max_aq_depth; + uint32_t num_shared_buffers; + uint32_t buf_cache_size; + uint32_t max_srq_depth; + bool no_srq; + bool c2h_success; + bool dif_insert_or_strip; + uint32_t sock_priority; + int acceptor_backlog; + uint32_t abort_timeout_sec; +}; + +struct spdk_nvmf_poll_group_stat { + uint32_t admin_qpairs; + uint32_t io_qpairs; + uint64_t pending_bdev_io; +}; + +struct spdk_nvmf_rdma_device_stat { + const char *name; + uint64_t polls; + uint64_t completions; + uint64_t requests; + uint64_t request_latency; + uint64_t pending_free_request; + uint64_t pending_rdma_read; + uint64_t pending_rdma_write; +}; + +struct spdk_nvmf_transport_poll_group_stat { + spdk_nvme_transport_type_t trtype; + union { + struct { + uint64_t pending_data_buffer; + uint64_t num_devices; + struct spdk_nvmf_rdma_device_stat *devices; + } rdma; + }; +}; + +/** + * Function to be called once the listener is associated with a subsystem. + * + * \param ctx Context argument passed to this function. 
+ * \param status 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_nvmf_tgt_subsystem_listen_done_fn)(void *ctx, int status); + +/** + * Construct an NVMe-oF target. + * + * \param opts a pointer to an spdk_nvmf_target_opts structure. + * + * \return a pointer to a NVMe-oF target on success, or NULL on failure. + */ +struct spdk_nvmf_tgt *spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts); + +typedef void (spdk_nvmf_tgt_destroy_done_fn)(void *ctx, int status); + +/** + * Destroy an NVMe-oF target. + * + * \param tgt The target to destroy. This releases all resources. + * \param cb_fn A callback that will be called once the target is destroyed + * \param cb_arg A context argument passed to cb_fn. + */ +void spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt, + spdk_nvmf_tgt_destroy_done_fn cb_fn, + void *cb_arg); + +/** + * Get the name of an NVMe-oF target. + * + * \param tgt The target from which to get the name. + * + * \return The name of the target as a null terminated string. + */ +const char *spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt); + +/** + * Get a pointer to an NVMe-oF target. + * + * In order to support some legacy applications and RPC methods that may rely on the + * concept that there is only one target, the name parameter can be passed as NULL. + * If there is only one available target, that target will be returned. + * Otherwise, name is a required parameter. + * + * \param name The name provided when the target was created. + * + * \return The target with the given name, or NULL if no match was found. + */ +struct spdk_nvmf_tgt *spdk_nvmf_get_tgt(const char *name); + +/** + * Get the pointer to the first NVMe-oF target. + * + * Combined with spdk_nvmf_get_next_tgt to iterate over all available targets. + * + * \return The first NVMe-oF target. + */ +struct spdk_nvmf_tgt *spdk_nvmf_get_first_tgt(void); + +/** + * Get the pointer to the first NVMe-oF target. 
+ * + * Combined with spdk_nvmf_get_first_tgt to iterate over all available targets. + * + * \param prev A pointer to the previously returned NVMe-oF target. + * + * \return The next NVMe-oF target. + */ +struct spdk_nvmf_tgt *spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev); + +/** + * Write NVMe-oF target configuration into provided JSON context. + * \param w JSON write context + * \param tgt The NVMe-oF target + */ +void spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt); + +/** + * Begin accepting new connections at the address provided. + * + * The connections will be matched with a subsystem, which may or may not allow + * the connection based on a subsystem-specific whitelist. See + * spdk_nvmf_subsystem_add_host() and spdk_nvmf_subsystem_add_listener() + * + * \param tgt The target associated with this listen address. + * \param trid The address to listen at. + * + * \return 0 on success or a negated errno on failure. + */ +int spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt, + struct spdk_nvme_transport_id *trid); + +/** + * Stop accepting new connections at the provided address. + * + * This is a counterpart to spdk_nvmf_tgt_listen(). + * + * \param tgt The target associated with the listen address. + * \param trid The address to stop listening at. + * + * \return int. 0 on success or a negated errno on failure. + */ +int spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt, + struct spdk_nvme_transport_id *trid); + +/** + * Poll the target for incoming connections. + * + * \param tgt The target associated with the listen address. + */ +uint32_t spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt); + +/** + * Create a poll group. + * + * \param tgt The target to create a poll group. + * + * \return a poll group on success, or NULL on failure. + */ +struct spdk_nvmf_poll_group *spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt); + +/** + * Get optimal nvmf poll group for the qpair.
+ * + * \param qpair Requested qpair + * + * \return a poll group on success, or NULL on failure. + */ +struct spdk_nvmf_poll_group *spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair); + +typedef void(*spdk_nvmf_poll_group_destroy_done_fn)(void *cb_arg, int status); + +/** + * Destroy a poll group. + * + * \param group The poll group to destroy. + * \param cb_fn A callback that will be called once the poll group is destroyed. + * \param cb_arg A context argument passed to cb_fn. + */ +void spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group, + spdk_nvmf_poll_group_destroy_done_fn cb_fn, + void *cb_arg); + +/** + * Add the given qpair to the poll group. + * + * \param group The group to add qpair to. + * \param qpair The qpair to add. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_qpair *qpair); + +/** + * Get current poll group statistics. + * + * \param tgt The NVMf target. + * \param stat Pointer to allocated statistics structure to fill with values. + * + * \return 0 upon success. + * \return -EINVAL if either tgt or stat is NULL. + */ +int spdk_nvmf_poll_group_get_stat(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_poll_group_stat *stat); + +typedef void (*nvmf_qpair_disconnect_cb)(void *ctx); + +/** + * Disconnect an NVMe-oF qpair + * + * \param qpair The NVMe-oF qpair to disconnect. + * \param cb_fn The function to call upon completion of the disconnect. + * \param ctx The context to pass to the callback function. + * + * \return 0 upon success. + * \return -ENOMEM if the function specific context could not be allocated. + */ +int spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, + void *ctx); + +/** + * Get the peer's transport ID for this queue pair. + * + * \param qpair The NVMe-oF qpair + * \param trid Output parameter that will contain the transport id. + * + * \return 0 for success.
+ * \return -EINVAL if the qpair is not connected. + */ +int spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +/** + * Get the local transport ID for this queue pair. + * + * \param qpair The NVMe-oF qpair + * \param trid Output parameter that will contain the transport id. + * + * \return 0 for success. + * \return -EINVAL if the qpair is not connected. + */ +int spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +/** + * Get the associated listener transport ID for this queue pair. + * + * \param qpair The NVMe-oF qpair + * \param trid Output parameter that will contain the transport id. + * + * \return 0 for success. + * \return -EINVAL if the qpair is not connected. + */ +int spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +/** + * Create an NVMe-oF subsystem. + * + * Subsystems are in one of three states: Inactive, Active, Paused. This + * state affects which operations may be performed on the subsystem. Upon + * creation, the subsystem will be in the Inactive state and may be activated + * by calling spdk_nvmf_subsystem_start(). No I/O will be processed in the Inactive + * or Paused states, but changes to the state of the subsystem may be made. + * + * \param tgt The NVMe-oF target that will own this subsystem. + * \param nqn The NVMe qualified name of this subsystem. + * \param type Whether this subsystem is an I/O subsystem or a Discovery subsystem. + * \param num_ns The number of namespaces this subsystem contains. + * + * \return a pointer to a NVMe-oF subsystem on success, or NULL on failure. + */ +struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt, + const char *nqn, + enum spdk_nvmf_subtype type, + uint32_t num_ns); + +/** + * Destroy an NVMe-oF subsystem. A subsystem may only be destroyed when in + * the Inactive state. See spdk_nvmf_subsystem_stop(). 
+ * + * \param subsystem The NVMe-oF subsystem to destroy. + */ +void spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem); + +/** + * Function to be called once the subsystem has changed state. + * + * \param subsystem NVMe-oF subsystem that has changed state. + * \param cb_arg Argument passed to callback function. + * \param status 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_nvmf_subsystem_state_change_done)(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status); + +/** + * Transition an NVMe-oF subsystem from Inactive to Active state. + * + * \param subsystem The NVMe-oF subsystem. + * \param cb_fn A function that will be called once the subsystem has changed state. + * \param cb_arg Argument passed to cb_fn. + * + * \return 0 on success, or negated errno on failure. The callback provided will only + * be called on success. + */ +int spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg); + +/** + * Transition an NVMe-oF subsystem from Active to Inactive state. + * + * \param subsystem The NVMe-oF subsystem. + * \param cb_fn A function that will be called once the subsystem has changed state. + * \param cb_arg Argument passed to cb_fn. + * + * \return 0 on success, or negated errno on failure. The callback provided will only + * be called on success. + */ +int spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg); + +/** + * Transition an NVMe-oF subsystem from Active to Paused state. + * + * \param subsystem The NVMe-oF subsystem. + * \param cb_fn A function that will be called once the subsystem has changed state. + * \param cb_arg Argument passed to cb_fn. + * + * \return 0 on success, or negated errno on failure. The callback provided will only + * be called on success.
+ */ +int spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg); + +/** + * Transition an NVMe-oF subsystem from Paused to Active state. + * + * \param subsystem The NVMe-oF subsystem. + * \param cb_fn A function that will be called once the subsystem has changed state. + * \param cb_arg Argument passed to cb_fn. + * + * \return 0 on success, or negated errno on failure. The callback provided will only + * be called on success. + */ +int spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg); + +/** + * Search the target for a subsystem with the given NQN. + * + * \param tgt The NVMe-oF target to search from. + * \param subnqn NQN of the subsystem. + * + * \return a pointer to the NVMe-oF subsystem on success, or NULL on failure. + */ +struct spdk_nvmf_subsystem *spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, + const char *subnqn); + +/** + * Begin iterating over all known subsystems. If no subsystems are present, return NULL. + * + * \param tgt The NVMe-oF target to iterate. + * + * \return a pointer to the first NVMe-oF subsystem on success, or NULL on failure. + */ +struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt); + +/** + * Continue iterating over all known subsystems. If no additional subsystems, return NULL. + * + * \param subsystem Previous subsystem returned from \ref spdk_nvmf_subsystem_get_first or + * \ref spdk_nvmf_subsystem_get_next. + * + * \return a pointer to the next NVMe-oF subsystem on success, or NULL on failure. + */ +struct spdk_nvmf_subsystem *spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem); + +/** + * Allow the given host NQN to connect to the given subsystem. + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to add host to. + * \param hostnqn The NQN for the host. 
+ * + * \return 0 on success, or negated errno value on failure. + */ +int spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, + const char *hostnqn); + +/** + * Remove the given host NQN from the allowed hosts whitelist. + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to remove host from. + * \param hostnqn The NQN for the host. + * + * \return 0 on success, or negated errno value on failure. + */ +int spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn); + +/** + * Set whether a subsystem should allow any host or only hosts in the allowed list. + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to modify. + * \param allow_any_host true to allow any host to connect to this subsystem, + * or false to enforce the whitelist configured with spdk_nvmf_subsystem_add_host(). + * + * \return 0 on success, or negated errno value on failure. + */ +int spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, + bool allow_any_host); + +/** + * Check whether a subsystem should allow any host or only hosts in the allowed list. + * + * \param subsystem Subsystem to query. + * + * \return true if any host is allowed to connect to this subsystem, or false if + * connecting hosts must be in the whitelist configured with spdk_nvmf_subsystem_add_host(). + */ +bool spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem); + +/** + * Check if the given host is allowed to connect to the subsystem. + * + * \param subsystem The subsystem to query. + * \param hostnqn The NQN of the host. + * + * \return true if allowed, false if not. + */ +bool spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn); + +/** + * Get the first allowed host in a subsystem. + * + * \param subsystem Subsystem to query. 
+ * + * \return first allowed host in this subsystem, or NULL if none allowed. + */ +struct spdk_nvmf_host *spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem); + +/** + * Get the next allowed host in a subsystem. + * + * \param subsystem Subsystem to query. + * \param prev_host Previous host returned from this function. + * + * \return next allowed host in this subsystem, or NULL if prev_host was the last host. + */ +struct spdk_nvmf_host *spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_host *prev_host); + +/** + * Get a host's NQN. + * + * \param host Host to query. + * + * \return NQN of host. + */ +const char *spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host); + +/** + * Accept new connections on the address provided. + * + * This does not start the listener. Use spdk_nvmf_tgt_listen() for that. + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to add listener to. + * \param trid The address to accept connections from. + * \param cb_fn A callback that will be called once the association is complete. + * \param cb_arg Argument passed to cb_fn. + */ +void spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_transport_id *trid, + spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, + void *cb_arg); + +/** + * Remove the listener from subsystem. + * + * New connections to the address won't be propagated to the subsystem. + * However to stop listening at target level one must use the + * spdk_nvmf_tgt_stop_listen(). + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to remove listener from. + * \param trid The address to no longer accept connections from. + * + * \return 0 on success, or negated errno value on failure. 
+ */ +int spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid); + +/** + * Check if connections originated from the given address are allowed to connect + * to the subsystem. + * + * \param subsystem The subsystem to query. + * \param trid The listen address. + * + * \return true if allowed, or false if not. + */ +bool spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid); + +/** + * Get the first allowed listen address in the subsystem. + * + * \param subsystem Subsystem to query. + * + * \return first allowed listen address in this subsystem, or NULL if none allowed. + */ +struct spdk_nvmf_subsystem_listener *spdk_nvmf_subsystem_get_first_listener( + struct spdk_nvmf_subsystem *subsystem); + +/** + * Get the next allowed listen address in a subsystem. + * + * \param subsystem Subsystem to query. + * \param prev_listener Previous listen address for this subsystem. + * + * \return next allowed listen address in this subsystem, or NULL if prev_listener + * was the last address. + */ +struct spdk_nvmf_subsystem_listener *spdk_nvmf_subsystem_get_next_listener( + struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_subsystem_listener *prev_listener); + +/** + * Get a listen address' transport ID + * + * \param listener This listener. + * + * \return the transport ID for this listener. + */ +const struct spdk_nvme_transport_id *spdk_nvmf_subsystem_listener_get_trid( + struct spdk_nvmf_subsystem_listener *listener); + +/** + * Set whether a subsystem should allow any listen address or only addresses in the allowed list. + * + * \param subsystem Subsystem to allow dynamic listener assignment. + * \param allow_any_listener true to allow dynamic listener assignment for + * this subsystem, or false to enforce the whitelist configured during + * subsystem setup. 
+ */ +void spdk_nvmf_subsystem_allow_any_listener( + struct spdk_nvmf_subsystem *subsystem, + bool allow_any_listener); + +/** + * Check whether a subsystem allows any listen address or only addresses in the allowed list. + * + * \param subsystem Subsystem to query. + * + * \return true if this subsystem allows dynamic management of listen address list, + * or false if it only allows addresses in the whitelist configured during subsystem setup. + */ +bool spdk_nvmf_subsytem_any_listener_allowed( + struct spdk_nvmf_subsystem *subsystem); + +/** NVMe-oF target namespace creation options */ +struct spdk_nvmf_ns_opts { + /** + * Namespace ID + * + * Set to 0 to automatically assign a free NSID. + */ + uint32_t nsid; + + /** + * Namespace Globally Unique Identifier + * + * Fill with 0s if not specified. + */ + uint8_t nguid[16]; + + /** + * IEEE Extended Unique Identifier + * + * Fill with 0s if not specified. + */ + uint8_t eui64[8]; + + /** + * Namespace UUID + * + * Fill with 0s if not specified. + */ + struct spdk_uuid uuid; +}; + +/** + * Get default namespace creation options. + * + * \param opts Namespace options to fill with defaults. + * \param opts_size sizeof(struct spdk_nvmf_ns_opts) + */ +void spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size); + +/** + * Add a namespace to a subsystem. + * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem to add namespace to. + * \param bdev Block device to add as a namespace. + * \param opts Namespace options, or NULL to use defaults. + * \param opts_size sizeof(*opts) + * \param ptpl_file Persist through power loss file path. + * + * \return newly added NSID on success, or 0 on failure. + */ +uint32_t spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev, + const struct spdk_nvmf_ns_opts *opts, size_t opts_size, + const char *ptpl_file); + +/** + * Remove a namespace from a subsystem.
+ * + * May only be performed on subsystems in the PAUSED or INACTIVE states. + * + * \param subsystem Subsystem the namespace belong to. + * \param nsid Namespace ID to be removed. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid); + +/** + * Get the first allocated namespace in a subsystem. + * + * \param subsystem Subsystem to query. + * + * \return first allocated namespace in this subsystem, or NULL if this subsystem + * has no namespaces. + */ +struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem); + +/** + * Get the next allocated namespace in a subsystem. + * + * \param subsystem Subsystem to query. + * \param prev_ns Previous ns returned from this function. + * + * \return next allocated namespace in this subsystem, or NULL if prev_ns was the + * last namespace. + */ +struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ns *prev_ns); + +/** + * Get a namespace in a subsystem by NSID. + * + * \param subsystem Subsystem to search. + * \param nsid Namespace ID to find. + * + * \return namespace matching nsid, or NULL if nsid was not found. + */ +struct spdk_nvmf_ns *spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, + uint32_t nsid); + +/** + * Get the maximum number of namespaces allowed in a subsystem. + * + * \param subsystem Subsystem to query. + * + * \return Maximum number of namespaces allowed in the subsystem, or 0 for unlimited. + */ +uint32_t spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem); + +/** + * Get a namespace's NSID. + * + * \param ns Namespace to query. + * + * \return NSID of ns. + */ +uint32_t spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns); + +/** + * Get a namespace's associated bdev. + * + * \param ns Namespace to query. + * + * \return backing bdev of ns. 
+ */ +struct spdk_bdev *spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns); + +/** + * Get the options specified for a namespace. + * + * \param ns Namespace to query. + * \param opts Output parameter for options. + * \param opts_size sizeof(*opts) + */ +void spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts, + size_t opts_size); + +/** + * Get the serial number of the specified subsystem. + * + * \param subsystem Subsystem to query. + * + * \return serial number of the specified subsystem. + */ +const char *spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem); + + +/** + * Set the serial number for the specified subsystem. + * + * \param subsystem Subsystem to set for. + * \param sn serial number to set. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn); + +/** + * Get the model number of the specified subsystem. + * + * \param subsystem Subsystem to query. + * + * \return model number of the specified subsystem. + */ +const char *spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem); + + +/** + * Set the model number for the specified subsystem. + * + * \param subsystem Subsystem to set for. + * \param mn model number to set. + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn); + +/** + * Get the NQN of the specified subsystem. + * + * \param subsystem Subsystem to query. + * + * \return NQN of the specified subsystem. + */ +const char *spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem); + +/** + * Get the type of the specified subsystem. + * + * \param subsystem Subsystem to query. + * + * \return the type of the specified subsystem. + */ +enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem); + +/** + * Get maximum namespace id of the specified subsystem. 
+ * + * \param subsystem Subsystem to query. + * + * \return maximum namespace id + */ +uint32_t spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem); + +/** + * Initialize transport options + * + * \param transport_name The transport type to create + * \param opts The transport options (e.g. max_io_size) + * + * \return bool. true if successful, false if transport type + * not found. + */ +bool +spdk_nvmf_transport_opts_init(const char *transport_name, + struct spdk_nvmf_transport_opts *opts); + +/** + * Create a protocol transport + * + * \param transport_name The transport type to create + * \param opts The transport options (e.g. max_io_size) + * + * \return new transport or NULL if create fails + */ +struct spdk_nvmf_transport *spdk_nvmf_transport_create(const char *transport_name, + struct spdk_nvmf_transport_opts *opts); + +/** + * Destroy a protocol transport + * + * \param transport The transport to destroy + * + * \return 0 on success, -1 on failure. + */ +int spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport); + +/** + * Get an existing transport from the target + * + * \param tgt The NVMe-oF target + * \param transport_name The name of the transport type to get. + * + * \return the transport or NULL if not found + */ +struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, + const char *transport_name); + +/** + * Get the first transport registered with the given target + * + * \param tgt The NVMe-oF target + * + * \return The first transport registered on the target + */ +struct spdk_nvmf_transport *spdk_nvmf_transport_get_first(struct spdk_nvmf_tgt *tgt); + +/** + * Get the next transport in a target's list. + * + * \param transport A handle to a transport object + * + * \return The next transport associated with the NVMe-oF target + */ +struct spdk_nvmf_transport *spdk_nvmf_transport_get_next(struct spdk_nvmf_transport *transport); + +/** + * Get the opts for a given transport.
+ * + * \param transport The transport to query + * + * \return The opts associated with the given transport + */ +const struct spdk_nvmf_transport_opts *spdk_nvmf_get_transport_opts(struct spdk_nvmf_transport + *transport); + +/** + * Get the transport type for a given transport. + * + * \param transport The transport to query + * + * \return the transport type for the given transport + */ +spdk_nvme_transport_type_t spdk_nvmf_get_transport_type(struct spdk_nvmf_transport *transport); + +/** + * Get the transport name for a given transport. + * + * \param transport The transport to query + * + * \return the transport name for the given transport + */ +const char *spdk_nvmf_get_transport_name(struct spdk_nvmf_transport *transport); + +/** + * Function to be called once transport add is complete + * + * \param cb_arg Callback argument passed to this function. + * \param status 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_nvmf_tgt_add_transport_done_fn)(void *cb_arg, int status); + +/** + * Add a transport to a target + * + * \param tgt The NVMe-oF target + * \param transport The transport to add + * \param cb_fn A callback that will be called once the transport is created + * \param cb_arg A context argument passed to cb_fn. + * + * \return void. The callback status argument will be 0 on success + * or a negated errno on failure. + */ +void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport *transport, + spdk_nvmf_tgt_add_transport_done_fn cb_fn, + void *cb_arg); + +/** + * Add listener to transport and begin accepting new connections. + * + * \param transport The transport to add listener to + * \param trid Address to listen at + * + * \return int. 0 if it completed successfully, or negative errno if it failed. 
+ */ +int +spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + +/** + * Remove listener from transport and stop accepting new connections. + * + * \param transport The transport to remove listener from + * \param trid Address to stop listen at + * + * \return int. 0 if it completed successfully, or negative errno if it failed. + */ +int +spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + +/** + * \brief Get current transport poll group statistics. + * + * This function allocates memory for statistics and returns it + * in \p stat parameter. Caller must free this memory with + * spdk_nvmf_transport_poll_group_free_stat() when it is not needed + * anymore. + * + * \param tgt The NVMf target. + * \param transport The NVMf transport. + * \param stat Output parameter that will contain pointer to allocated statistics structure. + * + * \return 0 upon success. + * \return -ENOTSUP if transport does not support statistics. + * \return -EINVAL if any of parameters is NULL. + * \return -ENOENT if transport poll group is not found. + * \return -ENOMEM if memory allocation failed. + */ +int +spdk_nvmf_transport_poll_group_get_stat(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport *transport, + struct spdk_nvmf_transport_poll_group_stat **stat); + +/** + * Free statistics memory previously allocated with spdk_nvmf_transport_poll_group_get_stat(). + * + * \param transport The NVMf transport. + * \param stat Pointer to transport poll group statistics structure. + */ +void +spdk_nvmf_transport_poll_group_free_stat(struct spdk_nvmf_transport *transport, + struct spdk_nvmf_transport_poll_group_stat *stat); + +/** + * \brief Set the global hooks for the RDMA transport, if necessary. + * + * This call is optional and must be performed prior to probing for + * any devices. 
By default, the RDMA transport will use the ibverbs + * library to create protection domains and register memory. This + * is a mechanism to subvert that and use an existing registration. + * + * This function may only be called one time per process. + * + * \param hooks for initializing global hooks + */ +void spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/nvmf_cmd.h b/src/spdk/include/spdk/nvmf_cmd.h new file mode 100644 index 000000000..6cbac7de0 --- /dev/null +++ b/src/spdk/include/spdk/nvmf_cmd.h @@ -0,0 +1,226 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_NVMF_CMD_H_ +#define SPDK_NVMF_CMD_H_ + +#include "spdk/stdinc.h" +#include "spdk/nvmf.h" +#include "spdk/bdev.h" + +enum spdk_nvmf_request_exec_status { + SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE, + SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS, +}; + +/** + * Fills the identify controller attributes for the specified controller + * + * \param ctrlr The NVMe-oF controller + * \param cdata The filled in identify controller attributes + * \return \ref spdk_nvmf_request_exec_status + */ +int spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvme_ctrlr_data *cdata); + +/** + * Fills the identify namespace attributes for the specified controller + * + * \param ctrlr The NVMe-oF controller + * \param cmd The NVMe command + * \param rsp The NVMe command completion + * \param nsdata The filled in identify namespace attributes + * \return \ref spdk_nvmf_request_exec_status + */ +int spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + struct spdk_nvme_ns_data *nsdata); + +/** + * Callback function definition for a custom admin command handler. + * + * A function of this type is passed to \ref spdk_nvmf_set_custom_admin_cmd_hdlr. + * It is called for every admin command that is processed by the NVMe-oF subsystem. + * If the function handled the admin command then it must return a value from + * \ref spdk_nvmf_request_exec_status.
If the function did not handle the + * admin command then it should return -1. In this case the SPDK default admin + * command processing is applied to the request. + * + * \param req The NVMe-oF request of the admin command that is currently + * processed + * \return \ref spdk_nvmf_request_exec_status if the command has been handled + * by the handler or -1 if the command wasn't handled + */ +typedef int (*spdk_nvmf_custom_cmd_hdlr)(struct spdk_nvmf_request *req); + +/** + * Installs a custom admin command handler. + * + * \param opc NVMe admin command OPC for which the handler should be installed. + * \param hdlr The handler function. See \ref spdk_nvmf_custom_cmd_hdlr. + */ +void spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr); + +/** + * Forward an NVMe admin command to a namespace + * + * This function forwards all NVMe admin commands of value opc to the specified + * namespace id. + * If forward_nsid is 0, the command is sent to the namespace that was specified in the + * original command. + * + * \param opc - NVMe admin command OPC + * \param forward_nsid - nsid or 0 + */ +void spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid); + +/** + * Callback function that is called right before the admin command reply + * is sent back to the initiator. + * + * \param req The NVMe-oF request + */ +typedef void (*spdk_nvmf_nvme_passthru_cmd_cb)(struct spdk_nvmf_request *req); + +/** + * Submits the NVMe-oF request to a bdev. + * + * This function can be used in a custom admin handler to send the command contained + * in the req to a bdev. Once the bdev completes the command, the specified cb_fn + * is called (which can be NULL if not needed).
+ * + * \param bdev The \ref spdk_bdev + * \param desc The \ref spdk_bdev_desc + * \param ch The \ref spdk_io_channel + * \param req The \ref spdk_nvmf_request passed to the bdev for processing + * \param cb_fn A callback function (or NULL) that is called before the request + * is completed. + * + * \return A \ref spdk_nvmf_request_exec_status + */ +int spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req, spdk_nvmf_nvme_passthru_cmd_cb cb_fn); + +/** + * Attempts to abort a request in the specified bdev + * + * \param bdev Bdev that is processing req_to_abort + * \param desc Bdev desc + * \param ch Channel on which req_to_abort was originally submitted + * \param req Abort cmd req + * \param req_to_abort The request that should be aborted + */ +int spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req, + struct spdk_nvmf_request *req_to_abort); + +/** + * Provide access to the underlying bdev that is associated with a namespace. + * + * This function can be used to communicate with the bdev. For example, + * a \ref spdk_nvmf_custom_cmd_hdlr can use \ref spdk_nvmf_bdev_ctrlr_nvme_passthru_admin + * to pass on a \ref spdk_nvmf_request to an NVMe bdev.
+ * + * \param nsid The namespace id of a namespace that is valid for the + * underlying subsystem + * \param req The NVMe-oF request that is being processed + * \param bdev Returns the \ref spdk_bdev corresponding to the namespace id + * \param desc Returns the \ref spdk_bdev_desc corresponding to the namespace id + * \param ch Returns the \ref spdk_io_channel corresponding to the namespace id + * + * \return 0 upon success + * \return -EINVAL if the namespace id can't be found + */ +int spdk_nvmf_request_get_bdev(uint32_t nsid, + struct spdk_nvmf_request *req, + struct spdk_bdev **bdev, + struct spdk_bdev_desc **desc, + struct spdk_io_channel **ch); + +/** + * Get the NVMe-oF controller associated with this request. + * + * \param req The NVMe-oF request + * + * \return The NVMe-oF controller + */ +struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req); + +/** + * Get the NVMe-oF subsystem associated with this request. + * + * \param req The NVMe-oF request + * + * \return The NVMe-oF subsystem + */ +struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req); + +/** + * Get the data and length associated with this request. + * + * \param req The NVMe-oF request + * \param data The data buffer associated with this request + * \param length The length of the data buffer + */ +void spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length); + +/** + * Get the NVMe-oF command associated with this request. + * + * \param req The NVMe-oF request + * + * \return The NVMe command + */ +struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req); + +/** + * Get the NVMe-oF completion associated with this request. + * + * \param req The NVMe-oF request + * + * \return The NVMe completion + */ +struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req); + +/** + * Get the request to abort that is associated with this request. 
+ * The req to abort is only set if the request processing a SPDK_NVME_OPC_ABORT cmd + * + * \param req The NVMe-oF abort request + * + * \return req_to_abort The NVMe-oF request that is in process of being aborted + */ +struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req); + +#endif /* SPDK_NVMF_CMD_H_ */ diff --git a/src/spdk/include/spdk/nvmf_fc_spec.h b/src/spdk/include/spdk/nvmf_fc_spec.h new file mode 100644 index 000000000..0a3234249 --- /dev/null +++ b/src/spdk/include/spdk/nvmf_fc_spec.h @@ -0,0 +1,411 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2018-2019 Broadcom. All Rights Reserved. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __NVMF_FC_SPEC_H__ +#define __NVMF_FC_SPEC_H__ + +#include "spdk/env.h" +#include "spdk/nvme.h" + +/* + * FC-NVMe Spec. Definitions + */ + +#define FCNVME_R_CTL_CMD_REQ 0x06 +#define FCNVME_R_CTL_DATA_OUT 0x01 +#define FCNVME_R_CTL_CONFIRM 0x03 +#define FCNVME_R_CTL_STATUS 0x07 +#define FCNVME_R_CTL_ERSP_STATUS 0x08 +#define FCNVME_R_CTL_LS_REQUEST 0x32 +#define FCNVME_R_CTL_LS_RESPONSE 0x33 +#define FCNVME_R_CTL_BA_ABTS 0x81 + +#define FCNVME_F_CTL_END_SEQ 0x080000 +#define FCNVME_F_CTL_SEQ_INIT 0x010000 + +/* END_SEQ | LAST_SEQ | Exchange Responder | SEQ init */ +#define FCNVME_F_CTL_RSP 0x990000 + +#define FCNVME_TYPE_BLS 0x0 +#define FCNVME_TYPE_FC_EXCHANGE 0x08 +#define FCNVME_TYPE_NVMF_DATA 0x28 + +#define FCNVME_CMND_IU_FC_ID 0x28 +#define FCNVME_CMND_IU_SCSI_ID 0xFD + +#define FCNVME_CMND_IU_NODATA 0x00 +#define FCNVME_CMND_IU_READ 0x10 +#define FCNVME_CMND_IU_WRITE 0x01 + +/* BLS reject error codes */ +#define FCNVME_BLS_REJECT_UNABLE_TO_PERFORM 0x09 +#define FCNVME_BLS_REJECT_EXP_NOINFO 0x00 +#define FCNVME_BLS_REJECT_EXP_INVALID_OXID 0x03 + +/* + * FC NVMe Link Services (LS) constants + */ +#define FCNVME_MAX_LS_REQ_SIZE 1536 +#define FCNVME_MAX_LS_RSP_SIZE 64 + +#define FCNVME_LS_CA_CMD_MIN_LEN 592 +#define FCNVME_LS_CA_DESC_LIST_MIN_LEN 584 +#define FCNVME_LS_CA_DESC_MIN_LEN 576 + +/* this value needs to be in sync with low level driver buffer size */ +#define FCNVME_MAX_LS_BUFFER_SIZE 2048 + +#define 
FCNVME_GOOD_RSP_LEN 12 +#define FCNVME_ASSOC_HOSTID_LEN 16 + + +typedef uint64_t FCNVME_BE64; +typedef uint32_t FCNVME_BE32; +typedef uint16_t FCNVME_BE16; + +/* + * FC-NVME LS Commands + */ +enum { + FCNVME_LS_RSVD = 0, + FCNVME_LS_RJT = 1, + FCNVME_LS_ACC = 2, + FCNVME_LS_CREATE_ASSOCIATION = 3, + FCNVME_LS_CREATE_CONNECTION = 4, + FCNVME_LS_DISCONNECT = 5, +}; + +/* + * FC-NVME Link Service Descriptors + */ +enum { + FCNVME_LSDESC_RSVD = 0x0, + FCNVME_LSDESC_RQST = 0x1, + FCNVME_LSDESC_RJT = 0x2, + FCNVME_LSDESC_CREATE_ASSOC_CMD = 0x3, + FCNVME_LSDESC_CREATE_CONN_CMD = 0x4, + FCNVME_LSDESC_DISCONN_CMD = 0x5, + FCNVME_LSDESC_CONN_ID = 0x6, + FCNVME_LSDESC_ASSOC_ID = 0x7, +}; + +/* + * LS Reject reason_codes + */ +enum fcnvme_ls_rjt_reason { + FCNVME_RJT_RC_NONE = 0, /* no reason - not to be sent */ + FCNVME_RJT_RC_INVAL = 0x01, /* invalid NVMe_LS command code */ + FCNVME_RJT_RC_LOGIC = 0x03, /* logical error */ + FCNVME_RJT_RC_UNAB = 0x09, /* unable to perform request */ + FCNVME_RJT_RC_UNSUP = 0x0b, /* command not supported */ + FCNVME_RJT_RC_INPROG = 0x0e, /* command already in progress */ + FCNVME_RJT_RC_INV_ASSOC = 0x40, /* invalid Association ID */ + FCNVME_RJT_RC_INV_CONN = 0x41, /* invalid Connection ID */ + FCNVME_RJT_RC_INV_PARAM = 0x42, /* invalid parameters */ + FCNVME_RJT_RC_INSUFF_RES = 0x43, /* insufficient resources */ + FCNVME_RJT_RC_INV_HOST = 0x44, /* invalid or rejected host */ + FCNVME_RJT_RC_VENDOR = 0xff, /* vendor specific error */ +}; + +/* + * LS Reject reason_explanation codes + */ +enum fcnvme_ls_rjt_explan { + FCNVME_RJT_EXP_NONE = 0x00, /* No additional explanation */ + FCNVME_RJT_EXP_OXID_RXID = 0x17, /* invalid OX_ID-RX_ID combo */ + FCNVME_RJT_EXP_UNAB_DATA = 0x2a, /* unable to supply data */ + FCNVME_RJT_EXP_INV_LEN = 0x2d, /* invalid payload length */ + FCNVME_RJT_EXP_INV_ESRP = 0x40, /* invalid ESRP ratio */ + FCNVME_RJT_EXP_INV_CTL_ID = 0x41, /* invalid controller ID */ + FCNVME_RJT_EXP_INV_Q_ID = 0x42, /* invalid queue ID */ + 
FCNVME_RJT_EXP_SQ_SIZE = 0x43, /* invalid submission queue size */ + FCNVME_RJT_EXP_INV_HOST_ID = 0x44, /* invalid or rejected host ID */ + FCNVME_RJT_EXP_INV_HOSTNQN = 0x45, /* invalid or rejected host NQN */ + FCNVME_RJT_EXP_INV_SUBNQN = 0x46, /* invalid or rejected subsys nqn */ +}; + +/* + * NVMe over FC CMD IU + */ +struct spdk_nvmf_fc_cmnd_iu { + uint32_t scsi_id: 8, + fc_id: 8, + cmnd_iu_len: 16; + uint32_t rsvd0: 24, + flags: 8; + uint64_t conn_id; + uint32_t cmnd_seq_num; + uint32_t data_len; + struct spdk_nvme_cmd cmd; + uint32_t rsvd1[2]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_cmnd_iu) == 96, "size_mismatch"); + +/* + * NVMe over Extended Response IU + */ +struct spdk_nvmf_fc_ersp_iu { + uint32_t status_code: 8, + rsvd0: 8, + ersp_len: 16; + uint32_t response_seq_no; + uint32_t transferred_data_len; + uint32_t rsvd1; + struct spdk_nvme_cpl rsp; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ersp_iu) == 32, "size_mismatch"); + +/* + * Transfer ready IU + */ +struct spdk_nvmf_fc_xfer_rdy_iu { + uint32_t relative_offset; + uint32_t burst_len; + uint32_t rsvd; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_xfer_rdy_iu) == 12, "size_mismatch"); + +/* + * FC NVME Frame Header + */ +struct spdk_nvmf_fc_frame_hdr { + FCNVME_BE32 r_ctl: 8, + d_id: 24; + FCNVME_BE32 cs_ctl: 8, + s_id: 24; + FCNVME_BE32 type: 8, + f_ctl: 24; + FCNVME_BE32 seq_id: 8, + df_ctl: 8, + seq_cnt: 16; + FCNVME_BE32 ox_id: 16, + rx_id: 16; + FCNVME_BE32 parameter; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_frame_hdr) == 24, "size_mismatch"); + +/* + * Request payload word 0 + */ +struct spdk_nvmf_fc_ls_rqst_w0 { + uint8_t ls_cmd; /* FCNVME_LS_xxx */ + uint8_t zeros[3]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_rqst_w0) == 4, "size_mismatch"); + +/* + * LS request information descriptor + */ +struct spdk_nvmf_fc_lsdesc_rqst { + FCNVME_BE32 desc_tag; /* FCNVME_LSDESC_xxx */ + FCNVME_BE32 desc_len; + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 
rsvd12; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_rqst) == 16, "size_mismatch"); + +/* + * LS accept header + */ +struct spdk_nvmf_fc_ls_acc_hdr { + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 desc_list_len; + struct spdk_nvmf_fc_lsdesc_rqst rqst; + /* Followed by cmd-specific ACC descriptors, see next definitions */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_acc_hdr) == 24, "size_mismatch"); + +/* + * LS descriptor connection id + */ +struct spdk_nvmf_fc_lsdesc_conn_id { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + FCNVME_BE64 connection_id; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id) == 16, "size_mismatch"); + +/* + * LS descriptor association id + */ +struct spdk_nvmf_fc_lsdesc_assoc_id { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + FCNVME_BE64 association_id; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id) == 16, "size_mismatch"); + +/* + * LS Create Association descriptor + */ +struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + FCNVME_BE16 ersp_ratio; + FCNVME_BE16 rsvd10; + FCNVME_BE32 rsvd12[9]; + FCNVME_BE16 cntlid; + FCNVME_BE16 sqsize; + FCNVME_BE32 rsvd52; + uint8_t hostid[FCNVME_ASSOC_HOSTID_LEN]; + uint8_t hostnqn[SPDK_NVME_NQN_FIELD_SIZE]; + uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE]; + uint8_t rsvd584[432]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd) == 1016, "size_mismatch"); + +/* + * LS Create Association request payload + */ +struct spdk_nvmf_fc_ls_cr_assoc_rqst { + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 desc_list_len; + struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd assoc_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_assoc_rqst) == 1024, "size_mismatch"); + +/* + * LS Create Association accept payload + */ +struct spdk_nvmf_fc_ls_cr_assoc_acc { + struct spdk_nvmf_fc_ls_acc_hdr hdr; + struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id; + struct spdk_nvmf_fc_lsdesc_conn_id conn_id; +};
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_assoc_acc) == 56, "size_mismatch"); + +/* + * LS Create IO Connection descriptor + */ +struct spdk_nvmf_fc_lsdesc_cr_conn_cmd { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + FCNVME_BE16 ersp_ratio; + FCNVME_BE16 rsvd10; + FCNVME_BE32 rsvd12[9]; + FCNVME_BE16 qid; + FCNVME_BE16 sqsize; + FCNVME_BE32 rsvd52; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_cr_conn_cmd) == 56, "size_mismatch"); + +/* + * LS Create IO Connection payload + */ +struct spdk_nvmf_fc_ls_cr_conn_rqst { + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 desc_list_len; + struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id; + struct spdk_nvmf_fc_lsdesc_cr_conn_cmd connect_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst) == 80, "size_mismatch"); + +/* + * LS Create IO Connection accept payload + */ +struct spdk_nvmf_fc_ls_cr_conn_acc { + struct spdk_nvmf_fc_ls_acc_hdr hdr; + struct spdk_nvmf_fc_lsdesc_conn_id conn_id; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_cr_conn_acc) == 40, "size_mismatch"); + +/* + * LS Disconnect descriptor + */ +struct spdk_nvmf_fc_lsdesc_disconn_cmd { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + FCNVME_BE32 rsvd8; + FCNVME_BE32 rsvd12; + FCNVME_BE32 rsvd16; + FCNVME_BE32 rsvd20; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd) == 24, "size_mismatch"); + +/* + * LS Disconnect payload + */ +struct spdk_nvmf_fc_ls_disconnect_rqst { + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 desc_list_len; + struct spdk_nvmf_fc_lsdesc_assoc_id assoc_id; + struct spdk_nvmf_fc_lsdesc_disconn_cmd disconn_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst) == 48, "size_mismatch"); + +/* + * LS Disconnect accept payload + */ +struct spdk_nvmf_fc_ls_disconnect_acc { + struct spdk_nvmf_fc_ls_acc_hdr hdr; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_disconnect_acc) == 24, "size_mismatch"); + +/* + * LS Reject descriptor + */ +struct
spdk_nvmf_fc_lsdesc_rjt { + FCNVME_BE32 desc_tag; + FCNVME_BE32 desc_len; + uint8_t rsvd8; + + uint8_t reason_code; + uint8_t reason_explanation; + + uint8_t vendor; + FCNVME_BE32 rsvd12; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_lsdesc_rjt) == 16, "size_mismatch"); + +/* + * LS Reject payload + */ +struct spdk_nvmf_fc_ls_rjt { + struct spdk_nvmf_fc_ls_rqst_w0 w0; + FCNVME_BE32 desc_list_len; + struct spdk_nvmf_fc_lsdesc_rqst rqst; + struct spdk_nvmf_fc_lsdesc_rjt rjt; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_ls_rjt) == 40, "size_mismatch"); + +/* + * FC World Wide Name + */ +struct spdk_nvmf_fc_wwn { + union { + uint64_t wwn; /* World Wide Names consist of eight bytes */ + uint8_t octets[sizeof(uint64_t)]; + } u; +}; + +#endif diff --git a/src/spdk/include/spdk/nvmf_spec.h b/src/spdk/include/spdk/nvmf_spec.h new file mode 100644 index 000000000..de49feef9 --- /dev/null +++ b/src/spdk/include/spdk/nvmf_spec.h @@ -0,0 +1,733 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_NVMF_SPEC_H +#define SPDK_NVMF_SPEC_H + +#include "spdk/stdinc.h" + +#include "spdk/assert.h" +#include "spdk/nvme_spec.h" + +/** + * \file + * NVMe over Fabrics specification definitions + */ + +#pragma pack(push, 1) + +struct spdk_nvmf_capsule_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[35]; + uint8_t fabric_specific[24]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_capsule_cmd) == 64, "Incorrect size"); + +/* Fabric Command Set */ +#define SPDK_NVME_OPC_FABRIC 0x7f + +enum spdk_nvmf_fabric_cmd_types { + SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET = 0x00, + SPDK_NVMF_FABRIC_COMMAND_CONNECT = 0x01, + SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET = 0x04, + SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND = 0x05, + SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV = 0x06, + SPDK_NVMF_FABRIC_COMMAND_START_VENDOR_SPECIFIC = 0xC0, +}; + +enum spdk_nvmf_fabric_cmd_status_code { + SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT = 0x80, + SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY = 0x81, + SPDK_NVMF_FABRIC_SC_INVALID_PARAM = 0x82, + SPDK_NVMF_FABRIC_SC_RESTART_DISCOVERY = 0x83, + SPDK_NVMF_FABRIC_SC_INVALID_HOST = 0x84, + 
SPDK_NVMF_FABRIC_SC_LOG_RESTART_DISCOVERY = 0x90, + SPDK_NVMF_FABRIC_SC_AUTH_REQUIRED = 0x91, +}; + +/** + * RDMA Queue Pair service types + */ +enum spdk_nvmf_rdma_qptype { + /** Reliable connected */ + SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED = 0x1, + + /** Reliable datagram */ + SPDK_NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM = 0x2, +}; + +/** + * RDMA provider types + */ +enum spdk_nvmf_rdma_prtype { + /** No provider specified */ + SPDK_NVMF_RDMA_PRTYPE_NONE = 0x1, + + /** InfiniBand */ + SPDK_NVMF_RDMA_PRTYPE_IB = 0x2, + + /** RoCE v1 */ + SPDK_NVMF_RDMA_PRTYPE_ROCE = 0x3, + + /** RoCE v2 */ + SPDK_NVMF_RDMA_PRTYPE_ROCE2 = 0x4, + + /** iWARP */ + SPDK_NVMF_RDMA_PRTYPE_IWARP = 0x5, +}; + +/** + * RDMA connection management service types + */ +enum spdk_nvmf_rdma_cms { + /** Sockets based endpoint addressing */ + SPDK_NVMF_RDMA_CMS_RDMA_CM = 0x1, +}; + +/** + * NVMe over Fabrics transport types + */ +enum spdk_nvmf_trtype { + /** RDMA */ + SPDK_NVMF_TRTYPE_RDMA = 0x1, + + /** Fibre Channel */ + SPDK_NVMF_TRTYPE_FC = 0x2, + + /** TCP */ + SPDK_NVMF_TRTYPE_TCP = 0x3, + + /** Intra-host transport (loopback) */ + SPDK_NVMF_TRTYPE_INTRA_HOST = 0xfe, +}; + +/** + * Address family types + */ +enum spdk_nvmf_adrfam { + /** IPv4 (AF_INET) */ + SPDK_NVMF_ADRFAM_IPV4 = 0x1, + + /** IPv6 (AF_INET6) */ + SPDK_NVMF_ADRFAM_IPV6 = 0x2, + + /** InfiniBand (AF_IB) */ + SPDK_NVMF_ADRFAM_IB = 0x3, + + /** Fibre Channel address family */ + SPDK_NVMF_ADRFAM_FC = 0x4, + + /** Intra-host transport (loopback) */ + SPDK_NVMF_ADRFAM_INTRA_HOST = 0xfe, +}; + +/** + * NVM subsystem types + */ +enum spdk_nvmf_subtype { + /** Discovery type for NVM subsystem */ + SPDK_NVMF_SUBTYPE_DISCOVERY = 0x1, + + /** NVMe type for NVM subsystem */ + SPDK_NVMF_SUBTYPE_NVME = 0x2, +}; + +/** + * Connections shall be made over a fabric secure channel + */ +enum spdk_nvmf_treq_secure_channel { + /** Not specified */ + SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED = 0x0, + + /** Required */ + 
SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED = 0x1, + + /** Not required */ + SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED = 0x2, +}; + +struct spdk_nvmf_fabric_auth_recv_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; /* NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV (0x06) */ + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint8_t reserved3; + uint8_t spsp0; + uint8_t spsp1; + uint8_t secp; + uint32_t al; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_recv_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_auth_send_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; /* NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND (0x05) */ + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint8_t reserved3; + uint8_t spsp0; + uint8_t spsp1; + uint8_t secp; + uint32_t tl; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_send_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_connect_data { + uint8_t hostid[16]; + uint16_t cntlid; + uint8_t reserved5[238]; + uint8_t subnqn[SPDK_NVME_NQN_FIELD_SIZE]; + uint8_t hostnqn[SPDK_NVME_NQN_FIELD_SIZE]; + uint8_t reserved6[256]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_data) == 1024, "Incorrect size"); + +struct spdk_nvmf_fabric_connect_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint16_t recfmt; /* Connect Record Format */ + uint16_t qid; /* Queue Identifier */ + uint16_t sqsize; /* Submission Queue Size */ + uint8_t cattr; /* queue attributes */ + uint8_t reserved3; + uint32_t kato; /* keep alive timeout */ + uint8_t reserved4[12]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_connect_rsp { + union { + struct { + uint16_t cntlid; + uint16_t authreq; + } success; + + struct { + 
uint16_t ipo; + uint8_t iattr; + uint8_t reserved; + } invalid; + + uint32_t raw; + } status_code_specific; + + uint32_t reserved0; + uint16_t sqhd; + uint16_t reserved1; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_rsp) == 16, "Incorrect size"); + +#define SPDK_NVMF_PROP_SIZE_4 0 +#define SPDK_NVMF_PROP_SIZE_8 1 + +struct spdk_nvmf_fabric_prop_get_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[35]; + struct { + uint8_t size : 3; + uint8_t reserved : 5; + } attrib; + uint8_t reserved3[3]; + uint32_t ofst; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_prop_get_rsp { + union { + uint64_t u64; + struct { + uint32_t low; + uint32_t high; + } u32; + } value; + + uint16_t sqhd; + uint16_t reserved0; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_rsp) == 16, "Incorrect size"); + +struct spdk_nvmf_fabric_prop_set_cmd { + uint8_t opcode; + uint8_t reserved0; + uint16_t cid; + uint8_t fctype; + uint8_t reserved1[35]; + struct { + uint8_t size : 3; + uint8_t reserved : 5; + } attrib; + uint8_t reserved2[3]; + uint32_t ofst; + + union { + uint64_t u64; + struct { + uint32_t low; + uint32_t high; + } u32; + } value; + + uint8_t reserved4[8]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_set_cmd) == 64, "Incorrect size"); + +#define SPDK_NVMF_NQN_MIN_LEN 11 /* The prefix in the spec is 11 characters */ +#define SPDK_NVMF_NQN_MAX_LEN 223 +#define SPDK_NVMF_NQN_UUID_PRE_LEN 32 +#define SPDK_NVMF_UUID_STRING_LEN 36 +#define SPDK_NVMF_NQN_UUID_PRE "nqn.2014-08.org.nvmexpress:uuid:" +#define SPDK_NVMF_DISCOVERY_NQN "nqn.2014-08.org.nvmexpress.discovery" + +#define SPDK_DOMAIN_LABEL_MAX_LEN 63 /* RFC 1034 max domain label length */ + +#define SPDK_NVMF_TRSTRING_MAX_LEN 32 +#define 
SPDK_NVMF_TRADDR_MAX_LEN 256 +#define SPDK_NVMF_TRSVCID_MAX_LEN 32 + +/** RDMA transport-specific address subtype */ +struct spdk_nvmf_rdma_transport_specific_address_subtype { + /** RDMA QP service type (\ref spdk_nvmf_rdma_qptype) */ + uint8_t rdma_qptype; + + /** RDMA provider type (\ref spdk_nvmf_rdma_prtype) */ + uint8_t rdma_prtype; + + /** RDMA connection management service (\ref spdk_nvmf_rdma_cms) */ + uint8_t rdma_cms; + + uint8_t reserved0[5]; + + /** RDMA partition key for AF_IB */ + uint16_t rdma_pkey; + + uint8_t reserved2[246]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_transport_specific_address_subtype) == 256, + "Incorrect size"); + +/** TCP Secure Socket Type */ +enum spdk_nvme_tcp_secure_socket_type { + /** No security */ + SPDK_NVME_TCP_SECURITY_NONE = 0, + + /** TLS (Secure Sockets) */ + SPDK_NVME_TCP_SECURITY_TLS = 1, +}; + +/** TCP transport-specific address subtype */ +struct spdk_nvme_tcp_transport_specific_address_subtype { + /** Security type (\ref spdk_nvme_tcp_secure_socket_type) */ + uint8_t sectype; + + uint8_t reserved0[255]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_transport_specific_address_subtype) == 256, + "Incorrect size"); + +/** Transport-specific address subtype */ +union spdk_nvmf_transport_specific_address_subtype { + uint8_t raw[256]; + + /** RDMA */ + struct spdk_nvmf_rdma_transport_specific_address_subtype rdma; + + /** TCP */ + struct spdk_nvme_tcp_transport_specific_address_subtype tcp; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_transport_specific_address_subtype) == 256, + "Incorrect size"); + +#define SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE 32 + +/** + * Discovery Log Page entry + */ +struct spdk_nvmf_discovery_log_page_entry { + /** Transport type (\ref spdk_nvmf_trtype) */ + uint8_t trtype; + + /** Address family (\ref spdk_nvmf_adrfam) */ + uint8_t adrfam; + + /** Subsystem type (\ref spdk_nvmf_subtype) */ + uint8_t subtype; + + /** Transport requirements */ + struct { + /** Secure channel 
requirements (\ref spdk_nvmf_treq_secure_channel) */ + uint8_t secure_channel : 2; + + uint8_t reserved : 6; + } treq; + + /** NVM subsystem port ID */ + uint16_t portid; + + /** Controller ID */ + uint16_t cntlid; + + /** Admin max SQ size */ + uint16_t asqsz; + + uint8_t reserved0[22]; + + /** Transport service identifier */ + uint8_t trsvcid[SPDK_NVMF_TRSVCID_MAX_LEN]; + + uint8_t reserved1[192]; + + /** NVM subsystem qualified name */ + uint8_t subnqn[256]; + + /** Transport address */ + uint8_t traddr[SPDK_NVMF_TRADDR_MAX_LEN]; + + /** Transport-specific address subtype */ + union spdk_nvmf_transport_specific_address_subtype tsas; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page_entry) == 1024, "Incorrect size"); + +struct spdk_nvmf_discovery_log_page { + uint64_t genctr; + uint64_t numrec; + uint16_t recfmt; + uint8_t reserved0[1006]; + struct spdk_nvmf_discovery_log_page_entry entries[0]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page) == 1024, "Incorrect size"); + +/* RDMA Fabric specific definitions below */ + +#define SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY 0xF + +struct spdk_nvmf_rdma_request_private_data { + uint16_t recfmt; /* record format */ + uint16_t qid; /* queue id */ + uint16_t hrqsize; /* host receive queue size */ + uint16_t hsqsize; /* host send queue size */ + uint16_t cntlid; /* controller id */ + uint8_t reserved[22]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_request_private_data) == 32, "Incorrect size"); + +struct spdk_nvmf_rdma_accept_private_data { + uint16_t recfmt; /* record format */ + uint16_t crqsize; /* controller receive queue size */ + uint8_t reserved[28]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_accept_private_data) == 32, "Incorrect size"); + +struct spdk_nvmf_rdma_reject_private_data { + uint16_t recfmt; /* record format */ + uint16_t sts; /* status */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_reject_private_data) == 4, "Incorrect size"); + +union 
spdk_nvmf_rdma_private_data { + struct spdk_nvmf_rdma_request_private_data pd_request; + struct spdk_nvmf_rdma_accept_private_data pd_accept; + struct spdk_nvmf_rdma_reject_private_data pd_reject; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_rdma_private_data) == 32, "Incorrect size"); + +enum spdk_nvmf_rdma_transport_error { + SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH = 0x1, + SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT = 0x2, + SPDK_NVMF_RDMA_ERROR_INVALID_QID = 0x3, + SPDK_NVMF_RDMA_ERROR_INVALID_HSQSIZE = 0x4, + SPDK_NVMF_RDMA_ERROR_INVALID_HRQSIZE = 0x5, + SPDK_NVMF_RDMA_ERROR_NO_RESOURCES = 0x6, + SPDK_NVMF_RDMA_ERROR_INVALID_IRD = 0x7, + SPDK_NVMF_RDMA_ERROR_INVALID_ORD = 0x8, +}; + +/* TCP transport specific definitions below */ + +/** NVMe/TCP PDU type */ +enum spdk_nvme_tcp_pdu_type { + /** Initialize Connection Request (ICReq) */ + SPDK_NVME_TCP_PDU_TYPE_IC_REQ = 0x00, + + /** Initialize Connection Response (ICResp) */ + SPDK_NVME_TCP_PDU_TYPE_IC_RESP = 0x01, + + /** Host To Controller Terminate Connection Request (H2CTermReq) */ + SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ = 0x02, + + /** Controller To Host Terminate Connection Request (C2HTermReq) */ + SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ = 0x03, + + /** Command Capsule (CapsuleCmd) */ + SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD = 0x04, + + /** Response Capsule (CapsuleRsp) */ + SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP = 0x05, + + /** Host To Controller Data (H2CData) */ + SPDK_NVME_TCP_PDU_TYPE_H2C_DATA = 0x06, + + /** Controller To Host Data (C2HData) */ + SPDK_NVME_TCP_PDU_TYPE_C2H_DATA = 0x07, + + /** Ready to Transfer (R2T) */ + SPDK_NVME_TCP_PDU_TYPE_R2T = 0x09, +}; + +/** Common NVMe/TCP PDU header */ +struct spdk_nvme_tcp_common_pdu_hdr { + /** PDU type (\ref spdk_nvme_tcp_pdu_type) */ + uint8_t pdu_type; + + /** pdu_type-specific flags */ + uint8_t flags; + + /** Length of PDU header (not including the Header Digest) */ + uint8_t hlen; + + /** PDU Data Offset from the start of the PDU */ + uint8_t pdo; + + /** Total number of bytes in PDU, including
pdu_hdr */ + uint32_t plen; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_common_pdu_hdr) == 8, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type) == 0, + "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, flags) == 1, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen) == 2, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo) == 3, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen) == 4, "Incorrect offset"); + +#define SPDK_NVME_TCP_CH_FLAGS_HDGSTF (1u << 0) +#define SPDK_NVME_TCP_CH_FLAGS_DDGSTF (1u << 1) + +/** + * ICReq + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ + */ +struct spdk_nvme_tcp_ic_req { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t pfv; + /** Specifies the data alignment for all PDUs transferred from the controller to the host that contain data */ + uint8_t hpda; + union { + uint8_t raw; + struct { + uint8_t hdgst_enable : 1; + uint8_t ddgst_enable : 1; + uint8_t reserved : 6; + } bits; + } dgst; + uint32_t maxr2t; + uint8_t reserved16[112]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_ic_req) == 128, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, pfv) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, hpda) == 10, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_req, maxr2t) == 12, "Incorrect offset"); + +#define SPDK_NVME_TCP_CPDA_MAX 31 +#define SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET ((SPDK_NVME_TCP_CPDA_MAX + 1) << 2) + +/** + * ICResp + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP + */ +struct spdk_nvme_tcp_ic_resp { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t pfv; + /** Specifies the data alignment for all PDUs transferred from the host to the controller that contain data */ + uint8_t cpda; + union { + 
uint8_t raw; + struct { + uint8_t hdgst_enable : 1; + uint8_t ddgst_enable : 1; + uint8_t reserved : 6; + } bits; + } dgst; + /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ + uint32_t maxh2cdata; + uint8_t reserved16[112]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_ic_resp) == 128, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, pfv) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, cpda) == 10, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata) == 12, "Incorrect offset"); + +/** + * TermReq + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_TERM_REQ + */ +struct spdk_nvme_tcp_term_req_hdr { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t fes; + uint8_t fei[4]; + uint8_t reserved14[10]; +}; + +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_term_req_hdr) == 24, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_term_req_hdr, fes) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_term_req_hdr, fei) == 10, "Incorrect offset"); + +enum spdk_nvme_tcp_term_req_fes { + SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD = 0x01, + SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR = 0x02, + SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR = 0x03, + SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE = 0x04, + SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED = 0x05, + SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED = 0x05, + SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER = 0x06, +}; + +/* Total length of term req PDU (including PDU header and DATA) in bytes shall not exceed a limit of 152 bytes. 
*/ +#define SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE 128 +#define SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE (SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE + sizeof(struct spdk_nvme_tcp_term_req_hdr)) + +/** + * CapsuleCmd + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD + */ +struct spdk_nvme_tcp_cmd { + struct spdk_nvme_tcp_common_pdu_hdr common; + struct spdk_nvme_cmd ccsqe; + /**< icdoff hdgst padding + in-capsule data + ddgst (if enabled) */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_cmd) == 72, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_cmd, ccsqe) == 8, "Incorrect offset"); + +/** + * CapsuleResp + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP + */ +struct spdk_nvme_tcp_rsp { + struct spdk_nvme_tcp_common_pdu_hdr common; + struct spdk_nvme_cpl rccqe; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_rsp) == 24, "incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_rsp, rccqe) == 8, "Incorrect offset"); + + +/** + * H2CData + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_DATA + */ +struct spdk_nvme_tcp_h2c_data_hdr { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t cccid; + uint16_t ttag; + uint32_t datao; + uint32_t datal; + uint8_t reserved20[4]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_h2c_data_hdr) == 24, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag) == 10, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, datao) == 12, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_h2c_data_hdr, datal) == 16, "Incorrect offset"); + +#define SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU (1u << 2) +#define SPDK_NVME_TCP_H2C_DATA_FLAGS_SUCCESS (1u << 3) +#define SPDK_NVME_TCP_H2C_DATA_PDO_MULT 8u + +/** + * C2HData + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA + */ +struct 
spdk_nvme_tcp_c2h_data_hdr { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t cccid; + uint8_t reserved10[2]; + uint32_t datao; + uint32_t datal; + uint8_t reserved20[4]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_c2h_data_hdr) == 24, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao) == 12, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal) == 16, "Incorrect offset"); + +#define SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS (1u << 3) +#define SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU (1u << 2) +#define SPDK_NVME_TCP_C2H_DATA_PDO_MULT 8u + +/** + * R2T + * + * common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_R2T + */ +struct spdk_nvme_tcp_r2t_hdr { + struct spdk_nvme_tcp_common_pdu_hdr common; + uint16_t cccid; + uint16_t ttag; + uint32_t r2to; + uint32_t r2tl; + uint8_t reserved20[4]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_tcp_r2t_hdr) == 24, "Incorrect size"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid) == 8, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, ttag) == 10, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to) == 12, "Incorrect offset"); +SPDK_STATIC_ASSERT(offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl) == 16, "Incorrect offset"); + +#pragma pack(pop) + +#endif /* __NVMF_SPEC_H__ */ diff --git a/src/spdk/include/spdk/nvmf_transport.h b/src/spdk/include/spdk/nvmf_transport.h new file mode 100644 index 000000000..ceb331856 --- /dev/null +++ b/src/spdk/include/spdk/nvmf_transport.h @@ -0,0 +1,495 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * NVMe-oF Target transport plugin API + */ + +#ifndef SPDK_NVMF_TRANSPORT_H_ +#define SPDK_NVMF_TRANSPORT_H_ + +#include "spdk/bdev.h" +#include "spdk/nvme_spec.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_cmd.h" +#include "spdk/nvmf_spec.h" +#include "spdk/memory.h" + +#define SPDK_NVMF_MAX_SGL_ENTRIES 16 + +/* The maximum number of buffers per request */ +#define NVMF_REQ_MAX_BUFFERS (SPDK_NVMF_MAX_SGL_ENTRIES * 2) + +/* AIO backend requires block size aligned data buffers, + * extra 4KiB aligned data buffer should work for most devices. + */ +#define NVMF_DATA_BUFFER_ALIGNMENT VALUE_4KB +#define NVMF_DATA_BUFFER_MASK (NVMF_DATA_BUFFER_ALIGNMENT - 1LL) + +union nvmf_h2c_msg { + struct spdk_nvmf_capsule_cmd nvmf_cmd; + struct spdk_nvme_cmd nvme_cmd; + struct spdk_nvmf_fabric_prop_set_cmd prop_set_cmd; + struct spdk_nvmf_fabric_prop_get_cmd prop_get_cmd; + struct spdk_nvmf_fabric_connect_cmd connect_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size"); + +union nvmf_c2h_msg { + struct spdk_nvme_cpl nvme_cpl; + struct spdk_nvmf_fabric_prop_get_rsp prop_get_rsp; + struct spdk_nvmf_fabric_connect_rsp connect_rsp; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size"); + +struct spdk_nvmf_dif_info { + struct spdk_dif_ctx dif_ctx; + bool dif_insert_or_strip; + uint32_t elba_length; + uint32_t orig_length; +}; + +struct spdk_nvmf_request { + struct spdk_nvmf_qpair *qpair; + uint32_t length; + enum spdk_nvme_data_transfer xfer; + void *data; + union nvmf_h2c_msg *cmd; + union nvmf_c2h_msg *rsp; + void *buffers[NVMF_REQ_MAX_BUFFERS]; + struct iovec iov[NVMF_REQ_MAX_BUFFERS]; + uint32_t iovcnt; + bool data_from_pool; + struct spdk_bdev_io_wait_entry bdev_io_wait; + struct spdk_nvmf_dif_info dif; + spdk_nvmf_nvme_passthru_cmd_cb cmd_cb_fn; + struct spdk_nvmf_request *first_fused_req; + struct spdk_nvmf_request *req_to_abort; + struct spdk_poller *poller; + uint64_t timeout_tsc; + + 
STAILQ_ENTRY(spdk_nvmf_request) buf_link; + TAILQ_ENTRY(spdk_nvmf_request) link; +}; + +enum spdk_nvmf_qpair_state { + SPDK_NVMF_QPAIR_UNINITIALIZED = 0, + SPDK_NVMF_QPAIR_ACTIVE, + SPDK_NVMF_QPAIR_DEACTIVATING, + SPDK_NVMF_QPAIR_ERROR, +}; + +typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status); + +struct spdk_nvmf_qpair { + enum spdk_nvmf_qpair_state state; + spdk_nvmf_state_change_done state_cb; + void *state_cb_arg; + + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_poll_group *group; + + uint16_t qid; + uint16_t sq_head; + uint16_t sq_head_max; + + struct spdk_nvmf_request *first_fused_req; + + TAILQ_HEAD(, spdk_nvmf_request) outstanding; + TAILQ_ENTRY(spdk_nvmf_qpair) link; +}; + +struct spdk_nvmf_transport_pg_cache_buf { + STAILQ_ENTRY(spdk_nvmf_transport_pg_cache_buf) link; +}; + +struct spdk_nvmf_transport_poll_group { + struct spdk_nvmf_transport *transport; + /* Requests that are waiting to obtain a data buffer */ + STAILQ_HEAD(, spdk_nvmf_request) pending_buf_queue; + STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) buf_cache; + uint32_t buf_cache_count; + uint32_t buf_cache_size; + struct spdk_nvmf_poll_group *group; + TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link; +}; + +struct spdk_nvmf_poll_group { + struct spdk_thread *thread; + struct spdk_poller *poller; + + TAILQ_HEAD(, spdk_nvmf_transport_poll_group) tgroups; + + /* Array of poll groups indexed by subsystem id (sid) */ + struct spdk_nvmf_subsystem_poll_group *sgroups; + uint32_t num_sgroups; + + /* All of the queue pairs that belong to this poll group */ + TAILQ_HEAD(, spdk_nvmf_qpair) qpairs; + + /* Statistics */ + struct spdk_nvmf_poll_group_stat stat; + + spdk_nvmf_poll_group_destroy_done_fn destroy_cb_fn; + void *destroy_cb_arg; + + TAILQ_ENTRY(spdk_nvmf_poll_group) link; +}; + +struct spdk_nvmf_listener { + struct spdk_nvme_transport_id trid; + uint32_t ref; + + TAILQ_ENTRY(spdk_nvmf_listener) link; +}; + +/** + * A subset of 
struct spdk_nvme_ctrlr_data that are emulated by a fabrics device. + */ +struct spdk_nvmf_ctrlr_data { + uint16_t kas; + struct spdk_nvme_cdata_sgls sgls; + struct spdk_nvme_cdata_nvmf_specific nvmf_specific; +}; + +struct spdk_nvmf_transport { + struct spdk_nvmf_tgt *tgt; + const struct spdk_nvmf_transport_ops *ops; + struct spdk_nvmf_transport_opts opts; + + /* A mempool for transport related data transfers */ + struct spdk_mempool *data_buf_pool; + + TAILQ_HEAD(, spdk_nvmf_listener) listeners; + TAILQ_ENTRY(spdk_nvmf_transport) link; +}; + +struct spdk_nvmf_transport_ops { + /** + * Transport name + */ + char name[SPDK_NVMF_TRSTRING_MAX_LEN]; + + /** + * Transport type + */ + enum spdk_nvme_transport_type type; + + /** + * Initialize transport options to default value + */ + void (*opts_init)(struct spdk_nvmf_transport_opts *opts); + + /** + * Create a transport for the given transport opts + */ + struct spdk_nvmf_transport *(*create)(struct spdk_nvmf_transport_opts *opts); + + /** + * Destroy the transport + */ + int (*destroy)(struct spdk_nvmf_transport *transport); + + /** + * Instruct the transport to accept new connections at the address + * provided. This may be called multiple times. + */ + int (*listen)(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + + /** + * Stop accepting new connections at the given address. + */ + void (*stop_listen)(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + + /** + * A listener has been associated with a subsystem with the given NQN. + * This is only a notification. Most transports will not need to take any + * action here, as the enforcement of the association is done in the generic + * code. + * + * The association is not considered complete until cb_fn is called. New + * connections on the listener targeting this subsystem will be rejected + * until that time. + * + * Pass a negated errno code to `cb_fn` to block the association. 0 to allow. 
+ */ + void (*listen_associate)(struct spdk_nvmf_transport *transport, + const struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid, + spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, + void *cb_arg); + + /** + * Check for new connections on the transport. + */ + uint32_t (*accept)(struct spdk_nvmf_transport *transport); + + /** + * Initialize subset of identify controller data. + */ + void (*cdata_init)(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr_data *cdata); + + /** + * Fill out a discovery log entry for a specific listen address. + */ + void (*listener_discover)(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry); + + /** + * Create a new poll group + */ + struct spdk_nvmf_transport_poll_group *(*poll_group_create)(struct spdk_nvmf_transport *transport); + + /** + * Get the polling group of the queue pair optimal for the specific transport + */ + struct spdk_nvmf_transport_poll_group *(*get_optimal_poll_group)(struct spdk_nvmf_qpair *qpair); + + /** + * Destroy a poll group + */ + void (*poll_group_destroy)(struct spdk_nvmf_transport_poll_group *group); + + /** + * Add a qpair to a poll group + */ + int (*poll_group_add)(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + + /** + * Remove a qpair from a poll group + */ + int (*poll_group_remove)(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + + /** + * Poll the group to process I/O + */ + int (*poll_group_poll)(struct spdk_nvmf_transport_poll_group *group); + + /* + * Free the request without sending a response + * to the originator. Release memory tied to this request. + */ + int (*req_free)(struct spdk_nvmf_request *req); + + /* + * Signal request completion, which sends a response + * to the originator. 
+ */ + int (*req_complete)(struct spdk_nvmf_request *req); + + /* + * Deinitialize a connection. + */ + void (*qpair_fini)(struct spdk_nvmf_qpair *qpair); + + /* + * Get the peer transport ID for the queue pair. + */ + int (*qpair_get_peer_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + + /* + * Get the local transport ID for the queue pair. + */ + int (*qpair_get_local_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + + /* + * Get the listener transport ID that accepted this qpair originally. + */ + int (*qpair_get_listen_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + + /* + * Abort the request which the abort request specifies. + * This function can complete synchronously or asynchronously, but + * is expected to call spdk_nvmf_request_complete() in the end + * for both cases. + */ + void (*qpair_abort_request)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_request *req); + + /* + * Get transport poll group statistics + */ + int (*poll_group_get_stat)(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport_poll_group_stat **stat); + + /* + * Free transport poll group statistics previously allocated with poll_group_get_stat() + */ + void (*poll_group_free_stat)(struct spdk_nvmf_transport_poll_group_stat *stat); +}; + +/** + * Register the operations for a given transport type. + * + * This function should be invoked by using the + * SPDK_NVMF_TRANSPORT_REGISTER macro in the transport's .c file. + * + * \param ops The operations associated with an NVMe-oF transport. + */ +void spdk_nvmf_transport_register(const struct spdk_nvmf_transport_ops *ops); + +int spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req); + +/** + * Function to be called for each newly discovered qpair. + * + * \param tgt The nvmf target + * \param qpair The newly discovered qpair. 
+ */ +void spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair); + +/** + * A subset of struct spdk_nvme_registers that are emulated by a fabrics device. + */ +struct spdk_nvmf_registers { + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + union spdk_nvme_cc_register cc; + union spdk_nvme_csts_register csts; + union spdk_nvme_aqa_register aqa; + uint64_t asq; + uint64_t acq; +}; + +const struct spdk_nvmf_registers *spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr); + +void spdk_nvmf_request_free_buffers(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport); +int spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport, + uint32_t length); +int spdk_nvmf_request_get_buffers_multi(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport, + uint32_t *lengths, uint32_t num_lengths); + +bool spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx); + +void spdk_nvmf_request_exec(struct spdk_nvmf_request *req); +void spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req); +int spdk_nvmf_request_free(struct spdk_nvmf_request *req); +int spdk_nvmf_request_complete(struct spdk_nvmf_request *req); + +/** + * Remove the given qpair from the poll group. + * + * \param qpair The qpair to remove. + */ +void spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair); + +/** + * Get the NVMe-oF subsystem associated with this controller. + * + * \param ctrlr The NVMe-oF controller + * + * \return The NVMe-oF subsystem + */ +struct spdk_nvmf_subsystem * +spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr); + +/** + * Get the NVMe-oF controller ID. 
+ * + * \param ctrlr The NVMe-oF controller + * + * \return The NVMe-oF controller ID + */ +uint16_t +spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr); + +/** + * Determine the data transfer direction of a request. + * + * The direction is looked up from the fabrics command type when the opcode is + * SPDK_NVME_OPC_FABRIC, and from the NVMe opcode otherwise. It is then downgraded + * to SPDK_NVME_DATA_NONE when the SGL descriptor in dptr.sgl1 reports a zero length. + * + * \param req The request whose command is inspected. + * + * \return The data transfer direction for the request. + */ +static inline enum spdk_nvme_data_transfer +spdk_nvmf_req_get_xfer(struct spdk_nvmf_request *req) { + enum spdk_nvme_data_transfer xfer; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; + + /* Figure out data transfer direction */ + if (cmd->opc == SPDK_NVME_OPC_FABRIC) + { + xfer = spdk_nvme_opc_get_data_transfer(req->cmd->nvmf_cmd.fctype); + } else + { + xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); + } + + if (xfer == SPDK_NVME_DATA_NONE) + { + return xfer; + } + + /* Even for commands that may transfer data, they could have specified 0 length. + * We want those to show up with xfer SPDK_NVME_DATA_NONE. + */ + switch (sgl->generic.type) + { + case SPDK_NVME_SGL_TYPE_DATA_BLOCK: + case SPDK_NVME_SGL_TYPE_BIT_BUCKET: + case SPDK_NVME_SGL_TYPE_SEGMENT: + case SPDK_NVME_SGL_TYPE_LAST_SEGMENT: + case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK: + if (sgl->unkeyed.length == 0) { + xfer = SPDK_NVME_DATA_NONE; + } + break; + case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK: + if (sgl->keyed.length == 0) { + xfer = SPDK_NVME_DATA_NONE; + } + break; + /* No default case: any other descriptor type leaves xfer unchanged. */ + } + + return xfer; +} + +/* + * Macro used to register new transports. + * + * Expands to a static function marked __attribute__((constructor)) that + * calls spdk_nvmf_transport_register(transport_ops). + */ +#define SPDK_NVMF_TRANSPORT_REGISTER(name, transport_ops) \ +static void __attribute__((constructor)) _spdk_nvmf_transport_register_##name(void) \ +{ \ + spdk_nvmf_transport_register(transport_ops); \ +}\ + +#endif diff --git a/src/spdk/include/spdk/opal.h b/src/spdk/include/spdk/opal.h new file mode 100644 index 000000000..270fcdd3c --- /dev/null +++ b/src/spdk/include/spdk/opal.h @@ -0,0 +1,145 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_OPAL_H +#define SPDK_OPAL_H + +#include "spdk/stdinc.h" +#include "spdk/nvme.h" +#include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/string.h" +#include "spdk/opal_spec.h" + +struct spdk_opal_d0_features_info { + struct spdk_opal_d0_tper_feat tper; + struct spdk_opal_d0_locking_feat locking; + struct spdk_opal_d0_single_user_mode_feat single_user; + struct spdk_opal_d0_geo_feat geo; + struct spdk_opal_d0_datastore_feat datastore; + struct spdk_opal_d0_v100_feat v100; + struct spdk_opal_d0_v200_feat v200; +}; + +enum spdk_opal_lock_state { + OPAL_READONLY = 0x01, + OPAL_RWLOCK = 0x02, + OPAL_READWRITE = 0x04, +}; + +enum spdk_opal_user { + OPAL_ADMIN1 = 0x0, + OPAL_USER1 = 0x01, + OPAL_USER2 = 0x02, + OPAL_USER3 = 0x03, + OPAL_USER4 = 0x04, + OPAL_USER5 = 0x05, + OPAL_USER6 = 0x06, + OPAL_USER7 = 0x07, + OPAL_USER8 = 0x08, + OPAL_USER9 = 0x09, +}; + +enum spdk_opal_locking_range { + OPAL_LOCKING_RANGE_GLOBAL = 0x0, + OPAL_LOCKING_RANGE_1, + OPAL_LOCKING_RANGE_2, + OPAL_LOCKING_RANGE_3, + OPAL_LOCKING_RANGE_4, + OPAL_LOCKING_RANGE_5, + OPAL_LOCKING_RANGE_6, + OPAL_LOCKING_RANGE_7, + OPAL_LOCKING_RANGE_8, + OPAL_LOCKING_RANGE_9, + OPAL_LOCKING_RANGE_10, +}; + +struct spdk_opal_locking_range_info { + uint8_t locking_range_id; + uint8_t _padding[7]; + uint64_t range_start; + uint64_t range_length; + bool read_lock_enabled; + bool write_lock_enabled; + bool read_locked; + bool write_locked; +}; + +struct spdk_opal_dev; + +struct spdk_opal_dev *spdk_opal_dev_construct(struct spdk_nvme_ctrlr *ctrlr); +void spdk_opal_dev_destruct(struct spdk_opal_dev *dev); + +struct spdk_opal_d0_features_info *spdk_opal_get_d0_features_info(struct spdk_opal_dev *dev); + +__attribute__((__deprecated__)) bool spdk_opal_supported(struct spdk_opal_dev *dev); + +int spdk_opal_cmd_take_ownership(struct spdk_opal_dev *dev, char *new_passwd); + +/** + * synchronous function: send and then receive. + * + * Wait until response is received. 
+ */ +int spdk_opal_cmd_revert_tper(struct spdk_opal_dev *dev, const char *passwd); + +int spdk_opal_cmd_activate_locking_sp(struct spdk_opal_dev *dev, const char *passwd); +int spdk_opal_cmd_lock_unlock(struct spdk_opal_dev *dev, enum spdk_opal_user user, + enum spdk_opal_lock_state flag, enum spdk_opal_locking_range locking_range, + const char *passwd); +int spdk_opal_cmd_setup_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user, + enum spdk_opal_locking_range locking_range_id, uint64_t range_start, + uint64_t range_length, const char *passwd); + +int spdk_opal_cmd_get_max_ranges(struct spdk_opal_dev *dev, const char *passwd); +int spdk_opal_cmd_get_locking_range_info(struct spdk_opal_dev *dev, const char *passwd, + enum spdk_opal_user user_id, + enum spdk_opal_locking_range locking_range_id); +int spdk_opal_cmd_enable_user(struct spdk_opal_dev *dev, enum spdk_opal_user user_id, + const char *passwd); +int spdk_opal_cmd_add_user_to_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id, + enum spdk_opal_locking_range locking_range_id, + enum spdk_opal_lock_state lock_flag, const char *passwd); +int spdk_opal_cmd_set_new_passwd(struct spdk_opal_dev *dev, enum spdk_opal_user user_id, + const char *new_passwd, const char *old_passwd, bool new_user); + +int spdk_opal_cmd_erase_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id, + enum spdk_opal_locking_range locking_range_id, const char *password); + +int spdk_opal_cmd_secure_erase_locking_range(struct spdk_opal_dev *dev, enum spdk_opal_user user_id, + enum spdk_opal_locking_range locking_range_id, const char *password); + +struct spdk_opal_locking_range_info *spdk_opal_get_locking_range_info(struct spdk_opal_dev *dev, + enum spdk_opal_locking_range id); +void spdk_opal_free_locking_range_info(struct spdk_opal_dev *dev, enum spdk_opal_locking_range id); +#endif diff --git a/src/spdk/include/spdk/opal_spec.h b/src/spdk/include/spdk/opal_spec.h new file mode 100644 index 
000000000..dae615cb0 --- /dev/null +++ b/src/spdk/include/spdk/opal_spec.h @@ -0,0 +1,379 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_OPAL_SPEC_H +#define SPDK_OPAL_SPEC_H + +#include "spdk/stdinc.h" +#include "spdk/assert.h" + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * 3.2.2.3 Tokens + */ +#define SPDK_TINY_ATOM_TYPE_MAX 0x7F +#define SPDK_SHORT_ATOM_TYPE_MAX 0xBF +#define SPDK_MEDIUM_ATOM_TYPE_MAX 0xDF +#define SPDK_LONG_ATOM_TYPE_MAX 0xE3 + +#define SPDK_TINY_ATOM_SIGN_FLAG 0x40 + +#define SPDK_TINY_ATOM_DATA_MASK 0x3F + +#define SPDK_SHORT_ATOM_ID 0x80 +#define SPDK_SHORT_ATOM_BYTESTRING_FLAG 0x20 +#define SPDK_SHORT_ATOM_SIGN_FLAG 0x10 +#define SPDK_SHORT_ATOM_LEN_MASK 0x0F + +#define SPDK_MEDIUM_ATOM_ID 0xC0 +#define SPDK_MEDIUM_ATOM_BYTESTRING_FLAG 0x10 + +#define SPDK_MEDIUM_ATOM_SIGN_FLAG 0x08 +#define SPDK_MEDIUM_ATOM_LEN_MASK 0x07 + +#define SPDK_LONG_ATOM_ID 0xE0 +#define SPDK_LONG_ATOM_BYTESTRING_FLAG 0x02 +#define SPDK_LONG_ATOM_SIGN_FLAG 0x01 + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * Table-26 ComID management + */ +#define LV0_DISCOVERY_COMID 0x01 + +/* + * TCG Storage Opal v2.01 r1.00 + * 5.2.3 Type Table Modification + */ +#define OPAL_MANUFACTURED_INACTIVE 0x08 + +#define LOCKING_RANGE_NON_GLOBAL 0x03 + +#define SPDK_OPAL_MAX_PASSWORD_SIZE 32 /* in bytes */ + +#define SPDK_OPAL_MAX_LOCKING_RANGE 8 /* maximum 8 ranges defined by spec */ + +/* + * Feature Code + */ +enum spdk_lv0_discovery_feature_code { + /* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * 3.3.6 Level 0 Discovery + */ + FEATURECODE_TPER = 0x0001, + FEATURECODE_LOCKING = 0x0002, + + /* + * Opal SSC 1.00 r3.00 Final + * 3.1.1.4 Opal SSC Feature + */ + FEATURECODE_OPALV100 = 0x0200, + + /* + * TCG Storage Opal v2.01 r1.00 + * 3.1.1.4 Geometry Reporting Feature + * 3.1.1.5 Opal SSC V2.00 Feature + */ + FEATURECODE_OPALV200 = 0x0203, + FEATURECODE_GEOMETRY = 0x0003, + + /* + * TCG Storage Opal Feature Set Single User Mode v1.00 r2.00 + * 4.2.1 Single User Mode Feature Descriptor + */ + FEATURECODE_SINGLEUSER = 0x0201, + + /* + * TCG Storage Opal Feature Set 
Additional DataStore Tables v1.00 r1.00 + * 4.1.1 DataStore Table Feature Descriptor + */ + FEATURECODE_DATASTORE = 0x0202, +}; + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * 5.1.4 Abstract Type + * + * NOTE(review): numeric values repeat across the groups below (e.g. 0x01, 0x03, 0x04), + * so a value alone does not identify a token; presumably each group is only + * meaningful in the context of its own table - confirm against the spec. + */ +enum spdk_opal_token { + /* boolean */ + SPDK_OPAL_TRUE = 0x01, + SPDK_OPAL_FALSE = 0x00, + + /* cell_block + * 5.1.4.2.3 */ + SPDK_OPAL_TABLE = 0x00, + SPDK_OPAL_STARTROW = 0x01, + SPDK_OPAL_ENDROW = 0x02, + SPDK_OPAL_STARTCOLUMN = 0x03, + SPDK_OPAL_ENDCOLUMN = 0x04, + SPDK_OPAL_VALUES = 0x01, + + /* C_PIN table + * 5.3.2.12 */ + SPDK_OPAL_PIN = 0x03, + + /* locking table + * 5.7.2.2 */ + SPDK_OPAL_RANGESTART = 0x03, + SPDK_OPAL_RANGELENGTH = 0x04, + SPDK_OPAL_READLOCKENABLED = 0x05, + SPDK_OPAL_WRITELOCKENABLED = 0x06, + SPDK_OPAL_READLOCKED = 0x07, + SPDK_OPAL_WRITELOCKED = 0x08, + SPDK_OPAL_ACTIVEKEY = 0x0A, + + /* locking info table */ + SPDK_OPAL_MAXRANGES = 0x04, + + /* mbr control */ + SPDK_OPAL_MBRENABLE = 0x01, + SPDK_OPAL_MBRDONE = 0x02, + + /* properties */ + SPDK_OPAL_HOSTPROPERTIES = 0x00, + + /* control tokens */ + SPDK_OPAL_STARTLIST = 0xF0, + SPDK_OPAL_ENDLIST = 0xF1, + SPDK_OPAL_STARTNAME = 0xF2, + SPDK_OPAL_ENDNAME = 0xF3, + SPDK_OPAL_CALL = 0xF8, + SPDK_OPAL_ENDOFDATA = 0xF9, + SPDK_OPAL_ENDOFSESSION = 0xFA, + SPDK_OPAL_STARTTRANSACTON = 0xFB, + SPDK_OPAL_ENDTRANSACTON = 0xFC, + SPDK_OPAL_EMPTYATOM = 0xFF, + SPDK_OPAL_WHERE = 0x00, + + /* life cycle */ + SPDK_OPAL_LIFECYCLE = 0x06, + + /* Authority table */ + SPDK_OPAL_AUTH_ENABLE = 0x05, + + /* ACE table */ + SPDK_OPAL_BOOLEAN_EXPR = 0x03, +}; + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * Table-39 Level0 Discovery Header Format + */ +struct spdk_opal_d0_hdr { + uint32_t length; + uint32_t revision; + uint32_t reserved_0; + uint32_t reserved_1; + uint8_t vendor_specfic[32]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_hdr) == 48, "Incorrect size"); + +/* + * Level 0 Discovery Feature Header + */ +struct spdk_opal_d0_feat_hdr { + uint16_t code; + uint8_t reserved : 
4; + uint8_t version : 4; + uint8_t length; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_feat_hdr) == 4, "Incorrect size"); + + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * Table-42 TPer Feature Descriptor + */ +struct __attribute__((packed)) spdk_opal_d0_tper_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint8_t sync : 1; + uint8_t async : 1; + uint8_t acknack : 1; + uint8_t buffer_management : 1; + uint8_t streaming : 1; + uint8_t reserved_1 : 1; + uint8_t comid_management : 1; + uint8_t reserved_2 : 1; + + uint8_t reserved_3[3]; + uint32_t reserved_4; + uint32_t reserved_5; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_tper_feat) == 16, "Incorrect size"); + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * Table-43 Locking Feature Descriptor + */ +struct __attribute__((packed)) spdk_opal_d0_locking_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint8_t locking_supported : 1; + uint8_t locking_enabled : 1; + uint8_t locked : 1; + uint8_t media_encryption : 1; + uint8_t mbr_enabled : 1; + uint8_t mbr_done : 1; + uint8_t reserved_1 : 1; + uint8_t reserved_2 : 1; + + uint8_t reserved_3[3]; + uint32_t reserved_4; + uint32_t reserved_5; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_locking_feat) == 16, "Incorrect size"); + +/* + * TCG Storage Opal Feature Set Single User Mode v1.00 r2.00 + * 4.2.1 Single User Mode Feature Descriptor + */ +struct __attribute__((packed)) spdk_opal_d0_single_user_mode_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint32_t num_locking_objects; + uint8_t any : 1; + uint8_t all : 1; + uint8_t policy : 1; + uint8_t reserved_1 : 5; + + uint8_t reserved_2; + uint16_t reserved_3; + uint32_t reserved_4; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_single_user_mode_feat) == 16, "Incorrect size"); + +/* + * TCG Storage Opal v2.01 r1.00 + * 3.1.1.4 Geometry Reporting Feature + */ +struct __attribute__((packed)) spdk_opal_d0_geo_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint8_t align : 1; + uint8_t 
reserved_1 : 7; + uint8_t reserved_2[7]; + uint32_t logical_block_size; + uint64_t alignment_granularity; + uint64_t lowest_aligned_lba; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_geo_feat) == 32, "Incorrect size"); + +/* + * TCG Storage Opal Feature Set Additional DataStore Tables v1.00 r1.00 + * 4.1.1 DataStore Table Feature Descriptor + */ +struct __attribute__((packed)) spdk_opal_d0_datastore_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint16_t reserved_1; + uint16_t max_tables; + uint32_t max_table_size; + uint32_t alignment; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_datastore_feat) == 16, "Incorrect size"); + +/* + * Opal SSC 1.00 r3.00 Final + * 3.1.1.4 Opal SSC Feature + */ +struct __attribute__((packed)) spdk_opal_d0_v100_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint16_t base_comid; + uint16_t number_comids; + uint8_t range_crossing : 1; + + uint8_t reserved_1 : 7; + uint8_t reserved_2; + uint16_t reserved_3; + uint32_t reserved_4; + uint32_t reserved_5; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_v100_feat) == 20, "Incorrect size"); + +/* + * TCG Storage Opal v2.01 r1.00 + * 3.1.1.4 Geometry Reporting Feature + * 3.1.1.5 Opal SSC V2.00 Feature + */ +struct __attribute__((packed)) spdk_opal_d0_v200_feat { + struct spdk_opal_d0_feat_hdr hdr; + uint16_t base_comid; + uint16_t num_comids; + uint8_t range_crossing : 1; + uint8_t reserved_1 : 7; + uint16_t num_locking_admin_auth; /* Number of Locking SP Admin Authorities Supported */ + uint16_t num_locking_user_auth; + uint8_t initial_pin; + uint8_t reverted_pin; + + uint8_t reserved_2; + uint32_t reserved_3; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_d0_v200_feat) == 20, "Incorrect size"); + +/* + * TCG Storage Architecture Core Spec v2.01 r1.00 + * 3.2.3 ComPackets, Packets & Subpackets + */ + +/* CommPacket header format + * (big-endian) + */ +struct __attribute__((packed)) spdk_opal_compacket { + uint32_t reserved; + uint8_t comid[2]; + uint8_t extended_comid[2]; + uint32_t 
outstanding_data; + uint32_t min_transfer; + uint32_t length; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_compacket) == 20, "Incorrect size"); + +/* packet header format */ +struct __attribute__((packed)) spdk_opal_packet { + uint32_t session_tsn; + uint32_t session_hsn; + uint32_t seq_number; + uint16_t reserved; + uint16_t ack_type; + uint32_t acknowledgment; + uint32_t length; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_packet) == 24, "Incorrect size"); + +/* data subpacket header */ +struct __attribute__((packed)) spdk_opal_data_subpacket { + uint8_t reserved[6]; + uint16_t kind; + uint32_t length; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_opal_data_subpacket) == 12, "Incorrect size"); + +#endif diff --git a/src/spdk/include/spdk/pci_ids.h b/src/spdk/include/spdk/pci_ids.h new file mode 100644 index 000000000..816eb0a84 --- /dev/null +++ b/src/spdk/include/spdk/pci_ids.h @@ -0,0 +1,139 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * PCI device ID list + */ + +#ifndef SPDK_PCI_IDS +#define SPDK_PCI_IDS + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_PCI_ANY_ID 0xffff +#define SPDK_PCI_VID_INTEL 0x8086 +#define SPDK_PCI_VID_MEMBLAZE 0x1c5f +#define SPDK_PCI_VID_SAMSUNG 0x144d +#define SPDK_PCI_VID_VIRTUALBOX 0x80ee +#define SPDK_PCI_VID_VIRTIO 0x1af4 +#define SPDK_PCI_VID_CNEXLABS 0x1d1d +#define SPDK_PCI_VID_VMWARE 0x15ad + +#define SPDK_PCI_CLASS_ANY_ID 0xffffff +/** + * PCI class code for NVMe devices. 
+ * + * Base class code 01h: mass storage + * Subclass code 08h: non-volatile memory + * Programming interface 02h: NVM Express + */ +#define SPDK_PCI_CLASS_NVME 0x010802 + +#define PCI_DEVICE_ID_INTEL_IDXD 0x0b25 + +#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e +#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f + +#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e +#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f + +#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e +#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f + +#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53 + +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0 0x6f50 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1 0x6f51 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53 + +#define 
PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e +#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f + +#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021 + +#define PCI_DEVICE_ID_INTEL_IOAT_ICX 0x0b00 + +#define PCI_DEVICE_ID_VIRTIO_BLK_LEGACY 0x1001 +#define PCI_DEVICE_ID_VIRTIO_SCSI_LEGACY 0x1004 +#define PCI_DEVICE_ID_VIRTIO_BLK_MODERN 0x1042 +#define PCI_DEVICE_ID_VIRTIO_SCSI_MODERN 0x1048 + +#define PCI_DEVICE_ID_VIRTIO_VHOST_USER 0x1017 + +#define PCI_DEVICE_ID_INTEL_VMD 0x201d + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_PCI_IDS */ diff --git a/src/spdk/include/spdk/pipe.h b/src/spdk/include/spdk/pipe.h new file mode 100644 index 000000000..36d7eb630 --- /dev/null +++ b/src/spdk/include/spdk/pipe.h @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * A pipe that is intended for buffering data between a source, such as + * a socket, and a sink, such as a parser, or vice versa. Any time data + * is received in units that differ from the units it is consumed + * in may benefit from using a pipe. + * + * The pipe is not thread safe. Only a single thread can act as both + * the producer (called the writer) and the consumer (called the reader). + */ + +#ifndef SPDK_PIPE_H +#define SPDK_PIPE_H + +#include "spdk/stdinc.h" + +struct spdk_pipe; + +/** + * Construct a pipe around the given memory buffer. The pipe treats the memory + * buffer as a circular ring of bytes. + * + * The available size for writing will be one less byte than provided. A single + * byte must be reserved to distinguish queue full from queue empty conditions. + * + * \param buf The data buffer that backs this pipe. + * \param sz The size of the data buffer. + * + * \return spdk_pipe. The new pipe. + */ +struct spdk_pipe *spdk_pipe_create(void *buf, uint32_t sz); + +/** + * Destroys the pipe. This does not release the buffer, but does + * make it safe for the user to release the buffer. + * + * \param pipe The pipe to operate on. 
+ */ +void spdk_pipe_destroy(struct spdk_pipe *pipe); + +/** + * Acquire memory from the pipe for writing. + * + * This function will acquire up to sz bytes from the pipe to be used for + * writing. It may return fewer total bytes. + * + * The memory is only marked as consumed upon a call to spdk_pipe_writer_advance(). + * Multiple calls to this function without calling advance return the same region + * of memory. + * + * \param pipe The pipe to operate on. + * \param sz The size requested. + * \param iovs A two element iovec array that will be populated with the requested memory. + * + * \return The total bytes obtained. May be 0. + */ +int spdk_pipe_writer_get_buffer(struct spdk_pipe *pipe, uint32_t sz, struct iovec *iovs); + +/** + * Advance the write pointer by the given number of bytes + * + * The user can obtain memory from the pipe using spdk_pipe_writer_get_buffer(), + * but only calling this function marks it as consumed. The user is not required + * to advance the same number of bytes as was obtained from spdk_pipe_writer_get_buffer(). + * However, upon calling this function, the previous memory region is considered + * invalid and the user must call spdk_pipe_writer_get_buffer() again to obtain + * additional memory. + * + * The user cannot advance past the current read location. + * + * \param pipe The pipe to operate on. + * \param count The number of bytes to advance. + * + * \return On error, a negated errno. On success, 0. + */ +int spdk_pipe_writer_advance(struct spdk_pipe *pipe, uint32_t count); + +/** + * Get the number of bytes available to read from the pipe. + * + * \param pipe The pipe to operate on. + * + * \return The number of bytes available for reading. + */ +uint32_t spdk_pipe_reader_bytes_available(struct spdk_pipe *pipe); + +/** + * Obtain previously written memory from the pipe for reading. + * + * This call populates the two element iovec provided with a region + * of memory containing the next available data in the pipe. 
The size + * will be up to sz bytes, but may be less. + * + * Calling this function does not mark the memory as consumed. Calling this function + * twice without a call to spdk_pipe_reader_advance in between will return the same + * region of memory. + * + * \param pipe The pipe to operate on. + * \param sz The size requested. + * \param iovs A two element iovec array that will be populated with the requested memory. + * + * \return On error, a negated errno. On success, the total number of bytes available. + */ +int spdk_pipe_reader_get_buffer(struct spdk_pipe *pipe, uint32_t sz, struct iovec *iovs); + +/** + * Mark memory as read, making it available for writing. The user is not required + * to advance the same number of bytes as was obtained by a previous call to + * spdk_pipe_reader_get_buffer(). + * + * \param pipe The pipe to operate on. + * \param count The number of bytes to advance. + * + * \return On error, a negated errno. On success, 0. + */ +int spdk_pipe_reader_advance(struct spdk_pipe *pipe, uint32_t count); + +#endif diff --git a/src/spdk/include/spdk/queue.h b/src/spdk/include/spdk/queue.h new file mode 100644 index 000000000..24e2e2e20 --- /dev/null +++ b/src/spdk/include/spdk/queue.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_QUEUE_H +#define SPDK_QUEUE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/cdefs.h> +#include <sys/queue.h> + +/* + * The SPDK NVMe driver was originally ported from FreeBSD, which makes + * use of features in FreeBSD's queue.h that do not exist on Linux. + * Include a header with these additional features on Linux only. + */ +#ifndef __FreeBSD__ +#include "spdk/queue_extras.h" +#endif + +/* + * scan-build can't follow double pointers in queues and often assumes + * that removed elements are still on the list. We redefine TAILQ_REMOVE + * with extra asserts to silence it. 
+ */ +#ifdef __clang_analyzer__ +#undef TAILQ_REMOVE +#define TAILQ_REMOVE(head, elm, field) do { \ + __typeof__(elm) _elm; \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ + /* make sure the removed elm is not on the list anymore */ \ + TAILQ_FOREACH(_elm, head, field) { \ + assert(_elm != elm); \ + } \ +} while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/queue_extras.h b/src/spdk/include/spdk/queue_extras.h new file mode 100644 index 000000000..904625e4d --- /dev/null +++ b/src/spdk/include/spdk/queue_extras.h @@ -0,0 +1,343 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef SPDK_QUEUE_EXTRAS_H +#define SPDK_QUEUE_EXTRAS_H + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. 
+ * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may be traversed in either direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. + * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - + - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_FROM + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_FROM_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_FROM - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _FOREACH_REVERSE_FROM_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_AFTER + - + - + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * _SWAP + + + + + * + */ + +#include "spdk/util.h" + +/* + * Singly-linked Tail queue declarations. 
+ */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +/* + * Singly-linked Tail queue functions. + */ +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? NULL : \ + SPDK_CONTAINEROF((head)->stqh_last, struct type, field.stqe_next)) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE_AFTER(head, elm, field) do { \ + if ((STAILQ_NEXT(elm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_SWAP(head1, head2, type) do { \ + struct type *swap_first = STAILQ_FIRST(head1); \ + struct type **swap_last = (head1)->stqh_last; \ + STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_FIRST(head2) = swap_first; \ + (head2)->stqh_last = swap_last; \ + if (STAILQ_EMPTY(head1)) \ + (head1)->stqh_last = &STAILQ_FIRST(head1); \ + if (STAILQ_EMPTY(head2)) \ + (head2)->stqh_last = &STAILQ_FIRST(head2); \ +} while (0) + +/* + * List declarations. 
+ */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. + */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? (var) : LIST_FIRST((head))); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : LIST_FIRST((head))); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_PREV(elm, head, type, field) \ + ((elm)->field.le_prev == &LIST_FIRST((head)) ? 
NULL : \ + SPDK_CONTAINEROF((elm)->field.le_prev, struct type, field.le_next)) + +#define LIST_SWAP(head1, head2, type, field) do { \ + struct type *swap_tmp = LIST_FIRST((head1)); \ + LIST_FIRST((head1)) = LIST_FIRST((head2)); \ + LIST_FIRST((head2)) = swap_tmp; \ + if ((swap_tmp = LIST_FIRST((head1))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head1)); \ + if ((swap_tmp = LIST_FIRST((head2))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head2)); \ +} while (0) + +/* + * Tail queue functions. + */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? 
(var) : TAILQ_FIRST((head))); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_SWAP(head1, head2, type, field) do { \ + struct type *swap_first = (head1)->tqh_first; \ + struct type **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) + +#endif diff --git 
a/src/spdk/include/spdk/reduce.h b/src/spdk/include/spdk/reduce.h new file mode 100644 index 000000000..f67c484fb --- /dev/null +++ b/src/spdk/include/spdk/reduce.h @@ -0,0 +1,253 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * SPDK block compression + */ + +#ifndef SPDK_REDUCE_H_ +#define SPDK_REDUCE_H_ + +#include "spdk/uuid.h" + +#define REDUCE_MAX_IOVECS 17 + +/** + * Describes the parameters of an spdk_reduce_vol. + */ +struct spdk_reduce_vol_params { + struct spdk_uuid uuid; + + /** + * Size in bytes of the IO unit for the backing device. This + * is the unit in which space is allocated from the backing + * device, and the unit in which data is read from or written + * to the backing device. Must be greater than 0. + */ + uint32_t backing_io_unit_size; + + /** + * Size in bytes of a logical block. This is the unit in + * which users read or write data to the compressed volume. + * Must be greater than 0. + */ + uint32_t logical_block_size; + + /** + * Size in bytes of a chunk on the compressed volume. This + * is the unit in which data is compressed. Must be an even + * multiple of backing_io_unit_size and logical_block_size. + * Must be greater than 0. + */ + uint32_t chunk_size; + + /** + * Total size in bytes of the compressed volume. During + * initialization, the size is calculated from the size of + * the backing device, so this must be set to 0 in the + * structure passed to spdk_reduce_vol_init(). After + * initialization, or a successful load, this field will + * contain the total size which will be an even multiple + * of the chunk size. + */ + uint64_t vol_size; +}; + +struct spdk_reduce_vol; + +typedef void (*spdk_reduce_vol_op_complete)(void *ctx, int reduce_errno); +typedef void (*spdk_reduce_vol_op_with_handle_complete)(void *ctx, + struct spdk_reduce_vol *vol, + int reduce_errno); + +/** + * Defines function type for callback functions called when backing_dev + * operations are complete. 
+ * + * \param cb_arg Callback argument + * \param reduce_errno Completion status of backing_dev operation + * Negative values indicate negated errno value + * 0 indicates successful readv/writev/unmap operation + * Positive value indicates successful compress/decompress + * operations; number indicates number of bytes written to + * destination iovs + */ +typedef void (*spdk_reduce_dev_cpl)(void *cb_arg, int reduce_errno); + +struct spdk_reduce_vol_cb_args { + spdk_reduce_dev_cpl cb_fn; + void *cb_arg; +}; + +struct spdk_reduce_backing_dev { + void (*readv)(struct spdk_reduce_backing_dev *dev, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args); + + void (*writev)(struct spdk_reduce_backing_dev *dev, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args); + + void (*unmap)(struct spdk_reduce_backing_dev *dev, + uint64_t lba, uint32_t lba_count, struct spdk_reduce_vol_cb_args *args); + + void (*compress)(struct spdk_reduce_backing_dev *dev, + struct iovec *src_iov, int src_iovcnt, + struct iovec *dst_iov, int dst_iovcnt, + struct spdk_reduce_vol_cb_args *args); + + void (*decompress)(struct spdk_reduce_backing_dev *dev, + struct iovec *src_iov, int src_iovcnt, + struct iovec *dst_iov, int dst_iovcnt, + struct spdk_reduce_vol_cb_args *args); + + uint64_t blockcnt; + uint32_t blocklen; +}; + +/** + * Get the UUID for a libreduce compressed volume. + * + * \param vol Previously loaded or initialized compressed volume. + * \return UUID for the compressed volume. + */ +const struct spdk_uuid *spdk_reduce_vol_get_uuid(struct spdk_reduce_vol *vol); + +/** + * Initialize a new libreduce compressed volume. + * + * \param params Parameters for the new volume. + * \param backing_dev Structure describing the backing device to use for the new volume. + * \param pm_file_dir Directory to use for creation of the persistent memory file to + * use for the new volume. 
This function will append the UUID as + * the filename to create in this directory. + * \param cb_fn Callback function to signal completion of the initialization process. + * \param cb_arg Argument to pass to the callback function. + */ +void spdk_reduce_vol_init(struct spdk_reduce_vol_params *params, + struct spdk_reduce_backing_dev *backing_dev, + const char *pm_file_dir, + spdk_reduce_vol_op_with_handle_complete cb_fn, + void *cb_arg); + +/** + * Load an existing libreduce compressed volume. + * + * \param backing_dev Structure describing the backing device containing the compressed volume. + * \param cb_fn Callback function to signal completion of the loading process. + * \param cb_arg Argument to pass to the callback function. + */ +void spdk_reduce_vol_load(struct spdk_reduce_backing_dev *backing_dev, + spdk_reduce_vol_op_with_handle_complete cb_fn, + void *cb_arg); + +/** + * Unload a previously initialized or loaded libreduce compressed volume. + * + * \param vol Volume to unload. + * \param cb_fn Callback function to signal completion of the unload process. + * \param cb_arg Argument to pass to the callback function. + */ +void spdk_reduce_vol_unload(struct spdk_reduce_vol *vol, + spdk_reduce_vol_op_complete cb_fn, + void *cb_arg); + +/** + * Destroy an existing libreduce compressed volume. + * + * This will zero the metadata region on the backing device and delete the associated + * pm metadata file. If the backing device does not contain a compressed volume, the + * cb_fn will be called with error status without modifying the backing device nor + * deleting a pm file. + * + * \param backing_dev Structure describing the backing device containing the compressed volume. + * \param cb_fn Callback function to signal completion of the destruction process. + * \param cb_arg Argument to pass to the callback function. 
+ */ +void spdk_reduce_vol_destroy(struct spdk_reduce_backing_dev *backing_dev, + spdk_reduce_vol_op_complete cb_fn, + void *cb_arg); + +/** + * Read data from a libreduce compressed volume. + * + * This function will only read from logical blocks on the compressed volume that + * fall within the same chunk. + * + * \param vol Volume to read data from. + * \param iov iovec array describing the data to be read + * \param iovcnt Number of elements in the iovec array + * \param offset Offset (in logical blocks) to read the data on the compressed volume + * \param length Length (in logical blocks) of the data to read + * \param cb_fn Callback function to signal completion of the readv operation. + * \param cb_arg Argument to pass to the callback function. + */ +void spdk_reduce_vol_readv(struct spdk_reduce_vol *vol, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_reduce_vol_op_complete cb_fn, void *cb_arg); + +/** + * Write data to a libreduce compressed volume. + * + * This function will only write to logical blocks on the compressed volume that + * fall within the same chunk. + * + * \param vol Volume to write data to. + * \param iov iovec array describing the data to be written + * \param iovcnt Number of elements in the iovec array + * \param offset Offset (in logical blocks) to write the data on the compressed volume + * \param length Length (in logical blocks) of the data to write + * \param cb_fn Callback function to signal completion of the writev operation. + * \param cb_arg Argument to pass to the callback function. + */ +void spdk_reduce_vol_writev(struct spdk_reduce_vol *vol, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_reduce_vol_op_complete cb_fn, void *cb_arg); + +/** + * Get the params structure for a libreduce compressed volume. + * + * This function will return the params structure for a given volume. + * + * \param vol Previously loaded or initialized compressed volume. 
+ * \return params structure for the compressed volume. + */ +const struct spdk_reduce_vol_params *spdk_reduce_vol_get_params(struct spdk_reduce_vol *vol); + +/** + * Dump out key information for a libreduce compressed volume and its PMEM. + * + * This function will print key information for a given volume and its PMEM. + * + * \param vol Previously loaded or initialized compressed volume. + */ +void spdk_reduce_vol_print_info(struct spdk_reduce_vol *vol); +#endif /* SPDK_REDUCE_H_ */ diff --git a/src/spdk/include/spdk/rpc.h b/src/spdk/include/spdk/rpc.h new file mode 100644 index 000000000..b85606e43 --- /dev/null +++ b/src/spdk/include/spdk/rpc.h @@ -0,0 +1,155 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_RPC_CONFIG_H_ +#define SPDK_RPC_CONFIG_H_ + +#include "spdk/stdinc.h" + +#include "spdk/jsonrpc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Verify correctness of registered RPC methods and aliases. + * + * Incorrect registrations include: + * - multiple RPC methods registered with the same name + * - RPC alias registered with a method that does not exist + * - RPC alias registered that points to another alias + * + * \return true if registrations are all correct, false otherwise + */ +bool spdk_rpc_verify_methods(void); + +/** + * Start listening for RPC connections. + * + * \param listen_addr Listening address. + * + * \return 0 on success, -1 on failure. + */ +int spdk_rpc_listen(const char *listen_addr); + +/** + * Poll the RPC server. + */ +void spdk_rpc_accept(void); + +/** + * Stop listening for RPC connections. + */ +void spdk_rpc_close(void); + +/** + * Function signature for RPC request handlers. + * + * \param request RPC request to handle. + * \param params Parameters associated with the RPC request. + */ +typedef void (*spdk_rpc_method_handler)(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params); + +/** + * Register an RPC method. + * + * \param method Name for the registered method. + * \param func Function registered for this method to handle the RPC request. + * \param state_mask State mask of the registered method. 
If the bit of the state of + * the RPC server is set in the state_mask, the method is allowed. Otherwise, it is rejected. + */ +void spdk_rpc_register_method(const char *method, spdk_rpc_method_handler func, + uint32_t state_mask); + +/** + * Register a deprecated alias for an RPC method. + * + * \param method Name for the registered method. + * \param alias Alias for the registered method. + */ +void spdk_rpc_register_alias_deprecated(const char *method, const char *alias); + +/** + * Check if \c method is allowed for \c state_mask + * + * \param method Method name + * \param state_mask state mask to check against + * \return 0 if method is allowed or negative error code: + * -EPERM method is not allowed + * -ENOENT method not found + */ +int spdk_rpc_is_method_allowed(const char *method, uint32_t state_mask); + +#define SPDK_RPC_STARTUP 0x1 +#define SPDK_RPC_RUNTIME 0x2 + +/* Give SPDK_RPC_REGISTER a higher execution priority than + * SPDK_RPC_REGISTER_ALIAS_DEPRECATED to ensure all of the RPCs are registered + * before we try registering any aliases. Some older versions of clang may + * otherwise execute the constructors in a different order than + * defined in the source file (see issue #892). + */ +#define SPDK_RPC_REGISTER(method, func, state_mask) \ +static void __attribute__((constructor(1000))) rpc_register_##func(void) \ +{ \ + spdk_rpc_register_method(method, func, state_mask); \ +} + +#define SPDK_RPC_REGISTER_ALIAS_DEPRECATED(method, alias) \ +static void __attribute__((constructor(1001))) rpc_register_##alias(void) \ +{ \ + spdk_rpc_register_alias_deprecated(#method, #alias); \ +} + +/** + * Set the state mask of the RPC server. Any RPC method whose state mask has + * the bit of the state of the RPC server set is allowed. + * + * \param state_mask New state mask of the RPC server. + */ +void spdk_rpc_set_state(uint32_t state_mask); + +/** + * Get the current state of the RPC server. + * + * \return The current state of the RPC server. 
+ */ +uint32_t spdk_rpc_get_state(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/scsi.h b/src/spdk/include/spdk/scsi.h new file mode 100644 index 000000000..1b3f75577 --- /dev/null +++ b/src/spdk/include/spdk/scsi.h @@ -0,0 +1,571 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * SCSI to bdev translation layer + */ + +#ifndef SPDK_SCSI_H +#define SPDK_SCSI_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Defines for SPDK tracing framework */ +#define OWNER_SCSI_DEV 0x10 +#define OBJECT_SCSI_TASK 0x10 +#define TRACE_GROUP_SCSI 0x2 +#define TRACE_SCSI_TASK_DONE SPDK_TPOINT_ID(TRACE_GROUP_SCSI, 0x0) +#define TRACE_SCSI_TASK_START SPDK_TPOINT_ID(TRACE_GROUP_SCSI, 0x1) + +#define SPDK_SCSI_MAX_DEVS 1024 +#define SPDK_SCSI_DEV_MAX_LUN 64 +#define SPDK_SCSI_DEV_MAX_PORTS 4 +#define SPDK_SCSI_DEV_MAX_NAME 255 + +#define SPDK_SCSI_PORT_MAX_NAME_LENGTH 255 +#define SPDK_SCSI_MAX_TRANSPORT_ID_LENGTH 255 + +enum spdk_scsi_data_dir { + SPDK_SCSI_DIR_NONE = 0, + SPDK_SCSI_DIR_TO_DEV = 1, + SPDK_SCSI_DIR_FROM_DEV = 2, +}; + +enum spdk_scsi_task_func { + SPDK_SCSI_TASK_FUNC_ABORT_TASK = 0, + SPDK_SCSI_TASK_FUNC_ABORT_TASK_SET, + SPDK_SCSI_TASK_FUNC_CLEAR_TASK_SET, + SPDK_SCSI_TASK_FUNC_LUN_RESET, +}; + +/* + * SAM does not define the values for these service responses. Each transport + * (e.g. SAS, FC, iSCSI) will map these values to transport-specific codes, + * and may add their own. 
+ */ +enum spdk_scsi_task_mgmt_resp { + SPDK_SCSI_TASK_MGMT_RESP_COMPLETE, + SPDK_SCSI_TASK_MGMT_RESP_SUCCESS, + SPDK_SCSI_TASK_MGMT_RESP_REJECT, + SPDK_SCSI_TASK_MGMT_RESP_INVALID_LUN, + SPDK_SCSI_TASK_MGMT_RESP_TARGET_FAILURE, + SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED +}; + +struct spdk_scsi_task; +typedef void (*spdk_scsi_task_cpl)(struct spdk_scsi_task *task); +typedef void (*spdk_scsi_task_free)(struct spdk_scsi_task *task); + +struct spdk_scsi_task { + uint8_t status; + uint8_t function; /* task mgmt function */ + uint8_t response; /* task mgmt response */ + + struct spdk_scsi_lun *lun; + struct spdk_scsi_port *target_port; + struct spdk_scsi_port *initiator_port; + + spdk_scsi_task_cpl cpl_fn; + spdk_scsi_task_free free_fn; + + uint32_t ref; + uint32_t transfer_len; + uint32_t dxfer_dir; + uint32_t length; + + /** + * Amount of data actually transferred. Can be less than requested + * transfer_len - i.e. SCSI INQUIRY. + */ + uint32_t data_transferred; + + uint64_t offset; + + uint8_t *cdb; + + /** + * \internal + * Size of internal buffer or zero when iov.iov_base is not internally managed. + */ + uint32_t alloc_len; + /** + * \internal + * iov is internal buffer. Use iovs to access elements of IO. + */ + struct iovec iov; + struct iovec *iovs; + uint16_t iovcnt; + + uint8_t sense_data[32]; + size_t sense_data_len; + + void *bdev_io; + + TAILQ_ENTRY(spdk_scsi_task) scsi_link; + + uint32_t abort_id; + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +struct spdk_scsi_port; +struct spdk_scsi_dev; +struct spdk_scsi_lun; +struct spdk_scsi_lun_desc; + +typedef void (*spdk_scsi_lun_remove_cb_t)(struct spdk_scsi_lun *, void *); +typedef void (*spdk_scsi_dev_destruct_cb_t)(void *cb_arg, int rc); + +/** + * Initialize SCSI layer. + * + * \return 0 on success, -1 on failure. + */ +int spdk_scsi_init(void); + +/** + * Stop and clean the SCSI layer. + */ +void spdk_scsi_fini(void); + +/** + * Get the LUN id of the given logical unit. 
+ * + * \param lun Logical unit. + * + * \return LUN id of the logical unit. + */ +int spdk_scsi_lun_get_id(const struct spdk_scsi_lun *lun); + +/** + * Get the name of the bdev associated with the given logical unit. + * + * \param lun Logical unit. + * + * \return the name of the bdev associated with the logical unit. + */ +const char *spdk_scsi_lun_get_bdev_name(const struct spdk_scsi_lun *lun); + +/** + * Get the SCSI device associated with the given logical unit. + * + * \param lun Logical unit. + * + * \return the SCSI device associated with the logical unit. + */ +const struct spdk_scsi_dev *spdk_scsi_lun_get_dev(const struct spdk_scsi_lun *lun); + +/** + * Check if the logical unit is being hot removed. + * + * \param lun Logical unit + * + * \return true if removing, false otherwise. + */ +bool spdk_scsi_lun_is_removing(const struct spdk_scsi_lun *lun); + +/** + * Get the name of the given SCSI device. + * + * \param dev SCSI device. + * + * \return the name of the SCSI device on success, or NULL on failure. + */ +const char *spdk_scsi_dev_get_name(const struct spdk_scsi_dev *dev); + +/** + * Get the id of the given SCSI device. + * + * \param dev SCSI device. + * + * \return the id of the SCSI device. + */ +int spdk_scsi_dev_get_id(const struct spdk_scsi_dev *dev); + +/** + * Get the logical unit of the given SCSI device whose id is lun_id. + * + * \param dev SCSI device. + * \param lun_id Id of the logical unit. + * + * \return the logical unit on success, or NULL on failure. + */ +struct spdk_scsi_lun *spdk_scsi_dev_get_lun(struct spdk_scsi_dev *dev, int lun_id); + +/** + * Check whether the SCSI device has any pending task. + * + * \param dev SCSI device. + * \param initiator_port Check tasks only from the initiator if specified, or + * all tasks otherwise. + * + * \return true if the SCSI device has any pending task, or false otherwise. 
+ */ +bool spdk_scsi_dev_has_pending_tasks(const struct spdk_scsi_dev *dev, + const struct spdk_scsi_port *initiator_port); + +/** + * Destruct the SCSI device. + * + * \param dev SCSI device. + * \param cb_fn Callback function. + * \param cb_arg Argument to callback function. + */ +void spdk_scsi_dev_destruct(struct spdk_scsi_dev *dev, + spdk_scsi_dev_destruct_cb_t cb_fn, void *cb_arg); + +/** + * Execute the SCSI management task. + * + * The task can be constructed by the function spdk_scsi_task_construct(). + * The code of the task management function to be executed is set before calling this API. + * + * \param dev SCSI device. + * \param task SCSI task to be executed. + */ +void spdk_scsi_dev_queue_mgmt_task(struct spdk_scsi_dev *dev, struct spdk_scsi_task *task); + +/** + * Execute the SCSI task. + * + * The task can be constructed by the function spdk_scsi_task_construct(). + * + * \param dev SCSI device. + * \param task Task to be executed. + */ +void spdk_scsi_dev_queue_task(struct spdk_scsi_dev *dev, struct spdk_scsi_task *task); + +/** + * Add a new port to the given SCSI device. + * + * \param dev SCSI device. + * \param id Port id. + * \param name Port name. + * + * \return 0 on success, -1 on failure. + */ +int spdk_scsi_dev_add_port(struct spdk_scsi_dev *dev, uint64_t id, const char *name); + +/** + * Delete a specified port of the given SCSI device. + * + * \param dev SCSI device. + * \param id Port id. + * + * \return 0 on success, -1 on failure. + */ +int spdk_scsi_dev_delete_port(struct spdk_scsi_dev *dev, uint64_t id); + +/** + * Get the port of the given SCSI device whose port ID is id. + * + * \param dev SCSI device. + * \param id Port id. + * + * \return the port of the SCSI device on success, or NULL on failure. + */ +struct spdk_scsi_port *spdk_scsi_dev_find_port_by_id(struct spdk_scsi_dev *dev, uint64_t id); + +/** + * Allocate I/O channels for all LUNs of the given SCSI device. + * + * \param dev SCSI device. 
+ * + * \return 0 on success, -1 on failure. + */ +int spdk_scsi_dev_allocate_io_channels(struct spdk_scsi_dev *dev); + +/** + * Free I/O channels from all LUNs of the given SCSI device. + * + * \param dev SCSI device. + */ +void spdk_scsi_dev_free_io_channels(struct spdk_scsi_dev *dev); + +/** + * Construct a SCSI device object using the given parameters. + * + * \param name Name for the SCSI device. + * \param bdev_name_list List of bdev names to attach to the LUNs for this SCSI + * device. + * \param lun_id_list List of LUN IDs for the LUNs in this SCSI device. Caller is + * responsible for managing the memory containing this list. lun_id_list[x] is + * the LUN ID for bdev_name_list[x]. + * \param num_luns Number of entries in bdev_name_list and lun_id_list. + * \param protocol_id SCSI SPC protocol identifier to report in INQUIRY data + * \param hotremove_cb Callback to lun hotremoval. Will be called once hotremove + * is first triggered. + * \param hotremove_ctx Additional argument to hotremove_cb. + * + * \return the constructed spdk_scsi_dev object. + */ +struct spdk_scsi_dev *spdk_scsi_dev_construct(const char *name, + const char *bdev_name_list[], + int *lun_id_list, + int num_luns, + uint8_t protocol_id, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx); + +/** + * Delete a logical unit of the given SCSI device. + * + * \param dev SCSI device. + * \param lun Logical unit to delete. + */ +void spdk_scsi_dev_delete_lun(struct spdk_scsi_dev *dev, struct spdk_scsi_lun *lun); + +/** + * Add a new logical unit to the given SCSI device. + * + * \param dev SCSI device. + * \param bdev_name Name of the bdev attached to the logical unit. + * \param lun_id LUN id for the new logical unit. + * \param hotremove_cb Callback to lun hotremoval. Will be called once hotremove + * is first triggered. + * \param hotremove_ctx Additional argument to hotremove_cb. 
+ */ +int spdk_scsi_dev_add_lun(struct spdk_scsi_dev *dev, const char *bdev_name, int lun_id, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx); + +/** + * Create a new SCSI port. + * + * \param id Port id. + * \param index Port index. + * \param name Port Name. + * + * \return a pointer to the created SCSI port on success, or NULL on failure. + */ +struct spdk_scsi_port *spdk_scsi_port_create(uint64_t id, uint16_t index, const char *name); + +/** + * Free the SCSI port. + * + * \param pport Pointer to the SCSI port pointer to free. + */ +void spdk_scsi_port_free(struct spdk_scsi_port **pport); + +/** + * Get the name of the SCSI port. + * + * \param port SCSI port to query. + * + * \return the name of the SCSI port. + */ +const char *spdk_scsi_port_get_name(const struct spdk_scsi_port *port); + +/** + * Construct a new SCSI task. + * + * \param task SCSI task to construct. + * \param cpl_fn Called when the task is completed. + * \param free_fn Called when the task is freed. + */ +void spdk_scsi_task_construct(struct spdk_scsi_task *task, + spdk_scsi_task_cpl cpl_fn, + spdk_scsi_task_free free_fn); + +/** + * Put the SCSI task. + * + * \param task SCSI task to put. + */ +void spdk_scsi_task_put(struct spdk_scsi_task *task); + +/** + * Set internal buffer to given one. Caller is owner of that buffer. + * + * \param task SCSI task. + * \param data Pointer to buffer. + * \param len Buffer length. + */ +void spdk_scsi_task_set_data(struct spdk_scsi_task *task, void *data, uint32_t len); + +/** + * Single buffer -> vector of buffers. + * + * \param task SCSI task. + * \param src A pointer to the data buffer read from. + * \param len Length of the data buffer read from. + * + * \return the total length of the vector of buffers written into on success, or + * -1 on failure. + */ +int spdk_scsi_task_scatter_data(struct spdk_scsi_task *task, const void *src, size_t len); + +/** + * Vector of buffers -> single buffer. 
+ * + * \param task SCSI task. + * \param len Length of the buffer allocated and written into. + * + * \return a pointer to the buffer allocated and written into. + */ +void *spdk_scsi_task_gather_data(struct spdk_scsi_task *task, int *len); + +/** + * Build sense data for the SCSI task. + * + * \param task SCSI task. + * \param sk Sense key. + * \param asc Additional sense code. + * \param ascq Additional sense code qualifier. + */ +void spdk_scsi_task_build_sense_data(struct spdk_scsi_task *task, int sk, int asc, + int ascq); + +/** + * Set SCSI status code to the SCSI task. When the status code is CHECK CONDITION, + * sense data is built too. + * + * \param task SCSI task. + * \param sc SCSI status code. + * \param sk Sense key. + * \param asc Additional sense code. + * \param ascq Additional sense code qualifier. + */ +void spdk_scsi_task_set_status(struct spdk_scsi_task *task, int sc, int sk, int asc, + int ascq); + +/** + * Copy SCSI status. + * + * \param dst SCSI task whose status is written to. + * \param src SCSI task whose status is read from. + */ +void spdk_scsi_task_copy_status(struct spdk_scsi_task *dst, struct spdk_scsi_task *src); + +/** + * Process the SCSI task when no LUN is attached. + * + * \param task SCSI task. + */ +void spdk_scsi_task_process_null_lun(struct spdk_scsi_task *task); + +/** + * Process the aborted SCSI task. + * + * \param task SCSI task. + */ +void spdk_scsi_task_process_abort(struct spdk_scsi_task *task); + +/** + * Open a logical unit for I/O operations. + * + * The registered callback function must get all tasks from the upper layer + * (e.g. iSCSI) to the LUN done, free the IO channel of the LUN if allocated, + * and then close the LUN. + * + * \param lun Logical unit to open. + * \param hotremove_cb Callback function for hot removal of the logical unit. + * \param hotremove_ctx Param for hot removal callback function. + * \param desc Output parameter for the descriptor when operation is successful. 
+ * \return 0 if operation is successful, suitable errno value otherwise + */ +int spdk_scsi_lun_open(struct spdk_scsi_lun *lun, spdk_scsi_lun_remove_cb_t hotremove_cb, + void *hotremove_ctx, struct spdk_scsi_lun_desc **desc); + +/** + * Close an opened logical unit. + * + * \param desc Descriptor of the logical unit. + */ +void spdk_scsi_lun_close(struct spdk_scsi_lun_desc *desc); + +/** + * Allocate I/O channel for the LUN + * + * \param desc Descriptor of the logical unit. + * + * \return 0 on success, -1 on failure. + */ +int spdk_scsi_lun_allocate_io_channel(struct spdk_scsi_lun_desc *desc); + +/** + * Free I/O channel from the logical unit + * + * \param desc Descriptor of the logical unit. + */ +void spdk_scsi_lun_free_io_channel(struct spdk_scsi_lun_desc *desc); + +/** + * Get DIF context for SCSI LUN and SCSI command. + * + * \param lun Logical unit. + * \param task SCSI task which has the payload. + * \param dif_ctx Output parameter which will contain initialized DIF context. + * + * \return true on success or false otherwise. + */ +bool spdk_scsi_lun_get_dif_ctx(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task, + struct spdk_dif_ctx *dif_ctx); + +/** + * Set iSCSI Initiator port TransportID + * + * \param port SCSI initiator port. + * \param iscsi_name Initiator name. + * \param isid Session ID. 
+ */ +void spdk_scsi_port_set_iscsi_transport_id(struct spdk_scsi_port *port, + char *iscsi_name, uint64_t isid); + +/** + * Convert LUN ID from integer to LUN format + * + * \param lun_id Integer LUN ID + * + * \return LUN format of LUN ID + */ +uint64_t spdk_scsi_lun_id_int_to_fmt(int lun_id); + +/** + * Convert LUN ID from LUN format to integer + * + * \param fmt_lun LUN format of LUN ID + * + * \return integer LUN ID + */ +int spdk_scsi_lun_id_fmt_to_int(uint64_t fmt_lun); +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_SCSI_H */ diff --git a/src/spdk/include/spdk/scsi_spec.h b/src/spdk/include/spdk/scsi_spec.h new file mode 100644 index 000000000..2711c8ea4 --- /dev/null +++ b/src/spdk/include/spdk/scsi_spec.h @@ -0,0 +1,742 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * SCSI specification definitions + */ + +#ifndef SPDK_SCSI_SPEC_H +#define SPDK_SCSI_SPEC_H + +#include "spdk/stdinc.h" + +#include "spdk/assert.h" + +enum spdk_scsi_group_code { + SPDK_SCSI_6BYTE_CMD = 0x00, + SPDK_SCSI_10BYTE_CMD = 0x20, + SPDK_SCSI_10BYTE_CMD2 = 0x40, + SPDK_SCSI_16BYTE_CMD = 0x80, + SPDK_SCSI_12BYTE_CMD = 0xa0, +}; + +#define SPDK_SCSI_GROUP_MASK 0xe0 +#define SPDK_SCSI_OPCODE_MASK 0x1f + +enum spdk_scsi_status { + SPDK_SCSI_STATUS_GOOD = 0x00, + SPDK_SCSI_STATUS_CHECK_CONDITION = 0x02, + SPDK_SCSI_STATUS_CONDITION_MET = 0x04, + SPDK_SCSI_STATUS_BUSY = 0x08, + SPDK_SCSI_STATUS_INTERMEDIATE = 0x10, + SPDK_SCSI_STATUS_INTERMEDIATE_CONDITION_MET = 0x14, + SPDK_SCSI_STATUS_RESERVATION_CONFLICT = 0x18, + SPDK_SCSI_STATUS_Obsolete = 0x22, + SPDK_SCSI_STATUS_TASK_SET_FULL = 0x28, + SPDK_SCSI_STATUS_ACA_ACTIVE = 0x30, + SPDK_SCSI_STATUS_TASK_ABORTED = 0x40, +}; + +enum spdk_scsi_sense { + SPDK_SCSI_SENSE_NO_SENSE = 0x00, + SPDK_SCSI_SENSE_RECOVERED_ERROR = 0x01, + SPDK_SCSI_SENSE_NOT_READY = 0x02, + SPDK_SCSI_SENSE_MEDIUM_ERROR = 0x03, + SPDK_SCSI_SENSE_HARDWARE_ERROR = 0x04, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST = 0x05, + SPDK_SCSI_SENSE_UNIT_ATTENTION = 0x06, + SPDK_SCSI_SENSE_DATA_PROTECT = 0x07, + SPDK_SCSI_SENSE_BLANK_CHECK = 0x08, + SPDK_SCSI_SENSE_VENDOR_SPECIFIC = 0x09, + SPDK_SCSI_SENSE_COPY_ABORTED = 0x0a, + SPDK_SCSI_SENSE_ABORTED_COMMAND = 0x0b, + SPDK_SCSI_SENSE_VOLUME_OVERFLOW = 0x0d, + 
SPDK_SCSI_SENSE_MISCOMPARE = 0x0e, +}; + +enum spdk_scsi_asc { + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE = 0x00, + SPDK_SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03, + SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY = 0x04, + SPDK_SCSI_ASC_WARNING = 0x0b, + SPDK_SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x10, + SPDK_SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x10, + SPDK_SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x10, + SPDK_SCSI_ASC_UNRECOVERED_READ_ERROR = 0x11, + SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION = 0x1d, + SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE = 0x20, + SPDK_SCSI_ASC_ACCESS_DENIED = 0x20, + SPDK_SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE = 0x21, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB = 0x24, + SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED = 0x25, + SPDK_SCSI_ASC_WRITE_PROTECTED = 0x27, + SPDK_SCSI_ASC_FORMAT_COMMAND_FAILED = 0x31, + SPDK_SCSI_ASC_SAVING_PARAMETERS_NOT_SUPPORTED = 0x39, + SPDK_SCSI_ASC_INTERNAL_TARGET_FAILURE = 0x44, +}; + +enum spdk_scsi_ascq { + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE = 0x00, + SPDK_SCSI_ASCQ_BECOMING_READY = 0x01, + SPDK_SCSI_ASCQ_FORMAT_COMMAND_FAILED = 0x01, + SPDK_SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x01, + SPDK_SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x02, + SPDK_SCSI_ASCQ_NO_ACCESS_RIGHTS = 0x02, + SPDK_SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x03, + SPDK_SCSI_ASCQ_POWER_LOSS_EXPECTED = 0x08, + SPDK_SCSI_ASCQ_INVALID_LU_IDENTIFIER = 0x09, +}; + +enum spdk_spc_opcode { + /* SPC3 related */ + SPDK_SPC_ACCESS_CONTROL_IN = 0x86, + SPDK_SPC_ACCESS_CONTROL_OUT = 0x87, + SPDK_SPC_EXTENDED_COPY = 0x83, + SPDK_SPC_INQUIRY = 0x12, + SPDK_SPC_LOG_SELECT = 0x4c, + SPDK_SPC_LOG_SENSE = 0x4d, + SPDK_SPC_MODE_SELECT_6 = 0x15, + SPDK_SPC_MODE_SELECT_10 = 0x55, + SPDK_SPC_MODE_SENSE_6 = 0x1a, + SPDK_SPC_MODE_SENSE_10 = 0x5a, + SPDK_SPC_PERSISTENT_RESERVE_IN = 0x5e, + SPDK_SPC_PERSISTENT_RESERVE_OUT = 0x5f, + SPDK_SPC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + SPDK_SPC_READ_ATTRIBUTE = 0x8c, + SPDK_SPC_READ_BUFFER = 
0x3c, + SPDK_SPC_RECEIVE_COPY_RESULTS = 0x84, + SPDK_SPC_RECEIVE_DIAGNOSTIC_RESULTS = 0x1c, + SPDK_SPC_REPORT_LUNS = 0xa0, + SPDK_SPC_REQUEST_SENSE = 0x03, + SPDK_SPC_SEND_DIAGNOSTIC = 0x1d, + SPDK_SPC_TEST_UNIT_READY = 0x00, + SPDK_SPC_WRITE_ATTRIBUTE = 0x8d, + SPDK_SPC_WRITE_BUFFER = 0x3b, + + SPDK_SPC_SERVICE_ACTION_IN_12 = 0xab, + SPDK_SPC_SERVICE_ACTION_OUT_12 = 0xa9, + SPDK_SPC_SERVICE_ACTION_IN_16 = 0x9e, + SPDK_SPC_SERVICE_ACTION_OUT_16 = 0x9f, + + SPDK_SPC_VARIABLE_LENGTH = 0x7f, + + SPDK_SPC_MO_CHANGE_ALIASES = 0x0b, + SPDK_SPC_MO_SET_DEVICE_IDENTIFIER = 0x06, + SPDK_SPC_MO_SET_PRIORITY = 0x0e, + SPDK_SPC_MO_SET_TARGET_PORT_GROUPS = 0x0a, + SPDK_SPC_MO_SET_TIMESTAMP = 0x0f, + SPDK_SPC_MI_REPORT_ALIASES = 0x0b, + SPDK_SPC_MI_REPORT_DEVICE_IDENTIFIER = 0x05, + SPDK_SPC_MI_REPORT_PRIORITY = 0x0e, + SPDK_SPC_MI_REPORT_SUPPORTED_OPERATION_CODES = 0x0c, + SPDK_SPC_MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS = 0x0d, + SPDK_SPC_MI_REPORT_TARGET_PORT_GROUPS = 0x0a, + SPDK_SPC_MI_REPORT_TIMESTAMP = 0x0f, + + /* SPC2 related (Obsolete) */ + SPDK_SPC2_RELEASE_6 = 0x17, + SPDK_SPC2_RELEASE_10 = 0x57, + SPDK_SPC2_RESERVE_6 = 0x16, + SPDK_SPC2_RESERVE_10 = 0x56, +}; + +enum spdk_scc_opcode { + SPDK_SCC_MAINTENANCE_IN = 0xa3, + SPDK_SCC_MAINTENANCE_OUT = 0xa4, +}; + +enum spdk_sbc_opcode { + SPDK_SBC_COMPARE_AND_WRITE = 0x89, + SPDK_SBC_FORMAT_UNIT = 0x04, + SPDK_SBC_GET_LBA_STATUS = 0x0012009e, + SPDK_SBC_ORWRITE_16 = 0x8b, + SPDK_SBC_PRE_FETCH_10 = 0x34, + SPDK_SBC_PRE_FETCH_16 = 0x90, + SPDK_SBC_READ_6 = 0x08, + SPDK_SBC_READ_10 = 0x28, + SPDK_SBC_READ_12 = 0xa8, + SPDK_SBC_READ_16 = 0x88, + SPDK_SBC_READ_ATTRIBUTE = 0x8c, + SPDK_SBC_READ_BUFFER = 0x3c, + SPDK_SBC_READ_CAPACITY_10 = 0x25, + SPDK_SBC_READ_DEFECT_DATA_10 = 0x37, + SPDK_SBC_READ_DEFECT_DATA_12 = 0xb7, + SPDK_SBC_READ_LONG_10 = 0x3e, + SPDK_SBC_REASSIGN_BLOCKS = 0x07, + SPDK_SBC_SANITIZE = 0x48, + SPDK_SBC_START_STOP_UNIT = 0x1b, + SPDK_SBC_SYNCHRONIZE_CACHE_10 = 0x35, + SPDK_SBC_SYNCHRONIZE_CACHE_16 = 
0x91, + SPDK_SBC_UNMAP = 0x42, + SPDK_SBC_VERIFY_10 = 0x2f, + SPDK_SBC_VERIFY_12 = 0xaf, + SPDK_SBC_VERIFY_16 = 0x8f, + SPDK_SBC_WRITE_6 = 0x0a, + SPDK_SBC_WRITE_10 = 0x2a, + SPDK_SBC_WRITE_12 = 0xaa, + SPDK_SBC_WRITE_16 = 0x8a, + SPDK_SBC_WRITE_AND_VERIFY_10 = 0x2e, + SPDK_SBC_WRITE_AND_VERIFY_12 = 0xae, + SPDK_SBC_WRITE_AND_VERIFY_16 = 0x8e, + SPDK_SBC_WRITE_LONG_10 = 0x3f, + SPDK_SBC_WRITE_SAME_10 = 0x41, + SPDK_SBC_WRITE_SAME_16 = 0x93, + SPDK_SBC_XDREAD_10 = 0x52, + SPDK_SBC_XDWRITE_10 = 0x50, + SPDK_SBC_XDWRITEREAD_10 = 0x53, + SPDK_SBC_XPWRITE_10 = 0x51, + + SPDK_SBC_SAI_READ_CAPACITY_16 = 0x10, + SPDK_SBC_SAI_READ_LONG_16 = 0x11, + SPDK_SBC_SAO_WRITE_LONG_16 = 0x11, + + SPDK_SBC_VL_READ_32 = 0x0009, + SPDK_SBC_VL_VERIFY_32 = 0x000a, + SPDK_SBC_VL_WRITE_32 = 0x000b, + SPDK_SBC_VL_WRITE_AND_VERIFY_32 = 0x000c, + SPDK_SBC_VL_WRITE_SAME_32 = 0x000d, + SPDK_SBC_VL_XDREAD_32 = 0x0003, + SPDK_SBC_VL_XDWRITE_32 = 0x0004, + SPDK_SBC_VL_XDWRITEREAD_32 = 0x0007, + SPDK_SBC_VL_XPWRITE_32 = 0x0006, +}; + +#define SPDK_SBC_START_STOP_UNIT_START_BIT (1 << 0) + +enum spdk_mmc_opcode { + /* MMC6 */ + SPDK_MMC_READ_DISC_STRUCTURE = 0xad, + + /* MMC4 */ + SPDK_MMC_BLANK = 0xa1, + SPDK_MMC_CLOSE_TRACK_SESSION = 0x5b, + SPDK_MMC_ERASE_10 = 0x2c, + SPDK_MMC_FORMAT_UNIT = 0x04, + SPDK_MMC_GET_CONFIGURATION = 0x46, + SPDK_MMC_GET_EVENT_STATUS_NOTIFICATION = 0x4a, + SPDK_MMC_GET_PERFORMANCE = 0xac, + SPDK_MMC_INQUIRY = 0x12, + SPDK_MMC_LOAD_UNLOAD_MEDIUM = 0xa6, + SPDK_MMC_MECHANISM_STATUS = 0xbd, + SPDK_MMC_MODE_SELECT_10 = 0x55, + SPDK_MMC_MODE_SENSE_10 = 0x5a, + SPDK_MMC_PAUSE_RESUME = 0x4b, + SPDK_MMC_PLAY_AUDIO_10 = 0x45, + SPDK_MMC_PLAY_AUDIO_12 = 0xa5, + SPDK_MMC_PLAY_AUDIO_MSF = 0x47, + SPDK_MMC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + SPDK_MMC_READ_10 = 0x28, + SPDK_MMC_READ_12 = 0xa8, + SPDK_MMC_READ_BUFFER = 0x3c, + SPDK_MMC_READ_BUFFER_CAPACITY = 0x5c, + SPDK_MMC_READ_CAPACITY = 0x25, + SPDK_MMC_READ_CD = 0xbe, + SPDK_MMC_READ_CD_MSF = 0xb9, + 
SPDK_MMC_READ_DISC_INFORMATION = 0x51, + SPDK_MMC_READ_DVD_STRUCTURE = 0xad, + SPDK_MMC_READ_FORMAT_CAPACITIES = 0x23, + SPDK_MMC_READ_SUB_CHANNEL = 0x42, + SPDK_MMC_READ_TOC_PMA_ATIP = 0x43, + SPDK_MMC_READ_TRACK_INFORMATION = 0x52, + SPDK_MMC_REPAIR_TRACK = 0x58, + SPDK_MMC_REPORT_KEY = 0xa4, + SPDK_MMC_REQUEST_SENSE = 0x03, + SPDK_MMC_RESERVE_TRACK = 0x53, + SPDK_MMC_SCAN = 0xba, + SPDK_MMC_SEEK_10 = 0x2b, + SPDK_MMC_SEND_CUE_SHEET = 0x5d, + SPDK_MMC_SEND_DVD_STRUCTURE = 0xbf, + SPDK_MMC_SEND_KEY = 0xa3, + SPDK_MMC_SEND_OPC_INFORMATION = 0x54, + SPDK_MMC_SET_CD_SPEED = 0xbb, + SPDK_MMC_SET_READ_AHEAD = 0xa7, + SPDK_MMC_SET_STREAMING = 0xb6, + SPDK_MMC_START_STOP_UNIT = 0x1b, + SPDK_MMC_STOP_PLAY_SCAN = 0x4e, + SPDK_MMC_SYNCHRONIZE_CACHE = 0x35, + SPDK_MMC_TEST_UNIT_READY = 0x00, + SPDK_MMC_VERIFY_10 = 0x2f, + /* WRITE (10) is opcode 2Ah, the same value as SPDK_SBC_WRITE_10 */ + SPDK_MMC_WRITE_10 = 0x2a, + SPDK_MMC_WRITE_12 = 0xaa, + SPDK_MMC_WRITE_AND_VERIFY_10 = 0x2e, + SPDK_MMC_WRITE_BUFFER = 0x3b, +}; + +enum spdk_ssc_opcode { + SPDK_SSC_ERASE_6 = 0x19, + SPDK_SSC_FORMAT_MEDIUM = 0x04, + SPDK_SSC_LOAD_UNLOAD = 0x1b, + SPDK_SSC_LOCATE_10 = 0x2b, + SPDK_SSC_LOCATE_16 = 0x92, + SPDK_SSC_MOVE_MEDIUM_ATTACHED = 0xa7, + SPDK_SSC_READ_6 = 0x08, + SPDK_SSC_READ_BLOCK_LIMITS = 0x05, + SPDK_SSC_READ_ELEMENT_STATUS_ATTACHED = 0xb4, + SPDK_SSC_READ_POSITION = 0x34, + SPDK_SSC_READ_REVERSE_6 = 0x0f, + SPDK_SSC_RECOVER_BUFFERED_DATA = 0x14, + SPDK_SSC_REPORT_DENSITY_SUPPORT = 0x44, + SPDK_SSC_REWIND = 0x01, + SPDK_SSC_SET_CAPACITY = 0x0b, + SPDK_SSC_SPACE_6 = 0x11, + SPDK_SSC_SPACE_16 = 0x91, + SPDK_SSC_VERIFY_6 = 0x13, + SPDK_SSC_WRITE_6 = 0x0a, + SPDK_SSC_WRITE_FILEMARKS_6 = 0x10, +}; + +enum spdk_spc_vpd { + SPDK_SPC_VPD_DEVICE_IDENTIFICATION = 0x83, + SPDK_SPC_VPD_EXTENDED_INQUIRY_DATA = 0x86, + SPDK_SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES = 0x85, + SPDK_SPC_VPD_MODE_PAGE_POLICY = 0x87, + SPDK_SPC_VPD_SCSI_PORTS = 0x88, + SPDK_SPC_VPD_SOFTWARE_INTERFACE_IDENTIFICATION = 0x84, + SPDK_SPC_VPD_SUPPORTED_VPD_PAGES = 0x00, + 
SPDK_SPC_VPD_UNIT_SERIAL_NUMBER = 0x80, + SPDK_SPC_VPD_BLOCK_LIMITS = 0xb0, + SPDK_SPC_VPD_BLOCK_DEV_CHARS = 0xb1, + SPDK_SPC_VPD_BLOCK_THIN_PROVISION = 0xb2, +}; + +enum spdk_spc_peripheral_qualifier { + SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED = 0, + SPDK_SPC_PERIPHERAL_QUALIFIER_NOT_CONNECTED = 1, + SPDK_SPC_PERIPHERAL_QUALIFIER_NOT_CAPABLE = 3, +}; + +enum { + SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DISK = 0x00, + SPDK_SPC_PERIPHERAL_DEVICE_TYPE_TAPE = 0x01, + SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DVD = 0x05, + SPDK_SPC_PERIPHERAL_DEVICE_TYPE_CHANGER = 0x08, + + SPDK_SPC_VERSION_NONE = 0x00, + SPDK_SPC_VERSION_SPC = 0x03, + SPDK_SPC_VERSION_SPC2 = 0x04, + SPDK_SPC_VERSION_SPC3 = 0x05, + SPDK_SPC_VERSION_SPC4 = 0x06, + + SPDK_SPC_PROTOCOL_IDENTIFIER_FC = 0x00, + SPDK_SPC_PROTOCOL_IDENTIFIER_PSCSI = 0x01, + SPDK_SPC_PROTOCOL_IDENTIFIER_SSA = 0x02, + SPDK_SPC_PROTOCOL_IDENTIFIER_IEEE1394 = 0x03, + SPDK_SPC_PROTOCOL_IDENTIFIER_RDMA = 0x04, + SPDK_SPC_PROTOCOL_IDENTIFIER_ISCSI = 0x05, + SPDK_SPC_PROTOCOL_IDENTIFIER_SAS = 0x06, + SPDK_SPC_PROTOCOL_IDENTIFIER_ADT = 0x07, + SPDK_SPC_PROTOCOL_IDENTIFIER_ATA = 0x08, + + SPDK_SPC_VPD_CODE_SET_BINARY = 0x01, + SPDK_SPC_VPD_CODE_SET_ASCII = 0x02, + SPDK_SPC_VPD_CODE_SET_UTF8 = 0x03, + + SPDK_SPC_VPD_ASSOCIATION_LOGICAL_UNIT = 0x00, + SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT = 0x01, + SPDK_SPC_VPD_ASSOCIATION_TARGET_DEVICE = 0x02, + + SPDK_SPC_VPD_IDENTIFIER_TYPE_VENDOR_SPECIFIC = 0x00, + SPDK_SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID = 0x01, + SPDK_SPC_VPD_IDENTIFIER_TYPE_EUI64 = 0x02, + SPDK_SPC_VPD_IDENTIFIER_TYPE_NAA = 0x03, + SPDK_SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT = 0x04, + SPDK_SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP = 0x05, + SPDK_SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP = 0x06, + SPDK_SPC_VPD_IDENTIFIER_TYPE_MD5_LOGICAL_UNIT = 0x07, + SPDK_SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME = 0x08, +}; + +struct spdk_scsi_cdb_inquiry { + uint8_t opcode; + uint8_t evpd; + uint8_t page_code; + uint8_t alloc_len[2]; + uint8_t control; +}; 
+SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_cdb_inquiry) == 6, "incorrect CDB size"); + +struct spdk_scsi_cdb_inquiry_data { + uint8_t peripheral_device_type : 5; + uint8_t peripheral_qualifier : 3; + uint8_t rmb; + uint8_t version; + uint8_t response; + uint8_t add_len; + uint8_t flags; + uint8_t flags2; + uint8_t flags3; + uint8_t t10_vendor_id[8]; + uint8_t product_id[16]; + uint8_t product_rev[4]; + uint8_t vendor[20]; + uint8_t ius; + uint8_t reserved; + uint8_t desc[]; +}; + +struct spdk_scsi_vpd_page { + uint8_t peripheral_device_type : 5; + uint8_t peripheral_qualifier : 3; + uint8_t page_code; + uint8_t alloc_len[2]; + uint8_t params[]; +}; + +#define SPDK_SCSI_VEXT_REF_CHK 0x01 +#define SPDK_SCSI_VEXT_APP_CHK 0x02 +#define SPDK_SCSI_VEXT_GRD_CHK 0x04 +#define SPDK_SCSI_VEXT_SIMPSUP 0x01 +#define SPDK_SCSI_VEXT_ORDSUP 0x02 +#define SPDK_SCSI_VEXT_HEADSUP 0x04 +#define SPDK_SCSI_VEXT_PRIOR_SUP 0x08 +#define SPDK_SCSI_VEXT_GROUP_SUP 0x10 +#define SPDK_SCSI_VEXT_UASK_SUP 0x20 +#define SPDK_SCSI_VEXT_V_SUP 0x01 +#define SPDK_SCSI_VEXT_NV_SUP 0x02 +#define SPDK_SCSI_VEXT_CRD_SUP 0x04 +#define SPDK_SCSI_VEXT_WU_SUP 0x08 + +struct spdk_scsi_vpd_ext_inquiry { + uint8_t peripheral; + uint8_t page_code; + uint8_t alloc_len[2]; + uint8_t check; + uint8_t sup; + uint8_t sup2; + uint8_t luiclr; + uint8_t cbcs; + uint8_t micro_dl; + uint8_t reserved[54]; +}; + +#define SPDK_SPC_VPD_DESIG_PIV 0x80 + +/* designation descriptor */ +struct spdk_scsi_desig_desc { + uint8_t code_set : 4; + uint8_t protocol_id : 4; + uint8_t type : 4; + uint8_t association : 2; + uint8_t reserved0 : 1; + uint8_t piv : 1; + uint8_t reserved1; + uint8_t len; + uint8_t desig[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_desig_desc) == 4, "Invalid size"); + +/* mode page policy descriptor */ +struct spdk_scsi_mpage_policy_desc { + uint8_t page_code; + uint8_t sub_page_code; + uint8_t policy; + uint8_t reserved; +}; + +/* target port descriptor */ +struct spdk_scsi_tgt_port_desc { + uint8_t 
code_set; + uint8_t desig_type; + uint8_t reserved; + uint8_t len; + uint8_t designator[]; +}; + +/* SCSI port designation descriptor */ +struct spdk_scsi_port_desc { + uint16_t reserved; + uint16_t rel_port_id; + uint16_t reserved2; + uint16_t init_port_len; + uint16_t init_port_id; + uint16_t reserved3; + uint16_t tgt_desc_len; + uint8_t tgt_desc[]; +}; + +/* iSCSI initiator port TransportID header */ +struct spdk_scsi_iscsi_transport_id { + uint8_t protocol_id : 4; + uint8_t reserved1 : 2; + uint8_t format : 2; + uint8_t reserved2; + uint16_t additional_len; + uint8_t name[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_iscsi_transport_id) == 4, "Incorrect size"); + +/* SCSI UNMAP block descriptor */ +struct spdk_scsi_unmap_bdesc { + /* UNMAP LOGICAL BLOCK ADDRESS */ + uint64_t lba; + + /* NUMBER OF LOGICAL BLOCKS */ + uint32_t block_count; + + /* RESERVED */ + uint32_t reserved; +}; + +/* SCSI Persistent Reserve In action codes */ +enum spdk_scsi_pr_in_action_code { + /* Read all registered reservation keys */ + SPDK_SCSI_PR_IN_READ_KEYS = 0x00, + /* Read current persistent reservations */ + SPDK_SCSI_PR_IN_READ_RESERVATION = 0x01, + /* Return capabilities information */ + SPDK_SCSI_PR_IN_REPORT_CAPABILITIES = 0x02, + /* Read all registrations and persistent reservations */ + SPDK_SCSI_PR_IN_READ_FULL_STATUS = 0x03, + /* 0x04h - 0x1fh Reserved */ +}; + +enum spdk_scsi_pr_scope_code { + /* Persistent reservation applies to full logical unit */ + SPDK_SCSI_PR_LU_SCOPE = 0x00, +}; + +/* SCSI Persistent Reservation type codes */ +enum spdk_scsi_pr_type_code { + /* Write Exclusive */ + SPDK_SCSI_PR_WRITE_EXCLUSIVE = 0x01, + /* Exclusive Access */ + SPDK_SCSI_PR_EXCLUSIVE_ACCESS = 0x03, + /* Write Exclusive - Registrants Only */ + SPDK_SCSI_PR_WRITE_EXCLUSIVE_REGS_ONLY = 0x05, + /* Exclusive Access - Registrants Only */ + SPDK_SCSI_PR_EXCLUSIVE_ACCESS_REGS_ONLY = 0x06, + /* Write Exclusive - All Registrants */ + SPDK_SCSI_PR_WRITE_EXCLUSIVE_ALL_REGS = 0x07, + /* 
Exclusive Access - All Registrants */ + SPDK_SCSI_PR_EXCLUSIVE_ACCESS_ALL_REGS = 0x08, +}; + +/* SCSI Persistent Reserve In header for + * Read Keys, Read Reservation, Read Full Status + */ +struct spdk_scsi_pr_in_read_header { + /* persistent reservation generation */ + uint32_t pr_generation; + uint32_t additional_len; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_header) == 8, "Incorrect size"); + +/* SCSI Persistent Reserve In read keys data */ +struct spdk_scsi_pr_in_read_keys_data { + struct spdk_scsi_pr_in_read_header header; + /* reservation key list */ + uint64_t rkeys[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_keys_data) == 8, "Incorrect size"); + +/* SCSI Persistent Reserve In read reservations data */ +struct spdk_scsi_pr_in_read_reservations_data { + /* Fixed 0x10 with reservation and 0 for no reservation */ + struct spdk_scsi_pr_in_read_header header; + /* reservation key */ + uint64_t rkey; + uint32_t obsolete1; + uint8_t reserved; + uint8_t type : 4; + uint8_t scope : 4; + uint16_t obsolete2; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_read_reservations_data) == 24, "Incorrect size"); + +/* SCSI Persistent Reserve In report capabilities data */ +struct spdk_scsi_pr_in_report_capabilities_data { + /* Fixed value 0x8 */ + uint16_t length; + + /* Persist through power loss capable */ + uint8_t ptpl_c : 1; + uint8_t reserved1 : 1; + /* All target ports capable */ + uint8_t atp_c : 1; + /* Specify initiator port capable */ + uint8_t sip_c : 1; + /* Compatible reservation handling bit to indicate + * SPC-2 reserve/release is supported + */ + uint8_t crh : 1; + uint8_t reserved2 : 3; + /* Persist through power loss activated */ + uint8_t ptpl_a : 1; + uint8_t reserved3 : 6; + /* Type mask valid */ + uint8_t tmv : 1; + + /* Type mask format */ + uint8_t reserved4 : 1; + /* Write Exclusive */ + uint8_t wr_ex : 1; + uint8_t reserved5 : 1; + /* Exclusive Access */ + uint8_t ex_ac : 1; + uint8_t reserved6 : 1; + /* Write 
Exclusive - Registrants Only */ + uint8_t wr_ex_ro : 1; + /* Exclusive Access - Registrants Only */ + uint8_t ex_ac_ro : 1; + /* Write Exclusive - All Registrants */ + uint8_t wr_ex_ar : 1; + /* Exclusive Access - All Registrants */ + uint8_t ex_ac_ar : 1; + uint8_t reserved7 : 7; + + uint8_t reserved8[2]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_report_capabilities_data) == 8, "Incorrect size"); + +/* SCSI Persistent Reserve In full status descriptor */ +struct spdk_scsi_pr_in_full_status_desc { + /* Reservation key */ + uint64_t rkey; + uint8_t reserved1[4]; + + /* 0 - Registrant only + * 1 - Registrant and reservation holder + */ + uint8_t r_holder : 1; + /* All target ports */ + uint8_t all_tg_pt : 1; + uint8_t reserved2 : 6; + + /* Reservation type */ + uint8_t type : 4; + /* Set to LU_SCOPE */ + uint8_t scope : 4; + + uint8_t reserved3[4]; + uint16_t relative_target_port_id; + /* Size of TransportID */ + uint32_t desc_len; + + uint8_t transport_id[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_full_status_desc) == 24, "Incorrect size"); + +/* SCSI Persistent Reserve In full status data */ +struct spdk_scsi_pr_in_full_status_data { + struct spdk_scsi_pr_in_read_header header; + /* Full status descriptors */ + struct spdk_scsi_pr_in_full_status_desc desc_list[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_in_full_status_data) == 8, "Incorrect size"); + +/* SCSI Persistent Reserve Out service action codes */ +enum spdk_scsi_pr_out_service_action_code { + /* Register/unregister a reservation key */ + SPDK_SCSI_PR_OUT_REGISTER = 0x00, + /* Create a persistent reservation */ + SPDK_SCSI_PR_OUT_RESERVE = 0x01, + /* Release a persistent reservation */ + SPDK_SCSI_PR_OUT_RELEASE = 0x02, + /* Clear all reservation keys and persistent reservations */ + SPDK_SCSI_PR_OUT_CLEAR = 0x03, + /* Preempt persistent reservations and/or remove registrants */ + SPDK_SCSI_PR_OUT_PREEMPT = 0x04, + /* Preempt persistent reservations and or remove 
registrants + * and abort all tasks for all preempted I_T nexuses + */ + SPDK_SCSI_PR_OUT_PREEMPT_AND_ABORT = 0x05, + /* Register/unregister a reservation key based on the ignore bit */ + SPDK_SCSI_PR_OUT_REG_AND_IGNORE_KEY = 0x06, + /* Register a reservation key for another I_T nexus + * and move a persistent reservation to that I_T nexus + */ + SPDK_SCSI_PR_OUT_REG_AND_MOVE = 0x07, + /* 0x08 - 0x1f Reserved */ +}; + +/* SCSI Persistent Reserve Out parameter list */ +struct spdk_scsi_pr_out_param_list { + /* Reservation key */ + uint64_t rkey; + /* Service action reservation key */ + uint64_t sa_rkey; + uint8_t obsolete1[4]; + + /* Active persist through power loss */ + uint8_t aptpl : 1; + uint8_t reserved1 : 1; + /* All target ports */ + uint8_t all_tg_pt : 1; + /* Specify initiator ports */ + uint8_t spec_i_pt : 1; + uint8_t reserved2 : 4; + + uint8_t reserved3; + uint16_t obsolete2; + + uint8_t param_data[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_out_param_list) == 24, "Incorrect size"); + +struct spdk_scsi_pr_out_reg_and_move_param_list { + /* Reservation key */ + uint64_t rkey; + /* Service action reservation key */ + uint64_t sa_rkey; + uint8_t reserved1; + + /* Active persist through power loss */ + uint8_t aptpl : 1; + /* Unregister */ + uint8_t unreg : 1; + uint8_t reserved2 : 6; + + uint16_t relative_target_port_id; + /* TransportID parameter data length */ + uint32_t transport_id_len; + uint8_t transport_id[]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_scsi_pr_out_reg_and_move_param_list) == 24, "Incorrect size"); + +/* + * SPC-4 + * Table-258 SECURITY PROTOCOL field in SECURITY PROTOCOL IN command + */ +#define SPDK_SCSI_SECP_INFO 0x00 +#define SPDK_SCSI_SECP_TCG 0x01 + +#define SPDK_SCSI_UNMAP_LBPU 1 << 7 +#define SPDK_SCSI_UNMAP_LBPWS 1 << 6 +#define SPDK_SCSI_UNMAP_LBPWS10 1 << 5 + +#define SPDK_SCSI_UNMAP_FULL_PROVISIONING 0x00 +#define SPDK_SCSI_UNMAP_RESOURCE_PROVISIONING 0x01 +#define SPDK_SCSI_UNMAP_THIN_PROVISIONING 0x02 + +#endif 
/* SPDK_SCSI_SPEC_H */ diff --git a/src/spdk/include/spdk/sock.h b/src/spdk/include/spdk/sock.h new file mode 100644 index 000000000..f70a2ac39 --- /dev/null +++ b/src/spdk/include/spdk/sock.h @@ -0,0 +1,475 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * TCP socket abstraction layer + */ + +#ifndef SPDK_SOCK_H +#define SPDK_SOCK_H + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" +#include "spdk/json.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_sock; +struct spdk_sock_group; + +/** + * Anywhere this struct is used, an iovec array is assumed to + * immediately follow the last member in memory, without any + * padding. + * + * A simpler implementation would be to place a 0-length array + * of struct iovec at the end of this request. However, embedding + * a structure that ends with a variable length array inside of + * another structure is a GNU C extension and not standard. + */ +struct spdk_sock_request { + /* When the request is completed, this callback will be called. + * err will be 0 on success or a negated errno value on failure. */ + void (*cb_fn)(void *cb_arg, int err); + void *cb_arg; + + /** + * These fields are used by the socket layer and should not be modified + */ + struct __sock_request_internal { + TAILQ_ENTRY(spdk_sock_request) link; + uint32_t offset; + } internal; + + int iovcnt; + /* struct iovec iov[]; */ +}; + +/** + * Get a pointer to the i-th iovec of the array stored directly after \a req in memory. + * Both arguments are parenthesized so that expressions such as `n + 1` expand correctly. + */ +#define SPDK_SOCK_REQUEST_IOV(req, i) ((struct iovec *)(((uint8_t *)(req) + sizeof(struct spdk_sock_request)) + (sizeof(struct iovec) * (i)))) + +/** + * SPDK socket implementation options. + * + * A pointer to this structure is used by spdk_sock_impl_get_opts() and spdk_sock_impl_set_opts() + * to allow the user to request options for the socket module implementation. + * Each socket module defines which options from this structure are applicable to the module. + */ +struct spdk_sock_impl_opts { + /** + * Size of sock receive buffer. Used by posix socket module. + */ + uint32_t recv_buf_size; + + /** + * Size of sock send buffer. Used by posix socket module. + */ + uint32_t send_buf_size; + + /** + * Enable or disable receive pipe. Used by posix socket module. 
+ */ + bool enable_recv_pipe; + + /** + * Enable or disable use of zero copy flow on send. Used by posix socket module. + */ + bool enable_zerocopy_send; +}; + +/** + * Spdk socket initialization options. + * + * A pointer to this structure will be used by spdk_sock_listen_ext() or spdk_sock_connect_ext() to + * allow the user to request non-default options on the socket. + */ +struct spdk_sock_opts { + /** + * The size of spdk_sock_opts according to the caller of this library is used for ABI + * compatibility. The library uses this field to know how many fields in this + * structure are valid. And the library will populate any remaining fields with default values. + */ + size_t opts_size; + + /** + * The priority on the socket and default value is zero. + */ + int priority; +}; + +/** + * Initialize the default value of opts. + * + * \param opts Data structure where SPDK will initialize the default sock options. + * Users must set opts_size to sizeof(struct spdk_sock_opts). This will ensure that the + * library only tries to fill as many fields as allocated by the caller. This allows ABI + * compatibility with future versions of this library that may extend the spdk_sock_opts + * structure. + */ +void spdk_sock_get_default_opts(struct spdk_sock_opts *opts); + +/** + * Get client and server addresses of the given socket. + * + * \param sock Socket to get address. + * \param saddr A pointer to the buffer to hold the address of server. + * \param slen Length of the buffer 'saddr'. + * \param sport A pointer(May be NULL) to the buffer to hold the port info of server. + * \param caddr A pointer to the buffer to hold the address of client. + * \param clen Length of the buffer 'caddr'. + * \param cport A pointer(May be NULL) to the buffer to hold the port info of client. + * + * \return 0 on success, -1 on failure. 
+ */ +int spdk_sock_getaddr(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, + char *caddr, int clen, uint16_t *cport); + +/** + * Create a socket using the specific sock implementation, connect the socket + * to the specified address and port (of the server), and then return the socket. + * This function is used by client. + * + * \param ip IP address of the server. + * \param port Port number of the server. + * \param impl_name The sock_implementation to use, such as "posix". If impl_name is + * specified, it will *only* try to connect on that impl. If it is NULL, it will try + * all the sock implementations in order and uses the first sock implementation which + * can connect. For example, it may try vpp first, then fall back to posix. + * + * \return a pointer to the connected socket on success, or NULL on failure. + */ +struct spdk_sock *spdk_sock_connect(const char *ip, int port, char *impl_name); + +/** + * Create a socket using the specific sock implementation, connect the socket + * to the specified address and port (of the server), and then return the socket. + * This function is used by client. + * + * \param ip IP address of the server. + * \param port Port number of the server. + * \param impl_name The sock_implementation to use, such as "posix". If impl_name is + * specified, it will *only* try to connect on that impl. If it is NULL, it will try + * all the sock implementations in order and uses the first sock implementation which + * can connect. For example, it may try vpp first, then fall back to posix. + * \param opts The sock option pointer provided by the user which should not be NULL pointer. + * + * \return a pointer to the connected socket on success, or NULL on failure. 
+ */ +struct spdk_sock *spdk_sock_connect_ext(const char *ip, int port, char *impl_name, + struct spdk_sock_opts *opts); + +/** + * Create a socket using the specific sock implementation, bind the socket to + * the specified address and port and listen on the socket, and then return the socket. + * This function is used by server. + * + * \param ip IP address to listen on. + * \param port Port number. + * \param impl_name The sock_implementation to use, such as "posix". If impl_name is + * specified, it will *only* try to listen on that impl. If it is NULL, it will try + * all the sock implementations in order and uses the first sock implementation which + * can listen. For example, it may try vpp first, then fall back to posix. + * + * \return a pointer to the listened socket on success, or NULL on failure. + */ +struct spdk_sock *spdk_sock_listen(const char *ip, int port, char *impl_name); + +/** + * Create a socket using the specific sock implementation, bind the socket to + * the specified address and port and listen on the socket, and then return the socket. + * This function is used by server. + * + * \param ip IP address to listen on. + * \param port Port number. + * \param impl_name The sock_implementation to use, such as "posix". If impl_name is + * specified, it will *only* try to listen on that impl. If it is NULL, it will try + * all the sock implementations in order and uses the first sock implementation which + * can listen. For example, it may try vpp first, then fall back to posix. + * \param opts The sock option pointer provided by the user, which should not be NULL pointer. + * + * \return a pointer to the listened socket on success, or NULL on failure. + */ +struct spdk_sock *spdk_sock_listen_ext(const char *ip, int port, char *impl_name, + struct spdk_sock_opts *opts); + +/** + * Accept a new connection from a client on the specified socket and return a + * socket structure which holds the connection. + * + * \param sock Listening socket. 
+ * + * \return a pointer to the accepted socket on success, or NULL on failure. + */ +struct spdk_sock *spdk_sock_accept(struct spdk_sock *sock); + +/** + * Close a socket. + * + * \param sock Socket to close. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_close(struct spdk_sock **sock); + +/** + * Flush a socket from data gathered in previous writev_async calls. + * + * \param sock Socket to flush. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_flush(struct spdk_sock *sock); + +/** + * Receive a message from the given socket. + * + * \param sock Socket to receive message. + * \param buf Pointer to a buffer to hold the data. + * \param len Length of the buffer. + * + * \return the length of the received message on success, -1 on failure. + */ +ssize_t spdk_sock_recv(struct spdk_sock *sock, void *buf, size_t len); + +/** + * Write message to the given socket from the I/O vector array. + * + * \param sock Socket to write to. + * \param iov I/O vector. + * \param iovcnt Number of I/O vectors in the array. + * + * \return the length of written message on success, -1 on failure. + */ +ssize_t spdk_sock_writev(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + +/** + * Write data to the given socket asynchronously, calling + * the provided callback when the data has been written. + * + * \param sock Socket to write to. + * \param req The write request to submit. + */ +void spdk_sock_writev_async(struct spdk_sock *sock, struct spdk_sock_request *req); + +/** + * Read message from the given socket to the I/O vector array. + * + * \param sock Socket to receive message. + * \param iov I/O vector. + * \param iovcnt Number of I/O vectors in the array. + * + * \return the length of the received message on success, -1 on failure. + */ +ssize_t spdk_sock_readv(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + +/** + * Set the value used to specify the low water mark (in bytes) for this socket. 
+ * + * \param sock Socket to set for. + * \param nbytes Value for recvlowat. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_set_recvlowat(struct spdk_sock *sock, int nbytes); + +/** + * Set receive buffer size for the given socket. + * + * \param sock Socket to set buffer size for. + * \param sz Buffer size in bytes. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_set_recvbuf(struct spdk_sock *sock, int sz); + +/** + * Set send buffer size for the given socket. + * + * \param sock Socket to set buffer size for. + * \param sz Buffer size in bytes. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_set_sendbuf(struct spdk_sock *sock, int sz); + +/** + * Check whether the address of socket is ipv6. + * + * \param sock Socket to check. + * + * \return true if the address of socket is ipv6, or false otherwise. + */ +bool spdk_sock_is_ipv6(struct spdk_sock *sock); + +/** + * Check whether the address of socket is ipv4. + * + * \param sock Socket to check. + * + * \return true if the address of socket is ipv4, or false otherwise. + */ +bool spdk_sock_is_ipv4(struct spdk_sock *sock); + +/** + * Check whether the socket is currently connected. + * + * \param sock Socket to check + * + * \return true if the socket is connected or false otherwise. + */ +bool spdk_sock_is_connected(struct spdk_sock *sock); + +/** + * Callback function for spdk_sock_group_add_sock(). + * + * \param arg Argument for the callback function. + * \param group Socket group. + * \param sock Socket. + */ +typedef void (*spdk_sock_cb)(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock); + +/** + * Create a new socket group with user provided pointer + * + * \param ctx the context provided by user. + * \return a pointer to the created group on success, or NULL on failure. + */ +struct spdk_sock_group *spdk_sock_group_create(void *ctx); + +/** + * Get the ctx of the sock group + * + * \param sock_group Socket group. 
+ * \return a pointer which is ctx of the sock_group. + */ +void *spdk_sock_group_get_ctx(struct spdk_sock_group *sock_group); + + +/** + * Add a socket to the group. + * + * \param group Socket group. + * \param sock Socket to add. + * \param cb_fn Called when the operation completes. + * \param cb_arg Argument passed to the callback function. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_group_add_sock(struct spdk_sock_group *group, struct spdk_sock *sock, + spdk_sock_cb cb_fn, void *cb_arg); + +/** + * Remove a socket from the group. + * + * \param group Socket group. + * \param sock Socket to remove. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_group_remove_sock(struct spdk_sock_group *group, struct spdk_sock *sock); + +/** + * Poll incoming events for each registered socket. + * + * \param group Group to poll. + * + * \return the number of events on success, -1 on failure. + */ +int spdk_sock_group_poll(struct spdk_sock_group *group); + +/** + * Poll incoming events up to max_events for each registered socket. + * + * \param group Group to poll. + * \param max_events Number of maximum events to poll for each socket. + * + * \return the number of events on success, -1 on failure. + */ +int spdk_sock_group_poll_count(struct spdk_sock_group *group, int max_events); + +/** + * Close all registered sockets of the group and then remove the group. + * + * \param group Group to close. + * + * \return 0 on success, -1 on failure. + */ +int spdk_sock_group_close(struct spdk_sock_group **group); + +/** + * Get the optimal sock group for this sock. + * + * \param sock The socket + * \param group Returns the optimal sock group. If there is no optimal sock group, returns NULL. + * + * \return 0 on success. Negated errno on failure. + */ +int spdk_sock_get_optimal_sock_group(struct spdk_sock *sock, struct spdk_sock_group **group); + +/** + * Get current socket implementation options. 
+ * + * \param impl_name The socket implementation to use, such as "posix". + * \param opts Pointer to allocated spdk_sock_impl_opts structure that will be filled with actual values. + * \param len On input specifies size of passed opts structure. On return it is set to actual size that was filled with values. + * + * \return 0 on success, -1 on failure. errno is set to indicate the reason of failure. + */ +int spdk_sock_impl_get_opts(const char *impl_name, struct spdk_sock_impl_opts *opts, size_t *len); + +/** + * Set socket implementation options. + * + * \param impl_name The socket implementation to use, such as "posix". + * \param opts Pointer to allocated spdk_sock_impl_opts structure with new options values. + * \param len Size of passed opts structure. + * + * \return 0 on success, -1 on failure. errno is set to indicate the reason of failure. + */ +int spdk_sock_impl_set_opts(const char *impl_name, const struct spdk_sock_impl_opts *opts, + size_t len); + +/** + * Write socket subsystem configuration into provided JSON context. + * + * \param w JSON write context + */ +void spdk_sock_write_config_json(struct spdk_json_write_ctx *w); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_SOCK_H */ diff --git a/src/spdk/include/spdk/stdinc.h b/src/spdk/include/spdk/stdinc.h new file mode 100644 index 000000000..65820d58e --- /dev/null +++ b/src/spdk/include/spdk/stdinc.h @@ -0,0 +1,98 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Standard C headers + * + * This file is intended to be included first by all other SPDK files. 
+ */ + +#ifndef SPDK_STDINC_H +#define SPDK_STDINC_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Standard C */ +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <time.h> + +/* POSIX */ +#include <arpa/inet.h> +#include <dirent.h> +#include <fcntl.h> +#include <ifaddrs.h> +#include <netdb.h> +#include <poll.h> +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <syslog.h> +#include <termios.h> +#include <unistd.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <sys/un.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <regex.h> + +/* GNU extension */ +#include <getopt.h> + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_STDINC_H */ diff --git a/src/spdk/include/spdk/string.h b/src/spdk/include/spdk/string.h new file mode 100644 index 000000000..041010e20 --- /dev/null +++ b/src/spdk/include/spdk/string.h @@ -0,0 +1,271 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * String utility functions + */ + +#ifndef SPDK_STRING_H +#define SPDK_STRING_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * sprintf with automatic buffer allocation. + * + * The return value is the formatted string, which should be passed to free() + * when no longer needed. + * + * \param format Format for the string to print. + * + * \return the formatted string on success, or NULL on failure. + */ +char *spdk_sprintf_alloc(const char *format, ...) __attribute__((format(printf, 1, 2))); + +/** + * vsprintf with automatic buffer allocation. + * + * The return value is the formatted string, which should be passed to free() + * when no longer needed. + * + * \param format Format for the string to print. + * \param args A value that identifies a variable arguments list. + * + * \return the formatted string on success, or NULL on failure. 
+ */ +char *spdk_vsprintf_alloc(const char *format, va_list args); + +/** + * Append string using sprintf with automatic buffer re-allocation. + * + * The return value is the formatted string, in which the original string in + * buffer is unchanged and the specified formatted string is appended. + * + * The returned string should be passed to free() when no longer needed. + * + * If buffer is NULL, the call is equivalent to spdk_sprintf_alloc(). + * If the call fails, the original buffer is left untouched. + * + * \param buffer Buffer which has a formatted string. + * \param format Format for the string to print. + * + * \return the formatted string on success, or NULL on failure. + */ +char *spdk_sprintf_append_realloc(char *buffer, const char *format, ...); + +/** + * Append string using vsprintf with automatic buffer re-allocation. + * The return value is the formatted string, in which the original string in + * buffer is unchanged and the specified formatted string is appended. + * + * The returned string should be passed to free() when no longer needed. + * + * If buffer is NULL, the call is equivalent to spdk_vsprintf_alloc(). + * If the call fails, the original buffer is left untouched. + * + * \param buffer Buffer which has a formatted string. + * \param format Format for the string to print. + * \param args A value that identifies a variable arguments list. + * + * \return the formatted string on success, or NULL on failure. + */ +char *spdk_vsprintf_append_realloc(char *buffer, const char *format, va_list args); + +/** + * Convert string to lowercase in place. + * + * \param s String to convert to lowercase. + * + * \return the converted string. + */ +char *spdk_strlwr(char *s); + +/** + * Parse a delimited string with quote handling. + * + * Note that the string will be modified in place to add the string terminator + * to each field. + * + * \param stringp Pointer to starting location in string. 
*stringp will be updated + * to point to the start of the next field, or NULL if the end of the string has + * been reached. + * \param delim Null-terminated string containing the list of accepted delimiters. + * + * \return a pointer to beginning of the current field. + */ +char *spdk_strsepq(char **stringp, const char *delim); + +/** + * Trim whitespace from a string in place. + * + * \param s String to trim. + * + * \return the trimmed string. + */ +char *spdk_str_trim(char *s); + +/** + * Copy the string version of an error into the user supplied buffer + * + * \param errnum Error code. + * \param buf Pointer to a buffer in which to place the error message. + * \param buflen The size of the buffer in bytes. + */ +void spdk_strerror_r(int errnum, char *buf, size_t buflen); + +/** + * Return the string version of an error from a static, thread-local buffer. This + * function is thread safe. + * + * \param errnum Error code. + * + * \return a pointer to buffer upon success. + */ +const char *spdk_strerror(int errnum); + +/** + * Remove trailing newlines from the end of a string in place. + * + * Any sequence of trailing \\r and \\n characters is removed from the end of the + * string. + * + * \param s String to remove newline from. + * + * \return the number of characters removed. + */ +size_t spdk_str_chomp(char *s); + +/** + * Copy a string into a fixed-size buffer, padding extra bytes with a specific + * character. + * + * If src is longer than size, only size bytes will be copied. + * + * \param dst Pointer to destination fixed-size buffer to fill. + * \param src Pointer to source null-terminated string to copy into dst. + * \param size Number of bytes to fill in dst. + * \param pad Character to pad extra space in dst beyond the size of src. + */ +void spdk_strcpy_pad(void *dst, const char *src, size_t size, int pad); + +/** + * Find the length of a string that has been padded with a specific byte. + * + * \param str Right-padded string to find the length of. 
+ * \param size Size of the full string pointed to by str, including padding. + * \param pad Character that was used to pad str up to size. + * + * \return the length of the non-padded portion of str. + */ +size_t spdk_strlen_pad(const void *str, size_t size, int pad); + +/** + * Parse an IP address into its hostname and port components. This modifies the + * IP address in place. + * + * \param ip A null terminated IP address, including port. Both IPv4 and IPv6 + * are supported. + * \param host Will point to the start of the hostname within ip. The string will + * be null terminated. + * \param port Will point to the start of the port within ip. The string will be + * null terminated. + * + * \return 0 on success. -EINVAL on failure. + */ +int spdk_parse_ip_addr(char *ip, char **host, char **port); + +/** + * Parse a string representing a number possibly followed by a binary prefix. + * + * The string can contain a trailing "B" (KB,MB,GB) but it's not necessary. + * "128K" = 128 * 1024; "2G" = 2 * 1024 * 1024 * 1024; "2GB" = 2 * 1024 * 1024 * 1024; + * Additionally, lowercase "k", "m", "g" are parsed as well. They are processed + * the same as their uppercase equivalents. + * + * \param cap_str Null terminated string. + * \param cap Pointer where the parsed capacity (in bytes) will be put. + * \param has_prefix Pointer to a flag that will be set to describe whether given + * string contains a binary prefix. + * + * \return 0 on success, or negative errno on failure. + */ +int spdk_parse_capacity(const char *cap_str, uint64_t *cap, bool *has_prefix); + +/** + * Check if a buffer is all zero (0x00) bytes or not. + * + * \param data Buffer to check. + * \param size Size of data in bytes. + * + * \return true if data consists entirely of zeroes, or false if any byte in data + * is not zero. + */ +bool spdk_mem_all_zero(const void *data, size_t size); + +/** + * Convert the string in nptr to a long integer value according to the given base. 
+ * + * spdk_strtol() does additional error checking and accepts only strings that + * contain only a number that is positive or zero. The caller only has to check + * if the return value is not negative. + * + * \param nptr String containing numbers. + * \param base Base which must be between 2 and 36 inclusive, or be the special value 0. + * + * \return positive number or zero on success, or negative errno on failure. + */ +long int spdk_strtol(const char *nptr, int base); + +/** + * Convert the string in nptr to a long long integer value according to the given base. + * + * spdk_strtoll() does additional error checking and accepts only strings that + * contain only a number that is positive or zero. The caller only has to check + * if the return value is not negative. + * + * \param nptr String containing numbers. + * \param base Base which must be between 2 and 36 inclusive, or be the special value 0. + * + * \return positive number or zero on success, or negative errno on failure. + */ +long long int spdk_strtoll(const char *nptr, int base); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/thread.h b/src/spdk/include/spdk/thread.h new file mode 100644 index 000000000..841cf39a8 --- /dev/null +++ b/src/spdk/include/spdk/thread.h @@ -0,0 +1,736 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Thread + */ + +#ifndef SPDK_THREAD_H_ +#define SPDK_THREAD_H_ + +#include "spdk/stdinc.h" + +#include "spdk/cpuset.h" +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum spdk_thread_poller_rc { + SPDK_POLLER_IDLE, + SPDK_POLLER_BUSY, +}; + +/** + * A stackless, lightweight thread. + */ +struct spdk_thread; + +/** + * A function repeatedly called on the same spdk_thread. + */ +struct spdk_poller; + +struct spdk_io_channel_iter; + +/** + * A function that is called each time a new thread is created. + * The implementor of this function should frequently call + * spdk_thread_poll() on the thread provided. + * + * \param thread The new spdk_thread. + */ +typedef int (*spdk_new_thread_fn)(struct spdk_thread *thread); + +/** + * SPDK thread operation type. + */ +enum spdk_thread_op { + /* Called each time a new thread is created. The implementor of this operation + * should frequently call spdk_thread_poll() on the thread provided. 
+ */ + SPDK_THREAD_OP_NEW, + + /* Called when SPDK thread needs to be rescheduled (e.g., when cpumask of the + * SPDK thread is updated). + */ + SPDK_THREAD_OP_RESCHED, +}; + +/** + * Function to be called for SPDK thread operation. + */ +typedef int (*spdk_thread_op_fn)(struct spdk_thread *thread, enum spdk_thread_op op); + +/** + * Function to check whether the SPDK thread operation is supported. + */ +typedef bool (*spdk_thread_op_supported_fn)(enum spdk_thread_op op); + +/** + * A function that will be called on the target thread. + * + * \param ctx Context passed as arg to spdk_thread_pass_msg(). + */ +typedef void (*spdk_msg_fn)(void *ctx); + +/** + * Function to be called to pass a message to a thread. + * + * \param fn Callback function for a thread. + * \param ctx Context passed to fn. + * \param thread_ctx Context for the thread. + */ +typedef void (*spdk_thread_pass_msg)(spdk_msg_fn fn, void *ctx, + void *thread_ctx); + +/** + * Callback function for a poller. + * + * \param ctx Context passed as arg to spdk_poller_register(). + * \return 0 to indicate that polling took place but no events were found; + * positive to indicate that polling took place and some events were processed; + * negative if the poller does not provide spin-wait information. + */ +typedef int (*spdk_poller_fn)(void *ctx); + +/** + * Function to be called to start a poller for the thread. + * + * \param thread_ctx Context for the thread. + * \param fn Callback function for a poller. + * \param arg Argument passed to callback. + * \param period_microseconds Polling period in microseconds. + * + * \return a pointer to the poller on success, or NULL on failure. + */ +typedef struct spdk_poller *(*spdk_start_poller)(void *thread_ctx, + spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds); + +/** + * Function to be called to stop a poller. + * + * \param poller Poller to stop. + * \param thread_ctx Context for the thread. 
+ */ +typedef void (*spdk_stop_poller)(struct spdk_poller *poller, void *thread_ctx); + +/** + * I/O channel creation callback. + * + * \param io_device I/O device associated with this channel. + * \param ctx_buf Context for the I/O device. + */ +typedef int (*spdk_io_channel_create_cb)(void *io_device, void *ctx_buf); + +/** + * I/O channel destruction callback. + * + * \param io_device I/O device associated with this channel. + * \param ctx_buf Context for the I/O device. + */ +typedef void (*spdk_io_channel_destroy_cb)(void *io_device, void *ctx_buf); + +/** + * I/O device unregister callback. + * + * \param io_device Unregistered I/O device. + */ +typedef void (*spdk_io_device_unregister_cb)(void *io_device); + +/** + * Called on the appropriate thread for each channel associated with io_device. + * + * \param i I/O channel iterator. + */ +typedef void (*spdk_channel_msg)(struct spdk_io_channel_iter *i); + +/** + * spdk_for_each_channel() callback. + * + * \param i I/O channel iterator. + * \param status 0 if it completed successfully, or negative errno if it failed. + */ +typedef void (*spdk_channel_for_each_cpl)(struct spdk_io_channel_iter *i, int status); + +/** + * \brief Represents a per-thread channel for accessing an I/O device. + * + * An I/O device may be a physical entity (i.e. NVMe controller) or a software + * entity (i.e. a blobstore). + * + * This structure is not part of the API - all accesses should be done through + * spdk_io_channel function calls. + */ +struct spdk_io_channel { + struct spdk_thread *thread; + struct io_device *dev; + uint32_t ref; + uint32_t destroy_ref; + TAILQ_ENTRY(spdk_io_channel) tailq; + spdk_io_channel_destroy_cb destroy_cb; + + /* + * Modules will allocate extra memory off the end of this structure + * to store references to hardware-specific references (i.e. NVMe queue + * pairs, or references to child device spdk_io_channels (i.e. + * virtual bdevs). + */ +}; + +/** + * Initialize the threading library. 
Must be called once prior to allocating any threads. + * + * \param new_thread_fn Called each time a new SPDK thread is created. The implementor + * is expected to frequently call spdk_thread_poll() on the provided thread. + * \param ctx_sz For each thread allocated, an additional region of memory of + * size ctx_sz will also be allocated, for use by the thread scheduler. A pointer + * to this region may be obtained by calling spdk_thread_get_ctx(). + * + * \return 0 on success. Negated errno on failure. + */ +int spdk_thread_lib_init(spdk_new_thread_fn new_thread_fn, size_t ctx_sz); + +/** + * Initialize the threading library. Must be called once prior to allocating any threads. + * + * Both thread_op_fn and thread_op_supported_fn have to be specified or not + * specified together. + * + * \param thread_op_fn Called for SPDK thread operation. + * \param thread_op_supported_fn Called to check whether the SPDK thread operation is supported. + * \param ctx_sz For each thread allocated, an additional region of memory of + * size ctx_sz will also be allocated, for use by the thread scheduler. A pointer + * to this region may be obtained by calling spdk_thread_get_ctx(). + * + * \return 0 on success. Negated errno on failure. + */ +int spdk_thread_lib_init_ext(spdk_thread_op_fn thread_op_fn, + spdk_thread_op_supported_fn thread_op_supported_fn, + size_t ctx_sz); + +/** + * Release all resources associated with this library. + */ +void spdk_thread_lib_fini(void); + +/** + * Creates a new SPDK thread object. + * + * \param name Human-readable name for the thread; can be retrieved with spdk_thread_get_name(). + * The string is copied, so the pointed-to data only needs to be valid during the + * spdk_thread_create() call. May be NULL to specify no name. + * \param cpumask Optional mask of CPU cores on which to schedule this thread. This is only + * a suggestion to the scheduler. The value is copied, so cpumask may be released when + * this function returns. May be NULL if no mask is required. 
+ * + * \return a pointer to the allocated thread on success or NULL on failure. + */ +struct spdk_thread *spdk_thread_create(const char *name, struct spdk_cpuset *cpumask); + +/** + * Force the current system thread to act as if executing the given SPDK thread. + * + * \param thread The thread to set. + */ +void spdk_set_thread(struct spdk_thread *thread); + +/** + * Mark the thread as exited, failing all future spdk_thread_send_msg(), + * spdk_poller_register(), and spdk_get_io_channel() calls. May only be called + * within an SPDK poller or message. + * + * All I/O channel references associated with the thread must be released + * using spdk_put_io_channel(), and all active pollers associated with the thread + * should be unregistered using spdk_poller_unregister(), prior to calling + * this function. This function will complete this processing. The completion can + * be queried by spdk_thread_is_exited(). + * + * \param thread The thread to mark as exited. + * + * \return always 0. (The return value is deprecated but kept for ABI compatibility.) + */ +int spdk_thread_exit(struct spdk_thread *thread); + +/** + * Returns whether the thread is marked as exited. + * + * \param thread The thread to query. + * + * \return true if marked as exited, false otherwise. + */ +bool spdk_thread_is_exited(struct spdk_thread *thread); + +/** + * Destroy a thread, releasing all of its resources. May only be called + * on a thread previously marked as exited. + * + * \param thread The thread to destroy. + * + */ +void spdk_thread_destroy(struct spdk_thread *thread); + +/** + * Return a pointer to this thread's context. + * + * \param thread The thread on which to get the context. + * + * \return a pointer to the per-thread context, or NULL if there is + * no per-thread context. + */ +void *spdk_thread_get_ctx(struct spdk_thread *thread); + +/** + * Get the thread's cpumask. + * + * \param thread The thread to get the cpumask for. 
+ * + * \return cpuset pointer + */ +struct spdk_cpuset *spdk_thread_get_cpumask(struct spdk_thread *thread); + +/** + * Set the current thread's cpumask to the specified value. The thread may be + * rescheduled to one of the CPUs specified in the cpumask. + * + * This API requires SPDK thread operation supports SPDK_THREAD_OP_RESCHED. + * + * \param cpumask The new cpumask for the thread. + * + * \return 0 on success, negated errno otherwise. + */ +int spdk_thread_set_cpumask(struct spdk_cpuset *cpumask); + +/** + * Return the thread object associated with the context handle previously + * obtained by calling spdk_thread_get_ctx(). + * + * \param ctx A context previously obtained by calling spdk_thread_get_ctx() + * + * \return The associated thread. + */ +struct spdk_thread *spdk_thread_get_from_ctx(void *ctx); + +/** + * Perform one iteration worth of processing on the thread. This includes + * both expired and continuous pollers as well as messages. If the thread + * has exited, return immediately. + * + * \param thread The thread to process + * \param max_msgs The maximum number of messages that will be processed. + * Use 0 to process the default number of messages (8). + * \param now The current time, in ticks. Optional. If 0 is passed, this + * function will call spdk_get_ticks() to get the current time. + * The current time is used as start time and this function + * will call spdk_get_ticks() at its end to know end time to + * measure run time of this function. + * + * \return 1 if work was done. 0 if no work was done. + */ +int spdk_thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now); + +/** + * Return the number of ticks until the next timed poller + * would expire. Timed pollers are pollers for which + * period_microseconds is greater than 0. + * + * \param thread The thread to check poller expiration times on + * + * \return Number of ticks. If no timed pollers, return 0. 
+ */ +uint64_t spdk_thread_next_poller_expiration(struct spdk_thread *thread); + +/** + * Returns whether there are any active pollers (pollers for which + * period_microseconds equals 0) registered to be run on the thread. + * + * \param thread The thread to check. + * + * \return 1 if there is at least one active poller, 0 otherwise. + */ +int spdk_thread_has_active_pollers(struct spdk_thread *thread); + +/** + * Returns whether there are any pollers registered to be run + * on the thread. + * + * \param thread The thread to check. + * + * \return true if there is any poller registered, false otherwise. + */ +bool spdk_thread_has_pollers(struct spdk_thread *thread); + +/** + * Returns whether the thread has no scheduled operations to be run. + * + * \param thread The thread to check. + * + * \return true if there are no scheduled operations, false otherwise. + */ +bool spdk_thread_is_idle(struct spdk_thread *thread); + +/** + * Get count of allocated threads. + */ +uint32_t spdk_thread_get_count(void); + +/** + * Get a handle to the current thread. + * + * This handle may be passed to other threads and used as the target of + * spdk_thread_send_msg(). + * + * \sa spdk_io_channel_get_thread() + * + * \return a pointer to the current thread on success or NULL on failure. + */ +struct spdk_thread *spdk_get_thread(void); + +/** + * Get a thread's name. + * + * \param thread Thread to query. + * + * \return the name of the thread. + */ +const char *spdk_thread_get_name(const struct spdk_thread *thread); + +/** + * Get a thread's ID. + * + * \param thread Thread to query. + * + * \return the ID of the thread. + */ +uint64_t spdk_thread_get_id(const struct spdk_thread *thread); + +/** + * Get the thread by the ID. + * + * \param id ID of the thread. + * \return Thread whose ID matches or NULL otherwise. 
+ */ +struct spdk_thread *spdk_thread_get_by_id(uint64_t id); + +struct spdk_thread_stats { + uint64_t busy_tsc; + uint64_t idle_tsc; +}; + +/** + * Get statistics about the current thread. + * + * Copy cumulative thread stats values to the provided thread stats structure. + * + * \param stats User's thread_stats structure. + */ +int spdk_thread_get_stats(struct spdk_thread_stats *stats); + +/** + * Return the TSC value from the end of the last time this thread was polled. + * + * \param thread Thread to query. + * + * \return TSC value from the end of the last time this thread was polled. + */ +uint64_t spdk_thread_get_last_tsc(struct spdk_thread *thread); + +/** + * Send a message to the given thread. + * + * The message will be sent asynchronously - i.e. spdk_thread_send_msg will always return + * prior to `fn` being called. + * + * \param thread The target thread. + * \param fn This function will be called on the given thread. + * \param ctx This context will be passed to fn when called. + * + * \return 0 on success + * \return -ENOMEM if the message could not be allocated + * \return -EIO if the message could not be sent to the destination thread + */ +int spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx); + +/** + * Send a message to the given thread. Only one critical message can be outstanding at the same + * time. It's intended to use this function in any cases that might interrupt the execution of the + * application, such as signal handlers. + * + * The message will be sent asynchronously - i.e. spdk_thread_send_critical_msg will always return + * prior to `fn` being called. + * + * \param thread The target thread. + * \param fn This function will be called on the given thread. 
+ * + * \return 0 on success + * \return -EIO if the message could not be sent to the destination thread, due to an already + * outstanding critical message + */ +int spdk_thread_send_critical_msg(struct spdk_thread *thread, spdk_msg_fn fn); + +/** + * Send a message to each thread, serially. + * + * The message is sent asynchronously - i.e. spdk_for_each_thread will return + * prior to `fn` being called on each thread. + * + * \param fn This is the function that will be called on each thread. + * \param ctx This context will be passed to fn when called. + * \param cpl This will be called on the originating thread after `fn` has been + * called on each thread. + */ +void spdk_for_each_thread(spdk_msg_fn fn, void *ctx, spdk_msg_fn cpl); + +/** + * Register a poller on the current thread. + * + * The poller can be unregistered by calling spdk_poller_unregister(). + * + * \param fn This function will be called every `period_microseconds`. + * \param arg Argument passed to fn. + * \param period_microseconds How often to call `fn`. If 0, call `fn` as often + * as possible. + * + * \return a pointer to the poller registered on the current thread on success + * or NULL on failure. + */ +struct spdk_poller *spdk_poller_register(spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds); + +/** + * Register a poller on the current thread with arbitrary name. + * + * The poller can be unregistered by calling spdk_poller_unregister(). + * + * \param fn This function will be called every `period_microseconds`. + * \param arg Argument passed to fn. + * \param period_microseconds How often to call `fn`. If 0, call `fn` as often + * as possible. + * \param name Human readable name for the poller. Pointer of the poller function + * name is set if NULL. + * + * \return a pointer to the poller registered on the current thread on success + * or NULL on failure. 
+ */ +struct spdk_poller *spdk_poller_register_named(spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds, + const char *name); + +/* + * \brief Register a poller on the current thread with setting its name + * to the string of the poller function name. + */ +#define SPDK_POLLER_REGISTER(fn, arg, period_microseconds) \ + spdk_poller_register_named(fn, arg, period_microseconds, #fn) + +/** + * Unregister a poller on the current thread. + * + * \param ppoller The poller to unregister. + */ +void spdk_poller_unregister(struct spdk_poller **ppoller); + +/** + * Pause a poller on the current thread. + * + * The poller is not run until it is resumed with spdk_poller_resume(). It is + * perfectly fine to pause an already paused poller. + * + * \param poller The poller to pause. + */ +void spdk_poller_pause(struct spdk_poller *poller); + +/** + * Resume a poller on the current thread. + * + * Resumes a poller paused with spdk_poller_pause(). It is perfectly fine to + * resume an unpaused poller. + * + * \param poller The poller to resume. + */ +void spdk_poller_resume(struct spdk_poller *poller); + +/** + * Register the opaque io_device context as an I/O device. + * + * After an I/O device is registered, it can return I/O channels using the + * spdk_get_io_channel() function. + * + * \param io_device The pointer to io_device context. + * \param create_cb Callback function invoked to allocate any resources required + * for a new I/O channel. + * \param destroy_cb Callback function invoked to release the resources for an + * I/O channel. + * \param ctx_size The size of the context buffer allocated to store references + * to allocated I/O channel resources. + * \param name A string name for the device used only for debugging. Optional - + * may be NULL. 
+ */ +void spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb, + spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size, + const char *name); + +/** + * Unregister the opaque io_device context as an I/O device. + * + * The actual unregistration might be deferred until all active I/O channels are + * destroyed. + * + * \param io_device The pointer to io_device context. + * \param unregister_cb An optional callback function invoked to release any + * references to this I/O device. + */ +void spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb); + +/** + * Get an I/O channel for the specified io_device to be used by the calling thread. + * + * The io_device context pointer specified must have previously been registered + * using spdk_io_device_register(). If an I/O channel does not exist + * yet for the given io_device on the calling thread, it will allocate an I/O + * channel and invoke the create_cb function pointer specified in spdk_io_device_register(). + * If an I/O channel already exists for the given io_device on the calling thread, + * its reference is returned rather than creating a new I/O channel. + * + * \param io_device The pointer to io_device context. + * + * \return a pointer to the I/O channel for this device on success or NULL on failure. + */ +struct spdk_io_channel *spdk_get_io_channel(void *io_device); + +/** + * Release a reference to an I/O channel. This happens asynchronously. + * + * This must be called on the same thread that called spdk_get_io_channel() + * for the specified I/O channel. If this releases the last reference to the + * I/O channel, the destroy_cb function specified in spdk_io_device_register() + * will be invoked to release any associated resources. + * + * \param ch I/O channel to release a reference to. + */ +void spdk_put_io_channel(struct spdk_io_channel *ch); + +/** + * Get the context buffer associated with an I/O channel. + * + * \param ch I/O channel. 
+ * + * \return a pointer to the context buffer. + */ +static inline void * +spdk_io_channel_get_ctx(struct spdk_io_channel *ch) +{ + /* The context buffer starts at the first byte past the channel structure. */ + return (uint8_t *)ch + sizeof(*ch); +} + +/** + * Get I/O channel from the context buffer. This is the inverse of + * spdk_io_channel_get_ctx(). + * + * \param ctx The pointer to the context buffer. + * + * \return a pointer to the I/O channel associated with the context buffer. + */ +struct spdk_io_channel *spdk_io_channel_from_ctx(void *ctx); + +/** + * Get the thread associated with an I/O channel. + * + * \param ch I/O channel. + * + * \return a pointer to the thread associated with the I/O channel. + */ +struct spdk_thread *spdk_io_channel_get_thread(struct spdk_io_channel *ch); + +/** + * Call 'fn' on each channel associated with io_device. + * + * This happens asynchronously, so fn may be called after spdk_for_each_channel + * returns. 'fn' will be called for each channel serially, such that two calls + * to 'fn' will not overlap in time. After 'fn' has been called, call + * spdk_for_each_channel_continue() to continue iterating. + * + * \param io_device 'fn' will be called on each channel associated with this io_device. + * \param fn Called on the appropriate thread for each channel associated with io_device. + * \param ctx Context buffer registered to spdk_io_channel_iter that can be obtained + * from the function spdk_io_channel_iter_get_ctx(). + * \param cpl Called on the thread that spdk_for_each_channel was initially called + * from when 'fn' has been called on each channel. + */ +void spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + spdk_channel_for_each_cpl cpl); + +/** + * Get io_device from the I/O channel iterator. + * + * \param i I/O channel iterator. + * + * \return a pointer to the io_device. + */ +void *spdk_io_channel_iter_get_io_device(struct spdk_io_channel_iter *i); + +/** + * Get I/O channel from the I/O channel iterator. + * + * \param i I/O channel iterator. 
+ * + * \return a pointer to the I/O channel. + */ +struct spdk_io_channel *spdk_io_channel_iter_get_channel(struct spdk_io_channel_iter *i); + +/** + * Get context buffer from the I/O channel iterator. + * + * \param i I/O channel iterator. + * + * \return a pointer to the context buffer. + */ +void *spdk_io_channel_iter_get_ctx(struct spdk_io_channel_iter *i); + +/** + * Helper function to iterate all channels for spdk_for_each_channel(). + * + * \param i I/O channel iterator. + * \param status Status for the I/O channel iterator. + */ +void spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_THREAD_H_ */ diff --git a/src/spdk/include/spdk/trace.h b/src/spdk/include/spdk/trace.h new file mode 100644 index 000000000..1ed6c8b31 --- /dev/null +++ b/src/spdk/include/spdk/trace.h @@ -0,0 +1,404 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * Tracepoint library + */ + +#ifndef _SPDK_TRACE_H_ +#define _SPDK_TRACE_H_ + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_DEFAULT_NUM_TRACE_ENTRIES (32 * 1024) + +struct spdk_trace_entry { + uint64_t tsc; + uint16_t tpoint_id; + uint16_t poller_id; + uint32_t size; + uint64_t object_id; + uint64_t arg1; +}; + +/* If type changes from a uint8_t, change this value. */ +#define SPDK_TRACE_MAX_OWNER (UCHAR_MAX + 1) + +struct spdk_trace_owner { + uint8_t type; + char id_prefix; +}; + +/* If type changes from a uint8_t, change this value. */ +#define SPDK_TRACE_MAX_OBJECT (UCHAR_MAX + 1) + +struct spdk_trace_object { + uint8_t type; + char id_prefix; +}; + +#define SPDK_TRACE_MAX_GROUP_ID 16 +#define SPDK_TRACE_MAX_TPOINT_ID (SPDK_TRACE_MAX_GROUP_ID * 64) +/* + * Build a tpoint_id from a tracepoint group and a tracepoint index within that + * group (64 tracepoints per group). Both arguments are parenthesized so that + * expression arguments such as SPDK_TPOINT_ID(base + 1, n) expand correctly. + */ +#define SPDK_TPOINT_ID(group, tpoint) (((group) * 64) + (tpoint)) + +#define SPDK_TRACE_ARG_TYPE_INT 0 +#define SPDK_TRACE_ARG_TYPE_PTR 1 +#define SPDK_TRACE_ARG_TYPE_STR 2 + +struct spdk_trace_tpoint { + char name[24]; + uint16_t tpoint_id; + uint8_t owner_type; + uint8_t object_type; + uint8_t new_object; + uint8_t arg1_type; + uint8_t reserved; + char arg1_name[8]; +}; + +struct spdk_trace_history { + /** Logical core number associated with this structure instance. */ + int lcore; + + /** Number of trace_entries contained in each trace_history. 
*/ + uint64_t num_entries; + + /** + * Running count of number of occurrences of each tracepoint on this + * lcore. Debug tools can use this to easily count tracepoints such as + * number of SCSI tasks completed or PDUs read. + */ + uint64_t tpoint_count[SPDK_TRACE_MAX_TPOINT_ID]; + + /** Index to next spdk_trace_entry to fill. */ + uint64_t next_entry; + + /** + * Circular buffer of spdk_trace_entry structures for tracing + * tpoints on this core. Debug tool spdk_trace reads this + * buffer from shared memory to post-process the tpoint entries and + * display in a human-readable format. + */ + struct spdk_trace_entry entries[0]; +}; + +#define SPDK_TRACE_MAX_LCORE 128 + +struct spdk_trace_flags { + uint64_t tsc_rate; + uint64_t tpoint_mask[SPDK_TRACE_MAX_GROUP_ID]; + /* One slot per possible uint8_t owner/object type; sized by the named limits. */ + struct spdk_trace_owner owner[SPDK_TRACE_MAX_OWNER]; + struct spdk_trace_object object[SPDK_TRACE_MAX_OBJECT]; + struct spdk_trace_tpoint tpoint[SPDK_TRACE_MAX_TPOINT_ID]; + + /** Offset of each trace_history from the beginning of this data structure. + * The last one is the offset of the file end. + */ + uint64_t lcore_history_offsets[SPDK_TRACE_MAX_LCORE + 1]; +}; +extern struct spdk_trace_flags *g_trace_flags; +extern struct spdk_trace_histories *g_trace_histories; + + +struct spdk_trace_histories { + struct spdk_trace_flags flags; + + /** + * struct spdk_trace_history has a dynamic size determined by num_entries + * in spdk_trace_init. Mark array size of per_lcore_history to be 0 in uint8_t + * as a reminder that each per_lcore_history pointer should be gotten by + * proper API, instead of directly referencing by struct element. 
+ */ + uint8_t per_lcore_history[0]; +}; + +static inline uint64_t +spdk_get_trace_history_size(uint64_t num_entries) +{ + return sizeof(struct spdk_trace_history) + num_entries * sizeof(struct spdk_trace_entry); +} + +static inline uint64_t +spdk_get_trace_histories_size(struct spdk_trace_histories *trace_histories) +{ + return trace_histories->flags.lcore_history_offsets[SPDK_TRACE_MAX_LCORE]; +} + +static inline struct spdk_trace_history * +spdk_get_per_lcore_history(struct spdk_trace_histories *trace_histories, unsigned lcore) +{ + char *lcore_history_offset; + + if (lcore >= SPDK_TRACE_MAX_LCORE) { + return NULL; + } + + lcore_history_offset = (char *)trace_histories; + lcore_history_offset += trace_histories->flags.lcore_history_offsets[lcore]; + + return (struct spdk_trace_history *)lcore_history_offset; +} + +void _spdk_trace_record(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id, + uint32_t size, uint64_t object_id, uint64_t arg1); + +/** + * Record the current trace state for tracing tpoints. Debug tool can read the + * information from shared memory to post-process the tpoint entries and display + * in a human-readable format. This function will call spdk_get_ticks() to get + * the current tsc to save in the tracepoint. + * + * \param tpoint_id Tracepoint id to record. + * \param poller_id Poller id to record. + * \param size Size to record. + * \param object_id Object id to record. + * \param arg1 Argument to record. + */ +static inline +void spdk_trace_record(uint16_t tpoint_id, uint16_t poller_id, uint32_t size, + uint64_t object_id, uint64_t arg1) +{ + /* + * Tracepoint group ID is encoded in the tpoint_id. Lower 6 bits determine the tracepoint + * within the group, the remaining upper bits determine the tracepoint group. Each + * tracepoint group has its own tracepoint mask. 
+ */ + assert(tpoint_id < SPDK_TRACE_MAX_TPOINT_ID); + if (g_trace_histories == NULL || + !((1ULL << (tpoint_id & 0x3F)) & g_trace_histories->flags.tpoint_mask[tpoint_id >> 6])) { + return; + } + + _spdk_trace_record(0, tpoint_id, poller_id, size, object_id, arg1); +} + +/** + * Record the current trace state for tracing tpoints. Debug tool can read the + * information from shared memory to post-process the tpoint entries and display + * in a human-readable format. + * + * \param tsc Current tsc. + * \param tpoint_id Tracepoint id to record. + * \param poller_id Poller id to record. + * \param size Size to record. + * \param object_id Object id to record. + * \param arg1 Argument to record. + */ +static inline +void spdk_trace_record_tsc(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id, + uint32_t size, uint64_t object_id, uint64_t arg1) +{ + /* + * Tracepoint group ID is encoded in the tpoint_id. Lower 6 bits determine the tracepoint + * within the group, the remaining upper bits determine the tracepoint group. Each + * tracepoint group has its own tracepoint mask. + */ + assert(tpoint_id < SPDK_TRACE_MAX_TPOINT_ID); + if (g_trace_histories == NULL || + !((1ULL << (tpoint_id & 0x3F)) & g_trace_histories->flags.tpoint_mask[tpoint_id >> 6])) { + return; + } + + _spdk_trace_record(tsc, tpoint_id, poller_id, size, object_id, arg1); +} + +/** + * Get the current tpoint mask of the given tpoint group. + * + * \param group_id Tpoint group id associated with the tpoint mask. + * + * \return current tpoint mask. + */ +uint64_t spdk_trace_get_tpoint_mask(uint32_t group_id); + +/** + * Add the specified tpoints to the current tpoint mask for the given tpoint group. + * + * \param group_id Tpoint group id associated with the tpoint mask. + * \param tpoint_mask Tpoint mask which indicates which tpoints to add to the + * current tpoint mask. 
+ */ +void spdk_trace_set_tpoints(uint32_t group_id, uint64_t tpoint_mask); + +/** + * Clear the specified tpoints from the current tpoint mask for the given tpoint group. + * + * \param group_id Tpoint group id associated with the tpoint mask. + * \param tpoint_mask Tpoint mask which indicates which tpoints to clear from + * the current tpoint mask. + */ +void spdk_trace_clear_tpoints(uint32_t group_id, uint64_t tpoint_mask); + +/** + * Get a mask of all tracepoint groups which have at least one tracepoint enabled. + * + * \return a mask of all tracepoint groups. + */ +uint64_t spdk_trace_get_tpoint_group_mask(void); + +/** + * For each tpoint group specified in the group mask, enable all of its tpoints. + * + * \param tpoint_group_mask Tpoint group mask that indicates which tpoints to enable. + */ +void spdk_trace_set_tpoint_group_mask(uint64_t tpoint_group_mask); + +/** + * For each tpoint group specified in the group mask, disable all of its tpoints. + * + * \param tpoint_group_mask Tpoint group mask that indicates which tpoints to disable. + */ +void spdk_trace_clear_tpoint_group_mask(uint64_t tpoint_group_mask); + +/** + * Initialize the trace environment. Debug tool can read the information from + * the given shared memory to post-process the tpoint entries and display in a + * human-readable format. + * + * \param shm_name Name of shared memory. + * \param num_entries Number of trace entries per lcore. + * \return 0 on success, else non-zero indicates a failure. + */ +int spdk_trace_init(const char *shm_name, uint64_t num_entries); + +/** + * Unmap global trace memory structs. + */ +void spdk_trace_cleanup(void); + +/** + * Initialize trace flags. + */ +void spdk_trace_flags_init(void); + +#define OWNER_NONE 0 +#define OBJECT_NONE 0 + +/** + * Register the trace owner. + * + * \param type Type of the trace owner. + * \param id_prefix Prefix of id for the trace owner. 
+ */ +void spdk_trace_register_owner(uint8_t type, char id_prefix); + +/** + * Register the trace object. + * + * \param type Type of the trace object. + * \param id_prefix Prefix of id for the trace object. + */ +void spdk_trace_register_object(uint8_t type, char id_prefix); + +/** + * Register the description for the tpoint. + * + * \param name Name for the tpoint. + * \param tpoint_id Id for the tpoint. + * \param owner_type Owner type for the tpoint. + * \param object_type Object type for the tpoint. + * \param new_object New object for the tpoint. + * \param arg1_type Type of arg1. + * \param arg1_name Name of argument. + */ +void spdk_trace_register_description(const char *name, uint16_t tpoint_id, uint8_t owner_type, + uint8_t object_type, uint8_t new_object, + uint8_t arg1_type, const char *arg1_name); + +struct spdk_trace_register_fn *spdk_trace_get_first_register_fn(void); + +struct spdk_trace_register_fn *spdk_trace_get_next_register_fn(struct spdk_trace_register_fn + *register_fn); + +/** + * Enable trace on specific tpoint group + * + * \param group_name Name of group to enable, "all" for enabling all groups. + * \return 0 on success, else non-zero indicates a failure. + */ +int spdk_trace_enable_tpoint_group(const char *group_name); + +/** + * Disable trace on specific tpoint group + * + * \param group_name Name of group to disable, "all" for disabling all groups. + * \return 0 on success, else non-zero indicates a failure. + */ +int spdk_trace_disable_tpoint_group(const char *group_name); + +/** + * Show trace mask and its usage. + * + * \param f File to hold the mask's information. + * \param tmask_arg Command line option to set the trace group mask. + */ +void spdk_trace_mask_usage(FILE *f, const char *tmask_arg); + +struct spdk_trace_register_fn { + const char *name; + uint8_t tgroup_id; + void (*reg_fn)(void); + struct spdk_trace_register_fn *next; +}; + +/** + * Add new trace register function. 
+ * + * \param reg_fn Trace register function to add. + */ +void spdk_trace_add_register_fn(struct spdk_trace_register_fn *reg_fn); + +/* + * Define a trace registration function `fn` and register it automatically. + * + * Expands to a spdk_trace_register_fn record named reg_<fn>, a constructor + * that passes the address of that record to spdk_trace_add_register_fn(), and + * finally the header of `fn` itself, so the macro invocation must be followed + * by the function body. + */ +#define SPDK_TRACE_REGISTER_FN(fn, name_str, _tgroup_id) \ + static void fn(void); \ + struct spdk_trace_register_fn reg_ ## fn = { \ + .name = name_str, \ + .tgroup_id = _tgroup_id, \ + .reg_fn = fn, \ + .next = NULL, \ + }; \ + __attribute__((constructor)) static void _ ## fn(void) \ + { \ + spdk_trace_add_register_fn(&reg_ ## fn); \ + } \ + static void fn(void) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/util.h b/src/spdk/include/spdk/util.h new file mode 100644 index 000000000..6358524fa --- /dev/null +++ b/src/spdk/include/spdk/util.h @@ -0,0 +1,190 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * General utility functions + */ + +#ifndef SPDK_UTIL_H +#define SPDK_UTIL_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPDK_CACHE_LINE_SIZE 64 + +/* Note: spdk_min/spdk_max evaluate their arguments more than once. */ +#define spdk_min(a,b) (((a)<(b))?(a):(b)) +#define spdk_max(a,b) (((a)>(b))?(a):(b)) + +#define SPDK_COUNTOF(arr) (sizeof(arr) / sizeof((arr)[0])) + +/* + * Recover a pointer to the enclosing structure of type `type` from a pointer to + * its `member`. `ptr` is parenthesized so that an expression argument is cast + * as a whole before the member offset is subtracted. + */ +#define SPDK_CONTAINEROF(ptr, type, member) ((type *)((uintptr_t)(ptr) - offsetof(type, member))) + +#define SPDK_SEC_TO_USEC 1000000ULL +#define SPDK_SEC_TO_NSEC 1000000000ULL + +/* Ceiling division of unsigned integers */ +#define SPDK_CEIL_DIV(x,y) (((x)+(y)-1)/(y)) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no + * bigger than the first parameter. Second parameter must be a + * power-of-two value. + */ +#define SPDK_ALIGN_FLOOR(val, align) \ + (typeof(val))((val) & (~((typeof(val))((align) - 1)))) +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no lower + * than the first parameter. Second parameter must be a power-of-two + * value. 
+ */ +#define SPDK_ALIGN_CEIL(val, align) \ + SPDK_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align) + +uint32_t spdk_u32log2(uint32_t x); + +static inline uint32_t +spdk_align32pow2(uint32_t x) +{ + return 1u << (1 + spdk_u32log2(x - 1)); +} + +uint64_t spdk_u64log2(uint64_t x); + +static inline uint64_t +spdk_align64pow2(uint64_t x) +{ + return 1ULL << (1 + spdk_u64log2(x - 1)); +} + +/** + * Check if a uint32_t is a power of 2. + */ +static inline bool +spdk_u32_is_pow2(uint32_t x) +{ + if (x == 0) { + return false; + } + + return (x & (x - 1)) == 0; +} + +static inline uint64_t +spdk_divide_round_up(uint64_t num, uint64_t divisor) +{ + return (num + divisor - 1) / divisor; +} + +/** + * Copy the data described by the source iovec to the destination iovec. + * + * \return The number of bytes copied. + */ +size_t spdk_iovcpy(struct iovec *siov, size_t siovcnt, struct iovec *diov, size_t diovcnt); + + +/** + * Scan build is really pessimistic and assumes that mempool functions can + * dequeue NULL buffers even if they return success. This is obviously a false + * possitive, but the mempool dequeue can be done in a DPDK inline function that + * we can't decorate with usual assert(buf != NULL). Instead, we'll + * preinitialize the dequeued buffer array with some dummy objects. + */ +#define SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(arr, arr_size, buf_size) \ + do { \ + static char dummy_buf[buf_size]; \ + int i; \ + for (i = 0; i < arr_size; i++) { \ + arr[i] = (void *)dummy_buf; \ + } \ + } while (0) + +/** + * Add two sequece numbers s1 and s2 + * + * \param s1 First sequence number + * \param s2 Second sequence number + * + * \return Sum of s1 and s2 based on serial number arithmetic. + */ +static inline uint32_t +spdk_sn32_add(uint32_t s1, uint32_t s2) +{ + return (uint32_t)(s1 + s2); +} + +#define SPDK_SN32_CMPMAX (1U << (32 - 1)) + +/** + * Compare if sequence number s1 is less than s2. 
+ * + * \param s1 First sequence number + * \param s2 Second sequence number + * + * \return true if s1 is less than s2, or false otherwise. + */ +static inline bool +spdk_sn32_lt(uint32_t s1, uint32_t s2) +{ + return (s1 != s2) && + ((s1 < s2 && s2 - s1 < SPDK_SN32_CMPMAX) || + (s1 > s2 && s1 - s2 > SPDK_SN32_CMPMAX)); +} + +/** + * Compare if sequence number s1 is greater than s2. + * + * \param s1 First sequence number + * \param s2 Second sequence number + * + * \return true if s1 is greater than s2, or false otherwise. + */ +static inline bool +spdk_sn32_gt(uint32_t s1, uint32_t s2) +{ + return (s1 != s2) && + ((s1 < s2 && s2 - s1 > SPDK_SN32_CMPMAX) || + (s1 > s2 && s1 - s2 < SPDK_SN32_CMPMAX)); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/uuid.h b/src/spdk/include/spdk/uuid.h new file mode 100644 index 000000000..820944e34 --- /dev/null +++ b/src/spdk/include/spdk/uuid.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * UUID types and functions + */ + +#ifndef SPDK_UUID_H +#define SPDK_UUID_H + +#include "spdk/stdinc.h" + +#include "spdk/assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_uuid { + union { + uint8_t raw[16]; + } u; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_uuid) == 16, "Incorrect size"); + +#define SPDK_UUID_STRING_LEN 37 /* 36 characters + null terminator */ + +/** + * Convert UUID in textual format into a spdk_uuid. + * + * \param[out] uuid User-provided UUID buffer. + * \param uuid_str UUID in textual format in C string. + * + * \return 0 on success, or negative errno on failure. + */ +int spdk_uuid_parse(struct spdk_uuid *uuid, const char *uuid_str); + +/** + * Convert UUID in spdk_uuid into lowercase textual format. + * + * \param uuid_str User-provided string buffer to write the textual format into. + * \param uuid_str_size Size of uuid_str buffer. Must be at least SPDK_UUID_STRING_LEN. + * \param uuid UUID to convert to textual format. + * + * \return 0 on success, or negative errno on failure. + */ +int spdk_uuid_fmt_lower(char *uuid_str, size_t uuid_str_size, const struct spdk_uuid *uuid); + +/** + * Compare two UUIDs. 
+ * + * \param u1 UUID 1. + * \param u2 UUID 2. + * + * \return 0 if u1 == u2, less than 0 if u1 < u2, greater than 0 if u1 > u2. + */ +int spdk_uuid_compare(const struct spdk_uuid *u1, const struct spdk_uuid *u2); + +/** + * Generate a new UUID. + * + * \param[out] uuid User-provided UUID buffer to fill. + */ +void spdk_uuid_generate(struct spdk_uuid *uuid); + +/** + * Copy a UUID. + * + * \param[out] dst Destination UUID to store. + * \param src Source UUID to copy from. + */ +void spdk_uuid_copy(struct spdk_uuid *dst, const struct spdk_uuid *src); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/include/spdk/version.h b/src/spdk/include/spdk/version.h new file mode 100644 index 000000000..4b8b5d561 --- /dev/null +++ b/src/spdk/include/spdk/version.h @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * SPDK version number definitions + */ + +#ifndef SPDK_VERSION_H +#define SPDK_VERSION_H + +/** + * Major version number (year of original release minus 2000). + */ +#define SPDK_VERSION_MAJOR 20 + +/** + * Minor version number (month of original release). + */ +#define SPDK_VERSION_MINOR 7 + +/** + * Patch level. + * + * Patch level is incremented on maintenance branch releases and reset to 0 for each + * new major.minor release. + */ +#define SPDK_VERSION_PATCH 0 + +/** + * Version string suffix. + */ +#define SPDK_VERSION_SUFFIX "" + +/** + * Single numeric value representing a version number for compile-time comparisons. + * + * Example usage: + * + * \code + * #if SPDK_VERSION >= SPDK_VERSION_NUM(17, 7, 0) + * Use feature from SPDK v17.07 + * #endif + * \endcode + */ +#define SPDK_VERSION_NUM(major, minor, patch) \ + (((major) * 100 + (minor)) * 100 + (patch)) + +/** + * Current version as a SPDK_VERSION_NUM. + */ +#define SPDK_VERSION SPDK_VERSION_NUM(SPDK_VERSION_MAJOR, SPDK_VERSION_MINOR, SPDK_VERSION_PATCH) + +#define SPDK_VERSION_STRINGIFY_x(x) #x +#define SPDK_VERSION_STRINGIFY(x) SPDK_VERSION_STRINGIFY_x(x) + +#define SPDK_VERSION_MAJOR_STRING SPDK_VERSION_STRINGIFY(SPDK_VERSION_MAJOR) + +#if SPDK_VERSION_MINOR < 10 +#define SPDK_VERSION_MINOR_STRING ".0" SPDK_VERSION_STRINGIFY(SPDK_VERSION_MINOR) +#else +#define SPDK_VERSION_MINOR_STRING "." 
SPDK_VERSION_STRINGIFY(SPDK_VERSION_MINOR) +#endif + +#if SPDK_VERSION_PATCH != 0 +#define SPDK_VERSION_PATCH_STRING "." SPDK_VERSION_STRINGIFY(SPDK_VERSION_PATCH) +#else +#define SPDK_VERSION_PATCH_STRING "" +#endif + +#ifdef SPDK_GIT_COMMIT +#define SPDK_GIT_COMMIT_STRING SPDK_VERSION_STRINGIFY(SPDK_GIT_COMMIT) +#define SPDK_GIT_COMMIT_STRING_SHA1 " git sha1 " SPDK_GIT_COMMIT_STRING +#else +#define SPDK_GIT_COMMIT_STRING "" +#define SPDK_GIT_COMMIT_STRING_SHA1 "" +#endif + +/** + * Human-readable version string. + */ +#define SPDK_VERSION_STRING \ + "SPDK v" \ + SPDK_VERSION_MAJOR_STRING \ + SPDK_VERSION_MINOR_STRING \ + SPDK_VERSION_PATCH_STRING \ + SPDK_VERSION_SUFFIX \ + SPDK_GIT_COMMIT_STRING_SHA1 + +#endif diff --git a/src/spdk/include/spdk/vhost.h b/src/spdk/include/spdk/vhost.h new file mode 100644 index 000000000..211c2d337 --- /dev/null +++ b/src/spdk/include/spdk/vhost.h @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * SPDK vhost + */ + +#ifndef SPDK_VHOST_H +#define SPDK_VHOST_H + +#include "spdk/stdinc.h" + +#include "spdk/cpuset.h" +#include "spdk/json.h" +#include "spdk/thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Callback for spdk_vhost_init(). + * + * \param rc 0 on success, negative errno on failure + */ +typedef void (*spdk_vhost_init_cb)(int rc); + +/** Callback for spdk_vhost_fini(). */ +typedef void (*spdk_vhost_fini_cb)(void); + +/** + * Set the path to the directory where vhost sockets will be created. + * + * This function must be called before spdk_vhost_init(). + * + * \param basename Path to vhost socket directory + * + * \return 0 on success, negative errno on error. + */ +int spdk_vhost_set_socket_path(const char *basename); + +/** + * Init vhost environment. + * + * \param init_cb Function to be called when the initialization is complete. + */ +void spdk_vhost_init(spdk_vhost_init_cb init_cb); + +/** + * Clean up the environment of vhost. + * + * \param fini_cb Function to be called when the cleanup is complete. + */ +void spdk_vhost_fini(spdk_vhost_fini_cb fini_cb); + + +/** + * Write vhost subsystem configuration into provided JSON context. + * + * \param w JSON write context + */ +void spdk_vhost_config_json(struct spdk_json_write_ctx *w); + +/** + * Deinit vhost application. This is called once by SPDK app layer. 
+ */ +void spdk_vhost_shutdown_cb(void); + +/** + * SPDK vhost device (vdev). An equivalent of Virtio device. + * Both virtio-blk and virtio-scsi devices are represented by this + * struct. For virtio-scsi a single vhost device (also called SCSI + * controller) may contain multiple SCSI targets (devices), each of + * which may contain multiple logical units (SCSI LUNs). For now + * only one LUN per target is available. + * + * All vdev-changing functions operate directly on this object. + * Note that \c spdk_vhost_dev cannot be acquired. This object is + * only accessible as a callback parameter via \c + * spdk_vhost_call_external_event and its derivatives. This ensures + * that all access to the vdev is piped through a single, + * thread-safe API. + */ +struct spdk_vhost_dev; + +/** + * Lock the global vhost mutex synchronizing all the vhost device accesses. + */ +void spdk_vhost_lock(void); + +/** + * Try to lock the global vhost mutex synchronizing all the vhost device accesses + * without blocking. + * + * \return 0 if the mutex could be locked immediately, negative errno otherwise. + */ +int spdk_vhost_trylock(void); + +/** + * Unlock the global vhost mutex. + */ +void spdk_vhost_unlock(void); + +/** + * Find a vhost device by name. + * + * \return vhost device or NULL + */ +struct spdk_vhost_dev *spdk_vhost_dev_find(const char *name); + +/** + * Get the next vhost device. If there are no more devices to iterate + * through, NULL will be returned. + * + * \param vdev vhost device. If NULL, this function will return the + * very first device. + * \return vdev vhost device or NULL + */ +struct spdk_vhost_dev *spdk_vhost_dev_next(struct spdk_vhost_dev *vdev); + +/** + * Synchronized vhost event used for user callbacks. + * + * \param vdev vhost device. + * \param arg user-provided parameter. + * + * \return 0 on success, -1 on failure. + */ +typedef int (*spdk_vhost_event_fn)(struct spdk_vhost_dev *vdev, void *arg); + +/** + * Get the name of the vhost device. 
This is equal to the filename + * of socket file. The name is constant throughout the lifetime of + * a vdev. + * + * \param vdev vhost device. + * + * \return name of the vdev. + */ +const char *spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev); + +/** + * Get cpuset of the vhost device. The cpuset is constant throughout the lifetime + * of a vdev. It is a subset of SPDK app cpuset vhost was started with. + * + * \param vdev vhost device. + * + * \return cpuset of the vdev. + */ +const struct spdk_cpuset *spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev); + +/** + * By default, events are generated when asked, but for high queue depth and + * high IOPS this proves to be inefficient both for the guest kernel, which has to + * handle a lot more IO completions, and for SPDK vhost, which needs to make more + * syscalls. If enabled, limit the amount of events (IRQs) sent to the initiator by SPDK + * vhost, effectively coalescing a couple of completions. This of course introduces + * an IO latency penalty proportional to the event delay time. + * + * The actual event delay time is calculated according to the formula below: + * if (delay_base == 0 || IOPS < iops_threshold) { + * delay = 0; + * } else if (IOPS >= iops_threshold) { + * delay = delay_base * (iops - iops_threshold) / iops_threshold; + * } + * + * \param vdev vhost device. + * \param delay_base_us Base delay time in microseconds. If 0, coalescing is disabled. + * \param iops_threshold IOPS threshold when coalescing is activated. + */ +int spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us, + uint32_t iops_threshold); + +/** + * Get coalescing parameters. + * + * \see spdk_vhost_set_coalescing + * + * \param vdev vhost device. + * \param delay_base_us Optional pointer to store base delay time. + * \param iops_threshold Optional pointer to store IOPS threshold. 
+ */ +void spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us, + uint32_t *iops_threshold); + +/** + * Construct an empty vhost SCSI device. This will create a + * Unix domain socket together with a vhost-user slave server waiting + * for a connection on this socket. Creating the vdev does not + * start any I/O pollers and does not hog the CPU. I/O processing + * starts after receiving proper message on the created socket. + * See QEMU's vhost-user documentation for details. + * All physical devices have to be separately attached to this + * vdev via \c spdk_vhost_scsi_dev_add_tgt(). + * + * This function is thread-safe. + * + * \param name name of the vhost device. The name will also be used + * for socket name, which is exactly \c socket_base_dir/name + * \param cpumask string containing cpumask in hex. The leading *0x* + * is allowed but not required. The mask itself can be constructed as: + * ((1 << cpu0) | (1 << cpu1) | ... | (1 << cpuN)). + * + * \return 0 on success, negative errno on error. + */ +int spdk_vhost_scsi_dev_construct(const char *name, const char *cpumask); + +/** + * Construct and attach new SCSI target to the vhost SCSI device + * on given (unoccupied) slot. The device will be created with a single + * LUN0 associated with given SPDK bdev. Currently only one LUN per + * device is supported. + * + * If the vhost SCSI device has an active connection and has negotiated + * \c VIRTIO_SCSI_F_HOTPLUG feature, the new SCSI target should be + * automatically detected by the other side. + * + * \param vdev vhost SCSI device. + * \param scsi_tgt_num slot to attach to or negative value to use first free. + * \param bdev_name name of the SPDK bdev to associate with SCSI LUN0. 
+ * + * \return value >= 0 on success - the SCSI target ID, or a negative errno code on failure: + * -EINVAL - one of the arguments is invalid: + * - vdev is not vhost SCSI device + * - SCSI target ID is out of range + * - bdev name is NULL + * - can't create SCSI LUN because of other errors e.g.: bdev does not exist + * -ENOSPC - scsi_tgt_num is -1 and maximum targets in vhost SCSI device reached + * -EEXIST - SCSI target ID already exists + */ +int spdk_vhost_scsi_dev_add_tgt(struct spdk_vhost_dev *vdev, int scsi_tgt_num, + const char *bdev_name); + +/** + * Get SCSI target from vhost SCSI device on given slot. Max + * number of available slots is defined by + * \c SPDK_VHOST_SCSI_CTRLR_MAX_DEVS. + * + * \param vdev vhost SCSI device. + * \param num slot id. + * + * \return SCSI device on given slot or NULL. + */ +struct spdk_scsi_dev *spdk_vhost_scsi_dev_get_tgt(struct spdk_vhost_dev *vdev, uint8_t num); + +/** + * Detach and destruct SCSI target from a vhost SCSI device. + * + * The device will be deleted after all pending I/O is finished. + * If the driver supports VIRTIO_SCSI_F_HOTPLUG, then a hotremove + * notification will be sent. + * + * \param vdev vhost SCSI device + * \param scsi_tgt_num slot id to delete target from + * \param cb_fn callback to be fired once target has been successfully + * deleted. The first parameter of callback function is the vhost SCSI + * device, the second is user provided argument *cb_arg*. + * \param cb_arg parameter to be passed to *cb_fn*. + * + * \return 0 on success, negative errno on error. + */ +int spdk_vhost_scsi_dev_remove_tgt(struct spdk_vhost_dev *vdev, unsigned scsi_tgt_num, + spdk_vhost_event_fn cb_fn, void *cb_arg); + +/** + * Construct a vhost blk device. This will create a Unix domain + * socket together with a vhost-user slave server waiting for a + * connection on this socket. Creating the vdev does not start + * any I/O pollers and does not hog the CPU. 
I/O processing starts + * after receiving proper message on the created socket. + * See QEMU's vhost-user documentation for details. Vhost blk + * device is tightly associated with given SPDK bdev. Given + * bdev can not be changed, unless it has been hotremoved. This + * would result in all I/O failing with virtio \c VIRTIO_BLK_S_IOERR + * error code. + * + * This function is thread-safe. + * + * \param name name of the vhost blk device. The name will also be + * used for socket name, which is exactly \c socket_base_dir/name + * \param cpumask string containing cpumask in hex. The leading *0x* + * is allowed but not required. The mask itself can be constructed as: + * ((1 << cpu0) | (1 << cpu1) | ... | (1 << cpuN)). + * \param dev_name bdev name to associate with this vhost device + * \param readonly if set, all writes to the device will fail with + * \c VIRTIO_BLK_S_IOERR error code. + * \param packed_ring this controller supports packed ring if set. + * + * \return 0 on success, negative errno on error. + */ +int spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, + bool readonly, bool packed_ring); + +/** + * Remove a vhost device. The device must not have any open connections on its socket. + * + * \param vdev vhost blk device. + * + * \return 0 on success, negative errno on error. + */ +int spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_VHOST_H */ diff --git a/src/spdk/include/spdk/vmd.h b/src/spdk/include/spdk/vmd.h new file mode 100644 index 000000000..100aa893c --- /dev/null +++ b/src/spdk/include/spdk/vmd.h @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * VMD driver public interface + */ + +#ifndef SPDK_VMD_H +#define SPDK_VMD_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/config.h" +#include "spdk/env.h" + +/* Maximum VMD devices - up to 6 per cpu */ +#define MAX_VMD_TARGET 24 + +/** + * Enumerate VMD devices and hook them into the spdk pci subsystem + * + * \return 0 on success, -1 on failure + */ +int spdk_vmd_init(void); + +/** + * Release any resources allocated by the VMD library via spdk_vmd_init(). + */ +void spdk_vmd_fini(void); + +/** + * Returns a list of nvme devices found on the given vmd pci BDF. + * + * \param vmd_addr pci BDF of the vmd device to return end device list + * \param nvme_list buffer of exactly MAX_VMD_TARGET to return spdk_pci_device array. + * + * \return Returns count of nvme device attached to input VMD. + */ +int spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list); + +/** State of the LEDs */ +enum spdk_vmd_led_state { + SPDK_VMD_LED_STATE_OFF, + SPDK_VMD_LED_STATE_IDENTIFY, + SPDK_VMD_LED_STATE_FAULT, + SPDK_VMD_LED_STATE_REBUILD, + SPDK_VMD_LED_STATE_UNKNOWN, +}; + +/** + * Sets the state of the LED on specified PCI device. The device needs to be behind VMD. + * + * \param pci_device PCI device + * \param state LED state to set + * + * \return 0 on success, negative errno otherwise + */ +int spdk_vmd_set_led_state(struct spdk_pci_device *pci_device, enum spdk_vmd_led_state state); + +/** + * Retrieves the state of the LED on specified PCI device. The device needs to be behind VMD. + * + * \param pci_device PCI device + * \param state current LED state + * + * \return 0 on success, negative errno otherwise + */ +int spdk_vmd_get_led_state(struct spdk_pci_device *pci_device, enum spdk_vmd_led_state *state); + +/** + * Checks for hotplug/hotremove events of the devices behind the VMD. Needs to be called + * periodically to detect them. 
+ * + * \return number of hotplug events detected or negative errno in case of errors + */ +int spdk_vmd_hotplug_monitor(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_VMD_H */ diff --git a/src/spdk/include/spdk_internal/accel_engine.h b/src/spdk/include/spdk_internal/accel_engine.h new file mode 100644 index 000000000..9b78bc967 --- /dev/null +++ b/src/spdk/include/spdk_internal/accel_engine.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_ACCEL_ENGINE_H +#define SPDK_INTERNAL_ACCEL_ENGINE_H + +#include "spdk/stdinc.h" + +#include "spdk/accel_engine.h" +#include "spdk/queue.h" + +struct spdk_accel_task { + spdk_accel_completion_cb cb; + void *cb_arg; + uint8_t offload_ctx[0]; +}; + +struct spdk_accel_engine { + uint64_t (*get_capabilities)(void); + int (*copy)(struct spdk_io_channel *ch, void *dst, void *src, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + uint32_t (*batch_get_max)(void); + struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch); + int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst1, void *dst2, void *src, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int 
(*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch); + int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src, + uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + struct spdk_io_channel *(*get_io_channel)(void); +}; + +struct spdk_accel_module_if { + /** Initialization function for the module. Called by the spdk + * application during startup. + * + * Modules are required to define this function. + */ + int (*module_init)(void); + + /** Finish function for the module. Called by the spdk application + * before the spdk application exits to perform any necessary cleanup. + * + * Modules are not required to define this function. + */ + void (*module_fini)(void *ctx); + + /** Function called to return a text string representing the + * module's configuration options for inclusion in an + * spdk configuration file. + */ + void (*config_text)(FILE *fp); + + /** + * Write Acceleration module configuration into provided JSON context. + */ + void (*write_config_json)(struct spdk_json_write_ctx *w); + + /** + * Returns the allocation size required for the modules to use for context. 
+ */ + size_t (*get_ctx_size)(void); + + TAILQ_ENTRY(spdk_accel_module_if) tailq; +}; + +void spdk_accel_hw_engine_register(struct spdk_accel_engine *accel_engine); +void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module); + +#define SPDK_ACCEL_MODULE_REGISTER(init_fn, fini_fn, config_fn, config_json, ctx_size_fn) \ + static struct spdk_accel_module_if init_fn ## _if = { \ + .module_init = init_fn, \ + .module_fini = fini_fn, \ + .config_text = config_fn, \ + .write_config_json = config_json, \ + .get_ctx_size = ctx_size_fn, \ + }; \ + __attribute__((constructor)) static void init_fn ## _init(void) \ + { \ + spdk_accel_module_list_add(&init_fn ## _if); \ + } + +#endif diff --git a/src/spdk/include/spdk_internal/assert.h b/src/spdk/include/spdk_internal/assert.h new file mode 100644 index 000000000..7e4c45070 --- /dev/null +++ b/src/spdk/include/spdk_internal/assert.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_ASSERT_H +#define SPDK_INTERNAL_ASSERT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" + +#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define SPDK_UNREACHABLE() __builtin_unreachable() +#else +#define SPDK_UNREACHABLE() abort() +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_ASSERT_H */ diff --git a/src/spdk/include/spdk_internal/event.h b/src/spdk/include/spdk_internal/event.h new file mode 100644 index 000000000..2d88d08ba --- /dev/null +++ b/src/spdk/include/spdk_internal/event.h @@ -0,0 +1,197 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_EVENT_H +#define SPDK_INTERNAL_EVENT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/event.h" +#include "spdk/json.h" +#include "spdk/thread.h" +#include "spdk/util.h" + +struct spdk_event { + uint32_t lcore; + spdk_event_fn fn; + void *arg1; + void *arg2; +}; + +enum spdk_reactor_state { + SPDK_REACTOR_STATE_UNINITIALIZED = 0, + SPDK_REACTOR_STATE_INITIALIZED = 1, + SPDK_REACTOR_STATE_RUNNING = 2, + SPDK_REACTOR_STATE_EXITING = 3, + SPDK_REACTOR_STATE_SHUTDOWN = 4, +}; + +struct spdk_lw_thread { + TAILQ_ENTRY(spdk_lw_thread) link; + bool resched; + uint64_t tsc_start; +}; + +struct spdk_reactor { + /* Lightweight threads running on this reactor */ + TAILQ_HEAD(, spdk_lw_thread) threads; + uint32_t thread_count; + + /* Logical core number for this reactor. 
*/ + uint32_t lcore; + + struct { + uint32_t is_valid : 1; + uint32_t reserved : 31; + } flags; + + uint64_t tsc_last; + + struct spdk_ring *events; + + /* The last known rusage values */ + struct rusage rusage; + uint64_t last_rusage; + + uint64_t busy_tsc; + uint64_t idle_tsc; +} __attribute__((aligned(SPDK_CACHE_LINE_SIZE))); + +int spdk_reactors_init(void); +void spdk_reactors_fini(void); + +void spdk_reactors_start(void); +void spdk_reactors_stop(void *arg1); + +struct spdk_reactor *spdk_reactor_get(uint32_t lcore); + +/** + * Allocate and pass an event to each reactor, serially. + * + * The allocated event is processed asynchronously - i.e. spdk_for_each_reactor + * will return prior to `fn` being called on each reactor. + * + * \param fn This is the function that will be called on each reactor. + * \param arg1 Argument will be passed to fn when called. + * \param arg2 Argument will be passed to fn when called. + * \param cpl This will be called on the originating reactor after `fn` has been + * called on each reactor. + */ +void spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl); + +struct spdk_subsystem { + const char *name; + /* User must call spdk_subsystem_init_next() when they are done with their initialization. */ + void (*init)(void); + void (*fini)(void); + void (*config)(FILE *fp); + + /** + * Write JSON configuration handler. 
+ * + * \param w JSON write context + */ + void (*write_config_json)(struct spdk_json_write_ctx *w); + TAILQ_ENTRY(spdk_subsystem) tailq; +}; + +struct spdk_subsystem *spdk_subsystem_find(const char *name); +struct spdk_subsystem *spdk_subsystem_get_first(void); +struct spdk_subsystem *spdk_subsystem_get_next(struct spdk_subsystem *cur_subsystem); + +struct spdk_subsystem_depend { + const char *name; + const char *depends_on; + TAILQ_ENTRY(spdk_subsystem_depend) tailq; +}; + +struct spdk_subsystem_depend *spdk_subsystem_get_first_depend(void); +struct spdk_subsystem_depend *spdk_subsystem_get_next_depend(struct spdk_subsystem_depend + *cur_depend); + +void spdk_add_subsystem(struct spdk_subsystem *subsystem); +void spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend); + +typedef void (*spdk_subsystem_init_fn)(int rc, void *ctx); +void spdk_subsystem_init(spdk_subsystem_init_fn cb_fn, void *cb_arg); +void spdk_subsystem_fini(spdk_msg_fn cb_fn, void *cb_arg); +void spdk_subsystem_init_next(int rc); +void spdk_subsystem_fini_next(void); +void spdk_subsystem_config(FILE *fp); +void spdk_app_json_config_load(const char *json_config_file, const char *rpc_addr, + spdk_subsystem_init_fn cb_fn, void *cb_arg, + bool stop_on_error); + +/** + * Save pointed \c subsystem configuration to the JSON write context \c w. In case of + * error \c null is written to the JSON context. + * + * \param w JSON write context + * \param subsystem the subsystem to query + */ +void spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem); + +void spdk_rpc_initialize(const char *listen_addr); +void spdk_rpc_finish(void); + +/** + * \brief Register a new subsystem + */ +#define SPDK_SUBSYSTEM_REGISTER(_name) \ + __attribute__((constructor)) static void _name ## _register(void) \ + { \ + spdk_add_subsystem(&_name); \ + } + +/** + * \brief Declare that a subsystem depends on another subsystem. 
+ */ +#define SPDK_SUBSYSTEM_DEPEND(_name, _depends_on) \ + static struct spdk_subsystem_depend __subsystem_ ## _name ## _depend_on ## _depends_on = { \ + .name = #_name, \ + .depends_on = #_depends_on, \ + }; \ + __attribute__((constructor)) static void _name ## _depend_on ## _depends_on(void) \ + { \ + spdk_add_subsystem_depend(&__subsystem_ ## _name ## _depend_on ## _depends_on); \ + } + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_EVENT_H */ diff --git a/src/spdk/include/spdk_internal/idxd.h b/src/spdk/include/spdk_internal/idxd.h new file mode 100644 index 000000000..17db2405d --- /dev/null +++ b/src/spdk/include/spdk_internal/idxd.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IDXD_INTERNAL_H__ +#define __IDXD_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/idxd.h" +#include "spdk/queue.h" +#include "spdk/mmio.h" +#include "spdk/bit_array.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define IDXD_MAX_CONFIG_NUM 1 + +enum dsa_opcode { + IDXD_OPCODE_NOOP = 0, + IDXD_OPCODE_BATCH = 1, + IDXD_OPCODE_DRAIN = 2, + IDXD_OPCODE_MEMMOVE = 3, + IDXD_OPCODE_MEMFILL = 4, + IDXD_OPCODE_COMPARE = 5, + IDXD_OPCODE_COMPVAL = 6, + IDXD_OPCODE_CR_DELTA = 7, + IDXD_OPCODE_AP_DELTA = 8, + IDXD_OPCODE_DUALCAST = 9, + IDXD_OPCODE_CRC32C_GEN = 16, + IDXD_OPCODE_COPY_CRC = 17, + IDXD_OPCODE_DIF_CHECK = 18, + IDXD_OPCODE_DIF_INS = 19, + IDXD_OPCODE_DIF_STRP = 20, + IDXD_OPCODE_DIF_UPDT = 21, + IDXD_OPCODE_CFLUSH = 32, +}; + +#ifdef __cplusplus +} +#endif + +#endif /* __IDXD_INTERNAL_H__ */ diff --git a/src/spdk/include/spdk_internal/log.h b/src/spdk/include/spdk_internal/log.h new file mode 100644 index 000000000..0993d1016 --- /dev/null +++ b/src/spdk/include/spdk_internal/log.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * Logging interfaces + */ + +#ifndef SPDK_INTERNAL_LOG_H +#define SPDK_INTERNAL_LOG_H + +#include "spdk/log.h" +#include "spdk/queue.h" + +extern enum spdk_log_level g_spdk_log_level; +extern enum spdk_log_level g_spdk_log_print_level; +extern enum spdk_log_level g_spdk_log_backtrace_level; + +struct spdk_log_flag { + TAILQ_ENTRY(spdk_log_flag) tailq; + const char *name; + bool enabled; +}; + +void spdk_log_register_flag(const char *name, struct spdk_log_flag *flag); + +struct spdk_log_flag *spdk_log_get_first_flag(void); +struct spdk_log_flag *spdk_log_get_next_flag(struct spdk_log_flag *flag); + +#define SPDK_LOG_REGISTER_COMPONENT(str, flag) \ +struct spdk_log_flag flag = { \ + .enabled = false, \ + .name = str, \ +}; \ +__attribute__((constructor)) static void register_flag_##flag(void) \ +{ \ + spdk_log_register_flag(str, &flag); \ +} + +#define SPDK_INFOLOG(FLAG, ...) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if (FLAG.enabled) { \ + spdk_log(SPDK_LOG_INFO, __FILE__, __LINE__, __func__, __VA_ARGS__); \ + } \ + } while (0) + +#ifdef DEBUG + +#define SPDK_DEBUGLOG(FLAG, ...) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if (FLAG.enabled) { \ + spdk_log(SPDK_LOG_DEBUG, __FILE__, __LINE__, __func__, __VA_ARGS__); \ + } \ + } while (0) + +#define SPDK_LOGDUMP(FLAG, LABEL, BUF, LEN) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if ((FLAG.enabled) && (LEN)) { \ + spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \ + } \ + } while (0) + +#else +#define SPDK_DEBUGLOG(...) do { } while (0) +#define SPDK_LOGDUMP(...) 
do { } while (0) +#endif + +#define SPDK_ERRLOGDUMP(LABEL, BUF, LEN) \ + do { \ + if ((LEN)) { \ + spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \ + } \ + } while (0) + +#endif /* SPDK_INTERNAL_LOG_H */ diff --git a/src/spdk/include/spdk_internal/lvolstore.h b/src/spdk/include/spdk_internal/lvolstore.h new file mode 100644 index 000000000..f82157e53 --- /dev/null +++ b/src/spdk/include/spdk_internal/lvolstore.h @@ -0,0 +1,128 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_LVOLSTORE_H +#define SPDK_INTERNAL_LVOLSTORE_H + +#include "spdk/blob.h" +#include "spdk/lvol.h" +#include "spdk/uuid.h" +#include "spdk/bdev_module.h" + +/* Default size of blobstore cluster */ +#define SPDK_LVS_OPTS_CLUSTER_SZ (4 * 1024 * 1024) + +/* UUID + '_' + blobid (20 characters for uint64_t). + * Null terminator is already included in SPDK_UUID_STRING_LEN. */ +#define SPDK_LVOL_UNIQUE_ID_MAX (SPDK_UUID_STRING_LEN + 1 + 20) + +struct spdk_lvs_req { + spdk_lvs_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvol_store; + int lvserrno; +}; + +struct spdk_lvol_req { + spdk_lvol_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol *lvol; + size_t sz; + struct spdk_io_channel *channel; + char name[SPDK_LVOL_NAME_MAX]; +}; + +struct spdk_lvs_with_handle_req { + spdk_lvs_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvol_store; + struct spdk_bs_dev *bs_dev; + struct spdk_bdev *base_bdev; + int lvserrno; +}; + +struct spdk_lvs_destroy_req { + spdk_lvs_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvs; +}; + +struct spdk_lvol_with_handle_req { + spdk_lvol_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_lvol *lvol; +}; + +struct spdk_lvol_store { + struct spdk_bs_dev *bs_dev; + struct spdk_blob_store *blobstore; + struct spdk_blob *super_blob; + spdk_blob_id super_blob_id; + struct spdk_uuid uuid; + int lvol_count; + int lvols_opened; + 
bool destruct; + TAILQ_HEAD(, spdk_lvol) lvols; + TAILQ_HEAD(, spdk_lvol) pending_lvols; + bool on_list; + TAILQ_ENTRY(spdk_lvol_store) link; + char name[SPDK_LVS_NAME_MAX]; + char new_name[SPDK_LVS_NAME_MAX]; +}; + +struct spdk_lvol { + struct spdk_lvol_store *lvol_store; + struct spdk_blob *blob; + spdk_blob_id blob_id; + char unique_id[SPDK_LVOL_UNIQUE_ID_MAX]; + char name[SPDK_LVOL_NAME_MAX]; + struct spdk_uuid uuid; + char uuid_str[SPDK_UUID_STRING_LEN]; + bool thin_provision; + struct spdk_bdev *bdev; + int ref_count; + bool action_in_progress; + enum blob_clear_method clear_method; + TAILQ_ENTRY(spdk_lvol) link; +}; + +struct lvol_store_bdev *vbdev_lvol_store_first(void); +struct lvol_store_bdev *vbdev_lvol_store_next(struct lvol_store_bdev *prev); + +void spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, + void *cb_arg); + +void spdk_lvol_set_read_only(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_INTERNAL_LVOLSTORE_H */ diff --git a/src/spdk/include/spdk_internal/mock.h b/src/spdk/include/spdk_internal/mock.h new file mode 100644 index 000000000..8de44ae55 --- /dev/null +++ b/src/spdk/include/spdk_internal/mock.h @@ -0,0 +1,135 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_MOCK_H +#define SPDK_INTERNAL_MOCK_H + +#include "spdk/stdinc.h" + +#define MOCK_STRUCT_INIT(...) \ + { __VA_ARGS__ } + +#define DEFINE_RETURN_MOCK(fn, ret) \ + bool ut_ ## fn ## _mocked = false; \ + ret ut_ ## fn + +/* + * For controlling mocked function behavior, setting + * and getting values from the stub, the _P macros are + * for mocking functions that return pointer values. + */ +#define MOCK_SET(fn, val) \ + ut_ ## fn ## _mocked = true; \ + ut_ ## fn = val + +#define MOCK_GET(fn) \ + ut_ ## fn + +#define MOCK_CLEAR(fn) \ + ut_ ## fn ## _mocked = false + +#define MOCK_CLEAR_P(fn) \ + ut_ ## fn ## _mocked = false; \ + ut_ ## fn = NULL + +/* for proving to *certain* static analysis tools that we didn't reset the mock function. 
*/ +#define MOCK_CLEARED_ASSERT(fn) \ + SPDK_CU_ASSERT_FATAL(ut_ ## fn ## _mocked == false) + +/* for declaring function prototypes for wrappers */ +#define DECLARE_WRAPPER(fn, ret, args) \ + extern bool ut_ ## fn ## _mocked; \ + extern ret ut_ ## fn; \ + ret __wrap_ ## fn args; ret __real_ ## fn args + +/* for defining the implementation of wrappers for syscalls */ +#define DEFINE_WRAPPER(fn, ret, dargs, pargs) \ + DEFINE_RETURN_MOCK(fn, ret); \ + __attribute__((used)) ret __wrap_ ## fn dargs \ + { \ + if (!ut_ ## fn ## _mocked) { \ + return __real_ ## fn pargs; \ + } else { \ + return MOCK_GET(fn); \ + } \ + } + +/* DEFINE_STUB is for defining the implementation of stubs for SPDK funcs. */ +#define DEFINE_STUB(fn, ret, dargs, val) \ + bool ut_ ## fn ## _mocked = true; \ + ret ut_ ## fn = val; \ + ret fn dargs; \ + ret fn dargs \ + { \ + return MOCK_GET(fn); \ + } + +/* DEFINE_STUB_V macro is for stubs that don't have a return value */ +#define DEFINE_STUB_V(fn, dargs) \ + void fn dargs; \ + void fn dargs \ + { \ + } + +#define HANDLE_RETURN_MOCK(fn) \ + if (ut_ ## fn ## _mocked) { \ + return ut_ ## fn; \ + } + + +/* declare wrapper protos (alphabetically please) here */ +DECLARE_WRAPPER(calloc, void *, (size_t nmemb, size_t size)); + +DECLARE_WRAPPER(pthread_mutex_init, int, + (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr)); + +DECLARE_WRAPPER(pthread_mutexattr_init, int, + (pthread_mutexattr_t *attr)); + +DECLARE_WRAPPER(recvmsg, ssize_t, (int sockfd, struct msghdr *msg, int flags)); + +DECLARE_WRAPPER(sendmsg, ssize_t, (int sockfd, const struct msghdr *msg, int flags)); + +DECLARE_WRAPPER(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt)); + +/* unlink is done a bit differently. */ +extern char *g_unlink_path; +extern void (*g_unlink_callback)(void); +/* If g_unlink_path is NULL, __wrap_unlink will return ENOENT. + * If the __wrap_unlink() parameter does not match g_unlink_path, it will return ENOENT. 
+ * If g_unlink_path does match, and g_unlink_callback has been set, g_unlink_callback will + * be called before returning 0. + */ +int __wrap_unlink(const char *path); + +#endif /* SPDK_INTERNAL_MOCK_H */ diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h new file mode 100644 index 000000000..7065bc060 --- /dev/null +++ b/src/spdk/include/spdk_internal/nvme_tcp.h @@ -0,0 +1,633 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_NVME_TCP_H +#define SPDK_INTERNAL_NVME_TCP_H + +#include "spdk/likely.h" +#include "spdk/sock.h" +#include "spdk/dif.h" + +#define SPDK_CRC32C_XOR 0xffffffffUL +#define SPDK_NVME_TCP_DIGEST_LEN 4 +#define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4 +#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30 +#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8 + +/* + * Maximum number of SGL elements. + */ +#define NVME_TCP_MAX_SGL_DESCRIPTORS (16) + +#define MAKE_DIGEST_WORD(BUF, CRC32C) \ + ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \ + ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \ + ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \ + ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24))) + +#define MATCH_DIGEST_WORD(BUF, CRC32C) \ + ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \ + | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \ + | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \ + == (CRC32C)) + +#define DGET32(B) \ + ((( (uint32_t) *((uint8_t *)(B)+0)) << 0) \ + | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \ + | (((uint32_t) *((uint8_t *)(B)+3)) << 24)) + +#define DSET32(B,D) \ + (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \ + ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \ + ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \ + ((*((uint8_t 
*)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24))) + +typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg); + +struct _nvme_tcp_sgl { + struct iovec *iov; + int iovcnt; + uint32_t iov_offset; + uint32_t total_size; +}; + +struct nvme_tcp_pdu { + union { + /* to hold error pdu data */ + uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE]; + struct spdk_nvme_tcp_common_pdu_hdr common; + struct spdk_nvme_tcp_ic_req ic_req; + struct spdk_nvme_tcp_term_req_hdr term_req; + struct spdk_nvme_tcp_cmd capsule_cmd; + struct spdk_nvme_tcp_h2c_data_hdr h2c_data; + struct spdk_nvme_tcp_ic_resp ic_resp; + struct spdk_nvme_tcp_rsp capsule_resp; + struct spdk_nvme_tcp_c2h_data_hdr c2h_data; + struct spdk_nvme_tcp_r2t_hdr r2t; + + } hdr; + + bool has_hdgst; + bool ddgst_enable; + uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN]; + + uint8_t ch_valid_bytes; + uint8_t psh_valid_bytes; + uint8_t psh_len; + + nvme_tcp_qpair_xfer_complete_cb cb_fn; + void *cb_arg; + + /* The sock request ends with a 0 length iovec. Place the actual iovec immediately + * after it. 
There is a static assert below to check if the compiler inserted + * any unwanted padding */ + struct spdk_sock_request sock_req; + struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2]; + + struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; + uint32_t data_iovcnt; + uint32_t data_len; + + uint32_t readv_offset; + TAILQ_ENTRY(nvme_tcp_pdu) tailq; + uint32_t remaining; + uint32_t padding_len; + struct _nvme_tcp_sgl sgl; + + struct spdk_dif_ctx *dif_ctx; + + void *req; /* data tied to a tcp request */ + void *qpair; +}; +SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu, + sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov), + "Compiler inserted padding between iov and sock_req"); + +enum nvme_tcp_pdu_recv_state { + /* Ready to wait for PDU */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY, + + /* Active tqpair waiting for any PDU common header */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH, + + /* Active tqpair waiting for any PDU specific header */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH, + + /* Active tqpair waiting for a tcp request, only use in target side */ + NVME_TCP_PDU_RECV_STATE_AWAIT_REQ, + + /* Active tqpair waiting for payload */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD, + + /* Active tqpair does not wait for payload */ + NVME_TCP_PDU_RECV_STATE_ERROR, +}; + +enum nvme_tcp_error_codes { + NVME_TCP_PDU_IN_PROGRESS = 0, + NVME_TCP_CONNECTION_FATAL = -1, + NVME_TCP_PDU_FATAL = -2, +}; + +enum nvme_tcp_qpair_state { + NVME_TCP_QPAIR_STATE_INVALID = 0, + NVME_TCP_QPAIR_STATE_INITIALIZING = 1, + NVME_TCP_QPAIR_STATE_RUNNING = 2, + NVME_TCP_QPAIR_STATE_EXITING = 3, + NVME_TCP_QPAIR_STATE_EXITED = 4, +}; + +static const bool g_nvme_tcp_hdgst[] = { + [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = true, + 
[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_R2T] = true +}; + +static const bool g_nvme_tcp_ddgst[] = { + [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_R2T] = false +}; + +static uint32_t +nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu) +{ + uint32_t crc32c; + uint32_t hlen = pdu->hdr.common.hlen; + + crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0); + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +static uint32_t +_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + assert(iov[i].iov_base != NULL); + assert(iov[i].iov_len != 0); + crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c); + } + + return crc32c; +} + +static uint32_t +nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu) +{ + uint32_t crc32c = SPDK_CRC32C_XOR; + uint32_t mod; + + assert(pdu->data_len != 0); + + if (spdk_likely(!pdu->dif_ctx)) { + crc32c = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, crc32c); + } else { + spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt, + 0, pdu->data_len, &crc32c, pdu->dif_ctx); + } + + mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT; + if (mod != 0) { + uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod; + uint8_t pad[3] = {0, 0, 0}; + + assert(pad_length > 0); + assert(pad_length <= sizeof(pad)); + crc32c = spdk_crc32c_update(pad, pad_length, crc32c); + } + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +static inline void +_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt, + uint32_t 
iov_offset) +{ + s->iov = iov; + s->iovcnt = iovcnt; + s->iov_offset = iov_offset; + s->total_size = 0; +} + +static inline void +_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step) +{ + s->iov_offset += step; + while (s->iovcnt > 0) { + if (s->iov_offset < s->iov->iov_len) { + break; + } + + s->iov_offset -= s->iov->iov_len; + s->iov++; + s->iovcnt--; + } +} + +static inline void +_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len) +{ + if (_buf != NULL) { + *_buf = s->iov->iov_base + s->iov_offset; + } + if (_buf_len != NULL) { + *_buf_len = s->iov->iov_len - s->iov_offset; + } +} + +static inline bool +_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len) +{ + if (s->iov_offset >= data_len) { + s->iov_offset -= data_len; + } else { + assert(s->iovcnt > 0); + s->iov->iov_base = data + s->iov_offset; + s->iov->iov_len = data_len - s->iov_offset; + s->total_size += data_len - s->iov_offset; + s->iov_offset = 0; + s->iov++; + s->iovcnt--; + if (s->iovcnt == 0) { + return false; + } + } + + return true; +} + +static inline bool +_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + if (!_nvme_tcp_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) { + return false; + } + } + + return true; +} + +static inline uint32_t +_get_iov_array_size(struct iovec *iov, int iovcnt) +{ + int i; + uint32_t size = 0; + + for (i = 0; i < iovcnt; i++) { + size += iov[i].iov_len; + } + + return size; +} + +static inline bool +_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt, + uint32_t data_len, const struct spdk_dif_ctx *dif_ctx) +{ + int rc; + uint32_t mapped_len = 0; + + if (s->iov_offset >= data_len) { + s->iov_offset -= _get_iov_array_size(iov, iovcnt); + } else { + rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt, + s->iov_offset, data_len - s->iov_offset, + &mapped_len, dif_ctx); + if 
(rc < 0) { + SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n"); + return false; + } + + s->total_size += mapped_len; + s->iov_offset = 0; + assert(s->iovcnt >= rc); + s->iovcnt -= rc; + s->iov += rc; + + if (s->iovcnt == 0) { + return false; + } + } + + return true; +} + +static int +nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu, + bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length) +{ + uint32_t hlen, plen; + struct _nvme_tcp_sgl *sgl; + + if (iovcnt == 0) { + return 0; + } + + sgl = &pdu->sgl; + _nvme_tcp_sgl_init(sgl, iov, iovcnt, 0); + hlen = pdu->hdr.common.hlen; + + /* Header Digest */ + if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) { + hlen += SPDK_NVME_TCP_DIGEST_LEN; + } + + plen = hlen; + if (!pdu->data_len) { + /* PDU header + possible header digest */ + _nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen); + goto end; + } + + /* Padding */ + if (pdu->padding_len > 0) { + hlen += pdu->padding_len; + plen = hlen; + } + + if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) { + goto end; + } + + /* Data Segment */ + plen += pdu->data_len; + if (spdk_likely(!pdu->dif_ctx)) { + if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) { + goto end; + } + } else { + if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt, + pdu->data_len, pdu->dif_ctx)) { + goto end; + } + } + + /* Data Digest */ + if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) { + plen += SPDK_NVME_TCP_DIGEST_LEN; + _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN); + } + + assert(plen == pdu->hdr.common.plen); + +end: + if (_mapped_length != NULL) { + *_mapped_length = sgl->total_size; + } + + return iovcnt - sgl->iovcnt; +} + +static int +nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu, + bool ddgst_enable, uint32_t *_mapped_length) +{ + struct _nvme_tcp_sgl *sgl; + + if (iovcnt == 0) { + return 0; + 
} + + sgl = &pdu->sgl; + _nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset); + + if (spdk_likely(!pdu->dif_ctx)) { + if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) { + goto end; + } + } else { + if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt, + pdu->data_len, pdu->dif_ctx)) { + goto end; + } + } + + /* Data Digest */ + if (ddgst_enable) { + _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN); + } + +end: + if (_mapped_length != NULL) { + *_mapped_length = sgl->total_size; + } + return iovcnt - sgl->iovcnt; +} + +static int +nvme_tcp_read_data(struct spdk_sock *sock, int bytes, + void *buf) +{ + int ret; + + ret = spdk_sock_recv(sock, buf, bytes); + + if (ret > 0) { + return ret; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + /* For connect reset issue, do not output error log */ + if (errno != ECONNRESET) { + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + } + + /* connection closed */ + return NVME_TCP_CONNECTION_FATAL; +} + +static int +nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt) +{ + int ret; + + assert(sock != NULL); + if (iov == NULL || iovcnt == 0) { + return 0; + } + + if (iovcnt == 1) { + return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base); + } + + ret = spdk_sock_readv(sock, iov, iovcnt); + + if (ret > 0) { + return ret; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + /* For connect reset issue, do not output error log */ + if (errno != ECONNRESET) { + SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + } + + /* connection closed */ + return NVME_TCP_CONNECTION_FATAL; +} + + +static int +nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu) +{ + struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1]; + int iovcnt; + + iovcnt = 
nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu, + pdu->ddgst_enable, NULL); + assert(iovcnt >= 0); + + return nvme_tcp_readv_data(sock, iov, iovcnt); +} + +static void +_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len) +{ + pdu->data_iov[0].iov_base = data; + pdu->data_iov[0].iov_len = data_len; + pdu->data_iovcnt = 1; +} + +static void +nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len) +{ + _nvme_tcp_pdu_set_data(pdu, data, data_len); + pdu->data_len = data_len; +} + +static void +nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu, + struct iovec *iov, int iovcnt, + uint32_t data_offset, uint32_t data_len) +{ + uint32_t buf_offset, buf_len, remain_len, len; + uint8_t *buf; + struct _nvme_tcp_sgl *pdu_sgl, buf_sgl; + + pdu->data_len = data_len; + + if (spdk_likely(!pdu->dif_ctx)) { + buf_offset = data_offset; + buf_len = data_len; + } else { + spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset); + spdk_dif_get_range_with_md(data_offset, data_len, + &buf_offset, &buf_len, pdu->dif_ctx); + } + + if (iovcnt == 1) { + _nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len); + } else { + pdu_sgl = &pdu->sgl; + + _nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0); + _nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0); + + _nvme_tcp_sgl_advance(&buf_sgl, buf_offset); + remain_len = buf_len; + + while (remain_len > 0) { + _nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len); + len = spdk_min(len, remain_len); + + _nvme_tcp_sgl_advance(&buf_sgl, len); + remain_len -= len; + + if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) { + break; + } + } + + assert(remain_len == 0); + assert(pdu_sgl->total_size == buf_len); + + pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt; + } +} + +static void +nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable) +{ + uint8_t psh_len, pdo, padding_len; + + psh_len = 
pdu->hdr.common.hlen; + + if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) { + pdu->has_hdgst = true; + psh_len += SPDK_NVME_TCP_DIGEST_LEN; + if (pdu->hdr.common.plen > psh_len) { + pdo = pdu->hdr.common.pdo; + padding_len = pdo - psh_len; + if (padding_len > 0) { + psh_len = pdo; + } + } + } + + psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr); + pdu->psh_len = psh_len; +} + +#endif /* SPDK_INTERNAL_NVME_TCP_H */ diff --git a/src/spdk/include/spdk_internal/rdma.h b/src/spdk/include/spdk_internal/rdma.h new file mode 100644 index 000000000..4a6d5104b --- /dev/null +++ b/src/spdk/include/spdk_internal/rdma.h @@ -0,0 +1,117 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_RDMA_H +#define SPDK_RDMA_H + +#include <infiniband/verbs.h> +#include <rdma/rdma_cma.h> +#include <rdma/rdma_verbs.h> + +struct spdk_rdma_qp_init_attr { + void *qp_context; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_qp_cap cap; + struct ibv_pd *pd; +}; + +struct spdk_rdma_send_wr_list { + struct ibv_send_wr *first; + struct ibv_send_wr *last; +}; + +struct spdk_rdma_qp { + struct ibv_qp *qp; + struct rdma_cm_id *cm_id; + struct spdk_rdma_send_wr_list send_wrs; +}; + +/** + * Create RDMA provider specific qpair + * \param cm_id Pointer to RDMACM cm_id + * \param qp_attr Pointer to qpair init attributes + * \return Pointer to a newly created qpair on success or NULL on failure + */ +struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id, + struct spdk_rdma_qp_init_attr *qp_attr); + +/** + * Accept a connection request. 
Called by the passive side (NVMEoF target) + * \param spdk_rdma_qp Pointer to a qpair + * \param conn_param Optional information needed to establish the connection + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param); + +/** + * Complete the connection process, must be called by the active + * side (NVMEoF initiator) upon receipt of RDMA_CM_EVENT_CONNECT_RESPONSE + * \param spdk_rdma_qp Pointer to a qpair + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Destroy RDMA provider specific qpair + * \param spdk_rdma_qp Pointer to qpair to be destroyed + */ +void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Disconnect a connection and transition associated qpair to error state. + * Generates RDMA_CM_EVENT_DISCONNECTED on both connection sides + * \param spdk_rdma_qp Pointer to qpair to be disconnected + */ +int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Append the given send wr structure to the qpair's outstanding sends list. + * This function accepts either a single Work Request or the first WR in a linked list. 
+ * + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param first Pointer to the first Work Request + * \return true if there were no outstanding WRs before, false otherwise + */ +bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first); + +/** + * Submit all queued Work Requests + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr); + +#endif /* SPDK_RDMA_H */ diff --git a/src/spdk/include/spdk_internal/sock.h b/src/spdk/include/spdk_internal/sock.h new file mode 100644 index 000000000..d88d6bd03 --- /dev/null +++ b/src/spdk/include/spdk_internal/sock.h @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * TCP network implementation abstraction layer + */ + +#ifndef SPDK_INTERNAL_SOCK_H +#define SPDK_INTERNAL_SOCK_H + +#include "spdk/stdinc.h" +#include "spdk/sock.h" +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_EVENTS_PER_POLL 32 +#define DEFAULT_SOCK_PRIORITY 0 +#define MIN_SOCK_PIPE_SIZE 1024 + +struct spdk_sock { + struct spdk_net_impl *net_impl; + struct spdk_sock_opts opts; + int cb_cnt; + spdk_sock_cb cb_fn; + void *cb_arg; + struct spdk_sock_group_impl *group_impl; + TAILQ_ENTRY(spdk_sock) link; + + int max_iovcnt; + TAILQ_HEAD(, spdk_sock_request) queued_reqs; + TAILQ_HEAD(, spdk_sock_request) pending_reqs; + int queued_iovcnt; + + struct { + uint8_t closed : 1; + uint8_t reserved : 7; + } flags; +}; + +struct spdk_sock_group { + STAILQ_HEAD(, spdk_sock_group_impl) group_impls; + void *ctx; +}; + +struct spdk_sock_group_impl { + struct spdk_net_impl *net_impl; + TAILQ_HEAD(, spdk_sock) socks; + STAILQ_ENTRY(spdk_sock_group_impl) link; + /* List of removed sockets. refreshed each time we poll the sock group. */ + int num_removed_socks; + /* Unfortunately, we can't just keep a tailq of the sockets in case they are freed + * or added to another poll group later. 
+ */ + uintptr_t removed_socks[MAX_EVENTS_PER_POLL]; +}; + +struct spdk_net_impl { + const char *name; + int priority; + + int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr, + int clen, uint16_t *cport); + struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts); + struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts); + struct spdk_sock *(*accept)(struct spdk_sock *sock); + int (*close)(struct spdk_sock *sock); + ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len); + ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + + void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req); + int (*flush)(struct spdk_sock *sock); + + int (*set_recvlowat)(struct spdk_sock *sock, int nbytes); + int (*set_recvbuf)(struct spdk_sock *sock, int sz); + int (*set_sendbuf)(struct spdk_sock *sock, int sz); + + bool (*is_ipv6)(struct spdk_sock *sock); + bool (*is_ipv4)(struct spdk_sock *sock); + bool (*is_connected)(struct spdk_sock *sock); + + int (*get_placement_id)(struct spdk_sock *sock, int *placement_id); + struct spdk_sock_group_impl *(*group_impl_create)(void); + int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); + int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); + int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events, + struct spdk_sock **socks); + int (*group_impl_close)(struct spdk_sock_group_impl *group); + + int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len); + int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len); + + STAILQ_ENTRY(spdk_net_impl) link; +}; + +void spdk_net_impl_register(struct spdk_net_impl *impl, int priority); + +#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \ +static void __attribute__((constructor)) 
net_impl_register_##name(void) \ +{ \ + spdk_net_impl_register(impl, priority); \ +} + +static inline void +spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req) +{ + TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link); + sock->queued_iovcnt += req->iovcnt; +} + +static inline void +spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req) +{ + TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); + assert(sock->queued_iovcnt >= req->iovcnt); + sock->queued_iovcnt -= req->iovcnt; + TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link); +} + +static inline int +spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err) +{ + bool closed; + int rc = 0; + + TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); + + req->internal.offset = 0; + + closed = sock->flags.closed; + sock->cb_cnt++; + req->cb_fn(req->cb_arg, err); + assert(sock->cb_cnt > 0); + sock->cb_cnt--; + + if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { + /* The user closed the socket in response to a callback above. 
*/ + rc = -1; + spdk_sock_close(&sock); + } + + return rc; +} + +static inline int +spdk_sock_abort_requests(struct spdk_sock *sock) +{ + struct spdk_sock_request *req; + bool closed; + int rc = 0; + + closed = sock->flags.closed; + sock->cb_cnt++; + + req = TAILQ_FIRST(&sock->pending_reqs); + while (req) { + TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); + + req->cb_fn(req->cb_arg, -ECANCELED); + + req = TAILQ_FIRST(&sock->pending_reqs); + } + + req = TAILQ_FIRST(&sock->queued_reqs); + while (req) { + TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); + + assert(sock->queued_iovcnt >= req->iovcnt); + sock->queued_iovcnt -= req->iovcnt; + + req->cb_fn(req->cb_arg, -ECANCELED); + + req = TAILQ_FIRST(&sock->queued_reqs); + } + assert(sock->cb_cnt > 0); + sock->cb_cnt--; + + assert(TAILQ_EMPTY(&sock->queued_reqs)); + assert(TAILQ_EMPTY(&sock->pending_reqs)); + + if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { + /* The user closed the socket in response to a callback above. */ + rc = -1; + spdk_sock_close(&sock); + } + + return rc; +} + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_SOCK_H */ diff --git a/src/spdk/include/spdk_internal/thread.h b/src/spdk/include/spdk_internal/thread.h new file mode 100644 index 000000000..10bc4824c --- /dev/null +++ b/src/spdk/include/spdk_internal/thread.h @@ -0,0 +1,136 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_THREAD_INTERNAL_H_ +#define SPDK_THREAD_INTERNAL_H_ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" + +#define SPDK_MAX_POLLER_NAME_LEN 256 +#define SPDK_MAX_THREAD_NAME_LEN 256 + +enum spdk_poller_state { + /* The poller is registered with a thread but not currently executing its fn. */ + SPDK_POLLER_STATE_WAITING, + + /* The poller is currently running its fn. */ + SPDK_POLLER_STATE_RUNNING, + + /* The poller was unregistered during the execution of its fn. */ + SPDK_POLLER_STATE_UNREGISTERED, + + /* The poller is in the process of being paused. It will be paused + * during the next time it's supposed to be executed. + */ + SPDK_POLLER_STATE_PAUSING, + + /* The poller is registered but currently paused. 
It's on the + * paused_pollers list. + */ + SPDK_POLLER_STATE_PAUSED, +}; + +struct spdk_poller { + TAILQ_ENTRY(spdk_poller) tailq; + + /* Current state of the poller; should only be accessed from the poller's thread. */ + enum spdk_poller_state state; + + uint64_t period_ticks; + uint64_t next_run_tick; + uint64_t run_count; + uint64_t busy_count; + spdk_poller_fn fn; + void *arg; + struct spdk_thread *thread; + + char name[SPDK_MAX_POLLER_NAME_LEN + 1]; +}; + +enum spdk_thread_state { + /* The thread is processing pollers and messages by spdk_thread_poll(). */ + SPDK_THREAD_STATE_RUNNING, + + /* The thread is in the process of termination. It reaps unregistering + * pollers and releases I/O channels. + */ + SPDK_THREAD_STATE_EXITING, + + /* The thread is exited. It is ready to call spdk_thread_destroy(). */ + SPDK_THREAD_STATE_EXITED, +}; + +struct spdk_thread { + uint64_t tsc_last; + struct spdk_thread_stats stats; + /* + * Contains pollers actively running on this thread. Pollers + * are run round-robin. The thread takes one poller from the head + * of the ring, executes it, then puts it back at the tail of + * the ring.
+ */ + TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers; + struct spdk_ring *messages; + SLIST_HEAD(, spdk_msg) msg_cache; + size_t msg_cache_count; + spdk_msg_fn critical_msg; + uint64_t id; + enum spdk_thread_state state; + + TAILQ_HEAD(, spdk_io_channel) io_channels; + TAILQ_ENTRY(spdk_thread) tailq; + + char name[SPDK_MAX_THREAD_NAME_LEN + 1]; + struct spdk_cpuset cpumask; + uint64_t exit_timeout_tsc; + + /* User context allocated at the end */ + uint8_t ctx[0]; +}; + +const char *spdk_poller_state_str(enum spdk_poller_state state); + +const char *spdk_io_device_get_name(struct io_device *dev); + +#endif /* SPDK_THREAD_INTERNAL_H_ */ diff --git a/src/spdk/include/spdk_internal/uring.h b/src/spdk/include/spdk_internal/uring.h new file mode 100644 index 000000000..ff22f11d4 --- /dev/null +++ b/src/spdk/include/spdk_internal/uring.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_URING_H +#define SPDK_INTERNAL_URING_H + +#include <liburing.h> + +#ifndef __NR_sys_io_uring_enter +#define __NR_sys_io_uring_enter 426 +#endif + +static int +spdk_io_uring_enter(int ring_fd, unsigned int to_submit, + unsigned int min_complete, unsigned int flags) +{ + return syscall(__NR_sys_io_uring_enter, ring_fd, to_submit, + min_complete, flags, NULL, 0); +} + +#endif /* SPDK_INTERNAL_URING_H */ diff --git a/src/spdk/include/spdk_internal/utf.h b/src/spdk/include/spdk_internal/utf.h new file mode 100644 index 000000000..b2b1c3c45 --- /dev/null +++ b/src/spdk/include/spdk_internal/utf.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_UTF_H_ +#define SPDK_UTF_H_ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/likely.h" +#include "spdk/string.h" + +static inline bool +utf8_tail(uint8_t c) +{ + /* c >= 0x80 && c <= 0xBF, or binary 10xxxxxx */ + return (c & 0xC0) == 0x80; +} + +/* + * Check for a valid UTF-8 encoding of a single codepoint. + * + * \return Length of valid UTF-8 byte sequence, or negative if invalid.
+ */ +static inline int +utf8_valid(const uint8_t *start, const uint8_t *end) +{ + const uint8_t *p = start; + uint8_t b0, b1, b2, b3; + + if (p == end) { + return 0; + } + + b0 = *p; + + if (b0 <= 0x7F) { + return 1; + } + + if (b0 <= 0xC1) { + /* Invalid start byte */ + return -1; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b1 = *p; + + if (b0 <= 0xDF) { + /* C2..DF 80..BF */ + if (!utf8_tail(b1)) { + return -1; + } + return 2; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b2 = *p; + + if (b0 == 0xE0) { + /* E0 A0..BF 80..BF */ + if (b1 < 0xA0 || b1 > 0xBF || !utf8_tail(b2)) { + return -1; + } + return 3; + } else if (b0 == 0xED && b1 >= 0xA0) { + /* + * UTF-16 surrogate pairs use U+D800..U+DFFF, which would be encoded as + * ED A0..BF 80..BF in UTF-8; however, surrogate pairs are not allowed in UTF-8. + */ + return -1; + } else if (b0 <= 0xEF) { + /* E1..EF 80..BF 80..BF */ + if (!utf8_tail(b1) || !utf8_tail(b2)) { + return -1; + } + return 3; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b3 = *p; + + if (b0 == 0xF0) { + /* F0 90..BF 80..BF 80..BF */ + if (b1 < 0x90 || b1 > 0xBF || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } else if (b0 <= 0xF3) { + /* F1..F3 80..BF 80..BF 80..BF */ + if (!utf8_tail(b1) || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } else if (b0 == 0xF4) { + /* F4 80..8F 80..BF 80..BF */ + if (b1 < 0x80 || b1 > 0x8F || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } + + return -1; +} + +static inline uint32_t +utf8_decode_unsafe_1(const uint8_t *data) +{ + return data[0]; +} + +static inline uint32_t +utf8_decode_unsafe_2(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x1F) << 6); + codepoint |= (data[1] & 0x3F); + + return codepoint; +} + +static inline uint32_t +utf8_decode_unsafe_3(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x0F) 
<< 12); + codepoint |= (data[1] & 0x3F) << 6; + codepoint |= (data[2] & 0x3F); + + return codepoint; +} + +static inline uint32_t +utf8_decode_unsafe_4(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x07) << 18); + codepoint |= (data[1] & 0x3F) << 12; + codepoint |= (data[2] & 0x3F) << 6; + codepoint |= (data[3] & 0x3F); + + return codepoint; +} + +/* + * Encode a single Unicode codepoint as UTF-8. + * + * buf must have at least 4 bytes of space available (hence unsafe). + * + * \return Number of bytes appended to buf, or negative if encoding failed. + */ +static inline int +utf8_encode_unsafe(uint8_t *buf, uint32_t c) +{ + if (c <= 0x7F) { + buf[0] = c; + return 1; + } else if (c <= 0x7FF) { + buf[0] = 0xC0 | (c >> 6); + buf[1] = 0x80 | (c & 0x3F); + return 2; + } else if (c >= 0xD800 && c <= 0xDFFF) { + /* UTF-16 surrogate pairs - invalid in UTF-8 */ + return -1; + } else if (c <= 0xFFFF) { + buf[0] = 0xE0 | (c >> 12); + buf[1] = 0x80 | ((c >> 6) & 0x3F); + buf[2] = 0x80 | (c & 0x3F); + return 3; + } else if (c <= 0x10FFFF) { + buf[0] = 0xF0 | (c >> 18); + buf[1] = 0x80 | ((c >> 12) & 0x3F); + buf[2] = 0x80 | ((c >> 6) & 0x3F); + buf[3] = 0x80 | (c & 0x3F); + return 4; + } + return -1; +} + +static inline int +utf8_codepoint_len(uint32_t c) +{ + if (c <= 0x7F) { + return 1; + } else if (c <= 0x7FF) { + return 2; + } else if (c >= 0xD800 && c <= 0xDFFF) { + /* UTF-16 surrogate pairs - invalid in UTF-8 */ + return -1; + } else if (c <= 0xFFFF) { + return 3; + } else if (c <= 0x10FFFF) { + return 4; + } + return -1; +} + +static inline bool +utf16_valid_surrogate_high(uint32_t val) +{ + return val >= 0xD800 && val <= 0xDBFF; +} + +static inline bool +utf16_valid_surrogate_low(uint32_t val) +{ + return val >= 0xDC00 && val <= 0xDFFF; +} + +/* + * Check for a valid UTF-16LE encoding of a single codepoint. + * + * \return Length of valid UTF-16LE sequence in 16-bit code units, or negative if invalid. 
+ */ +static inline int +utf16le_valid(const uint16_t *start, const uint16_t *end) +{ + const uint16_t *p = start; + uint16_t high, low; + + if (p == end) { + return 0; + } + + high = from_le16(p); + + if (high <= 0xD7FF || high >= 0xE000) { + /* Single code unit in BMP */ + return 1; + } + + if (high >= 0xDC00) { + /* Low surrogate in first code unit - invalid */ + return -1; + } + + assert(utf16_valid_surrogate_high(high)); + + if (++p == end) { + /* Not enough code units left */ + return -1; + } + low = from_le16(p); + + if (!utf16_valid_surrogate_low(low)) { + return -1; + } + + /* Valid surrogate pair */ + return 2; +} + +static inline uint32_t +utf16_decode_surrogate_pair(uint32_t high, uint32_t low) +{ + uint32_t codepoint; + + assert(utf16_valid_surrogate_high(high)); + assert(utf16_valid_surrogate_low(low)); + + codepoint = low; + codepoint &= 0x3FF; + codepoint |= ((high & 0x3FF) << 10); + codepoint += 0x10000; + + return codepoint; +} + +static inline void +utf16_encode_surrogate_pair(uint32_t codepoint, uint16_t *high, uint16_t *low) +{ + assert(codepoint >= 0x10000); + assert(codepoint <= 0x10FFFF); + + codepoint -= 0x10000; + *high = 0xD800 | (codepoint >> 10); + *low = 0xDC00 | (codepoint & 0x3FF); + + assert(utf16_valid_surrogate_high(*high)); + assert(utf16_valid_surrogate_low(*low)); +} + +#endif diff --git a/src/spdk/include/spdk_internal/vhost_user.h b/src/spdk/include/spdk_internal/vhost_user.h new file mode 100644 index 000000000..92ed3b65b --- /dev/null +++ b/src/spdk/include/spdk_internal/vhost_user.h @@ -0,0 +1,140 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Structures defined in the vhost-user specification + */ + +#ifndef SPDK_VHOST_USER_H +#define SPDK_VHOST_USER_H + +#include "spdk/stdinc.h" + +#include <linux/vhost.h> + +#ifndef VHOST_USER_MEMORY_MAX_NREGIONS +#define VHOST_USER_MEMORY_MAX_NREGIONS 8 +#endif + +#ifndef VHOST_USER_MAX_CONFIG_SIZE +#define VHOST_USER_MAX_CONFIG_SIZE 256 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CRYPTO_CREATE_SESS = 26, + VHOST_USER_CRYPTO_CLOSE_SESS = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_MAX +}; + +/** Get/set config msg payload */ +struct vhost_user_config { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +}; + +/** Fixed-size vhost_memory struct */ +struct 
vhost_memory_padded { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /**< the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory_padded memory; + struct vhost_user_config cfg; + } payload; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +#endif /* SPDK_VHOST_USER_H */ diff --git a/src/spdk/include/spdk_internal/virtio.h b/src/spdk/include/spdk_internal/virtio.h new file mode 100644 index 000000000..c30013efe --- /dev/null +++ b/src/spdk/include/spdk_internal/virtio.h @@ -0,0 +1,486 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VIRTIO_H +#define SPDK_VIRTIO_H + +#include "spdk/stdinc.h" + +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_config.h> + +#include "spdk_internal/log.h" +#include "spdk/likely.h" +#include "spdk/queue.h" +#include "spdk/json.h" +#include "spdk/thread.h" +#include "spdk/pci_ids.h" +#include "spdk/env.h" + +/** + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100 + +/* Extra status define for readability */ +#define VIRTIO_CONFIG_S_RESET 0 + +struct virtio_dev_ops; + +struct virtio_dev { + struct virtqueue **vqs; + + /** Name of this virtio dev set by backend */ + char *name; + + /** Fixed number of backend-specific non-I/O virtqueues. */ + uint16_t fixed_queues_num; + + /** Max number of virtqueues the host supports. */ + uint16_t max_queues; + + /** Common device & guest features. 
*/ + uint64_t negotiated_features; + + int is_hw; + + /** Modern/legacy virtio device flag. */ + uint8_t modern; + + /** Mutex for asynchronous virtqueue-changing operations. */ + pthread_mutex_t mutex; + + /** Backend-specific callbacks. */ + const struct virtio_dev_ops *backend_ops; + + /** Context for the backend ops */ + void *ctx; +}; + +struct virtio_dev_ops { + int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset, + void *dst, int len); + int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset, + const void *src, int len); + uint8_t (*get_status)(struct virtio_dev *hw); + void (*set_status)(struct virtio_dev *hw, uint8_t status); + + /** + * Get device features. The features might be already + * negotiated with driver (guest) features. + */ + uint64_t (*get_features)(struct virtio_dev *vdev); + + /** + * Negotiate and set device features. + * The negotiation can fail with return code -1. + * This function should also set vdev->negotiated_features field. + */ + int (*set_features)(struct virtio_dev *vdev, uint64_t features); + + /** Destruct virtio device */ + void (*destruct_dev)(struct virtio_dev *vdev); + + uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id); + int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq); + void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq); + void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq); + + void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w); + void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w); +}; + +struct vq_desc_extra { + void *cookie; + uint16_t ndescs; +}; + +struct virtqueue { + struct virtio_dev *vdev; /**< owner of this virtqueue */ + struct vring vq_ring; /**< vring keeping desc, used and avail */ + /** + * Last consumed descriptor in the used table, + * trails vq_ring.used->idx. 
+ */ + uint16_t vq_used_cons_idx; + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_free_cnt; /**< num of desc available */ + uint16_t vq_avail_idx; /**< sync until needed */ + + void *vq_ring_virt_mem; /**< virtual address of vring */ + unsigned int vq_ring_size; + + uint64_t vq_ring_mem; /**< physical address of vring */ + + /** + * Head of the free chain in the descriptor table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. + */ + uint16_t vq_desc_head_idx; + + /** + * Tail of the free chain in desc table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. + */ + uint16_t vq_desc_tail_idx; + uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t *notify_addr; + + /** Thread that's polling this queue. */ + struct spdk_thread *owner_thread; + + uint16_t req_start; + uint16_t req_end; + uint16_t reqs_finished; + + struct vq_desc_extra vq_descx[0]; +}; + +enum spdk_virtio_desc_type { + SPDK_VIRTIO_DESC_RO = 0, /**< Read only */ + SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */ + /* TODO VIRTIO_DESC_INDIRECT */ +}; + +/** Context for creating PCI virtio_devs */ +struct virtio_pci_ctx; + +/** + * Callback for creating virtio_dev from a PCI device. + * \param pci_ctx PCI context to be associated with a virtio_dev + * \param ctx context provided by the user + * \return 0 on success, -1 on error. + */ +typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx); + +uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt); + +/** + * Start a new request on the current vring head position and associate it + * with an opaque cookie object. The previous request in given vq will be + * made visible to the device in hopes it can be processed early, but there's + * no guarantee it will be until the device is notified with \c + * virtqueue_req_flush. 
This behavior is simply an optimization and virtqueues + * must always be flushed. Empty requests (with no descriptors added) will be + * ignored. The device owning given virtqueue must be started. + * + * \param vq virtio queue + * \param cookie opaque object to associate with this request. Once the request + * is sent, processed and a response is received, the same object will be + * returned to the user after calling the virtio poll API. + * \param iovcnt number of required iovectors for the request. This can be + * higher than than the actual number of iovectors to be added. + * \return 0 on success or negative errno otherwise. If the `iovcnt` is + * greater than virtqueue depth, -EINVAL is returned. If simply not enough + * iovectors are available, -ENOMEM is returned. + */ +int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt); + +/** + * Flush a virtqueue. This will notify the device if it's required. + * The device owning given virtqueue must be started. + * + * \param vq virtio queue + */ +void virtqueue_req_flush(struct virtqueue *vq); + +/** + * Abort the very last request in a virtqueue. This will restore virtqueue + * state to the point before the last request was created. Note that this + * is only effective if a queue hasn't been flushed yet. The device owning + * given virtqueue must be started. + * + * \param vq virtio queue + */ +void virtqueue_req_abort(struct virtqueue *vq); + +/** + * Add iovec chain to the last created request. This call does not provide any + * error-checking. The caller has to ensure that he doesn't add more iovs than + * what was specified during request creation. The device owning given virtqueue + * must be started. 
+ * + * \param vq virtio queue + * \param iovs iovec array + * \param iovcnt number of iovs in iovec array + * \param desc_type type of all given iovectors + */ +void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt, + enum spdk_virtio_desc_type desc_type); + +/** + * Construct a virtio device. The device will be in stopped state by default. + * Before doing any I/O, it has to be manually started via \c virtio_dev_restart. + * + * \param vdev memory for virtio device, must be zeroed + * \param name name for the virtio device + * \param ops backend callbacks + * \param ops_ctx argument for the backend callbacks + * \return zero on success, or negative error code otherwise + */ +int virtio_dev_construct(struct virtio_dev *vdev, const char *name, + const struct virtio_dev_ops *ops, void *ops_ctx); + +/** + * Reset the device and prepare it to be `virtio_dev_start`ed. This call + * will also renegotiate feature flags. + * + * \param vdev virtio device + * \param req_features features this driver supports. A VIRTIO_F_VERSION_1 + * flag will be automatically appended, as legacy devices are not supported. + */ +int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features); + +/** + * Notify the host to start processing this virtio device. This is + * a blocking call that won't return until the host has started. + * This will also allocate virtqueues. + * + * \param vdev virtio device + * \param max_queues number of queues to allocate. The max number of + * usable I/O queues is also limited by the host device. `vdev` will be + * started successfully even if the host supports less queues than requested. + * \param fixed_queue_num number of queues preceeding the first + * request queue. For Virtio-SCSI this is equal to 2, as there are + * additional event and control queues. + */ +int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues, + uint16_t fixed_queues_num); + +/** + * Stop the host from processing the device. 
This is a blocking call + * that won't return until all outstanding I/O has been processed on + * the host (virtio device) side. In order to re-start the device, it + * has to be `virtio_dev_reset` first. + * + * \param vdev virtio device + */ +void virtio_dev_stop(struct virtio_dev *vdev); + +/** + * Destruct a virtio device. Note that it must be in the stopped state. + * The virtio_dev should be manually freed afterwards. + * + * \param vdev virtio device + */ +void virtio_dev_destruct(struct virtio_dev *vdev); + +/** + * Bind a virtqueue with given index to the current thread; + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index virtqueue index + * \return 0 on success, -1 in case a virtqueue with given index either + * does not exists or is already acquired. + */ +int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index); + +/** + * Look for unused queue and bind it to the current thread. This will + * scan the queues in range from *start_index* (inclusive) up to + * vdev->max_queues (exclusive). + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param start_index virtqueue index to start looking from + * \return index of acquired queue or -1 in case no unused queue in given range + * has been found + */ +int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index); + +/** + * Get thread that acquired given virtqueue. + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index index of virtqueue + * \return thread that acquired given virtqueue. If the queue is unused + * or doesn't exist a NULL is returned. + */ +struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index); + +/** + * Check if virtqueue with given index is acquired. + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index index of virtqueue + * \return virtqueue acquire status. 
in case of invalid index *false* is returned. + */ +bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index); + +/** + * Release previously acquired queue. + * + * This function must be called from the thread that acquired the queue. + * + * \param vdev vhost device + * \param index index of virtqueue to release + */ +void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index); + +/** + * Get Virtio status flags. + * + * \param vdev virtio device + */ +uint8_t virtio_dev_get_status(struct virtio_dev *vdev); + +/** + * Set Virtio status flag. The flags have to be set in very specific order + * defined the VIRTIO 1.0 spec section 3.1.1. To unset the flags, stop the + * device or set \c VIRTIO_CONFIG_S_RESET status flag. There is no way to + * unset only particular flags. + * + * \param vdev virtio device + * \param flag flag to set + */ +void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag); + +/** + * Write raw data into the device config at given offset. This call does not + * provide any error checking. + * + * \param vdev virtio device + * \param offset offset in bytes + * \param src pointer to data to copy from + * \param len length of data to copy in bytes + * \return 0 on success, negative errno otherwise + */ +int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len); + +/** + * Read raw data from the device config at given offset. This call does not + * provide any error checking. + * + * \param vdev virtio device + * \param offset offset in bytes + * \param dst pointer to buffer to copy data into + * \param len length of data to copy in bytes + * \return 0 on success, negative errno otherwise + */ +int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len); + +/** + * Get backend-specific ops for given device. 
+ * + * \param vdev virtio device + */ +const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev); + +/** + * Check if the device has negotiated given feature bit. + * + * \param vdev virtio device + * \param bit feature bit + */ +static inline bool +virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit) +{ + return !!(vdev->negotiated_features & (1ULL << bit)); +} + +/** + * Dump all device specific information into given json stream. + * + * \param vdev virtio device + * \param w json stream + */ +void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w); + +/** + * Enumerate all PCI Virtio devices of given type on the system. + * + * \param enum_cb a function to be called for each valid PCI device. + * If a virtio_dev is has been created, the callback should return 0. + * Returning any other value will cause the PCI context to be freed, + * making it unusable. + * \param enum_ctx additional opaque context to be passed into `enum_cb` + * \param pci_device_id PCI Device ID of devices to iterate through + */ +int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx, + uint16_t pci_device_id); + +/** + * Attach a PCI Virtio device of given type. + * + * \param create_cb callback to create a virtio_dev. + * If virtio_dev is has been created, the callback should return 0. + * Returning any other value will cause the PCI context to be freed, + * making it unusable. + * \param enum_ctx additional opaque context to be passed into `enum_cb` + * \param pci_device_id PCI Device ID of devices to iterate through + * \param pci_addr PCI address of the device to attach + */ +int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx, + uint16_t pci_device_id, struct spdk_pci_addr *pci_addr); + +/** + * Connect to a vhost-user device and init corresponding virtio_dev struct. + * The virtio_dev will have to be freed with \c virtio_dev_free. 
+ * + * \param vdev preallocated vhost device struct to operate on + * \param name name of this virtio device + * \param path path to the Unix domain socket of the vhost-user device + * \param queue_size size of each of the queues + * \return virtio device + */ +int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path, + uint32_t queue_size); + +/** + * Initialize virtio_dev for a given PCI device. + * The virtio_dev has to be freed with \c virtio_dev_destruct. + * + * \param vdev preallocated vhost device struct to operate on + * \param name name of this virtio device + * \param pci_ctx context of the PCI device + * \return 0 on success, -1 on error. + */ +int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name, + struct virtio_pci_ctx *pci_ctx); + +#endif /* SPDK_VIRTIO_H */ |