summaryrefslogtreecommitdiffstats
path: root/src/spdk/lib/nvmf
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/spdk/lib/nvmf/Makefile63
-rw-r--r--src/spdk/lib/nvmf/ctrlr.c1773
-rw-r--r--src/spdk/lib/nvmf/ctrlr_bdev.c531
-rw-r--r--src/spdk/lib/nvmf/ctrlr_discovery.c144
-rw-r--r--src/spdk/lib/nvmf/nvmf.c1173
-rw-r--r--src/spdk/lib/nvmf/nvmf_fc.h871
-rw-r--r--src/spdk/lib/nvmf/nvmf_internal.h333
-rw-r--r--src/spdk/lib/nvmf/rdma.c2930
-rw-r--r--src/spdk/lib/nvmf/request.c190
-rw-r--r--src/spdk/lib/nvmf/subsystem.c1269
-rw-r--r--src/spdk/lib/nvmf/transport.c236
-rw-r--r--src/spdk/lib/nvmf/transport.h200
12 files changed, 9713 insertions, 0 deletions
diff --git a/src/spdk/lib/nvmf/Makefile b/src/spdk/lib/nvmf/Makefile
new file mode 100644
index 00000000..8f299a90
--- /dev/null
+++ b/src/spdk/lib/nvmf/Makefile
@@ -0,0 +1,63 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# Sources that are always compiled into the library.
+C_SRCS = ctrlr.c ctrlr_discovery.c ctrlr_bdev.c \
+ subsystem.c nvmf.c \
+ request.c transport.c
+
+# rdma.c is appended to C_SRCS-$(CONFIG_RDMA), i.e. C_SRCS-y when RDMA is enabled.
+# NOTE(review): presumably the included SPDK make fragments fold C_SRCS-y into the build - confirm there.
+C_SRCS-$(CONFIG_RDMA) += rdma.c
+LIBNAME = nvmf
+LOCAL_SYS_LIBS = -luuid
+# The RDMA transport additionally links against libibverbs and librdmacm.
+ifeq ($(CONFIG_RDMA),y)
+LOCAL_SYS_LIBS += -libverbs -lrdmacm
+#Attach only if FreeBSD and RDMA is specified with configure
+ifeq ($(OS),FreeBSD)
+# Each vendor library below is linked only when a matching file exists in /usr/lib.
+# Mellanox - MLX4 HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libmlx4.*)","")
+LOCAL_SYS_LIBS += -lmlx4
+endif
+# Mellanox - MLX5 HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libmlx5.*)","")
+LOCAL_SYS_LIBS += -lmlx5
+endif
+# Chelsio HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libcxgb4.*)","")
+LOCAL_SYS_LIBS += -lcxgb4
+endif
+endif
+endif
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/nvmf/ctrlr.c b/src/spdk/lib/nvmf/ctrlr.c
new file mode 100644
index 00000000..ed5e68f0
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr.c
@@ -0,0 +1,1773 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/bit_array.h"
+#include "spdk/endian.h"
+#include "spdk/thread.h"
+#include "spdk/trace.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/version.h"
+
+#include "spdk_internal/log.h"
+
+/*
+ * Lower bound applied by Set Features - Keep Alive Timer in this file: a
+ * non-zero requested timeout below this value is raised to it. That handler
+ * logs the timeout in milliseconds.
+ */
+#define MIN_KEEP_ALIVE_TIMEOUT 10000
+
+/* NOTE(review): presumably the model number reported by Identify Controller - its use is outside this view. */
+#define MODEL_NUMBER "SPDK bdev Controller"
+
+/*
+ * Report the SPDK version as the firmware revision.
+ * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
+ */
+#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING
+
+/*
+ * Mark a Connect response as failed with the fabrics "invalid parameter"
+ * status and record which field was rejected.
+ *
+ * rsp   - Connect response capsule to fill in.
+ * iattr - 0 when the offending field lives in the Connect command,
+ *         1 when it lives in the Connect data (see the wrapper macros below).
+ * ipo   - byte offset of the offending field within that structure.
+ */
+static inline void
+spdk_nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
+				   uint8_t iattr, uint16_t ipo)
+{
+	/* Location of the bad parameter. */
+	rsp->status_code_specific.invalid.ipo = ipo;
+	rsp->status_code_specific.invalid.iattr = iattr;
+
+	/* Command-specific status: invalid parameter. */
+	rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+	rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+}
+
+/*
+ * Convenience wrappers around spdk_nvmf_invalid_connect_response().
+ * The _CMD variant reports a bad field of the Connect command (iattr = 0),
+ * the _DATA variant a bad field of the Connect data (iattr = 1); in both
+ * cases the offset is derived from the field name with offsetof().
+ */
+#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field) \
+ spdk_nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field))
+#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \
+ spdk_nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))
+
+/*
+ * Attach a queue pair to a controller and fill in the Connect response.
+ *
+ * The QID must be below the capacity of the controller's qpair mask and must
+ * not already be marked in it; otherwise the response is set to
+ * "invalid queue identifier" and the qpair is left unattached.
+ * Asserts that it runs on the admin qpair's poll group thread.
+ */
+static void
+ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
+			       struct spdk_nvmf_ctrlr *ctrlr,
+			       struct spdk_nvmf_fabric_connect_rsp *rsp)
+{
+	assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());
+
+	/* check if we would exceed ctrlr connection limit */
+	if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) {
+		SPDK_ERRLOG("Requested QID %u but Max QID is %u\n",
+			    qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1);
+		goto invalid_qid;
+	}
+
+	/* Each QID may be connected only once per controller. */
+	if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
+		SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
+		goto invalid_qid;
+	}
+
+	qpair->ctrlr = ctrlr;
+	spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid);
+
+	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+	rsp->status_code_specific.success.cntlid = ctrlr->cntlid;
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "connect capsule response: cntlid = 0x%04x\n",
+		      rsp->status_code_specific.success.cntlid);
+	return;
+
+invalid_qid:
+	rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+	rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
+}
+
+/*
+ * Thread-message adapter: completes the request passed as the opaque context.
+ */
+static void
+_spdk_nvmf_request_complete(void *ctx)
+{
+	spdk_nvmf_request_complete((struct spdk_nvmf_request *)ctx);
+}
+
+/*
+ * Thread-message handler that records the request's qpair as the admin qpair
+ * of the controller already attached to it, registers the qpair with the
+ * controller, and completes the Connect request.
+ */
+static void
+_spdk_nvmf_ctrlr_add_admin_qpair(void *ctx)
+{
+	struct spdk_nvmf_request *req = ctx;
+	struct spdk_nvmf_qpair *admin_qpair = req->qpair;
+	struct spdk_nvmf_ctrlr *ctrlr = admin_qpair->ctrlr;
+
+	ctrlr->admin_qpair = admin_qpair;
+	ctrlr_add_qpair_and_update_rsp(admin_qpair, ctrlr, &req->rsp->connect_rsp);
+	spdk_nvmf_request_complete(req);
+}
+
+/*
+ * Thread-message handler (sent to the subsystem's thread by
+ * spdk_nvmf_ctrlr_create) that registers the newly created controller with
+ * its subsystem.
+ *
+ * On success the request is forwarded to the controller's thread, where the
+ * admin qpair is attached and the Connect request is completed.
+ * On failure the controller is destroyed - including the qpair mask that
+ * spdk_nvmf_ctrlr_create allocated for it - and the request is completed
+ * with an internal device error on the qpair's poll group thread.
+ */
+static void
+_spdk_nvmf_subsystem_add_ctrlr(void *ctx)
+{
+	struct spdk_nvmf_request *req = ctx;
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+	if (spdk_nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) {
+		SPDK_ERRLOG("Unable to add controller to subsystem\n");
+		/* Release the qpair mask together with the controller so it is not leaked. */
+		spdk_bit_array_free(&ctrlr->qpair_mask);
+		free(ctrlr);
+		/* Do not leave the qpair pointing at the freed controller. */
+		qpair->ctrlr = NULL;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req);
+		return;
+	}
+
+	spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_add_admin_qpair, req);
+}
+
+/*
+ * Allocate and initialize a controller for an admin-queue Connect, then hand
+ * the request to the subsystem's thread so the controller can be registered
+ * there (see _spdk_nvmf_subsystem_add_ctrlr).
+ *
+ * subsystem    - subsystem the controller will belong to.
+ * req          - the Connect request; its qpair supplies the transport
+ *                options and the thread the controller is bound to.
+ * connect_cmd  - Connect command; provides the initial keep alive timeout.
+ * connect_data - Connect data; provides the host identifier.
+ *
+ * Returns the new controller, in which case the request continues
+ * asynchronously, or NULL when an allocation fails. On failure
+ * req->qpair->ctrlr is not modified, so the qpair never refers to a
+ * controller that has already been freed.
+ */
+static struct spdk_nvmf_ctrlr *
+spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
+		       struct spdk_nvmf_request *req,
+		       struct spdk_nvmf_fabric_connect_cmd *connect_cmd,
+		       struct spdk_nvmf_fabric_connect_data *connect_data)
+{
+	struct spdk_nvmf_ctrlr *ctrlr;
+	struct spdk_nvmf_transport *transport;
+
+	ctrlr = calloc(1, sizeof(*ctrlr));
+	if (ctrlr == NULL) {
+		SPDK_ERRLOG("Memory allocation failed\n");
+		return NULL;
+	}
+
+	ctrlr->subsys = subsystem;
+	ctrlr->thread = req->qpair->group->thread;
+
+	transport = req->qpair->transport;
+	ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr);
+	if (!ctrlr->qpair_mask) {
+		SPDK_ERRLOG("Failed to allocate controller qpair mask\n");
+		free(ctrlr);
+		return NULL;
+	}
+
+	ctrlr->feat.keep_alive_timer.bits.kato = connect_cmd->kato;
+	ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
+	ctrlr->feat.volatile_write_cache.bits.wce = 1;
+
+	/* Subtract 1 for admin queue, 1 for 0's based */
+	ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
+			1;
+	ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 -
+			1;
+
+	memcpy(ctrlr->hostid, connect_data->hostid, sizeof(ctrlr->hostid));
+
+	ctrlr->vcprop.cap.raw = 0;
+	ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */
+	ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth -
+				      1; /* max queue depth */
+	ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */
+	ctrlr->vcprop.cap.bits.to = 1; /* ready timeout - 500 msec units */
+	ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */
+	ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */
+	ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */
+	ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */
+
+	/* Version Supported: 1.3 */
+	ctrlr->vcprop.vs.bits.mjr = 1;
+	ctrlr->vcprop.vs.bits.mnr = 3;
+	ctrlr->vcprop.vs.bits.ter = 0;
+
+	ctrlr->vcprop.cc.raw = 0;
+	ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */
+
+	ctrlr->vcprop.csts.raw = 0;
+	ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw);
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "vs 0x%x\n", ctrlr->vcprop.vs.raw);
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cc 0x%x\n", ctrlr->vcprop.cc.raw);
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "csts 0x%x\n", ctrlr->vcprop.csts.raw);
+
+	/*
+	 * Publish the controller on the qpair only once nothing can fail any
+	 * more; the message handler below reads it back from req->qpair->ctrlr.
+	 */
+	req->qpair->ctrlr = ctrlr;
+	spdk_thread_send_msg(subsystem->thread, _spdk_nvmf_subsystem_add_ctrlr, req);
+
+	return ctrlr;
+}
+
+/*
+ * Detach a controller from its subsystem and release the controller structure.
+ * NOTE(review): ctrlr->qpair_mask is not released here - confirm the caller
+ * frees it before invoking this function.
+ */
+void
+spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	struct spdk_nvmf_subsystem *owner = ctrlr->subsys;
+
+	spdk_nvmf_subsystem_remove_ctrlr(owner, ctrlr);
+	free(ctrlr);
+}
+
+/*
+ * Thread-message handler, sent to the admin qpair's poll group thread, that
+ * validates an I/O queue Connect against the controller state and, if
+ * everything checks out, attaches the qpair to the controller.
+ *
+ * The request is always completed on the qpair's own poll group thread.
+ */
+static void
+spdk_nvmf_ctrlr_add_io_qpair(void *ctx)
+{
+	struct spdk_nvmf_request *req = ctx;
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+
+	/* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect.
+	 * For error case, the value should be NULL. So set it to NULL at first.
+	 */
+	qpair->ctrlr = NULL;
+
+	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+		SPDK_ERRLOG("I/O connect not allowed on discovery controller\n");
+		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+	} else if (!ctrlr->vcprop.cc.bits.en) {
+		SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n");
+		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+	} else if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) {
+		/* The configured submission queue entry size must match the command size. */
+		SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n",
+			    ctrlr->vcprop.cc.bits.iosqes);
+		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+	} else if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) {
+		/* Likewise for the completion queue entry size. */
+		SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n",
+			    ctrlr->vcprop.cc.bits.iocqes);
+		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+	} else {
+		ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
+	}
+
+	spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req);
+}
+
+/*
+ * Thread-message handler, sent to the subsystem's thread, that resolves the
+ * controller ID named in an I/O queue Connect.
+ *
+ * When the controller exists, the request is forwarded to the thread of its
+ * admin qpair for the remaining checks; otherwise the request is completed
+ * with an "invalid parameter" status pointing at CNTLID.
+ */
+static void
+_spdk_nvmf_ctrlr_add_io_qpair(void *ctx)
+{
+	struct spdk_nvmf_request *req = ctx;
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvmf_fabric_connect_data *data = req->data;
+	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+	struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
+	struct spdk_nvmf_subsystem *subsystem;
+	struct spdk_nvmf_ctrlr *ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect I/O Queue for controller id 0x%x\n", data->cntlid);
+
+	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
+	/* We already checked this in spdk_nvmf_ctrlr_connect */
+	assert(subsystem != NULL);
+
+	ctrlr = spdk_nvmf_subsystem_get_ctrlr(subsystem, data->cntlid);
+	if (ctrlr != NULL) {
+		struct spdk_nvmf_qpair *admin_qpair = ctrlr->admin_qpair;
+
+		qpair->ctrlr = ctrlr;
+		spdk_thread_send_msg(admin_qpair->group->thread, spdk_nvmf_ctrlr_add_io_qpair, req);
+		return;
+	}
+
+	SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid);
+	SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
+	spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req);
+}
+
+/*
+ * Handle a Fabrics Connect command.
+ *
+ * Validates the Connect data length, record format, subsystem and host NQNs,
+ * host and listener access, and SQSIZE. Any failure fills in the response
+ * status and returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE.
+ *
+ * For QID 0 a new controller is created (only CNTLID 0xFFFF is accepted);
+ * for any other QID the request is forwarded to the subsystem's thread to
+ * attach an I/O queue. Both of those paths return
+ * SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS and complete the request later.
+ */
+static int
+spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fabric_connect_data *data = req->data;
+ struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_subsystem *subsystem;
+ const char *subnqn, *hostnqn;
+ struct spdk_nvme_transport_id listen_trid = {};
+ void *end;
+
+ /* The data buffer must hold a complete Connect data structure before any field is read. */
+ if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) {
+ SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "recfmt 0x%x qid %u sqsize %u\n",
+ cmd->recfmt, cmd->qid, cmd->sqsize);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect data:\n");
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " cntlid: 0x%04x\n", data->cntlid);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n",
+ ntohl(*(uint32_t *)&data->hostid[0]),
+ ntohs(*(uint16_t *)&data->hostid[4]),
+ ntohs(*(uint16_t *)&data->hostid[6]),
+ data->hostid[8],
+ data->hostid[9],
+ ntohs(*(uint16_t *)&data->hostid[10]),
+ ntohl(*(uint32_t *)&data->hostid[12]));
+
+ /* Only record format 0 is supported. */
+ if (cmd->recfmt != 0) {
+ SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* Ensure that subnqn is null terminated */
+ end = memchr(data->subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1);
+ if (!end) {
+ SPDK_ERRLOG("Connect SUBNQN is not null terminated\n");
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ subnqn = data->subnqn;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " subnqn: \"%s\"\n", subnqn);
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, subnqn);
+ if (subsystem == NULL) {
+ SPDK_ERRLOG("Could not find subsystem '%s'\n", subnqn);
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* Ensure that hostnqn is null terminated */
+ end = memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1);
+ if (!end) {
+ SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ hostnqn = data->hostnqn;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostnqn: \"%s\"\n", hostnqn);
+
+ /* Access control: the host NQN must be allowed by the subsystem ... */
+ if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
+ SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subnqn, hostnqn);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* ... and so must the listen address; failing to look it up is treated as a rejection. */
+ if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) {
+ SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n",
+ subnqn);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) {
+ SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n", subnqn,
+ hostnqn);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /*
+ * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and
+ * strictly less than max_queue_depth.
+ */
+ if (cmd->sqsize == 0 || cmd->sqsize >= qpair->transport->opts.max_queue_depth) {
+ SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n",
+ cmd->sqsize, qpair->transport->opts.max_queue_depth - 1);
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ qpair->sq_head_max = cmd->sqsize;
+ qpair->qid = cmd->qid;
+
+ /* QID 0 is the admin queue and creates a controller; any other QID attaches an I/O queue. */
+ if (cmd->qid == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid);
+
+ if (data->cntlid != 0xFFFF) {
+ /* This NVMf target only supports dynamic mode. */
+ SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid);
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* Establish a new ctrlr */
+ ctrlr = spdk_nvmf_ctrlr_create(subsystem, req, cmd, data);
+ if (!ctrlr) {
+ SPDK_ERRLOG("spdk_nvmf_ctrlr_create() failed\n");
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ } else {
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ } else {
+ spdk_thread_send_msg(subsystem->thread, _spdk_nvmf_ctrlr_add_io_qpair, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+}
+
+/* Property getter: raw value of the controller's CAP register. */
+static uint64_t
+nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	uint64_t value = ctrlr->vcprop.cap.raw;
+
+	return value;
+}
+
+/* Property getter: raw value of the controller's VS register. */
+static uint64_t
+nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	uint64_t value = ctrlr->vcprop.vs.raw;
+
+	return value;
+}
+
+/* Property getter: raw value of the controller's CC register. */
+static uint64_t
+nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	uint64_t value = ctrlr->vcprop.cc.raw;
+
+	return value;
+}
+
+/*
+ * Property setter for the CC register.
+ *
+ * Compares the requested value with the current CC and applies each changed
+ * field that is handled here (EN, SHN, IOSQES, IOCQES).
+ *
+ * Returns false when SHN carries a value other than 0, normal or abrupt, or
+ * when any bit outside the handled fields differs from the current CC;
+ * returns true otherwise. Fields handled before a failure is detected stay
+ * applied.
+ */
+static bool
+nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint64_t value)
+{
+ union spdk_nvme_cc_register cc, diff;
+
+ cc.raw = (uint32_t)value;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "new CC: 0x%08x\n", cc.raw);
+
+ /*
+ * Calculate which bits changed between the current and new CC.
+ * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed.
+ */
+ diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw;
+
+ if (diff.bits.en) {
+ if (cc.bits.en) {
+ /* Enabling the controller also reports it as ready immediately. */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Enable!\n");
+ ctrlr->vcprop.cc.bits.en = 1;
+ ctrlr->vcprop.csts.bits.rdy = 1;
+ } else {
+ /* The 1 -> 0 transition is only logged; CC.EN and CSTS.RDY are left unchanged. */
+ SPDK_ERRLOG("CC.EN transition from 1 to 0 (reset) not implemented!\n");
+
+ }
+ diff.bits.en = 0;
+ }
+
+ if (diff.bits.shn) {
+ if (cc.bits.shn == SPDK_NVME_SHN_NORMAL ||
+ cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
+ /* A shutdown request disables the controller and reports shutdown complete right away. */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Shutdown %u%ub!\n",
+ cc.bits.shn >> 1, cc.bits.shn & 1);
+ ctrlr->vcprop.cc.bits.shn = cc.bits.shn;
+ ctrlr->vcprop.cc.bits.en = 0;
+ ctrlr->vcprop.csts.bits.rdy = 0;
+ ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE;
+ } else if (cc.bits.shn == 0) {
+ ctrlr->vcprop.cc.bits.shn = 0;
+ } else {
+ SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n",
+ cc.bits.shn >> 1, cc.bits.shn & 1);
+ return false;
+ }
+ diff.bits.shn = 0;
+ }
+
+ if (diff.bits.iosqes) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOSQES = %u (%u bytes)\n",
+ cc.bits.iosqes, 1u << cc.bits.iosqes);
+ ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes;
+ diff.bits.iosqes = 0;
+ }
+
+ if (diff.bits.iocqes) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOCQES = %u (%u bytes)\n",
+ cc.bits.iocqes, 1u << cc.bits.iocqes);
+ ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes;
+ diff.bits.iocqes = 0;
+ }
+
+ /* Anything still set in diff was changed by the host but is not handled above. */
+ if (diff.raw != 0) {
+ SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw);
+ return false;
+ }
+
+ return true;
+}
+
+/* Property getter: raw value of the controller's CSTS register. */
+static uint64_t
+nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	uint64_t value = ctrlr->vcprop.csts.raw;
+
+	return value;
+}
+
+/* Describes one controller property reachable through Property Get/Set. */
+struct nvmf_prop {
+ /* Byte offset of the property; PROP() derives it from struct spdk_nvme_registers. */
+ uint32_t ofst;
+ /* Access size, one of the SPDK_NVMF_PROP_SIZE_* values; compared against the command's attrib.size. */
+ uint8_t size;
+ /* Register field name, used for debug logging. */
+ char name[11];
+ /* Returns the current value; the table in this file always provides one. */
+ uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr);
+ /* Applies a new value and returns false on rejection; NULL for properties that cannot be set. */
+ bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint64_t value);
+};
+
+/*
+ * Build one struct nvmf_prop initializer.
+ * field is a member of struct spdk_nvme_registers (it supplies both the
+ * offset and, stringified, the name); size is 4 or 8 and is pasted onto
+ * SPDK_NVMF_PROP_SIZE_.
+ */
+#define PROP(field, size, get_cb, set_cb) \
+ { \
+ offsetof(struct spdk_nvme_registers, field), \
+ SPDK_NVMF_PROP_SIZE_##size, \
+ #field, \
+ get_cb, set_cb \
+ }
+
+/* Supported properties. Only CC has a setter; CAP, VS and CSTS can only be read. */
+static const struct nvmf_prop nvmf_props[] = {
+ PROP(cap, 8, nvmf_prop_get_cap, NULL),
+ PROP(vs, 4, nvmf_prop_get_vs, NULL),
+ PROP(cc, 4, nvmf_prop_get_cc, nvmf_prop_set_cc),
+ PROP(csts, 4, nvmf_prop_get_csts, NULL),
+};
+
+/*
+ * Look up the property descriptor whose offset equals ofst.
+ * Returns NULL when no entry of nvmf_props matches.
+ */
+static const struct nvmf_prop *
+find_prop(uint32_t ofst)
+{
+	const struct nvmf_prop *entry = nvmf_props;
+	const struct nvmf_prop *const last = nvmf_props + SPDK_COUNTOF(nvmf_props);
+
+	/* Linear scan over the table. */
+	for (; entry != last; entry++) {
+		if (entry->ofst == ofst) {
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Handle a Fabrics Property Get command.
+ *
+ * The response value defaults to 0. An access size other than 4 or 8 bytes,
+ * or one that differs from the property's own size, is rejected with an
+ * "invalid parameter" status. Offsets without a readable property return 0.
+ * Always completes synchronously.
+ */
+static int
+spdk_nvmf_property_get(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp;
+	struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	const struct nvmf_prop *prop;
+
+	response->status.sc = 0;
+	response->value.u64 = 0;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x\n",
+		      cmd->attrib.size, cmd->ofst);
+
+	if (!(cmd->attrib.size == SPDK_NVMF_PROP_SIZE_4 ||
+	      cmd->attrib.size == SPDK_NVMF_PROP_SIZE_8)) {
+		SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size);
+		goto invalid_param;
+	}
+
+	prop = find_prop(cmd->ofst);
+	if (prop == NULL || prop->get_cb == NULL) {
+		/* Reserved properties return 0 when read */
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name);
+	if (cmd->attrib.size != prop->size) {
+		SPDK_ERRLOG("offset 0x%x size mismatch: cmd %u, prop %u\n",
+			    cmd->ofst, cmd->attrib.size, prop->size);
+		goto invalid_param;
+	}
+
+	response->value.u64 = prop->get_cb(ctrlr);
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "response value: 0x%" PRIx64 "\n", response->value.u64);
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+
+invalid_param:
+	response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+	response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Handle a Fabrics Property Set command.
+ *
+ * Fails with an "invalid parameter" status when the offset has no settable
+ * property, when the access size differs from the property's size, or when
+ * the property's setter rejects the value. Always completes synchronously.
+ */
+static int
+spdk_nvmf_property_set(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	const struct nvmf_prop *prop;
+	uint64_t new_value;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
+		      cmd->attrib.size, cmd->ofst, cmd->value.u64);
+
+	prop = find_prop(cmd->ofst);
+	if (prop == NULL || prop->set_cb == NULL) {
+		SPDK_ERRLOG("Invalid offset 0x%x\n", cmd->ofst);
+		goto invalid_param;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name);
+	if (cmd->attrib.size != prop->size) {
+		SPDK_ERRLOG("offset 0x%x size mismatch: cmd %u, prop %u\n",
+			    cmd->ofst, cmd->attrib.size, prop->size);
+		goto invalid_param;
+	}
+
+	/* 4-byte properties only take the low 32 bits of the supplied value. */
+	new_value = cmd->value.u64;
+	if (prop->size == SPDK_NVMF_PROP_SIZE_4) {
+		new_value = (uint32_t)new_value;
+	}
+
+	if (!prop->set_cb(ctrlr, new_value)) {
+		SPDK_ERRLOG("prop set_cb failed\n");
+		goto invalid_param;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+
+invalid_param:
+	response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+	response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Arbitration: store CDW11 as the controller's arbitration
+ * feature value with the reserved bits cleared.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Arbitration (cdw11 = 0x%0x)\n", nvme_cmd->cdw11);
+
+	/* Take the host's value, then mask off what the host may not set. */
+	ctrlr->feat.arbitration.raw = nvme_cmd->cdw11;
+	ctrlr->feat.arbitration.bits.reserved = 0;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Power Management: accept only power state 0 and store CDW11
+ * with the reserved bits cleared; any other power state fails with
+ * "invalid field".
+ */
+static int
+spdk_nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	union spdk_nvme_feat_power_management requested;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11);
+	requested.raw = cmd->cdw11;
+
+	/* Only PS = 0 is allowed, since we report NPSS = 0 */
+	if (requested.bits.ps == 0) {
+		ctrlr->feat.power_management.raw = cmd->cdw11;
+		ctrlr->feat.power_management.bits.reserved = 0;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	SPDK_ERRLOG("Invalid power state %u\n", requested.bits.ps);
+	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Check the TMPSEL and THSEL fields of a Temperature Threshold feature value.
+ * Returns true when both hold non-reserved encodings; otherwise logs the
+ * offending field and returns false.
+ */
+static bool
+temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts)
+{
+	/*
+	 * TMPSEL: 0000b - 1000b select a temperature sensor and 1111b selects
+	 * all implemented sensors; 1001b - 1110b are reserved.
+	 */
+	if (opts->bits.tmpsel != 15 && opts->bits.tmpsel >= 9) {
+		SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel);
+		return false;
+	}
+
+	/*
+	 * THSEL: 00b is the over temperature threshold, 01b the under
+	 * temperature threshold; 10b - 11b are reserved.
+	 */
+	if (opts->bits.thsel >= 2) {
+		SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Set Features - Temperature Threshold: validate the selector fields in
+ * CDW11. Valid requests are accepted without storing anything.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	union spdk_nvme_feat_temperature_threshold requested;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
+	requested.raw = cmd->cdw11;
+
+	if (temp_threshold_opts_valid(&requested)) {
+		/* TODO: no sensors implemented - ignore new values */
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Get Features - Temperature Threshold: validate the selector fields in
+ * CDW11 and report a threshold of 0 for every valid selection.
+ */
+static int
+spdk_nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	union spdk_nvme_feat_temperature_threshold requested;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
+	requested.raw = cmd->cdw11;
+
+	if (temp_threshold_opts_valid(&requested)) {
+		/* TODO: no sensors implemented - return 0 for all thresholds */
+		rsp->cdw0 = 0;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Error Recovery: reject requests that set DULBE, otherwise
+ * store CDW11 with the reserved bits cleared.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	union spdk_nvme_feat_error_recovery requested;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11);
+	requested.raw = cmd->cdw11;
+
+	if (!requested.bits.dulbe) {
+		ctrlr->feat.error_recovery.raw = cmd->cdw11;
+		ctrlr->feat.error_recovery.bits.reserved = 0;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/*
+	 * Host is not allowed to set this bit, since we don't advertise it in
+	 * Identify Namespace.
+	 */
+	SPDK_ERRLOG("Host set unsupported DULBE bit\n");
+	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Volatile Write Cache: store CDW11 with the reserved bits
+ * cleared and log whether the write cache ends up enabled.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", nvme_cmd->cdw11);
+
+	ctrlr->feat.volatile_write_cache.raw = nvme_cmd->cdw11;
+	ctrlr->feat.volatile_write_cache.bits.reserved = 0;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache %s\n",
+		      ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled");
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Write Atomicity: store CDW11 as the controller's write
+ * atomicity feature value with the reserved bits cleared.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", nvme_cmd->cdw11);
+
+	ctrlr->feat.write_atomicity.raw = nvme_cmd->cdw11;
+	ctrlr->feat.write_atomicity.bits.reserved = 0;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Host Identifier: always refused with a command sequence
+ * error.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req)
+{
+	SPDK_ERRLOG("Set Features - Host Identifier not allowed\n");
+	req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Get Features - Host Identifier: copy the controller's host ID into the
+ * request's data buffer.
+ *
+ * Fails with "invalid field" when EXHID is not set in CDW11, or when the data
+ * buffer is missing or shorter than the host ID.
+ */
+static int
+spdk_nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	union spdk_nvme_feat_host_identifier requested;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Host Identifier\n");
+
+	requested.raw = cmd->cdw11;
+	if (!requested.bits.exhid) {
+		/* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */
+		SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n");
+		goto invalid_field;
+	}
+
+	if (req->data == NULL || req->length < sizeof(ctrlr->hostid)) {
+		SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n");
+		goto invalid_field;
+	}
+
+	memcpy(req->data, ctrlr->hostid, sizeof(ctrlr->hostid));
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+
+invalid_field:
+	response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Keep Alive Timer.
+ *
+ * A timeout of 0 is rejected with "keep alive invalid" and leaves the stored
+ * timeout untouched. Non-zero timeouts below MIN_KEEP_ALIVE_TIMEOUT are
+ * raised to that minimum; anything else is stored as requested.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", nvme_cmd->cdw11);
+
+	if (nvme_cmd->cdw11 >= MIN_KEEP_ALIVE_TIMEOUT) {
+		ctrlr->feat.keep_alive_timer.bits.kato = nvme_cmd->cdw11;
+	} else if (nvme_cmd->cdw11 != 0) {
+		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT;
+	} else {
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n",
+		      ctrlr->feat.keep_alive_timer.bits.kato);
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Number of Queues.
+ *
+ * Fails with Command Sequence Error once more than one queue pair is marked
+ * in the controller's qpair_mask. Otherwise the host's requested value is
+ * ignored and the controller's stored number_of_queues value is returned in CDW0.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+	uint32_t active_qpairs;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Number of Queues, cdw11 0x%x\n",
+		      req->cmd->nvme_cmd.cdw11);
+
+	active_qpairs = spdk_bit_array_count_set(target->qpair_mask);
+	if (active_qpairs > 1) {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Queue pairs already active!\n");
+		cpl->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/*
+	 * The value requested by the host is not honored; report the
+	 * pre-configured value (based on max_qpairs_allowed) instead.
+	 */
+	cpl->cdw0 = target->feat.number_of_queues.raw;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Set Features - Asynchronous Event Configuration.
+ *
+ * Stores CDW11 as the controller's async event configuration, with the
+ * reserved bits forced to zero.
+ */
+static int
+spdk_nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Async Event Configuration, cdw11 0x%08x\n",
+		      nvme_cmd->cdw11);
+
+	/* Take the raw dword first, then scrub the reserved field. */
+	target->feat.async_event_configuration.raw = nvme_cmd->cdw11;
+	target->feat.async_event_configuration.bits.reserved = 0;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Asynchronous Event Request handler.
+ *
+ * At most one AER may be outstanding per controller. If a notice event is
+ * already pending, it is returned immediately in CDW0 and cleared; otherwise
+ * the request is parked in ctrlr->aer_req and the command stays outstanding.
+ */
+static int
+spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n");
+
+	if (target->aer_req == NULL) {
+		if (target->notice_event.bits.async_event_type !=
+		    SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) {
+			/* Nothing to report yet: hold on to the request. */
+			target->aer_req = req;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+
+		/* A notice was queued while no AER was outstanding; deliver it now. */
+		cpl->cdw0 = target->notice_event.raw;
+		target->notice_event.raw = 0;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/* Only one asynchronous event is supported for now */
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "AERL exceeded\n");
+	cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+	cpl->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Produce the Firmware Slot Information log page.
+ *
+ * Builds the page on the stack (active slot 1, slot 1 revision set to
+ * FW_VERSION padded with spaces) and copies the window starting at `offset`,
+ * at most `length` bytes, into `buffer`. Nothing is written when `offset`
+ * lies at or beyond the end of the page.
+ */
+static void
+spdk_nvmf_get_firmware_slot_log_page(void *buffer, uint64_t offset, uint32_t length)
+{
+	struct spdk_nvme_firmware_page page;
+	size_t nbytes;
+
+	memset(&page, 0, sizeof(page));
+	page.afi.active_slot = 1;
+	page.afi.next_reset_slot = 0;
+	spdk_strcpy_pad(page.revision[0], FW_VERSION, sizeof(page.revision[0]), ' ');
+
+	if (offset >= sizeof(page)) {
+		return;
+	}
+
+	nbytes = spdk_min(sizeof(page) - offset, length);
+	if (nbytes == 0) {
+		return;
+	}
+
+	memcpy(buffer, (const char *)&page + offset, nbytes);
+}
+
+/*
+ * Record that namespace `nsid` changed and raise a namespace attribute notice.
+ *
+ * The NSID is appended to the controller's changed namespace list unless it
+ * is already present. When the list is full, the first entry is set to
+ * FFFFFFFFh and the remaining entries are zeroed instead.
+ */
+void
+spdk_nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
+{
+	const uint16_t capacity = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list);
+	uint16_t idx = 0;
+
+	/* Scan for an existing entry; idx stops short of the count on a hit. */
+	while (idx < ctrlr->changed_ns_list_count &&
+	       ctrlr->changed_ns_list.ns_list[idx] != nsid) {
+		idx++;
+	}
+
+	if (idx == ctrlr->changed_ns_list_count) {
+		/* nsid is not in the list yet */
+		if (ctrlr->changed_ns_list_count != capacity) {
+			ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count] = nsid;
+			ctrlr->changed_ns_list_count++;
+		} else {
+			/* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. */
+			ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu;
+			for (idx = 1; idx < capacity; idx++) {
+				ctrlr->changed_ns_list.ns_list[idx] = 0;
+			}
+		}
+	}
+
+	spdk_nvmf_ctrlr_async_event_ns_notice(ctrlr);
+}
+
+/*
+ * Produce the Changed Namespace List log page.
+ *
+ * Copies the window of the controller's changed namespace list starting at
+ * `offset`, at most `length` bytes, into `buffer`. The list and its entry
+ * count are then reset unconditionally, even when nothing was copied.
+ */
+static void
+spdk_nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr,
+				       void *buffer, uint64_t offset, uint32_t length)
+{
+	const size_t list_size = sizeof(ctrlr->changed_ns_list);
+	size_t nbytes = 0;
+
+	if (offset < list_size) {
+		nbytes = spdk_min(length, list_size - offset);
+	}
+
+	if (nbytes != 0) {
+		memcpy(buffer, (char *)&ctrlr->changed_ns_list + offset, nbytes);
+	}
+
+	/* Clear log page each time it is read */
+	ctrlr->changed_ns_list_count = 0;
+	memset(&ctrlr->changed_ns_list, 0, list_size);
+}
+
+/*
+ * Static contents of the Commands Supported and Effects log page, indexed by
+ * opcode. Opcodes not listed here are left zero-initialized.
+ *
+ * The structure can be modified if we provide support for other commands in future
+ */
+static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = {
+ .admin_cmds_supported = {
+ /*
+  * Leading initializers in order: CSUPP, LBCC, NCC, NIC, CCC, CSE.
+  * NOTE(review): each entry carries eight initializers; the last two are
+  * presumably the remaining/reserved fields of the entry - confirm
+  * against the struct definition.
+  */
+ /* Get Log Page */
+ [SPDK_NVME_OPC_GET_LOG_PAGE] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Identify */
+ [SPDK_NVME_OPC_IDENTIFY] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Abort */
+ [SPDK_NVME_OPC_ABORT] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Set Features */
+ [SPDK_NVME_OPC_SET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Get Features */
+ [SPDK_NVME_OPC_GET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Async Event Request */
+ [SPDK_NVME_OPC_ASYNC_EVENT_REQUEST] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Keep Alive */
+ [SPDK_NVME_OPC_KEEP_ALIVE] = {1, 0, 0, 0, 0, 0, 0, 0},
+ },
+ .io_cmds_supported = {
+ /* The second initializer is set for every I/O command below except READ. */
+ /* FLUSH */
+ [SPDK_NVME_OPC_FLUSH] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* WRITE */
+ [SPDK_NVME_OPC_WRITE] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* READ */
+ [SPDK_NVME_OPC_READ] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* WRITE ZEROES */
+ [SPDK_NVME_OPC_WRITE_ZEROES] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* DATASET MANAGEMENT */
+ [SPDK_NVME_OPC_DATASET_MANAGEMENT] = {1, 1, 0, 0, 0, 0, 0, 0},
+ },
+};
+
+/*
+ * Produce the Commands Supported and Effects log page.
+ *
+ * Copies the window of g_cmds_and_effect_log_page starting at `offset` into
+ * `buffer`, then zero-fills whatever part of the `length` bytes the page did
+ * not cover.
+ */
+static void
+spdk_nvmf_get_cmds_and_effects_log_page(void *buffer,
+					uint64_t offset, uint32_t length)
+{
+	const uint32_t log_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page);
+	size_t copied = 0;
+
+	if (offset < log_size) {
+		copied = spdk_min(log_size - offset, length);
+		memcpy(buffer, (const char *)&g_cmds_and_effect_log_page + offset, copied);
+	}
+
+	/* Pad the tail of the caller's buffer with zeroes. */
+	if (copied != length) {
+		memset((char *)buffer + copied, 0, length - copied);
+	}
+}
+
+/*
+ * Get Log Page admin command handler.
+ *
+ * Validates the data buffer, the log page offset (CDW12/CDW13) and the
+ * requested length (NUMDL/NUMDU), then dispatches on the log page identifier
+ * in CDW10. Discovery subsystems only serve the discovery log page; other
+ * subsystems serve the error, health, firmware slot, command effects and
+ * changed namespace list pages. Any validation failure or unsupported page
+ * completes the request with Generic / Invalid Field.
+ *
+ * Always returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE.
+ */
+static int
+spdk_nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ uint64_t offset, len;
+ uint32_t numdl, numdu;
+ uint8_t lid;
+
+ if (req->data == NULL) {
+ SPDK_ERRLOG("get log command with no buffer\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* 64-bit byte offset: CDW12 is the low dword, CDW13 the high dword. */
+ offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32);
+ /* The offset must be a multiple of 4 bytes. */
+ if (offset & 3) {
+ SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /*
+  * NUMDL is the upper 16 bits of CDW10 and NUMDU the lower 16 bits of CDW11.
+  * Combined they form a dword count minus one, so add 1 and multiply by 4
+  * to get the requested length in bytes.
+  */
+ numdl = (cmd->cdw10 >> 16) & 0xFFFFu;
+ numdu = (cmd->cdw11) & 0xFFFFu;
+ len = ((numdu << 16) + numdl + (uint64_t)1) * 4;
+ if (len > req->length) {
+ SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n",
+ len, req->length);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* The log page identifier is the low byte of CDW10. */
+ lid = cmd->cdw10 & 0xFF;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 "\n",
+ lid, offset, len);
+
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ switch (lid) {
+ case SPDK_NVME_LOG_DISCOVERY:
+ spdk_nvmf_get_discovery_log_page(subsystem->tgt, req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ default:
+ goto invalid_log_page;
+ }
+ } else {
+ switch (lid) {
+ case SPDK_NVME_LOG_ERROR:
+ case SPDK_NVME_LOG_HEALTH_INFORMATION:
+ /* TODO: actually fill out log page data */
+ /* For now this function writes nothing to the buffer for these pages. */
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_FIRMWARE_SLOT:
+ spdk_nvmf_get_firmware_slot_log_page(req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG:
+ spdk_nvmf_get_cmds_and_effects_log_page(req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_CHANGED_NS_LIST:
+ spdk_nvmf_get_changed_ns_list_log_page(ctrlr, req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ default:
+ goto invalid_log_page;
+ }
+ }
+
+invalid_log_page:
+ SPDK_ERRLOG("Unsupported Get Log Page 0x%02X\n", lid);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Identify Namespace.
+ *
+ * Rejects NSID 0 and NSIDs above the subsystem's max_nsid with Invalid
+ * Namespace or Format. For an in-range NSID with no namespace or no bdev
+ * attached, reports success without touching `nsdata`. Otherwise fills
+ * `nsdata` from the bdev layer and caps noiob at the transport's max I/O
+ * size expressed in logical blocks.
+ */
+static int
+spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
+			    struct spdk_nvme_cmd *cmd,
+			    struct spdk_nvme_cpl *rsp,
+			    struct spdk_nvme_ns_data *nsdata)
+{
+	struct spdk_nvmf_subsystem *subsys = ctrlr->subsys;
+	struct spdk_nvmf_ns *target_ns;
+	uint32_t lba_size;
+	uint32_t noiob_limit;
+
+	if (!(cmd->nsid != 0 && cmd->nsid <= subsys->max_nsid)) {
+		SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", cmd->nsid);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	target_ns = _spdk_nvmf_subsystem_get_ns(subsys, cmd->nsid);
+	if (target_ns == NULL || target_ns->bdev == NULL) {
+		/*
+		 * An inactive namespace is reported as an all-zero structure.
+		 * spdk_nvmf_ctrlr_process_admin_cmd() has already zeroed the data
+		 * buffer, so there is nothing left to fill in.
+		 */
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Identify Namespace for inactive NSID %u\n", cmd->nsid);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	spdk_nvmf_bdev_ctrlr_identify_ns(target_ns, nsdata);
+
+	/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
+	lba_size = 1U << nsdata->lbaf[nsdata->flbas.format].lbads;
+	noiob_limit = ctrlr->admin_qpair->transport->opts.max_io_size / lba_size;
+	if (nsdata->noiob > noiob_limit) {
+		nsdata->noiob = noiob_limit;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Identify Controller: fill `cdata` for this controller.
+ *
+ * Fields shared by discovery and NVM subsystems are always written. The
+ * NVM-specific fields are written only when the subsystem subtype is
+ * SPDK_NVMF_SUBTYPE_NVME. Fields not assigned here keep whatever value
+ * `cdata` already holds.
+ *
+ * Always returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE.
+ */
+static int
+spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
+{
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport;
+
+ /*
+ * Common fields for discovery and NVM subsystems
+ */
+ spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
+ /* mdts is a log2 of the max I/O size counted in 4096-byte units. */
+ assert((transport->opts.max_io_size % 4096) == 0);
+ cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);
+ cdata->cntlid = ctrlr->cntlid;
+ cdata->ver = ctrlr->vcprop.vs;
+ cdata->lpa.edlp = 1;
+ cdata->elpe = 127;
+ cdata->maxcmd = transport->opts.max_queue_depth;
+ cdata->sgls.supported = 1;
+ cdata->sgls.keyed_sgl = 1;
+ cdata->sgls.sgl_offset = 1;
+ spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0');
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "sgls data: 0x%x\n", from_le32(&cdata->sgls));
+
+ /*
+ * NVM subsystem fields (reserved for discovery subsystems)
+ */
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) {
+ spdk_strcpy_pad(cdata->mn, MODEL_NUMBER, sizeof(cdata->mn), ' ');
+ spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
+ cdata->kas = 10;
+
+ cdata->rab = 6;
+ cdata->cmic.multi_port = 1;
+ cdata->cmic.multi_host = 1;
+ cdata->oaes.ns_attribute_notices = 1;
+ cdata->ctratt.host_id_exhid_supported = 1;
+ cdata->aerl = 0;
+ cdata->frmw.slot1_ro = 1;
+ cdata->frmw.num_slots = 1;
+
+ cdata->lpa.celp = 1; /* Command Effects log page supported */
+
+ cdata->sqes.min = 6;
+ cdata->sqes.max = 6;
+ cdata->cqes.min = 4;
+ cdata->cqes.max = 4;
+ cdata->nn = subsystem->max_nsid;
+ cdata->vwc.present = 1;
+ cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED;
+
+ /* Capsule sizes are expressed in 16-byte units. */
+ cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
+ cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16;
+ cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */
+ cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC;
+ cdata->nvmf_specific.msdbd = 1; /* target supports single SGL in capsule */
+
+ /* TODO: this should be set by the transport */
+ /* Grow the command capsule size by the transport's in-capsule data size. */
+ cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16;
+
+ cdata->oncs.dsm = spdk_nvmf_ctrlr_dsm_supported(ctrlr);
+ cdata->oncs.write_zeroes = spdk_nvmf_ctrlr_write_zeroes_supported(ctrlr);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ioccsz 0x%x\n",
+ cdata->nvmf_specific.ioccsz);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: iorcsz 0x%x\n",
+ cdata->nvmf_specific.iorcsz);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: icdoff 0x%x\n",
+ cdata->nvmf_specific.icdoff);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ctrattr 0x%x\n",
+ *(uint8_t *)&cdata->nvmf_specific.ctrattr);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: msdbd 0x%x\n",
+ cdata->nvmf_specific.msdbd);
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Identify Active Namespace ID list.
+ *
+ * Walks the subsystem's namespaces and writes into `ns_list` every NSID that
+ * is greater than cmd->nsid, stopping once the list is full. A starting NSID
+ * of FFFFFFFEh or FFFFFFFFh is rejected with Invalid Namespace or Format.
+ */
+static int
+spdk_nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem,
+					struct spdk_nvme_cmd *cmd,
+					struct spdk_nvme_cpl *rsp,
+					struct spdk_nvme_ns_list *ns_list)
+{
+	struct spdk_nvmf_ns *cur;
+	uint32_t filled = 0;
+
+	if (cmd->nsid >= 0xfffffffeUL) {
+		SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid);
+		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	for (cur = spdk_nvmf_subsystem_get_first_ns(subsystem); cur != NULL;
+	     cur = spdk_nvmf_subsystem_get_next_ns(subsystem, cur)) {
+		if (cur->opts.nsid > cmd->nsid) {
+			ns_list->ns_list[filled] = cur->opts.nsid;
+			filled++;
+			if (filled == SPDK_COUNTOF(ns_list->ns_list)) {
+				/* The list is full. */
+				break;
+			}
+		}
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Append one Namespace Identification Descriptor to a buffer.
+ *
+ * buf_ptr    - in/out cursor into the output buffer; advanced past the new
+ *              descriptor on success.
+ * buf_remain - in/out count of bytes left at *buf_ptr; reduced by the
+ *              descriptor size on success.
+ * type       - descriptor type (NIDT).
+ * data       - identifier bytes to store.
+ * data_size  - length of `data`; must be in 1..UINT8_MAX.
+ *
+ * If the arguments are invalid or the descriptor does not fit, nothing is
+ * written and both cursors are left unchanged.
+ */
+static void
+_add_ns_id_desc(void **buf_ptr, size_t *buf_remain,
+		enum spdk_nvme_nidt type,
+		const void *data, size_t data_size)
+{
+	struct spdk_nvme_ns_id_desc *desc;
+	size_t desc_size = sizeof(*desc) + data_size;
+
+	/*
+	 * These should never fail in practice, since all valid NS ID descriptors
+	 * should be defined so that they fit in the available 4096-byte buffer.
+	 *
+	 * Note the assert is stricter than the runtime guard below: a descriptor
+	 * that exactly fills the remaining space trips the assert in debug builds
+	 * but is accepted when asserts are compiled out.
+	 */
+	assert(data_size > 0);
+	assert(data_size <= UINT8_MAX);
+	assert(desc_size < *buf_remain);
+	if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) {
+		return;
+	}
+
+	desc = *buf_ptr;
+	desc->nidt = type;
+	desc->nidl = data_size;
+	memcpy(desc->nid, data, data_size);
+
+	/* Advance through a byte pointer; arithmetic on void * is not ISO C. */
+	*buf_ptr = (uint8_t *)*buf_ptr + desc_size;
+	*buf_remain -= desc_size;
+}
+
+/*
+ * Identify Namespace Identification Descriptor list.
+ *
+ * Looks up the namespace named by cmd->nsid and emits one descriptor each for
+ * its EUI64, NGUID and UUID, skipping any identifier that is all zeroes.
+ * A missing namespace, or one without a bdev, completes with Invalid
+ * Namespace or Format.
+ */
+static int
+spdk_nvmf_ctrlr_identify_ns_id_descriptor_list(
+	struct spdk_nvmf_subsystem *subsystem,
+	struct spdk_nvme_cmd *cmd,
+	struct spdk_nvme_cpl *rsp,
+	void *id_desc_list, size_t id_desc_list_size)
+{
+	struct spdk_nvmf_ns *target_ns;
+	void *cursor = id_desc_list;
+	size_t space_left = id_desc_list_size;
+
+	target_ns = _spdk_nvmf_subsystem_get_ns(subsystem, cmd->nsid);
+	if (target_ns == NULL || target_ns->bdev == NULL) {
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/* Only identifiers that have actually been set are reported. */
+	if (!spdk_mem_all_zero(target_ns->opts.eui64, sizeof(target_ns->opts.eui64))) {
+		_add_ns_id_desc(&cursor, &space_left, SPDK_NVME_NIDT_EUI64,
+				target_ns->opts.eui64, sizeof(target_ns->opts.eui64));
+	}
+
+	if (!spdk_mem_all_zero(target_ns->opts.nguid, sizeof(target_ns->opts.nguid))) {
+		_add_ns_id_desc(&cursor, &space_left, SPDK_NVME_NIDT_NGUID,
+				target_ns->opts.nguid, sizeof(target_ns->opts.nguid));
+	}
+
+	if (!spdk_mem_all_zero(&target_ns->opts.uuid, sizeof(target_ns->opts.uuid))) {
+		_add_ns_id_desc(&cursor, &space_left, SPDK_NVME_NIDT_UUID,
+				&target_ns->opts.uuid, sizeof(target_ns->opts.uuid));
+	}
+
+	/*
+	 * No explicit terminator is written: controller to host buffers in admin
+	 * commands are zeroed in spdk_nvmf_ctrlr_process_admin_cmd(), so the list
+	 * is automatically 0-terminated.
+	 */
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Identify admin command handler.
+ *
+ * Requires a data buffer of at least 4096 bytes, then dispatches on the CNS
+ * value in the low byte of CDW10. Discovery subsystems only accept Identify
+ * Controller. Unsupported CNS values complete with Invalid Field.
+ */
+static int
+spdk_nvmf_ctrlr_identify(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+	struct spdk_nvmf_subsystem *subsys = target->subsys;
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+	uint8_t cns;
+
+	if (req->data == NULL || req->length < 4096) {
+		SPDK_ERRLOG("identify command with invalid buffer\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	cns = nvme_cmd->cdw10 & 0xFF;
+
+	/* Discovery controllers only support Identify Controller */
+	if (subsys->subtype != SPDK_NVMF_SUBTYPE_DISCOVERY ||
+	    cns == SPDK_NVME_IDENTIFY_CTRLR) {
+		switch (cns) {
+		case SPDK_NVME_IDENTIFY_NS:
+			return spdk_nvmf_ctrlr_identify_ns(target, nvme_cmd, cpl, req->data);
+		case SPDK_NVME_IDENTIFY_CTRLR:
+			return spdk_nvmf_ctrlr_identify_ctrlr(target, req->data);
+		case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST:
+			return spdk_nvmf_ctrlr_identify_active_ns_list(subsys, nvme_cmd, cpl, req->data);
+		case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST:
+			return spdk_nvmf_ctrlr_identify_ns_id_descriptor_list(subsys, nvme_cmd, cpl,
+					req->data, req->length);
+		default:
+			break;
+		}
+	}
+
+	/* Anything that was not dispatched above is an unsupported CNS. */
+	SPDK_ERRLOG("Identify command with unsupported CNS 0x%02x\n", cns);
+	cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+	cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+
+/*
+ * Find and detach the request with command ID `cid` on `qpair`.
+ *
+ * Only the controller's parked Asynchronous Event Request on an admin queue
+ * can currently be found. On a match it is unhooked from the controller and
+ * returned; otherwise NULL is returned.
+ */
+static struct spdk_nvmf_request *
+spdk_nvmf_qpair_abort(struct spdk_nvmf_qpair *qpair, uint16_t cid)
+{
+	struct spdk_nvmf_request *parked_aer;
+
+	/* TODO: track list of outstanding requests in qpair? */
+	if (!spdk_nvmf_qpair_is_admin_queue(qpair)) {
+		return NULL;
+	}
+
+	parked_aer = qpair->ctrlr->aer_req;
+	if (parked_aer == NULL || parked_aer->cmd->nvme_cmd.cid != cid) {
+		return NULL;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Aborting AER request\n");
+	qpair->ctrlr->aer_req = NULL;
+
+	return parked_aer;
+}
+
+/*
+ * Completion callback for the Abort channel iteration.
+ *
+ * The iteration context is the Abort request itself; it is completed here.
+ * `status` is not examined.
+ */
+static void
+spdk_nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status)
+{
+	struct spdk_nvmf_request *abort_req;
+
+	abort_req = spdk_io_channel_iter_get_ctx(i);
+	spdk_nvmf_request_complete(abort_req);
+}
+
+/*
+ * Per-poll-group step of the Abort command.
+ *
+ * The iteration context is the Abort request. CDW10 carries the target
+ * submission queue ID in its low 16 bits and the command ID to abort in its
+ * high 16 bits. This poll group's qpairs are searched for one that belongs
+ * to the same controller and has that queue ID.
+ *
+ *  - No such qpair here: continue the iteration with status 0.
+ *  - Qpair found, command not found: mark the Abort response Invalid Field
+ *    and stop the iteration with -EINVAL.
+ *  - Command found: complete it with Aborted By Request, mark the Abort
+ *    response successful (CDW0 = 0) and stop the iteration with -1.
+ *
+ * spdk_for_each_channel_continue() must be called exactly once per
+ * invocation, so every path that calls it returns immediately afterwards.
+ */
+static void
+spdk_nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i)
+{
+	struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
+	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	uint16_t sqid = cmd->cdw10 & 0xFFFFu;
+	struct spdk_nvmf_qpair *qpair;
+
+	TAILQ_FOREACH(qpair, &group->qpairs, link) {
+		if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) {
+			struct spdk_nvmf_request *req_to_abort;
+			uint16_t cid = cmd->cdw10 >> 16;
+
+			/* Found the qpair */
+
+			req_to_abort = spdk_nvmf_qpair_abort(qpair, cid);
+			if (req_to_abort == NULL) {
+				SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cid %u not found\n", cid);
+				rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+				rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+				spdk_for_each_channel_continue(i, -EINVAL);
+				return;
+			}
+
+			/* Complete the request with aborted status */
+			req_to_abort->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+			req_to_abort->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
+			spdk_nvmf_request_complete(req_to_abort);
+
+			SPDK_DEBUGLOG(SPDK_LOG_NVMF, "abort ctrlr=%p req=%p sqid=%u cid=%u successful\n",
+				      qpair->ctrlr, req_to_abort, sqid, cid);
+			rsp->cdw0 = 0; /* Command successfully aborted */
+			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+			rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+			/* Return -1 for the status so the iteration across threads stops. */
+			spdk_for_each_channel_continue(i, -1);
+			/*
+			 * Stop here: without this return the loop would keep running
+			 * and the iterator would be continued a second time below.
+			 */
+			return;
+		}
+	}
+
+	spdk_for_each_channel_continue(i, 0);
+}
+
+/*
+ * Abort admin command handler.
+ *
+ * Pre-loads the response with "command not aborted" (CDW0 = 1, Command
+ * Specific / Invalid Queue Identifier) and then asks every poll group to
+ * search for the target command. The request completes asynchronously from
+ * spdk_nvmf_ctrlr_abort_done().
+ */
+static int
+spdk_nvmf_ctrlr_abort(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+
+	/* Default outcome, used unless a poll group overwrites it. */
+	cpl->cdw0 = 1; /* Command not aborted */
+	cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+	cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
+
+	/* Send a message to each poll group, searching for this ctrlr, sqid, and command. */
+	spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt,
+			      spdk_nvmf_ctrlr_abort_on_pg,
+			      req,
+			      spdk_nvmf_ctrlr_abort_done);
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Complete a Get Features request whose whole result is the single dword
+ * `cdw0`, returned in the response's CDW0.
+ */
+static int
+get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0)
+{
+	req->rsp->nvme_cpl.cdw0 = cdw0;
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Get Features admin command handler.
+ *
+ * Dispatches on the feature ID in the low byte of CDW10. Most features are
+ * answered by returning the controller's stored raw value in CDW0;
+ * Temperature Threshold and Host Identifier have dedicated handlers.
+ * Unknown feature IDs complete with Invalid Field.
+ */
+static int
+spdk_nvmf_ctrlr_get_features(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+	uint8_t fid = req->cmd->nvme_cmd.cdw10 & 0xff; /* mask out the FID value */
+	uint32_t stored;
+
+	switch (fid) {
+	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
+		return spdk_nvmf_ctrlr_get_features_temperature_threshold(req);
+	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
+		return spdk_nvmf_ctrlr_get_features_host_identifier(req);
+	case SPDK_NVME_FEAT_ARBITRATION:
+		stored = target->feat.arbitration.raw;
+		break;
+	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
+		stored = target->feat.power_management.raw;
+		break;
+	case SPDK_NVME_FEAT_ERROR_RECOVERY:
+		stored = target->feat.error_recovery.raw;
+		break;
+	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
+		stored = target->feat.volatile_write_cache.raw;
+		break;
+	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+		stored = target->feat.number_of_queues.raw;
+		break;
+	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
+		stored = target->feat.write_atomicity.raw;
+		break;
+	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+		stored = target->feat.async_event_configuration.raw;
+		break;
+	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
+		stored = target->feat.keep_alive_timer.raw;
+		break;
+	default:
+		SPDK_ERRLOG("Get Features command with unsupported feature ID 0x%02x\n", fid);
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_FIELD;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/* Every feature that fell out of the switch is a plain raw-value read. */
+	return get_features_generic(req, stored);
+}
+
+/*
+ * Set Features admin command handler.
+ *
+ * Dispatches on the feature ID in the low byte of CDW10 to the matching
+ * per-feature handler and returns that handler's result. Unknown feature IDs
+ * complete with Invalid Field.
+ */
+static int
+spdk_nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
+{
+	uint8_t fid = req->cmd->nvme_cmd.cdw10 & 0xff; /* mask out the FID value */
+	int rc;
+
+	switch (fid) {
+	case SPDK_NVME_FEAT_ARBITRATION:
+		rc = spdk_nvmf_ctrlr_set_features_arbitration(req);
+		break;
+	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
+		rc = spdk_nvmf_ctrlr_set_features_power_management(req);
+		break;
+	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
+		rc = spdk_nvmf_ctrlr_set_features_temperature_threshold(req);
+		break;
+	case SPDK_NVME_FEAT_ERROR_RECOVERY:
+		rc = spdk_nvmf_ctrlr_set_features_error_recovery(req);
+		break;
+	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
+		rc = spdk_nvmf_ctrlr_set_features_volatile_write_cache(req);
+		break;
+	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+		rc = spdk_nvmf_ctrlr_set_features_number_of_queues(req);
+		break;
+	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
+		rc = spdk_nvmf_ctrlr_set_features_write_atomicity(req);
+		break;
+	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+		rc = spdk_nvmf_ctrlr_set_features_async_event_configuration(req);
+		break;
+	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
+		rc = spdk_nvmf_ctrlr_set_features_keep_alive_timer(req);
+		break;
+	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
+		rc = spdk_nvmf_ctrlr_set_features_host_identifier(req);
+		break;
+	default:
+		SPDK_ERRLOG("Set Features command with unsupported feature ID 0x%02x\n", fid);
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_FIELD;
+		rc = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Keep Alive admin command handler.
+ *
+ * Currently only logs the command and completes it. The intended design is
+ * for this handler to clear or reset a per-controller keep alive duration
+ * counter, with a separate timer-based process (not yet added) acting when
+ * the time since the last keep alive exceeds the maximum duration.
+ */
+static int
+spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
+{
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n");
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Entry point for admin commands.
+ *
+ * Rejects commands that arrive before a controller is associated with the
+ * qpair or while the controller is disabled (Command Sequence Error), zeroes
+ * the data buffer for controller-to-host transfers, and dispatches on the
+ * opcode. Discovery controllers only accept Identify and Get Log Page.
+ * Unsupported opcodes complete with Invalid Opcode.
+ *
+ * Returns the execution status of the handler that was dispatched to.
+ */
+int
+spdk_nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_ctrlr *target = req->qpair->ctrlr;
+	struct spdk_nvme_cmd *nvme_cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+
+	if (target == NULL) {
+		SPDK_ERRLOG("Admin command sent before CONNECT\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (target->vcprop.cc.bits.en != 1) {
+		SPDK_ERRLOG("Admin command sent to disabled controller\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/* Handlers rely on controller-to-host buffers starting out zeroed. */
+	if (req->data && spdk_nvme_opc_get_data_transfer(nvme_cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+		memset(req->data, 0, req->length);
+	}
+
+	/* Discovery controllers only support Get Log Page and Identify */
+	if (target->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY &&
+	    nvme_cmd->opc != SPDK_NVME_OPC_IDENTIFY &&
+	    nvme_cmd->opc != SPDK_NVME_OPC_GET_LOG_PAGE) {
+		goto invalid_opcode;
+	}
+
+	switch (nvme_cmd->opc) {
+	case SPDK_NVME_OPC_GET_LOG_PAGE:
+		return spdk_nvmf_ctrlr_get_log_page(req);
+	case SPDK_NVME_OPC_IDENTIFY:
+		return spdk_nvmf_ctrlr_identify(req);
+	case SPDK_NVME_OPC_ABORT:
+		return spdk_nvmf_ctrlr_abort(req);
+	case SPDK_NVME_OPC_GET_FEATURES:
+		return spdk_nvmf_ctrlr_get_features(req);
+	case SPDK_NVME_OPC_SET_FEATURES:
+		return spdk_nvmf_ctrlr_set_features(req);
+	case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
+		return spdk_nvmf_ctrlr_async_event_request(req);
+	case SPDK_NVME_OPC_KEEP_ALIVE:
+		return spdk_nvmf_ctrlr_keep_alive(req);
+
+	case SPDK_NVME_OPC_CREATE_IO_SQ:
+	case SPDK_NVME_OPC_CREATE_IO_CQ:
+	case SPDK_NVME_OPC_DELETE_IO_SQ:
+	case SPDK_NVME_OPC_DELETE_IO_CQ:
+		/* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
+	default:
+		break;
+	}
+
+invalid_opcode:
+	SPDK_ERRLOG("Unsupported admin opcode 0x%x\n", nvme_cmd->opc);
+	cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+	cpl->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Entry point for Fabrics commands.
+ *
+ *  - qpair without a controller: only Connect is accepted; anything else
+ *    completes with Command Sequence Error.
+ *  - admin queue with a controller: Property Set and Property Get are
+ *    handled; anything else completes with Invalid Opcode.
+ *  - I/O queue with a controller: every Fabrics command completes with
+ *    Invalid Opcode.
+ */
+int
+spdk_nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_qpair *qp = req->qpair;
+	struct spdk_nvmf_capsule_cmd *capsule = &req->cmd->nvmf_cmd;
+
+	if (qp->ctrlr == NULL) {
+		/* No ctrlr established yet; the only valid command is Connect */
+		if (capsule->fctype != SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
+			SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Got fctype 0x%x, expected Connect\n",
+				      capsule->fctype);
+			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		}
+
+		return spdk_nvmf_ctrlr_connect(req);
+	}
+
+	if (!spdk_nvmf_qpair_is_admin_queue(qp)) {
+		/* Controller session is established, and this is an I/O queue */
+		/* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Unexpected I/O fctype 0x%x\n", capsule->fctype);
+		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	/*
+	 * Controller session is established, and this is an admin queue.
+	 * Disallow Connect and allow other fabrics commands.
+	 */
+	switch (capsule->fctype) {
+	case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
+		return spdk_nvmf_property_set(req);
+	case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
+		return spdk_nvmf_property_get(req);
+	default:
+		break;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "unknown fctype 0x%02x\n",
+		      capsule->fctype);
+	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+	req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Raise a "namespace attribute changed" asynchronous event on `ctrlr`.
+ *
+ * Does nothing when the host has disabled namespace attribute notices.
+ * If an AER is outstanding it is completed with the event in CDW0;
+ * otherwise the event is queued in ctrlr->notice_event (unless a notice is
+ * already queued there) for delivery when the next AER arrives.
+ *
+ * Always returns 0.
+ */
+int
+spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	union spdk_nvme_async_event_completion notice = {0};
+	struct spdk_nvmf_request *parked_aer;
+
+	/* Users may disable the event notification */
+	if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) {
+		return 0;
+	}
+
+	notice.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
+	notice.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED;
+	notice.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST;
+
+	parked_aer = ctrlr->aer_req;
+	if (parked_aer != NULL) {
+		/* An AER is waiting: answer it right away. */
+		parked_aer->rsp->nvme_cpl.cdw0 = notice.raw;
+
+		spdk_nvmf_request_complete(parked_aer);
+		ctrlr->aer_req = NULL;
+
+		return 0;
+	}
+
+	/*
+	 * No outstanding AER: remember the event so it can be sent as the
+	 * response to an AER submitted later. A notice that is already queued
+	 * is left as is.
+	 */
+	if (ctrlr->notice_event.bits.async_event_type !=
+	    SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) {
+		ctrlr->notice_event.raw = notice.raw;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the controller's parked Asynchronous Event Request, if any.
+ *
+ * Only acts when `qpair` is an admin queue that has a controller attached.
+ * The parked request is freed (not completed) and the controller's aer_req
+ * pointer is cleared.
+ */
+void
+spdk_nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair)
+{
+	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+	if (!spdk_nvmf_qpair_is_admin_queue(qpair)) {
+		return;
+	}
+
+	/* A qpair that never completed Connect has no controller to inspect. */
+	if (ctrlr == NULL) {
+		return;
+	}
+
+	if (ctrlr->aer_req != NULL) {
+		spdk_nvmf_request_free(ctrlr->aer_req);
+		ctrlr->aer_req = NULL;
+	}
+}
+
+/*
+ * Complete the controller's parked Asynchronous Event Request, if any, and
+ * clear the controller's aer_req pointer.
+ */
+void
+spdk_nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	if (ctrlr->aer_req != NULL) {
+		spdk_nvmf_request_complete(ctrlr->aer_req);
+		ctrlr->aer_req = NULL;
+	}
+}
diff --git a/src/spdk/lib/nvmf/ctrlr_bdev.c b/src/spdk/lib/nvmf/ctrlr_bdev.c
new file mode 100644
index 00000000..7eb4f19a
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr_bdev.c
@@ -0,0 +1,531 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+
+#include "spdk/bdev.h"
+#include "spdk/endian.h"
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/trace.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+
+/*
+ * Return true when every namespace of the subsystem that has a bdev attached
+ * supports io_type. Namespaces without a bdev are skipped.
+ */
+static bool
+spdk_nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
+		enum spdk_bdev_io_type io_type)
+{
+	struct spdk_nvmf_ns *cur = spdk_nvmf_subsystem_get_first_ns(subsystem);
+
+	while (cur != NULL) {
+		if (cur->bdev != NULL && !spdk_bdev_io_type_supported(cur->bdev, io_type)) {
+			SPDK_DEBUGLOG(SPDK_LOG_NVMF,
+				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
+				      spdk_nvmf_subsystem_get_nqn(subsystem),
+				      cur->opts.nsid, spdk_bdev_get_name(cur->bdev), (int)io_type);
+			return false;
+		}
+
+		cur = spdk_nvmf_subsystem_get_next_ns(subsystem, cur);
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n",
+		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
+	return true;
+}
+
+/* True when all bdev-backed namespaces of the controller's subsystem support UNMAP. */
+bool
+spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+
+	return spdk_nvmf_subsystem_bdev_io_type_supported(subsystem, SPDK_BDEV_IO_TYPE_UNMAP);
+}
+
+/* True when all bdev-backed namespaces of the controller's subsystem support WRITE_ZEROES. */
+bool
+spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+
+	return spdk_nvmf_subsystem_bdev_io_type_supported(subsystem, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
+}
+
+/*
+ * bdev completion callback for I/O commands: copy the NVMe status of the
+ * finished bdev I/O into the request's completion, complete the request and
+ * release the bdev I/O. cb_arg is the struct spdk_nvmf_request.
+ */
+static void
+nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
+		void *cb_arg)
+{
+	struct spdk_nvmf_request *req = cb_arg;
+	int status_type, status_code;
+
+	spdk_bdev_io_get_nvme_status(bdev_io, &status_type, &status_code);
+	req->rsp->nvme_cpl.status.sc = status_code;
+	req->rsp->nvme_cpl.status.sct = status_type;
+
+	spdk_nvmf_request_complete(req);
+	spdk_bdev_free_io(bdev_io);
+}
+
+/*
+ * Fill the Identify Namespace data for a namespace from its backing bdev and
+ * from the identifiers stored in the namespace options.
+ */
+void
+spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata)
+{
+	struct spdk_bdev *bdev = ns->bdev;
+	const uint64_t total_blocks = spdk_bdev_get_num_blocks(bdev);
+
+	/* Size, capacity and utilization all report the bdev's block count. */
+	nsdata->nsze = total_blocks;
+	nsdata->ncap = total_blocks;
+	nsdata->nuse = total_blocks;
+
+	/* Only LBA format 0 is populated; its data size is log2(block size). */
+	nsdata->nlbaf = 0;
+	nsdata->flbas.format = 0;
+	nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
+
+	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
+	nsdata->nmic.can_share = 1;
+
+	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
+	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
+
+	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
+	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
+}
+
+/*
+ * Decode the starting LBA and the block count of a read/write style command.
+ *
+ * start_lba receives the 64-bit little-endian value held in CDW10/CDW11.
+ * num_blocks receives CDW12 bits 15:00 plus one (the field is 0's based).
+ */
+static void
+nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
+		uint64_t *num_blocks)
+{
+	const uint32_t nlb_field = from_le32(&cmd->cdw12) & 0xFFFFu;
+
+	*start_lba = from_le64(&cmd->cdw10);
+	*num_blocks = nlb_field + 1;
+}
+
+/*
+ * Return true when [io_start_lba, io_start_lba + io_num_blocks) lies within a
+ * device of bdev_num_blocks blocks. A range whose end wraps around 64 bits is
+ * rejected.
+ */
+static bool
+nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
+		uint64_t io_num_blocks)
+{
+	const uint64_t io_end_lba = io_start_lba + io_num_blocks;
+
+	/* io_end_lba < io_start_lba means the addition overflowed. */
+	return io_end_lba >= io_start_lba && io_end_lba <= bdev_num_blocks;
+}
+
+/*
+ * io_wait callback: run an I/O command again after an earlier submission
+ * attempt got -ENOMEM from the bdev layer. arg is the struct spdk_nvmf_request.
+ *
+ * Nobody else looks at the execution status on this path, so a command that
+ * finishes synchronously (for example with an error status) has to be
+ * completed here; otherwise the request would never be answered.
+ */
+static void
+spdk_nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
+{
+	struct spdk_nvmf_request *req = arg;
+	int rc;
+
+	rc = spdk_nvmf_ctrlr_process_io_cmd(req);
+	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+		spdk_nvmf_request_complete(req);
+	}
+}
+
+/*
+ * Register the request's bdev_io_wait entry with the bdev layer so that
+ * cb_fn(cb_arg) is invoked later. Used by the command handlers after a
+ * submission returned -ENOMEM. A failure to queue trips an assert.
+ */
+static void
+nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
+		struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
+{
+	req->bdev_io_wait.cb_arg = cb_arg;
+	req->bdev_io_wait.cb_fn = cb_fn;
+	req->bdev_io_wait.bdev = bdev;
+
+	if (spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait) != 0) {
+		assert(false);
+	}
+}
+
+/*
+ * Submit an NVMe Read to the bdev.
+ *
+ * Completes immediately with an error status when the LBA range lies outside
+ * the bdev or when the transfer size exceeds req->length. When the bdev layer
+ * returns -ENOMEM the command is queued for resubmission and stays
+ * asynchronous; any other submission error completes the command with
+ * Internal Device Error.
+ */
+static int
+nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+	uint32_t blk_size = spdk_bdev_get_block_size(bdev);
+	uint64_t lba, nblocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &lba, &nblocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(spdk_bdev_get_num_blocks(bdev), lba, nblocks))) {
+		SPDK_ERRLOG("end of media\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(nblocks * blk_size > req->length)) {
+		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    nblocks, blk_size, req->length);
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, lba, nblocks,
+				    nvmf_bdev_ctrlr_complete_cmd, req);
+	switch (rc) {
+	case 0:
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	case -ENOMEM:
+		nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	default:
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+}
+
+/*
+ * Submit an NVMe Write to the bdev.
+ *
+ * Completes immediately with an error status when the LBA range lies outside
+ * the bdev or when the transfer size exceeds req->length. When the bdev layer
+ * returns -ENOMEM the command is queued for resubmission and stays
+ * asynchronous; any other submission error completes the command with
+ * Internal Device Error.
+ */
+static int
+nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+	uint32_t blk_size = spdk_bdev_get_block_size(bdev);
+	uint64_t lba, nblocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &lba, &nblocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(spdk_bdev_get_num_blocks(bdev), lba, nblocks))) {
+		SPDK_ERRLOG("end of media\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(nblocks * blk_size > req->length)) {
+		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    nblocks, blk_size, req->length);
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, lba, nblocks,
+				     nvmf_bdev_ctrlr_complete_cmd, req);
+	switch (rc) {
+	case 0:
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	case -ENOMEM:
+		nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	default:
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+}
+
+/*
+ * Submit an NVMe Write Zeroes to the bdev.
+ *
+ * Completes immediately with LBA Out Of Range when the range lies outside the
+ * bdev. When the bdev layer returns -ENOMEM the command is queued for
+ * resubmission and stays asynchronous; any other submission error completes
+ * the command with Internal Device Error.
+ */
+static int
+nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl = &req->rsp->nvme_cpl;
+	uint64_t lba, nblocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &lba, &nblocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(spdk_bdev_get_num_blocks(bdev), lba, nblocks))) {
+		SPDK_ERRLOG("end of media\n");
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_write_zeroes_blocks(desc, ch, lba, nblocks,
+					   nvmf_bdev_ctrlr_complete_cmd, req);
+	switch (rc) {
+	case 0:
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	case -ENOMEM:
+		nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	default:
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+}
+
+/*
+ * Submit an NVMe Flush covering the whole bdev.
+ *
+ * bdevs that do not support FLUSH get an immediate success. When the bdev
+ * layer returns -ENOMEM the command is queued for resubmission and stays
+ * asynchronous; any other submission error completes the command with a
+ * generic Internal Device Error.
+ */
+static int
+nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	int rc;
+
+	/* As for NVMeoF controller, SPDK always set volatile write
+	 * cache bit to 1, return success for those block devices
+	 * which can't support FLUSH command.
+	 */
+	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_SUCCESS;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
+				    nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		/* Set the status code type together with the status code, as the
+		 * read/write handlers do, so the pair is always coherent.
+		 */
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Per-request state of a Dataset Management (deallocate) command, which is
+ * executed as one bdev unmap per range.
+ */
+struct nvmf_virtual_ctrlr_unmap {
+	struct spdk_nvmf_request	*req;
+	/* References keeping this context alive: one per unmap in flight, plus
+	 * one while the context is parked on the bdev io_wait queue.
+	 */
+	uint32_t			count;
+	/* Index of the next range to submit; a resubmission resumes here. */
+	uint32_t			range_index;
+	struct spdk_bdev_desc		*desc;
+	struct spdk_bdev		*bdev;
+	struct spdk_io_channel		*ch;
+};
+
+/*
+ * bdev completion callback for one unmap of a Dataset Management command.
+ * cb_arg is the struct nvmf_virtual_ctrlr_unmap shared by all ranges.
+ *
+ * The first failure seen is kept in the response. The request is completed and
+ * the context freed once the last reference is dropped.
+ */
+static void
+nvmf_virtual_ctrlr_dsm_cpl(struct spdk_bdev_io *bdev_io, bool success,
+		void *cb_arg)
+{
+	struct nvmf_virtual_ctrlr_unmap *unmap_ctx = cb_arg;
+	struct spdk_nvmf_request *req = unmap_ctx->req;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	int sc, sct;
+
+	unmap_ctx->count--;
+
+	/* Do not overwrite an error that was already recorded. */
+	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
+	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
+		spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
+		response->status.sc = sc;
+		response->status.sct = sct;
+	}
+
+	if (unmap_ctx->count == 0) {
+		spdk_nvmf_request_complete(req);
+		free(unmap_ctx);
+	}
+	spdk_bdev_free_io(bdev_io);
+}
+
+static int
+nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+			struct nvmf_virtual_ctrlr_unmap *unmap_ctx);
+
+/*
+ * io_wait callback: continue a Dataset Management command whose previous
+ * unmap submission got -ENOMEM. arg is the struct nvmf_virtual_ctrlr_unmap.
+ */
+static void
+nvmf_bdev_ctrlr_dsm_cmd_resubmit(void *arg)
+{
+	struct nvmf_virtual_ctrlr_unmap *unmap_ctx = arg;
+	struct spdk_nvmf_request *req = unmap_ctx->req;
+	int rc;
+
+	rc = nvmf_bdev_ctrlr_dsm_cmd(unmap_ctx->bdev, unmap_ctx->desc, unmap_ctx->ch, req, unmap_ctx);
+	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+		/* Nothing is in flight any more and unmap_ctx has been freed by
+		 * nvmf_bdev_ctrlr_dsm_cmd(); nobody else will complete the request.
+		 */
+		spdk_nvmf_request_complete(req);
+	}
+}
+
+/*
+ * Execute a Dataset Management command.
+ *
+ * Only the deallocate attribute is acted upon: each range in the request
+ * payload becomes one bdev unmap. A command without that attribute completes
+ * successfully without touching the bdev.
+ *
+ * unmap_ctx is NULL for a new command and non-NULL when this function is
+ * re-entered from nvmf_bdev_ctrlr_dsm_cmd_resubmit().
+ *
+ * Returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE when the caller has to
+ * complete the request (no unmap is in flight and any context has been
+ * freed), SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS otherwise.
+ */
+static int
+nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+		struct nvmf_virtual_ctrlr_unmap *unmap_ctx)
+{
+	uint32_t attribute;
+	uint32_t i;
+	uint16_t nr;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	struct spdk_nvme_dsm_range *dsm_range;
+	uint64_t lba;
+	uint32_t lba_count;
+	int rc;
+
+	/* Number of ranges: CDW10 bits 07:00, 0's based */
+	nr = ((cmd->cdw10 & 0x000000ff) + 1);
+
+	if (unmap_ctx == NULL) {
+		/* First pass: validate the command and set up the context. */
+		if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
+			SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
+			response->status.sct = SPDK_NVME_SCT_GENERIC;
+			response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		}
+
+		attribute = cmd->cdw11 & 0x00000007;
+		if (!(attribute & SPDK_NVME_DSM_ATTR_DEALLOCATE)) {
+			response->status.sct = SPDK_NVME_SCT_GENERIC;
+			response->status.sc = SPDK_NVME_SC_SUCCESS;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		}
+
+		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
+		if (!unmap_ctx) {
+			response->status.sct = SPDK_NVME_SCT_GENERIC;
+			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		}
+
+		unmap_ctx->req = req;
+		unmap_ctx->desc = desc;
+		unmap_ctx->bdev = bdev;
+		unmap_ctx->ch = ch;
+
+		/* Start from success only once, so that a resubmission cannot
+		 * erase an error recorded by an unmap that already completed.
+		 */
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_SUCCESS;
+	} else {
+		/* Dequeued from io_wait: drop the reference taken when queued. */
+		unmap_ctx->count--;
+	}
+
+	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
+	for (i = unmap_ctx->range_index; i < nr; i++) {
+		lba = dsm_range[i].starting_lba;
+		lba_count = dsm_range[i].length;
+
+		unmap_ctx->count++;
+
+		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
+					    nvmf_virtual_ctrlr_dsm_cpl, unmap_ctx);
+		if (rc == 0) {
+			unmap_ctx->range_index++;
+			continue;
+		}
+
+		if (rc == -ENOMEM) {
+			/* The unmap was not submitted. Keep the reference taken
+			 * above while the context waits on the io_wait queue, so
+			 * completions of earlier unmaps cannot free it; it is
+			 * dropped again when this function is re-entered.
+			 */
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_dsm_cmd_resubmit, unmap_ctx);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		unmap_ctx->count--;
+		/* We can't return here - we may have to wait for any other
+		 * unmaps already sent to complete */
+		break;
+	}
+
+	if (unmap_ctx->count == 0) {
+		free(unmap_ctx);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Hand a command to the bdev as an NVMe I/O passthrough.
+ *
+ * When the bdev layer returns -ENOMEM the command is queued for resubmission
+ * and stays asynchronous; any other submission error completes the command
+ * with Invalid Opcode.
+ */
+static int
+nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *cpl;
+	int rc;
+
+	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
+					nvmf_bdev_ctrlr_complete_cmd, req);
+	switch (rc) {
+	case 0:
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	case -ENOMEM:
+		nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	default:
+		cpl = &req->rsp->nvme_cpl;
+		cpl->status.sct = SPDK_NVME_SCT_GENERIC;
+		cpl->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+}
+
+/*
+ * Execute one NVMe I/O command.
+ *
+ * The command is rejected when the qpair has no controller yet, when the
+ * controller is not enabled, or when the namespace ID does not resolve to a
+ * namespace with a bdev. Otherwise it is dispatched by opcode; opcodes without
+ * a dedicated handler go to the bdev as an NVMe passthrough.
+ *
+ * Returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE when the response is ready
+ * and the caller must complete the request, or
+ * SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS when completion happens later.
+ */
+int
+spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
+{
+	uint32_t nsid;
+	struct spdk_nvmf_ns *ns;
+	struct spdk_bdev *bdev;
+	struct spdk_bdev_desc *desc;
+	struct spdk_io_channel *ch;
+	struct spdk_nvmf_poll_group *group = req->qpair->group;
+	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+	/* pre-set response details for this command */
+	response->status.sc = SPDK_NVME_SC_SUCCESS;
+	nsid = cmd->nsid;
+
+	if (spdk_unlikely(ctrlr == NULL)) {
+		SPDK_ERRLOG("I/O command sent before CONNECT\n");
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
+		SPDK_ERRLOG("I/O command sent to disabled controller\n");
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	ns = _spdk_nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
+	if (ns == NULL || ns->bdev == NULL) {
+		SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid);
+		/* Set the status code type as the other error paths above do, so
+		 * the status code is never paired with a stale type.
+		 */
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+		response->status.dnr = 1;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	bdev = ns->bdev;
+	desc = ns->desc;
+	/* Channels are indexed by nsid - 1 within the subsystem's poll group entry. */
+	ch = group->sgroups[ctrlr->subsys->id].channels[nsid - 1];
+	switch (cmd->opc) {
+	case SPDK_NVME_OPC_READ:
+		return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
+	case SPDK_NVME_OPC_WRITE:
+		return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
+	case SPDK_NVME_OPC_WRITE_ZEROES:
+		return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
+	case SPDK_NVME_OPC_FLUSH:
+		return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
+	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
+		return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req, NULL);
+	default:
+		return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
+	}
+}
diff --git a/src/spdk/lib/nvmf/ctrlr_discovery.c b/src/spdk/lib/nvmf/ctrlr_discovery.c
new file mode 100644
index 00000000..305a6076
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr_discovery.c
@@ -0,0 +1,144 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NVMe over Fabrics discovery service
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/event.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/nvmf_spec.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+/*
+ * Rebuild the target's cached discovery log page.
+ *
+ * One entry is produced for every listener of every subsystem that is not a
+ * discovery subsystem. The new page replaces tgt->discovery_log_page and is
+ * stamped with the current tgt->discovery_genctr. If the initial allocation
+ * fails the cached page is left untouched; if growing the page fails, the
+ * remaining listeners of that subsystem are left out.
+ */
+static void
+nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt)
+{
+	struct spdk_nvmf_discovery_log_page *log_page;
+	size_t log_size = sizeof(struct spdk_nvmf_discovery_log_page);
+	uint64_t num_entries = 0;
+	uint32_t idx;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Generating log page for genctr %" PRIu64 "\n",
+		      tgt->discovery_genctr);
+
+	log_page = calloc(1, log_size);
+	if (log_page == NULL) {
+		SPDK_ERRLOG("Discovery log page memory allocation error\n");
+		return;
+	}
+
+	for (idx = 0; idx < tgt->opts.max_subsystems; idx++) {
+		struct spdk_nvmf_subsystem *subsystem = tgt->subsystems[idx];
+		struct spdk_nvmf_listener *listener;
+
+		if (subsystem == NULL || subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+			continue;
+		}
+
+		listener = spdk_nvmf_subsystem_get_first_listener(subsystem);
+		while (listener != NULL) {
+			struct spdk_nvmf_discovery_log_page_entry *entry;
+			void *grown = realloc(log_page, log_size + sizeof(*entry));
+
+			if (grown == NULL) {
+				SPDK_ERRLOG("Discovery log page memory allocation error\n");
+				break;
+			}
+
+			log_page = grown;
+			log_size += sizeof(*entry);
+
+			entry = &log_page->entries[num_entries];
+			memset(entry, 0, sizeof(*entry));
+			entry->portid = num_entries;
+			entry->cntlid = 0xffff;
+			entry->asqsz = listener->transport->opts.max_aq_depth;
+			entry->subtype = subsystem->subtype;
+			snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn);
+
+			/* Let the transport fill in its part of the entry. */
+			spdk_nvmf_transport_listener_discover(listener->transport, &listener->trid, entry);
+
+			num_entries++;
+			listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener);
+		}
+	}
+
+	log_page->numrec = num_entries;
+	log_page->genctr = tgt->discovery_genctr;
+
+	free(tgt->discovery_log_page);
+
+	tgt->discovery_log_page = log_page;
+	tgt->discovery_log_page_size = log_size;
+}
+
+/*
+ * Copy `length` bytes of the discovery log page, starting at `offset`, into
+ * `buffer`.
+ *
+ * The cached page is regenerated first when it is missing or was built for a
+ * different generation counter. Bytes beyond the end of the page (or all
+ * bytes, when no page is available) are zero-filled.
+ */
+void
+spdk_nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, void *buffer,
+		uint64_t offset, uint32_t length)
+{
+	size_t copy_len = 0;
+	size_t zero_len;
+
+	if (tgt->discovery_log_page == NULL ||
+	    tgt->discovery_log_page->genctr != tgt->discovery_genctr) {
+		nvmf_update_discovery_log(tgt);
+	}
+
+	/* Copy the valid part of the discovery log page, if any */
+	if (tgt->discovery_log_page && offset < tgt->discovery_log_page_size) {
+		copy_len = spdk_min(tgt->discovery_log_page_size - offset, length);
+		memcpy(buffer, (char *)tgt->discovery_log_page + offset, copy_len);
+	}
+
+	/* Zero out the rest of the buffer */
+	zero_len = length - copy_len;
+	if (zero_len != 0) {
+		memset((char *)buffer + copy_len, 0, zero_len);
+	}
+
+	/* We should have copied or zeroed every byte of the output buffer. */
+	assert(copy_len + zero_len == length);
+}
diff --git a/src/spdk/lib/nvmf/nvmf.c b/src/spdk/lib/nvmf/nvmf.c
new file mode 100644
index 00000000..32539f53
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf.c
@@ -0,0 +1,1173 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/bit_array.h"
+#include "spdk/conf.h"
+#include "spdk/thread.h"
+#include "spdk/nvmf.h"
+#include "spdk/trace.h"
+#include "spdk/endian.h"
+#include "spdk/string.h"
+
+#include "spdk_internal/log.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
+
+#define SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
+#define SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
+#define SPDK_NVMF_DEFAULT_MAX_IO_SIZE 131072
+#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
+#define SPDK_NVMF_DEFAULT_IO_UNIT_SIZE 131072
+
+typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status);
+static void spdk_nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf);
+
+/* Context supplied to a single call to nvmf_qpair_disconnect. */
+struct nvmf_qpair_disconnect_ctx {
+ struct spdk_nvmf_qpair *qpair; /* presumably the qpair being disconnected - confirm at the use site */
+ struct spdk_nvmf_ctrlr *ctrlr; /* presumably that qpair's controller - confirm at the use site */
+ nvmf_qpair_disconnect_cb cb_fn; /* caller's callback; invoked with ctx below (TODO confirm) */
+ struct spdk_thread *thread; /* NOTE(review): looks like the thread cb_fn should run on - confirm */
+ void *ctx; /* opaque argument for cb_fn */
+ uint16_t qid; /* presumably the queue ID of qpair - confirm */
+};
+
+/*
+ * There are several times when we need to iterate through the list of all qpairs and selectively delete them.
+ * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from
+ * to enable calling nvmf_qpair_disconnect on the next desired qpair.
+ */
+struct nvmf_qpair_disconnect_many_ctx {
+ struct spdk_nvmf_subsystem *subsystem; /* not used by the poll-group teardown path below; presumably set by other users - confirm */
+ struct spdk_nvmf_poll_group *group; /* poll group whose qpairs list is walked */
+ spdk_nvmf_poll_group_mod_done cpl_fn; /* presumably called when the iteration is finished - confirm at the use site */
+ void *cpl_ctx; /* presumably the argument for cpl_fn - confirm */
+};
+
+/*
+ * Record a new state on the qpair. Asserts that the caller runs on the thread
+ * of the qpair's poll group.
+ */
+static void
+spdk_nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair,
+		enum spdk_nvmf_qpair_state state)
+{
+	assert(qpair != NULL);
+	assert(spdk_get_thread() == qpair->group->thread);
+
+	qpair->state = state;
+}
+
+/* Fill the target options with the SPDK_NVMF_DEFAULT_* values defined in this file. */
+void
+spdk_nvmf_tgt_opts_init(struct spdk_nvmf_tgt_opts *opts)
+{
+	/* Queue and connection limits */
+	opts->max_queue_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH;
+	opts->max_qpairs_per_ctrlr = SPDK_NVMF_DEFAULT_MAX_QPAIRS_PER_CTRLR;
+	opts->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
+
+	/* Data transfer sizes */
+	opts->in_capsule_data_size = SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE;
+	opts->max_io_size = SPDK_NVMF_DEFAULT_MAX_IO_SIZE;
+	opts->io_unit_size = SPDK_NVMF_DEFAULT_IO_UNIT_SIZE;
+}
+
+/*
+ * Poller body of a poll group: poll each transport poll group in turn.
+ *
+ * Returns the sum of the values returned by the transport poll groups, or -1
+ * as soon as one of them returns a negative value.
+ */
+static int
+spdk_nvmf_poll_group_poll(void *ctx)
+{
+	struct spdk_nvmf_poll_group *group = ctx;
+	struct spdk_nvmf_transport_poll_group *tgroup;
+	int total = 0;
+
+	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
+		int polled = spdk_nvmf_transport_poll_group_poll(tgroup);
+
+		if (polled < 0) {
+			return -1;
+		}
+		total += polled;
+	}
+
+	return total;
+}
+
+/*
+ * I/O channel create callback of the target: initialize a poll group.
+ *
+ * Adds every transport of the target, allocates one subsystem poll group slot
+ * per possible subsystem, adds the subsystems that already exist and registers
+ * the poller.
+ *
+ * Returns 0 on success and -1 on failure. On failure everything set up so far
+ * is torn down through spdk_nvmf_tgt_destroy_poll_group().
+ */
+static int
+spdk_nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
+{
+	struct spdk_nvmf_tgt *tgt = io_device;
+	struct spdk_nvmf_poll_group *group = ctx_buf;
+	struct spdk_nvmf_transport *transport;
+	uint32_t sid;
+
+	TAILQ_INIT(&group->tgroups);
+	TAILQ_INIT(&group->qpairs);
+
+	TAILQ_FOREACH(transport, &tgt->transports, link) {
+		spdk_nvmf_poll_group_add_transport(group, transport);
+	}
+
+	group->sgroups = calloc(tgt->opts.max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
+	if (!group->sgroups) {
+		/* There are no subsystem slots to walk, but the transport poll
+		 * groups added above still have to be released.
+		 */
+		group->num_sgroups = 0;
+		spdk_nvmf_tgt_destroy_poll_group(io_device, ctx_buf);
+		return -1;
+	}
+	group->num_sgroups = tgt->opts.max_subsystems;
+
+	for (sid = 0; sid < tgt->opts.max_subsystems; sid++) {
+		struct spdk_nvmf_subsystem *subsystem;
+
+		subsystem = tgt->subsystems[sid];
+		if (!subsystem) {
+			continue;
+		}
+
+		if (spdk_nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
+			spdk_nvmf_tgt_destroy_poll_group(io_device, ctx_buf);
+			return -1;
+		}
+	}
+
+	group->poller = spdk_poller_register(spdk_nvmf_poll_group_poll, group, 0);
+	group->thread = spdk_get_thread();
+
+	return 0;
+}
+
+/*
+ * I/O channel destroy callback of the target: release what a poll group owns.
+ *
+ * Destroys all transport poll groups, puts every I/O channel held by the
+ * subsystem poll groups and frees the channel arrays and the sgroups array.
+ */
+static void
+spdk_nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf)
+{
+	struct spdk_nvmf_poll_group *group = ctx_buf;
+	struct spdk_nvmf_transport_poll_group *tgroup;
+	uint32_t sg_idx, ch_idx;
+
+	while ((tgroup = TAILQ_FIRST(&group->tgroups)) != NULL) {
+		TAILQ_REMOVE(&group->tgroups, tgroup, link);
+		spdk_nvmf_transport_poll_group_destroy(tgroup);
+	}
+
+	for (sg_idx = 0; sg_idx < group->num_sgroups; sg_idx++) {
+		struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[sg_idx];
+
+		for (ch_idx = 0; ch_idx < sgroup->num_channels; ch_idx++) {
+			if (sgroup->channels[ch_idx] == NULL) {
+				continue;
+			}
+			spdk_put_io_channel(sgroup->channels[ch_idx]);
+			sgroup->channels[ch_idx] = NULL;
+		}
+
+		free(sgroup->channels);
+	}
+
+	free(group->sgroups);
+}
+
+/*
+ * Disconnect the first qpair of the poll group, passing this function as the
+ * disconnect callback so that the following qpair is handled next. ctx is a
+ * struct nvmf_qpair_disconnect_many_ctx.
+ *
+ * Once no qpair is left, or a disconnect cannot be started, the poll group's
+ * I/O channel reference is put and the context is freed.
+ */
+static void
+_nvmf_tgt_disconnect_next_qpair(void *ctx)
+{
+	struct nvmf_qpair_disconnect_many_ctx *many_ctx = ctx;
+	struct spdk_nvmf_poll_group *group = many_ctx->group;
+	struct spdk_nvmf_qpair *next = TAILQ_FIRST(&group->qpairs);
+
+	if (next != NULL &&
+	    spdk_nvmf_qpair_disconnect(next, _nvmf_tgt_disconnect_next_qpair, ctx) == 0) {
+		/* The disconnect was started; this function was handed over as its callback. */
+		return;
+	}
+
+	/* When the refcount from the channels reaches 0, spdk_nvmf_tgt_destroy_poll_group will be called. */
+	spdk_put_io_channel(spdk_io_channel_from_ctx(group));
+	free(many_ctx);
+}
+
+/*
+ * Stop the poll group's poller and start disconnecting its qpairs one after
+ * another. If the iteration context cannot be allocated, an error is logged
+ * and nothing else is done.
+ */
+static void
+spdk_nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
+{
+	struct nvmf_qpair_disconnect_many_ctx *many_ctx;
+
+	many_ctx = calloc(1, sizeof(*many_ctx));
+	if (many_ctx == NULL) {
+		SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n");
+		return;
+	}
+	many_ctx->group = group;
+
+	spdk_poller_unregister(&group->poller);
+
+	_nvmf_tgt_disconnect_next_qpair(many_ctx);
+}
+
+/*
+ * Allocate an NVMe-oF target and register it as an I/O device.
+ *
+ * opts may be NULL, in which case the defaults from spdk_nvmf_tgt_opts_init()
+ * are used; otherwise the options are copied. Returns NULL when memory cannot
+ * be allocated.
+ */
+struct spdk_nvmf_tgt *
+spdk_nvmf_tgt_create(struct spdk_nvmf_tgt_opts *opts)
+{
+	struct spdk_nvmf_tgt *tgt = calloc(1, sizeof(*tgt));
+
+	if (tgt == NULL) {
+		return NULL;
+	}
+
+	if (opts != NULL) {
+		tgt->opts = *opts;
+	} else {
+		spdk_nvmf_tgt_opts_init(&tgt->opts);
+	}
+
+	/* One slot per possible subsystem. */
+	tgt->subsystems = calloc(tgt->opts.max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
+	if (tgt->subsystems == NULL) {
+		free(tgt);
+		return NULL;
+	}
+
+	TAILQ_INIT(&tgt->transports);
+	tgt->discovery_log_page = NULL;
+	tgt->discovery_log_page_size = 0;
+	tgt->discovery_genctr = 0;
+
+	spdk_io_device_register(tgt,
+				spdk_nvmf_tgt_create_poll_group,
+				spdk_nvmf_tgt_destroy_poll_group,
+				sizeof(struct spdk_nvmf_poll_group),
+				"nvmf_tgt");
+
+	return tgt;
+}
+
+/*
+ * I/O device unregister callback: free everything the target owns (cached
+ * discovery log page, subsystems, transports and the target itself), then
+ * invoke the destroy-done callback, if one was set, with status 0.
+ */
+static void
+spdk_nvmf_tgt_destroy_cb(void *io_device)
+{
+	struct spdk_nvmf_tgt *tgt = io_device;
+	struct spdk_nvmf_transport *transport;
+	spdk_nvmf_tgt_destroy_done_fn *done_fn;
+	void *done_arg;
+	uint32_t sid;
+
+	/* free(NULL) is a no-op, so no guard is needed. */
+	free(tgt->discovery_log_page);
+
+	if (tgt->subsystems != NULL) {
+		for (sid = 0; sid < tgt->opts.max_subsystems; sid++) {
+			if (tgt->subsystems[sid] != NULL) {
+				spdk_nvmf_subsystem_destroy(tgt->subsystems[sid]);
+			}
+		}
+		free(tgt->subsystems);
+	}
+
+	while ((transport = TAILQ_FIRST(&tgt->transports)) != NULL) {
+		TAILQ_REMOVE(&tgt->transports, transport, link);
+		spdk_nvmf_transport_destroy(transport);
+	}
+
+	/* Read the callback before the target is freed. */
+	done_fn = tgt->destroy_cb_fn;
+	done_arg = tgt->destroy_cb_arg;
+
+	free(tgt);
+
+	if (done_fn != NULL) {
+		done_fn(done_arg, 0);
+	}
+}
+
+/*
+ * Start destroying the target: remember the completion callback and
+ * unregister the I/O device. The actual teardown is done by
+ * spdk_nvmf_tgt_destroy_cb().
+ */
+void
+spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
+		spdk_nvmf_tgt_destroy_done_fn cb_fn,
+		void *cb_arg)
+{
+	tgt->destroy_cb_arg = cb_arg;
+	tgt->destroy_cb_fn = cb_fn;
+
+	spdk_io_device_unregister(tgt, spdk_nvmf_tgt_destroy_cb);
+}
+
+/*
+ * Write the JSON-RPC calls that describe one subsystem to the JSON writer w.
+ *
+ * Nothing is written unless the subsystem type is SPDK_NVMF_SUBTYPE_NVME.
+ * Otherwise the following objects are emitted, in this order:
+ *   - one "nvmf_subsystem_create" object,
+ *   - one "nvmf_subsystem_add_listener" object per listener,
+ *   - one "nvmf_subsystem_add_host" object per host,
+ *   - one "nvmf_subsystem_add_ns" object per namespace.
+ */
+static void
+spdk_nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_host *host;
+ struct spdk_nvmf_listener *listener;
+ const struct spdk_nvme_transport_id *trid;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_ns_opts ns_opts;
+ uint32_t max_namespaces;
+ char uuid_str[SPDK_UUID_STRING_LEN];
+ const char *trtype;
+ const char *adrfam;
+
+ if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) {
+ return;
+ }
+
+ /* { */
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_create");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+ spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem));
+ spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
+
+ /* "max_namespaces" is only written when it is non-zero */
+ max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
+ if (max_namespaces != 0) {
+ spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
+ }
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+
+ /* One "nvmf_subsystem_add_listener" call per listener */
+ for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
+ listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
+ trid = spdk_nvmf_listener_get_trid(listener);
+
+ trtype = spdk_nvme_transport_id_trtype_str(trid->trtype);
+ adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+
+ /* "listen_address" : { */
+ spdk_json_write_named_object_begin(w, "listen_address");
+
+ spdk_json_write_named_string(w, "trtype", trtype);
+ /* "adrfam" is skipped when no string is available for the address family */
+ if (adrfam) {
+ spdk_json_write_named_string(w, "adrfam", adrfam);
+ }
+
+ spdk_json_write_named_string(w, "traddr", trid->traddr);
+ spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
+ /* } "listen_address" */
+ spdk_json_write_object_end(w);
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+
+ /* One "nvmf_subsystem_add_host" call per host */
+ for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
+ host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+ spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host));
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+
+ /* One "nvmf_subsystem_add_ns" call per namespace */
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+
+ /* "namespace" : { */
+ spdk_json_write_named_object_begin(w, "namespace");
+
+ spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns));
+ spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
+
+ /* Identifiers that are all zero are omitted. NGUID is printed as two
+ * big-endian 64-bit halves, 32 upper-case hex digits in total. */
+ if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch");
+ spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]),
+ from_be64(&ns_opts.nguid[8]));
+ }
+
+ /* EUI-64 is printed as one big-endian 64-bit value, 16 upper-case hex digits */
+ if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch");
+ spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64));
+ }
+
+ if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+ }
+
+ /* } "namespace" */
+ spdk_json_write_object_end(w);
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+}
+
+/*
+ * Write the JSON-RPC calls that describe a whole target to the JSON writer w:
+ * first one "set_nvmf_target_options" object, then one
+ * "nvmf_create_transport" object per transport, then the per-subsystem
+ * objects produced by spdk_nvmf_write_subsystem_config_json().
+ */
+void
+spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt)
+{
+	struct spdk_nvmf_transport *tr;
+	struct spdk_nvmf_subsystem *ss;
+
+	/* { "method": "set_nvmf_target_options", "params": { ... } } */
+	spdk_json_write_object_begin(w);
+	spdk_json_write_named_string(w, "method", "set_nvmf_target_options");
+	spdk_json_write_named_object_begin(w, "params");
+	spdk_json_write_named_uint32(w, "max_queue_depth", tgt->opts.max_queue_depth);
+	spdk_json_write_named_uint32(w, "max_qpairs_per_ctrlr", tgt->opts.max_qpairs_per_ctrlr);
+	spdk_json_write_named_uint32(w, "in_capsule_data_size", tgt->opts.in_capsule_data_size);
+	spdk_json_write_named_uint32(w, "max_io_size", tgt->opts.max_io_size);
+	spdk_json_write_named_uint32(w, "max_subsystems", tgt->opts.max_subsystems);
+	spdk_json_write_named_uint32(w, "io_unit_size", tgt->opts.io_unit_size);
+	spdk_json_write_object_end(w);	/* params */
+	spdk_json_write_object_end(w);	/* call */
+
+	/* { "method": "nvmf_create_transport", "params": { ... } } per transport */
+	TAILQ_FOREACH(tr, &tgt->transports, link) {
+		spdk_json_write_object_begin(w);
+		spdk_json_write_named_string(w, "method", "nvmf_create_transport");
+		spdk_json_write_named_object_begin(w, "params");
+		spdk_json_write_named_string(w, "trtype", spdk_nvme_transport_id_trtype_str(tr->ops->type));
+		spdk_json_write_named_uint32(w, "max_queue_depth", tr->opts.max_queue_depth);
+		spdk_json_write_named_uint32(w, "max_qpairs_per_ctrlr", tr->opts.max_qpairs_per_ctrlr);
+		spdk_json_write_named_uint32(w, "in_capsule_data_size", tr->opts.in_capsule_data_size);
+		spdk_json_write_named_uint32(w, "max_io_size", tr->opts.max_io_size);
+		spdk_json_write_named_uint32(w, "io_unit_size", tr->opts.io_unit_size);
+		spdk_json_write_named_uint32(w, "max_aq_depth", tr->opts.max_aq_depth);
+		spdk_json_write_object_end(w);	/* params */
+		spdk_json_write_object_end(w);	/* call */
+	}
+
+	/* Subsystems come last */
+	for (ss = spdk_nvmf_subsystem_get_first(tgt); ss != NULL;
+	     ss = spdk_nvmf_subsystem_get_next(ss)) {
+		spdk_nvmf_write_subsystem_config_json(w, ss);
+	}
+}
+
+/*
+ * Start listening on the transport address trid.
+ *
+ * If the target has no transport of type trid->trtype yet, one is created
+ * from the target-wide options and, once listening succeeded, attached to
+ * the target through spdk_nvmf_tgt_add_transport(), which then completes
+ * cb_fn. Otherwise cb_fn is invoked directly.
+ *
+ * cb_fn(cb_arg, status) receives 0 on success, -EINVAL when the transport
+ * could not be created, the negative return value of
+ * spdk_nvmf_transport_listen() when listening failed, or the status reported
+ * by spdk_nvmf_tgt_add_transport().
+ */
+void
+spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
+		     struct spdk_nvme_transport_id *trid,
+		     spdk_nvmf_tgt_listen_done_fn cb_fn,
+		     void *cb_arg)
+{
+	struct spdk_nvmf_transport *transport;
+	int rc;
+	bool propagate = false;
+
+	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trtype);
+	if (!transport) {
+		/* Zero-initialize so that no option is ever passed uninitialized. */
+		struct spdk_nvmf_transport_opts opts = {0};
+
+		opts.max_queue_depth = tgt->opts.max_queue_depth;
+		opts.max_qpairs_per_ctrlr = tgt->opts.max_qpairs_per_ctrlr;
+		opts.in_capsule_data_size = tgt->opts.in_capsule_data_size;
+		opts.max_io_size = tgt->opts.max_io_size;
+		opts.io_unit_size = tgt->opts.io_unit_size;
+		/* use max_queue_depth since tgt->opts doesn't have max_aq_depth */
+		opts.max_aq_depth = tgt->opts.max_queue_depth;
+
+		transport = spdk_nvmf_transport_create(trid->trtype, &opts);
+		if (!transport) {
+			SPDK_ERRLOG("Transport initialization failed\n");
+			cb_fn(cb_arg, -EINVAL);
+			return;
+		}
+
+		propagate = true;
+	}
+
+	rc = spdk_nvmf_transport_listen(transport, trid);
+	if (rc < 0) {
+		SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr);
+		if (propagate) {
+			/* The transport was created above and is not yet linked into
+			 * the target, so nobody else would ever destroy it. */
+			spdk_nvmf_transport_destroy(transport);
+		}
+		cb_fn(cb_arg, rc);
+		return;
+	}
+
+	tgt->discovery_genctr++;
+
+	if (propagate) {
+		spdk_nvmf_tgt_add_transport(tgt, transport, cb_fn, cb_arg);
+	} else {
+		cb_fn(cb_arg, 0);
+	}
+}
+
+/*
+ * Context carried through the spdk_for_each_channel() iteration started by
+ * spdk_nvmf_tgt_add_transport(). It is heap allocated there and freed by
+ * _spdk_nvmf_tgt_add_transport_done().
+ */
+struct spdk_nvmf_tgt_add_transport_ctx {
+ struct spdk_nvmf_tgt *tgt;
+ struct spdk_nvmf_transport *transport; /* transport added to each poll group */
+ spdk_nvmf_tgt_add_transport_done_fn cb_fn; /* completion callback */
+ void *cb_arg; /* first argument passed to cb_fn */
+};
+
+/*
+ * Completion step of the add-transport channel iteration: forwards the
+ * iteration status to the stored callback and frees the context.
+ */
+static void
+_spdk_nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ ctx->cb_fn(ctx->cb_arg, status);
+
+ free(ctx);
+}
+
+/*
+ * Per-channel step of the add-transport iteration: adds ctx->transport to the
+ * poll group that backs this channel and continues the iteration with the
+ * resulting return code.
+ */
+static void
+_spdk_nvmf_tgt_add_transport(struct spdk_io_channel_iter *i)
+{
+ struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ rc = spdk_nvmf_poll_group_add_transport(group, ctx->transport);
+ spdk_for_each_channel_continue(i, rc);
+}
+
+/*
+ * Attach a transport to the target and add it to every poll group.
+ *
+ * cb_fn(cb_arg, status) is called with:
+ *   -EEXIST  if the target already has a transport of the same type,
+ *   -ENOMEM  if the iteration context could not be allocated,
+ *   otherwise the final status of the per-channel iteration.
+ *
+ * On the -EEXIST and -ENOMEM paths the target is left untouched: the
+ * transport is not linked into tgt->transports.
+ */
+void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
+				 struct spdk_nvmf_transport *transport,
+				 spdk_nvmf_tgt_add_transport_done_fn cb_fn,
+				 void *cb_arg)
+{
+	struct spdk_nvmf_tgt_add_transport_ctx *ctx;
+
+	if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->type)) {
+		cb_fn(cb_arg, -EEXIST);
+		return; /* transport already created */
+	}
+
+	/* Allocate before touching the target so that a failure reported to the
+	 * caller never leaves a half-registered transport in the list. */
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx) {
+		cb_fn(cb_arg, -ENOMEM);
+		return;
+	}
+
+	transport->tgt = tgt;
+	TAILQ_INSERT_TAIL(&tgt->transports, transport, link);
+
+	ctx->tgt = tgt;
+	ctx->transport = transport;
+	ctx->cb_fn = cb_fn;
+	ctx->cb_arg = cb_arg;
+
+	spdk_for_each_channel(tgt,
+			      _spdk_nvmf_tgt_add_transport,
+			      ctx,
+			      _spdk_nvmf_tgt_add_transport_done);
+}
+
+/*
+ * Look up a subsystem of the target by NQN.
+ *
+ * Returns the first subsystem whose subnqn compares equal to the given
+ * string, or NULL when subnqn is NULL or no subsystem matches.
+ */
+struct spdk_nvmf_subsystem *
+spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn)
+{
+	uint32_t idx;
+
+	if (subnqn == NULL) {
+		return NULL;
+	}
+
+	for (idx = 0; idx < tgt->opts.max_subsystems; idx++) {
+		struct spdk_nvmf_subsystem *candidate = tgt->subsystems[idx];
+
+		/* Empty slots are skipped */
+		if (candidate != NULL && strcmp(subnqn, candidate->subnqn) == 0) {
+			return candidate;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the target's transport of the given type, or NULL if the target has
+ * no transport of that type.
+ */
+struct spdk_nvmf_transport *
+spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, enum spdk_nvme_transport_type type)
+{
+	struct spdk_nvmf_transport *match;
+
+	TAILQ_FOREACH(match, &tgt->transports, link) {
+		if (match->ops->type == type) {
+			break;
+		}
+	}
+
+	/* TAILQ_FOREACH leaves the cursor NULL once the list is exhausted */
+	return match;
+}
+
+/*
+ * Call spdk_nvmf_transport_accept() with cb_fn on every transport of the
+ * target. The _SAFE iterator is used, so the current transport may be
+ * unlinked from the list while it is being processed.
+ */
+void
+spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt, new_qpair_fn cb_fn)
+{
+ struct spdk_nvmf_transport *transport, *tmp;
+
+ TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
+ spdk_nvmf_transport_accept(transport, cb_fn);
+ }
+}
+
+/*
+ * Obtain a poll group for the target.
+ *
+ * The poll group is the context of the I/O channel acquired for the target's
+ * io_device. Returns NULL (after logging an error) when no channel could be
+ * obtained.
+ */
+struct spdk_nvmf_poll_group *
+spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
+{
+	struct spdk_io_channel *channel = spdk_get_io_channel(tgt);
+
+	if (channel != NULL) {
+		return spdk_io_channel_get_ctx(channel);
+	}
+
+	SPDK_ERRLOG("Unable to get I/O channel for target\n");
+	return NULL;
+}
+
+/*
+ * Destroy a poll group by handing it to
+ * spdk_nvmf_tgt_destroy_poll_group_qpairs().
+ */
+void
+spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group)
+{
+ /* This function will put the io_channel associated with this poll group */
+ spdk_nvmf_tgt_destroy_poll_group_qpairs(group);
+}
+
+/*
+ * Add a qpair to a poll group.
+ *
+ * The qpair is linked into group->qpairs and then handed to the transport
+ * poll group whose transport matches qpair->transport. The qpair ends up
+ * ACTIVE when that succeeded and INACTIVE otherwise.
+ *
+ * Returns the result of spdk_nvmf_transport_poll_group_add(), or -1 when the
+ * poll group has no transport poll group for the qpair's transport.
+ */
+int
+spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
+			 struct spdk_nvmf_qpair *qpair)
+{
+	struct spdk_nvmf_transport_poll_group *tg;
+	int status = -1;
+
+	TAILQ_INIT(&qpair->outstanding);
+	qpair->group = group;
+	spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVATING);
+
+	TAILQ_INSERT_TAIL(&group->qpairs, qpair, link);
+
+	TAILQ_FOREACH(tg, &group->tgroups, link) {
+		if (tg->transport != qpair->transport) {
+			continue;
+		}
+		status = spdk_nvmf_transport_poll_group_add(tg, qpair);
+		break;
+	}
+
+	if (status != 0) {
+		spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_INACTIVE);
+	} else {
+		spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE);
+	}
+
+	return status;
+}
+
+/*
+ * Thread-message adapter: treats ctx as a controller pointer and destructs
+ * it with spdk_nvmf_ctrlr_destruct().
+ */
+static
+void _nvmf_ctrlr_destruct(void *ctx)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = ctx;
+
+ spdk_nvmf_ctrlr_destruct(ctrlr);
+}
+
+/*
+ * Message handler that detaches a destroyed qpair from its controller.
+ *
+ * Clears the qpair's bit in the controller's qpair mask. When no bit remains
+ * set, the mask is freed and the controller's destruction is sent to the
+ * subsystem thread. Afterwards the disconnect callback, if any, is sent to
+ * the thread recorded in the context, and the context is freed.
+ */
+static void
+_spdk_nvmf_ctrlr_free_from_qpair(void *ctx)
+{
+	struct nvmf_qpair_disconnect_ctx *dctx = ctx;
+	struct spdk_nvmf_ctrlr *ctrlr = dctx->ctrlr;
+
+	spdk_bit_array_clear(ctrlr->qpair_mask, dctx->qid);
+
+	/* Last qpair gone: the controller itself can be torn down */
+	if (spdk_bit_array_count_set(ctrlr->qpair_mask) == 0) {
+		spdk_bit_array_free(&ctrlr->qpair_mask);
+
+		spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
+	}
+
+	if (dctx->cb_fn != NULL) {
+		spdk_thread_send_msg(dctx->thread, dctx->cb_fn, dctx->ctx);
+	}
+
+	free(dctx);
+}
+
+/*
+ * Finish disconnecting a qpair that is in the DEACTIVATING state.
+ *
+ * ctx is the nvmf_qpair_disconnect_ctx allocated by
+ * spdk_nvmf_qpair_disconnect(); status is not used. The qpair is marked
+ * INACTIVE, unlinked from its poll group and handed to the transport for
+ * finalization. If the qpair has a controller with a thread, the remaining
+ * cleanup is forwarded to that thread; otherwise the disconnect callback is
+ * sent right away and the context is freed here.
+ */
+static void
+_spdk_nvmf_qpair_destroy(void *ctx, int status)
+{
+ struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
+ spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_INACTIVE);
+ /* Save the qid now; the qpair is handed to the transport below */
+ qpair_ctx->qid = qpair->qid;
+
+ TAILQ_REMOVE(&qpair->group->qpairs, qpair, link);
+ qpair->group = NULL;
+
+ spdk_nvmf_transport_qpair_fini(qpair);
+
+ /* No controller (or no controller thread): nothing to detach from */
+ if (!ctrlr || !ctrlr->thread) {
+ if (qpair_ctx->cb_fn) {
+ spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
+ }
+ free(qpair_ctx);
+ return;
+ }
+
+ /* _spdk_nvmf_ctrlr_free_from_qpair takes over ownership of qpair_ctx */
+ qpair_ctx->ctrlr = ctrlr;
+ spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_free_from_qpair, qpair_ctx);
+
+}
+
+/*
+ * Disconnect a qpair.
+ *
+ * cb_fn(ctx), when non-NULL, is invoked once the disconnect has been
+ * processed:
+ *   - UNINITIALIZED qpair: the qpair is finalized and cb_fn is called inline.
+ *   - DEACTIVATING or INACTIVE qpair: a disconnect is already under way or
+ *     done; cb_fn is only sent as a message to the poll group thread.
+ *   - ACTIVE qpair: the qpair becomes DEACTIVATING and is destroyed either
+ *     immediately or, if I/O is still outstanding, through the qpair's state
+ *     callback.
+ *
+ * Must be called on the thread that owns the qpair's poll group.
+ *
+ * Returns 0 on success, or -ENOMEM if the disconnect context could not be
+ * allocated. In the -ENOMEM case the qpair state is left unchanged (ACTIVE),
+ * so the caller can retry the disconnect later.
+ */
+int
+spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
+{
+	struct nvmf_qpair_disconnect_ctx *qpair_ctx;
+
+	/* If we get a qpair in the uninitialized state, we can just destroy it immediately */
+	if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
+		spdk_nvmf_transport_qpair_fini(qpair);
+		if (cb_fn) {
+			cb_fn(ctx);
+		}
+		return 0;
+	}
+
+	/* The queue pair must be disconnected from the thread that owns it */
+	assert(qpair->group->thread == spdk_get_thread());
+
+	if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING ||
+	    qpair->state == SPDK_NVMF_QPAIR_INACTIVE) {
+		/* This can occur if the connection is killed by the target,
+		 * which results in a notification that the connection
+		 * died. Send a message to defer the processing of this
+		 * callback. This allows the stack to unwind in the case
+		 * where a bunch of connections are disconnected in
+		 * a loop. */
+		if (cb_fn) {
+			spdk_thread_send_msg(qpair->group->thread, cb_fn, ctx);
+		}
+		return 0;
+	}
+
+	assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
+
+	/* Allocate before changing the state: failing after the switch to
+	 * DEACTIVATING would leave the qpair stuck, because every later call
+	 * would take the "already disconnecting" branch above. */
+	qpair_ctx = calloc(1, sizeof(*qpair_ctx));
+	if (!qpair_ctx) {
+		SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
+		return -ENOMEM;
+	}
+
+	spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING);
+
+	qpair_ctx->qpair = qpair;
+	qpair_ctx->cb_fn = cb_fn;
+	qpair_ctx->thread = qpair->group->thread;
+	qpair_ctx->ctx = ctx;
+
+	/* Check for outstanding I/O */
+	if (!TAILQ_EMPTY(&qpair->outstanding)) {
+		/* Destruction is deferred to the qpair's state callback */
+		qpair->state_cb = _spdk_nvmf_qpair_destroy;
+		qpair->state_cb_arg = qpair_ctx;
+		spdk_nvmf_qpair_free_aer(qpair);
+		return 0;
+	}
+
+	_spdk_nvmf_qpair_destroy(qpair_ctx, 0);
+
+	return 0;
+}
+
+/*
+ * Fill trid with the qpair's peer transport ID. Thin wrapper that returns
+ * the result of spdk_nvmf_transport_qpair_get_peer_trid().
+ */
+int
+spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return spdk_nvmf_transport_qpair_get_peer_trid(qpair, trid);
+}
+
+/*
+ * Fill trid with the qpair's local transport ID. Thin wrapper that returns
+ * the result of spdk_nvmf_transport_qpair_get_local_trid().
+ */
+int
+spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return spdk_nvmf_transport_qpair_get_local_trid(qpair, trid);
+}
+
+/*
+ * Fill trid with the qpair's listen transport ID. Thin wrapper that returns
+ * the result of spdk_nvmf_transport_qpair_get_listen_trid().
+ */
+int
+spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return spdk_nvmf_transport_qpair_get_listen_trid(qpair, trid);
+}
+
+/*
+ * Make sure the poll group has a transport poll group for the transport.
+ *
+ * Returns 0 when one already exists or was created and linked into
+ * group->tgroups, and -1 (after logging an error) when it could not be
+ * created.
+ */
+int
+spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
+				   struct spdk_nvmf_transport *transport)
+{
+	struct spdk_nvmf_transport_poll_group *tg;
+
+	TAILQ_FOREACH(tg, &group->tgroups, link) {
+		if (tg->transport == transport) {
+			break;
+		}
+	}
+
+	if (tg != NULL) {
+		/* Transport already in the poll group */
+		return 0;
+	}
+
+	tg = spdk_nvmf_transport_poll_group_create(transport);
+	if (tg == NULL) {
+		SPDK_ERRLOG("Unable to create poll group for transport\n");
+		return -1;
+	}
+
+	TAILQ_INSERT_TAIL(&group->tgroups, tg, link);
+
+	return 0;
+}
+
+/*
+ * Bring the poll group's per-subsystem channel array in line with the
+ * subsystem's current namespaces.
+ *
+ * The channel array is resized to subsystem->max_nsid entries. Channels in
+ * slots that are dropped are released. Then, slot by slot, a channel is
+ * released where the namespace disappeared and acquired where a namespace
+ * appeared.
+ *
+ * Returns 0 on success, or -ENOMEM when subsystem->id is not below
+ * group->num_sgroups, when (re)allocating the array fails, or when a bdev
+ * I/O channel cannot be obtained.
+ */
+static int
+poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ uint32_t new_num_channels, old_num_channels;
+ uint32_t i;
+ struct spdk_nvmf_ns *ns;
+
+ /* Make sure our poll group has memory for this subsystem allocated */
+ if (subsystem->id >= group->num_sgroups) {
+ return -ENOMEM;
+ }
+
+ sgroup = &group->sgroups[subsystem->id];
+
+ /* Make sure the array of channels is the correct size */
+ new_num_channels = subsystem->max_nsid;
+ old_num_channels = sgroup->num_channels;
+
+ if (old_num_channels == 0) {
+ if (new_num_channels > 0) {
+ /* First allocation */
+ sgroup->channels = calloc(new_num_channels, sizeof(sgroup->channels[0]));
+ if (!sgroup->channels) {
+ return -ENOMEM;
+ }
+ }
+ } else if (new_num_channels > old_num_channels) {
+ void *buf;
+
+ /* Make the array larger */
+ buf = realloc(sgroup->channels, new_num_channels * sizeof(sgroup->channels[0]));
+ if (!buf) {
+ /* The old array and num_channels are left as they were */
+ return -ENOMEM;
+ }
+
+ sgroup->channels = buf;
+
+ /* Null out the new channels slots */
+ for (i = old_num_channels; i < new_num_channels; i++) {
+ sgroup->channels[i] = NULL;
+ }
+ } else if (new_num_channels < old_num_channels) {
+ void *buf;
+
+ /* Free the extra I/O channels */
+ for (i = new_num_channels; i < old_num_channels; i++) {
+ if (sgroup->channels[i]) {
+ spdk_put_io_channel(sgroup->channels[i]);
+ sgroup->channels[i] = NULL;
+ }
+ }
+
+ /* Make the array smaller */
+ if (new_num_channels > 0) {
+ buf = realloc(sgroup->channels, new_num_channels * sizeof(sgroup->channels[0]));
+ if (!buf) {
+ /* NOTE(review): num_channels still holds the old count here; the
+ * trailing slots were already released and set to NULL above. */
+ return -ENOMEM;
+ }
+ sgroup->channels = buf;
+ } else {
+ free(sgroup->channels);
+ sgroup->channels = NULL;
+ }
+ }
+
+ sgroup->num_channels = new_num_channels;
+
+ /* Detect bdevs that were added or removed */
+ for (i = 0; i < sgroup->num_channels; i++) {
+ ns = subsystem->ns[i];
+ if (ns == NULL && sgroup->channels[i] == NULL) {
+ /* Both NULL. Leave empty */
+ } else if (ns == NULL && sgroup->channels[i] != NULL) {
+ /* There was a channel here, but the namespace is gone. */
+ spdk_put_io_channel(sgroup->channels[i]);
+ sgroup->channels[i] = NULL;
+ } else if (ns != NULL && sgroup->channels[i] == NULL) {
+ /* A namespace appeared but there is no channel yet */
+ sgroup->channels[i] = spdk_bdev_get_io_channel(ns->desc);
+ if (sgroup->channels[i] == NULL) {
+ SPDK_ERRLOG("Could not allocate I/O channel.\n");
+ return -ENOMEM;
+ }
+ } else {
+ /* A namespace was present before and didn't change. */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Public entry point for poll_group_update_subsystem(); returns its result
+ * unchanged.
+ */
+int
+spdk_nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ return poll_group_update_subsystem(group, subsystem);
+}
+
+/*
+ * Add a subsystem to a poll group.
+ *
+ * Initializes the subsystem's queue of pending requests in this poll group,
+ * sets up its I/O channels and marks it ACTIVE. If the channel setup fails,
+ * the subsystem is removed from the poll group again.
+ *
+ * cb_fn(cb_arg, rc), when non-NULL, is called before returning. Returns (and
+ * reports) 0 on success or a negative errno; -ENOMEM is used when the poll
+ * group has no slot for subsystem->id.
+ */
+int
+spdk_nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
+				   struct spdk_nvmf_subsystem *subsystem,
+				   spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+	int rc = 0;
+	struct spdk_nvmf_subsystem_poll_group *sgroup;
+
+	/* Validate the index before touching group->sgroups[]: initializing the
+	 * queue of an out-of-range slot would write outside the array. -ENOMEM
+	 * matches what poll_group_update_subsystem() reports for this case. */
+	if (subsystem->id >= group->num_sgroups) {
+		rc = -ENOMEM;
+		goto fini;
+	}
+
+	sgroup = &group->sgroups[subsystem->id];
+
+	TAILQ_INIT(&sgroup->queued);
+
+	rc = poll_group_update_subsystem(group, subsystem);
+	if (rc) {
+		/* Undo whatever was set up so far */
+		spdk_nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL);
+		goto fini;
+	}
+
+	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+fini:
+	if (cb_fn) {
+		cb_fn(cb_arg, rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Final step of removing a subsystem from a poll group.
+ *
+ * ctx is a nvmf_qpair_disconnect_many_ctx. When status is 0, every I/O
+ * channel the poll group holds for the subsystem is released and the channel
+ * array is freed. In all cases the context is freed and the stored
+ * completion callback, if any, is called with status.
+ */
+static void
+_nvmf_poll_group_remove_subsystem_cb(void *ctx, int status)
+{
+	struct nvmf_qpair_disconnect_many_ctx *many_ctx = ctx;
+	struct spdk_nvmf_poll_group *group = many_ctx->group;
+	struct spdk_nvmf_subsystem *subsystem = many_ctx->subsystem;
+	spdk_nvmf_poll_group_mod_done done_fn = many_ctx->cpl_fn;
+	void *done_arg = many_ctx->cpl_ctx;
+	struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
+	uint32_t slot;
+
+	if (status == 0) {
+		for (slot = 0; slot < sgroup->num_channels; slot++) {
+			if (sgroup->channels[slot] == NULL) {
+				continue;
+			}
+			spdk_put_io_channel(sgroup->channels[slot]);
+			sgroup->channels[slot] = NULL;
+		}
+
+		sgroup->num_channels = 0;
+		free(sgroup->channels);
+		sgroup->channels = NULL;
+	}
+
+	/* The callback and its argument were copied out above */
+	free(many_ctx);
+	if (done_fn != NULL) {
+		done_fn(done_arg, status);
+	}
+}
+
+/*
+ * Disconnect-completion callback used while removing a subsystem from a poll
+ * group: disconnects the next qpair of the poll group that belongs to the
+ * subsystem, passing itself as the completion callback again.
+ *
+ * When no such qpair is left, or a disconnect could not be started, the
+ * removal is completed through _nvmf_poll_group_remove_subsystem_cb() with
+ * the corresponding status.
+ */
+static void
+_nvmf_subsystem_disconnect_next_qpair(void *ctx)
+{
+	struct spdk_nvmf_qpair *qpair;
+	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
+	struct spdk_nvmf_subsystem *subsystem;
+	struct spdk_nvmf_poll_group *group;
+	int rc = 0;
+
+	group = qpair_ctx->group;
+	subsystem = qpair_ctx->subsystem;
+
+	TAILQ_FOREACH(qpair, &group->qpairs, link) {
+		/* A qpair may not have a controller (see the NULL check in
+		 * _spdk_nvmf_qpair_destroy); such a qpair belongs to no subsystem. */
+		if (qpair->ctrlr != NULL && qpair->ctrlr->subsys == subsystem) {
+			break;
+		}
+	}
+
+	if (qpair) {
+		rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, qpair_ctx);
+	}
+
+	if (!qpair || rc != 0) {
+		_nvmf_poll_group_remove_subsystem_cb(ctx, rc);
+	}
+	return;
+}
+
+/*
+ * Remove a subsystem from a poll group.
+ *
+ * Marks the subsystem INACTIVE in this poll group and disconnects, one after
+ * another, every qpair of the poll group that belongs to the subsystem. Once
+ * none is left the subsystem's I/O channels are released.
+ *
+ * cb_fn(cb_arg, status), when non-NULL, is called on completion with 0 on
+ * success, -ENOMEM if the removal context could not be allocated, or the
+ * error returned by spdk_nvmf_qpair_disconnect().
+ */
+void
+spdk_nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
+				      struct spdk_nvmf_subsystem *subsystem,
+				      spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+	struct spdk_nvmf_qpair *qpair;
+	struct spdk_nvmf_subsystem_poll_group *sgroup;
+	struct nvmf_qpair_disconnect_many_ctx *ctx;
+	int rc = 0;
+
+	ctx = calloc(1, sizeof(*ctx));
+
+	if (!ctx) {
+		SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n");
+		/* Report the failure instead of signalling success with rc == 0 */
+		rc = -ENOMEM;
+		goto fini;
+	}
+
+	ctx->group = group;
+	ctx->subsystem = subsystem;
+	ctx->cpl_fn = cb_fn;
+	ctx->cpl_ctx = cb_arg;
+
+	sgroup = &group->sgroups[subsystem->id];
+	sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+
+	TAILQ_FOREACH(qpair, &group->qpairs, link) {
+		/* A qpair may not have a controller (see the NULL check in
+		 * _spdk_nvmf_qpair_destroy); such a qpair belongs to no subsystem. */
+		if (qpair->ctrlr != NULL && qpair->ctrlr->subsys == subsystem) {
+			break;
+		}
+	}
+
+	if (qpair) {
+		rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, ctx);
+	} else {
+		/* call the callback immediately. It will handle any channel iteration */
+		_nvmf_poll_group_remove_subsystem_cb(ctx, 0);
+	}
+
+	if (rc != 0) {
+		/* The disconnect never started, so ctx is still owned here */
+		free(ctx);
+		goto fini;
+	}
+
+	return;
+fini:
+	if (cb_fn) {
+		cb_fn(cb_arg, rc);
+	}
+}
+
+/*
+ * Mark a subsystem as PAUSED in a poll group.
+ *
+ * cb_fn(cb_arg, rc), when non-NULL, is called before returning with 0 on
+ * success or -1 when the poll group has no slot for subsystem->id.
+ */
+void
+spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
+				     struct spdk_nvmf_subsystem *subsystem,
+				     spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+	struct spdk_nvmf_subsystem_poll_group *slot;
+	int result = -1;
+
+	if (subsystem->id < group->num_sgroups) {
+		slot = &group->sgroups[subsystem->id];
+		if (slot != NULL) {
+			assert(slot->state == SPDK_NVMF_SUBSYSTEM_ACTIVE);
+			/* TODO: This currently does not quiesce I/O */
+			slot->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
+			result = 0;
+		}
+	}
+
+	if (cb_fn != NULL) {
+		cb_fn(cb_arg, result);
+	}
+}
+
+/*
+ * Resume a PAUSED subsystem in a poll group.
+ *
+ * Refreshes the subsystem's I/O channels, marks it ACTIVE and executes every
+ * request that was queued on it.
+ *
+ * cb_fn(cb_arg, rc), when non-NULL, is called before returning with 0 on
+ * success, -1 when the poll group has no slot for subsystem->id, or the
+ * error returned by poll_group_update_subsystem().
+ */
+void
+spdk_nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
+				      struct spdk_nvmf_subsystem *subsystem,
+				      spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+	struct spdk_nvmf_subsystem_poll_group *slot;
+	struct spdk_nvmf_request *pending, *next;
+	int result;
+
+	if (subsystem->id >= group->num_sgroups) {
+		result = -1;
+	} else {
+		slot = &group->sgroups[subsystem->id];
+
+		assert(slot->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
+
+		result = poll_group_update_subsystem(group, subsystem);
+		if (result == 0) {
+			slot->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+
+			/* Release all queued requests */
+			TAILQ_FOREACH_SAFE(pending, &slot->queued, link, next) {
+				TAILQ_REMOVE(&slot->queued, pending, link);
+				spdk_nvmf_request_exec(pending);
+			}
+		}
+	}
+
+	if (cb_fn != NULL) {
+		cb_fn(cb_arg, result);
+	}
+}
diff --git a/src/spdk/lib/nvmf/nvmf_fc.h b/src/spdk/lib/nvmf/nvmf_fc.h
new file mode 100644
index 00000000..bf086831
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf_fc.h
@@ -0,0 +1,871 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_FC_H__
+#define __NVMF_FC_H__
+
+#include "spdk/nvmf.h"
+#include "spdk/assert.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/nvmf_fc_spec.h"
+#include "spdk/event.h"
+#include "spdk/io_channel.h"
+#include "nvmf_internal.h"
+
+#define SPDK_NVMF_FC_TR_ADDR_LEN 64
+
+/*
+ * FC HW port states.
+ * Type of the hw_port_status field of struct spdk_nvmf_fc_port.
+ */
+enum spdk_fc_port_state {
+ SPDK_FC_PORT_OFFLINE = 0,
+ SPDK_FC_PORT_ONLINE = 1,
+ SPDK_FC_PORT_QUIESCED = 2,
+};
+
+/*
+ * FC HWQP states.
+ * Type of the state field of struct spdk_nvmf_fc_hwqp.
+ */
+enum spdk_fc_hwqp_state {
+ SPDK_FC_HWQP_OFFLINE = 0,
+ SPDK_FC_HWQP_ONLINE = 1,
+};
+
+/*
+ * NVMF BCM FC Object state
+ * Add all the generic states of the object here.
+ * Specific object states can be added separately
+ *
+ * Used in this header for the state of nports (nport_state), associations
+ * (assoc_state) and remote ports (rport_state).
+ */
+enum spdk_nvmf_fc_object_state {
+ SPDK_NVMF_FC_OBJECT_CREATED = 0,
+ SPDK_NVMF_FC_OBJECT_TO_BE_DELETED = 1,
+ SPDK_NVMF_FC_OBJECT_ZOMBIE = 2, /* Partial Create or Delete */
+};
+
+/*
+ * FC request state
+ *
+ * Values are consecutive starting at 0, so SPDK_NVMF_FC_REQ_MAX_STATE, the
+ * last enumerator, equals the number of real states that precede it.
+ */
+enum spdk_nvmf_fc_request_state {
+ SPDK_NVMF_FC_REQ_INIT = 0,
+ SPDK_NVMF_FC_REQ_READ_BDEV,
+ SPDK_NVMF_FC_REQ_READ_XFER,
+ SPDK_NVMF_FC_REQ_READ_RSP,
+ SPDK_NVMF_FC_REQ_WRITE_BUFFS,
+ SPDK_NVMF_FC_REQ_WRITE_XFER,
+ SPDK_NVMF_FC_REQ_WRITE_BDEV,
+ SPDK_NVMF_FC_REQ_WRITE_RSP,
+ SPDK_NVMF_FC_REQ_NONE_BDEV,
+ SPDK_NVMF_FC_REQ_NONE_RSP,
+ SPDK_NVMF_FC_REQ_SUCCESS,
+ SPDK_NVMF_FC_REQ_FAILED,
+ SPDK_NVMF_FC_REQ_ABORTED,
+ SPDK_NVMF_FC_REQ_PENDING,
+ SPDK_NVMF_FC_REQ_MAX_STATE,
+};
+
+/*
+ * FC HWQP pointer
+ *
+ * Untyped pointer; struct spdk_nvmf_fc_hwqp stores the vendor HW queue set
+ * in a field of this type.
+ */
+typedef void *spdk_nvmf_fc_lld_hwqp_t;
+
+/*
+ * FC World Wide Name
+ *
+ * The union allows the same eight bytes to be accessed either as one 64-bit
+ * value (u.wwn) or byte by byte (u.octets).
+ */
+struct spdk_nvmf_fc_wwn {
+ union {
+ uint64_t wwn; /* World Wide Names consist of eight bytes */
+ uint8_t octets[sizeof(uint64_t)];
+ } u;
+};
+
+/*
+ * Generic DMA buffer descriptor
+ */
+struct spdk_nvmf_fc_buffer_desc {
+ void *virt; /* presumably the buffer's virtual address - TODO confirm */
+ uint64_t phys; /* presumably the buffer's physical address - TODO confirm */
+ size_t len; /* presumably the buffer length in bytes - TODO confirm */
+
+ /* Internal */
+ uint32_t buf_index;
+};
+
+/*
+ * ABTS handling context
+ */
+struct spdk_nvmf_fc_abts_ctx {
+ bool handled;
+ uint16_t hwqps_responded;
+ uint16_t rpi;
+ uint16_t oxid;
+ uint16_t rxid;
+ struct spdk_nvmf_fc_nport *nport;
+ uint16_t nport_hdl;
+ uint8_t port_hdl;
+ void *abts_poller_args;
+ void *sync_poller_args;
+ int num_hwqps;
+ bool queue_synced;
+ uint64_t u_id;
+ struct spdk_nvmf_fc_hwqp *ls_hwqp;
+ uint16_t fcp_rq_id;
+};
+
+/*
+ * NVME FC transport errors
+ *
+ * One 32-bit field per error/event kind. An instance is embedded in
+ * struct spdk_nvmf_fc_hwqp as "counters".
+ */
+struct spdk_nvmf_fc_errors {
+ uint32_t no_xri;
+ uint32_t nport_invalid;
+ uint32_t unknown_frame;
+ uint32_t wqe_cmplt_err;
+ uint32_t wqe_write_err;
+ uint32_t rq_status_err;
+ uint32_t rq_buf_len_err;
+ uint32_t rq_id_err;
+ uint32_t rq_index_err;
+ uint32_t invalid_cq_type;
+ uint32_t invalid_cq_id;
+ uint32_t fc_req_buf_err;
+ uint32_t aq_buf_alloc_err;
+ uint32_t write_buf_alloc_err;
+ uint32_t read_buf_alloc_err;
+ uint32_t unexpected_err;
+ uint32_t nvme_cmd_iu_err;
+ uint32_t nvme_cmd_xfer_err;
+ uint32_t queue_entry_invalid;
+ uint32_t invalid_conn_err;
+ uint32_t fcp_rsp_failure;
+ uint32_t write_failed;
+ uint32_t read_failed;
+ uint32_t rport_invalid;
+ uint32_t num_aborted;
+ uint32_t num_abts_sent;
+};
+
+/*
+ * Send Single Request/Response Sequence.
+ *
+ * Bundles the request, response and SGL buffer descriptors of one sequence.
+ * struct spdk_nvmf_fc_association embeds one as "snd_disconn_bufs".
+ */
+struct spdk_nvmf_fc_send_srsr {
+ struct spdk_nvmf_fc_buffer_desc rqst;
+ struct spdk_nvmf_fc_buffer_desc rsp;
+ struct spdk_nvmf_fc_buffer_desc sgl; /* Note: Len = (2 * bcm_sge_t) */
+ uint16_t rpi;
+};
+
+/*
+ * Struct representing a nport
+ *
+ * An nport is linked into the nport list of a HW port through "link" and
+ * owns a list of remote ports and a list of associations.
+ */
+struct spdk_nvmf_fc_nport {
+
+ uint16_t nport_hdl;
+ uint8_t port_hdl;
+ uint32_t d_id;
+ enum spdk_nvmf_fc_object_state nport_state;
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+
+ /* list of remote ports (i.e. initiators) connected to nport */
+ TAILQ_HEAD(, spdk_nvmf_fc_remote_port_info) rem_port_list;
+ uint32_t rport_count;
+
+ void *vendor_data; /* available for vendor use */
+
+ /* list of associations to nport */
+ TAILQ_HEAD(, spdk_nvmf_fc_association) fc_associations;
+ uint32_t assoc_count;
+ struct spdk_nvmf_fc_port *fc_port;
+ TAILQ_ENTRY(spdk_nvmf_fc_nport) link; /* list of nports on a hw port. */
+};
+
+/*
+ * NVMF FC Connection
+ *
+ * Embeds the generic NVMe-oF qpair as its first member and adds the FC
+ * specific state of one connection.
+ */
+struct spdk_nvmf_fc_conn {
+ struct spdk_nvmf_qpair qpair;
+
+ uint64_t conn_id;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ uint16_t esrp_ratio;
+ uint16_t rsp_count;
+ uint32_t rsn;
+
+ /* The maximum number of I/O outstanding on this connection at one time */
+ uint16_t max_queue_depth;
+ uint16_t max_rw_depth;
+ /* The current number of I/O outstanding on this connection. This number
+ * includes all I/O from the time the capsule is first received until it is
+ * completed.
+ */
+ uint16_t cur_queue_depth;
+
+ /* number of read/write requests that are outstanding */
+ uint16_t cur_fc_rw_depth;
+
+ /* requests that are waiting to obtain xri/buffer */
+ TAILQ_HEAD(, spdk_nvmf_fc_request) pending_queue;
+
+ struct spdk_nvmf_fc_association *fc_assoc;
+
+ /* additional FC info here - TBD */
+ uint16_t rpi;
+
+ /* for association's connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_link;
+
+ /* for association's available connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_avail_link;
+
+ /* for hwqp's connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) link;
+};
+
+/*
+ * Structure for maintaining the XRI's
+ *
+ * Entries can be chained through "link"; struct spdk_nvmf_fc_hwqp keeps a
+ * pending_xri_list of them.
+ */
+struct spdk_nvmf_fc_xri {
+ uint32_t xri; /* The actual xri value */
+ /* Internal */
+ TAILQ_ENTRY(spdk_nvmf_fc_xri) link;
+ bool is_active;
+};
+
+/* Only used through a pointer in this part of the header */
+struct spdk_nvmf_fc_poll_group;
+
+/*
+ * HWQP poller structure passed from Master thread
+ */
+struct spdk_nvmf_fc_hwqp {
+ uint32_t lcore_id; /* core hwqp is running on (for tracing purposes only) */
+ struct spdk_thread *thread; /* thread hwqp is running on */
+ uint32_t hwqp_id; /* A unique id (per physical port) for a hwqp */
+ uint32_t rq_size; /* receive queue size */
+ spdk_nvmf_fc_lld_hwqp_t queues; /* vendor HW queue set */
+ struct spdk_nvmf_fc_port *fc_port; /* HW port structure for these queues */
+ struct spdk_nvmf_fc_poll_group *poll_group;
+
+ void *context; /* Vendor Context */
+
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) connection_list;
+ uint32_t num_conns; /* number of connections to queue */
+ uint16_t cid_cnt; /* used to generate unique conn. id for RQ */
+ uint32_t free_q_slots; /* free q slots available for connections */
+ enum spdk_fc_hwqp_state state; /* Poller state (e.g. online, offline) */
+
+ /* Internal */
+ struct spdk_mempool *fc_request_pool;
+ TAILQ_HEAD(, spdk_nvmf_fc_request) in_use_reqs;
+
+ TAILQ_HEAD(, spdk_nvmf_fc_xri) pending_xri_list;
+
+ struct spdk_nvmf_fc_errors counters;
+ uint32_t send_frame_xri;
+ uint8_t send_frame_seqid;
+
+ /* Pending LS request waiting for XRI. */
+ TAILQ_HEAD(, spdk_nvmf_fc_ls_rqst) ls_pending_queue;
+
+ /* Sync req list */
+ TAILQ_HEAD(, spdk_nvmf_fc_poller_api_queue_sync_args) sync_cbs;
+
+ TAILQ_ENTRY(spdk_nvmf_fc_hwqp) link;
+};
+
+/*
+ * Pool of association and connection objects. struct spdk_nvmf_fc_port
+ * embeds one as "ls_rsrc_pool".
+ */
+struct spdk_nvmf_fc_ls_rsrc_pool {
+ void *assocs_mptr; /* presumably the memory backing the associations - TODO confirm */
+ uint32_t assocs_count;
+ TAILQ_HEAD(, spdk_nvmf_fc_association) assoc_free_list;
+
+ void *conns_mptr; /* presumably the memory backing the connections - TODO confirm */
+ uint32_t conns_count;
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) fc_conn_free_list;
+};
+
+/*
+ * FC HW port.
+ *
+ * Holds one embedded hwqp (ls_queue) plus a pointer to num_io_queues I/O
+ * hwqps, and the list of nports that live on this port.
+ */
+struct spdk_nvmf_fc_port {
+ uint8_t port_hdl;
+ enum spdk_fc_port_state hw_port_status;
+ uint32_t xri_base;
+ uint32_t xri_count;
+ uint16_t fcp_rq_id;
+ struct spdk_ring *xri_ring;
+ struct spdk_nvmf_fc_hwqp ls_queue;
+ uint32_t num_io_queues;
+ struct spdk_nvmf_fc_hwqp *io_queues;
+ /*
+ * List of nports on this HW port.
+ */
+ TAILQ_HEAD(, spdk_nvmf_fc_nport)nport_list;
+ int num_nports;
+ TAILQ_ENTRY(spdk_nvmf_fc_port) link;
+
+ struct spdk_nvmf_fc_ls_rsrc_pool ls_rsrc_pool;
+ struct spdk_mempool *io_rsrc_pool; /* Pools to store bdev_io's for this port */
+ void *port_ctx;
+};
+
+/*
+ * NVMF FC Request
+ *
+ * FC transport wrapper around a generic struct spdk_nvmf_request. The generic
+ * request must remain the first member (enforced by the static assert below)
+ * because spdk_nvmf_fc_get_fc_req() recovers the wrapper from a pointer to it.
+ */
+struct spdk_nvmf_fc_request {
+ struct spdk_nvmf_request req; /* must stay first, see static assert below */
+ struct spdk_nvmf_fc_ersp_iu ersp;
+ uint32_t poller_lcore; /* for tracing purposes only */
+ struct spdk_thread *poller_thread;
+ uint16_t buf_index;
+ struct spdk_nvmf_fc_xri *xri;
+ uint16_t oxid;
+ uint16_t rpi;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ int state; /* compared against SPDK_NVMF_FC_REQ_* values in spdk_nvmf_fc_req_in_xfer() */
+ uint32_t transfered_len;
+ bool is_aborted;
+ uint32_t magic;
+ uint32_t s_id;
+ uint32_t d_id;
+ TAILQ_ENTRY(spdk_nvmf_fc_request) link;
+ TAILQ_ENTRY(spdk_nvmf_fc_request) pending_link;
+ TAILQ_HEAD(, spdk_nvmf_fc_caller_ctx) abort_cbs; /* list of struct spdk_nvmf_fc_caller_ctx entries */
+};
+
+/* Fails the build if 'req' is ever moved away from offset 0 of the FC request. */
+SPDK_STATIC_ASSERT(!offsetof(struct spdk_nvmf_fc_request, req),
+ "FC request and NVMF request address don't match.");
+
+/*
+ * NVMF FC Association
+ *
+ * Ties together the target nport, the remote port, the subsystem and the host
+ * of one association, and owns the lists of its FC connections.
+ */
+struct spdk_nvmf_fc_association {
+ uint64_t assoc_id; /* same type as the assoc_id argument of spdk_nvmf_fc_delete_association() */
+ uint32_t s_id;
+ struct spdk_nvmf_fc_nport *tgtport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_host *host;
+ enum spdk_nvmf_fc_object_state assoc_state; /* see spdk_nvmf_fc_assoc_set_state() */
+
+ char host_id[FCNVME_ASSOC_HOSTID_LEN];
+ char host_nqn[FCNVME_ASSOC_HOSTNQN_LEN];
+ /* NOTE(review): sub_nqn is sized with the *host* NQN length constant - confirm this is intended. */
+ char sub_nqn[FCNVME_ASSOC_HOSTNQN_LEN];
+
+ struct spdk_nvmf_fc_conn *aq_conn; /* connection for admin queue */
+
+ uint16_t conn_count;
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) fc_conns;
+
+ void *conns_buf;
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) avail_fc_conns;
+
+ TAILQ_ENTRY(spdk_nvmf_fc_association) link;
+
+ /* for port's association free list */
+ TAILQ_ENTRY(spdk_nvmf_fc_association) port_free_assoc_list_link;
+
+ void *ls_del_op_ctx; /* delete assoc. callback list */
+
+ /* req/resp buffers used to send disconnect to initiator */
+ struct spdk_nvmf_fc_send_srsr snd_disconn_bufs;
+};
+
+/*
+ * FC Remote Port
+ *
+ * Identity (s_id, rpi, node/port WWNs), association count and state of one
+ * remote port; 'link' lets it be kept on a TAILQ.
+ */
+struct spdk_nvmf_fc_remote_port_info {
+ uint32_t s_id;
+ uint32_t rpi;
+ uint32_t assoc_count;
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+ enum spdk_nvmf_fc_object_state rport_state; /* see spdk_nvmf_fc_rport_set_state() */
+ TAILQ_ENTRY(spdk_nvmf_fc_remote_port_info) link;
+};
+
+/*
+ * Poller API error codes
+ *
+ * Return type of spdk_nvmf_fc_poller_api_func() and the 'ret' argument of
+ * spdk_nvmf_fc_poller_api_cb callbacks. SUCCESS is explicitly 0; the other
+ * values follow sequentially.
+ */
+enum spdk_nvmf_fc_poller_api_ret {
+ SPDK_NVMF_FC_POLLER_API_SUCCESS = 0,
+ SPDK_NVMF_FC_POLLER_API_ERROR,
+ SPDK_NVMF_FC_POLLER_API_INVALID_ARG,
+ SPDK_NVMF_FC_POLLER_API_NO_CONN_ID,
+ SPDK_NVMF_FC_POLLER_API_DUP_CONN_ID,
+ SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND,
+};
+
+/*
+ * Poller API definitions
+ *
+ * Operation selector passed as the 'api' argument of
+ * spdk_nvmf_fc_poller_api_func().
+ * NOTE(review): each value presumably pairs with the similarly named
+ * spdk_nvmf_fc_poller_api_*_args structure - confirm against the implementation.
+ */
+enum spdk_nvmf_fc_poller_api {
+ SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION,
+ SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION,
+ SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE,
+ SPDK_NVMF_FC_POLLER_API_ACTIVATE_QUEUE,
+ SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED,
+ SPDK_NVMF_FC_POLLER_API_ADAPTER_EVENT,
+ SPDK_NVMF_FC_POLLER_API_AEN,
+ SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC,
+ SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC_DONE,
+};
+
+/*
+ * Poller API callback function proto
+ *
+ * cb_data: caller-supplied pointer.
+ * ret:     poller API result code.
+ */
+typedef void (*spdk_nvmf_fc_poller_api_cb)(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret);
+
+/*
+ * Poller API callback data
+ *
+ * Bundles a callback, its data pointer and a result code; embedded in the
+ * poller API argument structures.
+ */
+struct spdk_nvmf_fc_poller_api_cb_info {
+ spdk_nvmf_fc_poller_api_cb cb_func;
+ void *cb_data; /* presumably passed as cb_data when cb_func is invoked - TODO confirm */
+ enum spdk_nvmf_fc_poller_api_ret ret;
+};
+
+/*
+ * Poller API structures
+ *
+ * Argument structures for the poller API. All except the queue-sync-done
+ * arguments embed a spdk_nvmf_fc_poller_api_cb_info.
+ */
+
+/* Add-connection arguments. */
+struct spdk_nvmf_fc_poller_api_add_connection_args {
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+/* Delete-connection arguments. */
+struct spdk_nvmf_fc_poller_api_del_connection_args {
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+ bool send_abts;
+ /* internal */
+ int fc_request_cnt;
+};
+
+/* Quiesce-queue arguments. */
+struct spdk_nvmf_fc_poller_api_quiesce_queue_args {
+ void *ctx;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+/* Activate-queue arguments. */
+struct spdk_nvmf_fc_poller_api_activate_queue_args {
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+/* ABTS-received arguments. */
+struct spdk_nvmf_fc_poller_api_abts_recvd_args {
+ struct spdk_nvmf_fc_abts_ctx *ctx;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+/* Queue-sync-done arguments; carries no callback info. */
+struct spdk_nvmf_fc_poller_api_queue_sync_done_args {
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ uint64_t tag;
+};
+
+/*
+ * NVMF LS request structure
+ *
+ * Describes one LS request: request/response buffer descriptors and lengths,
+ * exchange identifiers, and the nport, remote port and target it refers to.
+ * Embedded in struct spdk_nvmf_fc_rq_buf_ls_request.
+ */
+struct spdk_nvmf_fc_ls_rqst {
+ struct spdk_nvmf_fc_buffer_desc rqstbuf;
+ struct spdk_nvmf_fc_buffer_desc rspbuf;
+ uint32_t rqst_len;
+ uint32_t rsp_len;
+ uint32_t rpi;
+ struct spdk_nvmf_fc_xri *xri;
+ uint16_t oxid;
+ void *private_data; /* for LLD only (LS does not touch) */
+ TAILQ_ENTRY(spdk_nvmf_fc_ls_rqst) ls_pending_link; /* TAILQ linkage; presumably for a hwqp's ls_pending_queue - TODO confirm */
+ uint32_t s_id;
+ uint32_t d_id;
+ struct spdk_nvmf_fc_nport *nport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ struct spdk_nvmf_tgt *nvmf_tgt;
+};
+
+/*
+ * RQ Buffer LS Overlay Structure
+ *
+ * Packed layout of FCNVME_MAX_LS_BUFFER_SIZE bytes: request bytes, response
+ * bytes, the LS request bookkeeping, then padding. FCNVME_LS_RSVD_SIZE is the
+ * padding that brings the total to exactly FCNVME_MAX_LS_BUFFER_SIZE; the
+ * static assert below verifies the resulting size.
+ */
+#define FCNVME_LS_RSVD_SIZE (FCNVME_MAX_LS_BUFFER_SIZE - \
+ (sizeof(struct spdk_nvmf_fc_ls_rqst) + FCNVME_MAX_LS_REQ_SIZE + FCNVME_MAX_LS_RSP_SIZE))
+
+struct __attribute__((__packed__)) spdk_nvmf_fc_rq_buf_ls_request {
+ uint8_t rqst[FCNVME_MAX_LS_REQ_SIZE];
+ uint8_t resp[FCNVME_MAX_LS_RSP_SIZE];
+ struct spdk_nvmf_fc_ls_rqst ls_rqst;
+ uint8_t rsvd[FCNVME_LS_RSVD_SIZE]; /* padding up to FCNVME_MAX_LS_BUFFER_SIZE */
+};
+
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_rq_buf_ls_request) ==
+ FCNVME_MAX_LS_BUFFER_SIZE, "LS RQ Buffer overflow");
+
+
+/*
+ * Queue-sync arguments for the poller API. Instances can be linked on a TAILQ
+ * through 'link'; struct spdk_nvmf_fc_hwqp keeps a list of this type (sync_cbs).
+ */
+struct spdk_nvmf_fc_poller_api_queue_sync_args {
+ uint64_t u_id; /* same type as the u_id argument of the LLD issue_q_marker() op */
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+
+ /* Used internally by poller */
+ TAILQ_ENTRY(spdk_nvmf_fc_poller_api_queue_sync_args) link;
+};
+
+/*
+ * dump info
+ *
+ * Text buffer plus current write offset, filled by
+ * spdk_nvmf_fc_dump_buf_print(), which treats the buffer as
+ * SPDK_FC_HW_DUMP_BUF_SIZE bytes long.
+ */
+struct spdk_nvmf_fc_queue_dump_info {
+ char *buffer; /* destination buffer */
+ int offset; /* byte offset in buffer where the next print starts */
+};
+#define SPDK_FC_HW_DUMP_BUF_SIZE (10 * 4096)
+
+/*
+ * Append printf-style formatted text to a queue dump buffer.
+ *
+ * dump_info: destination buffer and current write offset. The buffer is
+ *            treated as SPDK_FC_HW_DUMP_BUF_SIZE bytes long.
+ * fmt, ...:  printf-style format string and arguments.
+ *
+ * Nothing is written when the buffer is already full. When the text does not
+ * fit it is truncated and the offset is moved to the end of the buffer, so
+ * later calls become no-ops. When formatting fails the offset is left
+ * unchanged.
+ */
+static inline void
+spdk_nvmf_fc_dump_buf_print(struct spdk_nvmf_fc_queue_dump_info *dump_info, char *fmt, ...)
+{
+	uint64_t buffer_size = SPDK_FC_HW_DUMP_BUF_SIZE;
+	int32_t avail = (int32_t)(buffer_size - dump_info->offset);
+	va_list ap;
+	int32_t written;
+
+	if (avail <= 0) {
+		/* Buffer is full; drop the text. */
+		return;
+	}
+
+	va_start(ap, fmt);
+	written = vsnprintf(dump_info->buffer + dump_info->offset, avail, fmt, ap);
+	va_end(ap);
+
+	if (written < 0) {
+		/*
+		 * vsnprintf() reports an output error with a negative value. Adding it
+		 * to the offset would move the offset backwards (below zero when it
+		 * starts at 0), and the next call would then write before the buffer.
+		 * Keep the offset and re-terminate the string at the current position.
+		 */
+		dump_info->buffer[dump_info->offset] = '\0';
+		return;
+	}
+
+	if (written >= avail) {
+		/* Output was truncated: mark the buffer as completely used. */
+		dump_info->offset += avail;
+	} else {
+		dump_info->offset += written;
+	}
+}
+
+/*
+ * NVMF FC caller callback definitions
+ *
+ * spdk_nvmf_fc_caller_cb is the completion callback type used by the LLD ops
+ * (issue_abort, xmt_bls_rsp, xmt_srsr_req) and by spdk_nvmf_fc_req_abort().
+ */
+typedef void (*spdk_nvmf_fc_caller_cb)(void *hwqp, int32_t status, void *args);
+
+/*
+ * A queued callback: context pointer, callback and callback arguments.
+ * struct spdk_nvmf_fc_request keeps a list of these in abort_cbs.
+ */
+struct spdk_nvmf_fc_caller_ctx {
+ void *ctx;
+ spdk_nvmf_fc_caller_cb cb;
+ void *cb_args;
+ TAILQ_ENTRY(spdk_nvmf_fc_caller_ctx) link;
+};
+
+/*
+ * Low level FC driver function table (functions provided by vendor FC device driver)
+ *
+ * A single table instance, spdk_nvmf_fc_lld_ops, is declared extern below.
+ */
+struct spdk_nvmf_fc_ll_drvr_ops {
+
+ /* initialize the low level driver */
+ int (*lld_init)(void);
+
+ /* low level driver finish */
+ void (*lld_fini)(void);
+
+ /* initialize hw queues */
+ int (*init_q)(struct spdk_nvmf_fc_hwqp *hwqp);
+
+ /* re-initialize hw queues; takes the previous and current vendor queue sets */
+ void (*reinit_q)(spdk_nvmf_fc_lld_hwqp_t queues_prev,
+ spdk_nvmf_fc_lld_hwqp_t queues_curr);
+
+ /* initialize hw queue buffers */
+ int (*init_q_buffers)(struct spdk_nvmf_fc_hwqp *hwqp);
+
+ /* poll the hw queues for requests */
+ uint32_t (*poll_queue)(struct spdk_nvmf_fc_hwqp *hwqp);
+
+ /* receive data (for data-in requests) */
+ int (*recv_data)(struct spdk_nvmf_fc_request *fc_req);
+
+ /* send data (for data-out requests) */
+ int (*send_data)(struct spdk_nvmf_fc_request *fc_req);
+
+ /* release hw queue buffer */
+ void (*q_buffer_release)(struct spdk_nvmf_fc_hwqp *hwqp, uint16_t buff_idx);
+
+ /* transmit nvme response */
+ int (*xmt_rsp)(struct spdk_nvmf_fc_request *fc_req, uint8_t *ersp_buf, uint32_t ersp_len);
+
+ /* transmit LS response */
+ int (*xmt_ls_rsp)(struct spdk_nvmf_fc_nport *tgtport, struct spdk_nvmf_fc_ls_rqst *ls_rqst);
+
+ /* issue abts */
+ int (*issue_abort)(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_xri *xri,
+ bool send_abts, spdk_nvmf_fc_caller_cb cb, void *cb_args);
+
+ /* transmit abts response */
+ int (*xmt_bls_rsp)(struct spdk_nvmf_fc_hwqp *hwqp, uint16_t ox_id, uint16_t rx_id, uint16_t rpi,
+ bool rjt, uint8_t rjt_exp, spdk_nvmf_fc_caller_cb cb, void *cb_args);
+
+ /* transmit single request - single response */
+ int (*xmt_srsr_req)(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_send_srsr *srsr,
+ spdk_nvmf_fc_caller_cb cb, void *cb_args);
+
+ /* issue queue marker (abts processing) */
+ int (*issue_q_marker)(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t u_id, uint16_t skip_rq);
+
+ /*
+ * assign a new connection to a hwqp; returns the hwqp
+ * (the connection ID is presumably returned through conn_id - TODO confirm)
+ */
+ struct spdk_nvmf_fc_hwqp *(*assign_conn_to_hwqp)(
+ struct spdk_nvmf_fc_hwqp *queues, uint32_t num_queues,
+ uint64_t *conn_id, uint32_t sq_size, bool for_aq);
+
+ /* get the hwqp from the given connection id */
+ struct spdk_nvmf_fc_hwqp *(*get_hwqp_from_conn_id)(struct spdk_nvmf_fc_hwqp *hwqp,
+ uint32_t num_queues, uint64_t conn_id);
+
+ /* release connection ID (done with using it) */
+ void (*release_conn)(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t conn_id, uint32_t sq_size);
+
+ /* dump all queue info into dump_info */
+ void (*dump_all_queues)(struct spdk_nvmf_fc_hwqp *ls_queues,
+ struct spdk_nvmf_fc_hwqp *io_queues,
+ uint32_t num_queues,
+ struct spdk_nvmf_fc_queue_dump_info *dump_info);
+};
+
+/* The low level driver function table; defined outside this header. */
+extern struct spdk_nvmf_fc_ll_drvr_ops spdk_nvmf_fc_lld_ops;
+
+/*
+ * NVMF FC inline and function prototypes
+ */
+
+/*
+ * Recover the FC request that embeds the given generic NVMF request by
+ * subtracting the offset of the 'req' member from the request's address.
+ */
+static inline struct spdk_nvmf_fc_request *
+spdk_nvmf_fc_get_fc_req(struct spdk_nvmf_request *req)
+{
+	uintptr_t container_addr = (uintptr_t)req;
+
+	container_addr -= offsetof(struct spdk_nvmf_fc_request, req);
+
+	return (struct spdk_nvmf_fc_request *)container_addr;
+}
+
+/*
+ * Report whether the port owning this hwqp is dead, i.e. whether its
+ * hw_port_status is SPDK_FC_PORT_QUIESCED. Every other status yields false.
+ */
+static inline bool
+spdk_nvmf_fc_is_port_dead(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+	return hwqp->fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED;
+}
+
+/*
+ * Return true when the request is in one of the transfer/response states
+ * (READ_XFER, READ_RSP, WRITE_XFER, WRITE_RSP or NONE_RSP), false otherwise.
+ */
+static inline bool
+spdk_nvmf_fc_req_in_xfer(struct spdk_nvmf_fc_request *fc_req)
+{
+	const int cur_state = fc_req->state;
+
+	return cur_state == SPDK_NVMF_FC_REQ_READ_XFER ||
+	       cur_state == SPDK_NVMF_FC_REQ_READ_RSP ||
+	       cur_state == SPDK_NVMF_FC_REQ_WRITE_XFER ||
+	       cur_state == SPDK_NVMF_FC_REQ_WRITE_RSP ||
+	       cur_state == SPDK_NVMF_FC_REQ_NONE_RSP;
+}
+
+/* Callback type taken by spdk_nvmf_fc_delete_association(). */
+typedef void (*spdk_nvmf_fc_del_assoc_cb)(void *arg, uint32_t err);
+int spdk_nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport,
+ uint64_t assoc_id, bool send_abts,
+ spdk_nvmf_fc_del_assoc_cb del_assoc_cb,
+ void *cb_data);
+
+void spdk_nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port);
+
+void spdk_nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port);
+
+struct spdk_nvmf_fc_port *spdk_nvmf_fc_port_list_get(uint8_t port_hdl);
+
+int spdk_nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport,
+ enum spdk_nvmf_fc_object_state state);
+
+int spdk_nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc,
+ enum spdk_nvmf_fc_object_state state);
+
+bool spdk_nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port);
+
+bool spdk_nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port);
+
+void spdk_nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void spdk_nvmf_fc_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp,
+ void *queues_curr);
+
+void spdk_nvmf_fc_init_poller(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_hwqp *hwqp);
+
+void spdk_nvmf_fc_add_hwqp_to_poller(struct spdk_nvmf_fc_hwqp *hwqp, bool admin_q);
+
+void spdk_nvmf_fc_remove_hwqp_from_poller(struct spdk_nvmf_fc_hwqp *hwqp);
+
+bool spdk_nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port);
+
+int spdk_nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port);
+
+bool spdk_nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port);
+
+int spdk_nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port);
+
+int spdk_nvmf_fc_hwqp_port_set_online(struct spdk_nvmf_fc_hwqp *hwqp);
+
+int spdk_nvmf_fc_hwqp_port_set_offline(struct spdk_nvmf_fc_hwqp *hwqp);
+
+int spdk_nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport,
+ enum spdk_nvmf_fc_object_state state);
+
+void spdk_nvmf_fc_port_list_add(struct spdk_nvmf_fc_port *fc_port);
+
+struct spdk_nvmf_fc_nport *spdk_nvmf_fc_nport_get(uint8_t port_hdl, uint16_t nport_hdl);
+
+int spdk_nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport);
+
+uint32_t spdk_nvmf_fc_nport_get_association_count(struct spdk_nvmf_fc_nport *nport);
+
+int spdk_nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport);
+
+uint32_t spdk_nvmf_fc_get_prli_service_params(void);
+
+bool spdk_nvmf_fc_nport_is_rport_empty(struct spdk_nvmf_fc_nport *nport);
+
+void spdk_nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport,
+ uint16_t rpi, uint16_t oxid,
+ uint16_t rxid);
+
+void spdk_nvmf_fc_dump_all_queues(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_queue_dump_info *dump_info);
+
+void spdk_nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst);
+
+int spdk_nvmf_fc_xmt_ls_rsp(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst);
+
+/* NOTE(review): "bcm" prefix differs from the spdk_nvmf_fc_ naming used by every other function here - confirm. */
+struct spdk_nvmf_fc_nport *spdk_nvmf_bcm_req_fc_nport_get(struct spdk_nvmf_request *req);
+
+struct spdk_nvmf_fc_association *spdk_nvmf_fc_get_ctrlr_assoc(struct spdk_nvmf_ctrlr *ctrlr);
+
+bool spdk_nvmf_fc_nport_is_association_empty(struct spdk_nvmf_fc_nport *nport);
+
+int spdk_nvmf_fc_xmt_srsr_req(struct spdk_nvmf_fc_hwqp *hwqp,
+ struct spdk_nvmf_fc_send_srsr *srsr,
+ spdk_nvmf_fc_caller_cb cb, void *cb_args);
+
+uint32_t spdk_nvmf_fc_get_num_nport_ctrlrs_in_subsystem(uint8_t port_hdl, uint16_t nport_hdl,
+ struct spdk_nvmf_subsystem *subsys);
+
+bool spdk_nvmf_fc_is_spdk_ctrlr_on_nport(uint8_t port_hdl, uint16_t nport_hdl,
+ struct spdk_nvmf_ctrlr *ctrlr);
+
+/* NOTE(review): traddr has no length parameter - the required buffer size must be checked against the implementation. */
+int spdk_nvmf_fc_get_ctrlr_init_traddr(char *traddr, struct spdk_nvmf_ctrlr *ctrlr);
+
+uint32_t spdk_nvmf_fc_get_hwqp_id(struct spdk_nvmf_request *req);
+
+void spdk_nvmf_fc_req_abort(struct spdk_nvmf_fc_request *fc_req,
+ bool send_abts, spdk_nvmf_fc_caller_cb cb,
+ void *cb_args);
+
+int spdk_nvmf_fc_add_port_listen(void *arg1, void *arg2);
+
+int spdk_nvmf_fc_remove_port_listen(void *arg1, void *arg2);
+
+void spdk_nvmf_fc_subsys_connect_cb(void *cb_ctx,
+ struct spdk_nvmf_request *req);
+
+void spdk_nvmf_fc_subsys_disconnect_cb(void *cb_ctx,
+ struct spdk_nvmf_qpair *qpair);
+
+uint32_t spdk_nvmf_fc_get_master_lcore(void);
+
+struct spdk_thread *spdk_nvmf_fc_get_master_thread(void);
+
+/*
+ * These functions are used by low level FC driver
+ */
+
+/*
+ * Recover the FC connection that embeds the given qpair by subtracting the
+ * offset of the 'qpair' member from the qpair's address.
+ */
+static inline struct spdk_nvmf_fc_conn *
+spdk_nvmf_fc_get_conn(struct spdk_nvmf_qpair *qpair)
+{
+	uintptr_t container_addr = (uintptr_t)qpair;
+
+	container_addr -= offsetof(struct spdk_nvmf_fc_conn, qpair);
+
+	return (struct spdk_nvmf_fc_conn *)container_addr;
+}
+
+/*
+ * Move the qpair's submission queue head forward by one entry, wrapping to 0
+ * once it has reached sq_head_max, and return the new head value.
+ */
+static inline uint16_t
+spdk_nvmf_fc_advance_conn_sqhead(struct spdk_nvmf_qpair *qpair)
+{
+	if (qpair->sq_head == qpair->sq_head_max) {
+		/* reached the last slot - wrap around */
+		qpair->sq_head = 0;
+	} else {
+		qpair->sq_head++;
+	}
+
+	return qpair->sq_head;
+}
+
+/*
+ * Decide whether a request should use send-frame: currently only keep-alive
+ * commands on the admin queue (qid 0). The command is inspected only after
+ * the queue id check has passed.
+ */
+static inline bool
+spdk_nvmf_fc_use_send_frame(struct spdk_nvmf_request *req)
+{
+	/* For now use for only keepalives. */
+	return req->qpair->qid == 0 &&
+	       req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_KEEP_ALIVE;
+}
+
+/*
+ * Poller API entry point: 'api' selects the operation and 'api_args' carries
+ * its arguments as an untyped pointer.
+ * NOTE(review): api_args presumably points at the spdk_nvmf_fc_poller_api_*_args
+ * structure matching 'api' - confirm against the implementation.
+ */
+enum spdk_nvmf_fc_poller_api_ret spdk_nvmf_fc_poller_api_func(
+ struct spdk_nvmf_fc_hwqp *hwqp,
+ enum spdk_nvmf_fc_poller_api api,
+ void *api_args);
+
+int spdk_nvmf_fc_process_frame(struct spdk_nvmf_fc_hwqp *hwqp, uint32_t buff_idx,
+ struct spdk_nvmf_fc_frame_hdr *frame,
+ struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen);
+
+void spdk_nvmf_fc_process_pending_req(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void spdk_nvmf_fc_process_pending_ls_rqst(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void spdk_nvmf_fc_req_set_state(struct spdk_nvmf_fc_request *fc_req,
+ enum spdk_nvmf_fc_request_state state);
+
+void spdk_nvmf_fc_free_req(struct spdk_nvmf_fc_request *fc_req);
+
+void spdk_nvmf_fc_req_abort_complete(void *arg1);
+
+bool spdk_nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req,
+ uint32_t rsp_cnt, uint32_t xfer_len);
+
+struct spdk_nvmf_fc_xri *spdk_nvmf_fc_get_xri(struct spdk_nvmf_fc_hwqp *hwqp);
+
+int spdk_nvmf_fc_put_xri(struct spdk_nvmf_fc_hwqp *hwqp,
+ struct spdk_nvmf_fc_xri *xri);
+
+void spdk_nvmf_fc_release_xri(struct spdk_nvmf_fc_hwqp *hwqp,
+ struct spdk_nvmf_fc_xri *xri, bool xb, bool abts);
+
+int spdk_nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *req);
+#endif
diff --git a/src/spdk/lib/nvmf/nvmf_internal.h b/src/spdk/lib/nvmf/nvmf_internal.h
new file mode 100644
index 00000000..c9c7bf36
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf_internal.h
@@ -0,0 +1,333 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_INTERNAL_H__
+#define __NVMF_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/likely.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/assert.h"
+#include "spdk/bdev.h"
+#include "spdk/queue.h"
+#include "spdk/util.h"
+#include "spdk/thread.h"
+
+#define SPDK_NVMF_MAX_SGL_ENTRIES 16
+
+/*
+ * Subsystem states. INACTIVE is explicitly 0, so a zero-initialized state
+ * field reads as inactive. Used both for a whole subsystem and for a
+ * per-poll-group subsystem state.
+ */
+enum spdk_nvmf_subsystem_state {
+ SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
+ SPDK_NVMF_SUBSYSTEM_ACTIVATING,
+ SPDK_NVMF_SUBSYSTEM_ACTIVE,
+ SPDK_NVMF_SUBSYSTEM_PAUSING,
+ SPDK_NVMF_SUBSYSTEM_PAUSED,
+ SPDK_NVMF_SUBSYSTEM_RESUMING,
+ SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
+};
+
+/*
+ * Queue pair states. UNINITIALIZED is explicitly 0, so a zero-initialized
+ * qpair reads as uninitialized.
+ */
+enum spdk_nvmf_qpair_state {
+ SPDK_NVMF_QPAIR_UNINITIALIZED = 0,
+ SPDK_NVMF_QPAIR_INACTIVE,
+ SPDK_NVMF_QPAIR_ACTIVATING,
+ SPDK_NVMF_QPAIR_ACTIVE,
+ SPDK_NVMF_QPAIR_DEACTIVATING,
+ SPDK_NVMF_QPAIR_ERROR,
+};
+
+/* State-change completion callback type; stored in struct spdk_nvmf_qpair as state_cb. */
+typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status);
+
+/*
+ * NVMe-oF target: options, the subsystem pointer array, the discovery log
+ * page with its generation counter, the list of transports and the destroy
+ * completion callback.
+ */
+struct spdk_nvmf_tgt {
+ struct spdk_nvmf_tgt_opts opts;
+
+ uint64_t discovery_genctr;
+
+ /* Array of subsystem pointers of size max_subsystems indexed by sid */
+ struct spdk_nvmf_subsystem **subsystems;
+
+ struct spdk_nvmf_discovery_log_page *discovery_log_page;
+ size_t discovery_log_page_size;
+ TAILQ_HEAD(, spdk_nvmf_transport) transports;
+
+ spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn;
+ void *destroy_cb_arg; /* presumably the argument passed to destroy_cb_fn - TODO confirm */
+};
+
+/* A host entry identified by its NQN; kept on a subsystem's 'hosts' list. */
+struct spdk_nvmf_host {
+ char *nqn;
+ TAILQ_ENTRY(spdk_nvmf_host) link;
+};
+
+/* A listener: transport ID plus the transport it belongs to; kept on a subsystem's 'listeners' list. */
+struct spdk_nvmf_listener {
+ struct spdk_nvme_transport_id trid;
+ struct spdk_nvmf_transport *transport;
+ TAILQ_ENTRY(spdk_nvmf_listener) link;
+};
+
+/* Per-transport part of a poll group; kept on the 'tgroups' list of struct spdk_nvmf_poll_group. */
+struct spdk_nvmf_transport_poll_group {
+ struct spdk_nvmf_transport *transport;
+ TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link;
+};
+
+/*
+ * Per-subsystem part of a poll group: the I/O channels of the subsystem's
+ * namespaces, the subsystem state as seen by this group, and a list of
+ * queued requests.
+ */
+struct spdk_nvmf_subsystem_poll_group {
+ /* Array of channels for each namespace indexed by nsid - 1 */
+ struct spdk_io_channel **channels;
+ uint32_t num_channels; /* presumably the element count of channels - TODO confirm */
+
+ enum spdk_nvmf_subsystem_state state;
+
+ TAILQ_HEAD(, spdk_nvmf_request) queued;
+};
+
+/*
+ * A poll group: a thread and poller together with the transport poll groups,
+ * subsystem poll groups and queue pairs that belong to it.
+ */
+struct spdk_nvmf_poll_group {
+ struct spdk_thread *thread;
+ struct spdk_poller *poller;
+
+ TAILQ_HEAD(, spdk_nvmf_transport_poll_group) tgroups;
+
+ /* Array of poll groups indexed by subsystem id (sid) */
+ struct spdk_nvmf_subsystem_poll_group *sgroups;
+ uint32_t num_sgroups; /* presumably the element count of sgroups - TODO confirm */
+
+ /* All of the queue pairs that belong to this poll group */
+ TAILQ_HEAD(, spdk_nvmf_qpair) qpairs;
+};
+
+/*
+ * Request execution status codes.
+ * NOTE(review): presumably returned by the command processing functions to
+ * say whether the request finished inline or will complete later - confirm.
+ */
+typedef enum _spdk_nvmf_request_exec_status {
+ SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE,
+ SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS,
+} spdk_nvmf_request_exec_status;
+
+/*
+ * Command message ("h2c" presumably stands for host-to-controller - TODO
+ * confirm): the same 64 bytes viewed as a fabrics capsule command, an NVMe
+ * command or one of the fabrics property/connect commands. The size is
+ * enforced by the static assert below.
+ */
+union nvmf_h2c_msg {
+ struct spdk_nvmf_capsule_cmd nvmf_cmd;
+ struct spdk_nvme_cmd nvme_cmd;
+ struct spdk_nvmf_fabric_prop_set_cmd prop_set_cmd;
+ struct spdk_nvmf_fabric_prop_get_cmd prop_get_cmd;
+ struct spdk_nvmf_fabric_connect_cmd connect_cmd;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size");
+
+/*
+ * Response message ("c2h" presumably stands for controller-to-host - TODO
+ * confirm): the same 16 bytes viewed as an NVMe completion or a fabrics
+ * property-get/connect response. The size is enforced by the static assert
+ * below.
+ */
+union nvmf_c2h_msg {
+ struct spdk_nvme_cpl nvme_cpl;
+ struct spdk_nvmf_fabric_prop_get_rsp prop_get_rsp;
+ struct spdk_nvmf_fabric_connect_rsp connect_rsp;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size");
+
+/*
+ * A generic NVMe-oF request: the owning qpair, pointers to the command and
+ * response messages, and the data buffer described both as a flat pointer
+ * with length and as an iovec array of up to SPDK_NVMF_MAX_SGL_ENTRIES entries.
+ */
+struct spdk_nvmf_request {
+ struct spdk_nvmf_qpair *qpair;
+ uint32_t length;
+ enum spdk_nvme_data_transfer xfer;
+ void *data;
+ union nvmf_h2c_msg *cmd;
+ union nvmf_c2h_msg *rsp;
+ struct iovec iov[SPDK_NVMF_MAX_SGL_ENTRIES];
+ uint32_t iovcnt; /* presumably the number of valid entries in iov - TODO confirm */
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+
+ TAILQ_ENTRY(spdk_nvmf_request) link;
+};
+
+/* A namespace: the owning subsystem, the bdev with its descriptor, and the namespace options. */
+struct spdk_nvmf_ns {
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ struct spdk_nvmf_ns_opts opts;
+};
+
+/*
+ * A queue pair: state with its state-change callback, the transport,
+ * controller and poll group it belongs to, the queue id, the submission queue
+ * head bookkeeping and the list of outstanding requests.
+ */
+struct spdk_nvmf_qpair {
+ enum spdk_nvmf_qpair_state state;
+ spdk_nvmf_state_change_done state_cb;
+ void *state_cb_arg;
+
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_poll_group *group;
+
+ uint16_t qid; /* 0 denotes the admin queue, see spdk_nvmf_qpair_is_admin_queue() */
+ uint16_t sq_head;
+ uint16_t sq_head_max;
+
+ TAILQ_HEAD(, spdk_nvmf_request) outstanding;
+ TAILQ_ENTRY(spdk_nvmf_qpair) link;
+};
+
+/* Feature values kept per controller (member 'feat' of struct spdk_nvmf_ctrlr), one union per NVMe feature. */
+struct spdk_nvmf_ctrlr_feat {
+ union spdk_nvme_feat_arbitration arbitration;
+ union spdk_nvme_feat_power_management power_management;
+ union spdk_nvme_feat_error_recovery error_recovery;
+ union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
+ union spdk_nvme_feat_number_of_queues number_of_queues;
+ union spdk_nvme_feat_write_atomicity write_atomicity;
+ union spdk_nvme_feat_async_event_configuration async_event_configuration;
+ union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
+};
+
+/*
+ * This structure represents an NVMe-oF controller,
+ * which is like a "session" in networking terms.
+ *
+ * It holds the controller id, the owning subsystem, the virtual controller
+ * properties, feature values, the admin qpair, the pending AER request and
+ * the changed-namespace list.
+ */
+struct spdk_nvmf_ctrlr {
+ uint16_t cntlid;
+ struct spdk_nvmf_subsystem *subsys;
+
+ struct {
+ union spdk_nvme_cap_register cap;
+ union spdk_nvme_vs_register vs;
+ union spdk_nvme_cc_register cc;
+ union spdk_nvme_csts_register csts;
+ } vcprop; /* virtual controller properties */
+
+ struct spdk_nvmf_ctrlr_feat feat;
+
+ struct spdk_nvmf_qpair *admin_qpair;
+ struct spdk_thread *thread;
+ struct spdk_bit_array *qpair_mask;
+
+ struct spdk_nvmf_request *aer_req; /* see spdk_nvmf_ctrlr_abort_aer() / spdk_nvmf_qpair_free_aer() */
+ union spdk_nvme_async_event_completion notice_event;
+ uint8_t hostid[16];
+
+ uint16_t changed_ns_list_count;
+ struct spdk_nvme_ns_list changed_ns_list;
+
+ TAILQ_ENTRY(spdk_nvmf_ctrlr) link;
+};
+
+/*
+ * An NVMe-oF subsystem: identity (id, NQN, type, serial number), state, the
+ * namespace pointer array, and the lists of controllers, allowed hosts and
+ * listeners.
+ */
+struct spdk_nvmf_subsystem {
+ struct spdk_thread *thread;
+ uint32_t id;
+ enum spdk_nvmf_subsystem_state state;
+
+ char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; /* + 1 presumably for the terminating NUL - TODO confirm */
+ enum spdk_nvmf_subtype subtype;
+ uint16_t next_cntlid;
+ bool allow_any_host;
+
+ struct spdk_nvmf_tgt *tgt;
+
+ char sn[SPDK_NVME_CTRLR_SN_LEN + 1]; /* + 1 presumably for the terminating NUL - TODO confirm */
+
+ /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
+ struct spdk_nvmf_ns **ns;
+ uint32_t max_nsid; /* bound checked by _spdk_nvmf_subsystem_get_ns() */
+ /* This is the maximum allowed nsid to a subsystem */
+ uint32_t max_allowed_nsid;
+
+ TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs;
+
+ TAILQ_HEAD(, spdk_nvmf_host) hosts;
+
+ TAILQ_HEAD(, spdk_nvmf_listener) listeners;
+
+ TAILQ_ENTRY(spdk_nvmf_subsystem) entries;
+};
+
+/* Completion callback type taken by the poll group add/remove/pause/resume subsystem functions below. */
+typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
+
+struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
+ enum spdk_nvme_transport_type);
+
+int spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_transport *transport);
+int spdk_nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem);
+int spdk_nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void spdk_nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void spdk_nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void spdk_nvmf_request_exec(struct spdk_nvmf_request *req);
+int spdk_nvmf_request_free(struct spdk_nvmf_request *req);
+int spdk_nvmf_request_complete(struct spdk_nvmf_request *req);
+
+void spdk_nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt,
+ void *buffer, uint64_t offset,
+ uint32_t length);
+
+void spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
+int spdk_nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req);
+int spdk_nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
+int spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
+bool spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
+bool spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
+void spdk_nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
+
+void spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata);
+
+int spdk_nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr);
+void spdk_nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr);
+struct spdk_nvmf_ctrlr *spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ uint16_t cntlid);
+int spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
+
+/*
+ * Abort aer is sent on a per controller basis and sends a completion for the aer to the host.
+ * This function should be called when attempting to recover in error paths when it is OK for
+ * the host to send a subsequent AER.
+ */
+void spdk_nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);
+
+/*
+ * Free aer simply frees the rdma resources for the aer without informing the host.
+ * This function should be called when deleting a qpair when one wants to make sure
+ * the qpair is completely empty before freeing the request. The reason we free the
+ * AER without sending a completion is to prevent the host from sending another AER.
+ */
+void spdk_nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);
+
+/*
+ * Look up a namespace of a subsystem by namespace id.
+ *
+ * Returns the entry of subsystem->ns for the given nsid, or NULL when nsid is
+ * 0 or greater than subsystem->max_nsid.
+ */
+static inline struct spdk_nvmf_ns *
+_spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+	/* nsid 0 wraps around to UINT32_MAX here, so the range check below rejects it too. */
+	const uint32_t ns_index = nsid - 1;
+
+	if (spdk_unlikely(ns_index >= subsystem->max_nsid)) {
+		return NULL;
+	}
+
+	return subsystem->ns[ns_index];
+}
+
+/* Return true when the qpair is the admin queue, i.e. its queue id is 0. */
+static inline bool
+spdk_nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
+{
+	const uint16_t admin_qid = 0;
+
+	return qpair->qid == admin_qid;
+}
+
+#endif /* __NVMF_INTERNAL_H__ */
diff --git a/src/spdk/lib/nvmf/rdma.c b/src/spdk/lib/nvmf/rdma.c
new file mode 100644
index 00000000..333e703f
--- /dev/null
+++ b/src/spdk/lib/nvmf/rdma.c
@@ -0,0 +1,2930 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/config.h"
+#include "spdk/assert.h"
+#include "spdk/thread.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+
+/*
+ RDMA Connection Resource Defaults
+ */
+#define NVMF_DEFAULT_TX_SGE 1
+#define NVMF_DEFAULT_RX_SGE 2
+#define NVMF_DEFAULT_DATA_SGE 16
+
+/* The RDMA completion queue size */
+#define NVMF_RDMA_CQ_SIZE 4096
+
+/* AIO backend requires block size aligned data buffers,
+ * extra 4KiB aligned data buffer should work for most devices.
+ */
+#define SHIFT_4KB 12
+#define NVMF_DATA_BUFFER_ALIGNMENT (1 << SHIFT_4KB)
+#define NVMF_DATA_BUFFER_MASK (NVMF_DATA_BUFFER_ALIGNMENT - 1)
+
+enum spdk_nvmf_rdma_request_state {
+ /* The request is not currently in use */
+ RDMA_REQUEST_STATE_FREE = 0,
+
+ /* Initial state when request first received */
+ RDMA_REQUEST_STATE_NEW,
+
+ /* The request is queued until a data buffer is available. */
+ RDMA_REQUEST_STATE_NEED_BUFFER,
+
+ /* The request is waiting on RDMA queue depth availability
+ * to transfer data between the host and the controller.
+ */
+ RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING,
+
+ /* The request is currently transferring data from the host to the controller. */
+ RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+
+ /* The request is ready to execute at the block device */
+ RDMA_REQUEST_STATE_READY_TO_EXECUTE,
+
+ /* The request is currently executing at the block device */
+ RDMA_REQUEST_STATE_EXECUTING,
+
+ /* The request finished executing at the block device */
+ RDMA_REQUEST_STATE_EXECUTED,
+
+ /* The request is ready to send a completion */
+ RDMA_REQUEST_STATE_READY_TO_COMPLETE,
+
+ /* The request is currently transferring data from the controller to the host. */
+ RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+
+ /* The request currently has an outstanding completion without an
+ * associated data transfer.
+ */
+ RDMA_REQUEST_STATE_COMPLETING,
+
+ /* The request completed and can be marked free. */
+ RDMA_REQUEST_STATE_COMPLETED,
+
+ /* Terminator */
+ RDMA_REQUEST_NUM_STATES,
+};
+
+#define OBJECT_NVMF_RDMA_IO 0x40
+
+#define TRACE_GROUP_NVMF_RDMA 0x4
+#define TRACE_RDMA_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x0)
+#define TRACE_RDMA_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x1)
+#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x2)
+#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x3)
+#define TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x4)
+#define TRACE_RDMA_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x5)
+#define TRACE_RDMA_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x6)
+#define TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x7)
+#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x8)
+#define TRACE_RDMA_REQUEST_STATE_COMPLETING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x9)
+#define TRACE_RDMA_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xA)
+#define TRACE_RDMA_QP_CREATE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xB)
+#define TRACE_RDMA_IBV_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xC)
+#define TRACE_RDMA_CM_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xD)
+#define TRACE_RDMA_QP_STATE_CHANGE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xE)
+#define TRACE_RDMA_QP_DISCONNECT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xF)
+#define TRACE_RDMA_QP_DESTROY SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x10)
+
+/*
+ * Register the RDMA I/O trace object and the description of every tracepoint
+ * defined above. Descriptions are registered in the same order as the
+ * tracepoint IDs are defined.
+ */
+SPDK_TRACE_REGISTER_FN(nvmf_trace)
+{
+	/* Request-state tracepoints: all attach to the RDMA I/O object and carry a cm_id pointer. */
+	const struct {
+		const char	*name;
+		uint16_t	tpoint_id;
+		uint8_t		new_object;
+	} req_tpoints[] = {
+		{ "RDMA_REQ_NEW", TRACE_RDMA_REQUEST_STATE_NEW, 1 },
+		{ "RDMA_REQ_NEED_BUFFER", TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0 },
+		{ "RDMA_REQ_TX_PENDING_H_TO_C", TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING, 0 },
+		{ "RDMA_REQ_TX_H_TO_C", TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0 },
+		{ "RDMA_REQ_RDY_TO_EXECUTE", TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0 },
+		{ "RDMA_REQ_EXECUTING", TRACE_RDMA_REQUEST_STATE_EXECUTING, 0 },
+		{ "RDMA_REQ_EXECUTED", TRACE_RDMA_REQUEST_STATE_EXECUTED, 0 },
+		{ "RDMA_REQ_RDY_TO_COMPLETE", TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0 },
+		{ "RDMA_REQ_COMPLETING_CONTROLLER_TO_HOST", TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0 },
+		{ "RDMA_REQ_COMPLETING_INCAPSULE", TRACE_RDMA_REQUEST_STATE_COMPLETING, 0 },
+		{ "RDMA_REQ_COMPLETED", TRACE_RDMA_REQUEST_STATE_COMPLETED, 0 },
+	};
+	/* Queue-pair and async-event tracepoints: not tied to any trace object. */
+	const struct {
+		const char	*name;
+		uint16_t	tpoint_id;
+		uint8_t		arg1_is_ptr;
+		const char	*arg1_name;
+	} qp_tpoints[] = {
+		{ "RDMA_QP_CREATE", TRACE_RDMA_QP_CREATE, 0, "" },
+		{ "RDMA_IBV_ASYNC_EVENT", TRACE_RDMA_IBV_ASYNC_EVENT, 0, "type: " },
+		{ "RDMA_CM_ASYNC_EVENT", TRACE_RDMA_CM_ASYNC_EVENT, 0, "type: " },
+		{ "RDMA_QP_STATE_CHANGE", TRACE_RDMA_QP_STATE_CHANGE, 1, "state: " },
+		{ "RDMA_QP_DISCONNECT", TRACE_RDMA_QP_DISCONNECT, 0, "" },
+		{ "RDMA_QP_DESTROY", TRACE_RDMA_QP_DESTROY, 0, "" },
+	};
+	size_t i;
+
+	spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r');
+
+	for (i = 0; i < SPDK_COUNTOF(req_tpoints); i++) {
+		spdk_trace_register_description(req_tpoints[i].name, "",
+						req_tpoints[i].tpoint_id,
+						OWNER_NONE, OBJECT_NVMF_RDMA_IO,
+						req_tpoints[i].new_object, 1, "cmid: ");
+	}
+
+	for (i = 0; i < SPDK_COUNTOF(qp_tpoints); i++) {
+		spdk_trace_register_description(qp_tpoints[i].name, "",
+						qp_tpoints[i].tpoint_id,
+						OWNER_NONE, OBJECT_NONE, 0,
+						qp_tpoints[i].arg1_is_ptr, qp_tpoints[i].arg1_name);
+	}
+}
+
+/* This structure holds commands as they are received off the wire.
+ * It must be dynamically paired with a full request object
+ * (spdk_nvmf_rdma_request) to service a request. It is separate
+ * from the request because RDMA does not appear to order
+ * completions, so occasionally we'll get a new incoming
+ * command when there aren't any free request objects.
+ */
+struct spdk_nvmf_rdma_recv {
+ struct ibv_recv_wr wr;
+ struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE];
+
+ struct spdk_nvmf_rdma_qpair *qpair;
+
+ /* In-capsule data buffer */
+ uint8_t *buf;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_recv) link;
+};
+
+struct spdk_nvmf_rdma_request {
+ struct spdk_nvmf_request req;
+ bool data_from_pool;
+
+ enum spdk_nvmf_rdma_request_state state;
+
+ struct spdk_nvmf_rdma_recv *recv;
+
+ struct {
+ struct ibv_send_wr wr;
+ struct ibv_sge sgl[NVMF_DEFAULT_TX_SGE];
+ } rsp;
+
+ struct {
+ struct ibv_send_wr wr;
+ struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES];
+ void *buffers[SPDK_NVMF_MAX_SGL_ENTRIES];
+ } data;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_request) link;
+ TAILQ_ENTRY(spdk_nvmf_rdma_request) state_link;
+};
+
+struct spdk_nvmf_rdma_qpair {
+ struct spdk_nvmf_qpair qpair;
+
+ struct spdk_nvmf_rdma_port *port;
+ struct spdk_nvmf_rdma_poller *poller;
+
+ struct rdma_cm_id *cm_id;
+ struct rdma_cm_id *listen_id;
+
+ /* The maximum number of I/O outstanding on this connection at one time */
+ uint16_t max_queue_depth;
+
+ /* The maximum number of active RDMA READ and WRITE operations at one time */
+ uint16_t max_rw_depth;
+
+ /* Receives that are waiting for a request object */
+ TAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue;
+
+ /* Queues to track the requests in all states */
+ TAILQ_HEAD(, spdk_nvmf_rdma_request) state_queue[RDMA_REQUEST_NUM_STATES];
+
+ /* Number of requests in each state */
+ uint32_t state_cntr[RDMA_REQUEST_NUM_STATES];
+
+ int max_sge;
+
+ /* Array of size "max_queue_depth" containing RDMA requests. */
+ struct spdk_nvmf_rdma_request *reqs;
+
+ /* Array of size "max_queue_depth" containing RDMA recvs. */
+ struct spdk_nvmf_rdma_recv *recvs;
+
+ /* Array of size "max_queue_depth" containing 64 byte capsules
+ * used for receive.
+ */
+ union nvmf_h2c_msg *cmds;
+ struct ibv_mr *cmds_mr;
+
+ /* Array of size "max_queue_depth" containing 16 byte completions
+ * to be sent back to the user.
+ */
+ union nvmf_c2h_msg *cpls;
+ struct ibv_mr *cpls_mr;
+
+ /* Array of size "max_queue_depth * InCapsuleDataSize" containing
+ * buffers to be used for in capsule data.
+ */
+ void *bufs;
+ struct ibv_mr *bufs_mr;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_qpair) link;
+
+ /* Mgmt channel */
+ struct spdk_io_channel *mgmt_channel;
+ struct spdk_nvmf_rdma_mgmt_channel *ch;
+
+ /* IBV queue pair attributes: they are used to manage
+ * qp state and recover from errors.
+ */
+ struct ibv_qp_init_attr ibv_init_attr;
+ struct ibv_qp_attr ibv_attr;
+
+ bool qpair_disconnected;
+
+ /* Reference counter for how many unprocessed messages
+ * from other threads are currently outstanding. The
+ * qpair cannot be destroyed until this is 0. This is
+ * atomically incremented from any thread, but only
+ * decremented and read from the thread that owns this
+ * qpair.
+ */
+ uint32_t refcnt;
+};
+
+struct spdk_nvmf_rdma_poller {
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poll_group *group;
+
+ struct ibv_cq *cq;
+
+ TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_poller) link;
+};
+
+struct spdk_nvmf_rdma_poll_group {
+ struct spdk_nvmf_transport_poll_group group;
+
+ TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers;
+};
+
+/* Assuming rdma_cm uses just one protection domain per ibv_context. */
+struct spdk_nvmf_rdma_device {
+ struct ibv_device_attr attr;
+ struct ibv_context *context;
+
+ struct spdk_mem_map *map;
+ struct ibv_pd *pd;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_device) link;
+};
+
+struct spdk_nvmf_rdma_port {
+ struct spdk_nvme_transport_id trid;
+ struct rdma_cm_id *id;
+ struct spdk_nvmf_rdma_device *device;
+ uint32_t ref;
+ TAILQ_ENTRY(spdk_nvmf_rdma_port) link;
+};
+
+struct spdk_nvmf_rdma_transport {
+ struct spdk_nvmf_transport transport;
+
+ struct rdma_event_channel *event_channel;
+
+ struct spdk_mempool *data_buf_pool;
+
+ pthread_mutex_t lock;
+
+ /* fields used to poll RDMA/IB events */
+ nfds_t npoll_fds;
+ struct pollfd *poll_fds;
+
+ TAILQ_HEAD(, spdk_nvmf_rdma_device) devices;
+ TAILQ_HEAD(, spdk_nvmf_rdma_port) ports;
+};
+
+struct spdk_nvmf_rdma_mgmt_channel {
+ /* Requests that are waiting to obtain a data buffer */
+ TAILQ_HEAD(, spdk_nvmf_rdma_request) pending_data_buf_queue;
+};
+
+/* Atomically add one to the qpair's count of outstanding cross-thread messages. */
+static inline void
+spdk_nvmf_rdma_qpair_inc_refcnt(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	(void)__sync_add_and_fetch(&rqpair->refcnt, 1);
+}
+
+/*
+ * Atomically drop one reference from the qpair and return the resulting count.
+ * The count must be non-zero on entry (asserted).
+ */
+static inline uint32_t
+spdk_nvmf_rdma_qpair_dec_refcnt(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	uint32_t seen, wanted;
+
+	/* Compare-and-swap loop: retry whenever the counter changed under us. */
+	for (;;) {
+		seen = rqpair->refcnt;
+		assert(seen > 0);
+		wanted = seen - 1;
+		if (__sync_bool_compare_and_swap(&rqpair->refcnt, seen, wanted)) {
+			return wanted;
+		}
+	}
+}
+
+/* API to IBV QueuePair */
+
+/*
+ * Printable names for enum ibv_qp_state, indexed by the enum value.
+ * The table also covers IBV_QPS_UNKNOWN so that a state reported by the
+ * verbs layer can always be used as an index without reading past the end.
+ */
+static const char *str_ibv_qp_state[] = {
+	"IBV_QPS_RESET",
+	"IBV_QPS_INIT",
+	"IBV_QPS_RTR",
+	"IBV_QPS_RTS",
+	"IBV_QPS_SQD",
+	"IBV_QPS_SQE",
+	"IBV_QPS_ERR",
+	"IBV_QPS_UNKNOWN"
+};
+
+/*
+ * Refresh the cached IBV attributes of the queue pair by querying the verbs
+ * layer and return the state found in the refreshed attributes. A trace
+ * record is emitted when the state differs from the previously cached one.
+ * A failed query is logged and asserted on.
+ */
+static enum ibv_qp_state
+spdk_nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	/* All the attributes needed for recovery */
+	static const int recovery_attr_mask =
+		IBV_QP_STATE |
+		IBV_QP_PKEY_INDEX |
+		IBV_QP_PORT |
+		IBV_QP_ACCESS_FLAGS |
+		IBV_QP_AV |
+		IBV_QP_PATH_MTU |
+		IBV_QP_DEST_QPN |
+		IBV_QP_RQ_PSN |
+		IBV_QP_MAX_DEST_RD_ATOMIC |
+		IBV_QP_MIN_RNR_TIMER |
+		IBV_QP_SQ_PSN |
+		IBV_QP_TIMEOUT |
+		IBV_QP_RETRY_CNT |
+		IBV_QP_RNR_RETRY |
+		IBV_QP_MAX_QP_RD_ATOMIC;
+	enum ibv_qp_state prev_state, cur_state;
+	int rc;
+
+	/* Remember the cached state before the query overwrites it. */
+	prev_state = rqpair->ibv_attr.qp_state;
+
+	rc = ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
+			  recovery_attr_mask, &rqpair->ibv_init_attr);
+	if (rc != 0) {
+		SPDK_ERRLOG("Failed to get updated RDMA queue pair state!\n");
+		assert(false);
+	}
+
+	cur_state = rqpair->ibv_attr.qp_state;
+	if (cur_state != prev_state) {
+		spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0,
+				  (uintptr_t)rqpair->cm_id, cur_state);
+	}
+
+	return cur_state;
+}
+
+/*
+ * Move the IBV queue pair to new_state and verify that the transition took
+ * effect.
+ *
+ * The attributes cached in rqpair->ibv_attr are reused for the transition;
+ * only the subset listed in attr_mask_rc for the target state is handed to
+ * ibv_modify_qp().
+ *
+ * Returns 0 on success, -1 when new_state is not one of the known states or
+ * the queue pair did not end up in new_state, and the non-zero return value
+ * of ibv_modify_qp() when the modification itself fails.
+ */
+static int
+spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
+			     enum ibv_qp_state new_state)
+{
+	int rc;
+	enum ibv_qp_state state;
+	const char *actual_name;
+	/* Attribute mask required by ibv_modify_qp() for each target state. */
+	static const int attr_mask_rc[] = {
+		[IBV_QPS_RESET] = IBV_QP_STATE,
+		[IBV_QPS_INIT] = (IBV_QP_STATE |
+				  IBV_QP_PKEY_INDEX |
+				  IBV_QP_PORT |
+				  IBV_QP_ACCESS_FLAGS),
+		[IBV_QPS_RTR] = (IBV_QP_STATE |
+				 IBV_QP_AV |
+				 IBV_QP_PATH_MTU |
+				 IBV_QP_DEST_QPN |
+				 IBV_QP_RQ_PSN |
+				 IBV_QP_MAX_DEST_RD_ATOMIC |
+				 IBV_QP_MIN_RNR_TIMER),
+		[IBV_QPS_RTS] = (IBV_QP_STATE |
+				 IBV_QP_SQ_PSN |
+				 IBV_QP_TIMEOUT |
+				 IBV_QP_RETRY_CNT |
+				 IBV_QP_RNR_RETRY |
+				 IBV_QP_MAX_QP_RD_ATOMIC),
+		[IBV_QPS_SQD] = IBV_QP_STATE,
+		[IBV_QPS_SQE] = IBV_QP_STATE,
+		[IBV_QPS_ERR] = IBV_QP_STATE,
+	};
+
+	/* Only states that have an entry in the tables above may be requested. */
+	switch (new_state) {
+	case IBV_QPS_RESET:
+	case IBV_QPS_INIT:
+	case IBV_QPS_RTR:
+	case IBV_QPS_RTS:
+	case IBV_QPS_SQD:
+	case IBV_QPS_SQE:
+	case IBV_QPS_ERR:
+		break;
+	default:
+		SPDK_ERRLOG("QP#%d: bad state requested: %u\n",
+			    rqpair->qpair.qid, (unsigned int)new_state);
+		return -1;
+	}
+
+	rqpair->ibv_attr.cur_qp_state = rqpair->ibv_attr.qp_state;
+	rqpair->ibv_attr.qp_state = new_state;
+	rqpair->ibv_attr.ah_attr.port_num = rqpair->ibv_attr.port_num;
+
+	rc = ibv_modify_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
+			   attr_mask_rc[new_state]);
+	if (rc) {
+		/* ibv_modify_qp() reports the failure reason in its return value, not in errno. */
+		SPDK_ERRLOG("QP#%d: failed to set state to: %s, %d (%s)\n",
+			    rqpair->qpair.qid, str_ibv_qp_state[new_state], rc, spdk_strerror(rc));
+		return rc;
+	}
+
+	state = spdk_nvmf_rdma_update_ibv_state(rqpair);
+
+	if (state != new_state) {
+		/* The queried state is not validated, so never index the name table blindly. */
+		if ((size_t)state < SPDK_COUNTOF(str_ibv_qp_state)) {
+			actual_name = str_ibv_qp_state[state];
+		} else {
+			actual_name = "IBV_QPS_UNKNOWN";
+		}
+		SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n",
+			    rqpair->qpair.qid, str_ibv_qp_state[new_state],
+			    actual_name);
+		return -1;
+	}
+
+	/* state == new_state here, which the switch above proved to be in range. */
+	SPDK_NOTICELOG("IBV QP#%u changed to: %s\n", rqpair->qpair.qid,
+		       str_ibv_qp_state[state]);
+	return 0;
+}
+
+/*
+ * Move an RDMA request to a new state: unlink it from the per-state queue of
+ * its current state, record the new state and append it to that state's
+ * queue, keeping the per-state counters of the owning qpair in sync.
+ */
+static void
+spdk_nvmf_rdma_request_set_state(struct spdk_nvmf_rdma_request *rdma_req,
+				 enum spdk_nvmf_rdma_request_state state)
+{
+	struct spdk_nvmf_qpair *qpair = rdma_req->req.qpair;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	enum spdk_nvmf_rdma_request_state prev_state = rdma_req->state;
+
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	/* Leave the queue of the state the request is currently in. */
+	TAILQ_REMOVE(&rqpair->state_queue[prev_state], rdma_req, state_link);
+	rqpair->state_cntr[prev_state]--;
+
+	/* Join the tail of the queue for the new state. */
+	rdma_req->state = state;
+	TAILQ_INSERT_TAIL(&rqpair->state_queue[state], rdma_req, state_link);
+	rqpair->state_cntr[state]++;
+}
+
+/*
+ * Create callback for the management channel: initializes the (empty) list of
+ * requests waiting for a data buffer. io_device is unused. Always returns 0.
+ */
+static int
+spdk_nvmf_rdma_mgmt_channel_create(void *io_device, void *ctx_buf)
+{
+	struct spdk_nvmf_rdma_mgmt_channel *mgmt_ch;
+
+	mgmt_ch = ctx_buf;
+	TAILQ_INIT(&mgmt_ch->pending_data_buf_queue);
+
+	return 0;
+}
+
+/*
+ * Destroy callback for the management channel: only reports an error if
+ * requests are still waiting for a data buffer. io_device is unused.
+ */
+static void
+spdk_nvmf_rdma_mgmt_channel_destroy(void *io_device, void *ctx_buf)
+{
+	struct spdk_nvmf_rdma_mgmt_channel *mgmt_ch = ctx_buf;
+
+	if (TAILQ_EMPTY(&mgmt_ch->pending_data_buf_queue)) {
+		return;
+	}
+
+	SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n");
+}
+
+/* Number of requests currently transferring data in either direction on this qpair. */
+static int
+spdk_nvmf_rdma_cur_rw_depth(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	const uint32_t *cntr = rqpair->state_cntr;
+
+	return cntr[RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER] +
+	       cntr[RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST];
+}
+
+/* Number of request objects of this qpair that are not in the FREE state. */
+static int
+spdk_nvmf_rdma_cur_queue_depth(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	const uint32_t free_reqs = rqpair->state_cntr[RDMA_REQUEST_STATE_FREE];
+
+	return rqpair->max_queue_depth - free_reqs;
+}
+
+/*
+ * Tear down an RDMA queue pair and release everything it owns.
+ *
+ * Nothing is released while requests are still outstanding (the qpair is only
+ * flagged as disconnected) or while cross-thread references remain; in both
+ * cases the function returns early.
+ */
+static void
+spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);
+
+	/* Requests still in flight: remember the disconnect and bail out. */
+	if (spdk_nvmf_rdma_cur_queue_depth(rqpair) != 0) {
+		rqpair->qpair_disconnected = true;
+		return;
+	}
+
+	/* Messages from other threads still reference this qpair. */
+	if (rqpair->refcnt != 0) {
+		return;
+	}
+
+	if (rqpair->poller != NULL) {
+		TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
+	}
+
+	/* Deregister the memory regions before the queue pair itself goes away. */
+	if (rqpair->cmds_mr != NULL) {
+		ibv_dereg_mr(rqpair->cmds_mr);
+	}
+	if (rqpair->cpls_mr != NULL) {
+		ibv_dereg_mr(rqpair->cpls_mr);
+	}
+	if (rqpair->bufs_mr != NULL) {
+		ibv_dereg_mr(rqpair->bufs_mr);
+	}
+
+	if (rqpair->cm_id != NULL) {
+		rdma_destroy_qp(rqpair->cm_id);
+		rdma_destroy_id(rqpair->cm_id);
+	}
+
+	if (rqpair->mgmt_channel != NULL) {
+		spdk_put_io_channel(rqpair->mgmt_channel);
+	}
+
+	/* Free all memory */
+	spdk_dma_free(rqpair->cmds);
+	spdk_dma_free(rqpair->cpls);
+	spdk_dma_free(rqpair->bufs);
+	free(rqpair->reqs);
+	free(rqpair->recvs);
+	free(rqpair);
+}
+
+/*
+ * Create the verbs queue pair for a connection and set up all of its
+ * per-connection resources: the request and recv arrays, the command,
+ * completion and (optional) in-capsule data buffers together with their
+ * memory regions, the per-state request queues, and one posted RECV per
+ * queue slot.
+ *
+ * rqpair->poller, rqpair->port, rqpair->cm_id, rqpair->max_queue_depth and
+ * rqpair->max_sge are read here and must have been set by the caller.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): the failure paths call spdk_nvmf_rdma_qpair_destroy() before
+ * any request has been put on the FREE queue, so
+ * spdk_nvmf_rdma_cur_queue_depth() is still non-zero at that point and the
+ * destroy looks like it is deferred rather than performed - confirm that the
+ * teardown is completed elsewhere.
+ */
+static int
+spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	int rc, i;
+	struct spdk_nvmf_rdma_recv *rdma_recv;
+	struct spdk_nvmf_rdma_request *rdma_req;
+	struct spdk_nvmf_transport *transport;
+	size_t cmds_len, cpls_len, bufs_len;
+
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+	transport = &rtransport->transport;
+
+	/* Buffer sizes are computed once, in size_t, so the products cannot wrap at 32 bits. */
+	cmds_len = (size_t)rqpair->max_queue_depth * sizeof(*rqpair->cmds);
+	cpls_len = (size_t)rqpair->max_queue_depth * sizeof(*rqpair->cpls);
+	bufs_len = (size_t)rqpair->max_queue_depth * transport->opts.in_capsule_data_size;
+
+	memset(&rqpair->ibv_init_attr, 0, sizeof(struct ibv_qp_init_attr));
+	rqpair->ibv_init_attr.qp_context = rqpair;
+	rqpair->ibv_init_attr.qp_type = IBV_QPT_RC;
+	rqpair->ibv_init_attr.send_cq = rqpair->poller->cq;
+	rqpair->ibv_init_attr.recv_cq = rqpair->poller->cq;
+	rqpair->ibv_init_attr.cap.max_send_wr = rqpair->max_queue_depth *
+						2; /* SEND, READ, and WRITE operations */
+	rqpair->ibv_init_attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */
+	rqpair->ibv_init_attr.cap.max_send_sge = rqpair->max_sge;
+	rqpair->ibv_init_attr.cap.max_recv_sge = NVMF_DEFAULT_RX_SGE;
+
+	rc = rdma_create_qp(rqpair->cm_id, rqpair->port->device->pd, &rqpair->ibv_init_attr);
+	if (rc) {
+		SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno));
+		/* No QP exists yet, so only the CM id is destroyed here; clearing the
+		 * pointer keeps the destroy routine from touching it again. */
+		rdma_destroy_id(rqpair->cm_id);
+		rqpair->cm_id = NULL;
+		spdk_nvmf_rdma_qpair_destroy(rqpair);
+		return -1;
+	}
+
+	spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);
+
+	rqpair->reqs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->reqs));
+	rqpair->recvs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->recvs));
+	rqpair->cmds = spdk_dma_zmalloc(cmds_len, 0x1000, NULL);
+	rqpair->cpls = spdk_dma_zmalloc(cpls_len, 0x1000, NULL);
+
+	/* The in-capsule data buffers exist only when in-capsule data is enabled. */
+	if (transport->opts.in_capsule_data_size > 0) {
+		rqpair->bufs = spdk_dma_zmalloc(bufs_len, 0x1000, NULL);
+	}
+
+	if (!rqpair->reqs || !rqpair->recvs || !rqpair->cmds ||
+	    !rqpair->cpls || (transport->opts.in_capsule_data_size && !rqpair->bufs)) {
+		SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n");
+		spdk_nvmf_rdma_qpair_destroy(rqpair);
+		return -1;
+	}
+
+	rqpair->cmds_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cmds, cmds_len,
+				     IBV_ACCESS_LOCAL_WRITE);
+	rqpair->cpls_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cpls, cpls_len,
+				     0);
+
+	if (transport->opts.in_capsule_data_size) {
+		rqpair->bufs_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->bufs, bufs_len,
+					     IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+	}
+
+	if (!rqpair->cmds_mr || !rqpair->cpls_mr || (transport->opts.in_capsule_data_size &&
+			!rqpair->bufs_mr)) {
+		SPDK_ERRLOG("Unable to register required memory for RDMA queue.\n");
+		spdk_nvmf_rdma_qpair_destroy(rqpair);
+		return -1;
+	}
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %zx LKey: %x\n",
+		      rqpair->cmds, cmds_len, rqpair->cmds_mr->lkey);
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %zx LKey: %x\n",
+		      rqpair->cpls, cpls_len, rqpair->cpls_mr->lkey);
+	if (rqpair->bufs && rqpair->bufs_mr) {
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %zx LKey: %x\n",
+			      rqpair->bufs, bufs_len, rqpair->bufs_mr->lkey);
+	}
+
+	/* Initialise request state queues and counters of the queue pair */
+	for (i = RDMA_REQUEST_STATE_FREE; i < RDMA_REQUEST_NUM_STATES; i++) {
+		TAILQ_INIT(&rqpair->state_queue[i]);
+		rqpair->state_cntr[i] = 0;
+	}
+
+	/* Describe one RECV per queue slot and post it so commands can arrive. */
+	for (i = 0; i < rqpair->max_queue_depth; i++) {
+		struct ibv_recv_wr *bad_wr = NULL;
+
+		rdma_recv = &rqpair->recvs[i];
+		rdma_recv->qpair = rqpair;
+
+		/* Set up memory to receive commands */
+		if (rqpair->bufs) {
+			rdma_recv->buf = (void *)((uintptr_t)rqpair->bufs + ((size_t)i *
+						  transport->opts.in_capsule_data_size));
+		}
+
+		/* SGE 0 always receives the command capsule itself. */
+		rdma_recv->sgl[0].addr = (uintptr_t)&rqpair->cmds[i];
+		rdma_recv->sgl[0].length = sizeof(rqpair->cmds[i]);
+		rdma_recv->sgl[0].lkey = rqpair->cmds_mr->lkey;
+		rdma_recv->wr.num_sge = 1;
+
+		/* SGE 1 receives in-capsule data, when a buffer for it exists. */
+		if (rdma_recv->buf && rqpair->bufs_mr) {
+			rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf;
+			rdma_recv->sgl[1].length = transport->opts.in_capsule_data_size;
+			rdma_recv->sgl[1].lkey = rqpair->bufs_mr->lkey;
+			rdma_recv->wr.num_sge++;
+		}
+
+		rdma_recv->wr.wr_id = (uintptr_t)rdma_recv;
+		rdma_recv->wr.sg_list = rdma_recv->sgl;
+
+		rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_recv->wr, &bad_wr);
+		if (rc) {
+			SPDK_ERRLOG("Unable to post capsule for RDMA RECV\n");
+			spdk_nvmf_rdma_qpair_destroy(rqpair);
+			return -1;
+		}
+	}
+
+	/* Pre-build the response and data work requests of every request object. */
+	for (i = 0; i < rqpair->max_queue_depth; i++) {
+		rdma_req = &rqpair->reqs[i];
+
+		rdma_req->req.qpair = &rqpair->qpair;
+		rdma_req->req.cmd = NULL;
+
+		/* Set up memory to send responses */
+		rdma_req->req.rsp = &rqpair->cpls[i];
+
+		rdma_req->rsp.sgl[0].addr = (uintptr_t)&rqpair->cpls[i];
+		rdma_req->rsp.sgl[0].length = sizeof(rqpair->cpls[i]);
+		rdma_req->rsp.sgl[0].lkey = rqpair->cpls_mr->lkey;
+
+		rdma_req->rsp.wr.wr_id = (uintptr_t)rdma_req;
+		rdma_req->rsp.wr.next = NULL;
+		rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+		rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED;
+		rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl;
+		rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl);
+
+		/* Set up memory for data buffers */
+		rdma_req->data.wr.wr_id = (uintptr_t)rdma_req;
+		rdma_req->data.wr.next = NULL;
+		rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED;
+		rdma_req->data.wr.sg_list = rdma_req->data.sgl;
+		rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl);
+
+		/* Initialize request state to FREE */
+		rdma_req->state = RDMA_REQUEST_STATE_FREE;
+		TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link);
+		rqpair->state_cntr[rdma_req->state]++;
+	}
+
+	return 0;
+}
+
+/*
+ * Post the RDMA READ work request that brings the data of a host-to-controller
+ * request into the target.
+ * Returns 0 when the work request was posted, -1 when ibv_post_send() failed.
+ */
+static int
+request_transfer_in(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvmf_rdma_request *rdma_req;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	struct ibv_send_wr *read_wr;
+	struct ibv_send_wr *bad_wr = NULL;
+
+	rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	read_wr = &rdma_req->data.wr;
+
+	assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
+
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA READ POSTED. Request: %p Connection: %p\n", req, qpair);
+
+	/* The data WR is posted on its own; make sure nothing is chained to it. */
+	read_wr->opcode = IBV_WR_RDMA_READ;
+	read_wr->next = NULL;
+
+	if (ibv_post_send(rqpair->cm_id->qp, read_wr, &bad_wr) != 0) {
+		SPDK_ERRLOG("Unable to transfer data from host to target\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Complete a request towards the host: advance the submission queue head,
+ * re-post the request's RECV, then post the response SEND - preceded by an
+ * RDMA WRITE of the data when the command succeeded and transfers data from
+ * controller to host.
+ *
+ * *data_posted is set to 1 when the RDMA WRITE was chained in, 0 otherwise.
+ * Returns 0 on success or the non-zero return value of the failing
+ * ibv_post_recv()/ibv_post_send() call.
+ */
+static int
+request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
+{
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	struct spdk_nvmf_rdma_request *rdma_req;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	struct ibv_recv_wr *bad_recv_wr = NULL;
+	struct ibv_send_wr *first_wr, *bad_send_wr = NULL;
+	int rc;
+
+	*data_posted = 0;
+	rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	/* Advance our sq_head pointer, wrapping back to 0 after sq_head_max. */
+	qpair->sq_head = (qpair->sq_head == qpair->sq_head_max) ? 0 : qpair->sq_head + 1;
+	rsp->sqhd = qpair->sq_head;
+
+	/* Post the capsule to the recv buffer */
+	assert(rdma_req->recv != NULL);
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv,
+		      rqpair);
+	rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr);
+	if (rc != 0) {
+		SPDK_ERRLOG("Unable to re-post rx descriptor\n");
+		return rc;
+	}
+	rdma_req->recv = NULL;
+
+	/* Build the response which consists of an optional
+	 * RDMA WRITE to transfer data, plus an RDMA SEND
+	 * containing the response.
+	 */
+	first_wr = &rdma_req->rsp.wr;
+
+	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
+	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA WRITE POSTED. Request: %p Connection: %p\n", req, qpair);
+
+		/* Chain WRITE -> SEND so both go out with a single post. */
+		rdma_req->data.wr.opcode = IBV_WR_RDMA_WRITE;
+		rdma_req->data.wr.next = first_wr;
+		first_wr = &rdma_req->data.wr;
+		*data_posted = 1;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA SEND POSTED. Request: %p Connection: %p\n", req, qpair);
+
+	/* Send the completion */
+	rc = ibv_post_send(rqpair->cm_id->qp, first_wr, &bad_send_wr);
+	if (rc != 0) {
+		SPDK_ERRLOG("Unable to send response capsule\n");
+	}
+
+	return rc;
+}
+
+/*
+ * Accept a pending RDMA CM connection request on behalf of rqpair.
+ *
+ * The accept private data carries record format 0 and the negotiated queue
+ * depth (crqsize). For RDMA_PS_TCP ids the responder resources are set to 0
+ * and the initiator depth to rqpair->max_rw_depth.
+ *
+ * Returns the return value of rdma_accept(): 0 on success, non-zero on failure.
+ */
+static int
+spdk_nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	struct spdk_nvmf_rdma_accept_private_data accept_data;
+	struct rdma_conn_param ctrlr_event_data = {};
+	int rc;
+
+	/* The whole structure is handed to the remote host; zero it first so that
+	 * any bytes not assigned below never carry stack contents. */
+	memset(&accept_data, 0, sizeof(accept_data));
+	accept_data.recfmt = 0;
+	accept_data.crqsize = rqpair->max_queue_depth;
+
+	ctrlr_event_data.private_data = &accept_data;
+	ctrlr_event_data.private_data_len = sizeof(accept_data);
+	if (id->ps == RDMA_PS_TCP) {
+		ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */
+		ctrlr_event_data.initiator_depth = rqpair->max_rw_depth;
+	}
+
+	rc = rdma_accept(id, &ctrlr_event_data);
+	if (rc) {
+		SPDK_ERRLOG("Error %d on rdma_accept\n", errno);
+	} else {
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Sent back the accept\n");
+	}
+
+	return rc;
+}
+
+/*
+ * Reject a pending RDMA CM connection request, passing `error` to the host as
+ * the status of the reject private data (record format 0).
+ * A failure of rdma_reject() is logged; there is nothing else to undo.
+ */
+static void
+spdk_nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error)
+{
+	struct spdk_nvmf_rdma_reject_private_data rej_data;
+	int rc;
+
+	/* Zero first so that no unassigned byte of the wire structure is sent. */
+	memset(&rej_data, 0, sizeof(rej_data));
+	rej_data.recfmt = 0;
+	rej_data.sts = error;
+
+	rc = rdma_reject(id, &rej_data, sizeof(rej_data));
+	if (rc) {
+		SPDK_ERRLOG("Error %d on rdma_reject\n", errno);
+	}
+}
+
+/*
+ * Handle an RDMA CM connect request.
+ *
+ * Validates the NVMe-oF private data of the request, negotiates the queue
+ * depth and the RDMA read/write depth from the target options, the local
+ * device limits and the values supplied by the host, allocates a new RDMA
+ * qpair describing the connection and hands it to cb_fn.
+ *
+ * Returns 0 once the qpair was passed to cb_fn. Returns -1 after rejecting
+ * the connection when the private data is missing, too short or has a
+ * non-zero record format, or when the qpair cannot be allocated.
+ */
+static int
+nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event,
+		  new_qpair_fn cb_fn)
+{
+	const struct spdk_nvmf_rdma_request_private_data *req_data;
+	struct rdma_conn_param *conn_param;
+	struct spdk_nvmf_rdma_port *port;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	uint16_t queue_depth;
+	uint16_t rw_depth;
+
+	assert(event->id != NULL); /* Impossible. Can't even reject the connection. */
+	assert(event->id->verbs != NULL); /* Impossible. No way to handle this. */
+
+	conn_param = &event->param.conn;
+	if (conn_param->private_data == NULL ||
+	    conn_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) {
+		SPDK_ERRLOG("connect request: no private data provided\n");
+		spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH);
+		return -1;
+	}
+
+	req_data = conn_param->private_data;
+	if (req_data->recfmt != 0) {
+		SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n");
+		spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT);
+		return -1;
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Connect Recv on fabric intf name %s, dev_name %s\n",
+		      event->id->verbs->device->name, event->id->verbs->device->dev_name);
+
+	port = event->listen_id->context;
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Listen Id was %p with verbs %p. ListenAddr: %p\n",
+		      event->listen_id, event->listen_id->verbs, port);
+
+	/* Figure out the supported queue depth. This is a multi-step process
+	 * that takes into account hardware maximums, host provided values,
+	 * and our target's internal memory limits */
+
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Calculating Queue Depth\n");
+
+	/* Start with the maximum queue depth allowed by the target */
+	queue_depth = transport->opts.max_queue_depth;
+	rw_depth = transport->opts.max_queue_depth;
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Target Max Queue Depth: %d\n",
+		      transport->opts.max_queue_depth);
+
+	/* Next check the local NIC's hardware limitations */
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA,
+		      "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n",
+		      port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom);
+	queue_depth = spdk_min(queue_depth, port->device->attr.max_qp_wr);
+	rw_depth = spdk_min(rw_depth, port->device->attr.max_qp_rd_atom);
+
+	/* Next check the remote NIC's hardware limitations */
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA,
+		      "Host (Initiator) NIC Max Incoming RDMA R/W operations: %d Max Outgoing RDMA R/W operations: %d\n",
+		      conn_param->initiator_depth, conn_param->responder_resources);
+	if (conn_param->initiator_depth > 0) {
+		rw_depth = spdk_min(rw_depth, conn_param->initiator_depth);
+	}
+
+	/* Finally apply the host software requested values. The private data was
+	 * already validated above (present and long enough), so it can be read
+	 * unconditionally here. */
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Receive Queue Size: %d\n", req_data->hrqsize);
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Send Queue Size: %d\n", req_data->hsqsize);
+	queue_depth = spdk_min(queue_depth, req_data->hrqsize);
+	queue_depth = spdk_min(queue_depth, req_data->hsqsize + 1);
+
+	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n",
+		      queue_depth, rw_depth);
+
+	rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair));
+	if (rqpair == NULL) {
+		SPDK_ERRLOG("Could not allocate new connection.\n");
+		spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
+		return -1;
+	}
+
+	/* Record the negotiated parameters and tie the qpair to its CM ids. */
+	rqpair->port = port;
+	rqpair->max_queue_depth = queue_depth;
+	rqpair->max_rw_depth = rw_depth;
+	rqpair->cm_id = event->id;
+	rqpair->listen_id = event->listen_id;
+	rqpair->qpair.transport = transport;
+	rqpair->max_sge = spdk_min(port->device->attr.max_sge, SPDK_NVMF_MAX_SGL_ENTRIES);
+	TAILQ_INIT(&rqpair->incoming_queue);
+	event->id->context = &rqpair->qpair;
+
+	cb_fn(&rqpair->qpair);
+
+	return 0;
+}
+
+/* Message handler posted by _nvmf_rdma_disconnect_retry() to the thread of the
+ * qpair's poll group. Releases the reference that nvmf_rdma_disconnect() took
+ * on the RDMA qpair, then starts the generic NVMe-oF qpair disconnect. */
+static void
+_nvmf_rdma_disconnect(void *ctx)
+{
+	struct spdk_nvmf_qpair *nvmf_qpair = ctx;
+	struct spdk_nvmf_rdma_qpair *rdma_qpair =
+		SPDK_CONTAINEROF(nvmf_qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	spdk_nvmf_rdma_qpair_dec_refcnt(rdma_qpair);
+	spdk_nvmf_qpair_disconnect(nvmf_qpair, NULL, NULL);
+}
+
+/* Forward a disconnect to the thread that owns the qpair's poll group. If the
+ * qpair has no poll group yet, re-post this function to the current thread so
+ * the check is repeated later. */
+static void
+_nvmf_rdma_disconnect_retry(void *ctx)
+{
+	struct spdk_nvmf_qpair *nvmf_qpair = ctx;
+	/* qpair->group is normally set and accessed only from the thread that
+	 * created the group, and we may not be on that thread here. It begins its
+	 * life as NULL, is assigned a pointer once and never changes afterwards, so
+	 * reading it and checking for non-NULL is thread safe in the x86_64 memory
+	 * model. */
+	struct spdk_nvmf_poll_group *owner = nvmf_qpair->group;
+
+	if (owner != NULL) {
+		spdk_thread_send_msg(owner->thread, _nvmf_rdma_disconnect, nvmf_qpair);
+		return;
+	}
+
+	/* Not assigned to a group yet, so the disconnect cannot be processed.
+	 * Send a message to ourself and try again. */
+	spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_disconnect_retry, nvmf_qpair);
+}
+
+/* Handle a CM event that tears down a connection.
+ *
+ * Looks up the qpair stored in the cm_id context, refreshes the cached ibv
+ * state, takes a reference on the RDMA qpair and hands the disconnect over to
+ * _nvmf_rdma_disconnect_retry().
+ *
+ * Returns 0 on success, -1 if the event carries no cm_id or the cm_id has no
+ * qpair attached. */
+static int
+nvmf_rdma_disconnect(struct rdma_cm_event *evt)
+{
+	struct spdk_nvmf_qpair *nvmf_qpair;
+	struct spdk_nvmf_rdma_qpair *rdma_qpair;
+
+	if (!evt->id) {
+		SPDK_ERRLOG("disconnect request: missing cm_id\n");
+		return -1;
+	}
+
+	nvmf_qpair = evt->id->context;
+	if (!nvmf_qpair) {
+		SPDK_ERRLOG("disconnect request: no active connection\n");
+		return -1;
+	}
+
+	rdma_qpair = SPDK_CONTAINEROF(nvmf_qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rdma_qpair->cm_id, 0);
+
+	spdk_nvmf_rdma_update_ibv_state(rdma_qpair);
+	/* This reference is dropped again in _nvmf_rdma_disconnect(). */
+	spdk_nvmf_rdma_qpair_inc_refcnt(rdma_qpair);
+
+	_nvmf_rdma_disconnect_retry(nvmf_qpair);
+	return 0;
+}
+
+#ifdef DEBUG
+/* Printable names for RDMA CM events, indexed by event->event when
+ * spdk_nvmf_process_cm_event() logs an incoming event. Only compiled when
+ * DEBUG is defined.
+ * NOTE(review): the order presumably mirrors enum rdma_cm_event_type from
+ * librdmacm - confirm whenever that enum gains new values. */
+static const char *CM_EVENT_STR[] = {
+ "RDMA_CM_EVENT_ADDR_RESOLVED",
+ "RDMA_CM_EVENT_ADDR_ERROR",
+ "RDMA_CM_EVENT_ROUTE_RESOLVED",
+ "RDMA_CM_EVENT_ROUTE_ERROR",
+ "RDMA_CM_EVENT_CONNECT_REQUEST",
+ "RDMA_CM_EVENT_CONNECT_RESPONSE",
+ "RDMA_CM_EVENT_CONNECT_ERROR",
+ "RDMA_CM_EVENT_UNREACHABLE",
+ "RDMA_CM_EVENT_REJECTED",
+ "RDMA_CM_EVENT_ESTABLISHED",
+ "RDMA_CM_EVENT_DISCONNECTED",
+ "RDMA_CM_EVENT_DEVICE_REMOVAL",
+ "RDMA_CM_EVENT_MULTICAST_JOIN",
+ "RDMA_CM_EVENT_MULTICAST_ERROR",
+ "RDMA_CM_EVENT_ADDR_CHANGE",
+ "RDMA_CM_EVENT_TIMEWAIT_EXIT"
+};
+#endif /* DEBUG */
+
+/*
+ * Drain all pending events from the transport's RDMA CM event channel.
+ *
+ * Connect requests are passed to nvmf_rdma_connect() (which reports new qpairs
+ * through cb_fn); disconnect and device-removal events are passed to
+ * nvmf_rdma_disconnect(). All other event types are ignored. Every event that
+ * was fetched is acknowledged. The loop ends when rdma_get_cm_event() fails;
+ * EAGAIN/EWOULDBLOCK are not reported as errors.
+ */
+static void
+spdk_nvmf_process_cm_event(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct rdma_cm_event *event;
+	int rc;
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	if (rtransport->event_channel == NULL) {
+		return;
+	}
+
+	while (1) {
+		rc = rdma_get_cm_event(rtransport->event_channel, &event);
+		if (rc != 0) {
+			if (errno != EAGAIN && errno != EWOULDBLOCK) {
+				SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno));
+			}
+			break;
+		}
+
+		/* Bounds-check the lookup: the switch below has a default case for
+		 * unexpected event codes, so the name table must not be indexed
+		 * blindly. CM_EVENT_STR exists only under DEBUG; as before, this
+		 * relies on SPDK_DEBUGLOG discarding its arguments otherwise. */
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Acceptor Event: %s\n",
+			      (size_t)event->event < sizeof(CM_EVENT_STR) / sizeof(CM_EVENT_STR[0]) ?
+			      CM_EVENT_STR[event->event] : "UNKNOWN");
+
+		spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event);
+
+		switch (event->event) {
+		case RDMA_CM_EVENT_ADDR_RESOLVED:
+		case RDMA_CM_EVENT_ADDR_ERROR:
+		case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		case RDMA_CM_EVENT_ROUTE_ERROR:
+			/* No action required. The target never attempts to resolve routes. */
+			break;
+		case RDMA_CM_EVENT_CONNECT_REQUEST:
+			rc = nvmf_rdma_connect(transport, event, cb_fn);
+			if (rc < 0) {
+				SPDK_ERRLOG("Unable to process connect event. rc: %d\n", rc);
+			}
+			break;
+		case RDMA_CM_EVENT_CONNECT_RESPONSE:
+			/* The target never initiates a new connection. So this will not occur. */
+			break;
+		case RDMA_CM_EVENT_CONNECT_ERROR:
+			/* Can this happen? The docs say it can, but not sure what causes it. */
+			break;
+		case RDMA_CM_EVENT_UNREACHABLE:
+		case RDMA_CM_EVENT_REJECTED:
+			/* These only occur on the client side. */
+			break;
+		case RDMA_CM_EVENT_ESTABLISHED:
+			/* TODO: Should we be waiting for this event anywhere? */
+			break;
+		case RDMA_CM_EVENT_DISCONNECTED:
+		case RDMA_CM_EVENT_DEVICE_REMOVAL:
+			rc = nvmf_rdma_disconnect(event);
+			if (rc < 0) {
+				SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
+			}
+			break;
+		case RDMA_CM_EVENT_MULTICAST_JOIN:
+		case RDMA_CM_EVENT_MULTICAST_ERROR:
+			/* Multicast is not used */
+			break;
+		case RDMA_CM_EVENT_ADDR_CHANGE:
+			/* Not utilizing this event */
+			break;
+		case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+			/* For now, do nothing. The target never re-uses queue pairs. */
+			break;
+		default:
+			SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event);
+			break;
+		}
+
+		rdma_ack_cm_event(event);
+	}
+}
+
+/* Memory map notification callback for one RDMA device (passed as cb_ctx).
+ *
+ * On REGISTER the region is registered with the device's protection domain
+ * and the resulting ibv_mr pointer is stored as the map translation. On
+ * UNREGISTER the stored ibv_mr is looked up, the translation is cleared and
+ * the memory region is deregistered.
+ *
+ * Returns 0, or -1 if ibv_reg_mr() fails. */
+static int
+spdk_nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
+			  enum spdk_mem_map_notify_action action,
+			  void *vaddr, size_t size)
+{
+	struct spdk_nvmf_rdma_device *rdma_dev = cb_ctx;
+	struct ibv_pd *prot_domain = rdma_dev->pd;
+	struct ibv_mr *region;
+
+	if (action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
+		region = ibv_reg_mr(prot_domain, vaddr, size,
+				    IBV_ACCESS_LOCAL_WRITE |
+				    IBV_ACCESS_REMOTE_READ |
+				    IBV_ACCESS_REMOTE_WRITE);
+		if (region == NULL) {
+			SPDK_ERRLOG("ibv_reg_mr() failed\n");
+			return -1;
+		}
+		spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)region);
+	} else if (action == SPDK_MEM_MAP_NOTIFY_UNREGISTER) {
+		/* Fetch the ibv_mr before its translation entry is wiped. */
+		region = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
+		spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
+		if (region != NULL) {
+			ibv_dereg_mr(region);
+		}
+	}
+
+	return 0;
+}
+
+typedef enum spdk_nvme_data_transfer spdk_nvme_data_transfer_t;
+
+/* Work out the data transfer direction of the command attached to rdma_req.
+ *
+ * The direction is derived from the fabrics command type or the NVMe opcode.
+ * Get/Set Features on the admin queue (qid 0) only move data for a few feature
+ * identifiers, and any command whose first SGL descriptor has length 0 is
+ * reported as SPDK_NVME_DATA_NONE.
+ *
+ * When SPDK_CONFIG_RDMA_SEND_WITH_INVAL is defined, the response work request
+ * is also reset to a plain IBV_WR_SEND with no immediate data. */
+static spdk_nvme_data_transfer_t
+spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req)
+{
+	struct spdk_nvme_cmd *nvme_cmd = &rdma_req->req.cmd->nvme_cmd;
+	struct spdk_nvme_sgl_descriptor *desc = &nvme_cmd->dptr.sgl1;
+	enum spdk_nvme_data_transfer direction;
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+	rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+	rdma_req->rsp.wr.imm_data = 0;
+#endif
+
+	/* Figure out data transfer direction */
+	if (nvme_cmd->opc == SPDK_NVME_OPC_FABRIC) {
+		direction = spdk_nvme_opc_get_data_transfer(rdma_req->req.cmd->nvmf_cmd.fctype);
+	} else {
+		direction = spdk_nvme_opc_get_data_transfer(nvme_cmd->opc);
+
+		/* Some admin commands are special cases */
+		if (rdma_req->req.qpair->qid == 0 &&
+		    (nvme_cmd->opc == SPDK_NVME_OPC_GET_FEATURES ||
+		     nvme_cmd->opc == SPDK_NVME_OPC_SET_FEATURES)) {
+			uint32_t feature_id = nvme_cmd->cdw10 & 0xff;
+
+			if (feature_id != SPDK_NVME_FEAT_LBA_RANGE_TYPE &&
+			    feature_id != SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION &&
+			    feature_id != SPDK_NVME_FEAT_HOST_IDENTIFIER) {
+				direction = SPDK_NVME_DATA_NONE;
+			}
+		}
+	}
+
+	if (direction == SPDK_NVME_DATA_NONE) {
+		return direction;
+	}
+
+	/* Even for commands that may transfer data, they could have specified 0 length.
+	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
+	 */
+	switch (desc->generic.type) {
+	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
+		return desc->keyed.length == 0 ? SPDK_NVME_DATA_NONE : direction;
+	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
+	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
+	case SPDK_NVME_SGL_TYPE_SEGMENT:
+	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
+	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
+		return desc->unkeyed.length == 0 ? SPDK_NVME_DATA_NONE : direction;
+	default:
+		return direction;
+	}
+}
+
+/*
+ * Take buffers from the transport's data buffer pool until rdma_req->req.length
+ * bytes are covered, and describe them in both the request's iov array and the
+ * scatter-gather list of the RDMA data work request.
+ *
+ * Each pool element is remembered unmodified in rdma_req->data.buffers[i]; the
+ * address actually used for data (iov_base / sg_list addr) is that pointer
+ * rounded up with NVMF_DATA_BUFFER_MASK. Each iov covers at most
+ * opts.io_unit_size bytes.
+ *
+ * Returns 0 on success (data_from_pool is set to true) or -ENOMEM if the pool
+ * ran out, in which case every buffer taken so far is given back and iovcnt is
+ * reset to 0.
+ */
+static int
+spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
+				 struct spdk_nvmf_rdma_device *device,
+				 struct spdk_nvmf_rdma_request *rdma_req)
+{
+	void *buf = NULL;
+	uint32_t length = rdma_req->req.length;
+	uint32_t i = 0;
+
+	rdma_req->req.iovcnt = 0;
+	while (length) {
+		buf = spdk_mempool_get(rtransport->data_buf_pool);
+		if (!buf) {
+			goto nomem;
+		}
+
+		/* Round the pool element up to the data buffer alignment. The
+		 * arithmetic is done on uintptr_t rather than on a void pointer. */
+		rdma_req->req.iov[i].iov_base = (void *)(((uintptr_t)buf + NVMF_DATA_BUFFER_MASK) &
+						 ~NVMF_DATA_BUFFER_MASK);
+		rdma_req->req.iov[i].iov_len = spdk_min(length, rtransport->transport.opts.io_unit_size);
+		rdma_req->req.iovcnt++;
+		rdma_req->data.buffers[i] = buf;
+		rdma_req->data.wr.sg_list[i].addr = (uintptr_t)(rdma_req->req.iov[i].iov_base);
+		rdma_req->data.wr.sg_list[i].length = rdma_req->req.iov[i].iov_len;
+		rdma_req->data.wr.sg_list[i].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
+						     (uint64_t)buf, NULL))->lkey;
+
+		length -= rdma_req->req.iov[i].iov_len;
+		i++;
+	}
+
+	rdma_req->data_from_pool = true;
+
+	return 0;
+
+nomem:
+	while (i) {
+		i--;
+		/* Give back the pointer that came out of the pool. iov_base is the
+		 * aligned address and may differ from it, so it must not be put. */
+		spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]);
+		rdma_req->data.buffers[i] = NULL;
+		rdma_req->req.iov[i].iov_base = NULL;
+		rdma_req->req.iov[i].iov_len = 0;
+
+		rdma_req->data.wr.sg_list[i].addr = 0;
+		rdma_req->data.wr.sg_list[i].length = 0;
+		rdma_req->data.wr.sg_list[i].lkey = 0;
+	}
+	rdma_req->req.iovcnt = 0;
+	return -ENOMEM;
+}
+
+/*
+ * Interpret the first SGL descriptor (dptr.sgl1) of the request's command and
+ * set up the request's data buffers accordingly.
+ *
+ * Two descriptor forms are accepted:
+ * - keyed data block with subtype ADDRESS or INVALIDATE_KEY: buffers are taken
+ *   from the transport pool via spdk_nvmf_rdma_request_fill_iovs() and the
+ *   remote key/address are stored in the RDMA data work request;
+ * - data block with subtype OFFSET: in-capsule data, req.data points into the
+ *   receive buffer at the given offset.
+ *
+ * Returns 0 on success. It also returns 0 when no pool buffers were available;
+ * in that case this function does not assign req.data. Returns -1 with
+ * rsp->status.sc set when the descriptor is invalid.
+ */
+static int
+spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_device *device,
+ struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct spdk_nvme_cmd *cmd;
+ struct spdk_nvme_cpl *rsp;
+ struct spdk_nvme_sgl_descriptor *sgl;
+
+ cmd = &rdma_req->req.cmd->nvme_cmd;
+ rsp = &rdma_req->req.rsp->nvme_cpl;
+ sgl = &cmd->dptr.sgl1;
+
+ if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
+ (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
+ sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
+ if (sgl->keyed.length > rtransport->transport.opts.max_io_size) {
+ SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
+ sgl->keyed.length, rtransport->transport.opts.max_io_size);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+ /* Only switch the response to SEND_WITH_INV when the device advertises
+ * memory management extensions and the host asked for invalidation. */
+ if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+ if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
+ rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
+ rdma_req->rsp.wr.imm_data = sgl->keyed.key;
+ }
+ }
+#endif
+
+ /* fill request length and populate iovs */
+ rdma_req->req.length = sgl->keyed.length;
+
+ if (spdk_nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req) < 0) {
+ /* No available buffers. Queue this request up. */
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
+ /* Not an error: req.data is not assigned here, which the caller
+ * checks to decide that the request must keep waiting. */
+ return 0;
+ }
+
+ /* backward compatible */
+ rdma_req->req.data = rdma_req->req.iov[0].iov_base;
+
+ /* rdma wr specifics */
+ rdma_req->data.wr.num_sge = rdma_req->req.iovcnt;
+ rdma_req->data.wr.wr.rdma.rkey = sgl->keyed.key;
+ rdma_req->data.wr.wr.rdma.remote_addr = sgl->address;
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
+ rdma_req->req.iovcnt);
+
+ return 0;
+ } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+ uint64_t offset = sgl->address;
+ uint32_t max_len = rtransport->transport.opts.in_capsule_data_size;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
+ offset, sgl->unkeyed.length);
+
+ if (offset > max_len) {
+ SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
+ offset, max_len);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
+ return -1;
+ }
+ /* offset <= max_len was checked above, so the narrowing cast and the
+ * subtraction cannot wrap. max_len is now the space left after offset. */
+ max_len -= (uint32_t)offset;
+
+ if (sgl->unkeyed.length > max_len) {
+ SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
+ sgl->unkeyed.length, max_len);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+
+ /* The data already arrived with the command; no pool buffer is used. */
+ rdma_req->req.data = rdma_req->recv->buf + offset;
+ rdma_req->data_from_pool = false;
+ rdma_req->req.length = sgl->unkeyed.length;
+
+ rdma_req->req.iov[0].iov_base = rdma_req->req.data;
+ rdma_req->req.iov[0].iov_len = rdma_req->req.length;
+ rdma_req->req.iovcnt = 1;
+
+ return 0;
+ }
+
+ SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
+ sgl->generic.type, sgl->generic.subtype);
+ rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
+ return -1;
+}
+
+/*
+ * Advance one RDMA request through its state machine as far as it can go
+ * without waiting for an external event.
+ *
+ * The switch is re-run for as long as the request's state keeps changing, so
+ * several transitions may happen in one call. States documented below as
+ * needing "external code" are left unchanged here.
+ *
+ * Returns true if the request changed state at least once inside the loop.
+ */
+static bool
+spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl;
+ int rc;
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+ enum spdk_nvmf_rdma_request_state prev_state;
+ bool progress = false;
+ int data_posted;
+ int cur_rdma_rw_depth;
+
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ device = rqpair->port->device;
+
+ assert(rdma_req->state != RDMA_REQUEST_STATE_FREE);
+
+ /* If the queue pair is in an error state, force the request to the completed state
+ * to release resources. */
+ if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ /* A request waiting for a buffer is linked on the pending queue and
+ * has to be unlinked before its state is overridden. */
+ if (rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER) {
+ TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+ }
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+ }
+
+ /* The loop here is to allow for several back-to-back state changes. */
+ do {
+ prev_state = rdma_req->state;
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p entering state %d\n", rdma_req, prev_state);
+
+ switch (rdma_req->state) {
+ case RDMA_REQUEST_STATE_FREE:
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_NEW:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ rdma_recv = rdma_req->recv;
+
+ /* The first element of the SGL is the NVMe command */
+ rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr;
+ memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp));
+
+ TAILQ_REMOVE(&rqpair->incoming_queue, rdma_recv, link);
+
+ if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+ break;
+ }
+
+ /* The next state transition depends on the data transfer needs of this request. */
+ rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req);
+
+ /* If no data to transfer, ready to execute. */
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE);
+ break;
+ }
+
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEED_BUFFER);
+ TAILQ_INSERT_TAIL(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+ break;
+ case RDMA_REQUEST_STATE_NEED_BUFFER:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE);
+
+ if (rdma_req != TAILQ_FIRST(&rqpair->ch->pending_data_buf_queue)) {
+ /* This request needs to wait in line to obtain a buffer */
+ break;
+ }
+
+ /* Try to get a data buffer */
+ rc = spdk_nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req);
+ if (rc < 0) {
+ TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+ /* NOTE(review): spdk_nvmf_rdma_request_parse_sgl() already stored a
+ * more specific status code in rsp->status.sc; it is overwritten
+ * here. Confirm that this is intended. */
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE);
+ break;
+ }
+
+ /* parse_sgl returns 0 without assigning req.data when the pool is
+ * empty; the request then stays at the head of the pending queue. */
+ if (!rdma_req->req.data) {
+ /* No buffers available. */
+ break;
+ }
+
+ TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+
+ /* If data is transferring from host to controller and the data didn't
+ * arrive using in capsule data, we need to do a transfer from the host.
+ */
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);
+ break;
+ }
+
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE);
+ break;
+ case RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ if (rdma_req != TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING])) {
+ /* This request needs to wait in line to perform RDMA */
+ break;
+ }
+ cur_rdma_rw_depth = spdk_nvmf_rdma_cur_rw_depth(rqpair);
+
+ if (cur_rdma_rw_depth >= rqpair->max_rw_depth) {
+ /* R/W queue is full, need to wait */
+ break;
+ }
+
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ rc = request_transfer_in(&rdma_req->req);
+ if (!rc) {
+ spdk_nvmf_rdma_request_set_state(rdma_req,
+ RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+ } else {
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ spdk_nvmf_rdma_request_set_state(rdma_req,
+ RDMA_REQUEST_STATE_READY_TO_COMPLETE);
+ }
+ } else if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ /* The data transfer will be kicked off from
+ * RDMA_REQUEST_STATE_READY_TO_COMPLETE state.
+ */
+ spdk_nvmf_rdma_request_set_state(rdma_req,
+ RDMA_REQUEST_STATE_READY_TO_COMPLETE);
+ } else {
+ SPDK_ERRLOG("Cannot perform data transfer, unknown state: %u\n",
+ rdma_req->req.xfer);
+ assert(0);
+ }
+ break;
+ case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_READY_TO_EXECUTE:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* The state is set to EXECUTING before the request is handed to
+ * spdk_nvmf_request_exec(). */
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTING);
+ spdk_nvmf_request_exec(&rdma_req->req);
+ break;
+ case RDMA_REQUEST_STATE_EXECUTING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_EXECUTED:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);
+ } else {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE);
+ }
+ break;
+ case RDMA_REQUEST_STATE_READY_TO_COMPLETE:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ rc = request_transfer_out(&rdma_req->req, &data_posted);
+ assert(rc == 0); /* No good way to handle this currently */
+ /* With assertions compiled out, a failure drops the request straight
+ * to COMPLETED so its resources are still released. */
+ if (rc) {
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+ } else {
+ spdk_nvmf_rdma_request_set_state(rdma_req,
+ data_posted ?
+ RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST :
+ RDMA_REQUEST_STATE_COMPLETING);
+ }
+ break;
+ case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_COMPLETING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_COMPLETED:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ if (rdma_req->data_from_pool) {
+ /* Put the buffer/s back in the pool */
+ /* data.buffers[] holds the pointers as they came out of the pool;
+ * iov_base is the aligned address derived from them. */
+ for (uint32_t i = 0; i < rdma_req->req.iovcnt; i++) {
+ spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]);
+ rdma_req->req.iov[i].iov_base = NULL;
+ rdma_req->data.buffers[i] = NULL;
+ }
+ rdma_req->data_from_pool = false;
+ }
+ rdma_req->req.length = 0;
+ rdma_req->req.iovcnt = 0;
+ rdma_req->req.data = NULL;
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE);
+ break;
+ case RDMA_REQUEST_NUM_STATES:
+ default:
+ assert(0);
+ break;
+ }
+
+ if (rdma_req->state != prev_state) {
+ progress = true;
+ }
+ } while (rdma_req->state != prev_state);
+
+ return progress;
+}
+
+/* Public API callbacks begin here */
+
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
+#define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072
+#define SPDK_NVMF_RDMA_DEFAULT_IO_BUFFER_SIZE 131072
+
+/* Populate *opts with the RDMA transport defaults defined by the
+ * SPDK_NVMF_RDMA_DEFAULT_* macros. */
+static void
+spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
+{
+	/* Queue sizing */
+	opts->max_queue_depth		= SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH;
+	opts->max_aq_depth		= SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH;
+	opts->max_qpairs_per_ctrlr	= SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR;
+
+	/* Data sizing */
+	opts->max_io_size		= SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE;
+	opts->io_unit_size		= SPDK_NVMF_RDMA_DEFAULT_IO_BUFFER_SIZE;
+	opts->in_capsule_data_size	= SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE;
+}
+
+static int spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport);
+
+/*
+ * Create the RDMA transport.
+ *
+ * Allocates the transport, registers it as an io_device, creates a
+ * non-blocking RDMA CM event channel and the shared data buffer pool, then
+ * builds one spdk_nvmf_rdma_device (protection domain + memory map) per verbs
+ * context returned by rdma_get_devices(). Finally a pollfd array is built that
+ * covers the CM event channel plus each device's async event fd.
+ *
+ * opts->io_unit_size is clamped to opts->max_io_size; a resulting unit size of
+ * 0, or one that would need more than SPDK_NVMF_MAX_SGL_ENTRIES entries per
+ * I/O, is rejected.
+ *
+ * Returns the generic transport pointer, or NULL on any failure (everything
+ * set up so far is released through spdk_nvmf_rdma_destroy()).
+ */
+static struct spdk_nvmf_transport *
+spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
+{
+	int rc;
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_device *device, *tmp;
+	struct ibv_context **contexts;
+	uint32_t i;
+	int flag;
+	uint32_t sge_count;
+
+	const struct spdk_mem_map_ops nvmf_rdma_map_ops = {
+		.notify_cb = spdk_nvmf_rdma_mem_notify,
+		.are_contiguous = NULL
+	};
+
+	rtransport = calloc(1, sizeof(*rtransport));
+	if (!rtransport) {
+		return NULL;
+	}
+
+	if (pthread_mutex_init(&rtransport->lock, NULL)) {
+		SPDK_ERRLOG("pthread_mutex_init() failed\n");
+		free(rtransport);
+		return NULL;
+	}
+
+	spdk_io_device_register(rtransport, spdk_nvmf_rdma_mgmt_channel_create,
+				spdk_nvmf_rdma_mgmt_channel_destroy,
+				sizeof(struct spdk_nvmf_rdma_mgmt_channel),
+				"rdma_transport");
+
+	TAILQ_INIT(&rtransport->devices);
+	TAILQ_INIT(&rtransport->ports);
+
+	rtransport->transport.ops = &spdk_nvmf_transport_rdma;
+
+	SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n"
+		     " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
+		     " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
+		     " in_capsule_data_size=%d, max_aq_depth=%d\n",
+		     opts->max_queue_depth,
+		     opts->max_io_size,
+		     opts->max_qpairs_per_ctrlr,
+		     opts->io_unit_size,
+		     opts->in_capsule_data_size,
+		     opts->max_aq_depth);
+
+	/* I/O unit size cannot be larger than max I/O size */
+	if (opts->io_unit_size > opts->max_io_size) {
+		opts->io_unit_size = opts->max_io_size;
+	}
+
+	/* A unit size of 0 (given directly, or produced by the clamp above when
+	 * max_io_size is 0) would divide by zero in the SGE count below. */
+	if (opts->io_unit_size == 0) {
+		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	sge_count = opts->max_io_size / opts->io_unit_size;
+	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
+		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	rtransport->event_channel = rdma_create_event_channel();
+	if (rtransport->event_channel == NULL) {
+		SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno));
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	/* The event channel is polled, so reads from it must never block. */
+	flag = fcntl(rtransport->event_channel->fd, F_GETFL);
+	if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
+		SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n",
+			    rtransport->event_channel->fd, spdk_strerror(errno));
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	rtransport->data_buf_pool = spdk_mempool_create("spdk_nvmf_rdma",
+				    opts->max_queue_depth * 4, /* The 4 is arbitrarily chosen. Needs to be configurable. */
+				    opts->max_io_size + NVMF_DATA_BUFFER_ALIGNMENT,
+				    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+				    SPDK_ENV_SOCKET_ID_ANY);
+	if (!rtransport->data_buf_pool) {
+		SPDK_ERRLOG("Unable to allocate buffer pool for poll group\n");
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	contexts = rdma_get_devices(NULL);
+	if (contexts == NULL) {
+		SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	i = 0;
+	rc = 0;
+	while (contexts[i] != NULL) {
+		device = calloc(1, sizeof(*device));
+		if (!device) {
+			SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n");
+			rc = -ENOMEM;
+			break;
+		}
+		device->context = contexts[i];
+		rc = ibv_query_device(device->context, &device->attr);
+		if (rc != 0) {
+			/* ibv_query_device() reports failure as a non-zero errno value,
+			 * so any non-zero result is an error. Normalize it to a negative
+			 * value so the check after the loop aborts the creation. */
+			SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
+			free(device);
+			rc = -1;
+			break;
+		}
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+		if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) {
+			SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE,");
+			SPDK_WARNLOG("but the device with vendor ID %u does not.\n", device->attr.vendor_id);
+		}
+
+		/**
+		 * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE.
+		 * The Soft-RoCE RXE driver does not currently support send with invalidate,
+		 * but incorrectly reports that it does. There are changes making their way
+		 * through the kernel now that will enable this feature. When they are merged,
+		 * we can conditionally enable this feature.
+		 *
+		 * TODO: enable this for versions of the kernel rxe driver that support it.
+		 */
+		if (device->attr.vendor_id == 0) {
+			device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS);
+		}
+#endif
+
+		/* set up device context async ev fd as NON_BLOCKING */
+		flag = fcntl(device->context->async_fd, F_GETFL);
+		rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK);
+		if (rc < 0) {
+			SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n");
+			free(device);
+			break;
+		}
+
+		device->pd = ibv_alloc_pd(device->context);
+		if (!device->pd) {
+			SPDK_ERRLOG("Unable to allocate protection domain.\n");
+			free(device);
+			rc = -1;
+			break;
+		}
+
+		device->map = spdk_mem_map_alloc(0, &nvmf_rdma_map_ops, device);
+		if (!device->map) {
+			SPDK_ERRLOG("Unable to allocate memory map for new poll group\n");
+			ibv_dealloc_pd(device->pd);
+			free(device);
+			rc = -1;
+			break;
+		}
+
+		TAILQ_INSERT_TAIL(&rtransport->devices, device, link);
+		i++;
+	}
+	rdma_free_devices(contexts);
+
+	if (rc < 0) {
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	/* Set up poll descriptor array to monitor events from RDMA and IB
+	 * in a single poll syscall
+	 */
+	/* One slot for the CM event channel plus one per device (i devices). */
+	rtransport->npoll_fds = i + 1;
+	i = 0;
+	rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd));
+	if (rtransport->poll_fds == NULL) {
+		SPDK_ERRLOG("poll_fds allocation failed\n");
+		spdk_nvmf_rdma_destroy(&rtransport->transport);
+		return NULL;
+	}
+
+	rtransport->poll_fds[i].fd = rtransport->event_channel->fd;
+	rtransport->poll_fds[i++].events = POLLIN;
+
+	TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
+		rtransport->poll_fds[i].fd = device->context->async_fd;
+		rtransport->poll_fds[i++].events = POLLIN;
+	}
+
+	return &rtransport->transport;
+}
+
+/* Tear down an RDMA transport and free everything it owns: listening ports,
+ * the poll descriptor array, the CM event channel, all devices (memory map and
+ * protection domain), the data buffer pool, the io_device registration and the
+ * lock. An error is logged if the buffer pool does not hold
+ * max_queue_depth * 4 elements at this point.
+ *
+ * Always returns 0. */
+static int
+spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_port *port;
+	struct spdk_nvmf_rdma_device *device;
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	/* Pop listening ports one by one until the list is empty. */
+	while ((port = TAILQ_FIRST(&rtransport->ports)) != NULL) {
+		TAILQ_REMOVE(&rtransport->ports, port, link);
+		rdma_destroy_id(port->id);
+		free(port);
+	}
+
+	/* free(NULL) is a no-op, so no guard is needed. */
+	free(rtransport->poll_fds);
+
+	if (rtransport->event_channel != NULL) {
+		rdma_destroy_event_channel(rtransport->event_channel);
+	}
+
+	while ((device = TAILQ_FIRST(&rtransport->devices)) != NULL) {
+		TAILQ_REMOVE(&rtransport->devices, device, link);
+		if (device->map) {
+			spdk_mem_map_free(&device->map);
+		}
+		if (device->pd) {
+			ibv_dealloc_pd(device->pd);
+		}
+		free(device);
+	}
+
+	if (rtransport->data_buf_pool != NULL &&
+	    spdk_mempool_count(rtransport->data_buf_pool) != (transport->opts.max_queue_depth * 4)) {
+		SPDK_ERRLOG("transport buffer pool count is %zu but should be %u\n",
+			    spdk_mempool_count(rtransport->data_buf_pool),
+			    transport->opts.max_queue_depth * 4);
+	}
+
+	spdk_mempool_free(rtransport->data_buf_pool);
+	spdk_io_device_unregister(rtransport, NULL);
+	pthread_mutex_destroy(&rtransport->lock);
+	free(rtransport);
+
+	return 0;
+}
+
+/*
+ * Start listening on the address described by trid.
+ *
+ * If the transport already has a port with the same transport type, address
+ * family, address and service id, only its reference count is increased.
+ * Otherwise a new CM id is created, bound, put into listen mode, matched to
+ * the RDMA device whose verbs context it uses, and appended to the port list.
+ * The transport lock is held for the lookup and the whole setup.
+ *
+ * Returns 0 on success, -ENOMEM if the port cannot be allocated, -EINVAL for
+ * an unsupported address family, an unresolvable address or an unknown
+ * device, -1 if the bound id has no verbs context, or the negative result of
+ * the failing rdma_*() call. On every failure after rdma_create_id()
+ * succeeded, the CM id is destroyed again.
+ */
+static int
+spdk_nvmf_rdma_listen(struct spdk_nvmf_transport *transport,
+		      const struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_device *device;
+	struct spdk_nvmf_rdma_port *port_tmp, *port;
+	struct addrinfo *res;
+	struct addrinfo hints;
+	int family;
+	int rc;
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	port = calloc(1, sizeof(*port));
+	if (!port) {
+		return -ENOMEM;
+	}
+
+	/* Selectively copy the trid. Things like NQN don't matter here - that
+	 * mapping is enforced elsewhere.
+	 */
+	port->trid.trtype = SPDK_NVME_TRANSPORT_RDMA;
+	port->trid.adrfam = trid->adrfam;
+	snprintf(port->trid.traddr, sizeof(port->trid.traddr), "%s", trid->traddr);
+	snprintf(port->trid.trsvcid, sizeof(port->trid.trsvcid), "%s", trid->trsvcid);
+
+	pthread_mutex_lock(&rtransport->lock);
+	assert(rtransport->event_channel != NULL);
+	TAILQ_FOREACH(port_tmp, &rtransport->ports, link) {
+		if (spdk_nvme_transport_id_compare(&port_tmp->trid, &port->trid) == 0) {
+			port_tmp->ref++;
+			free(port);
+			/* Already listening at this address */
+			pthread_mutex_unlock(&rtransport->lock);
+			return 0;
+		}
+	}
+
+	rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP);
+	if (rc < 0) {
+		SPDK_ERRLOG("rdma_create_id() failed\n");
+		/* No CM id exists yet, so only the port itself is released. */
+		free(port);
+		pthread_mutex_unlock(&rtransport->lock);
+		return rc;
+	}
+
+	/* From here on every failure must also destroy port->id; all error paths
+	 * therefore leave through the err_destroy_id label. */
+	switch (port->trid.adrfam) {
+	case SPDK_NVMF_ADRFAM_IPV4:
+		family = AF_INET;
+		break;
+	case SPDK_NVMF_ADRFAM_IPV6:
+		family = AF_INET6;
+		break;
+	default:
+		SPDK_ERRLOG("Unhandled ADRFAM %d\n", port->trid.adrfam);
+		rc = -EINVAL;
+		goto err_destroy_id;
+	}
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = family;
+	hints.ai_flags = AI_NUMERICSERV;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_protocol = 0;
+
+	rc = getaddrinfo(port->trid.traddr, port->trid.trsvcid, &hints, &res);
+	if (rc) {
+		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc);
+		rc = -EINVAL;
+		goto err_destroy_id;
+	}
+
+	rc = rdma_bind_addr(port->id, res->ai_addr);
+	freeaddrinfo(res);
+
+	if (rc < 0) {
+		SPDK_ERRLOG("rdma_bind_addr() failed\n");
+		goto err_destroy_id;
+	}
+
+	if (!port->id->verbs) {
+		SPDK_ERRLOG("ibv_context is null\n");
+		rc = -1;
+		goto err_destroy_id;
+	}
+
+	rc = rdma_listen(port->id, 10); /* 10 = backlog */
+	if (rc < 0) {
+		SPDK_ERRLOG("rdma_listen() failed\n");
+		goto err_destroy_id;
+	}
+
+	/* Find the device created for the verbs context this id is bound to. */
+	TAILQ_FOREACH(device, &rtransport->devices, link) {
+		if (device->context == port->id->verbs) {
+			port->device = device;
+			break;
+		}
+	}
+	if (!port->device) {
+		SPDK_ERRLOG("Accepted a connection with verbs %p, but unable to find a corresponding device.\n",
+			    port->id->verbs);
+		rc = -EINVAL;
+		goto err_destroy_id;
+	}
+
+	SPDK_INFOLOG(SPDK_LOG_RDMA, "*** NVMf Target Listening on %s port %d ***\n",
+		     port->trid.traddr, ntohs(rdma_get_src_port(port->id)));
+
+	port->ref = 1;
+
+	TAILQ_INSERT_TAIL(&rtransport->ports, port, link);
+	pthread_mutex_unlock(&rtransport->lock);
+
+	return 0;
+
+err_destroy_id:
+	rdma_destroy_id(port->id);
+	free(port);
+	pthread_mutex_unlock(&rtransport->lock);
+	return rc;
+}
+
+/*
+ * Stop listening on the RDMA address described by _trid.
+ *
+ * Only the address family, transport address and service id of _trid are
+ * used to locate the port. A matching port loses one reference; when its
+ * reference count reaches zero the port is unlinked, its CM id is destroyed
+ * and the port is freed.
+ *
+ * Always returns 0, including when no port matches.
+ */
+static int
+spdk_nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
+			   const struct spdk_nvme_transport_id *_trid)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_port *port, *tmp;
+	struct spdk_nvme_transport_id trid = {};
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	/* Selectively copy the trid. Things like NQN don't matter here - that
+	 * mapping is enforced elsewhere.
+	 */
+	trid.trtype = SPDK_NVME_TRANSPORT_RDMA;
+	trid.adrfam = _trid->adrfam;
+	/* Bound each copy by the destination buffer itself rather than by a
+	 * field reached through the not-yet-assigned list iterator.
+	 */
+	snprintf(trid.traddr, sizeof(trid.traddr), "%s", _trid->traddr);
+	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", _trid->trsvcid);
+
+	pthread_mutex_lock(&rtransport->lock);
+	TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) {
+		if (spdk_nvme_transport_id_compare(&port->trid, &trid) == 0) {
+			assert(port->ref > 0);
+			port->ref--;
+			if (port->ref == 0) {
+				/* Last listener on this address: tear the port down. */
+				TAILQ_REMOVE(&rtransport->ports, port, link);
+				rdma_destroy_id(port->id);
+				free(port);
+			}
+			break;
+		}
+	}
+
+	pthread_mutex_unlock(&rtransport->lock);
+	return 0;
+}
+
+/*
+ * Return true when both the current queue depth and the current RDMA
+ * read/write depth of the qpair are zero, false otherwise.
+ */
+static bool
+spdk_nvmf_rdma_qpair_is_idle(struct spdk_nvmf_qpair *qpair)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	int queue_depth, rw_depth;
+
+	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	/* Both counters are always sampled, queue depth first. */
+	queue_depth = spdk_nvmf_rdma_cur_queue_depth(rqpair);
+	rw_depth = spdk_nvmf_rdma_cur_rw_depth(rqpair);
+
+	return queue_depth == 0 && rw_depth == 0;
+}
+
+/*
+ * Push queued work for a qpair through the request state machine.
+ *
+ * Three queues are serviced in priority order: requests waiting to start a
+ * data transfer, requests on the channel's pending_data_buf_queue, and
+ * finally newly received commands on the incoming queue. Each loop stops as
+ * soon as spdk_nvmf_rdma_request_process() returns false.
+ *
+ * Between the second and third stage the qpair is destroyed if it has been
+ * flagged as disconnected, and new commands are not started while the
+ * cached QP state is IBV_QPS_ERR.
+ */
+static void
+spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
+ struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+
+ /* We process I/O in the data transfer pending queue at the highest priority. */
+ TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING],
+ state_link, req_tmp) {
+ if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+ break;
+ }
+ }
+
+ /* The second highest priority is I/O waiting on memory buffers. */
+ TAILQ_FOREACH_SAFE(rdma_req, &rqpair->ch->pending_data_buf_queue, link,
+ req_tmp) {
+ if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+ break;
+ }
+ }
+
+ /* A disconnected qpair is torn down here; rqpair must not be used afterwards. */
+ if (rqpair->qpair_disconnected) {
+ spdk_nvmf_rdma_qpair_destroy(rqpair);
+ return;
+ }
+
+ /* Do not process newly received commands if qp is in ERROR state,
+ * wait till the recovery is complete.
+ */
+ if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+ return;
+ }
+
+ /* The lowest priority is processing newly received commands */
+ TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) {
+ /* Each receive needs a free request object to be paired with. */
+ if (TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE])) {
+ break;
+ }
+
+ rdma_req = TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE]);
+ rdma_req->recv = rdma_recv;
+ spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW);
+ if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+ break;
+ }
+ }
+}
+
+/*
+ * Move every request currently on rqpair's queue for the given state into
+ * RDMA_REQUEST_STATE_COMPLETED and run it through the request state machine.
+ */
+static void
+spdk_nvmf_rdma_drain_state_queue(struct spdk_nvmf_rdma_qpair *rqpair,
+				 enum spdk_nvmf_rdma_request_state state)
+{
+	struct spdk_nvmf_rdma_request *cur, *next;
+
+	TAILQ_FOREACH_SAFE(cur, &rqpair->state_queue[state], state_link, next) {
+		/* The transport is looked up per request, from the request's own qpair. */
+		struct spdk_nvmf_rdma_transport *rtransport =
+			SPDK_CONTAINEROF(cur->req.qpair->transport,
+					 struct spdk_nvmf_rdma_transport, transport);
+
+		spdk_nvmf_rdma_request_set_state(cur, RDMA_REQUEST_STATE_COMPLETED);
+		spdk_nvmf_rdma_request_process(rtransport, cur);
+	}
+}
+
+/*
+ * Try to bring a qpair whose QP is in the ERR or RESET state back to RTS.
+ *
+ * Nothing is done while the qpair is not idle. If the cached QP state is
+ * neither ERR nor RESET, or any state transition fails, the qpair is
+ * disconnected instead. On success the QP is stepped
+ * ERR -> RESET -> INIT -> RTR -> RTS and pending work is processed.
+ */
+static void
+spdk_nvmf_rdma_qpair_recover(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ enum ibv_qp_state state, next_state;
+ int recovered;
+ struct spdk_nvmf_rdma_transport *rtransport;
+
+ if (!spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
+ /* There must be outstanding requests down to media.
+ * If so, wait till they're complete.
+ */
+ assert(!TAILQ_EMPTY(&rqpair->qpair.outstanding));
+ return;
+ }
+
+ state = rqpair->ibv_attr.qp_state;
+ next_state = state;
+
+ SPDK_NOTICELOG("RDMA qpair %u is in state: %s\n",
+ rqpair->qpair.qid,
+ str_ibv_qp_state[state]);
+
+ /* Recovery is only attempted from the ERR and RESET states. */
+ if (!(state == IBV_QPS_ERR || state == IBV_QPS_RESET)) {
+ SPDK_ERRLOG("Can't recover RDMA qpair %u from the state: %s\n",
+ rqpair->qpair.qid,
+ str_ibv_qp_state[state]);
+ spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
+ return;
+ }
+
+ recovered = 0;
+ while (!recovered) {
+ /* Pick the next state in the ERR -> RESET -> INIT -> RTR -> RTS chain. */
+ switch (state) {
+ case IBV_QPS_ERR:
+ next_state = IBV_QPS_RESET;
+ break;
+ case IBV_QPS_RESET:
+ next_state = IBV_QPS_INIT;
+ break;
+ case IBV_QPS_INIT:
+ next_state = IBV_QPS_RTR;
+ break;
+ case IBV_QPS_RTR:
+ next_state = IBV_QPS_RTS;
+ break;
+ case IBV_QPS_RTS:
+ recovered = 1;
+ break;
+ default:
+ SPDK_ERRLOG("RDMA qpair %u unexpected state for recovery: %u\n",
+ rqpair->qpair.qid, state);
+ goto error;
+ }
+ /* Do not transition into same state */
+ if (next_state == state) {
+ break;
+ }
+
+ if (spdk_nvmf_rdma_set_ibv_state(rqpair, next_state)) {
+ goto error;
+ }
+
+ state = next_state;
+ }
+
+ rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ struct spdk_nvmf_rdma_transport,
+ transport);
+
+ /* The QP reached RTS: resume any work that queued up meanwhile. */
+ spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+
+ return;
+error:
+ SPDK_NOTICELOG("RDMA qpair %u: recovery failed, disconnecting...\n",
+ rqpair->qpair.qid);
+ spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
+}
+
+/*
+ * Clean up only the states that can be aborted at any time.
+ *
+ * Requests of this qpair that are waiting for a data buffer are first
+ * unlinked from the channel's pending_data_buf_queue, then every queue for
+ * an abortable state is drained into RDMA_REQUEST_STATE_COMPLETED.
+ */
+static void
+_spdk_nvmf_rdma_qp_cleanup_safe_states(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEW);
+
+	/* The per-state queues are chained through state_link; the link field
+	 * belongs to pending_data_buf_queue. Walk this qpair's NEED_BUFFER
+	 * queue via state_link so that only its own requests are unlinked
+	 * from the pending buffer queue.
+	 */
+	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_NEED_BUFFER],
+			   state_link, req_tmp) {
+		TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+	}
+
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEED_BUFFER);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_EXECUTE);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_EXECUTED);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_COMPLETE);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETED);
+}
+
+/*
+ * Drain every request state of the qpair, including the in-flight ones.
+ * It is only safe to use if the rest of the software stack has been shut down.
+ */
+static void
+_spdk_nvmf_rdma_qp_cleanup_all_states(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	/* States drained on top of the always-abortable ones, in this order. */
+	static const enum spdk_nvmf_rdma_request_state in_flight_states[] = {
+		RDMA_REQUEST_STATE_EXECUTING,
+		RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+		RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+		RDMA_REQUEST_STATE_COMPLETING,
+	};
+	unsigned int idx;
+
+	_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
+	for (idx = 0; idx < sizeof(in_flight_states) / sizeof(in_flight_states[0]); idx++) {
+		spdk_nvmf_rdma_drain_state_queue(rqpair, in_flight_states[idx]);
+	}
+}
+
+/*
+ * Thread message handler for a qpair whose QP has entered the error state.
+ *
+ * Drops one qpair reference, then - if the cached QP state is still
+ * IBV_QPS_ERR - aborts the AER on admin queues, cleans up the abortable
+ * request states and attempts recovery.
+ */
+static void
+_spdk_nvmf_rdma_qp_error(void *arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair = arg;
+
+	spdk_nvmf_rdma_qpair_dec_refcnt(rqpair);
+
+	/* Anything other than ERR means the error was already recovered. */
+	if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) {
+			spdk_nvmf_ctrlr_abort_aer(rqpair->qpair.ctrlr);
+		}
+
+		_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
+		/* Attempt recovery. This will exit without recovering if I/O
+		 * requests are still outstanding.
+		 */
+		spdk_nvmf_rdma_qpair_recover(rqpair);
+	}
+}
+
+/*
+ * Thread message handler for the "last WQE reached" async event.
+ *
+ * Drops one qpair reference, then - if the cached QP state is still
+ * IBV_QPS_ERR - drains the abortable states plus the RDMA data transfer
+ * states and attempts recovery.
+ */
+static void
+_spdk_nvmf_rdma_qp_last_wqe(void *arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair = arg;
+
+	spdk_nvmf_rdma_qpair_dec_refcnt(rqpair);
+
+	/* Anything other than ERR means the error was already recovered. */
+	if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		/* Clear out the states that are safe to clear any time ... */
+		_spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair);
+
+		/* ... plus the RDMA data transfer states. */
+		spdk_nvmf_rdma_drain_state_queue(rqpair,
+						 RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+		spdk_nvmf_rdma_drain_state_queue(rqpair,
+						 RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
+		spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETING);
+
+		spdk_nvmf_rdma_qpair_recover(rqpair);
+	}
+}
+
+/*
+ * Fetch and handle one asynchronous verbs event from the given device.
+ *
+ * For QP-scoped events the cached QP state of the affected qpair is
+ * refreshed. QP_FATAL, LAST_WQE_REACHED and (only when the QP is in the
+ * error state) SQ_DRAINED additionally take a qpair reference and post a
+ * message to the qpair's poll group thread, where the actual cleanup and
+ * recovery run. All other events are only traced. The event is always
+ * acknowledged before returning, except when fetching it failed.
+ */
+static void
+spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
+{
+ int rc;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct ibv_async_event event;
+ enum ibv_qp_state state;
+
+ rc = ibv_get_async_event(device->context, &event);
+
+ if (rc) {
+ SPDK_ERRLOG("Failed to get async_event (%d): %s\n",
+ errno, spdk_strerror(errno));
+ return;
+ }
+
+ SPDK_NOTICELOG("Async event: %s\n",
+ ibv_event_type_str(event.event_type));
+
+ switch (event.event_type) {
+ case IBV_EVENT_QP_FATAL:
+ rqpair = event.element.qp->qp_context;
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ spdk_nvmf_rdma_update_ibv_state(rqpair);
+ /* The reference is released by the message handler. */
+ spdk_nvmf_rdma_qpair_inc_refcnt(rqpair);
+ spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair);
+ break;
+ case IBV_EVENT_QP_LAST_WQE_REACHED:
+ rqpair = event.element.qp->qp_context;
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ spdk_nvmf_rdma_update_ibv_state(rqpair);
+ /* The reference is released by the message handler. */
+ spdk_nvmf_rdma_qpair_inc_refcnt(rqpair);
+ spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_last_wqe, rqpair);
+ break;
+ case IBV_EVENT_SQ_DRAINED:
+ /* This event occurs frequently in both error and non-error states.
+ * Check if the qpair is in an error state before sending a message.
+ * Note that we're not on the correct thread to access the qpair, but
+ * the operations that the below calls make all happen to be thread
+ * safe. */
+ rqpair = event.element.qp->qp_context;
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ state = spdk_nvmf_rdma_update_ibv_state(rqpair);
+ if (state == IBV_QPS_ERR) {
+ spdk_nvmf_rdma_qpair_inc_refcnt(rqpair);
+ spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair);
+ }
+ break;
+ /* Remaining QP-scoped events: trace and refresh the cached state only. */
+ case IBV_EVENT_QP_REQ_ERR:
+ case IBV_EVENT_QP_ACCESS_ERR:
+ case IBV_EVENT_COMM_EST:
+ case IBV_EVENT_PATH_MIG:
+ case IBV_EVENT_PATH_MIG_ERR:
+ rqpair = event.element.qp->qp_context;
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ spdk_nvmf_rdma_update_ibv_state(rqpair);
+ break;
+ /* Events that are not tied to a qpair are only traced. */
+ case IBV_EVENT_CQ_ERR:
+ case IBV_EVENT_DEVICE_FATAL:
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_PORT_ERR:
+ case IBV_EVENT_LID_CHANGE:
+ case IBV_EVENT_PKEY_CHANGE:
+ case IBV_EVENT_SM_CHANGE:
+ case IBV_EVENT_SRQ_ERR:
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+ case IBV_EVENT_CLIENT_REREGISTER:
+ case IBV_EVENT_GID_CHANGE:
+ default:
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type);
+ break;
+ }
+ ibv_ack_async_event(&event);
+}
+
+/*
+ * Poll the transport's file descriptors without blocking and dispatch
+ * whatever is ready: RDMA CM events from the first descriptor and IB async
+ * events from the following ones, one descriptor per device in list order.
+ */
+static void
+spdk_nvmf_rdma_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_device *device, *tmp;
+	int pending, idx;
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	pending = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);
+	if (pending <= 0) {
+		return;
+	}
+
+	/* The first poll descriptor is RDMA CM event */
+	if (rtransport->poll_fds[0].revents & POLLIN) {
+		spdk_nvmf_process_cm_event(transport, cb_fn);
+		pending--;
+	}
+
+	if (pending == 0) {
+		return;
+	}
+
+	/* Second and subsequent poll descriptors are IB async events */
+	idx = 1;
+	TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
+		if (rtransport->poll_fds[idx].revents & POLLIN) {
+			spdk_nvmf_process_ib_event(device);
+			pending--;
+		}
+		idx++;
+	}
+
+	/* check all flagged fd's have been served */
+	assert(pending == 0);
+}
+
+/*
+ * Fill a discovery log page entry for an RDMA listener: transport type,
+ * address family, space-padded address and service id, and the RDMA
+ * transport specific fields.
+ */
+static void
+spdk_nvmf_rdma_discover(struct spdk_nvmf_transport *transport,
+			struct spdk_nvme_transport_id *trid,
+			struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+	/* Generic transport description. */
+	entry->trtype = SPDK_NVMF_TRTYPE_RDMA;
+	entry->adrfam = trid->adrfam;
+	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED;
+
+	/* RDMA specific address subtype. */
+	entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED;
+	entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE;
+	entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM;
+
+	/* Address strings are copied padded with spaces. */
+	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
+	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
+}
+
+/*
+ * Create an RDMA poll group with one poller, and one completion queue, per
+ * device known to the transport.
+ *
+ * Returns the generic poll group embedded in the new RDMA poll group, or
+ * NULL on allocation or CQ creation failure. On failure every poller and CQ
+ * created so far is released again.
+ */
+static struct spdk_nvmf_transport_poll_group *
+spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_poll_group *rgroup;
+	struct spdk_nvmf_rdma_poller *poller;
+	struct spdk_nvmf_rdma_device *device;
+
+	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+	rgroup = calloc(1, sizeof(*rgroup));
+	if (!rgroup) {
+		return NULL;
+	}
+
+	TAILQ_INIT(&rgroup->pollers);
+
+	pthread_mutex_lock(&rtransport->lock);
+	TAILQ_FOREACH(device, &rtransport->devices, link) {
+		poller = calloc(1, sizeof(*poller));
+		if (!poller) {
+			SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n");
+			goto err;
+		}
+
+		poller->device = device;
+		poller->group = rgroup;
+
+		TAILQ_INIT(&poller->qpairs);
+
+		poller->cq = ibv_create_cq(device->context, NVMF_RDMA_CQ_SIZE, poller, NULL, 0);
+		if (!poller->cq) {
+			SPDK_ERRLOG("Unable to create completion queue\n");
+			/* Not linked into the group yet, so free it directly. */
+			free(poller);
+			goto err;
+		}
+
+		TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
+	}
+
+	pthread_mutex_unlock(&rtransport->lock);
+	return &rgroup->group;
+
+err:
+	/* Unwind the pollers that were fully set up for earlier devices so
+	 * that neither they nor their completion queues are leaked.
+	 */
+	while ((poller = TAILQ_FIRST(&rgroup->pollers)) != NULL) {
+		TAILQ_REMOVE(&rgroup->pollers, poller, link);
+		ibv_destroy_cq(poller->cq);
+		free(poller);
+	}
+	free(rgroup);
+	pthread_mutex_unlock(&rtransport->lock);
+	return NULL;
+}
+
+/*
+ * Destroy an RDMA poll group: for every poller, clean up and destroy the
+ * qpairs still attached to it, destroy its completion queue and free it,
+ * then free the group itself.
+ */
+static void
+spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+	struct spdk_nvmf_rdma_poll_group *rgroup;
+	struct spdk_nvmf_rdma_poller *poller, *tmp;
+	struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair;
+
+	rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+
+	if (!rgroup) {
+		return;
+	}
+
+	TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
+		TAILQ_REMOVE(&rgroup->pollers, poller, link);
+
+		/* Tear down the qpairs before their completion queue:
+		 * ibv_destroy_cq() fails while queue pairs are still
+		 * associated with the CQ.
+		 */
+		TAILQ_FOREACH_SAFE(qpair, &poller->qpairs, link, tmp_qpair) {
+			_spdk_nvmf_rdma_qp_cleanup_all_states(qpair);
+			spdk_nvmf_rdma_qpair_destroy(qpair);
+		}
+
+		if (poller->cq) {
+			ibv_destroy_cq(poller->cq);
+		}
+
+		free(poller);
+	}
+
+	free(rgroup);
+}
+
+/*
+ * Attach a new qpair to the poll group.
+ *
+ * The qpair is linked to the group's poller for the device its port is
+ * bound to, initialized, given a management channel, and finally the
+ * pending RDMA connection is accepted.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int
+spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poller *poller;
+ int rc;
+
+ rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+ rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ device = rqpair->port->device;
+
+ /* Find the poller serving this device; poller is NULL after the loop if none matches. */
+ TAILQ_FOREACH(poller, &rgroup->pollers, link) {
+ if (poller->device == device) {
+ break;
+ }
+ }
+
+ if (!poller) {
+ SPDK_ERRLOG("No poller found for device.\n");
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&poller->qpairs, rqpair, link);
+ rqpair->poller = poller;
+
+ rc = spdk_nvmf_rdma_qpair_initialize(qpair);
+ if (rc < 0) {
+ /* NOTE(review): the qpair stays linked in poller->qpairs on this path
+ * and is not destroyed here - confirm the caller cleans it up. */
+ SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair);
+ return -1;
+ }
+
+ rqpair->mgmt_channel = spdk_get_io_channel(rtransport);
+ if (!rqpair->mgmt_channel) {
+ spdk_nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
+ spdk_nvmf_rdma_qpair_destroy(rqpair);
+ return -1;
+ }
+
+ rqpair->ch = spdk_io_channel_get_ctx(rqpair->mgmt_channel);
+ assert(rqpair->ch != NULL);
+
+ rc = spdk_nvmf_rdma_event_accept(rqpair->cm_id, rqpair);
+ if (rc) {
+ /* Try to reject, but we probably can't */
+ spdk_nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
+ spdk_nvmf_rdma_qpair_destroy(rqpair);
+ return -1;
+ }
+
+ /* Refresh the cached QP state now that the connection was accepted. */
+ spdk_nvmf_rdma_update_ibv_state(rqpair);
+
+ return 0;
+}
+
+/*
+ * Release the transport level resources of a request: return pool buffers,
+ * clear the data description and move the request to the FREE state.
+ * Always returns 0.
+ */
+static int
+spdk_nvmf_rdma_request_free(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_request *rdma_req;
+	uint32_t idx;
+
+	rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+	rtransport = SPDK_CONTAINEROF(req->qpair->transport,
+				      struct spdk_nvmf_rdma_transport, transport);
+
+	if (rdma_req->data_from_pool) {
+		/* Put the buffer/s back in the pool */
+		for (idx = 0; idx < rdma_req->req.iovcnt; idx++) {
+			spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[idx]);
+			rdma_req->req.iov[idx].iov_base = NULL;
+			rdma_req->data.buffers[idx] = NULL;
+		}
+		rdma_req->data_from_pool = false;
+	}
+
+	/* Reset the data description before the request is recycled. */
+	rdma_req->req.length = 0;
+	rdma_req->req.iovcnt = 0;
+	rdma_req->req.data = NULL;
+
+	spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE);
+
+	return 0;
+}
+
+/*
+ * Transport hook invoked when the NVMe-oF layer has finished a request.
+ *
+ * The request continues as EXECUTED while the QP is healthy, or jumps
+ * straight to COMPLETED when the QP is in the error state. Afterwards, if
+ * the qpair is still ACTIVE but the QP is in error, recovery is attempted.
+ * Always returns 0.
+ */
+static int
+spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_request *rdma_req;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	enum spdk_nvmf_rdma_request_state next_state;
+
+	rtransport = SPDK_CONTAINEROF(req->qpair->transport,
+				      struct spdk_nvmf_rdma_transport, transport);
+	rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+	rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		/* The connection is dead. Move the request directly to the completed state. */
+		next_state = RDMA_REQUEST_STATE_COMPLETED;
+	} else {
+		/* The connection is alive, so process the request as normal */
+		next_state = RDMA_REQUEST_STATE_EXECUTED;
+	}
+
+	spdk_nvmf_rdma_request_set_state(rdma_req, next_state);
+	spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+	/* If the NVMe-oF layer thinks the connection is active, but the RDMA layer
+	 * thinks the connection is dead, perform error recovery.
+	 */
+	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE &&
+	    rqpair->ibv_attr.qp_state == IBV_QPS_ERR) {
+		spdk_nvmf_rdma_qpair_recover(rqpair);
+	}
+
+	return 0;
+}
+
+/* Transport hook: destroy the RDMA qpair that embeds the given generic qpair. */
+static void
+spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
+{
+	spdk_nvmf_rdma_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair));
+}
+
+/*
+ * Recover the RDMA request a work completion refers to; the request pointer
+ * is carried in the completion's wr_id. Debug builds additionally assert
+ * that the pointer lies inside the owning qpair's reqs array.
+ */
+static struct spdk_nvmf_rdma_request *
+get_rdma_req_from_wc(struct ibv_wc *wc)
+{
+ struct spdk_nvmf_rdma_request *rdma_req;
+
+ rdma_req = (struct spdk_nvmf_rdma_request *)wc->wr_id;
+ assert(rdma_req != NULL);
+
+#ifdef DEBUG
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ /* Bounds check: index into reqs must be within [0, max_queue_depth). */
+ assert(rdma_req - rqpair->reqs >= 0);
+ assert(rdma_req - rqpair->reqs < (ptrdiff_t)rqpair->max_queue_depth);
+#endif
+
+ return rdma_req;
+}
+
+/*
+ * Recover the RDMA receive a work completion refers to; the receive pointer
+ * is carried in the completion's wr_id. Asserts that at least a full
+ * capsule command was received and, in debug builds, that the pointer lies
+ * inside the owning qpair's recvs array.
+ *
+ * NOTE(review): spdk_nvmf_rdma_poller_poll() also calls this for failed
+ * completions - confirm wc->byte_len is meaningful in that case.
+ */
+static struct spdk_nvmf_rdma_recv *
+get_rdma_recv_from_wc(struct ibv_wc *wc)
+{
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+
+ assert(wc->byte_len >= sizeof(struct spdk_nvmf_capsule_cmd));
+
+ rdma_recv = (struct spdk_nvmf_rdma_recv *)wc->wr_id;
+ assert(rdma_recv != NULL);
+
+#ifdef DEBUG
+ struct spdk_nvmf_rdma_qpair *rqpair = rdma_recv->qpair;
+
+ /* Bounds check: index into recvs must be within [0, max_queue_depth). */
+ assert(rdma_recv - rqpair->recvs >= 0);
+ assert(rdma_recv - rqpair->recvs < (ptrdiff_t)rqpair->max_queue_depth);
+#endif
+
+ return rdma_recv;
+}
+
+#ifdef DEBUG
+/*
+ * Debug-only predicate: non-zero when the request is in one of the two
+ * states from which a SEND completion is expected.
+ */
+static int
+spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req)
+{
+	switch (rdma_req->state) {
+	case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+	case RDMA_REQUEST_STATE_COMPLETING:
+		return 1;
+	default:
+		return 0;
+	}
+}
+#endif
+
+/*
+ * Reap up to 32 work completions from the poller's completion queue and
+ * advance the affected requests.
+ *
+ * Failed completions force the owning QP into the error state (which
+ * initiates a recovery); successful ones move the request forward and then
+ * give the qpair a chance to process other queued work.
+ *
+ * Returns the number of successfully completed SENDs, or -1 if polling the
+ * CQ failed or any reaped completion carried an error status.
+ *
+ * NOTE(review): the error path dispatches on wc[i].opcode - confirm the
+ * verbs provider reports a valid opcode for failed completions.
+ */
+static int
+spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
+			   struct spdk_nvmf_rdma_poller *rpoller)
+{
+	struct ibv_wc wc[32];
+	struct spdk_nvmf_rdma_request *rdma_req;
+	struct spdk_nvmf_rdma_recv *rdma_recv;
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	int reaped, i;
+	int count = 0;
+	bool error = false;
+
+	/* Poll for completing operations. */
+	reaped = ibv_poll_cq(rpoller->cq, 32, wc);
+	if (reaped < 0) {
+		SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
+			    errno, spdk_strerror(errno));
+		return -1;
+	}
+
+	for (i = 0; i < reaped; i++) {
+		/* Handle error conditions */
+		if (wc[i].status) {
+			/* wr_id is a 64-bit value printed behind a "0x" prefix,
+			 * so format it as hexadecimal with a matching type.
+			 */
+			SPDK_WARNLOG("CQ error on CQ %p, Request 0x%llx (%d): %s\n",
+				     rpoller->cq, (unsigned long long)wc[i].wr_id,
+				     wc[i].status, ibv_wc_status_str(wc[i].status));
+			error = true;
+
+			switch (wc[i].opcode) {
+			case IBV_WC_SEND:
+				rdma_req = get_rdma_req_from_wc(&wc[i]);
+				rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+				/* We're going to attempt an error recovery, so force the request into
+				 * the completed state. */
+				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+				spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+				break;
+			case IBV_WC_RECV:
+				rdma_recv = get_rdma_recv_from_wc(&wc[i]);
+				rqpair = rdma_recv->qpair;
+
+				/* Dump this into the incoming queue. This gets cleaned up when
+				 * the queue pair disconnects or recovers. */
+				TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
+				break;
+			case IBV_WC_RDMA_WRITE:
+			case IBV_WC_RDMA_READ:
+				/* If the data transfer fails still force the queue into the error state,
+				 * but the rdma_req objects should only be manipulated in response to
+				 * SEND and RECV operations. */
+				rdma_req = get_rdma_req_from_wc(&wc[i]);
+				rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+				break;
+			default:
+				SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode);
+				continue;
+			}
+
+			/* Set the qpair to the error state. This will initiate a recovery. */
+			spdk_nvmf_rdma_set_ibv_state(rqpair, IBV_QPS_ERR);
+			continue;
+		}
+
+		switch (wc[i].opcode) {
+		case IBV_WC_SEND:
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+			assert(spdk_nvmf_rdma_req_is_completing(rdma_req));
+
+			spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+			spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+			/* Only completed SENDs are counted towards the return value. */
+			count++;
+
+			/* Try to process other queued requests */
+			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+			break;
+
+		case IBV_WC_RDMA_WRITE:
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+			/* Try to process other queued requests */
+			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+			break;
+
+		case IBV_WC_RDMA_READ:
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+			/* The host data has arrived; the command can now be executed. */
+			assert(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+			spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE);
+			spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+			/* Try to process other queued requests */
+			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+			break;
+
+		case IBV_WC_RECV:
+			rdma_recv = get_rdma_recv_from_wc(&wc[i]);
+			rqpair = rdma_recv->qpair;
+
+			TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
+			/* Try to process other queued requests */
+			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+			break;
+
+		default:
+			SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode);
+			continue;
+		}
+	}
+
+	if (error == true) {
+		return -1;
+	}
+
+	return count;
+}
+
+/*
+ * Poll every poller of the group in list order.
+ *
+ * Returns the sum of the per-poller results, or the first negative result,
+ * in which case the remaining pollers are not polled.
+ */
+static int
+spdk_nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+	struct spdk_nvmf_rdma_transport *rtransport;
+	struct spdk_nvmf_rdma_poll_group *rgroup;
+	struct spdk_nvmf_rdma_poller *rpoller;
+	int total = 0;
+
+	rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport);
+	rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+
+	TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
+		int rc = spdk_nvmf_rdma_poller_poll(rtransport, rpoller);
+
+		if (rc < 0) {
+			return rc;
+		}
+		total += rc;
+	}
+
+	return total;
+}
+
+/*
+ * Fill trid with the RDMA transport type and the IPv4/IPv6 address and port
+ * of one end of the connection behind id: the peer's end when peer is true,
+ * the local end otherwise.
+ *
+ * Returns 0 on success and -1 if the address family is neither AF_INET nor
+ * AF_INET6.
+ */
+static int
+spdk_nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
+			       struct spdk_nvme_transport_id *trid,
+			       bool peer)
+{
+	struct sockaddr *saddr;
+	uint16_t port;
+
+	trid->trtype = SPDK_NVME_TRANSPORT_RDMA;
+
+	saddr = peer ? rdma_get_peer_addr(id) : rdma_get_local_addr(id);
+
+	/* Address family and textual address depend on the socket family. */
+	switch (saddr->sa_family) {
+	case AF_INET:
+		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
+		inet_ntop(AF_INET, &((struct sockaddr_in *)saddr)->sin_addr,
+			  trid->traddr, sizeof(trid->traddr));
+		break;
+	case AF_INET6:
+		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
+		inet_ntop(AF_INET6, &((struct sockaddr_in6 *)saddr)->sin6_addr,
+			  trid->traddr, sizeof(trid->traddr));
+		break;
+	default:
+		return -1;
+	}
+
+	/* The service id is the port number, handled the same for both families. */
+	if (peer) {
+		port = ntohs(rdma_get_dst_port(id));
+	} else {
+		port = ntohs(rdma_get_src_port(id));
+	}
+	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port);
+
+	return 0;
+}
+
+/* Transport hook: fill trid from the peer end of the qpair's CM id. */
+static int
+spdk_nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+				   struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair =
+		SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true);
+}
+
+/* Transport hook: fill trid from the local end of the qpair's CM id. */
+static int
+spdk_nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+				    struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair =
+		SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false);
+}
+
+/* Transport hook: fill trid from the local end of the qpair's listen CM id. */
+static int
+spdk_nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+				     struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair =
+		SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+	return spdk_nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false);
+}
+
+/*
+ * Operations table that plugs the RDMA implementation in this file into the
+ * generic NVMe-oF transport interface.
+ */
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = {
+ .type = SPDK_NVME_TRANSPORT_RDMA,
+ .opts_init = spdk_nvmf_rdma_opts_init,
+ .create = spdk_nvmf_rdma_create,
+ .destroy = spdk_nvmf_rdma_destroy,
+
+ /* Listener management and connection acceptance */
+ .listen = spdk_nvmf_rdma_listen,
+ .stop_listen = spdk_nvmf_rdma_stop_listen,
+ .accept = spdk_nvmf_rdma_accept,
+
+ .listener_discover = spdk_nvmf_rdma_discover,
+
+ /* Poll group lifecycle and polling */
+ .poll_group_create = spdk_nvmf_rdma_poll_group_create,
+ .poll_group_destroy = spdk_nvmf_rdma_poll_group_destroy,
+ .poll_group_add = spdk_nvmf_rdma_poll_group_add,
+ .poll_group_poll = spdk_nvmf_rdma_poll_group_poll,
+
+ /* Request completion and release */
+ .req_free = spdk_nvmf_rdma_request_free,
+ .req_complete = spdk_nvmf_rdma_request_complete,
+
+ /* Per-qpair operations */
+ .qpair_fini = spdk_nvmf_rdma_close_qpair,
+ .qpair_is_idle = spdk_nvmf_rdma_qpair_is_idle,
+ .qpair_get_peer_trid = spdk_nvmf_rdma_qpair_get_peer_trid,
+ .qpair_get_local_trid = spdk_nvmf_rdma_qpair_get_local_trid,
+ .qpair_get_listen_trid = spdk_nvmf_rdma_qpair_get_listen_trid,
+
+};
+
+SPDK_LOG_REGISTER_COMPONENT("rdma", SPDK_LOG_RDMA)
diff --git a/src/spdk/lib/nvmf/request.c b/src/spdk/lib/nvmf/request.c
new file mode 100644
index 00000000..88b6b9a9
--- /dev/null
+++ b/src/spdk/lib/nvmf/request.c
@@ -0,0 +1,190 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/trace.h"
+
+#include "spdk_internal/assert.h"
+#include "spdk_internal/log.h"
+
+/*
+ * Called after a request left the qpair's outstanding list. While the qpair
+ * is deactivating, the state change callback is invoked once no requests
+ * remain outstanding.
+ */
+static void
+spdk_nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair)
+{
+	if (qpair->state != SPDK_NVMF_QPAIR_DEACTIVATING) {
+		/* The only other state requests are expected to finish in. */
+		assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
+		return;
+	}
+
+	assert(qpair->state_cb != NULL);
+
+	if (TAILQ_EMPTY(&qpair->outstanding)) {
+		qpair->state_cb(qpair->state_cb_arg, 0);
+	}
+}
+
+/*
+ * Drop a request without completing it: unlink it from the qpair's
+ * outstanding list, release its transport resources and run the qpair
+ * cleanup check. A transport failure is only logged. Always returns 0.
+ */
+int
+spdk_nvmf_request_free(struct spdk_nvmf_request *req)
+{
+	/* Remember the qpair first; it is still needed after the transport
+	 * has been asked to free the request.
+	 */
+	struct spdk_nvmf_qpair *owner = req->qpair;
+	int rc;
+
+	TAILQ_REMOVE(&owner->outstanding, req, link);
+
+	rc = spdk_nvmf_transport_req_free(req);
+	if (rc) {
+		SPDK_ERRLOG("Unable to free transport level request resources.\n");
+	}
+
+	spdk_nvmf_qpair_request_cleanup(owner);
+
+	return 0;
+}
+
+/*
+ * Complete a request: finalize the completion entry, unlink the request
+ * from the qpair's outstanding list, hand it to the transport and run the
+ * qpair cleanup check. A transport failure is only logged. Always returns 0.
+ */
+int
+spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
+{
+	struct spdk_nvmf_qpair *qpair = req->qpair;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	int rc;
+
+	/* Fields of the completion that are set the same way for every request. */
+	rsp->cid = req->cmd->nvme_cmd.cid;
+	rsp->sqid = 0;
+	rsp->status.p = 0;
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF,
+		      "cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n",
+		      rsp->cid, rsp->cdw0, rsp->rsvd1,
+		      *(uint16_t *)&rsp->status);
+
+	TAILQ_REMOVE(&qpair->outstanding, req, link);
+
+	rc = spdk_nvmf_transport_req_complete(req);
+	if (rc) {
+		SPDK_ERRLOG("Transport request completion error!\n");
+	}
+
+	spdk_nvmf_qpair_request_cleanup(qpair);
+
+	return 0;
+}
+
+/*
+ * Emit debug log lines describing an incoming command capsule.
+ *
+ * Fabrics commands are logged by their fctype, all other commands by their
+ * NVMe opcode and key fields. If the resulting opcode transfers data, the
+ * first SGL descriptor is logged as well.
+ */
+static void
+nvmf_trace_command(union nvmf_h2c_msg *h2c_msg, bool is_admin_queue)
+{
+ struct spdk_nvmf_capsule_cmd *cap_hdr = &h2c_msg->nvmf_cmd;
+ struct spdk_nvme_cmd *cmd = &h2c_msg->nvme_cmd;
+ struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
+ uint8_t opc;
+
+ if (cmd->opc == SPDK_NVME_OPC_FABRIC) {
+ /* For Fabrics commands the fctype stands in for the opcode below. */
+ opc = cap_hdr->fctype;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "%s Fabrics cmd: fctype 0x%02x cid %u\n",
+ is_admin_queue ? "Admin" : "I/O",
+ cap_hdr->fctype, cap_hdr->cid);
+ } else {
+ opc = cmd->opc;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "%s cmd: opc 0x%02x fuse %u cid %u nsid %u cdw10 0x%08x\n",
+ is_admin_queue ? "Admin" : "I/O",
+ cmd->opc, cmd->fuse, cmd->cid, cmd->nsid, cmd->cdw10);
+ if (cmd->mptr) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "mptr 0x%" PRIx64 "\n", cmd->mptr);
+ }
+ /* Only log psdt when it is neither of the two SGL settings. */
+ if (cmd->psdt != SPDK_NVME_PSDT_SGL_MPTR_CONTIG &&
+ cmd->psdt != SPDK_NVME_PSDT_SGL_MPTR_SGL) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "psdt %u\n", cmd->psdt);
+ }
+ }
+
+ if (spdk_nvme_opc_get_data_transfer(opc) != SPDK_NVME_DATA_NONE) {
+ if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF,
+ "SGL: Keyed%s: addr 0x%" PRIx64 " key 0x%x len 0x%x\n",
+ sgl->generic.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY ? " (Inv)" : "",
+ sgl->address, sgl->keyed.key, sgl->keyed.length);
+ } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "SGL: Data block: %s 0x%" PRIx64 " len 0x%x\n",
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET ? "offs" : "addr",
+ sgl->address, sgl->unkeyed.length);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "SGL type 0x%x subtype 0x%x\n",
+ sgl->generic.type, sgl->generic.subtype);
+ }
+ }
+}
+
+/*
+ * Execute a newly received request.
+ *
+ * Requests on a qpair that is not ACTIVE are completed immediately with a
+ * command sequence error. Requests for a subsystem that is not ACTIVE are
+ * queued on the subsystem poll group. Everything else is placed on the
+ * qpair's outstanding list and dispatched to the Fabrics, admin or I/O
+ * command handler; it is completed here if the handler reports that it
+ * finished synchronously.
+ */
+void
+spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ spdk_nvmf_request_exec_status status;
+
+ nvmf_trace_command(req->cmd, spdk_nvmf_qpair_is_admin_queue(qpair));
+
+ if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ /* Place the request on the outstanding list so we can keep track of it */
+ TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ /* Check if the subsystem is paused (if there is a subsystem) */
+ if (qpair->ctrlr) {
+ struct spdk_nvmf_subsystem_poll_group *sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+ if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
+ /* The subsystem is not currently active. Queue this request. */
+ TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
+ return;
+ }
+
+ }
+
+ /* Place the request on the outstanding list so we can keep track of it */
+ TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+
+ /* Fabrics commands are recognized by opcode; otherwise the queue type decides. */
+ if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
+ status = spdk_nvmf_ctrlr_process_fabrics_cmd(req);
+ } else if (spdk_unlikely(spdk_nvmf_qpair_is_admin_queue(qpair))) {
+ status = spdk_nvmf_ctrlr_process_admin_cmd(req);
+ } else {
+ status = spdk_nvmf_ctrlr_process_io_cmd(req);
+ }
+
+ /* Handlers that did not finish here are not completed by this function. */
+ if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+ spdk_nvmf_request_complete(req);
+ }
+}
diff --git a/src/spdk/lib/nvmf/subsystem.c b/src/spdk/lib/nvmf/subsystem.c
new file mode 100644
index 00000000..9e28f3c6
--- /dev/null
+++ b/src/spdk/lib/nvmf/subsystem.c
@@ -0,0 +1,1269 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/event.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/uuid.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/utf.h"
+
+/*
+ * States of the parser that validates the reverse-domain part of an NQN (RFC 1034 labels).
+ */
+enum spdk_nvmf_nqn_domain_states {
+ /* Start of a label: the next character must be a letter */
+ SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
+
+ /* After a hyphen: the next character must be a letter, digit, or hyphen (not '.') */
+ SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
+
+ /* After a letter or digit: a letter, digit, hyphen, or label-ending '.' may follow */
+ SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
+};
+
+/*
+ * Returns true if every one of the first size bytes of buf is a printable
+ * ASCII character (0x20 through 0x7E inclusive). An empty range is valid.
+ */
+static bool
+spdk_nvmf_valid_ascii_string(const void *buf, size_t size)
+{
+	const uint8_t *cur = buf;
+
+	for (; size > 0; size--, cur++) {
+		uint8_t c = *cur;
+
+		if (c < 0x20 || c > 0x7E) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Validate an NVMe Qualified Name.
+ *
+ * Accepted forms:
+ *  - the discovery NQN (SPDK_NVMF_DISCOVERY_NQN),
+ *  - the UUID form: SPDK_NVMF_NQN_UUID_PRE followed by a parseable UUID string,
+ *  - "nqn.yyyy-mm.reverse.domain:user-string", where each domain label starts
+ *    with a letter, contains only letters, digits and hyphens, ends with a
+ *    letter or digit and is at most SPDK_DOMAIN_LABEL_MAX_LEN characters long,
+ *    and the non-empty user string is valid UTF-8.
+ *
+ * The length must be within [SPDK_NVMF_NQN_MIN_LEN, SPDK_NVMF_NQN_MAX_LEN].
+ *
+ * \param nqn NUL-terminated string to validate.
+ * \return true if nqn is valid; false otherwise (the reason is logged).
+ */
+static bool
+spdk_nvmf_valid_nqn(const char *nqn)
+{
+	size_t len;
+	struct spdk_uuid uuid_value;
+	uint32_t i;
+	int bytes_consumed;
+	uint32_t domain_label_length;
+	const char *reverse_domain_end;
+	uint32_t reverse_domain_end_index;
+	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
+	unsigned char c;
+
+	/* Check for length requirements */
+	len = strlen(nqn);
+	if (len > SPDK_NVMF_NQN_MAX_LEN) {
+		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
+		return false;
+	}
+
+	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
+	if (len < SPDK_NVMF_NQN_MIN_LEN) {
+		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
+		return false;
+	}
+
+	/* Check for discovery controller nqn */
+	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
+		return true;
+	}
+
+	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
+	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
+		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
+			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
+			return false;
+		}
+
+		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
+			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
+			return false;
+		}
+		return true;
+	}
+
+	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
+
+	if (strncmp(nqn, "nqn.", 4) != 0) {
+		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
+		return false;
+	}
+
+	/*
+	 * Check for yyyy-mm.
+	 * The <ctype.h> functions require a value representable as unsigned char,
+	 * so every byte is cast before it is classified.
+	 */
+	if (!(isdigit((unsigned char)nqn[4]) && isdigit((unsigned char)nqn[5]) &&
+	      isdigit((unsigned char)nqn[6]) && isdigit((unsigned char)nqn[7]) &&
+	      nqn[8] == '-' &&
+	      isdigit((unsigned char)nqn[9]) && isdigit((unsigned char)nqn[10]) &&
+	      nqn[11] == '.')) {
+		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
+		return false;
+	}
+
+	/* The first ':' ends the reverse domain; at least one character must follow it. */
+	reverse_domain_end = strchr(nqn, ':');
+	if (reverse_domain_end == NULL ||
+	    (reverse_domain_end_index = reverse_domain_end - nqn) >= len - 1) {
+		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
+			    nqn);
+		return false;
+	}
+
+	/* Check for valid reverse domain */
+	domain_label_length = 0;
+	for (i = 12; i < reverse_domain_end_index; i++) {
+		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
+			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
+			return false;
+		}
+
+		c = (unsigned char)nqn[i];
+
+		switch (domain_state) {
+
+		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
+			if (isalpha(c)) {
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+				domain_label_length++;
+				break;
+			} else {
+				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
+				return false;
+			}
+		}
+
+		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
+			if (isalnum(c)) {
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+				domain_label_length++;
+				break;
+			} else if (c == '-') {
+				if (i == reverse_domain_end_index - 1) {
+					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+						    nqn);
+					return false;
+				}
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
+				domain_label_length++;
+				break;
+			} else if (c == '.') {
+				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+					    nqn);
+				return false;
+			} else {
+				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
+					    nqn);
+				return false;
+			}
+		}
+
+		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
+			if (isalnum(c)) {
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+				domain_label_length++;
+				break;
+			} else if (c == '-') {
+				if (i == reverse_domain_end_index - 1) {
+					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+						    nqn);
+					return false;
+				}
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
+				domain_label_length++;
+				break;
+			} else if (c == '.') {
+				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
+				domain_label_length = 0;
+				break;
+			} else {
+				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
+					    nqn);
+				return false;
+			}
+		}
+		}
+	}
+
+	/*
+	 * The loop only tests a label's length when it looks at the character that
+	 * follows it, so the final label (the one ended by ':') is checked here.
+	 */
+	if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
+		SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
+		return false;
+	}
+
+	/* Everything after the ':' is the user-specified name and must be valid UTF-8. */
+	i = reverse_domain_end_index + 1;
+	while (i < len) {
+		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
+		if (bytes_consumed <= 0) {
+			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
+			return false;
+		}
+
+		i += bytes_consumed;
+	}
+	return true;
+}
+
+/*
+ * Create a new, inactive subsystem and register it with the target.
+ *
+ * \param tgt Target that will own the subsystem.
+ * \param nqn Subsystem NQN; must be valid and not already present in tgt.
+ * \param type Subsystem type. A discovery subsystem must be created with num_ns == 0.
+ * \param num_ns Initial size of the namespace array; stored as both max_nsid
+ * and max_allowed_nsid.
+ *
+ * \return the new subsystem, or NULL if the NQN is a duplicate or invalid, the
+ * type/num_ns combination is rejected, the target has no free subsystem slot,
+ * or memory allocation fails.
+ */
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
+			   const char *nqn,
+			   enum spdk_nvmf_subtype type,
+			   uint32_t num_ns)
+{
+	struct spdk_nvmf_subsystem *subsystem;
+	uint32_t sid;
+
+	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
+		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
+		return NULL;
+	}
+
+	if (!spdk_nvmf_valid_nqn(nqn)) {
+		return NULL;
+	}
+
+	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY && num_ns != 0) {
+		SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
+		return NULL;
+	}
+
+	/* Find a free subsystem id (sid) */
+	for (sid = 0; sid < tgt->opts.max_subsystems; sid++) {
+		if (tgt->subsystems[sid] == NULL) {
+			break;
+		}
+	}
+	if (sid >= tgt->opts.max_subsystems) {
+		return NULL;
+	}
+
+	subsystem = calloc(1, sizeof(*subsystem));
+	if (subsystem == NULL) {
+		return NULL;
+	}
+
+	subsystem->thread = spdk_get_thread();
+	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+	subsystem->tgt = tgt;
+	subsystem->id = sid;
+	subsystem->subtype = type;
+	subsystem->max_nsid = num_ns;
+	subsystem->max_allowed_nsid = num_ns;
+	subsystem->next_cntlid = 0;
+	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
+	TAILQ_INIT(&subsystem->listeners);
+	TAILQ_INIT(&subsystem->hosts);
+	TAILQ_INIT(&subsystem->ctrlrs);
+
+	if (num_ns != 0) {
+		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
+		if (subsystem->ns == NULL) {
+			SPDK_ERRLOG("Namespace memory allocation failed\n");
+			free(subsystem);
+			return NULL;
+		}
+	}
+
+	/*
+	 * Default serial number: all '0' characters. The final byte must be the
+	 * NUL terminator so that the value returned by spdk_nvmf_subsystem_get_sn()
+	 * is a proper C string.
+	 */
+	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
+	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';
+
+	tgt->subsystems[sid] = subsystem;
+	tgt->discovery_genctr++;
+
+	return subsystem;
+}
+
+/* Unlink host from the subsystem's host list and free it together with its NQN string. */
+static void
+_spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
+{
+	char *host_nqn;
+
+	TAILQ_REMOVE(&subsystem->hosts, host, link);
+
+	host_nqn = host->nqn;
+	free(host);
+	free(host_nqn);
+}
+
+static int _spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid);
+
+/*
+ * Tear down a subsystem: free its listeners and hosts, destruct its
+ * controllers, remove its namespaces, clear its slot in the target and free
+ * it. A NULL subsystem is ignored. The subsystem is expected to be inactive.
+ */
+void
+spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
+{
+	struct spdk_nvmf_listener *listener;
+	struct spdk_nvmf_host *host;
+	struct spdk_nvmf_ctrlr *ctrlr, *next_ctrlr;
+	struct spdk_nvmf_ns *ns, *next_ns;
+	struct spdk_nvmf_tgt *tgt;
+
+	if (subsystem == NULL) {
+		return;
+	}
+
+	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE);
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "subsystem is %p\n", subsystem);
+
+	/* Drain the listener list from the head. */
+	while ((listener = TAILQ_FIRST(&subsystem->listeners)) != NULL) {
+		TAILQ_REMOVE(&subsystem->listeners, listener, link);
+		free(listener);
+	}
+
+	/* The helper unlinks each host, so the list shrinks on every pass. */
+	while ((host = TAILQ_FIRST(&subsystem->hosts)) != NULL) {
+		_spdk_nvmf_subsystem_remove_host(subsystem, host);
+	}
+
+	TAILQ_FOREACH_SAFE(ctrlr, &subsystem->ctrlrs, link, next_ctrlr) {
+		spdk_nvmf_ctrlr_destruct(ctrlr);
+	}
+
+	/* Look up the successor before the current namespace is freed. */
+	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; ns = next_ns) {
+		next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);
+		_spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
+	}
+
+	free(subsystem->ns);
+
+	tgt = subsystem->tgt;
+	tgt->subsystems[subsystem->id] = NULL;
+	tgt->discovery_genctr++;
+
+	free(subsystem);
+}
+
+/*
+ * Atomically move the subsystem to the given state.
+ *
+ * Each target state has one normal predecessor. Two alternative predecessors
+ * are also accepted on a second attempt: RESUMING -> ACTIVE and
+ * ACTIVATING -> DEACTIVATING (the latter covers a failed activation).
+ *
+ * \return 0 if the transition happened, non-zero otherwise (-1 for an unknown
+ * target state).
+ */
+static int
+spdk_nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
+			      enum spdk_nvmf_subsystem_state state)
+{
+	enum spdk_nvmf_subsystem_state observed, required;
+
+	/* Pick the state the subsystem normally has to be in before entering 'state'. */
+	switch (state) {
+	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
+		required = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
+		required = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
+		required = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_PAUSING:
+		required = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_PAUSED:
+		required = SPDK_NVMF_SUBSYSTEM_PAUSING;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_RESUMING:
+		required = SPDK_NVMF_SUBSYSTEM_PAUSED;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
+		required = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+		break;
+	default:
+		assert(false);
+		return -1;
+	}
+
+	observed = __sync_val_compare_and_swap(&subsystem->state, required, state);
+	if (observed != required) {
+		if (state == SPDK_NVMF_SUBSYSTEM_ACTIVE &&
+		    observed == SPDK_NVMF_SUBSYSTEM_RESUMING) {
+			required = SPDK_NVMF_SUBSYSTEM_RESUMING;
+		} else if (state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING &&
+			   observed == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
+			/* This is for the case when activating the subsystem fails. */
+			required = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+		}
+		/* Retry once, possibly with the alternative predecessor chosen above. */
+		observed = __sync_val_compare_and_swap(&subsystem->state, required, state);
+	}
+	assert(observed == required);
+	return observed - required;
+}
+
+/* Context carried through the per-channel iteration of a subsystem state change. */
+struct subsystem_state_change_ctx {
+ struct spdk_nvmf_subsystem *subsystem; /* subsystem whose state is being changed */
+
+ enum spdk_nvmf_subsystem_state requested_state; /* final state to enter once every channel is done */
+
+ spdk_nvmf_subsystem_state_change_done cb_fn; /* optional completion callback (may be NULL) */
+ void *cb_arg; /* argument handed to cb_fn */
+};
+
+/*
+ * Completion of the per-channel iteration. If every channel succeeded, commit
+ * the requested state; any failure to do so is reported as -1. The user's
+ * callback (if any) is then invoked and the context is freed.
+ */
+static void
+subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
+{
+	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+	if (status == 0 &&
+	    spdk_nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state) != 0) {
+		status = -1;
+	}
+
+	if (ctx->cb_fn != NULL) {
+		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
+	}
+
+	free(ctx);
+}
+
+/* Poll-group callback: ctx is the channel iterator, which is advanced with the given status. */
+static void
+subsystem_state_change_continue(void *ctx, int status)
+{
+	spdk_for_each_channel_continue((struct spdk_io_channel_iter *)ctx, status);
+}
+
+/*
+ * Per-channel step of a state change: ask this channel's poll group to remove,
+ * add, resume or pause the subsystem, depending on the requested state and
+ * (for ACTIVE) on the subsystem's current intermediate state. The poll-group
+ * call is given subsystem_state_change_continue as its callback.
+ */
+static void
+subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
+{
+	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+
+	if (ctx->requested_state == SPDK_NVMF_SUBSYSTEM_INACTIVE) {
+		spdk_nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+	} else if (ctx->requested_state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
+		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
+			spdk_nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
+			spdk_nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+		}
+	} else if (ctx->requested_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		spdk_nvmf_poll_group_pause_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+	} else {
+		assert(false);
+	}
+}
+
+/*
+ * Start an asynchronous transition of the subsystem to requested_state
+ * (INACTIVE, ACTIVE or PAUSED).
+ *
+ * The subsystem is first moved to the matching intermediate state, then every
+ * channel of the target is visited; cb_fn (if not NULL) is called when the
+ * iteration finishes.
+ *
+ * \return 0 if the transition was started, -EINVAL for an unsupported
+ * requested state, -ENOMEM if the context cannot be allocated, or the
+ * non-zero result of spdk_nvmf_subsystem_set_state().
+ */
+static int
+spdk_nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
+				 enum spdk_nvmf_subsystem_state requested_state,
+				 spdk_nvmf_subsystem_state_change_done cb_fn,
+				 void *cb_arg)
+{
+	struct subsystem_state_change_ctx *change;
+	enum spdk_nvmf_subsystem_state transient;
+	int rc;
+
+	switch (requested_state) {
+	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
+		/* ACTIVE is reached by resuming from PAUSED, or by activating otherwise. */
+		transient = (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ?
+			    SPDK_NVMF_SUBSYSTEM_RESUMING : SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
+		transient = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
+		break;
+	case SPDK_NVMF_SUBSYSTEM_PAUSED:
+		transient = SPDK_NVMF_SUBSYSTEM_PAUSING;
+		break;
+	default:
+		assert(false);
+		return -EINVAL;
+	}
+
+	/* Allocate before touching the state so an allocation failure leaves it unchanged. */
+	change = calloc(1, sizeof(*change));
+	if (change == NULL) {
+		return -ENOMEM;
+	}
+
+	rc = spdk_nvmf_subsystem_set_state(subsystem, transient);
+	if (rc != 0) {
+		free(change);
+		return rc;
+	}
+
+	change->subsystem = subsystem;
+	change->requested_state = requested_state;
+	change->cb_fn = cb_fn;
+	change->cb_arg = cb_arg;
+
+	spdk_for_each_channel(subsystem->tgt,
+			      subsystem_state_change_on_pg,
+			      change,
+			      subsystem_state_change_done);
+
+	return 0;
+}
+
+/*
+ * Request a transition of the subsystem to the ACTIVE state.
+ * Returns the result of spdk_nvmf_subsystem_state_change(); cb_fn, if not NULL,
+ * is invoked with cb_arg when the change completes.
+ */
+int
+spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
+}
+
+/*
+ * Request a transition of the subsystem to the INACTIVE state.
+ * Returns the result of spdk_nvmf_subsystem_state_change(); cb_fn, if not NULL,
+ * is invoked with cb_arg when the change completes.
+ */
+int
+spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
+}
+
+/*
+ * Request a transition of the subsystem to the PAUSED state.
+ * Returns the result of spdk_nvmf_subsystem_state_change(); cb_fn, if not NULL,
+ * is invoked with cb_arg when the change completes.
+ */
+int
+spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
+}
+
+/*
+ * Request a transition of the subsystem back to the ACTIVE state. This makes
+ * the same call as spdk_nvmf_subsystem_start(); the state-change routine takes
+ * the RESUMING path when the subsystem is currently PAUSED.
+ */
+int
+spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
+}
+
+/* Return the subsystem in the lowest occupied slot of the target, or NULL if there is none. */
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
+{
+	uint32_t sid = 0;
+
+	while (sid < tgt->opts.max_subsystems) {
+		if (tgt->subsystems[sid] != NULL) {
+			return tgt->subsystems[sid];
+		}
+		sid++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the subsystem in the next occupied slot after the given one, or NULL
+ * if there is none or if subsystem is NULL.
+ */
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
+{
+	struct spdk_nvmf_tgt *tgt;
+	struct spdk_nvmf_subsystem *candidate;
+	uint32_t sid;
+
+	if (subsystem == NULL) {
+		return NULL;
+	}
+
+	tgt = subsystem->tgt;
+	sid = subsystem->id + 1;
+
+	while (sid < tgt->opts.max_subsystems) {
+		candidate = tgt->subsystems[sid];
+		if (candidate != NULL) {
+			return candidate;
+		}
+		sid++;
+	}
+
+	return NULL;
+}
+
+/* Look up a host entry by exact NQN match; returns NULL when the host is not listed. */
+static struct spdk_nvmf_host *
+_spdk_nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+	struct spdk_nvmf_host *cur;
+
+	for (cur = TAILQ_FIRST(&subsystem->hosts); cur != NULL; cur = TAILQ_NEXT(cur, link)) {
+		if (strcmp(hostnqn, cur->nqn) == 0) {
+			break;
+		}
+	}
+
+	/* cur is NULL here when the whole list was walked without a match. */
+	return cur;
+}
+
+/*
+ * Allow the host with the given NQN to use this subsystem.
+ *
+ * \return 0 on success or if the host is already listed, -EINVAL if hostnqn is
+ * not a valid NQN, -EAGAIN if the subsystem is neither inactive nor paused,
+ * -ENOMEM on allocation failure.
+ */
+int
+spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+	struct spdk_nvmf_host *entry;
+
+	if (!spdk_nvmf_valid_nqn(hostnqn)) {
+		return -EINVAL;
+	}
+
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -EAGAIN;
+	}
+
+	if (_spdk_nvmf_subsystem_find_host(subsystem, hostnqn) != NULL) {
+		/* This subsystem already allows the specified host. */
+		return 0;
+	}
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL) {
+		return -ENOMEM;
+	}
+
+	entry->nqn = strdup(hostnqn);
+	if (entry->nqn == NULL) {
+		free(entry);
+		return -ENOMEM;
+	}
+
+	TAILQ_INSERT_HEAD(&subsystem->hosts, entry, link);
+	subsystem->tgt->discovery_genctr++;
+
+	return 0;
+}
+
+/*
+ * Remove a host from the subsystem's allowed list.
+ *
+ * \return 0 on success, -EAGAIN if the subsystem is neither inactive nor
+ * paused, -ENOENT if the host is not listed.
+ */
+int
+spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+	struct spdk_nvmf_host *entry;
+
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -EAGAIN;
+	}
+
+	entry = _spdk_nvmf_subsystem_find_host(subsystem, hostnqn);
+	if (entry != NULL) {
+		_spdk_nvmf_subsystem_remove_host(subsystem, entry);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Set whether any host may connect to the subsystem.
+ *
+ * \return 0 on success, -EAGAIN if the subsystem is neither inactive nor paused.
+ */
+int
+spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
+{
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -EAGAIN;
+	}
+
+	subsystem->allow_any_host = allow_any_host;
+	return 0;
+}
+
+/* Return the subsystem's allow_any_host flag. */
+bool
+spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->allow_any_host;
+}
+
+/*
+ * Decide whether a host may use the subsystem: a NULL hostnqn is never
+ * allowed; otherwise the host is allowed if the subsystem allows any host or
+ * if the NQN is on the subsystem's host list.
+ */
+bool
+spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+	if (hostnqn == NULL) {
+		return false;
+	}
+
+	return subsystem->allow_any_host ||
+	       _spdk_nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
+}
+
+/* Return the first entry of the subsystem's host list (TAILQ_FIRST of subsystem->hosts). */
+struct spdk_nvmf_host *
+spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
+{
+ return TAILQ_FIRST(&subsystem->hosts);
+}
+
+
+/*
+ * Return the host list entry that follows prev_host (TAILQ_NEXT).
+ * The subsystem argument is not used by this function.
+ */
+struct spdk_nvmf_host *
+spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_host *prev_host)
+{
+ return TAILQ_NEXT(prev_host, link);
+}
+
+/* Return the NQN string stored in the host entry; the pointer refers to the entry's own storage. */
+const char *
+spdk_nvmf_host_get_nqn(struct spdk_nvmf_host *host)
+{
+ return host->nqn;
+}
+
+/* Find the listener whose transport ID compares equal to trid; NULL if there is none. */
+static struct spdk_nvmf_listener *
+_spdk_nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
+				   const struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_listener *cur;
+
+	for (cur = TAILQ_FIRST(&subsystem->listeners); cur != NULL; cur = TAILQ_NEXT(cur, link)) {
+		if (spdk_nvme_transport_id_compare(&cur->trid, trid) == 0) {
+			break;
+		}
+	}
+
+	/* cur is NULL here when no listener matched. */
+	return cur;
+}
+
+/*
+ * Add a listener for the given transport ID to the subsystem.
+ *
+ * \return 0 on success or if an equal listener already exists, -EAGAIN if the
+ * subsystem is neither inactive nor paused, -EINVAL if the target has no
+ * transport of the requested type, -ENOMEM on allocation failure.
+ */
+int
+spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
+				 struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_transport *transport;
+	struct spdk_nvmf_listener *entry;
+
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -EAGAIN;
+	}
+
+	if (_spdk_nvmf_subsystem_find_listener(subsystem, trid) != NULL) {
+		/* Listener already exists in this subsystem */
+		return 0;
+	}
+
+	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trtype);
+	if (transport == NULL) {
+		SPDK_ERRLOG("Unknown transport type %d\n", trid->trtype);
+		return -EINVAL;
+	}
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL) {
+		return -ENOMEM;
+	}
+
+	/* The listener keeps its own copy of the transport ID. */
+	entry->transport = transport;
+	entry->trid = *trid;
+
+	TAILQ_INSERT_HEAD(&subsystem->listeners, entry, link);
+
+	return 0;
+}
+
+/*
+ * Remove the listener matching trid from the subsystem.
+ *
+ * \return 0 on success, -EAGAIN if the subsystem is neither inactive nor
+ * paused, -ENOENT if no such listener exists.
+ */
+int
+spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
+				    const struct spdk_nvme_transport_id *trid)
+{
+	struct spdk_nvmf_listener *entry;
+
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -EAGAIN;
+	}
+
+	entry = _spdk_nvmf_subsystem_find_listener(subsystem, trid);
+	if (entry != NULL) {
+		TAILQ_REMOVE(&subsystem->listeners, entry, link);
+		free(entry);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Decide whether a connection arriving on trid may reach the subsystem.
+ * The discovery subsystem accepts every transport ID; any other subsystem
+ * accepts only transport IDs that match one of its listeners.
+ */
+bool
+spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
+				     struct spdk_nvme_transport_id *trid)
+{
+	if (strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN) == 0) {
+		return true;
+	}
+
+	return _spdk_nvmf_subsystem_find_listener(subsystem, trid) != NULL;
+}
+
+/* Return the first entry of the subsystem's listener list (TAILQ_FIRST of subsystem->listeners). */
+struct spdk_nvmf_listener *
+spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
+{
+ return TAILQ_FIRST(&subsystem->listeners);
+}
+
+/*
+ * Return the listener list entry that follows prev_listener (TAILQ_NEXT).
+ * The subsystem argument is not used by this function.
+ */
+struct spdk_nvmf_listener *
+spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_listener *prev_listener)
+{
+ return TAILQ_NEXT(prev_listener, link);
+}
+
+/* Return a pointer to the transport ID embedded in the listener. */
+const struct spdk_nvme_transport_id *
+spdk_nvmf_listener_get_trid(struct spdk_nvmf_listener *listener)
+{
+ return &listener->trid;
+}
+
+/* Context carried through the per-channel iteration that follows a namespace change. */
+struct subsystem_update_ns_ctx {
+ struct spdk_nvmf_subsystem *subsystem; /* subsystem whose namespaces changed */
+
+ spdk_nvmf_subsystem_state_change_done cb_fn; /* optional completion callback (may be NULL) */
+ void *cb_arg; /* argument handed to cb_fn */
+};
+
+/* Completion of the namespace-update iteration: notify the caller (if requested) and free the context. */
+static void
+subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
+{
+	struct subsystem_update_ns_ctx *update = spdk_io_channel_iter_get_ctx(i);
+
+	if (update->cb_fn != NULL) {
+		update->cb_fn(update->subsystem, update->cb_arg, status);
+	}
+
+	free(update);
+}
+
+/*
+ * Per-channel step of a namespace update: have this channel's poll group
+ * update the subsystem, then continue the iteration with the result.
+ */
+static void
+subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
+{
+	struct subsystem_update_ns_ctx *update = spdk_io_channel_iter_get_ctx(i);
+	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+	int rc;
+
+	rc = spdk_nvmf_poll_group_update_subsystem(group, update->subsystem);
+	spdk_for_each_channel_continue(i, rc);
+}
+
+/*
+ * Run subsystem_update_ns_on_pg on every channel of the subsystem's target,
+ * passing ctx through; cpl is invoked when the iteration finishes.
+ * Always returns 0.
+ */
+static int
+spdk_nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
+ void *ctx)
+{
+ spdk_for_each_channel(subsystem->tgt,
+ subsystem_update_ns_on_pg,
+ ctx,
+ cpl);
+
+ return 0;
+}
+
+/* Tell every controller of the subsystem that namespace nsid has changed. */
+static void
+spdk_nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+	struct spdk_nvmf_ctrlr *cur;
+
+	for (cur = TAILQ_FIRST(&subsystem->ctrlrs); cur != NULL; cur = TAILQ_NEXT(cur, link)) {
+		spdk_nvmf_ctrlr_ns_changed(cur, nsid);
+	}
+}
+
+/*
+ * Synchronously detach namespace nsid from the subsystem: clear its slot,
+ * release and close its bdev, free it and notify the controllers.
+ *
+ * \return 0 on success; -1 if nsid is out of range, the subsystem is neither
+ * inactive nor paused, or no namespace occupies that slot.
+ */
+static int
+_spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+	struct spdk_nvmf_ns **slot;
+	struct spdk_nvmf_ns *victim;
+
+	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED ||
+	       subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE);
+
+	if (nsid == 0 || nsid > subsystem->max_nsid) {
+		return -1;
+	}
+
+	/* Same condition as the assert above, kept for builds where asserts are compiled out. */
+	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE &&
+	    subsystem->state != SPDK_NVMF_SUBSYSTEM_PAUSED) {
+		return -1;
+	}
+
+	slot = &subsystem->ns[nsid - 1];
+	victim = *slot;
+	if (victim == NULL) {
+		return -1;
+	}
+
+	*slot = NULL;
+
+	spdk_bdev_module_release_bdev(victim->bdev);
+	spdk_bdev_close(victim->desc);
+	free(victim);
+
+	spdk_nvmf_subsystem_ns_changed(subsystem, nsid);
+
+	return 0;
+}
+
+/*
+ * Remove namespace nsid from the subsystem and propagate the change to every
+ * poll group of the target.
+ *
+ * The completion context is allocated before the namespace is touched, so an
+ * allocation failure leaves the subsystem unmodified instead of removing the
+ * namespace without ever updating the poll groups.
+ *
+ * \param subsystem Subsystem to modify.
+ * \param nsid ID of the namespace to remove.
+ * \param cb_fn Optional callback invoked once all poll groups were updated.
+ * \param cb_arg Argument passed to cb_fn.
+ *
+ * \return 0 if the removal was performed and the update was started, -ENOMEM
+ * if the context cannot be allocated, or the negative result of the
+ * synchronous removal.
+ */
+int
+spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid,
+			      spdk_nvmf_subsystem_state_change_done cb_fn, void *cb_arg)
+{
+	int rc;
+	struct subsystem_update_ns_ctx *ctx;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		return -ENOMEM;
+	}
+
+	rc = _spdk_nvmf_subsystem_remove_ns(subsystem, nsid);
+	if (rc < 0) {
+		free(ctx);
+		return rc;
+	}
+
+	ctx->subsystem = subsystem;
+	ctx->cb_fn = cb_fn;
+	ctx->cb_arg = cb_arg;
+
+	/* ctx is freed by subsystem_update_ns_done when the iteration completes. */
+	spdk_nvmf_subsystem_update_ns(subsystem, subsystem_update_ns_done, ctx);
+
+	return 0;
+}
+
+/*
+ * Completion callback of the namespace removal triggered by a hot-remove:
+ * logs an error if the removal reported a non-zero status, then asks the
+ * subsystem to resume regardless of that status. cb_arg is not used.
+ */
+static void
+_spdk_nvmf_ns_hot_remove_done(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
+{
+ if (status != 0) {
+ SPDK_ERRLOG("Failed to make changes to NVMe-oF subsystem with id %u\n", subsystem->id);
+ }
+ spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
+}
+
+/*
+ * Callback run after the subsystem pause requested by
+ * spdk_nvmf_ns_hot_remove(): removes the namespace passed in cb_arg.
+ *
+ * If the removal cannot even be started, its completion callback will never
+ * run, so the failure is logged and the subsystem is resumed here; otherwise
+ * it would be left paused.
+ */
+static void
+_spdk_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
+			 void *cb_arg, int status)
+{
+	struct spdk_nvmf_ns *ns = cb_arg;
+	int rc;
+
+	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid, _spdk_nvmf_ns_hot_remove_done,
+					   subsystem);
+	if (rc != 0) {
+		SPDK_ERRLOG("Failed to make changes to NVMe-oF subsystem with id %u\n", subsystem->id);
+		spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
+	}
+}
+
+/*
+ * Hot-remove handler registered when a namespace's bdev is opened; remove_ctx
+ * is the namespace. Pauses the owning subsystem; the removal itself runs from
+ * the pause completion callback.
+ */
+static void
+spdk_nvmf_ns_hot_remove(void *remove_ctx)
+{
+	struct spdk_nvmf_ns *ns = remove_ctx;
+
+	if (spdk_nvmf_subsystem_pause(ns->subsystem, _spdk_nvmf_ns_hot_remove, ns) != 0) {
+		SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
+	}
+}
+
+/* Fill opts with the default namespace options by zeroing opts_size bytes. */
+void
+spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
+{
+ /* All current fields are set to 0 by default. */
+ memset(opts, 0, opts_size);
+}
+
+/* Dummy bdev module used to claim bdevs. */
+static struct spdk_bdev_module ns_bdev_module = {
+ .name = "NVMe-oF Target",
+};
+
+/*
+ * Attach a bdev to the subsystem as a namespace.
+ *
+ * user_opts may be NULL; otherwise at most opts_size bytes of it are copied
+ * over the default options. An all-zero UUID is replaced with the bdev's UUID.
+ * An NSID of 0 means "pick the lowest free NSID".
+ *
+ * Returns the NSID that was assigned, or 0 on any failure (subsystem neither
+ * inactive nor paused, invalid or in-use NSID, NSID range cannot be extended,
+ * allocation failure, or the bdev cannot be opened or claimed).
+ */
+uint32_t
+spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev,
+ const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size)
+{
+ struct spdk_nvmf_ns_opts opts;
+ struct spdk_nvmf_ns *ns;
+ int rc;
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return 0;
+ }
+
+ /* Start from the defaults, then overlay only as many bytes as both sides know about. */
+ spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
+ if (user_opts) {
+ memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
+ }
+
+ if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
+ opts.uuid = *spdk_bdev_get_uuid(bdev);
+ }
+
+ if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
+ SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
+ return 0;
+ }
+
+ if (opts.nsid == 0) {
+ /*
+ * NSID not specified - find a free index.
+ *
+ * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
+ * expand max_nsid if possible.
+ */
+ for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
+ if (_spdk_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
+ break;
+ }
+ }
+ }
+
+ if (_spdk_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
+ SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
+ return 0;
+ }
+
+ if (opts.nsid > subsystem->max_nsid) {
+ struct spdk_nvmf_ns **new_ns_array;
+
+ /* If MaxNamespaces was specified, we can't extend max_nsid beyond it. */
+ if (subsystem->max_allowed_nsid > 0 && opts.nsid > subsystem->max_allowed_nsid) {
+ SPDK_ERRLOG("Can't extend NSID range above MaxNamespaces\n");
+ return 0;
+ }
+
+ /* If a controller is connected, we can't change NN. */
+ if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
+ SPDK_ERRLOG("Can't extend NSID range while controllers are connected\n");
+ return 0;
+ }
+
+ /* The result goes to a temporary so the old array stays valid if realloc fails. */
+ new_ns_array = realloc(subsystem->ns, sizeof(struct spdk_nvmf_ns *) * opts.nsid);
+ if (new_ns_array == NULL) {
+ SPDK_ERRLOG("Memory allocation error while resizing namespace array.\n");
+ return 0;
+ }
+
+ /* Clear only the newly added slots; existing entries were preserved by realloc. */
+ memset(new_ns_array + subsystem->max_nsid, 0,
+ sizeof(struct spdk_nvmf_ns *) * (opts.nsid - subsystem->max_nsid));
+ subsystem->ns = new_ns_array;
+ subsystem->max_nsid = opts.nsid;
+ }
+
+ ns = calloc(1, sizeof(*ns));
+ if (ns == NULL) {
+ SPDK_ERRLOG("Namespace allocation failed\n");
+ return 0;
+ }
+
+ ns->bdev = bdev;
+ ns->opts = opts;
+ ns->subsystem = subsystem;
+ /* spdk_nvmf_ns_hot_remove is passed as the remove callback with ns as its context. */
+ rc = spdk_bdev_open(bdev, true, spdk_nvmf_ns_hot_remove, ns, &ns->desc);
+ if (rc != 0) {
+ SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
+ subsystem->subnqn, spdk_bdev_get_name(bdev), rc);
+ free(ns);
+ return 0;
+ }
+ /* Claim the bdev for the dummy module; on failure the descriptor opened above is closed. */
+ rc = spdk_bdev_module_claim_bdev(bdev, ns->desc, &ns_bdev_module);
+ if (rc != 0) {
+ spdk_bdev_close(ns->desc);
+ free(ns);
+ return 0;
+ }
+ /* NSIDs are 1-based; the array is 0-based. */
+ subsystem->ns[opts.nsid - 1] = ns;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
+ spdk_nvmf_subsystem_get_nqn(subsystem),
+ spdk_bdev_get_name(bdev),
+ opts.nsid);
+
+ spdk_nvmf_subsystem_ns_changed(subsystem, opts.nsid);
+
+ return opts.nsid;
+}
+
+/*
+ * Return the smallest allocated NSID that is greater than prev_nsid, or 0 if
+ * there is none. Passing 0 as prev_nsid searches from the first slot.
+ */
+static uint32_t
+spdk_nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
+		uint32_t prev_nsid)
+{
+	uint32_t nsid = prev_nsid;
+
+	/* nsid is advanced before the lookup, so slot (nsid - 1) is always in range. */
+	while (nsid < subsystem->max_nsid) {
+		nsid++;
+		if (subsystem->ns[nsid - 1] != NULL) {
+			return nsid;
+		}
+	}
+
+	return 0;
+}
+
+/* Return the allocated namespace with the lowest NSID, as looked up from the first allocated NSID. */
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
+{
+	return _spdk_nvmf_subsystem_get_ns(subsystem,
+					   spdk_nvmf_subsystem_get_next_allocated_nsid(subsystem, 0));
+}
+
+/* Return the allocated namespace that follows prev_ns in NSID order, as looked up from the next allocated NSID. */
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
+				struct spdk_nvmf_ns *prev_ns)
+{
+	uint32_t after = prev_ns->opts.nsid;
+
+	return _spdk_nvmf_subsystem_get_ns(subsystem,
+					   spdk_nvmf_subsystem_get_next_allocated_nsid(subsystem, after));
+}
+
+/* Public wrapper around _spdk_nvmf_subsystem_get_ns(): look up a namespace by NSID. */
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+ return _spdk_nvmf_subsystem_get_ns(subsystem, nsid);
+}
+
+/* Return the NSID stored in the namespace's options. */
+uint32_t
+spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
+{
+ return ns->opts.nsid;
+}
+
+/* Return the bdev that backs the namespace. */
+struct spdk_bdev *
+spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
+{
+ return ns->bdev;
+}
+
+/*
+ * Copy the namespace's options into the caller's buffer. The buffer is zeroed
+ * first, then at most opts_size bytes (and no more than the size of the stored
+ * options) are copied.
+ */
+void
+spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
+ size_t opts_size)
+{
+ memset(opts, 0, opts_size);
+ memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
+}
+
+/* Return the subsystem's serial number buffer. */
+const char *
+spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->sn;
+}
+
+/*
+ * Set the subsystem's serial number.
+ *
+ * \return 0 on success; -1 if sn does not fit in the serial number buffer or
+ * contains a byte outside the printable ASCII range.
+ */
+int
+spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
+{
+	const size_t max_len = sizeof(subsystem->sn) - 1;
+	const size_t len = strlen(sn);
+
+	if (len > max_len) {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid sn \"%s\": length %zu > max %zu\n",
+			      sn, len, max_len);
+		return -1;
+	}
+
+	if (!spdk_nvmf_valid_ascii_string(sn, len)) {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII sn\n");
+		SPDK_TRACEDUMP(SPDK_LOG_NVMF, "sn", sn, len);
+		return -1;
+	}
+
+	/* len was checked above, so the copy cannot be truncated. */
+	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
+
+	return 0;
+}
+
+/* Return the subsystem's NQN string (the subsystem's own subnqn buffer). */
+const char *
+spdk_nvmf_subsystem_get_nqn(struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->subnqn;
+}
+
+/* Workaround for an astyle formatting bug: alias the enum type so it can be used as a return type. */
+typedef enum spdk_nvmf_subtype nvmf_subtype_t;
+
+/* Return the subsystem's type (the subtype value it was created with). */
+nvmf_subtype_t
+spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->subtype;
+}
+
+/*
+ * Pick the next unused controller ID for the subsystem.
+ *
+ * Candidates are taken in increasing order starting after the last ID handed
+ * out, wrapping from 0xFFEF back to 1.
+ *
+ * \return an unused cntlid, or 0xFFFF if every valid value is in use.
+ */
+static uint16_t
+spdk_nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
+{
+	int attempts_left;
+
+	/*
+	 * In the worst case, we might have to try all CNTLID values between 1 and 0xFFF0 - 1
+	 * before we find one that is unused (or find that all values are in use).
+	 */
+	for (attempts_left = 0xFFF0 - 1; attempts_left > 0; attempts_left--) {
+		subsystem->next_cntlid++;
+		if (subsystem->next_cntlid >= 0xFFF0) {
+			/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
+			subsystem->next_cntlid = 1;
+		}
+
+		/* An ID is free when no controller of this subsystem currently uses it. */
+		if (spdk_nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
+			return subsystem->next_cntlid;
+		}
+	}
+
+	/* All valid cntlid values are in use. */
+	return 0xFFFF;
+}
+
+/*
+ * Assign a controller ID to ctrlr and append it to the subsystem's controller
+ * list.
+ *
+ * \return 0 on success, -EBUSY if no controller ID is available (ctrlr is not
+ * added in that case).
+ */
+int
+spdk_nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
+{
+	ctrlr->cntlid = spdk_nvmf_subsystem_gen_cntlid(subsystem);
+	if (ctrlr->cntlid != 0xFFFF) {
+		TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);
+		return 0;
+	}
+
+	/* Unable to get a cntlid */
+	SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
+	return -EBUSY;
+}
+
+/*
+ * Unlink ctrlr from the subsystem's controller list. The controller must
+ * belong to this subsystem (asserted); it is not freed here.
+ */
+void
+spdk_nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr)
+{
+ assert(subsystem == ctrlr->subsys);
+ TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
+}
+
+/* Find the subsystem's controller with the given controller ID; NULL if there is none. */
+struct spdk_nvmf_ctrlr *
+spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
+{
+	struct spdk_nvmf_ctrlr *cur;
+
+	for (cur = TAILQ_FIRST(&subsystem->ctrlrs); cur != NULL; cur = TAILQ_NEXT(cur, link)) {
+		if (cur->cntlid == cntlid) {
+			break;
+		}
+	}
+
+	/* cur is NULL here when no controller matched. */
+	return cur;
+}
+
+/* Return the subsystem's max_allowed_nsid (the num_ns value it was created with). */
+uint32_t
+spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->max_allowed_nsid;
+}
diff --git a/src/spdk/lib/nvmf/transport.c b/src/spdk/lib/nvmf/transport.c
new file mode 100644
index 00000000..af4660c9
--- /dev/null
+++ b/src/spdk/lib/nvmf/transport.c
@@ -0,0 +1,236 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/config.h"
+#include "spdk/log.h"
+#include "spdk/nvmf.h"
+#include "spdk/queue.h"
+#include "spdk/util.h"
+/*
+ * Ops tables of the transports compiled into this build; searched by type in
+ * spdk_nvmf_get_transport_ops(). The RDMA entry is present only when
+ * SPDK_CONFIG_RDMA is defined.
+ * NOTE(review): without SPDK_CONFIG_RDMA the initializer list is empty, which
+ * ISO C before C23 does not permit -- confirm the supported compilers accept it.
+ */
+static const struct spdk_nvmf_transport_ops *const g_transport_ops[] = {
+#ifdef SPDK_CONFIG_RDMA
+ &spdk_nvmf_transport_rdma,
+#endif
+};
+/* SPDK_COUNTOF of g_transport_ops; loop bound for the lookup below (presumably the element count). */
+#define NUM_TRANSPORTS (SPDK_COUNTOF(g_transport_ops))
+
+/*
+ * Look up the ops table registered in g_transport_ops for the given transport
+ * type. Returns the first matching entry, or NULL if no entry matches.
+ */
+static inline const struct spdk_nvmf_transport_ops *
+spdk_nvmf_get_transport_ops(enum spdk_nvme_transport_type type)
+{
+	const struct spdk_nvmf_transport_ops *found = NULL;
+	size_t idx = 0;
+
+	while (idx != NUM_TRANSPORTS && found == NULL) {
+		if (g_transport_ops[idx]->type == type) {
+			found = g_transport_ops[idx];
+		}
+		idx++;
+	}
+
+	return found;
+}
+
+/*
+ * Create a transport of the given type.
+ *
+ * opts is validated first: io_unit_size must be non-zero, max_io_size must be
+ * a whole multiple of io_unit_size, and max_io_size / io_unit_size must not
+ * exceed SPDK_NVMF_MAX_SGL_ENTRIES. The requested type must also have an ops
+ * table compiled in.
+ *
+ * On success the new transport has its ops pointer set and holds a copy of
+ * *opts. Returns NULL, after logging an error, on any failure.
+ */
+struct spdk_nvmf_transport *
+spdk_nvmf_transport_create(enum spdk_nvme_transport_type type,
+			   struct spdk_nvmf_transport_opts *opts)
+{
+	const struct spdk_nvmf_transport_ops *ops = NULL;
+	struct spdk_nvmf_transport *transport;
+
+	/*
+	 * io_unit_size is the divisor in the two checks that follow, so a zero
+	 * value must be rejected before they are evaluated (division by zero).
+	 */
+	if ((opts->io_unit_size == 0) ||
+	    (opts->max_io_size % opts->io_unit_size != 0) ||
+	    (opts->max_io_size / opts->io_unit_size >
+	     SPDK_NVMF_MAX_SGL_ENTRIES)) {
+		SPDK_ERRLOG("%s: invalid IO size, MaxIO:%d, UnitIO:%d, MaxSGL:%d\n",
+			    spdk_nvme_transport_id_trtype_str(type),
+			    opts->max_io_size,
+			    opts->io_unit_size,
+			    SPDK_NVMF_MAX_SGL_ENTRIES);
+		return NULL;
+	}
+
+	ops = spdk_nvmf_get_transport_ops(type);
+	if (!ops) {
+		SPDK_ERRLOG("Transport type %s unavailable.\n",
+			    spdk_nvme_transport_id_trtype_str(type));
+		return NULL;
+	}
+
+	transport = ops->create(opts);
+	if (!transport) {
+		SPDK_ERRLOG("Unable to create new transport of type %s\n",
+			    spdk_nvme_transport_id_trtype_str(type));
+		return NULL;
+	}
+
+	/* Remember how this transport was built so later calls can dispatch on it. */
+	transport->ops = ops;
+	transport->opts = *opts;
+
+	return transport;
+}
+
+/* Destroy the transport through its ops table and return ops->destroy()'s result. */
+int
+spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport)
+{
+	const struct spdk_nvmf_transport_ops *ops = transport->ops;
+
+	return ops->destroy(transport);
+}
+
+/* Forward a listen request for trid to ops->listen() and return its result. */
+int
+spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport,
+			   const struct spdk_nvme_transport_id *trid)
+{
+	const struct spdk_nvmf_transport_ops *ops = transport->ops;
+
+	return ops->listen(transport, trid);
+}
+
+/* Forward a stop-listen request for trid to ops->stop_listen() and return its result. */
+int
+spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport,
+				const struct spdk_nvme_transport_id *trid)
+{
+	const struct spdk_nvmf_transport_ops *ops = transport->ops;
+
+	return ops->stop_listen(transport, trid);
+}
+
+/* Let the transport check for new connections; cb_fn is handed to ops->accept(). */
+void
+spdk_nvmf_transport_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
+{
+	const struct spdk_nvmf_transport_ops *ops = transport->ops;
+
+	ops->accept(transport, cb_fn);
+}
+
+/* Ask the transport, via ops->listener_discover(), to fill entry for trid. */
+void
+spdk_nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport,
+				      struct spdk_nvme_transport_id *trid,
+				      struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+	const struct spdk_nvmf_transport_ops *ops = transport->ops;
+
+	ops->listener_discover(transport, trid, entry);
+}
+
+/*
+ * Create a poll group for the transport through ops->poll_group_create() and
+ * record the owning transport in it.
+ *
+ * Returns the new group, or NULL if the transport could not create one.
+ */
+struct spdk_nvmf_transport_poll_group *
+spdk_nvmf_transport_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+	struct spdk_nvmf_transport_poll_group *group;
+
+	group = transport->ops->poll_group_create(transport);
+	if (!group) {
+		/* Creation failed; hand NULL back instead of dereferencing it. */
+		return NULL;
+	}
+
+	group->transport = transport;
+
+	return group;
+}
+
+/* Destroy the poll group through the ops table of the transport it records. */
+void
+spdk_nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+	const struct spdk_nvmf_transport_ops *ops = group->transport->ops;
+
+	ops->poll_group_destroy(group);
+}
+
+/*
+ * Add qpair to the poll group.
+ *
+ * A qpair with no transport yet adopts the group's transport. A qpair that
+ * already has one must match the group's transport: a mismatch trips the
+ * assert and, when asserts are compiled out, returns -1.
+ *
+ * Otherwise returns the result of ops->poll_group_add().
+ */
+int
+spdk_nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+				   struct spdk_nvmf_qpair *qpair)
+{
+	if (!qpair->transport) {
+		qpair->transport = group->transport;
+	} else {
+		assert(qpair->transport == group->transport);
+		if (qpair->transport != group->transport) {
+			return -1;
+		}
+	}
+
+	return group->transport->ops->poll_group_add(group, qpair);
+}
+
+/* Poll the group through ops->poll_group_poll() and return its result. */
+int
+spdk_nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+	const struct spdk_nvmf_transport_ops *ops = group->transport->ops;
+
+	return ops->poll_group_poll(group);
+}
+
+/* Free the request through the ops table of its qpair's transport (ops->req_free()). */
+int
+spdk_nvmf_transport_req_free(struct spdk_nvmf_request *req)
+{
+	const struct spdk_nvmf_transport_ops *ops = req->qpair->transport->ops;
+
+	return ops->req_free(req);
+}
+
+/* Complete the request through the ops table of its qpair's transport (ops->req_complete()). */
+int
+spdk_nvmf_transport_req_complete(struct spdk_nvmf_request *req)
+{
+	const struct spdk_nvmf_transport_ops *ops = req->qpair->transport->ops;
+
+	return ops->req_complete(req);
+}
+
+/* Deinitialize the qpair through its transport's ops->qpair_fini(). */
+void
+spdk_nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair)
+{
+	const struct spdk_nvmf_transport_ops *ops = qpair->transport->ops;
+
+	ops->qpair_fini(qpair);
+}
+
+/* Return what the qpair's transport reports from ops->qpair_is_idle(). */
+bool
+spdk_nvmf_transport_qpair_is_idle(struct spdk_nvmf_qpair *qpair)
+{
+	const struct spdk_nvmf_transport_ops *ops = qpair->transport->ops;
+
+	return ops->qpair_is_idle(qpair);
+}
+
+/* Fetch the qpair's peer transport ID into trid via ops->qpair_get_peer_trid(). */
+int
+spdk_nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+					struct spdk_nvme_transport_id *trid)
+{
+	const struct spdk_nvmf_transport_ops *ops = qpair->transport->ops;
+
+	return ops->qpair_get_peer_trid(qpair, trid);
+}
+
+/* Fetch the qpair's local transport ID into trid via ops->qpair_get_local_trid(). */
+int
+spdk_nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+					 struct spdk_nvme_transport_id *trid)
+{
+	const struct spdk_nvmf_transport_ops *ops = qpair->transport->ops;
+
+	return ops->qpair_get_local_trid(qpair, trid);
+}
+
+/* Fetch the qpair's listener transport ID into trid via ops->qpair_get_listen_trid(). */
+int
+spdk_nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+					  struct spdk_nvme_transport_id *trid)
+{
+	const struct spdk_nvmf_transport_ops *ops = qpair->transport->ops;
+
+	return ops->qpair_get_listen_trid(qpair, trid);
+}
+
+/*
+ * Initialize opts through the opts_init() callback of the given transport type.
+ *
+ * Returns true on success. If the type has no ops table compiled in, logs an
+ * error, leaves opts untouched and returns false.
+ */
+bool
+spdk_nvmf_transport_opts_init(enum spdk_nvme_transport_type type,
+			      struct spdk_nvmf_transport_opts *opts)
+{
+	const struct spdk_nvmf_transport_ops *ops = spdk_nvmf_get_transport_ops(type);
+
+	if (ops != NULL) {
+		ops->opts_init(opts);
+		return true;
+	}
+
+	SPDK_ERRLOG("Transport type %s unavailable.\n",
+		    spdk_nvme_transport_id_trtype_str(type));
+	return false;
+}
diff --git a/src/spdk/lib/nvmf/transport.h b/src/spdk/lib/nvmf/transport.h
new file mode 100644
index 00000000..1329a80c
--- /dev/null
+++ b/src/spdk/lib/nvmf/transport.h
@@ -0,0 +1,200 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVMF_TRANSPORT_H
+#define SPDK_NVMF_TRANSPORT_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/nvme.h"
+#include "spdk/nvmf.h"
+/*
+ * Generic part of a transport instance, dispatched on through `ops`.
+ */
+struct spdk_nvmf_transport {
+ struct spdk_nvmf_tgt *tgt; /* NOTE(review): presumably the target using this transport; not set in transport.c -- confirm */
+ const struct spdk_nvmf_transport_ops *ops; /* set by spdk_nvmf_transport_create() to the ops table for the requested type */
+ struct spdk_nvmf_transport_opts opts; /* copy of the options passed to spdk_nvmf_transport_create() */
+/* List linkage; NOTE(review): which list holds transports is not visible here -- confirm. */
+ TAILQ_ENTRY(spdk_nvmf_transport) link;
+};
+
+struct spdk_nvmf_transport_ops {
+ /**
+ * Transport type.
+ * spdk_nvmf_get_transport_ops() matches this against the requested type.
+ */
+ enum spdk_nvme_transport_type type;
+
+ /**
+ * Initialize transport options to default value.
+ * Invoked through spdk_nvmf_transport_opts_init().
+ */
+ void (*opts_init)(struct spdk_nvmf_transport_opts *opts);
+
+ /**
+ * Create a transport for the given transport opts.
+ * Invoked through spdk_nvmf_transport_create(), which treats a NULL
+ * return as failure.
+ */
+ struct spdk_nvmf_transport *(*create)(struct spdk_nvmf_transport_opts *opts);
+
+ /**
+ * Destroy the transport.
+ * The return value is passed through by spdk_nvmf_transport_destroy().
+ */
+ int (*destroy)(struct spdk_nvmf_transport *transport);
+
+ /**
+ * Instruct the transport to accept new connections at the address
+ * provided. This may be called multiple times.
+ * The return value is passed through by spdk_nvmf_transport_listen().
+ */
+ int (*listen)(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Stop accepting new connections at the given address.
+ * The return value is passed through by spdk_nvmf_transport_stop_listen().
+ */
+ int (*stop_listen)(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Check for new connections on the transport.
+ * cb_fn is the callback given to spdk_nvmf_transport_accept().
+ */
+ void (*accept)(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn);
+
+ /**
+ * Fill out a discovery log entry for a specific listen address.
+ * Invoked through spdk_nvmf_transport_listener_discover().
+ */
+ void (*listener_discover)(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry);
+
+ /**
+ * Create a new poll group.
+ * spdk_nvmf_transport_poll_group_create() stores the transport in the
+ * returned group's `transport` field.
+ */
+ struct spdk_nvmf_transport_poll_group *(*poll_group_create)(struct spdk_nvmf_transport *transport);
+
+ /**
+ * Destroy a poll group.
+ * Invoked through spdk_nvmf_transport_poll_group_destroy().
+ */
+ void (*poll_group_destroy)(struct spdk_nvmf_transport_poll_group *group);
+
+ /**
+ * Add a qpair to a poll group.
+ * spdk_nvmf_transport_poll_group_add() calls this only once the
+ * qpair's transport equals the group's transport.
+ */
+ int (*poll_group_add)(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Poll the group to process I/O.
+ * The return value is passed through by spdk_nvmf_transport_poll_group_poll().
+ */
+ int (*poll_group_poll)(struct spdk_nvmf_transport_poll_group *group);
+
+ /**
+ * Free the request without sending a response
+ * to the originator. Release memory tied to this request.
+ * Invoked through spdk_nvmf_transport_req_free().
+ */
+ int (*req_free)(struct spdk_nvmf_request *req);
+
+ /**
+ * Signal request completion, which sends a response
+ * to the originator.
+ * Invoked through spdk_nvmf_transport_req_complete().
+ */
+ int (*req_complete)(struct spdk_nvmf_request *req);
+
+ /**
+ * Deinitialize a connection.
+ * Invoked through spdk_nvmf_transport_qpair_fini().
+ */
+ void (*qpair_fini)(struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * True if the qpair has no pending IO.
+ * Invoked through spdk_nvmf_transport_qpair_is_idle().
+ */
+ bool (*qpair_is_idle)(struct spdk_nvmf_qpair *qpair);
+
+ /**
+ * Get the peer transport ID for the queue pair.
+ * Invoked through spdk_nvmf_transport_qpair_get_peer_trid().
+ */
+ int (*qpair_get_peer_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Get the local transport ID for the queue pair.
+ * Invoked through spdk_nvmf_transport_qpair_get_local_trid().
+ */
+ int (*qpair_get_local_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+ /**
+ * Get the listener transport ID that accepted this qpair originally.
+ * Invoked through spdk_nvmf_transport_qpair_get_listen_trid().
+ */
+ int (*qpair_get_listen_trid)(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+};
+
+/* Stop listening at trid; returns the result of the transport's stop_listen op. */
+int spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+/* Check the transport for new connections; cb_fn is handed to its accept op. */
+void spdk_nvmf_transport_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn);
+/* Have the transport's listener_discover op fill entry for the listen address trid. */
+void spdk_nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry);
+/* Create a poll group via the transport's poll_group_create op and record the transport in it. */
+struct spdk_nvmf_transport_poll_group *spdk_nvmf_transport_poll_group_create(
+ struct spdk_nvmf_transport *transport);
+/* Destroy a poll group via the poll_group_destroy op of the transport it records. */
+void spdk_nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
+/*
+ * Add qpair to group. A qpair without a transport adopts the group's; a qpair
+ * bound to a different transport is rejected with -1 (and trips an assert).
+ */
+int spdk_nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+/* Poll the group; returns the result of the transport's poll_group_poll op. */
+int spdk_nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group);
+/* Free req via the req_free op of its qpair's transport. */
+int spdk_nvmf_transport_req_free(struct spdk_nvmf_request *req);
+/* Complete req via the req_complete op of its qpair's transport. */
+int spdk_nvmf_transport_req_complete(struct spdk_nvmf_request *req);
+/* Deinitialize qpair via its transport's qpair_fini op. */
+void spdk_nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair);
+/* Return the result of the transport's qpair_is_idle op for qpair. */
+bool spdk_nvmf_transport_qpair_is_idle(struct spdk_nvmf_qpair *qpair);
+/* Get the peer transport ID of qpair into trid via the qpair_get_peer_trid op. */
+int spdk_nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+/* Get the local transport ID of qpair into trid via the qpair_get_local_trid op. */
+int spdk_nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+/* Get the listener transport ID of qpair into trid via the qpair_get_listen_trid op. */
+int spdk_nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+/* Initialize opts via the given type's opts_init op; false if the type has no ops table compiled in. */
+bool spdk_nvmf_transport_opts_init(enum spdk_nvme_transport_type type,
+ struct spdk_nvmf_transport_opts *opts);
+/* RDMA ops table; listed in transport.c's g_transport_ops when SPDK_CONFIG_RDMA is defined. */
+extern const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma;
+
+#endif /* SPDK_NVMF_TRANSPORT_H */