Diffstat (limited to 'src/spdk/lib/nvmf')
-rw-r--r--  src/spdk/lib/nvmf/Makefile              75
-rw-r--r--  src/spdk/lib/nvmf/ctrlr.c             3224
-rw-r--r--  src/spdk/lib/nvmf/ctrlr_bdev.c         761
-rw-r--r--  src/spdk/lib/nvmf/ctrlr_discovery.c    159
-rw-r--r--  src/spdk/lib/nvmf/fc.c                3957
-rw-r--r--  src/spdk/lib/nvmf/fc_ls.c             1678
-rw-r--r--  src/spdk/lib/nvmf/nvmf.c              1457
-rw-r--r--  src/spdk/lib/nvmf/nvmf_fc.h            999
-rw-r--r--  src/spdk/lib/nvmf/nvmf_internal.h      371
-rw-r--r--  src/spdk/lib/nvmf/nvmf_rpc.c          2012
-rw-r--r--  src/spdk/lib/nvmf/rdma.c              4313
-rw-r--r--  src/spdk/lib/nvmf/spdk_nvmf.map        118
-rw-r--r--  src/spdk/lib/nvmf/subsystem.c         2515
-rw-r--r--  src/spdk/lib/nvmf/tcp.c               2631
-rw-r--r--  src/spdk/lib/nvmf/transport.c          572
-rw-r--r--  src/spdk/lib/nvmf/transport.h           82
16 files changed, 24924 insertions, 0 deletions
diff --git a/src/spdk/lib/nvmf/Makefile b/src/spdk/lib/nvmf/Makefile
new file mode 100644
index 000000000..b4556564a
--- /dev/null
+++ b/src/spdk/lib/nvmf/Makefile
@@ -0,0 +1,75 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+SO_VER := 5
+SO_MINOR := 0
+
+C_SRCS = ctrlr.c ctrlr_discovery.c ctrlr_bdev.c \
+ subsystem.c nvmf.c nvmf_rpc.c transport.c tcp.c
+
+C_SRCS-$(CONFIG_RDMA) += rdma.c
+LIBNAME = nvmf
+LOCAL_SYS_LIBS = -luuid
+ifeq ($(CONFIG_RDMA),y)
+LOCAL_SYS_LIBS += -libverbs -lrdmacm
+# Attach only if FreeBSD and RDMA are specified with configure
+ifeq ($(OS),FreeBSD)
+# Mellanox - MLX4 HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libmlx4.*)","")
+LOCAL_SYS_LIBS += -lmlx4
+endif
+# Mellanox - MLX5 HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libmlx5.*)","")
+LOCAL_SYS_LIBS += -lmlx5
+endif
+# Chelsio HBA Userspace Library
+ifneq ("$(wildcard /usr/lib/libcxgb4.*)","")
+LOCAL_SYS_LIBS += -lcxgb4
+endif
+endif
+endif
+
+ifeq ($(CONFIG_FC),y)
+C_SRCS += fc.c fc_ls.c
+CFLAGS += -I$(CURDIR)
+ifneq ($(strip $(CONFIG_FC_PATH)),)
+CFLAGS += -I$(CONFIG_FC_PATH)
+endif
+endif
+
+SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_nvmf.map)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/nvmf/ctrlr.c b/src/spdk/lib/nvmf/ctrlr.c
new file mode 100644
index 000000000..638cde9d2
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr.c
@@ -0,0 +1,3224 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/bit_array.h"
+#include "spdk/endian.h"
+#include "spdk/thread.h"
+#include "spdk/trace.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/nvmf_cmd.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/version.h"
+
+#include "spdk_internal/log.h"
+
+#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000
+#define NVMF_DISC_KATO_IN_MS 120000
+#define KAS_TIME_UNIT_IN_MS 100
+#define KAS_DEFAULT_VALUE (MIN_KEEP_ALIVE_TIMEOUT_IN_MS / KAS_TIME_UNIT_IN_MS)
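+/* KAS is reported in 100 ms units; the default of 100 units matches the 10 second minimum keep-alive timeout. */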
+
+/*
+ * Report the SPDK version as the firmware revision.
+ * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
+ */
+#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING
+
+/*
+ * Support for custom admin command handlers
+ */
+struct spdk_nvmf_custom_admin_cmd {
+ spdk_nvmf_custom_cmd_hdlr hdlr;
+ uint32_t nsid; /* nsid to forward */
+};
+
+static struct spdk_nvmf_custom_admin_cmd g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_MAX_OPC + 1];
+
+static void _nvmf_request_complete(void *ctx);
+
+static inline void
+nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
+ uint8_t iattr, uint16_t ipo)
+{
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ rsp->status_code_specific.invalid.iattr = iattr;
+ rsp->status_code_specific.invalid.ipo = ipo;
+}
+
+#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field) \
+ nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field))
+#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \
+ nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))
+
+static void
+nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ if (!ctrlr) {
+ SPDK_ERRLOG("Controller is NULL\n");
+ return;
+ }
+
+ if (ctrlr->keep_alive_poller == NULL) {
+ return;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Stop keep alive poller\n");
+ spdk_poller_unregister(&ctrlr->keep_alive_poller);
+}
+
+static void
+nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
+{
+ if (status == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr disconnect qpairs complete successfully\n");
+ } else {
+ SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
+ }
+}
+
+static int
+_nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i, bool include_admin)
+{
+ int rc = 0;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_qpair *qpair, *temp_qpair;
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_poll_group *group;
+
+ ctrlr = spdk_io_channel_iter_get_ctx(i);
+ ch = spdk_io_channel_iter_get_channel(i);
+ group = spdk_io_channel_get_ctx(ch);
+
+ TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
+ if (qpair->ctrlr == ctrlr && (include_admin || !nvmf_qpair_is_admin_queue(qpair))) {
+ rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
+ if (rc) {
+ SPDK_ERRLOG("Qpair disconnect failed\n");
+ return rc;
+ }
+ }
+ }
+
+ return rc;
+}
+
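+/*
+ * These callbacks run once per poll group; the per-group disconnect status is
+ * passed to spdk_for_each_channel_continue(), so a failure on any group ends
+ * the iteration and is reported to the completion callback.
+ */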
+static void
+nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
+{
+ spdk_for_each_channel_continue(i, _nvmf_ctrlr_disconnect_qpairs_on_pg(i, true));
+}
+
+static void
+nvmf_ctrlr_disconnect_io_qpairs_on_pg(struct spdk_io_channel_iter *i)
+{
+ spdk_for_each_channel_continue(i, _nvmf_ctrlr_disconnect_qpairs_on_pg(i, false));
+}
+
+static int
+nvmf_ctrlr_keep_alive_poll(void *ctx)
+{
+ uint64_t keep_alive_timeout_tick;
+ uint64_t now = spdk_get_ticks();
+ struct spdk_nvmf_ctrlr *ctrlr = ctx;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Polling ctrlr keep alive timeout\n");
+
+ /* If the Keep alive feature is in use and the timer expires */
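+ /* kato is stored in milliseconds; convert it to a deadline in ticks relative to the last observed keep-alive */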
+ keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
+ ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
+ if (now > keep_alive_timeout_tick) {
+ SPDK_NOTICELOG("Disconnecting host from subsystem %s due to keep alive timeout.\n",
+ ctrlr->subsys->subnqn);
+ /* set the Controller Fatal Status bit to '1' */
+ if (ctrlr->vcprop.csts.bits.cfs == 0) {
+ ctrlr->vcprop.csts.bits.cfs = 1;
+
+ /*
+ * Disconnect the qpairs owned by this controller (qpair->ctrlr == ctrlr),
+ * terminating the transport connections, destroying the ctrlr and breaking
+ * the host to controller association.
+ */
+ spdk_for_each_channel(ctrlr->subsys->tgt,
+ nvmf_ctrlr_disconnect_qpairs_on_pg,
+ ctrlr,
+ nvmf_ctrlr_disconnect_qpairs_done);
+ }
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ if (!ctrlr) {
+ SPDK_ERRLOG("Controller is NULL\n");
+ return;
+ }
+
+ /* if cleared to 0 then the Keep Alive Timer is disabled */
+ if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
+
+ ctrlr->last_keep_alive_tick = spdk_get_ticks();
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Ctrlr add keep alive poller\n");
+ ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
+ ctrlr->feat.keep_alive_timer.bits.kato * 1000);
+ }
+}
+
+static void
+ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_fabric_connect_rsp *rsp)
+{
+ assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());
+
+ /* check if we would exceed ctrlr connection limit */
+ if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) {
+ SPDK_ERRLOG("Requested QID %u but Max QID is %u\n",
+ qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
+ return;
+ }
+
+ if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
+ SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
+ return;
+ }
+
+ qpair->ctrlr = ctrlr;
+ spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid);
+
+ rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+ rsp->status_code_specific.success.cntlid = ctrlr->cntlid;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "connect capsule response: cntlid = 0x%04x\n",
+ rsp->status_code_specific.success.cntlid);
+}
+
+static void
+_nvmf_ctrlr_add_admin_qpair(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ ctrlr->admin_qpair = qpair;
+ nvmf_ctrlr_start_keep_alive_timer(ctrlr);
+ ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
+ _nvmf_request_complete(req);
+}
+
+static void
+_nvmf_subsystem_add_ctrlr(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ if (nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) {
+ SPDK_ERRLOG("Unable to add controller to subsystem\n");
+ spdk_bit_array_free(&ctrlr->qpair_mask);
+ free(ctrlr);
+ qpair->ctrlr = NULL;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_admin_qpair, req);
+}
+
+static void
+nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr_data *cdata)
+{
+ cdata->kas = KAS_DEFAULT_VALUE;
+ cdata->sgls.supported = 1;
+ cdata->sgls.keyed_sgl = 1;
+ cdata->sgls.sgl_offset = 1;
+ cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
+ cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16;
+ cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16;
+ cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */
+ cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC;
+ cdata->nvmf_specific.msdbd = 1;
+
+ if (transport->ops->cdata_init) {
+ transport->ops->cdata_init(transport, subsystem, cdata);
+ }
+}
+
+static struct spdk_nvmf_ctrlr *
+nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_request *req,
+ struct spdk_nvmf_fabric_connect_cmd *connect_cmd,
+ struct spdk_nvmf_fabric_connect_data *connect_data)
+{
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_transport *transport;
+
+ ctrlr = calloc(1, sizeof(*ctrlr));
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("Memory allocation failed\n");
+ return NULL;
+ }
+
+ TAILQ_INIT(&ctrlr->log_head);
+ ctrlr->subsys = subsystem;
+ ctrlr->thread = req->qpair->group->thread;
+
+ transport = req->qpair->transport;
+ ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr);
+ if (!ctrlr->qpair_mask) {
+ SPDK_ERRLOG("Failed to allocate controller qpair mask\n");
+ free(ctrlr);
+ return NULL;
+ }
+
+ nvmf_ctrlr_cdata_init(transport, subsystem, &ctrlr->cdata);
+
+ /*
+ * KAS: This field indicates the granularity of the Keep Alive Timer in 100ms units.
+ * If this field is cleared to 0h, then Keep Alive is not supported.
+ */
+ if (ctrlr->cdata.kas) {
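+ /* Round the requested KATO (in ms) up to a multiple of the granularity implied by KAS (100 * 100 ms = 10 s) */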
+ ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
+ KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
+ KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
+ }
+
+ ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
+ ctrlr->feat.volatile_write_cache.bits.wce = 1;
+
+ if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ /*
+ * If keep-alive timeout is not set, discovery controllers use some
+ * arbitrary high value in order to cleanup stale discovery sessions
+ *
+ * From the 1.0a nvme-of spec:
+ * "The Keep Alive command is reserved for
+ * Discovery controllers. A transport may specify a
+ * fixed Discovery controller activity timeout value
+ * (e.g., 2 minutes). If no commands are received
+ * by a Discovery controller within that time
+ * period, the controller may perform the
+ * actions for Keep Alive Timer expiration".
+ * kato is in milliseconds.
+ */
+ if (ctrlr->feat.keep_alive_timer.bits.kato == 0) {
+ ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
+ }
+ }
+
+ /* Subtract 1 for admin queue, 1 for 0's based */
+ ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
+ 1;
+ ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 -
+ 1;
+
+ spdk_uuid_copy(&ctrlr->hostid, (struct spdk_uuid *)connect_data->hostid);
+ memcpy(ctrlr->hostnqn, connect_data->hostnqn, sizeof(ctrlr->hostnqn));
+
+ ctrlr->vcprop.cap.raw = 0;
+ ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */
+ ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth -
+ 1; /* max queue depth */
+ ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */
+ ctrlr->vcprop.cap.bits.to = 1; /* ready timeout - 500 msec units */
+ ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */
+ ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */
+ ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */
+ ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */
+
+ /* Version Supported: 1.3 */
+ ctrlr->vcprop.vs.bits.mjr = 1;
+ ctrlr->vcprop.vs.bits.mnr = 3;
+ ctrlr->vcprop.vs.bits.ter = 0;
+
+ ctrlr->vcprop.cc.raw = 0;
+ ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */
+
+ ctrlr->vcprop.csts.raw = 0;
+ ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "vs 0x%x\n", ctrlr->vcprop.vs.raw);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cc 0x%x\n", ctrlr->vcprop.cc.raw);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "csts 0x%x\n", ctrlr->vcprop.csts.raw);
+
+ ctrlr->dif_insert_or_strip = transport->opts.dif_insert_or_strip;
+
+ req->qpair->ctrlr = ctrlr;
+ spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_add_ctrlr, req);
+
+ return ctrlr;
+}
+
+static void
+_nvmf_ctrlr_destruct(void *ctx)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = ctx;
+ struct spdk_nvmf_reservation_log *log, *log_tmp;
+
+ nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
+
+ TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
+ TAILQ_REMOVE(&ctrlr->log_head, log, link);
+ free(log);
+ }
+ free(ctrlr);
+}
+
+void
+nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);
+
+ spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
+}
+
+static void
+nvmf_ctrlr_add_io_qpair(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ /* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect.
+ * For error case, the value should be NULL. So set it to NULL at first.
+ */
+ qpair->ctrlr = NULL;
+
+ if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ SPDK_ERRLOG("I/O connect not allowed on discovery controller\n");
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+ goto end;
+ }
+
+ if (!ctrlr->vcprop.cc.bits.en) {
+ SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n");
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+ goto end;
+ }
+
+ if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) {
+ SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n",
+ ctrlr->vcprop.cc.bits.iosqes);
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+ goto end;
+ }
+
+ if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) {
+ SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n",
+ ctrlr->vcprop.cc.bits.iocqes);
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
+ goto end;
+ }
+
+ ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
+end:
+ spdk_nvmf_request_complete(req);
+}
+
+static void
+_nvmf_ctrlr_add_io_qpair(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_fabric_connect_data *data = req->data;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_qpair *admin_qpair;
+ struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
+ struct spdk_nvmf_subsystem *subsystem;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect I/O Queue for controller id 0x%x\n", data->cntlid);
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
+ /* We already checked this in spdk_nvmf_ctrlr_connect */
+ assert(subsystem != NULL);
+
+ ctrlr = nvmf_subsystem_get_ctrlr(subsystem, data->cntlid);
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid);
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ admin_qpair = ctrlr->admin_qpair;
+ qpair->ctrlr = ctrlr;
+ spdk_thread_send_msg(admin_qpair->group->thread, nvmf_ctrlr_add_io_qpair, req);
+}
+
+static bool
+nvmf_qpair_access_allowed(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_subsystem *subsystem,
+ const char *hostnqn)
+{
+ struct spdk_nvme_transport_id listen_trid = {};
+
+ if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
+ SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subsystem->subnqn, hostnqn);
+ return false;
+ }
+
+ if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) {
+ SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n",
+ subsystem->subnqn);
+ return false;
+ }
+
+ if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) {
+ SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n",
+ subsystem->subnqn, hostnqn);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fabric_connect_data *data = req->data;
+ struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_transport *transport = qpair->transport;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ struct spdk_nvmf_subsystem *subsystem;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "recfmt 0x%x qid %u sqsize %u\n",
+ cmd->recfmt, cmd->qid, cmd->sqsize);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect data:\n");
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " cntlid: 0x%04x\n", data->cntlid);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n",
+ ntohl(*(uint32_t *)&data->hostid[0]),
+ ntohs(*(uint16_t *)&data->hostid[4]),
+ ntohs(*(uint16_t *)&data->hostid[6]),
+ data->hostid[8],
+ data->hostid[9],
+ ntohs(*(uint16_t *)&data->hostid[10]),
+ ntohl(*(uint32_t *)&data->hostid[12]));
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " subnqn: \"%s\"\n", data->subnqn);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostnqn: \"%s\"\n", data->hostnqn);
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
+ if (!subsystem) {
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (cmd->recfmt != 0) {
+ SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /*
+ * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and
+ * strictly less than max_aq_depth (admin queues) or max_queue_depth (io queues).
+ */
+ if (cmd->sqsize == 0) {
+ SPDK_ERRLOG("Invalid SQSIZE = 0\n");
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (cmd->qid == 0) {
+ if (cmd->sqsize >= transport->opts.max_aq_depth) {
+ SPDK_ERRLOG("Invalid SQSIZE for admin queue %u (min 1, max %u)\n",
+ cmd->sqsize, transport->opts.max_aq_depth - 1);
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ } else if (cmd->sqsize >= transport->opts.max_queue_depth) {
+ SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n",
+ cmd->sqsize, transport->opts.max_queue_depth - 1);
+ SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ qpair->sq_head_max = cmd->sqsize;
+ qpair->qid = cmd->qid;
+
+ if (0 == qpair->qid) {
+ qpair->group->stat.admin_qpairs++;
+ } else {
+ qpair->group->stat.io_qpairs++;
+ }
+
+ if (cmd->qid == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid);
+
+ if (data->cntlid != 0xFFFF) {
+ /* This NVMf target only supports dynamic mode. */
+ SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid);
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* Establish a new ctrlr */
+ ctrlr = nvmf_ctrlr_create(subsystem, req, cmd, data);
+ if (!ctrlr) {
+ SPDK_ERRLOG("nvmf_ctrlr_create() failed\n");
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ } else {
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ } else {
+ spdk_thread_send_msg(subsystem->thread, _nvmf_ctrlr_add_io_qpair, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+}
+
+static inline bool
+nvmf_request_is_fabric_connect(struct spdk_nvmf_request *req)
+{
+ return req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
+ req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT;
+}
+
+static struct spdk_nvmf_subsystem_poll_group *
+nvmf_subsystem_pg_from_connect_cmd(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fabric_connect_data *data;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ assert(nvmf_request_is_fabric_connect(req));
+ assert(req->qpair->ctrlr == NULL);
+
+ data = req->data;
+ tgt = req->qpair->transport->tgt;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
+ if (subsystem == NULL) {
+ return NULL;
+ }
+
+ return &req->qpair->group->sgroups[subsystem->id];
+}
+
+int
+spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ enum spdk_nvmf_request_exec_status status;
+
+ sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
+ if (!sgroup) {
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
+ status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ goto out;
+ }
+
+ sgroup->io_outstanding++;
+ TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+
+ status = _nvmf_ctrlr_connect(req);
+
+out:
+ if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+ _nvmf_request_complete(req);
+ }
+
+ return status;
+}
+
+static int
+nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fabric_connect_data *data = req->data;
+ struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
+ struct spdk_nvmf_transport *transport = req->qpair->transport;
+ struct spdk_nvmf_subsystem *subsystem;
+
+ if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) {
+ SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
+ if (!subsystem) {
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
+ (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
+ (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
+ (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
+ SPDK_ERRLOG("Subsystem '%s' is not ready\n", subsystem->subnqn);
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* Ensure that hostnqn is null terminated */
+ if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
+ SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
+ SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (!nvmf_qpair_access_allowed(req->qpair, subsystem, data->hostnqn)) {
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return _nvmf_ctrlr_connect(req);
+}
+
+static void
+nvmf_ctrlr_cc_reset_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = spdk_io_channel_iter_get_ctx(i);
+
+ if (status < 0) {
+ SPDK_ERRLOG("Fail to disconnect io ctrlr qpairs\n");
+ assert(false);
+ }
+
+ /* Only a subset of the registers are cleared out on a reset */
+ ctrlr->vcprop.cc.raw = 0;
+ ctrlr->vcprop.csts.raw = 0;
+
+}
+
+const struct spdk_nvmf_registers *
+spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return &ctrlr->vcprop;
+}
+
+static uint64_t
+nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.cap.raw;
+}
+
+static uint64_t
+nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.vs.raw;
+}
+
+static uint64_t
+nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.cc.raw;
+}
+
+static bool
+nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ union spdk_nvme_cc_register cc, diff;
+
+ cc.raw = value;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "new CC: 0x%08x\n", cc.raw);
+
+ /*
+ * Calculate which bits changed between the current and new CC.
+ * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed.
+ */
+ diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw;
+
+ if (diff.bits.en) {
+ if (cc.bits.en) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Enable!\n");
+ ctrlr->vcprop.cc.bits.en = 1;
+ ctrlr->vcprop.csts.bits.rdy = 1;
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Disable!\n");
+ ctrlr->vcprop.cc.bits.en = 0;
+ spdk_for_each_channel(ctrlr->subsys->tgt,
+ nvmf_ctrlr_disconnect_io_qpairs_on_pg,
+ ctrlr,
+ nvmf_ctrlr_cc_reset_done);
+ }
+ diff.bits.en = 0;
+ }
+
+ if (diff.bits.shn) {
+ if (cc.bits.shn == SPDK_NVME_SHN_NORMAL ||
+ cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Shutdown %u%ub!\n",
+ cc.bits.shn >> 1, cc.bits.shn & 1);
+ ctrlr->vcprop.cc.bits.shn = cc.bits.shn;
+ ctrlr->vcprop.cc.bits.en = 0;
+ ctrlr->vcprop.csts.bits.rdy = 0;
+ ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE;
+ } else if (cc.bits.shn == 0) {
+ ctrlr->vcprop.cc.bits.shn = 0;
+ } else {
+ SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n",
+ cc.bits.shn >> 1, cc.bits.shn & 1);
+ return false;
+ }
+ diff.bits.shn = 0;
+ }
+
+ if (diff.bits.iosqes) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOSQES = %u (%u bytes)\n",
+ cc.bits.iosqes, 1u << cc.bits.iosqes);
+ ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes;
+ diff.bits.iosqes = 0;
+ }
+
+ if (diff.bits.iocqes) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOCQES = %u (%u bytes)\n",
+ cc.bits.iocqes, 1u << cc.bits.iocqes);
+ ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes;
+ diff.bits.iocqes = 0;
+ }
+
+ if (diff.bits.ams) {
+ SPDK_ERRLOG("Arbitration Mechanism Selected (AMS) 0x%x not supported!\n", cc.bits.ams);
+ return false;
+ }
+
+ if (diff.bits.mps) {
+ SPDK_ERRLOG("Memory Page Size (MPS) %u KiB not supported!\n", (1 << (2 + cc.bits.mps)));
+ return false;
+ }
+
+ if (diff.bits.css) {
+ SPDK_ERRLOG("I/O Command Set Selected (CSS) 0x%x not supported!\n", cc.bits.css);
+ return false;
+ }
+
+ if (diff.raw != 0) {
+ SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw);
+ return false;
+ }
+
+ return true;
+}
+
+static uint64_t
+nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.csts.raw;
+}
+
+static uint64_t
+nvmf_prop_get_aqa(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.aqa.raw;
+}
+
+static bool
+nvmf_prop_set_aqa(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ union spdk_nvme_aqa_register aqa;
+
+ aqa.raw = value;
+
+ if (aqa.bits.asqs > ctrlr->vcprop.cap.bits.mqes ||
+ aqa.bits.acqs > ctrlr->vcprop.cap.bits.mqes) {
+ return false;
+ }
+
+ ctrlr->vcprop.aqa.raw = value;
+
+ return true;
+}
+
+static uint64_t
+nvmf_prop_get_asq(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.asq;
+}
+
+static bool
+nvmf_prop_set_asq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ ctrlr->vcprop.asq = (ctrlr->vcprop.asq & (0xFFFFFFFFULL << 32ULL)) | value;
+
+ return true;
+}
+
+static bool
+nvmf_prop_set_asq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ ctrlr->vcprop.asq = (ctrlr->vcprop.asq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
+
+ return true;
+}
+
+static uint64_t
+nvmf_prop_get_acq(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->vcprop.acq;
+}
+
+static bool
+nvmf_prop_set_acq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ ctrlr->vcprop.acq = (ctrlr->vcprop.acq & (0xFFFFFFFFULL << 32ULL)) | value;
+
+ return true;
+}
+
+static bool
+nvmf_prop_set_acq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
+{
+ ctrlr->vcprop.acq = (ctrlr->vcprop.acq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
+
+ return true;
+}
+
+struct nvmf_prop {
+ uint32_t ofst;
+ uint8_t size;
+ char name[11];
+ uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr);
+ bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
+ bool (*set_upper_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
+};
+
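+/*
+ * Property access table: each entry maps an offset/size within struct
+ * spdk_nvme_registers to get/set handlers. For 8-byte properties, set_cb
+ * writes the lower 32 bits and set_upper_cb the upper 32 bits.
+ */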
+#define PROP(field, size, get_cb, set_cb, set_upper_cb) \
+ { \
+ offsetof(struct spdk_nvme_registers, field), \
+ size, \
+ #field, \
+ get_cb, set_cb, set_upper_cb \
+ }
+
+static const struct nvmf_prop nvmf_props[] = {
+ PROP(cap, 8, nvmf_prop_get_cap, NULL, NULL),
+ PROP(vs, 4, nvmf_prop_get_vs, NULL, NULL),
+ PROP(cc, 4, nvmf_prop_get_cc, nvmf_prop_set_cc, NULL),
+ PROP(csts, 4, nvmf_prop_get_csts, NULL, NULL),
+ PROP(aqa, 4, nvmf_prop_get_aqa, nvmf_prop_set_aqa, NULL),
+ PROP(asq, 8, nvmf_prop_get_asq, nvmf_prop_set_asq_lower, nvmf_prop_set_asq_upper),
+ PROP(acq, 8, nvmf_prop_get_acq, nvmf_prop_set_acq_lower, nvmf_prop_set_acq_upper),
+};
+
+static const struct nvmf_prop *
+find_prop(uint32_t ofst, uint8_t size)
+{
+ size_t i;
+
+ for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) {
+ const struct nvmf_prop *prop = &nvmf_props[i];
+
+ if ((ofst >= prop->ofst) && (ofst + size <= prop->ofst + prop->size)) {
+ return prop;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+nvmf_property_get(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd;
+ struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp;
+ const struct nvmf_prop *prop;
+ uint8_t size;
+
+ response->status.sc = 0;
+ response->value.u64 = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x\n",
+ cmd->attrib.size, cmd->ofst);
+
+ switch (cmd->attrib.size) {
+ case SPDK_NVMF_PROP_SIZE_4:
+ size = 4;
+ break;
+ case SPDK_NVMF_PROP_SIZE_8:
+ size = 8;
+ break;
+ default:
+ SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size);
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ prop = find_prop(cmd->ofst, size);
+ if (prop == NULL || prop->get_cb == NULL) {
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name);
+
+ response->value.u64 = prop->get_cb(ctrlr);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "response value: 0x%" PRIx64 "\n", response->value.u64);
+
+ if (size != prop->size) {
+ /* The size must be 4 and the prop->size is 8. Figure out which part of the property to read. */
+ assert(size == 4);
+ assert(prop->size == 8);
+
+ if (cmd->ofst == prop->ofst) {
+ /* Keep bottom 4 bytes only */
+ response->value.u64 &= 0xFFFFFFFF;
+ } else {
+ /* Keep top 4 bytes only */
+ response->value.u64 >>= 32;
+ }
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_property_set(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ const struct nvmf_prop *prop;
+ uint64_t value;
+ uint8_t size;
+ bool ret;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
+ cmd->attrib.size, cmd->ofst, cmd->value.u64);
+
+ switch (cmd->attrib.size) {
+ case SPDK_NVMF_PROP_SIZE_4:
+ size = 4;
+ break;
+ case SPDK_NVMF_PROP_SIZE_8:
+ size = 8;
+ break;
+ default:
+ SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size);
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ prop = find_prop(cmd->ofst, size);
+ if (prop == NULL || prop->set_cb == NULL) {
+ SPDK_ERRLOG("Invalid offset 0x%x\n", cmd->ofst);
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name);
+
+ value = cmd->value.u64;
+
+ if (prop->size == 4) {
+ ret = prop->set_cb(ctrlr, (uint32_t)value);
+ } else if (size != prop->size) {
+ /* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. */
+ assert(size == 4);
+ assert(prop->size == 8);
+
+ if (cmd->ofst == prop->ofst) {
+ ret = prop->set_cb(ctrlr, (uint32_t)value);
+ } else {
+ ret = prop->set_upper_cb(ctrlr, (uint32_t)value);
+ }
+ } else {
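+ /* 8-byte access to an 8-byte property: write the lower dword first, then the upper dword */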
+ ret = prop->set_cb(ctrlr, (uint32_t)value);
+ if (ret) {
+ ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32));
+ }
+ }
+
+ if (!ret) {
+ SPDK_ERRLOG("prop set_cb failed\n");
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ ctrlr->feat.arbitration.raw = cmd->cdw11;
+ ctrlr->feat.arbitration.bits.reserved = 0;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ /* Only PS = 0 is allowed, since we report NPSS = 0 */
+ if (cmd->cdw11_bits.feat_power_management.bits.ps != 0) {
+ SPDK_ERRLOG("Invalid power state %u\n", cmd->cdw11_bits.feat_power_management.bits.ps);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ctrlr->feat.power_management.raw = cmd->cdw11;
+ ctrlr->feat.power_management.bits.reserved = 0;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static bool
+temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts)
+{
+ /*
+ * Valid TMPSEL values:
+ * 0000b - 1000b: temperature sensors
+ * 1111b: set all implemented temperature sensors
+ */
+ if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) {
+ /* 1001b - 1110b: reserved */
+ SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel);
+ return false;
+ }
+
+ /*
+ * Valid THSEL values:
+ * 00b: over temperature threshold
+ * 01b: under temperature threshold
+ */
+ if (opts->bits.thsel > 1) {
+ /* 10b - 11b: reserved */
+ SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* TODO: no sensors implemented - ignore new values */
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* TODO: no sensors implemented - return 0 for all thresholds */
+ rsp->cdw0 = 0;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ if (cmd->cdw11_bits.feat_error_recovery.bits.dulbe) {
+ /*
+ * Host is not allowed to set this bit, since we don't advertise it in
+ * Identify Namespace.
+ */
+ SPDK_ERRLOG("Host set unsupported DULBE bit\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ctrlr->feat.error_recovery.raw = cmd->cdw11;
+ ctrlr->feat.error_recovery.bits.reserved = 0;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ ctrlr->feat.volatile_write_cache.raw = cmd->cdw11;
+ ctrlr->feat.volatile_write_cache.bits.reserved = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache %s\n",
+ ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled");
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11);
+
+ ctrlr->feat.write_atomicity.raw = cmd->cdw11;
+ ctrlr->feat.write_atomicity.bits.reserved = 0;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+ SPDK_ERRLOG("Set Features - Host Identifier not allowed\n");
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Host Identifier\n");
+
+ if (!cmd->cdw11_bits.feat_host_identifier.bits.exhid) {
+ /* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */
+ SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n");
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (req->data == NULL || req->length < sizeof(ctrlr->hostid)) {
+ SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n");
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ spdk_uuid_copy((struct spdk_uuid *)req->data, &ctrlr->hostid);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_get_features_reservation_notification_mask(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_ns *ns;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "get Features - Reservation Notificaton Mask\n");
+
+ if (cmd->nsid == 0xffffffffu) {
+ SPDK_ERRLOG("get Features - Invalid Namespace ID\n");
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
+ if (ns == NULL) {
+ SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ rsp->cdw0 = ns->mask;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_reservation_notification_mask(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_ns *ns;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Reservation Notificaton Mask\n");
+
+ if (cmd->nsid == 0xffffffffu) {
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ ns->mask = cmd->cdw11;
+ }
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
+ if (ns == NULL) {
+ SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ ns->mask = cmd->cdw11;
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_get_features_reservation_persistence(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_ns *ns;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Reservation Persistence\n");
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
+ /* An NSID of 0xffffffffu (broadcast) also ends up here, since it matches no namespace */
+ if (ns == NULL) {
+ SPDK_ERRLOG("Get Features - Invalid Namespace ID\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ response->cdw0 = ns->ptpl_activated;
+
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_reservation_persistence(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_ns *ns;
+ bool ptpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Reservation Persistence\n");
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
+ ptpl = cmd->cdw11_bits.feat_rsv_persistence.bits.ptpl;
+
+ if (cmd->nsid != 0xffffffffu && ns && ns->ptpl_file) {
+ ns->ptpl_activated = ptpl;
+ } else if (cmd->nsid == 0xffffffffu) {
+ for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns && ns->ptpl_file;
+ ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
+ ns->ptpl_activated = ptpl;
+ }
+ } else {
+ SPDK_ERRLOG("Set Features - Invalid Namespace ID or Reservation Configuration\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* TODO: Feature not changeable for now */
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);
+
+ /*
+ * If the host attempts to disable keep alive by setting KATO to 0h,
+ * a status value of Keep Alive Invalid shall be returned.
+ */
+ if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato == 0) {
+ rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
+ } else if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
+ ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
+ } else {
+ /* round the value up to the KAS granularity */
+ ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(
+ cmd->cdw11_bits.feat_keep_alive_timer.bits.kato,
+ KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
+ KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
+ }
+
+ /*
+ * If the keep alive timeout value was changed successfully,
+ * update the keep alive poller.
+ */
+ if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato != 0) {
+ if (ctrlr->keep_alive_poller != NULL) {
+ spdk_poller_unregister(&ctrlr->keep_alive_poller);
+ }
+ ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
+ ctrlr->feat.keep_alive_timer.bits.kato * 1000);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n",
+ ctrlr->feat.keep_alive_timer.bits.kato);
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint32_t count;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Number of Queues, cdw11 0x%x\n",
+ req->cmd->nvme_cmd.cdw11);
+
+ count = spdk_bit_array_count_set(ctrlr->qpair_mask);
+ /* verify that the controller is ready to process commands */
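+ /* The qpair mask still includes the admin queue, so a count above 1 means I/O queues are already connected */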
+ if (count > 1) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Queue pairs already active!\n");
+ rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ } else {
+ /*
+ * Ignore the value requested by the host -
+ * always return the pre-configured value based on max_qpairs_allowed.
+ */
+ rsp->cdw0 = ctrlr->feat.number_of_queues.raw;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Async Event Configuration, cdw11 0x%08x\n",
+ cmd->cdw11);
+ ctrlr->feat.async_event_configuration.raw = cmd->cdw11;
+ ctrlr->feat.async_event_configuration.bits.reserved = 0;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n");
+
+ /* Four asynchronous events are supported for now */
+ if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "AERL exceeded\n");
+ rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (ctrlr->notice_event.bits.async_event_type ==
+ SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) {
+ rsp->cdw0 = ctrlr->notice_event.raw;
+ ctrlr->notice_event.raw = 0;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (ctrlr->reservation_event.bits.async_event_type ==
+ SPDK_NVME_ASYNC_EVENT_TYPE_IO) {
+ rsp->cdw0 = ctrlr->reservation_event.raw;
+ ctrlr->reservation_event.raw = 0;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* AER cmd is an exception */
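+ /* The AER stays outstanding until an event fires, so it must not count against the subsystem's outstanding I/O */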
+ sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
+ assert(sgroup != NULL);
+ sgroup->io_outstanding--;
+
+ ctrlr->aer_req[ctrlr->nr_aer_reqs++] = req;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+static void
+nvmf_get_firmware_slot_log_page(void *buffer, uint64_t offset, uint32_t length)
+{
+ struct spdk_nvme_firmware_page fw_page;
+ size_t copy_len;
+
+ memset(&fw_page, 0, sizeof(fw_page));
+ fw_page.afi.active_slot = 1;
+ fw_page.afi.next_reset_slot = 0;
+ spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' ');
+
+ if (offset < sizeof(fw_page)) {
+ copy_len = spdk_min(sizeof(fw_page) - offset, length);
+ if (copy_len > 0) {
+ memcpy(buffer, (const char *)&fw_page + offset, copy_len);
+ }
+ }
+}
+
+void
+nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
+{
+ uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list);
+ uint16_t i;
+ bool found = false;
+
+ for (i = 0; i < ctrlr->changed_ns_list_count; i++) {
+ if (ctrlr->changed_ns_list.ns_list[i] == nsid) {
+ /* nsid is already in the list */
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ if (ctrlr->changed_ns_list_count == max_changes) {
+ /* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. */
+ ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu;
+ for (i = 1; i < max_changes; i++) {
+ ctrlr->changed_ns_list.ns_list[i] = 0;
+ }
+ } else {
+ ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid;
+ }
+ }
+}
+
+static void
+nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr,
+ void *buffer, uint64_t offset, uint32_t length)
+{
+ size_t copy_length;
+
+ if (offset < sizeof(ctrlr->changed_ns_list)) {
+ copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset);
+ if (copy_length) {
+ memcpy(buffer, (char *)&ctrlr->changed_ns_list + offset, copy_length);
+ }
+ }
+
+ /* Clear log page each time it is read */
+ ctrlr->changed_ns_list_count = 0;
+ memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list));
+}
+
+/* The structure can be modified if we provide support for other commands in future */
+static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = {
+ .admin_cmds_supported = {
+ /* CSUPP, LBCC, NCC, NIC, CCC, CSE */
+ /* Get Log Page */
+ [SPDK_NVME_OPC_GET_LOG_PAGE] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Identify */
+ [SPDK_NVME_OPC_IDENTIFY] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Abort */
+ [SPDK_NVME_OPC_ABORT] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Set Features */
+ [SPDK_NVME_OPC_SET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Get Features */
+ [SPDK_NVME_OPC_GET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Async Event Request */
+ [SPDK_NVME_OPC_ASYNC_EVENT_REQUEST] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* Keep Alive */
+ [SPDK_NVME_OPC_KEEP_ALIVE] = {1, 0, 0, 0, 0, 0, 0, 0},
+ },
+ .io_cmds_supported = {
+ /* FLUSH */
+ [SPDK_NVME_OPC_FLUSH] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* WRITE */
+ [SPDK_NVME_OPC_WRITE] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* READ */
+ [SPDK_NVME_OPC_READ] = {1, 0, 0, 0, 0, 0, 0, 0},
+ /* WRITE ZEROES */
+ [SPDK_NVME_OPC_WRITE_ZEROES] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* DATASET MANAGEMENT */
+ [SPDK_NVME_OPC_DATASET_MANAGEMENT] = {1, 1, 0, 0, 0, 0, 0, 0},
+ /* COMPARE */
+ [SPDK_NVME_OPC_COMPARE] = {1, 0, 0, 0, 0, 0, 0, 0},
+ },
+};
+
+static void
+nvmf_get_cmds_and_effects_log_page(void *buffer,
+ uint64_t offset, uint32_t length)
+{
+ uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page);
+ size_t copy_len = 0;
+ size_t zero_len = length;
+
+ if (offset < page_size) {
+ copy_len = spdk_min(page_size - offset, length);
+ zero_len -= copy_len;
+ memcpy(buffer, (char *)(&g_cmds_and_effect_log_page) + offset, copy_len);
+ }
+
+ if (zero_len) {
+ memset((char *)buffer + copy_len, 0, zero_len);
+ }
+}
+
+static void
+nvmf_get_reservation_notification_log_page(struct spdk_nvmf_ctrlr *ctrlr,
+ void *data, uint64_t offset, uint32_t length)
+{
+ uint32_t unit_log_len, avail_log_len, next_pos, copy_len;
+ struct spdk_nvmf_reservation_log *log, *log_tmp;
+ uint8_t *buf = data;
+
+ unit_log_len = sizeof(struct spdk_nvme_reservation_notification_log);
+ /* No available log, return 1 zeroed log page */
+ if (!ctrlr->num_avail_log_pages) {
+ memset(buf, 0, spdk_min(length, unit_log_len));
+ return;
+ }
+
+ avail_log_len = ctrlr->num_avail_log_pages * unit_log_len;
+ if (offset >= avail_log_len) {
+ return;
+ }
+
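+ /*
+ * Logs are consumed as they are read: every visited entry is removed and
+ * freed; entries before the requested offset are skipped, the rest are
+ * copied until the requested length is exhausted.
+ */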
+ next_pos = copy_len = 0;
+ TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
+ TAILQ_REMOVE(&ctrlr->log_head, log, link);
+ ctrlr->num_avail_log_pages--;
+
+ next_pos += unit_log_len;
+ if (next_pos > offset) {
+ copy_len = spdk_min(next_pos - offset, length);
+ memcpy(buf, &log->log, copy_len);
+ length -= copy_len;
+ offset += copy_len;
+ buf += copy_len;
+ }
+ free(log);
+
+ if (length == 0) {
+ break;
+ }
+ }
+ return;
+}
+
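+/* Get Log Page admin command handler: validate the buffer, dword-aligned
+ * offset, and length, then dispatch on the log page identifier. Discovery
+ * subsystems only support the Discovery log page.
+ */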
+static int
+nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ uint64_t offset, len;
+ uint32_t numdl, numdu;
+ uint8_t lid;
+
+ if (req->data == NULL) {
+ SPDK_ERRLOG("get log command with no buffer\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32);
+ if (offset & 3) {
+ SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ numdl = cmd->cdw10_bits.get_log_page.numdl;
+ numdu = cmd->cdw11_bits.get_log_page.numdu;
+ len = ((numdu << 16) + numdl + (uint64_t)1) * 4;
+ if (len > req->length) {
+ SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n",
+ len, req->length);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ lid = cmd->cdw10_bits.get_log_page.lid;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 "\n",
+ lid, offset, len);
+
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ switch (lid) {
+ case SPDK_NVME_LOG_DISCOVERY:
+ nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn, req->iov, req->iovcnt, offset,
+ len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ default:
+ goto invalid_log_page;
+ }
+ } else {
+ switch (lid) {
+ case SPDK_NVME_LOG_ERROR:
+ case SPDK_NVME_LOG_HEALTH_INFORMATION:
+ /* TODO: actually fill out log page data */
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_FIRMWARE_SLOT:
+ nvmf_get_firmware_slot_log_page(req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG:
+ nvmf_get_cmds_and_effects_log_page(req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_CHANGED_NS_LIST:
+ nvmf_get_changed_ns_list_log_page(ctrlr, req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ case SPDK_NVME_LOG_RESERVATION_NOTIFICATION:
+ nvmf_get_reservation_notification_log_page(ctrlr, req->data, offset, len);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ default:
+ goto invalid_log_page;
+ }
+ }
+
+invalid_log_page:
+ SPDK_ERRLOG("Unsupported Get Log Page 0x%02X\n", lid);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+int
+spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvme_cmd *cmd,
+ struct spdk_nvme_cpl *rsp,
+ struct spdk_nvme_ns_data *nsdata)
+{
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvmf_ns *ns;
+ uint32_t max_num_blocks;
+
+ if (cmd->nsid == 0 || cmd->nsid > subsystem->max_nsid) {
+ SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", cmd->nsid);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
+ if (ns == NULL || ns->bdev == NULL) {
+ /*
+ * Inactive namespaces should return a zero filled data structure.
+ * The data buffer is already zeroed by nvmf_ctrlr_process_admin_cmd(),
+ * so we can just return early here.
+ */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Identify Namespace for inactive NSID %u\n", cmd->nsid);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
+
+	/* Due to a bug in the Linux kernel NVMe driver, noiob must be set no larger than mdts */
+ max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
+ (1U << nsdata->lbaf[nsdata->flbas.format].lbads);
+ if (nsdata->noiob > max_num_blocks) {
+ nsdata->noiob = max_num_blocks;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
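+/* Advertise an OACS capability only when custom handlers for the corresponding
+ * admin opcodes have been registered.
+ */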
+static void
+nvmf_ctrlr_populate_oacs(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvme_ctrlr_data *cdata)
+{
+ cdata->oacs.virtualization_management =
+ g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT].hdlr != NULL;
+ cdata->oacs.nvme_mi = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_SEND].hdlr != NULL
+ && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_RECEIVE].hdlr != NULL;
+ cdata->oacs.directives = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_SEND].hdlr != NULL
+ && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_RECEIVE].hdlr != NULL;
+ cdata->oacs.device_self_test =
+ g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DEVICE_SELF_TEST].hdlr != NULL;
+ cdata->oacs.ns_manage = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_MANAGEMENT].hdlr != NULL
+ && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_ATTACHMENT].hdlr != NULL;
+ cdata->oacs.firmware = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD].hdlr !=
+ NULL
+ && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_COMMIT].hdlr != NULL;
+ cdata->oacs.format =
+ g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FORMAT_NVM].hdlr != NULL;
+ cdata->oacs.security = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_SEND].hdlr != NULL
+ && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_RECEIVE].hdlr != NULL;
+ cdata->oacs.get_lba_status = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_GET_LBA_STATUS].hdlr !=
+ NULL;
+}
+
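+/* Fill out the Identify Controller data: fields common to discovery and NVM
+ * subsystems first, then NVM-subsystem-specific fields.
+ */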
+int
+spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
+{
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport;
+
+ /*
+ * Common fields for discovery and NVM subsystems
+ */
+ spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
+ assert((transport->opts.max_io_size % 4096) == 0);
+ cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);
+ cdata->cntlid = ctrlr->cntlid;
+ cdata->ver = ctrlr->vcprop.vs;
+ cdata->aerl = NVMF_MAX_ASYNC_EVENTS - 1;
+ cdata->lpa.edlp = 1;
+ cdata->elpe = 127;
+ cdata->maxcmd = transport->opts.max_queue_depth;
+ cdata->sgls = ctrlr->cdata.sgls;
+ cdata->fuses.compare_and_write = 1;
+ cdata->acwu = 1;
+ spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0');
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "sgls data: 0x%x\n", from_le32(&cdata->sgls));
+
+ /*
+ * NVM subsystem fields (reserved for discovery subsystems)
+ */
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) {
+ spdk_strcpy_pad(cdata->mn, spdk_nvmf_subsystem_get_mn(subsystem), sizeof(cdata->mn), ' ');
+ spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
+ cdata->kas = ctrlr->cdata.kas;
+
+ cdata->rab = 6;
+ cdata->cmic.multi_port = 1;
+ cdata->cmic.multi_host = 1;
+ cdata->oaes.ns_attribute_notices = 1;
+ cdata->ctratt.host_id_exhid_supported = 1;
+ /* TODO: Concurrent execution of multiple abort commands. */
+ cdata->acl = 0;
+ cdata->aerl = 0;
+ cdata->frmw.slot1_ro = 1;
+ cdata->frmw.num_slots = 1;
+
+ cdata->lpa.celp = 1; /* Command Effects log page supported */
+
+ cdata->sqes.min = 6;
+ cdata->sqes.max = 6;
+ cdata->cqes.min = 4;
+ cdata->cqes.max = 4;
+ cdata->nn = subsystem->max_nsid;
+ cdata->vwc.present = 1;
+ cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED;
+
+ cdata->nvmf_specific = ctrlr->cdata.nvmf_specific;
+
+ cdata->oncs.dsm = nvmf_ctrlr_dsm_supported(ctrlr);
+ cdata->oncs.write_zeroes = nvmf_ctrlr_write_zeroes_supported(ctrlr);
+ cdata->oncs.reservations = 1;
+
+ nvmf_ctrlr_populate_oacs(ctrlr, cdata);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ioccsz 0x%x\n",
+ cdata->nvmf_specific.ioccsz);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: iorcsz 0x%x\n",
+ cdata->nvmf_specific.iorcsz);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: icdoff 0x%x\n",
+ cdata->nvmf_specific.icdoff);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ctrattr 0x%x\n",
+ *(uint8_t *)&cdata->nvmf_specific.ctrattr);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: msdbd 0x%x\n",
+ cdata->nvmf_specific.msdbd);
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvme_cmd *cmd,
+ struct spdk_nvme_cpl *rsp,
+ struct spdk_nvme_ns_list *ns_list)
+{
+ struct spdk_nvmf_ns *ns;
+ uint32_t count = 0;
+
+ if (cmd->nsid >= 0xfffffffeUL) {
+ SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ if (ns->opts.nsid <= cmd->nsid) {
+ continue;
+ }
+
+ ns_list->ns_list[count++] = ns->opts.nsid;
+ if (count == SPDK_COUNTOF(ns_list->ns_list)) {
+ break;
+ }
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
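+/* Append one Namespace Identification Descriptor (type, length, data) to the
+ * buffer, advancing the write pointer and remaining-space counter.
+ */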
+static void
+_add_ns_id_desc(void **buf_ptr, size_t *buf_remain,
+ enum spdk_nvme_nidt type,
+ const void *data, size_t data_size)
+{
+ struct spdk_nvme_ns_id_desc *desc;
+ size_t desc_size = sizeof(*desc) + data_size;
+
+ /*
+ * These should never fail in practice, since all valid NS ID descriptors
+ * should be defined so that they fit in the available 4096-byte buffer.
+ */
+ assert(data_size > 0);
+ assert(data_size <= UINT8_MAX);
+ assert(desc_size < *buf_remain);
+ if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) {
+ return;
+ }
+
+ desc = *buf_ptr;
+ desc->nidt = type;
+ desc->nidl = data_size;
+ memcpy(desc->nid, data, data_size);
+
+ *buf_ptr += desc_size;
+ *buf_remain -= desc_size;
+}
+
+static int
+nvmf_ctrlr_identify_ns_id_descriptor_list(
+ struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvme_cmd *cmd,
+ struct spdk_nvme_cpl *rsp,
+ void *id_desc_list, size_t id_desc_list_size)
+{
+ struct spdk_nvmf_ns *ns;
+ size_t buf_remain = id_desc_list_size;
+ void *buf_ptr = id_desc_list;
+
+ ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
+ if (ns == NULL || ns->bdev == NULL) {
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+#define ADD_ID_DESC(type, data, size) \
+ do { \
+ if (!spdk_mem_all_zero(data, size)) { \
+ _add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \
+ } \
+ } while (0)
+
+ ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64));
+ ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid));
+ ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid));
+
+ /*
+ * The list is automatically 0-terminated because controller to host buffers in
+ * admin commands always get zeroed in nvmf_ctrlr_process_admin_cmd().
+ */
+
+#undef ADD_ID_DESC
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_identify(struct spdk_nvmf_request *req)
+{
+ uint8_t cns;
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+
+ if (req->data == NULL || req->length < 4096) {
+ SPDK_ERRLOG("identify command with invalid buffer\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ cns = cmd->cdw10_bits.identify.cns;
+
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY &&
+ cns != SPDK_NVME_IDENTIFY_CTRLR) {
+ /* Discovery controllers only support Identify Controller */
+ goto invalid_cns;
+ }
+
+ switch (cns) {
+ case SPDK_NVME_IDENTIFY_NS:
+ return spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, req->data);
+ case SPDK_NVME_IDENTIFY_CTRLR:
+ return spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, req->data);
+ case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST:
+ return nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, req->data);
+ case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST:
+ return nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp, req->data, req->length);
+ default:
+ goto invalid_cns;
+ }
+
+invalid_cns:
+ SPDK_ERRLOG("Identify command with unsupported CNS 0x%02x\n", cns);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
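+/* If the CID matches an outstanding AER on this admin queue, complete that AER
+ * as aborted and compact the aer_req array. Returns true if an AER was aborted.
+ */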
+static bool
+nvmf_qpair_abort_aer(struct spdk_nvmf_qpair *qpair, uint16_t cid)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+ struct spdk_nvmf_request *req;
+ int i;
+
+ if (!nvmf_qpair_is_admin_queue(qpair)) {
+ return false;
+ }
+
+ for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
+ if (ctrlr->aer_req[i]->cmd->nvme_cmd.cid == cid) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Aborting AER request\n");
+ req = ctrlr->aer_req[i];
+ ctrlr->aer_req[i] = NULL;
+ ctrlr->nr_aer_reqs--;
+
+			/* Move the last req into the slot being aborted so that
+			 * the aer_reqs array stays contiguous.
+			 */
+ if (i < ctrlr->nr_aer_reqs) {
+ ctrlr->aer_req[i] = ctrlr->aer_req[ctrlr->nr_aer_reqs];
+ ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
+ }
+
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
+ _nvmf_request_complete(req);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void
+nvmf_qpair_abort_request(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_request *req)
+{
+ uint16_t cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
+
+ if (nvmf_qpair_abort_aer(qpair, cid)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "abort ctrlr=%p sqid=%u cid=%u successful\n",
+ qpair->ctrlr, qpair->qid, cid);
+ req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command successfully aborted */
+
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ nvmf_transport_qpair_abort_request(qpair, req);
+}
+
+static void
+nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
+
+ if (status == 0) {
+		/* There was no qpair whose ID matches the SQID of the abort command,
+		 * so call _nvmf_request_complete() here.
+		 */
+ _nvmf_request_complete(req);
+ }
+}
+
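+/* Per-poll-group step of Abort: if this group owns the qpair with the target
+ * SQID for the same controller, abort the request there and stop the iteration.
+ */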
+static void
+nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i)
+{
+ struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+ uint16_t sqid = req->cmd->nvme_cmd.cdw10_bits.abort.sqid;
+ struct spdk_nvmf_qpair *qpair;
+
+ TAILQ_FOREACH(qpair, &group->qpairs, link) {
+ if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) {
+ /* Found the qpair */
+
+ nvmf_qpair_abort_request(qpair, req);
+
+ /* Return -1 for the status so the iteration across threads stops. */
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static int
+nvmf_ctrlr_abort(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ rsp->cdw0 = 1U; /* Command not aborted */
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_SUCCESS;
+
+ /* Send a message to each poll group, searching for this ctrlr, sqid, and command. */
+ spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt,
+ nvmf_ctrlr_abort_on_pg,
+ req,
+ nvmf_ctrlr_abort_done
+ );
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_request *req_to_abort = req->req_to_abort;
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ struct spdk_io_channel *ch;
+ int rc;
+
+ assert(req_to_abort != NULL);
+
+ if (g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr &&
+ nvmf_qpair_is_admin_queue(req_to_abort->qpair)) {
+ return g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr(req);
+ }
+
+ rc = spdk_nvmf_request_get_bdev(req_to_abort->cmd->nvme_cmd.nsid, req_to_abort,
+ &bdev, &desc, &ch);
+ if (rc != 0) {
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return spdk_nvmf_bdev_ctrlr_abort_cmd(bdev, desc, ch, req, req_to_abort);
+}
+
+static int
+get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0)
+{
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ rsp->cdw0 = cdw0;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+static int
+nvmf_ctrlr_get_features(struct spdk_nvmf_request *req)
+{
+ uint8_t feature;
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+ feature = cmd->cdw10_bits.get_features.fid;
+ switch (feature) {
+ case SPDK_NVME_FEAT_ARBITRATION:
+ return get_features_generic(req, ctrlr->feat.arbitration.raw);
+ case SPDK_NVME_FEAT_POWER_MANAGEMENT:
+ return get_features_generic(req, ctrlr->feat.power_management.raw);
+ case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
+ return nvmf_ctrlr_get_features_temperature_threshold(req);
+ case SPDK_NVME_FEAT_ERROR_RECOVERY:
+ return get_features_generic(req, ctrlr->feat.error_recovery.raw);
+ case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
+ return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw);
+ case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+ return get_features_generic(req, ctrlr->feat.number_of_queues.raw);
+ case SPDK_NVME_FEAT_WRITE_ATOMICITY:
+ return get_features_generic(req, ctrlr->feat.write_atomicity.raw);
+ case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+ return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
+ case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
+ return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
+ case SPDK_NVME_FEAT_HOST_IDENTIFIER:
+ return nvmf_ctrlr_get_features_host_identifier(req);
+ case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
+ return nvmf_ctrlr_get_features_reservation_notification_mask(req);
+ case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
+ return nvmf_ctrlr_get_features_reservation_persistence(req);
+ default:
+ SPDK_ERRLOG("Get Features command with unsupported feature ID 0x%02x\n", feature);
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+}
+
+static int
+nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
+{
+ uint8_t feature, save;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+	/*
+	 * Features are not saveable by this controller, as indicated by
+	 * the ONCS field of the Identify Controller data.
+	 */
+ save = cmd->cdw10_bits.set_features.sv;
+ if (save) {
+ response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
+ response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ feature = cmd->cdw10_bits.set_features.fid;
+ switch (feature) {
+ case SPDK_NVME_FEAT_ARBITRATION:
+ return nvmf_ctrlr_set_features_arbitration(req);
+ case SPDK_NVME_FEAT_POWER_MANAGEMENT:
+ return nvmf_ctrlr_set_features_power_management(req);
+ case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
+ return nvmf_ctrlr_set_features_temperature_threshold(req);
+ case SPDK_NVME_FEAT_ERROR_RECOVERY:
+ return nvmf_ctrlr_set_features_error_recovery(req);
+ case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
+ return nvmf_ctrlr_set_features_volatile_write_cache(req);
+ case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+ return nvmf_ctrlr_set_features_number_of_queues(req);
+ case SPDK_NVME_FEAT_WRITE_ATOMICITY:
+ return nvmf_ctrlr_set_features_write_atomicity(req);
+ case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+ return nvmf_ctrlr_set_features_async_event_configuration(req);
+ case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
+ return nvmf_ctrlr_set_features_keep_alive_timer(req);
+ case SPDK_NVME_FEAT_HOST_IDENTIFIER:
+ return nvmf_ctrlr_set_features_host_identifier(req);
+ case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
+ return nvmf_ctrlr_set_features_reservation_notification_mask(req);
+ case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
+ return nvmf_ctrlr_set_features_reservation_persistence(req);
+ default:
+ SPDK_ERRLOG("Set Features command with unsupported feature ID 0x%02x\n", feature);
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+}
+
+static int
+nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n");
+	/*
+	 * To handle a keep alive, just clear or reset the
+	 * controller-based keep alive duration counter.
+	 * When added, a separate timer-based process will
+	 * monitor whether the time since the last recorded
+	 * keep alive has exceeded the maximum duration and
+	 * take appropriate action.
+	 */
+ ctrlr->last_keep_alive_tick = spdk_get_ticks();
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+int
+nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ int rc;
+
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("Admin command sent before CONNECT\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (ctrlr->vcprop.cc.bits.en != 1) {
+ SPDK_ERRLOG("Admin command sent to disabled controller\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (req->data && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ memset(req->data, 0, req->length);
+ }
+
+ if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ /* Discovery controllers only support Get Log Page, Identify and Keep Alive. */
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_IDENTIFY:
+ case SPDK_NVME_OPC_GET_LOG_PAGE:
+ case SPDK_NVME_OPC_KEEP_ALIVE:
+ break;
+ default:
+ goto invalid_opcode;
+ }
+ }
+
+	/* Call a custom admin command handler if one is set. Aborts are handled in a different path (see nvmf_passthru_admin_cmd). */
+ if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) {
+ rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req);
+ if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+			/* The handler took care of this command */
+ return rc;
+ }
+ }
+
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_GET_LOG_PAGE:
+ return nvmf_ctrlr_get_log_page(req);
+ case SPDK_NVME_OPC_IDENTIFY:
+ return nvmf_ctrlr_identify(req);
+ case SPDK_NVME_OPC_ABORT:
+ return nvmf_ctrlr_abort(req);
+ case SPDK_NVME_OPC_GET_FEATURES:
+ return nvmf_ctrlr_get_features(req);
+ case SPDK_NVME_OPC_SET_FEATURES:
+ return nvmf_ctrlr_set_features(req);
+ case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
+ return nvmf_ctrlr_async_event_request(req);
+ case SPDK_NVME_OPC_KEEP_ALIVE:
+ return nvmf_ctrlr_keep_alive(req);
+
+ case SPDK_NVME_OPC_CREATE_IO_SQ:
+ case SPDK_NVME_OPC_CREATE_IO_CQ:
+ case SPDK_NVME_OPC_DELETE_IO_SQ:
+ case SPDK_NVME_OPC_DELETE_IO_CQ:
+ /* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
+ goto invalid_opcode;
+
+ default:
+ goto invalid_opcode;
+ }
+
+invalid_opcode:
+ SPDK_ERRLOG("Unsupported admin opcode 0x%x\n", cmd->opc);
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+int
+nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_capsule_cmd *cap_hdr;
+
+ cap_hdr = &req->cmd->nvmf_cmd;
+
+ if (qpair->ctrlr == NULL) {
+ /* No ctrlr established yet; the only valid command is Connect */
+ if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
+ return nvmf_ctrlr_cmd_connect(req);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Got fctype 0x%x, expected Connect\n",
+ cap_hdr->fctype);
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ } else if (nvmf_qpair_is_admin_queue(qpair)) {
+ /*
+ * Controller session is established, and this is an admin queue.
+ * Disallow Connect and allow other fabrics commands.
+ */
+ switch (cap_hdr->fctype) {
+ case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
+ return nvmf_property_set(req);
+ case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
+ return nvmf_property_get(req);
+ default:
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "unknown fctype 0x%02x\n",
+ cap_hdr->fctype);
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ } else {
+ /* Controller session is established, and this is an I/O queue */
+ /* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype);
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+}
+
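+/* Complete one outstanding AER request with the given async event completion. */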
+static inline int
+nvmf_ctrlr_async_event_notification(struct spdk_nvmf_ctrlr *ctrlr,
+ union spdk_nvme_async_event_completion *event)
+{
+ struct spdk_nvmf_request *req;
+ struct spdk_nvme_cpl *rsp;
+
+ assert(ctrlr->nr_aer_reqs > 0);
+
+ req = ctrlr->aer_req[--ctrlr->nr_aer_reqs];
+ rsp = &req->rsp->nvme_cpl;
+
+ rsp->cdw0 = event->raw;
+
+ _nvmf_request_complete(req);
+ ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
+
+ return 0;
+}
+
+int
+nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ union spdk_nvme_async_event_completion event = {0};
+
+ /* Users may disable the event notification */
+ if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) {
+ return 0;
+ }
+
+ event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
+ event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED;
+ event.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST;
+
+ /* If there is no outstanding AER request, queue the event. Then
+ * if an AER is later submitted, this event can be sent as a
+ * response.
+ */
+ if (ctrlr->nr_aer_reqs == 0) {
+ if (ctrlr->notice_event.bits.async_event_type ==
+ SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) {
+ return 0;
+ }
+
+ ctrlr->notice_event.raw = event.raw;
+ return 0;
+ }
+
+ return nvmf_ctrlr_async_event_notification(ctrlr, &event);
+}
+
+void
+nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ union spdk_nvme_async_event_completion event = {0};
+
+ if (!ctrlr->num_avail_log_pages) {
+ return;
+ }
+ event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_IO;
+ event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL;
+ event.bits.log_page_identifier = SPDK_NVME_LOG_RESERVATION_NOTIFICATION;
+
+ /* If there is no outstanding AER request, queue the event. Then
+ * if an AER is later submitted, this event can be sent as a
+ * response.
+ */
+ if (ctrlr->nr_aer_reqs == 0) {
+ if (ctrlr->reservation_event.bits.async_event_type ==
+ SPDK_NVME_ASYNC_EVENT_TYPE_IO) {
+ return;
+ }
+
+ ctrlr->reservation_event.raw = event.raw;
+ return;
+ }
+
+ nvmf_ctrlr_async_event_notification(ctrlr, &event);
+}
+
+void
+nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+ int i;
+
+ if (!nvmf_qpair_is_admin_queue(qpair)) {
+ return;
+ }
+
+ for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
+ spdk_nvmf_request_free(ctrlr->aer_req[i]);
+ ctrlr->aer_req[i] = NULL;
+ }
+
+ ctrlr->nr_aer_reqs = 0;
+}
+
+void
+nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ struct spdk_nvmf_request *req;
+ int i;
+
+ for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
+ req = ctrlr->aer_req[i];
+
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
+ _nvmf_request_complete(req);
+
+ ctrlr->aer_req[i] = NULL;
+ }
+
+ ctrlr->nr_aer_reqs = 0;
+}
+
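+/* Runs on the controller's thread: queue a reservation notification log page
+ * (at most 255 may be queued) and raise the reservation async event.
+ */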
+static void
+_nvmf_ctrlr_add_reservation_log(void *ctx)
+{
+ struct spdk_nvmf_reservation_log *log = (struct spdk_nvmf_reservation_log *)ctx;
+ struct spdk_nvmf_ctrlr *ctrlr = log->ctrlr;
+
+ ctrlr->log_page_count++;
+
+ /* Maximum number of queued log pages is 255 */
+ if (ctrlr->num_avail_log_pages == 0xff) {
+ struct spdk_nvmf_reservation_log *entry;
+ entry = TAILQ_LAST(&ctrlr->log_head, log_page_head);
+ entry->log.log_page_count = ctrlr->log_page_count;
+ free(log);
+ return;
+ }
+
+ log->log.log_page_count = ctrlr->log_page_count;
+ log->log.num_avail_log_pages = ctrlr->num_avail_log_pages++;
+ TAILQ_INSERT_TAIL(&ctrlr->log_head, log, link);
+
+ nvmf_ctrlr_async_event_reservation_notification(ctrlr);
+}
+
+void
+nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_ns *ns,
+ enum spdk_nvme_reservation_notification_log_page_type type)
+{
+ struct spdk_nvmf_reservation_log *log;
+
+ switch (type) {
+ case SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY:
+ return;
+ case SPDK_NVME_REGISTRATION_PREEMPTED:
+ if (ns->mask & SPDK_NVME_REGISTRATION_PREEMPTED_MASK) {
+ return;
+ }
+ break;
+ case SPDK_NVME_RESERVATION_RELEASED:
+ if (ns->mask & SPDK_NVME_RESERVATION_RELEASED_MASK) {
+ return;
+ }
+ break;
+ case SPDK_NVME_RESERVATION_PREEMPTED:
+ if (ns->mask & SPDK_NVME_RESERVATION_PREEMPTED_MASK) {
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ log = calloc(1, sizeof(*log));
+ if (!log) {
+ SPDK_ERRLOG("Alloc log page failed, ignore the log\n");
+ return;
+ }
+ log->ctrlr = ctrlr;
+ log->log.type = type;
+ log->log.nsid = ns->nsid;
+
+ spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_reservation_log, log);
+}
+
+/* Check the subsystem poll group's namespace information to see whether this controller (host) is a registrant */
+static bool
+nvmf_ns_info_ctrlr_is_registrant(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
+ struct spdk_nvmf_ctrlr *ctrlr)
+{
+ uint32_t i;
+
+ for (i = 0; i < SPDK_NVMF_MAX_NUM_REGISTRANTS; i++) {
+ if (!spdk_uuid_compare(&ns_info->reg_hostid[i], &ctrlr->hostid)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Check whether the NVMe command is permitted for the current controller (host).
+ */
+static int
+nvmf_ns_reservation_request_check(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ enum spdk_nvme_reservation_type rtype = ns_info->rtype;
+ uint8_t status = SPDK_NVME_SC_SUCCESS;
+ uint8_t racqa;
+ bool is_registrant;
+
+ /* No valid reservation */
+ if (!rtype) {
+ return 0;
+ }
+
+ is_registrant = nvmf_ns_info_ctrlr_is_registrant(ns_info, ctrlr);
+	/* All-registrants reservation type and the current ctrlr is a valid registrant */
+ if ((rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
+ rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && is_registrant) {
+ return 0;
+ } else if (!spdk_uuid_compare(&ns_info->holder_id, &ctrlr->hostid)) {
+ return 0;
+ }
+
+	/* The current controller is not the reservation holder */
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_READ:
+ case SPDK_NVME_OPC_COMPARE:
+ if (rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+ if ((rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY ||
+ rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && !is_registrant) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ }
+ break;
+ case SPDK_NVME_OPC_FLUSH:
+ case SPDK_NVME_OPC_WRITE:
+ case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
+ case SPDK_NVME_OPC_WRITE_ZEROES:
+ case SPDK_NVME_OPC_DATASET_MANAGEMENT:
+ if (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE ||
+ rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+ if (!is_registrant) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ }
+ break;
+ case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
+ racqa = cmd->cdw10_bits.resv_acquire.racqa;
+ if (racqa == SPDK_NVME_RESERVE_ACQUIRE) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+ if (!is_registrant) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ }
+ break;
+ case SPDK_NVME_OPC_RESERVATION_RELEASE:
+ if (!is_registrant) {
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ }
+ break;
+ default:
+ break;
+ }
+
+exit:
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = status;
+ if (status == SPDK_NVME_SC_RESERVATION_CONFLICT) {
+ return -EPERM;
+ }
+
+ return 0;
+}
+
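+/* Handle fused Compare and Write: stash the first (Compare) command on the
+ * qpair, then submit both to the bdev layer once the second (Write) arrives.
+ */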
+static int
+nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
+ struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req;
+ int rc;
+
+ if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) {
+ /* first fused operation (should be compare) */
+ if (first_fused_req != NULL) {
+ struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
+
+ SPDK_ERRLOG("Wrong sequence of fused operations\n");
+
+			/* Abort req->qpair->first_fused_req and continue with the new fused command */
+ fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
+ fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
+ _nvmf_request_complete(first_fused_req);
+ } else if (cmd->opc != SPDK_NVME_OPC_COMPARE) {
+ SPDK_ERRLOG("Wrong op code of fused operations\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ req->qpair->first_fused_req = req;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ } else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) {
+ /* second fused operation (should be write) */
+ if (first_fused_req == NULL) {
+ SPDK_ERRLOG("Wrong sequence of fused operations\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ } else if (cmd->opc != SPDK_NVME_OPC_WRITE) {
+ struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
+
+ SPDK_ERRLOG("Wrong op code of fused operations\n");
+
+			/* Abort req->qpair->first_fused_req and fail the current command */
+ fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
+ fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
+ _nvmf_request_complete(first_fused_req);
+
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ req->qpair->first_fused_req = NULL;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ /* save request of first command to generate response later */
+ req->first_fused_req = first_fused_req;
+ req->qpair->first_fused_req = NULL;
+ } else {
+ SPDK_ERRLOG("Invalid fused command fuse field.\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req);
+
+ if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+ if (spdk_nvme_cpl_is_error(rsp)) {
+ struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
+
+ fused_response->status = rsp->status;
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
+			/* Complete the first of the fused commands. The second will be completed by the upper layer. */
+ _nvmf_request_complete(first_fused_req);
+ req->first_fused_req = NULL;
+ }
+ }
+
+ return rc;
+}
+
+int
+nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
+{
+ uint32_t nsid;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_poll_group *group = req->qpair->group;
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
+
+ /* pre-set response details for this command */
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ nsid = cmd->nsid;
+
+ if (spdk_unlikely(ctrlr == NULL)) {
+ SPDK_ERRLOG("I/O command sent before CONNECT\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
+ SPDK_ERRLOG("I/O command sent to disabled controller\n");
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
+ if (ns == NULL || ns->bdev == NULL) {
+ SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid);
+ response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ response->status.dnr = 1;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+	/* scan-build falsely reports a null pointer dereference here */
+ assert(group != NULL && group->sgroups != NULL);
+ ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
+ if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Reservation Conflict for nsid %u, opcode %u\n",
+ cmd->nsid, cmd->opc);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ bdev = ns->bdev;
+ desc = ns->desc;
+ ch = ns_info->channel;
+
+ if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) {
+ return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch);
+ } else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) {
+ struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl;
+
+ SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n");
+
+		/* Abort req->qpair->first_fused_req and continue with the new command */
+ fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
+ fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
+ _nvmf_request_complete(req->qpair->first_fused_req);
+ req->qpair->first_fused_req = NULL;
+ }
+
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_READ:
+ return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_WRITE:
+ return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_COMPARE:
+ return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_WRITE_ZEROES:
+ return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_FLUSH:
+ return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_DATASET_MANAGEMENT:
+ return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
+ case SPDK_NVME_OPC_RESERVATION_REGISTER:
+ case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
+ case SPDK_NVME_OPC_RESERVATION_RELEASE:
+ case SPDK_NVME_OPC_RESERVATION_REPORT:
+ spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ default:
+ return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
+ }
+}
+
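+/* If the qpair is deactivating and has no more outstanding requests, notify
+ * its state change callback.
+ */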
+static void
+nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair)
+{
+ if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) {
+ assert(qpair->state_cb != NULL);
+
+ if (TAILQ_EMPTY(&qpair->outstanding)) {
+ qpair->state_cb(qpair->state_cb_arg, 0);
+ }
+ }
+}
+
+int
+spdk_nvmf_request_free(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+
+ TAILQ_REMOVE(&qpair->outstanding, req, link);
+ if (nvmf_transport_req_free(req)) {
+ SPDK_ERRLOG("Unable to free transport level request resources.\n");
+ }
+
+ nvmf_qpair_request_cleanup(qpair);
+
+ return 0;
+}
+
+static void
+_nvmf_request_complete(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
+ bool is_aer = false;
+
+ rsp->sqid = 0;
+ rsp->status.p = 0;
+ rsp->cid = req->cmd->nvme_cmd.cid;
+
+ qpair = req->qpair;
+ if (qpair->ctrlr) {
+ sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+ assert(sgroup != NULL);
+ is_aer = req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
+ } else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
+ sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
+ }
+
+ if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
+ spdk_nvme_print_completion(qpair->qid, rsp);
+ }
+
+ TAILQ_REMOVE(&qpair->outstanding, req, link);
+ if (nvmf_transport_req_complete(req)) {
+ SPDK_ERRLOG("Transport request completion error!\n");
+ }
+
+	/* AER commands are an exception and do not decrement io_outstanding here */
+ if (sgroup && !is_aer) {
+ assert(sgroup->io_outstanding > 0);
+ sgroup->io_outstanding--;
+ if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
+ sgroup->io_outstanding == 0) {
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
+ sgroup->cb_fn(sgroup->cb_arg, 0);
+ }
+ }
+
+ nvmf_qpair_request_cleanup(qpair);
+}
+
+int
+spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+
+ if (spdk_likely(qpair->group->thread == spdk_get_thread())) {
+ _nvmf_request_complete(req);
+ } else {
+ spdk_thread_send_msg(qpair->group->thread,
+ _nvmf_request_complete, req);
+ }
+
+ return 0;
+}
+
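+/* Track the request as outstanding and dispatch it to the fabrics, admin, or
+ * I/O path; complete it immediately if the handler finished synchronously.
+ */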
+static void
+_nvmf_request_exec(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_subsystem_poll_group *sgroup)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ enum spdk_nvmf_request_exec_status status;
+
+ if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
+ spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
+ }
+
+ if (sgroup) {
+ sgroup->io_outstanding++;
+ }
+
+ /* Place the request on the outstanding list so we can keep track of it */
+ TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+
+ if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
+ status = nvmf_ctrlr_process_fabrics_cmd(req);
+ } else if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
+ status = nvmf_ctrlr_process_admin_cmd(req);
+ } else {
+ status = nvmf_ctrlr_process_io_cmd(req);
+ }
+
+ if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+ _nvmf_request_complete(req);
+ }
+}
+
+void
+spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
+
+ assert(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC);
+
+ if (qpair->ctrlr) {
+ sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+ assert(sgroup != NULL);
+ } else {
+ sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
+ }
+
+ _nvmf_request_exec(req, sgroup);
+}
+
+void
+spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
+
+ if (qpair->ctrlr) {
+ sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
+ assert(sgroup != NULL);
+ } else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
+ sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
+ }
+
+ if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ /* Place the request on the outstanding list so we can keep track of it */
+ TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
+ /* Still increment io_outstanding because request_complete decrements it */
+ if (sgroup != NULL) {
+ sgroup->io_outstanding++;
+ }
+ _nvmf_request_complete(req);
+ return;
+ }
+
+ /* Check if the subsystem is paused (if there is a subsystem) */
+ if (sgroup != NULL) {
+ if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
+ /* The subsystem is not currently active. Queue this request. */
+ TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
+ return;
+ }
+ }
+
+ _nvmf_request_exec(req, sgroup);
+}
+
+static bool
+nvmf_ctrlr_get_dif_ctx(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
+ struct spdk_dif_ctx *dif_ctx)
+{
+ struct spdk_nvmf_ns *ns;
+ struct spdk_bdev *bdev;
+
+ if (ctrlr == NULL || cmd == NULL) {
+ return false;
+ }
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
+ if (ns == NULL || ns->bdev == NULL) {
+ return false;
+ }
+
+ bdev = ns->bdev;
+
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_READ:
+ case SPDK_NVME_OPC_WRITE:
+ case SPDK_NVME_OPC_COMPARE:
+ return nvmf_bdev_ctrlr_get_dif_ctx(bdev, cmd, dif_ctx);
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool
+spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx)
+{
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ if (spdk_likely(ctrlr == NULL || !ctrlr->dif_insert_or_strip)) {
+ return false;
+ }
+
+ if (spdk_unlikely(qpair->state != SPDK_NVMF_QPAIR_ACTIVE)) {
+ return false;
+ }
+
+ if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
+ return false;
+ }
+
+ if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
+ return false;
+ }
+
+ return nvmf_ctrlr_get_dif_ctx(ctrlr, &req->cmd->nvme_cmd, dif_ctx);
+}
+
+void
+spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr)
+{
+ g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = hdlr;
+}
+
+static int
+nvmf_passthru_admin_cmd(struct spdk_nvmf_request *req)
+{
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ struct spdk_io_channel *ch;
+ struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
+ struct spdk_nvme_cpl *response = spdk_nvmf_request_get_response(req);
+ uint32_t bdev_nsid;
+ int rc;
+
+ if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid == 0) {
+ bdev_nsid = cmd->nsid;
+ } else {
+ bdev_nsid = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid;
+ }
+
+ rc = spdk_nvmf_request_get_bdev(bdev_nsid, req, &bdev, &desc, &ch);
+ if (rc) {
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, NULL);
+}
+
+void
+spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid)
+{
+ g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = nvmf_passthru_admin_cmd;
+ g_nvmf_custom_admin_cmd_hdlrs[opc].nsid = forward_nsid;
+}
+
+int
+spdk_nvmf_request_get_bdev(uint32_t nsid, struct spdk_nvmf_request *req,
+ struct spdk_bdev **bdev, struct spdk_bdev_desc **desc, struct spdk_io_channel **ch)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_poll_group *group = req->qpair->group;
+ struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
+
+ *bdev = NULL;
+ *desc = NULL;
+ *ch = NULL;
+
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
+ if (ns == NULL || ns->bdev == NULL) {
+ return -EINVAL;
+ }
+
+ assert(group != NULL && group->sgroups != NULL);
+ ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
+ *bdev = ns->bdev;
+ *desc = ns->desc;
+ *ch = ns_info->channel;
+
+ return 0;
+}
+
+struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req)
+{
+ return req->qpair->ctrlr;
+}
+
+struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req)
+{
+ return &req->cmd->nvme_cmd;
+}
+
+struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req)
+{
+ return &req->rsp->nvme_cpl;
+}
+
+struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req)
+{
+ return req->qpair->ctrlr->subsys;
+}
+
+void spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length)
+{
+ *data = req->data;
+ *length = req->length;
+}
+
+struct spdk_nvmf_subsystem *spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->subsys;
+}
+
+uint16_t spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return ctrlr->cntlid;
+}
+
+struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req)
+{
+ return req->req_to_abort;
+}
diff --git a/src/spdk/lib/nvmf/ctrlr_bdev.c b/src/spdk/lib/nvmf/ctrlr_bdev.c
new file mode 100644
index 000000000..13e0a4309
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr_bdev.c
@@ -0,0 +1,761 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+
+#include "spdk/bdev.h"
+#include "spdk/endian.h"
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf_cmd.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/trace.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+
+static bool
+nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
+ enum spdk_bdev_io_type io_type)
+{
+ struct spdk_nvmf_ns *ns;
+
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ if (ns->bdev == NULL) {
+ continue;
+ }
+
+ if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF,
+ "Subsystem %s namespace %u (%s) does not support io_type %d\n",
+ spdk_nvmf_subsystem_get_nqn(subsystem),
+ ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
+ return false;
+ }
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n",
+ spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
+ return true;
+}
+
+bool
+nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
+}
+
+bool
+nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
+}
+
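+/* bdev I/O completion callback: translate the bdev NVMe status into the NVMe
+ * completion (including the first half of a fused pair) and complete the request.
+ */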
+static void
+nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ struct spdk_nvmf_request *req = cb_arg;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ int first_sc = 0, first_sct = 0, second_sc = 0, second_sct = 0;
+ uint32_t cdw0 = 0;
+ struct spdk_nvmf_request *first_req = req->first_fused_req;
+
+ if (spdk_unlikely(first_req != NULL)) {
+ /* fused commands - get status for both operations */
+ struct spdk_nvme_cpl *fused_response = &first_req->rsp->nvme_cpl;
+
+ spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &second_sct, &second_sc, &first_sct, &first_sc);
+ fused_response->cdw0 = cdw0;
+ fused_response->status.sc = second_sc;
+ fused_response->status.sct = second_sct;
+
+ /* first request should be completed */
+ spdk_nvmf_request_complete(first_req);
+ req->first_fused_req = NULL;
+ } else {
+ spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &first_sct, &first_sc);
+ }
+
+ response->cdw0 = cdw0;
+ response->status.sc = first_sc;
+ response->status.sct = first_sct;
+
+ spdk_nvmf_request_complete(req);
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ struct spdk_nvmf_request *req = cb_arg;
+
+ if (req->cmd_cb_fn) {
+ req->cmd_cb_fn(req);
+ }
+
+ nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
+}
+
+void
+nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
+ bool dif_insert_or_strip)
+{
+ struct spdk_bdev *bdev = ns->bdev;
+ uint64_t num_blocks;
+
+ num_blocks = spdk_bdev_get_num_blocks(bdev);
+
+ nsdata->nsze = num_blocks;
+ nsdata->ncap = num_blocks;
+ nsdata->nuse = num_blocks;
+ nsdata->nlbaf = 0;
+ nsdata->flbas.format = 0;
+ nsdata->nacwu = spdk_bdev_get_acwu(bdev);
+ if (!dif_insert_or_strip) {
+ nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
+ nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
+ if (nsdata->lbaf[0].ms != 0) {
+ nsdata->flbas.extended = 1;
+ nsdata->mc.extended = 1;
+ nsdata->mc.pointer = 0;
+ nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
+
+ switch (spdk_bdev_get_dif_type(bdev)) {
+ case SPDK_DIF_TYPE1:
+ nsdata->dpc.pit1 = 1;
+ nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
+ break;
+ case SPDK_DIF_TYPE2:
+ nsdata->dpc.pit2 = 1;
+ nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
+ break;
+ case SPDK_DIF_TYPE3:
+ nsdata->dpc.pit3 = 1;
+ nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
+ break;
+ default:
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Protection Disabled\n");
+ nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
+ break;
+ }
+ }
+ } else {
+ nsdata->lbaf[0].ms = 0;
+ nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
+ }
+ nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
+ nsdata->nmic.can_share = 1;
+ if (ns->ptpl_file != NULL) {
+ nsdata->nsrescap.rescap.persist = 1;
+ }
+ nsdata->nsrescap.rescap.write_exclusive = 1;
+ nsdata->nsrescap.rescap.exclusive_access = 1;
+ nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
+ nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
+ nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
+ nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
+ nsdata->nsrescap.rescap.ignore_existing_key = 1;
+
+ SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
+ memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
+
+ SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
+ memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
+}
+
+static void
+nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
+ uint64_t *num_blocks)
+{
+ /* SLBA: CDW10 and CDW11 */
+ *start_lba = from_le64(&cmd->cdw10);
+
+ /* NLB: CDW12 bits 15:00, 0's based */
+ *num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
+}
+
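+/* Return false if the I/O range runs past the end of the bdev or overflows. */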
+static bool
+nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
+ uint64_t io_num_blocks)
+{
+ if (io_start_lba + io_num_blocks > bdev_num_blocks ||
+ io_start_lba + io_num_blocks < io_start_lba) {
+ return false;
+ }
+
+ return true;
+}
+
+static void
+nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
+{
+ struct spdk_nvmf_request *req = arg;
+
+ nvmf_ctrlr_process_io_cmd(req);
+}
+
+static void
+nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
+{
+ struct spdk_nvmf_request *req = arg;
+
+ nvmf_ctrlr_process_admin_cmd(req);
+}
+
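+/* Register an io_wait callback so the request is resubmitted once the bdev
+ * layer has resources available again (-ENOMEM path).
+ */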
+static void
+nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
+ struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
+{
+ int rc;
+
+ req->bdev_io_wait.bdev = bdev;
+ req->bdev_io_wait.cb_fn = cb_fn;
+ req->bdev_io_wait.cb_arg = cb_arg;
+
+ rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
+ if (rc != 0) {
+ assert(false);
+ }
+ req->qpair->group->stat.pending_bdev_io++;
+}
+
+int
+nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+ uint32_t block_size = spdk_bdev_get_block_size(bdev);
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint64_t start_lba;
+ uint64_t num_blocks;
+ int rc;
+
+ nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+ if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+ SPDK_ERRLOG("end of media\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(num_blocks * block_size > req->length)) {
+ SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+ num_blocks, block_size, req->length);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+ uint32_t block_size = spdk_bdev_get_block_size(bdev);
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint64_t start_lba;
+ uint64_t num_blocks;
+ int rc;
+
+ nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+ if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+ SPDK_ERRLOG("end of media\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(num_blocks * block_size > req->length)) {
+ SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+ num_blocks, block_size, req->length);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+ uint32_t block_size = spdk_bdev_get_block_size(bdev);
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint64_t start_lba;
+ uint64_t num_blocks;
+ int rc;
+
+ nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+ if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+ SPDK_ERRLOG("end of media\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(num_blocks * block_size > req->length)) {
+ SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+ num_blocks, block_size, req->length);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
+{
+ uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+ uint32_t block_size = spdk_bdev_get_block_size(bdev);
+ struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
+ struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
+ uint64_t write_start_lba, cmp_start_lba;
+ uint64_t write_num_blocks, cmp_num_blocks;
+ int rc;
+
+ nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
+ nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
+
+ if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
+ SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
+ write_num_blocks))) {
+ SPDK_ERRLOG("end of media\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
+ SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+ write_num_blocks, block_size, write_req->length);
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
+ write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
+ nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
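+
+/*
+ * Editorial note: the two requests above arrive as an NVMe fused pair
+ * (Compare as the first fused command, Write as the second).  The transport
+ * hands both halves to this helper, which verifies that they describe the
+ * same LBA range before issuing a single atomic compare-and-write to the
+ * bdev.
+ */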
+
+int
+nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint64_t start_lba;
+ uint64_t num_blocks;
+ int rc;
+
+ nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+ if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+ SPDK_ERRLOG("end of media\n");
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ int rc;
+
+	/* The NVMe-oF controller always reports the volatile write
+	 * cache bit as 1, so return success for block devices that
+	 * do not support the FLUSH command.
+	 */
+ if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+struct nvmf_bdev_ctrlr_unmap {
+ struct spdk_nvmf_request *req;
+ uint32_t count;
+ struct spdk_bdev_desc *desc;
+ struct spdk_bdev *bdev;
+ struct spdk_io_channel *ch;
+ uint32_t range_index;
+};
+
+static void
+nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
+ struct spdk_nvmf_request *req = unmap_ctx->req;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ int sc, sct;
+ uint32_t cdw0;
+
+ unmap_ctx->count--;
+
+ if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
+ response->status.sc == SPDK_NVME_SC_SUCCESS) {
+ spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
+ response->cdw0 = cdw0;
+ response->status.sc = sc;
+ response->status.sct = sct;
+ }
+
+ if (unmap_ctx->count == 0) {
+ spdk_nvmf_request_complete(req);
+ free(unmap_ctx);
+ }
+ spdk_bdev_free_io(bdev_io);
+}
+
+static int
+nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+ struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
+static void
+nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
+{
+ struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
+ struct spdk_nvmf_request *req = unmap_ctx->req;
+ struct spdk_bdev_desc *desc = unmap_ctx->desc;
+ struct spdk_bdev *bdev = unmap_ctx->bdev;
+ struct spdk_io_channel *ch = unmap_ctx->ch;
+
+ nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
+}
+
+static int
+nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+ struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
+{
+ uint16_t nr, i;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+ struct spdk_nvme_dsm_range *dsm_range;
+ uint64_t lba;
+ uint32_t lba_count;
+ int rc;
+
+ nr = cmd->cdw10_bits.dsm.nr + 1;
+ if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
+ SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
+ response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ if (unmap_ctx == NULL) {
+ unmap_ctx = calloc(1, sizeof(*unmap_ctx));
+ if (!unmap_ctx) {
+ response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ unmap_ctx->req = req;
+ unmap_ctx->desc = desc;
+ unmap_ctx->ch = ch;
+ unmap_ctx->bdev = bdev;
+
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ } else {
+ unmap_ctx->count--; /* dequeued */
+ }
+
+ dsm_range = (struct spdk_nvme_dsm_range *)req->data;
+ for (i = unmap_ctx->range_index; i < nr; i++) {
+ lba = dsm_range[i].starting_lba;
+ lba_count = dsm_range[i].length;
+
+ unmap_ctx->count++;
+
+ rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
+ nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
+ if (rc) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
+ /* Unmap was not yet submitted to bdev */
+ /* unmap_ctx->count will be decremented when the request is dequeued */
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ unmap_ctx->count--;
+ /* We can't return here - we may have to wait for any other
+ * unmaps already sent to complete */
+ break;
+ }
+ unmap_ctx->range_index++;
+ }
+
+ if (unmap_ctx->count == 0) {
+ free(unmap_ctx);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
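+
+/*
+ * Illustrative sketch (editorial, hypothetical values): shape of the Dataset
+ * Management payload walked by the loop above -- an array of 16-byte ranges,
+ * one unmap per range, with lengths expressed in logical blocks.  NR in
+ * CDW10 is 0's based, hence nr = cdw10_bits.dsm.nr + 1.
+ */
+#if 0
+	struct spdk_nvme_dsm_range ranges[2] = {
+		{ .starting_lba = 0,    .length = 256 },
+		{ .starting_lba = 1024, .length = 128 },
+	};
+	/* req->data points at such an array and req->length covers nr ranges */
+#endif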
+
+int
+nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+ if (cmd->cdw11_bits.dsm.ad) {
+ return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
+ }
+
+ response->status.sct = SPDK_NVME_SCT_GENERIC;
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+int
+nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+ int rc;
+
+ rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
+ nvmf_bdev_ctrlr_complete_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+int
+spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+ spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
+{
+ int rc;
+
+ req->cmd_cb_fn = cb_fn;
+
+ rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
+ nvmf_bdev_ctrlr_complete_admin_cmd, req);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ }
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+static void
+nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_nvmf_request *req = cb_arg;
+
+ if (success) {
+ req->rsp->nvme_cpl.cdw0 &= ~1U;
+ }
+
+ spdk_nvmf_request_complete(req);
+ spdk_bdev_free_io(bdev_io);
+}
+
+int
+spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+ struct spdk_nvmf_request *req_to_abort)
+{
+ int rc;
+
+ assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
+
+ rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
+ if (spdk_likely(rc == 0)) {
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ } else if (rc == -ENOMEM) {
+ nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+ } else {
+ return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+ }
+}
+
+bool
+nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
+ struct spdk_dif_ctx *dif_ctx)
+{
+ uint32_t init_ref_tag, dif_check_flags = 0;
+ int rc;
+
+ if (spdk_bdev_get_md_size(bdev) == 0) {
+ return false;
+ }
+
+ /* Initial Reference Tag is the lower 32 bits of the start LBA. */
+ init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
+
+ if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
+ dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
+ }
+
+ if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
+ dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
+ }
+
+ rc = spdk_dif_ctx_init(dif_ctx,
+ spdk_bdev_get_block_size(bdev),
+ spdk_bdev_get_md_size(bdev),
+ spdk_bdev_is_md_interleaved(bdev),
+ spdk_bdev_is_dif_head_of_md(bdev),
+ spdk_bdev_get_dif_type(bdev),
+ dif_check_flags,
+ init_ref_tag, 0, 0, 0, 0);
+
+ return (rc == 0) ? true : false;
+}
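+
+/*
+ * Editorial example: for a command at SLBA 0x1_0000_0005 on a DIF-enabled
+ * bdev, the Initial Reference Tag passed above is the SLBA truncated to its
+ * low 32 bits, i.e. 0x00000005; the check flags depend on which DIF checks
+ * the bdev has enabled.
+ */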
diff --git a/src/spdk/lib/nvmf/ctrlr_discovery.c b/src/spdk/lib/nvmf/ctrlr_discovery.c
new file mode 100644
index 000000000..ab1c46ba1
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr_discovery.c
@@ -0,0 +1,159 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NVMe over Fabrics discovery service
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/nvmf_spec.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+static struct spdk_nvmf_discovery_log_page *
+nvmf_generate_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn, size_t *log_page_size)
+{
+ uint64_t numrec = 0;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_subsystem_listener *listener;
+ struct spdk_nvmf_discovery_log_page_entry *entry;
+ struct spdk_nvmf_discovery_log_page *disc_log;
+ size_t cur_size;
+ uint32_t sid;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Generating log page for genctr %" PRIu64 "\n",
+ tgt->discovery_genctr);
+
+ cur_size = sizeof(struct spdk_nvmf_discovery_log_page);
+ disc_log = calloc(1, cur_size);
+ if (disc_log == NULL) {
+ SPDK_ERRLOG("Discovery log page memory allocation error\n");
+ return NULL;
+ }
+
+ for (sid = 0; sid < tgt->max_subsystems; sid++) {
+ subsystem = tgt->subsystems[sid];
+ if ((subsystem == NULL) ||
+ (subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
+ (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
+ continue;
+ }
+
+ if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
+ continue;
+ }
+
+ if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
+ continue;
+ }
+
+ for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
+ listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
+ size_t new_size = cur_size + sizeof(*entry);
+ void *new_log_page = realloc(disc_log, new_size);
+
+ if (new_log_page == NULL) {
+ SPDK_ERRLOG("Discovery log page memory allocation error\n");
+ break;
+ }
+
+ disc_log = new_log_page;
+ cur_size = new_size;
+
+ entry = &disc_log->entries[numrec];
+ memset(entry, 0, sizeof(*entry));
+ entry->portid = numrec;
+ entry->cntlid = 0xffff;
+ entry->asqsz = listener->transport->opts.max_aq_depth;
+ entry->subtype = subsystem->subtype;
+ snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn);
+
+ nvmf_transport_listener_discover(listener->transport, listener->trid, entry);
+
+ numrec++;
+ }
+ }
+
+ disc_log->numrec = numrec;
+ disc_log->genctr = tgt->discovery_genctr;
+ *log_page_size = cur_size;
+
+ return disc_log;
+}
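+
+/*
+ * Editorial note: the generated page follows the standard Discovery Log
+ * layout -- a fixed-size header followed by one fixed-size entry per
+ * advertised listener -- so the final size produced by the realloc() loop
+ * above is
+ *
+ *   sizeof(struct spdk_nvmf_discovery_log_page) +
+ *       numrec * sizeof(struct spdk_nvmf_discovery_log_page_entry)
+ */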
+
+void
+nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
+ uint32_t iovcnt, uint64_t offset, uint32_t length)
+{
+ size_t copy_len = 0;
+ size_t zero_len = 0;
+ struct iovec *tmp;
+ size_t log_page_size = 0;
+ struct spdk_nvmf_discovery_log_page *discovery_log_page;
+
+ discovery_log_page = nvmf_generate_discovery_log(tgt, hostnqn, &log_page_size);
+
+ /* Copy the valid part of the discovery log page, if any */
+ if (discovery_log_page) {
+ for (tmp = iov; tmp < iov + iovcnt; tmp++) {
+ copy_len = spdk_min(tmp->iov_len, length);
+ copy_len = spdk_min(log_page_size - offset, copy_len);
+
+ memcpy(tmp->iov_base, (char *)discovery_log_page + offset, copy_len);
+
+ offset += copy_len;
+ length -= copy_len;
+ zero_len = tmp->iov_len - copy_len;
+ if (log_page_size <= offset || length == 0) {
+ break;
+ }
+ }
+ /* Zero out the rest of the payload */
+ if (zero_len) {
+ memset((char *)tmp->iov_base + copy_len, 0, zero_len);
+ }
+
+ for (++tmp; tmp < iov + iovcnt; tmp++) {
+ memset((char *)tmp->iov_base, 0, tmp->iov_len);
+ }
+
+ free(discovery_log_page);
+ }
+}
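+
+/*
+ * Illustrative sketch (editorial; the caller locals below are hypothetical):
+ * the Get Log Page (Discovery) handler is expected to derive the byte offset
+ * and length from the command's log page offset and dword count fields and
+ * then scatter the page into the request's iovecs, with any tail beyond the
+ * page zero-filled by the helper above.
+ */
+#if 0
+	nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn,
+				    req->iov, req->iovcnt, offset, length);
+#endif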
diff --git a/src/spdk/lib/nvmf/fc.c b/src/spdk/lib/nvmf/fc.c
new file mode 100644
index 000000000..678cfc681
--- /dev/null
+++ b/src/spdk/lib/nvmf/fc.c
@@ -0,0 +1,3957 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018-2019 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NVMe_FC transport functions.
+ */
+
+#include "spdk/env.h"
+#include "spdk/assert.h"
+#include "spdk/nvmf_transport.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/util.h"
+#include "spdk/likely.h"
+#include "spdk/endian.h"
+#include "spdk/log.h"
+#include "spdk/thread.h"
+
+#include "spdk_internal/log.h"
+
+#include "nvmf_fc.h"
+#include "fc_lld.h"
+
+#ifndef DEV_VERIFY
+#define DEV_VERIFY assert
+#endif
+
+#ifndef ASSERT_SPDK_FC_MASTER_THREAD
+#define ASSERT_SPDK_FC_MASTER_THREAD() \
+ DEV_VERIFY(spdk_get_thread() == nvmf_fc_get_master_thread());
+#endif
+
+/*
+ * PRLI service parameters
+ */
+enum spdk_nvmf_fc_service_parameters {
+ SPDK_NVMF_FC_FIRST_BURST_SUPPORTED = 0x0001,
+ SPDK_NVMF_FC_DISCOVERY_SERVICE = 0x0008,
+ SPDK_NVMF_FC_TARGET_FUNCTION = 0x0010,
+ SPDK_NVMF_FC_INITIATOR_FUNCTION = 0x0020,
+ SPDK_NVMF_FC_CONFIRMED_COMPLETION_SUPPORTED = 0x0080,
+};
+
+static char *fc_req_state_strs[] = {
+ "SPDK_NVMF_FC_REQ_INIT",
+ "SPDK_NVMF_FC_REQ_READ_BDEV",
+ "SPDK_NVMF_FC_REQ_READ_XFER",
+ "SPDK_NVMF_FC_REQ_READ_RSP",
+ "SPDK_NVMF_FC_REQ_WRITE_BUFFS",
+ "SPDK_NVMF_FC_REQ_WRITE_XFER",
+ "SPDK_NVMF_FC_REQ_WRITE_BDEV",
+ "SPDK_NVMF_FC_REQ_WRITE_RSP",
+ "SPDK_NVMF_FC_REQ_NONE_BDEV",
+ "SPDK_NVMF_FC_REQ_NONE_RSP",
+ "SPDK_NVMF_FC_REQ_SUCCESS",
+ "SPDK_NVMF_FC_REQ_FAILED",
+ "SPDK_NVMF_FC_REQ_ABORTED",
+ "SPDK_NVMF_FC_REQ_BDEV_ABORTED",
+ "SPDK_NVMF_FC_REQ_PENDING"
+};
+
+#define OBJECT_NVMF_FC_IO 0xA0
+
+#define TRACE_GROUP_NVMF_FC 0x8
+#define TRACE_FC_REQ_INIT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x01)
+#define TRACE_FC_REQ_READ_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x02)
+#define TRACE_FC_REQ_READ_XFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x03)
+#define TRACE_FC_REQ_READ_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x04)
+#define TRACE_FC_REQ_WRITE_BUFFS SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x05)
+#define TRACE_FC_REQ_WRITE_XFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x06)
+#define TRACE_FC_REQ_WRITE_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x07)
+#define TRACE_FC_REQ_WRITE_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x08)
+#define TRACE_FC_REQ_NONE_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x09)
+#define TRACE_FC_REQ_NONE_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0A)
+#define TRACE_FC_REQ_SUCCESS SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0B)
+#define TRACE_FC_REQ_FAILED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0C)
+#define TRACE_FC_REQ_ABORTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0D)
+#define TRACE_FC_REQ_BDEV_ABORTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0E)
+#define TRACE_FC_REQ_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0F)
+
+SPDK_TRACE_REGISTER_FN(nvmf_fc_trace, "nvmf_fc", TRACE_GROUP_NVMF_FC)
+{
+ spdk_trace_register_object(OBJECT_NVMF_FC_IO, 'r');
+ spdk_trace_register_description("FC_REQ_NEW",
+ TRACE_FC_REQ_INIT,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 1, 1, "");
+ spdk_trace_register_description("FC_REQ_READ_SUBMIT_TO_BDEV",
+ TRACE_FC_REQ_READ_BDEV,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_READ_XFER_DATA",
+ TRACE_FC_REQ_READ_XFER,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_READ_RSP",
+ TRACE_FC_REQ_READ_RSP,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_WRITE_NEED_BUFFER",
+ TRACE_FC_REQ_WRITE_BUFFS,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_WRITE_XFER_DATA",
+ TRACE_FC_REQ_WRITE_XFER,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_WRITE_SUBMIT_TO_BDEV",
+ TRACE_FC_REQ_WRITE_BDEV,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_WRITE_RSP",
+ TRACE_FC_REQ_WRITE_RSP,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_NONE_SUBMIT_TO_BDEV",
+ TRACE_FC_REQ_NONE_BDEV,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_NONE_RSP",
+ TRACE_FC_REQ_NONE_RSP,
+ OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_SUCCESS",
+ TRACE_FC_REQ_SUCCESS,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("FC_REQ_FAILED",
+ TRACE_FC_REQ_FAILED,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("FC_REQ_ABORTED",
+ TRACE_FC_REQ_ABORTED,
+ OWNER_NONE, OBJECT_NONE, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_ABORTED_SUBMIT_TO_BDEV",
+ TRACE_FC_REQ_BDEV_ABORTED,
+ OWNER_NONE, OBJECT_NONE, 0, 1, "");
+ spdk_trace_register_description("FC_REQ_PENDING",
+ TRACE_FC_REQ_PENDING,
+ OWNER_NONE, OBJECT_NONE, 0, 1, "");
+}
+
+/**
+ * The structure used by all fc adm functions
+ */
+struct spdk_nvmf_fc_adm_api_data {
+ void *api_args;
+ spdk_nvmf_fc_callback cb_func;
+};
+
+/**
+ * The callback structure for nport-delete
+ */
+struct spdk_nvmf_fc_adm_nport_del_cb_data {
+ struct spdk_nvmf_fc_nport *nport;
+ uint8_t port_handle;
+ spdk_nvmf_fc_callback fc_cb_func;
+ void *fc_cb_ctx;
+};
+
+/**
+ * The callback structure for it-delete
+ */
+struct spdk_nvmf_fc_adm_i_t_del_cb_data {
+ struct spdk_nvmf_fc_nport *nport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ uint8_t port_handle;
+ spdk_nvmf_fc_callback fc_cb_func;
+ void *fc_cb_ctx;
+};
+
+
+typedef void (*spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn)(void *arg, uint32_t err);
+
+/**
+ * The callback structure for the it-delete-assoc callback
+ */
+struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data {
+ struct spdk_nvmf_fc_nport *nport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ uint8_t port_handle;
+ spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func;
+ void *cb_ctx;
+};
+
+/*
+ * Call back function pointer for HW port quiesce.
+ */
+typedef void (*spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn)(void *ctx, int err);
+
+/**
+ * Context structure for quiescing a hardware port
+ */
+struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx {
+ int quiesce_count;
+ void *ctx;
+ spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func;
+};
+
+/**
+ * Context structure used to reset a hardware port
+ */
+struct spdk_nvmf_fc_adm_hw_port_reset_ctx {
+ void *reset_args;
+ spdk_nvmf_fc_callback reset_cb_func;
+};
+
+/**
+ * The callback structure for HW port link break event
+ */
+struct spdk_nvmf_fc_adm_port_link_break_cb_data {
+ struct spdk_nvmf_hw_port_link_break_args *args;
+ struct spdk_nvmf_fc_nport_delete_args nport_del_args;
+ spdk_nvmf_fc_callback cb_func;
+};
+
+struct spdk_nvmf_fc_transport {
+ struct spdk_nvmf_transport transport;
+ pthread_mutex_t lock;
+};
+
+static struct spdk_nvmf_fc_transport *g_nvmf_ftransport;
+
+static TAILQ_HEAD(, spdk_nvmf_fc_port) g_spdk_nvmf_fc_port_list =
+ TAILQ_HEAD_INITIALIZER(g_spdk_nvmf_fc_port_list);
+
+static struct spdk_thread *g_nvmf_fc_master_thread = NULL;
+
+static uint32_t g_nvmf_fgroup_count = 0;
+static TAILQ_HEAD(, spdk_nvmf_fc_poll_group) g_nvmf_fgroups =
+ TAILQ_HEAD_INITIALIZER(g_nvmf_fgroups);
+
+struct spdk_thread *
+nvmf_fc_get_master_thread(void)
+{
+ return g_nvmf_fc_master_thread;
+}
+
+static inline void
+nvmf_fc_record_req_trace_point(struct spdk_nvmf_fc_request *fc_req,
+ enum spdk_nvmf_fc_request_state state)
+{
+ uint16_t tpoint_id = SPDK_TRACE_MAX_TPOINT_ID;
+
+ switch (state) {
+ case SPDK_NVMF_FC_REQ_INIT:
+ /* Start IO tracing */
+ tpoint_id = TRACE_FC_REQ_INIT;
+ break;
+ case SPDK_NVMF_FC_REQ_READ_BDEV:
+ tpoint_id = TRACE_FC_REQ_READ_BDEV;
+ break;
+ case SPDK_NVMF_FC_REQ_READ_XFER:
+ tpoint_id = TRACE_FC_REQ_READ_XFER;
+ break;
+ case SPDK_NVMF_FC_REQ_READ_RSP:
+ tpoint_id = TRACE_FC_REQ_READ_RSP;
+ break;
+ case SPDK_NVMF_FC_REQ_WRITE_BUFFS:
+ tpoint_id = TRACE_FC_REQ_WRITE_BUFFS;
+ break;
+ case SPDK_NVMF_FC_REQ_WRITE_XFER:
+ tpoint_id = TRACE_FC_REQ_WRITE_XFER;
+ break;
+ case SPDK_NVMF_FC_REQ_WRITE_BDEV:
+ tpoint_id = TRACE_FC_REQ_WRITE_BDEV;
+ break;
+ case SPDK_NVMF_FC_REQ_WRITE_RSP:
+ tpoint_id = TRACE_FC_REQ_WRITE_RSP;
+ break;
+ case SPDK_NVMF_FC_REQ_NONE_BDEV:
+ tpoint_id = TRACE_FC_REQ_NONE_BDEV;
+ break;
+ case SPDK_NVMF_FC_REQ_NONE_RSP:
+ tpoint_id = TRACE_FC_REQ_NONE_RSP;
+ break;
+ case SPDK_NVMF_FC_REQ_SUCCESS:
+ tpoint_id = TRACE_FC_REQ_SUCCESS;
+ break;
+ case SPDK_NVMF_FC_REQ_FAILED:
+ tpoint_id = TRACE_FC_REQ_FAILED;
+ break;
+ case SPDK_NVMF_FC_REQ_ABORTED:
+ tpoint_id = TRACE_FC_REQ_ABORTED;
+ break;
+ case SPDK_NVMF_FC_REQ_BDEV_ABORTED:
+ tpoint_id = TRACE_FC_REQ_ABORTED;
+ break;
+ case SPDK_NVMF_FC_REQ_PENDING:
+ tpoint_id = TRACE_FC_REQ_PENDING;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ if (tpoint_id != SPDK_TRACE_MAX_TPOINT_ID) {
+ spdk_trace_record(tpoint_id, fc_req->poller_lcore, 0,
+ (uint64_t)(&fc_req->req), 0);
+ }
+}
+
+static void
+nvmf_fc_handle_connection_failure(void *arg)
+{
+ struct spdk_nvmf_fc_conn *fc_conn = arg;
+ struct spdk_nvmf_fc_ls_add_conn_api_data *api_data = NULL;
+
+ if (!fc_conn->create_opd) {
+ return;
+ }
+ api_data = &fc_conn->create_opd->u.add_conn;
+
+ nvmf_fc_ls_add_conn_failure(api_data->assoc, api_data->ls_rqst,
+ api_data->args.fc_conn, api_data->aq_conn);
+}
+
+static void
+nvmf_fc_handle_assoc_deletion(void *arg)
+{
+ struct spdk_nvmf_fc_conn *fc_conn = arg;
+
+ nvmf_fc_delete_association(fc_conn->fc_assoc->tgtport,
+ fc_conn->fc_assoc->assoc_id, false, true, NULL, NULL);
+}
+
+static int
+nvmf_fc_create_req_mempool(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ uint32_t i;
+ struct spdk_nvmf_fc_request *fc_req;
+
+ TAILQ_INIT(&hwqp->free_reqs);
+ TAILQ_INIT(&hwqp->in_use_reqs);
+
+ hwqp->fc_reqs_buf = calloc(hwqp->rq_size, sizeof(struct spdk_nvmf_fc_request));
+ if (hwqp->fc_reqs_buf == NULL) {
+ SPDK_ERRLOG("create fc request pool failed\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < hwqp->rq_size; i++) {
+ fc_req = hwqp->fc_reqs_buf + i;
+
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_INIT);
+ TAILQ_INSERT_TAIL(&hwqp->free_reqs, fc_req, link);
+ }
+
+ return 0;
+}
+
+static inline struct spdk_nvmf_fc_request *
+nvmf_fc_hwqp_alloc_fc_request(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ struct spdk_nvmf_fc_request *fc_req;
+
+ if (TAILQ_EMPTY(&hwqp->free_reqs)) {
+ SPDK_ERRLOG("Alloc request buffer failed\n");
+ return NULL;
+ }
+
+ fc_req = TAILQ_FIRST(&hwqp->free_reqs);
+ TAILQ_REMOVE(&hwqp->free_reqs, fc_req, link);
+
+ memset(fc_req, 0, sizeof(struct spdk_nvmf_fc_request));
+ TAILQ_INSERT_TAIL(&hwqp->in_use_reqs, fc_req, link);
+ TAILQ_INIT(&fc_req->abort_cbs);
+ return fc_req;
+}
+
+static inline void
+nvmf_fc_hwqp_free_fc_request(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_request *fc_req)
+{
+ if (fc_req->state != SPDK_NVMF_FC_REQ_SUCCESS) {
+		/* Record the failure state for debugging purposes. */
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_FAILED);
+ }
+
+	/* Set the magic value to mark the request as no longer valid. */
+ fc_req->magic = 0xDEADBEEF;
+
+ TAILQ_REMOVE(&hwqp->in_use_reqs, fc_req, link);
+ TAILQ_INSERT_HEAD(&hwqp->free_reqs, fc_req, link);
+}
+
+static inline bool
+nvmf_fc_req_in_get_buff(struct spdk_nvmf_fc_request *fc_req)
+{
+ switch (fc_req->state) {
+ case SPDK_NVMF_FC_REQ_WRITE_BUFFS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void
+nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ nvmf_fc_init_rqpair_buffers(hwqp);
+}
+
+struct spdk_nvmf_fc_conn *
+nvmf_fc_hwqp_find_fc_conn(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t conn_id)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ TAILQ_FOREACH(fc_conn, &hwqp->connection_list, link) {
+ if (fc_conn->conn_id == conn_id) {
+ return fc_conn;
+ }
+ }
+
+ return NULL;
+}
+
+void
+nvmf_fc_hwqp_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp, void *queues_curr)
+{
+ struct spdk_nvmf_fc_abts_ctx *ctx;
+ struct spdk_nvmf_fc_poller_api_queue_sync_args *args = NULL, *tmp = NULL;
+
+ /* Clean up any pending sync callbacks */
+ TAILQ_FOREACH_SAFE(args, &hwqp->sync_cbs, link, tmp) {
+ TAILQ_REMOVE(&hwqp->sync_cbs, args, link);
+ ctx = args->cb_info.cb_data;
+ if (ctx) {
+ if (++ctx->hwqps_responded == ctx->num_hwqps) {
+ free(ctx->sync_poller_args);
+ free(ctx->abts_poller_args);
+ free(ctx);
+ }
+ }
+ }
+
+ nvmf_fc_reinit_q(hwqp->queues, queues_curr);
+}
+
+void
+nvmf_fc_init_hwqp(struct spdk_nvmf_fc_port *fc_port, struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ hwqp->fc_port = fc_port;
+
+ /* clear counters */
+ memset(&hwqp->counters, 0, sizeof(struct spdk_nvmf_fc_errors));
+
+ nvmf_fc_init_poller_queues(hwqp);
+ if (&fc_port->ls_queue != hwqp) {
+ nvmf_fc_create_req_mempool(hwqp);
+ }
+
+ nvmf_fc_init_q(hwqp);
+ TAILQ_INIT(&hwqp->connection_list);
+ TAILQ_INIT(&hwqp->sync_cbs);
+ TAILQ_INIT(&hwqp->ls_pending_queue);
+}
+
+static struct spdk_nvmf_fc_poll_group *
+nvmf_fc_get_idlest_poll_group(void)
+{
+ uint32_t max_count = UINT32_MAX;
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_fc_poll_group *ret_fgroup = NULL;
+
+ /* find poll group with least number of hwqp's assigned to it */
+ TAILQ_FOREACH(fgroup, &g_nvmf_fgroups, link) {
+ if (fgroup->hwqp_count < max_count) {
+ ret_fgroup = fgroup;
+ max_count = fgroup->hwqp_count;
+ }
+ }
+
+ return ret_fgroup;
+}
+
+void
+nvmf_fc_poll_group_add_hwqp(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ struct spdk_nvmf_fc_poll_group *fgroup = NULL;
+
+ assert(hwqp);
+ if (hwqp == NULL) {
+ SPDK_ERRLOG("Error: hwqp is NULL\n");
+ return;
+ }
+
+ assert(g_nvmf_fgroup_count);
+
+ fgroup = nvmf_fc_get_idlest_poll_group();
+ if (!fgroup) {
+ SPDK_ERRLOG("Could not assign poll group for hwqp (%d)\n", hwqp->hwqp_id);
+ return;
+ }
+
+ hwqp->thread = fgroup->group.group->thread;
+ hwqp->fgroup = fgroup;
+ fgroup->hwqp_count++;
+ nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_ADD_HWQP, NULL);
+}
+
+void
+nvmf_fc_poll_group_remove_hwqp(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ assert(hwqp);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "Remove hwqp from poller: for port: %d, hwqp: %d\n",
+ hwqp->fc_port->port_hdl, hwqp->hwqp_id);
+
+ if (!hwqp->fgroup) {
+ SPDK_ERRLOG("HWQP (%d) not assigned to poll group\n", hwqp->hwqp_id);
+ } else {
+ hwqp->fgroup->hwqp_count--;
+ nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_REMOVE_HWQP, NULL);
+ }
+}
+
+/*
+ * Note: This needs to be used only on master poller.
+ */
+static uint64_t
+nvmf_fc_get_abts_unique_id(void)
+{
+ static uint32_t u_id = 0;
+
+ return (uint64_t)(++u_id);
+}
+
+static void
+nvmf_fc_queue_synced_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct spdk_nvmf_fc_abts_ctx *ctx = cb_data;
+ struct spdk_nvmf_fc_poller_api_abts_recvd_args *args, *poller_arg;
+
+ ctx->hwqps_responded++;
+
+ if (ctx->hwqps_responded < ctx->num_hwqps) {
+ /* Wait for all pollers to complete. */
+ return;
+ }
+
+ /* Free the queue sync poller args. */
+ free(ctx->sync_poller_args);
+
+ /* Mark as queue synced */
+ ctx->queue_synced = true;
+
+ /* Reset the ctx values */
+ ctx->hwqps_responded = 0;
+ ctx->handled = false;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "QueueSync(0x%lx) completed for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+
+ /* Resend ABTS to pollers */
+ args = ctx->abts_poller_args;
+ for (int i = 0; i < ctx->num_hwqps; i++) {
+ poller_arg = args + i;
+ nvmf_fc_poller_api_func(poller_arg->hwqp,
+ SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED,
+ poller_arg);
+ }
+}
+
+static int
+nvmf_fc_handle_abts_notfound(struct spdk_nvmf_fc_abts_ctx *ctx)
+{
+ struct spdk_nvmf_fc_poller_api_queue_sync_args *args, *poller_arg;
+ struct spdk_nvmf_fc_poller_api_abts_recvd_args *abts_args, *abts_poller_arg;
+
+ /* check if FC driver supports queue sync */
+ if (!nvmf_fc_q_sync_available()) {
+ return -EPERM;
+ }
+
+ assert(ctx);
+ if (!ctx) {
+ SPDK_ERRLOG("NULL ctx pointer");
+ return -EINVAL;
+ }
+
+ /* Reset the ctx values */
+ ctx->hwqps_responded = 0;
+
+ args = calloc(ctx->num_hwqps,
+ sizeof(struct spdk_nvmf_fc_poller_api_queue_sync_args));
+ if (!args) {
+ SPDK_ERRLOG("QueueSync(0x%lx) failed for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+ return -ENOMEM;
+ }
+ ctx->sync_poller_args = args;
+
+ abts_args = ctx->abts_poller_args;
+ for (int i = 0; i < ctx->num_hwqps; i++) {
+ abts_poller_arg = abts_args + i;
+ poller_arg = args + i;
+ poller_arg->u_id = ctx->u_id;
+ poller_arg->hwqp = abts_poller_arg->hwqp;
+ poller_arg->cb_info.cb_func = nvmf_fc_queue_synced_cb;
+ poller_arg->cb_info.cb_data = ctx;
+ poller_arg->cb_info.cb_thread = spdk_get_thread();
+
+ /* Send a Queue sync message to interested pollers */
+ nvmf_fc_poller_api_func(poller_arg->hwqp,
+ SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC,
+ poller_arg);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "QueueSync(0x%lx) Sent for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+
+ /* Post Marker to queue to track aborted request */
+ nvmf_fc_issue_q_sync(ctx->ls_hwqp, ctx->u_id, ctx->fcp_rq_id);
+
+ return 0;
+}
+
+static void
+nvmf_fc_abts_handled_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct spdk_nvmf_fc_abts_ctx *ctx = cb_data;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+
+ if (ret != SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND) {
+ ctx->handled = true;
+ }
+
+ ctx->hwqps_responded++;
+
+ if (ctx->hwqps_responded < ctx->num_hwqps) {
+ /* Wait for all pollers to complete. */
+ return;
+ }
+
+ nport = nvmf_fc_nport_find(ctx->port_hdl, ctx->nport_hdl);
+
+ if (ctx->nport != nport) {
+ /* Nport can be deleted while this abort is being
+ * processed by the pollers.
+ */
+ SPDK_NOTICELOG("nport_%d deleted while processing ABTS frame, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ ctx->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+ } else {
+ if (!ctx->handled) {
+ /* Try syncing the queues and try one more time */
+ if (!ctx->queue_synced && (nvmf_fc_handle_abts_notfound(ctx) == 0)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "QueueSync(0x%lx) for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+ return;
+ } else {
+ /* Send Reject */
+ nvmf_fc_xmt_bls_rsp(&ctx->nport->fc_port->ls_queue,
+ ctx->oxid, ctx->rxid, ctx->rpi, true,
+ FCNVME_BLS_REJECT_EXP_INVALID_OXID, NULL, NULL);
+ }
+ } else {
+ /* Send Accept */
+ nvmf_fc_xmt_bls_rsp(&ctx->nport->fc_port->ls_queue,
+ ctx->oxid, ctx->rxid, ctx->rpi, false,
+ 0, NULL, NULL);
+ }
+ }
+ SPDK_NOTICELOG("BLS_%s sent for ABTS frame nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ (ctx->handled) ? "ACC" : "REJ", ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+
+ free(ctx->abts_poller_args);
+ free(ctx);
+}
+
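+/*
+ * Editorial summary of the ABTS flow below: the received ABTS is fanned out
+ * to every hwqp that carries a connection for the aborting RPI, and each
+ * poller answers through nvmf_fc_abts_handled_cb().  If no poller recognized
+ * the OX_ID, the queues are synced once via nvmf_fc_handle_abts_notfound()
+ * and the ABTS is replayed before a final BLS accept/reject is transmitted.
+ */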
+void
+nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport, uint16_t rpi,
+ uint16_t oxid, uint16_t rxid)
+{
+ struct spdk_nvmf_fc_abts_ctx *ctx = NULL;
+ struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = NULL, *poller_arg;
+ struct spdk_nvmf_fc_association *assoc = NULL;
+ struct spdk_nvmf_fc_conn *conn = NULL;
+ uint32_t hwqp_cnt = 0;
+ bool skip_hwqp_cnt;
+ struct spdk_nvmf_fc_hwqp **hwqps = NULL;
+ uint32_t i;
+
+ SPDK_NOTICELOG("Handle ABTS frame for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ nport->nport_hdl, rpi, oxid, rxid);
+
+ /* Allocate memory to track hwqp's with at least 1 active connection. */
+ hwqps = calloc(nport->fc_port->num_io_queues, sizeof(struct spdk_nvmf_fc_hwqp *));
+ if (hwqps == NULL) {
+ SPDK_ERRLOG("Unable to allocate temp. hwqp array for abts processing!\n");
+ goto bls_rej;
+ }
+
+ TAILQ_FOREACH(assoc, &nport->fc_associations, link) {
+ TAILQ_FOREACH(conn, &assoc->fc_conns, assoc_link) {
+ if (conn->rpi != rpi) {
+ continue;
+ }
+
+ skip_hwqp_cnt = false;
+ for (i = 0; i < hwqp_cnt; i++) {
+ if (hwqps[i] == conn->hwqp) {
+ /* Skip. This is already present */
+ skip_hwqp_cnt = true;
+ break;
+ }
+ }
+ if (!skip_hwqp_cnt) {
+ assert(hwqp_cnt < nport->fc_port->num_io_queues);
+ hwqps[hwqp_cnt] = conn->hwqp;
+ hwqp_cnt++;
+ }
+ }
+ }
+
+ if (!hwqp_cnt) {
+ goto bls_rej;
+ }
+
+ args = calloc(hwqp_cnt,
+ sizeof(struct spdk_nvmf_fc_poller_api_abts_recvd_args));
+ if (!args) {
+ goto bls_rej;
+ }
+
+ ctx = calloc(1, sizeof(struct spdk_nvmf_fc_abts_ctx));
+ if (!ctx) {
+ goto bls_rej;
+ }
+ ctx->rpi = rpi;
+ ctx->oxid = oxid;
+ ctx->rxid = rxid;
+ ctx->nport = nport;
+ ctx->nport_hdl = nport->nport_hdl;
+ ctx->port_hdl = nport->fc_port->port_hdl;
+ ctx->num_hwqps = hwqp_cnt;
+ ctx->ls_hwqp = &nport->fc_port->ls_queue;
+ ctx->fcp_rq_id = nport->fc_port->fcp_rq_id;
+ ctx->abts_poller_args = args;
+
+ /* Get a unique context for this ABTS */
+ ctx->u_id = nvmf_fc_get_abts_unique_id();
+
+ for (i = 0; i < hwqp_cnt; i++) {
+ poller_arg = args + i;
+ poller_arg->hwqp = hwqps[i];
+ poller_arg->cb_info.cb_func = nvmf_fc_abts_handled_cb;
+ poller_arg->cb_info.cb_data = ctx;
+ poller_arg->cb_info.cb_thread = spdk_get_thread();
+ poller_arg->ctx = ctx;
+
+ nvmf_fc_poller_api_func(poller_arg->hwqp,
+ SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED,
+ poller_arg);
+ }
+
+ free(hwqps);
+
+ return;
+bls_rej:
+ free(args);
+ free(hwqps);
+
+ /* Send Reject */
+ nvmf_fc_xmt_bls_rsp(&nport->fc_port->ls_queue, oxid, rxid, rpi,
+ true, FCNVME_BLS_REJECT_EXP_NOINFO, NULL, NULL);
+ SPDK_NOTICELOG("BLS_RJT for ABTS frame for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ nport->nport_hdl, rpi, oxid, rxid);
+ return;
+}
+
+/*** Accessor functions for the FC structures - BEGIN */
+/*
+ * Returns true if the port is in offline state.
+ */
+bool
+nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port)
+{
+ if (fc_port && (fc_port->hw_port_status == SPDK_FC_PORT_OFFLINE)) {
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Returns true if the port is in online state.
+ */
+bool
+nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port)
+{
+ if (fc_port && (fc_port->hw_port_status == SPDK_FC_PORT_ONLINE)) {
+ return true;
+ }
+
+ return false;
+}
+
+int
+nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port)
+{
+ if (fc_port && (fc_port->hw_port_status != SPDK_FC_PORT_ONLINE)) {
+ fc_port->hw_port_status = SPDK_FC_PORT_ONLINE;
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+int
+nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port)
+{
+ if (fc_port && (fc_port->hw_port_status != SPDK_FC_PORT_OFFLINE)) {
+ fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE;
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+int
+nvmf_fc_hwqp_set_online(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ if (hwqp && (hwqp->state != SPDK_FC_HWQP_ONLINE)) {
+ hwqp->state = SPDK_FC_HWQP_ONLINE;
+ /* reset some queue counters */
+ hwqp->num_conns = 0;
+ return nvmf_fc_set_q_online_state(hwqp, true);
+ }
+
+ return -EPERM;
+}
+
+int
+nvmf_fc_hwqp_set_offline(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ if (hwqp && (hwqp->state != SPDK_FC_HWQP_OFFLINE)) {
+ hwqp->state = SPDK_FC_HWQP_OFFLINE;
+ return nvmf_fc_set_q_online_state(hwqp, false);
+ }
+
+ return -EPERM;
+}
+
+void
+nvmf_fc_port_add(struct spdk_nvmf_fc_port *fc_port)
+{
+ TAILQ_INSERT_TAIL(&g_spdk_nvmf_fc_port_list, fc_port, link);
+}
+
+struct spdk_nvmf_fc_port *
+nvmf_fc_port_lookup(uint8_t port_hdl)
+{
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+
+ TAILQ_FOREACH(fc_port, &g_spdk_nvmf_fc_port_list, link) {
+ if (fc_port->port_hdl == port_hdl) {
+ return fc_port;
+ }
+ }
+ return NULL;
+}
+
+static void
+nvmf_fc_port_cleanup(void)
+{
+ struct spdk_nvmf_fc_port *fc_port, *tmp;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ uint32_t i;
+
+ TAILQ_FOREACH_SAFE(fc_port, &g_spdk_nvmf_fc_port_list, link, tmp) {
+ TAILQ_REMOVE(&g_spdk_nvmf_fc_port_list, fc_port, link);
+ for (i = 0; i < fc_port->num_io_queues; i++) {
+ hwqp = &fc_port->io_queues[i];
+ if (hwqp->fc_reqs_buf) {
+ free(hwqp->fc_reqs_buf);
+ }
+ }
+ free(fc_port);
+ }
+}
+
+uint32_t
+nvmf_fc_get_prli_service_params(void)
+{
+ return (SPDK_NVMF_FC_DISCOVERY_SERVICE | SPDK_NVMF_FC_TARGET_FUNCTION);
+}
+
+int
+nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport)
+{
+ if (fc_port) {
+ TAILQ_INSERT_TAIL(&fc_port->nport_list, nport, link);
+ fc_port->num_nports++;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int
+nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport)
+{
+ if (fc_port && nport) {
+ TAILQ_REMOVE(&fc_port->nport_list, nport, link);
+ fc_port->num_nports--;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static struct spdk_nvmf_fc_nport *
+nvmf_fc_nport_hdl_lookup(struct spdk_nvmf_fc_port *fc_port, uint16_t nport_hdl)
+{
+ struct spdk_nvmf_fc_nport *fc_nport = NULL;
+
+ TAILQ_FOREACH(fc_nport, &fc_port->nport_list, link) {
+ if (fc_nport->nport_hdl == nport_hdl) {
+ return fc_nport;
+ }
+ }
+
+ return NULL;
+}
+
+struct spdk_nvmf_fc_nport *
+nvmf_fc_nport_find(uint8_t port_hdl, uint16_t nport_hdl)
+{
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+
+ fc_port = nvmf_fc_port_lookup(port_hdl);
+ if (fc_port) {
+ return nvmf_fc_nport_hdl_lookup(fc_port, nport_hdl);
+ }
+
+ return NULL;
+}
+
+static inline int
+nvmf_fc_hwqp_find_nport_and_rport(struct spdk_nvmf_fc_hwqp *hwqp,
+ uint32_t d_id, struct spdk_nvmf_fc_nport **nport,
+ uint32_t s_id, struct spdk_nvmf_fc_remote_port_info **rport)
+{
+ struct spdk_nvmf_fc_nport *n_port;
+ struct spdk_nvmf_fc_remote_port_info *r_port;
+
+ assert(hwqp);
+ if (hwqp == NULL) {
+ SPDK_ERRLOG("Error: hwqp is NULL\n");
+ return -EINVAL;
+ }
+ assert(nport);
+ if (nport == NULL) {
+ SPDK_ERRLOG("Error: nport is NULL\n");
+ return -EINVAL;
+ }
+ assert(rport);
+ if (rport == NULL) {
+ SPDK_ERRLOG("Error: rport is NULL\n");
+ return -EINVAL;
+ }
+
+ TAILQ_FOREACH(n_port, &hwqp->fc_port->nport_list, link) {
+ if (n_port->d_id == d_id) {
+ TAILQ_FOREACH(r_port, &n_port->rem_port_list, link) {
+ if (r_port->s_id == s_id) {
+ *nport = n_port;
+ *rport = r_port;
+ return 0;
+ }
+ }
+ break;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/* Returns true if the Nport is empty of all rem_ports */
+bool
+nvmf_fc_nport_has_no_rport(struct spdk_nvmf_fc_nport *nport)
+{
+ if (nport && TAILQ_EMPTY(&nport->rem_port_list)) {
+ assert(nport->rport_count == 0);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+int
+nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport,
+ enum spdk_nvmf_fc_object_state state)
+{
+ if (nport) {
+ nport->nport_state = state;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+bool
+nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port)
+{
+ if (nport && rem_port) {
+ TAILQ_INSERT_TAIL(&nport->rem_port_list, rem_port, link);
+ nport->rport_count++;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+bool
+nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port)
+{
+ if (nport && rem_port) {
+ TAILQ_REMOVE(&nport->rem_port_list, rem_port, link);
+ nport->rport_count--;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+int
+nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport,
+ enum spdk_nvmf_fc_object_state state)
+{
+ if (rport) {
+ rport->rport_state = state;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+int
+nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc,
+ enum spdk_nvmf_fc_object_state state)
+{
+ if (assoc) {
+ assoc->assoc_state = state;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+static struct spdk_nvmf_fc_association *
+nvmf_ctrlr_get_fc_assoc(struct spdk_nvmf_ctrlr *ctrlr)
+{
+ struct spdk_nvmf_qpair *qpair = ctrlr->admin_qpair;
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ if (!qpair) {
+ SPDK_ERRLOG("Controller %d has no associations\n", ctrlr->cntlid);
+ return NULL;
+ }
+
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+
+ return fc_conn->fc_assoc;
+}
+
+bool
+nvmf_ctrlr_is_on_nport(uint8_t port_hdl, uint16_t nport_hdl,
+ struct spdk_nvmf_ctrlr *ctrlr)
+{
+ struct spdk_nvmf_fc_nport *fc_nport = NULL;
+ struct spdk_nvmf_fc_association *assoc = NULL;
+
+ if (!ctrlr) {
+ return false;
+ }
+
+ fc_nport = nvmf_fc_nport_find(port_hdl, nport_hdl);
+ if (!fc_nport) {
+ return false;
+ }
+
+ assoc = nvmf_ctrlr_get_fc_assoc(ctrlr);
+ if (assoc && assoc->tgtport == fc_nport) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "Controller: %d corresponding to association: %p(%lu:%d) is on port: %d nport: %d\n",
+ ctrlr->cntlid, assoc, assoc->assoc_id, assoc->assoc_state, port_hdl,
+ nport_hdl);
+ return true;
+ }
+ return false;
+}
+
+static inline bool
+nvmf_fc_req_in_bdev(struct spdk_nvmf_fc_request *fc_req)
+{
+ switch (fc_req->state) {
+ case SPDK_NVMF_FC_REQ_READ_BDEV:
+ case SPDK_NVMF_FC_REQ_WRITE_BDEV:
+ case SPDK_NVMF_FC_REQ_NONE_BDEV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+nvmf_fc_req_in_pending(struct spdk_nvmf_fc_request *fc_req)
+{
+ struct spdk_nvmf_request *tmp = NULL;
+
+ STAILQ_FOREACH(tmp, &fc_req->hwqp->fgroup->group.pending_buf_queue, buf_link) {
+ if (tmp == &fc_req->req) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+nvmf_fc_req_bdev_abort(void *arg1)
+{
+ struct spdk_nvmf_fc_request *fc_req = arg1;
+ struct spdk_nvmf_ctrlr *ctrlr = fc_req->req.qpair->ctrlr;
+ int i;
+
+	/* Initial release - we don't have to abort Admin Queue or
+	 * Fabric commands. The AQ commands supported at this time are:
+	 *   Get Log Page
+	 *   Identify
+	 *   Set Features
+	 *   Get Features
+	 *   AER -> special case, handled differently.
+	 * Each of the above Admin commands (except AER) runs to
+	 * completion, so aborting such commands doesn't make sense.
+	 */
+ /* The Fabric commands supported are
+ * Property Set
+ * Property Get
+ * Connect -> Special case (async. handling). Not sure how to
+ * handle at this point. Let it run to completion.
+ */
+ for (i = 0; i < NVMF_MAX_ASYNC_EVENTS; i++) {
+ if (ctrlr->aer_req[i] == &fc_req->req) {
+ SPDK_NOTICELOG("Abort AER request\n");
+ nvmf_qpair_free_aer(fc_req->req.qpair);
+ }
+ }
+}
+
+void
+nvmf_fc_request_abort_complete(void *arg1)
+{
+ struct spdk_nvmf_fc_request *fc_req =
+ (struct spdk_nvmf_fc_request *)arg1;
+ struct spdk_nvmf_fc_caller_ctx *ctx = NULL, *tmp = NULL;
+
+ /* Request abort completed. Notify all the callbacks */
+ TAILQ_FOREACH_SAFE(ctx, &fc_req->abort_cbs, link, tmp) {
+ /* Notify */
+ ctx->cb(fc_req->hwqp, 0, ctx->cb_args);
+ /* Remove */
+ TAILQ_REMOVE(&fc_req->abort_cbs, ctx, link);
+ /* free */
+ free(ctx);
+ }
+
+	SPDK_NOTICELOG("FC Request(%p) in state %s aborted\n", fc_req,
+ fc_req_state_strs[fc_req->state]);
+
+ _nvmf_fc_request_free(fc_req);
+}
+
+void
+nvmf_fc_request_abort(struct spdk_nvmf_fc_request *fc_req, bool send_abts,
+ spdk_nvmf_fc_caller_cb cb, void *cb_args)
+{
+ struct spdk_nvmf_fc_caller_ctx *ctx = NULL;
+ bool kill_req = false;
+
+ /* Add the cb to list */
+ if (cb) {
+ ctx = calloc(1, sizeof(struct spdk_nvmf_fc_caller_ctx));
+ if (!ctx) {
+ SPDK_ERRLOG("ctx alloc failed.\n");
+ return;
+ }
+ ctx->cb = cb;
+ ctx->cb_args = cb_args;
+
+ TAILQ_INSERT_TAIL(&fc_req->abort_cbs, ctx, link);
+ }
+
+ if (!fc_req->is_aborted) {
+ /* Increment aborted command counter */
+ fc_req->hwqp->counters.num_aborted++;
+ }
+
+ /* If port is dead, skip abort wqe */
+ kill_req = nvmf_fc_is_port_dead(fc_req->hwqp);
+ if (kill_req && nvmf_fc_req_in_xfer(fc_req)) {
+ fc_req->is_aborted = true;
+ goto complete;
+ }
+
+ /* Check if the request is already marked for deletion */
+ if (fc_req->is_aborted) {
+ return;
+ }
+
+ /* Mark request as aborted */
+ fc_req->is_aborted = true;
+
+	/* If an xchg is allocated, record whether we need to send an ABTS. */
+ if (fc_req->xchg) {
+ fc_req->xchg->send_abts = send_abts;
+ fc_req->xchg->aborted = true;
+ }
+
+ if (fc_req->state == SPDK_NVMF_FC_REQ_BDEV_ABORTED) {
+ /* Aborted by backend */
+ goto complete;
+ } else if (nvmf_fc_req_in_bdev(fc_req)) {
+ /* Notify bdev */
+ spdk_thread_send_msg(fc_req->hwqp->thread,
+ nvmf_fc_req_bdev_abort, (void *)fc_req);
+ } else if (nvmf_fc_req_in_xfer(fc_req)) {
+ /* Notify HBA to abort this exchange */
+ nvmf_fc_issue_abort(fc_req->hwqp, fc_req->xchg, NULL, NULL);
+ } else if (nvmf_fc_req_in_get_buff(fc_req)) {
+ /* Will be completed by request_complete callback. */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Abort req when getting buffers.\n");
+ } else if (nvmf_fc_req_in_pending(fc_req)) {
+ /* Remove from pending */
+ STAILQ_REMOVE(&fc_req->hwqp->fgroup->group.pending_buf_queue, &fc_req->req,
+ spdk_nvmf_request, buf_link);
+ goto complete;
+ } else {
+ /* Should never happen */
+ SPDK_ERRLOG("Request in invalid state\n");
+ goto complete;
+ }
+
+ return;
+complete:
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_ABORTED);
+ nvmf_fc_poller_api_func(fc_req->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE,
+ (void *)fc_req);
+}
+
+static int
+nvmf_fc_request_alloc_buffers(struct spdk_nvmf_fc_request *fc_req)
+{
+ uint32_t length = fc_req->req.length;
+ struct spdk_nvmf_fc_poll_group *fgroup = fc_req->hwqp->fgroup;
+ struct spdk_nvmf_transport_poll_group *group = &fgroup->group;
+ struct spdk_nvmf_transport *transport = group->transport;
+
+ if (spdk_nvmf_request_get_buffers(&fc_req->req, group, transport, length)) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int
+nvmf_fc_request_execute(struct spdk_nvmf_fc_request *fc_req)
+{
+	/* Allocate an XCHG if we don't use send frame for this command. */
+ if (!nvmf_fc_use_send_frame(&fc_req->req)) {
+ fc_req->xchg = nvmf_fc_get_xri(fc_req->hwqp);
+ if (!fc_req->xchg) {
+ fc_req->hwqp->counters.no_xchg++;
+ printf("NO XCHGs!\n");
+ goto pending;
+ }
+ }
+
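+ /* Commands that carry data need buffers from the shared pool; req.data points
+ * at the first IOV so the generic bdev path can use it directly. */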
+ if (fc_req->req.length) {
+ if (nvmf_fc_request_alloc_buffers(fc_req) < 0) {
+ fc_req->hwqp->counters.buf_alloc_err++;
+ goto pending;
+ }
+ fc_req->req.data = fc_req->req.iov[0].iov_base;
+ }
+
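+ /* Writes must first DMA the data from the host before they can be executed;
+ * reads and non-data commands go straight to the common execution path. */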
+ if (fc_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "WRITE CMD.\n");
+
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_WRITE_XFER);
+
+ if (nvmf_fc_recv_data(fc_req)) {
+ /* Dropped. Return success to the caller. */
+ fc_req->hwqp->counters.unexpected_err++;
+ _nvmf_fc_request_free(fc_req);
+ }
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "READ/NONE CMD\n");
+
+ if (fc_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_BDEV);
+ } else {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_NONE_BDEV);
+ }
+ spdk_nvmf_request_exec(&fc_req->req);
+ }
+
+ return 0;
+
+pending:
+ if (fc_req->xchg) {
+ nvmf_fc_put_xchg(fc_req->hwqp, fc_req->xchg);
+ fc_req->xchg = NULL;
+ }
+
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_PENDING);
+
+ return -EAGAIN;
+}
+
+static int
+nvmf_fc_hwqp_handle_request(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_frame_hdr *frame,
+ uint32_t buf_idx, struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen)
+{
+ uint16_t cmnd_len;
+ uint64_t rqst_conn_id;
+ struct spdk_nvmf_fc_request *fc_req = NULL;
+ struct spdk_nvmf_fc_cmnd_iu *cmd_iu = NULL;
+ struct spdk_nvmf_fc_conn *fc_conn = NULL;
+ enum spdk_nvme_data_transfer xfer;
+
+ cmd_iu = buffer->virt;
+ cmnd_len = cmd_iu->cmnd_iu_len;
+ cmnd_len = from_be16(&cmnd_len);
+
+ /* check for a valid cmnd_iu format */
+ if ((cmd_iu->fc_id != FCNVME_CMND_IU_FC_ID) ||
+ (cmd_iu->scsi_id != FCNVME_CMND_IU_SCSI_ID) ||
+ (cmnd_len != sizeof(struct spdk_nvmf_fc_cmnd_iu) / 4)) {
+ SPDK_ERRLOG("IU CMD error\n");
+ hwqp->counters.nvme_cmd_iu_err++;
+ return -ENXIO;
+ }
+
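+ /* Only host-to-controller, controller-to-host or no-data transfers are valid
+ * here; bidirectional commands are rejected. */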
+ xfer = spdk_nvme_opc_get_data_transfer(cmd_iu->flags);
+ if (xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
+ SPDK_ERRLOG("IU CMD xfer error\n");
+ hwqp->counters.nvme_cmd_xfer_err++;
+ return -EPERM;
+ }
+
+ rqst_conn_id = from_be64(&cmd_iu->conn_id);
+
+ /* Check if conn id is valid */
+ fc_conn = nvmf_fc_hwqp_find_fc_conn(hwqp, rqst_conn_id);
+ if (!fc_conn) {
+ SPDK_ERRLOG("IU CMD conn(%ld) invalid\n", rqst_conn_id);
+ hwqp->counters.invalid_conn_err++;
+ return -ENODEV;
+ }
+
+ /* If association/connection is being deleted - return */
+ if (fc_conn->fc_assoc->assoc_state != SPDK_NVMF_FC_OBJECT_CREATED) {
+ SPDK_ERRLOG("Association state not valid\n");
+ return -EACCES;
+ }
+
+ if (fc_conn->qpair.state == SPDK_NVMF_QPAIR_ERROR) {
+ return -EACCES;
+ }
+
+ /* Make sure xfer len is according to mdts */
+ if (from_be32(&cmd_iu->data_len) >
+ hwqp->fgroup->group.transport->opts.max_io_size) {
+ SPDK_ERRLOG("IO length requested is greater than MDTS\n");
+ return -EINVAL;
+ }
+
+ /* allocate a request buffer */
+ fc_req = nvmf_fc_hwqp_alloc_fc_request(hwqp);
+ if (fc_req == NULL) {
+ /* Should not happen, since the number of fc_reqs equals the number of RQ buffers */
+ return -ENOMEM;
+ }
+
+ fc_req->req.length = from_be32(&cmd_iu->data_len);
+ fc_req->req.qpair = &fc_conn->qpair;
+ fc_req->req.cmd = (union nvmf_h2c_msg *)&cmd_iu->cmd;
+ fc_req->req.rsp = (union nvmf_c2h_msg *)&fc_req->ersp.rsp;
+ fc_req->oxid = frame->ox_id;
+ fc_req->oxid = from_be16(&fc_req->oxid);
+ fc_req->rpi = fc_conn->rpi;
+ fc_req->buf_index = buf_idx;
+ fc_req->poller_lcore = hwqp->lcore_id;
+ fc_req->poller_thread = hwqp->thread;
+ fc_req->hwqp = hwqp;
+ fc_req->fc_conn = fc_conn;
+ fc_req->req.xfer = xfer;
+ fc_req->s_id = (uint32_t)frame->s_id;
+ fc_req->d_id = (uint32_t)frame->d_id;
+ fc_req->s_id = from_be32(&fc_req->s_id) >> 8;
+ fc_req->d_id = from_be32(&fc_req->d_id) >> 8;
+
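+ /* Hand the request to the execution path. If no exchange or data buffers are
+ * available, it is parked on the poll group's pending queue and retried later. */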
+ nvmf_fc_record_req_trace_point(fc_req, SPDK_NVMF_FC_REQ_INIT);
+ if (nvmf_fc_request_execute(fc_req)) {
+ STAILQ_INSERT_TAIL(&hwqp->fgroup->group.pending_buf_queue, &fc_req->req, buf_link);
+ }
+
+ return 0;
+}
+
+/*
+ * These functions are called from the FC LLD
+ */
+
+void
+_nvmf_fc_request_free(struct spdk_nvmf_fc_request *fc_req)
+{
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_transport_poll_group *group;
+ struct spdk_nvmf_transport *transport;
+
+ if (!fc_req) {
+ return;
+ }
+
+ /* Check for NULL before dereferencing the request */
+ hwqp = fc_req->hwqp;
+ fgroup = hwqp->fgroup;
+ group = &fgroup->group;
+ transport = group->transport;
+
+ if (fc_req->xchg) {
+ nvmf_fc_put_xchg(hwqp, fc_req->xchg);
+ fc_req->xchg = NULL;
+ }
+
+ /* Release IO buffers */
+ if (fc_req->req.data_from_pool) {
+ spdk_nvmf_request_free_buffers(&fc_req->req, group, transport);
+ }
+ fc_req->req.data = NULL;
+ fc_req->req.iovcnt = 0;
+
+ /* Release Q buffer */
+ nvmf_fc_rqpair_buffer_release(hwqp, fc_req->buf_index);
+
+ /* Free Fc request */
+ nvmf_fc_hwqp_free_fc_request(hwqp, fc_req);
+}
+
+void
+nvmf_fc_request_set_state(struct spdk_nvmf_fc_request *fc_req,
+ enum spdk_nvmf_fc_request_state state)
+{
+ assert(fc_req->magic != 0xDEADBEEF);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "FC Request(%p):\n\tState Old:%s New:%s\n", fc_req,
+ nvmf_fc_request_get_state_str(fc_req->state),
+ nvmf_fc_request_get_state_str(state));
+ nvmf_fc_record_req_trace_point(fc_req, state);
+ fc_req->state = state;
+}
+
+char *
+nvmf_fc_request_get_state_str(int state)
+{
+ static char *unk_str = "unknown";
+
+ return (state >= 0 && state < (int)(sizeof(fc_req_state_strs) / sizeof(char *)) ?
+ fc_req_state_strs[state] : unk_str);
+}
+
+int
+nvmf_fc_hwqp_process_frame(struct spdk_nvmf_fc_hwqp *hwqp,
+ uint32_t buff_idx,
+ struct spdk_nvmf_fc_frame_hdr *frame,
+ struct spdk_nvmf_fc_buffer_desc *buffer,
+ uint32_t plen)
+{
+ int rc = 0;
+ uint32_t s_id, d_id;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport = NULL;
+
+ s_id = (uint32_t)frame->s_id;
+ d_id = (uint32_t)frame->d_id;
+ s_id = from_be32(&s_id) >> 8;
+ d_id = from_be32(&d_id) >> 8;
+
+ /* Note: In the tracelog below, we directly do endian conversion on ox_id and
+ * rx_id. Since these are struct fields, we can't pass their address to from_be16().
+ * And since ox_id and rx_id are only needed for the tracelog, assigning them to
+ * local variables and converting there would be wasted work in non-debug builds. */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+ "Process NVME frame s_id:0x%x d_id:0x%x oxid:0x%x rxid:0x%x.\n",
+ s_id, d_id,
+ ((frame->ox_id << 8) & 0xff00) | ((frame->ox_id >> 8) & 0xff),
+ ((frame->rx_id << 8) & 0xff00) | ((frame->rx_id >> 8) & 0xff));
+
+ rc = nvmf_fc_hwqp_find_nport_and_rport(hwqp, d_id, &nport, s_id, &rport);
+ if (rc) {
+ if (nport == NULL) {
+ SPDK_ERRLOG("Nport not found. Dropping\n");
+ /* increment invalid nport counter */
+ hwqp->counters.nport_invalid++;
+ } else if (rport == NULL) {
+ SPDK_ERRLOG("Rport not found. Dropping\n");
+ /* increment invalid rport counter */
+ hwqp->counters.rport_invalid++;
+ }
+ return rc;
+ }
+
+ if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ||
+ rport->rport_state != SPDK_NVMF_FC_OBJECT_CREATED) {
+ SPDK_ERRLOG("%s state not created. Dropping\n",
+ nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ?
+ "Nport" : "Rport");
+ return -EACCES;
+ }
+
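+ /* LS (link service) frames are handed to the LS module; FCP command frames are
+ * turned into NVMe-oF requests. Everything else is counted and dropped. */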
+ if ((frame->r_ctl == FCNVME_R_CTL_LS_REQUEST) &&
+ (frame->type == FCNVME_TYPE_NVMF_DATA)) {
+ struct spdk_nvmf_fc_rq_buf_ls_request *req_buf = buffer->virt;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Process LS NVME frame\n");
+
+ /* Use the RQ buffer for holding LS request. */
+ ls_rqst = (struct spdk_nvmf_fc_ls_rqst *)&req_buf->ls_rqst;
+
+ /* Fill in the LS request structure */
+ ls_rqst->rqstbuf.virt = (void *)&req_buf->rqst;
+ ls_rqst->rqstbuf.phys = buffer->phys +
+ offsetof(struct spdk_nvmf_fc_rq_buf_ls_request, rqst);
+ ls_rqst->rqstbuf.buf_index = buff_idx;
+ ls_rqst->rqst_len = plen;
+
+ ls_rqst->rspbuf.virt = (void *)&req_buf->resp;
+ ls_rqst->rspbuf.phys = buffer->phys +
+ offsetof(struct spdk_nvmf_fc_rq_buf_ls_request, resp);
+ ls_rqst->rsp_len = FCNVME_MAX_LS_RSP_SIZE;
+
+ ls_rqst->private_data = (void *)hwqp;
+ ls_rqst->rpi = rport->rpi;
+ ls_rqst->oxid = (uint16_t)frame->ox_id;
+ ls_rqst->oxid = from_be16(&ls_rqst->oxid);
+ ls_rqst->s_id = s_id;
+ ls_rqst->d_id = d_id;
+ ls_rqst->nport = nport;
+ ls_rqst->rport = rport;
+ ls_rqst->nvmf_tgt = g_nvmf_ftransport->transport.tgt;
+
+ ls_rqst->xchg = nvmf_fc_get_xri(hwqp);
+ if (ls_rqst->xchg) {
+ /* Handover the request to LS module */
+ nvmf_fc_handle_ls_rqst(ls_rqst);
+ } else {
+ /* No XCHG available. Add to pending list. */
+ hwqp->counters.no_xchg++;
+ TAILQ_INSERT_TAIL(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link);
+ }
+ } else if ((frame->r_ctl == FCNVME_R_CTL_CMD_REQ) &&
+ (frame->type == FCNVME_TYPE_FC_EXCHANGE)) {
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Process IO NVME frame\n");
+ rc = nvmf_fc_hwqp_handle_request(hwqp, frame, buff_idx, buffer, plen);
+ } else {
+
+ SPDK_ERRLOG("Unknown frame received. Dropping\n");
+ hwqp->counters.unknown_frame++;
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+void
+nvmf_fc_hwqp_process_pending_reqs(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ struct spdk_nvmf_request *req = NULL, *tmp;
+ struct spdk_nvmf_fc_request *fc_req;
+ int budget = 64;
+
+ if (!hwqp->fgroup) {
+ /* LS queue is tied to the acceptor poll group and LS pending requests
+ * are staged and processed via hwqp->ls_pending_queue.
+ */
+ return;
+ }
+
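+ /* Retry pending requests, but only up to a fixed budget per poller pass so a
+ * deep backlog cannot monopolize the hwqp poller. */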
+ STAILQ_FOREACH_SAFE(req, &hwqp->fgroup->group.pending_buf_queue, buf_link, tmp) {
+ fc_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_fc_request, req);
+ if (!nvmf_fc_request_execute(fc_req)) {
+ /* Successfully posted. Remove from the pending queue. */
+ STAILQ_REMOVE_HEAD(&hwqp->fgroup->group.pending_buf_queue, buf_link);
+ }
+
+ if (budget) {
+ budget--;
+ } else {
+ return;
+ }
+ }
+}
+
+void
+nvmf_fc_hwqp_process_pending_ls_rqsts(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst = NULL, *tmp;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport = NULL;
+
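+ /* Retry LS requests that were queued while no exchange (XCHG) was available and
+ * stop as soon as the XCHG pool runs dry again. */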
+ TAILQ_FOREACH_SAFE(ls_rqst, &hwqp->ls_pending_queue, ls_pending_link, tmp) {
+ /* lookup nport and rport again - make sure they are still valid */
+ int rc = nvmf_fc_hwqp_find_nport_and_rport(hwqp, ls_rqst->d_id, &nport, ls_rqst->s_id, &rport);
+ if (rc) {
+ if (nport == NULL) {
+ SPDK_ERRLOG("Nport not found. Dropping\n");
+ /* increment invalid nport counter */
+ hwqp->counters.nport_invalid++;
+ } else if (rport == NULL) {
+ SPDK_ERRLOG("Rport not found. Dropping\n");
+ /* increment invalid rport counter */
+ hwqp->counters.rport_invalid++;
+ }
+ TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link);
+ /* Return buffer to chip */
+ nvmf_fc_rqpair_buffer_release(hwqp, ls_rqst->rqstbuf.buf_index);
+ continue;
+ }
+ if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ||
+ rport->rport_state != SPDK_NVMF_FC_OBJECT_CREATED) {
+ SPDK_ERRLOG("%s state not created. Dropping\n",
+ nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ?
+ "Nport" : "Rport");
+ TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link);
+ /* Return buffer to chip */
+ nvmf_fc_rqpair_buffer_release(hwqp, ls_rqst->rqstbuf.buf_index);
+ continue;
+ }
+
+ ls_rqst->xchg = nvmf_fc_get_xri(hwqp);
+ if (ls_rqst->xchg) {
+ /* Got an XCHG */
+ TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link);
+ /* Handover the request to LS module */
+ nvmf_fc_handle_ls_rqst(ls_rqst);
+ } else {
+ /* No more XCHGs. Stop processing. */
+ hwqp->counters.no_xchg++;
+ return;
+ }
+ }
+}
+
+int
+nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *fc_req)
+{
+ int rc = 0;
+ struct spdk_nvmf_request *req = &fc_req->req;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_fc_conn *fc_conn = nvmf_fc_get_conn(qpair);
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint16_t ersp_len = 0;
+
+ /* set sq head value in resp */
+ rsp->sqhd = nvmf_fc_advance_conn_sqhead(qpair);
+
+ /* Increment connection responses */
+ fc_conn->rsp_count++;
+
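+ /* FC-NVMe permits a short RSP frame when the completion carries no extra
+ * information; otherwise a full extended response (ERSP) IU is transmitted. */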
+ if (nvmf_fc_send_ersp_required(fc_req, fc_conn->rsp_count,
+ fc_req->transfered_len)) {
+ /* Fill ERSP Len */
+ to_be16(&ersp_len, (sizeof(struct spdk_nvmf_fc_ersp_iu) /
+ sizeof(uint32_t)));
+ fc_req->ersp.ersp_len = ersp_len;
+
+ /* Fill RSN */
+ to_be32(&fc_req->ersp.response_seq_no, fc_conn->rsn);
+ fc_conn->rsn++;
+
+ /* Fill transfer length */
+ to_be32(&fc_req->ersp.transferred_data_len, fc_req->transfered_len);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Posting ERSP.\n");
+ rc = nvmf_fc_xmt_rsp(fc_req, (uint8_t *)&fc_req->ersp,
+ sizeof(struct spdk_nvmf_fc_ersp_iu));
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Posting RSP.\n");
+ rc = nvmf_fc_xmt_rsp(fc_req, NULL, 0);
+ }
+
+ return rc;
+}
+
+bool
+nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req,
+ uint32_t rsp_cnt, uint32_t xfer_len)
+{
+ struct spdk_nvmf_request *req = &fc_req->req;
+ struct spdk_nvmf_qpair *qpair = req->qpair;
+ struct spdk_nvmf_fc_conn *fc_conn = nvmf_fc_get_conn(qpair);
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+ uint16_t status = *((uint16_t *)&rsp->status);
+
+ /*
+ * Check if we need to send ERSP
+ * 1) For every N responses where N == ersp_ratio
+ * 2) Fabric commands.
+ * 3) Completion status failed or Completion dw0 or dw1 valid.
+ * 4) SQ == 90% full.
+ * 5) Transfer length not equal to CMD IU length
+ */
+
+ if (!(rsp_cnt % fc_conn->esrp_ratio) ||
+ (cmd->opc == SPDK_NVME_OPC_FABRIC) ||
+ (status & 0xFFFE) || rsp->cdw0 || rsp->rsvd1 ||
+ (req->length != xfer_len)) {
+ return true;
+ }
+ return false;
+}
+
+static int
+nvmf_fc_request_complete(struct spdk_nvmf_request *req)
+{
+ int rc = 0;
+ struct spdk_nvmf_fc_request *fc_req = nvmf_fc_get_fc_req(req);
+ struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+
+ if (fc_req->is_aborted) {
+ /* Defer this to make sure we don't call IO cleanup in the same context. */
+ nvmf_fc_poller_api_func(fc_req->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE,
+ (void *)fc_req);
+ } else if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
+ req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_XFER);
+
+ rc = nvmf_fc_send_data(fc_req);
+ } else {
+ if (req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_WRITE_RSP);
+ } else if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_RSP);
+ } else {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_NONE_RSP);
+ }
+
+ rc = nvmf_fc_handle_rsp(fc_req);
+ }
+
+ if (rc) {
+ SPDK_ERRLOG("Error in request complete.\n");
+ _nvmf_fc_request_free(fc_req);
+ }
+ return 0;
+}
+
+struct spdk_nvmf_tgt *
+nvmf_fc_get_tgt(void)
+{
+ if (g_nvmf_ftransport) {
+ return g_nvmf_ftransport->transport.tgt;
+ }
+ return NULL;
+}
+
+/*
+ * FC Transport Public API begins here
+ */
+
+#define SPDK_NVMF_FC_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_FC_DEFAULT_AQ_DEPTH 32
+#define SPDK_NVMF_FC_DEFAULT_MAX_QPAIRS_PER_CTRLR 5
+#define SPDK_NVMF_FC_DEFAULT_IN_CAPSULE_DATA_SIZE 0
+#define SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE 65536
+#define SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE 4096
+#define SPDK_NVMF_FC_DEFAULT_NUM_SHARED_BUFFERS 8192
+#define SPDK_NVMF_FC_DEFAULT_MAX_SGE (SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE / \
+ SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE)
+
+static void
+nvmf_fc_opts_init(struct spdk_nvmf_transport_opts *opts)
+{
+ opts->max_queue_depth = SPDK_NVMF_FC_DEFAULT_MAX_QUEUE_DEPTH;
+ opts->max_qpairs_per_ctrlr = SPDK_NVMF_FC_DEFAULT_MAX_QPAIRS_PER_CTRLR;
+ opts->in_capsule_data_size = SPDK_NVMF_FC_DEFAULT_IN_CAPSULE_DATA_SIZE;
+ opts->max_io_size = SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE;
+ opts->io_unit_size = SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE;
+ opts->max_aq_depth = SPDK_NVMF_FC_DEFAULT_AQ_DEPTH;
+ opts->num_shared_buffers = SPDK_NVMF_FC_DEFAULT_NUM_SHARED_BUFFERS;
+}
+
+static struct spdk_nvmf_transport *
+nvmf_fc_create(struct spdk_nvmf_transport_opts *opts)
+{
+ uint32_t sge_count;
+
+ SPDK_INFOLOG(SPDK_LOG_NVMF_FC, "*** FC Transport Init ***\n"
+ " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
+ " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
+ " max_aq_depth=%d\n",
+ opts->max_queue_depth,
+ opts->max_io_size,
+ opts->max_qpairs_per_ctrlr - 1,
+ opts->io_unit_size,
+ opts->max_aq_depth);
+
+ if (g_nvmf_ftransport) {
+ SPDK_ERRLOG("Duplicate NVMF-FC transport create request!\n");
+ return NULL;
+ }
+
+ if (spdk_env_get_last_core() < 1) {
+ SPDK_ERRLOG("Not enough cores/threads (%d) to run NVMF-FC transport!\n",
+ spdk_env_get_last_core() + 1);
+ return NULL;
+ }
+
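+ /* The largest I/O must be describable in at most SPDK_NVMF_FC_DEFAULT_MAX_SGE
+ * elements of io_unit_size. */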
+ sge_count = opts->max_io_size / opts->io_unit_size;
+ if (sge_count > SPDK_NVMF_FC_DEFAULT_MAX_SGE) {
+ SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
+ return NULL;
+ }
+
+ g_nvmf_fc_master_thread = spdk_get_thread();
+ g_nvmf_fgroup_count = 0;
+ g_nvmf_ftransport = calloc(1, sizeof(*g_nvmf_ftransport));
+
+ if (!g_nvmf_ftransport) {
+ SPDK_ERRLOG("Failed to allocate NVMF-FC transport\n");
+ return NULL;
+ }
+
+ if (pthread_mutex_init(&g_nvmf_ftransport->lock, NULL)) {
+ SPDK_ERRLOG("pthread_mutex_init() failed\n");
+ free(g_nvmf_ftransport);
+ g_nvmf_ftransport = NULL;
+ return NULL;
+ }
+
+ /* initialize the low level FC driver */
+ nvmf_fc_lld_init();
+
+ return &g_nvmf_ftransport->transport;
+}
+
+static int
+nvmf_fc_destroy(struct spdk_nvmf_transport *transport)
+{
+ if (transport) {
+ struct spdk_nvmf_fc_transport *ftransport;
+ struct spdk_nvmf_fc_poll_group *fgroup, *pg_tmp;
+
+ ftransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_fc_transport, transport);
+
+ free(ftransport);
+
+ /* clean up any FC poll groups still around */
+ TAILQ_FOREACH_SAFE(fgroup, &g_nvmf_fgroups, link, pg_tmp) {
+ TAILQ_REMOVE(&g_nvmf_fgroups, fgroup, link);
+ free(fgroup);
+ }
+ g_nvmf_fgroup_count = 0;
+
+ /* low level FC driver clean up */
+ nvmf_fc_lld_fini();
+
+ nvmf_fc_port_cleanup();
+ }
+
+ return 0;
+}
+
+static int
+nvmf_fc_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ return 0;
+}
+
+static void
+nvmf_fc_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *_trid)
+{
+}
+
+static uint32_t
+nvmf_fc_accept(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ uint32_t count = 0;
+ static bool start_lld = false;
+
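+ /* The low level driver is started lazily, on the first accept poll. */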
+ if (spdk_unlikely(!start_lld)) {
+ start_lld = true;
+ nvmf_fc_lld_start();
+ }
+
+ /* poll the LS queue on each port */
+ TAILQ_FOREACH(fc_port, &g_spdk_nvmf_fc_port_list, link) {
+ if (fc_port->hw_port_status == SPDK_FC_PORT_ONLINE) {
+ count += nvmf_fc_process_queue(&fc_port->ls_queue);
+ }
+ }
+
+ return count;
+}
+
+static void
+nvmf_fc_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+ entry->trtype = (enum spdk_nvme_transport_type) SPDK_NVMF_TRTYPE_FC;
+ entry->adrfam = trid->adrfam;
+ entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED;
+
+ spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
+ spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
+}
+
+static struct spdk_nvmf_transport_poll_group *
+nvmf_fc_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_fc_transport *ftransport =
+ SPDK_CONTAINEROF(transport, struct spdk_nvmf_fc_transport, transport);
+
+ fgroup = calloc(1, sizeof(struct spdk_nvmf_fc_poll_group));
+ if (!fgroup) {
+ SPDK_ERRLOG("Unable to alloc FC poll group\n");
+ return NULL;
+ }
+
+ TAILQ_INIT(&fgroup->hwqp_list);
+
+ pthread_mutex_lock(&ftransport->lock);
+ TAILQ_INSERT_TAIL(&g_nvmf_fgroups, fgroup, link);
+ g_nvmf_fgroup_count++;
+ pthread_mutex_unlock(&ftransport->lock);
+
+ return &fgroup->group;
+}
+
+static void
+nvmf_fc_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_fc_transport *ftransport =
+ SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_fc_transport, transport);
+
+ fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group);
+ pthread_mutex_lock(&ftransport->lock);
+ TAILQ_REMOVE(&g_nvmf_fgroups, fgroup, link);
+ g_nvmf_fgroup_count--;
+ pthread_mutex_unlock(&ftransport->lock);
+
+ free(fgroup);
+}
+
+static int
+nvmf_fc_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_hwqp *hwqp = NULL;
+ struct spdk_nvmf_fc_ls_add_conn_api_data *api_data = NULL;
+ bool hwqp_found = false;
+
+ fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group);
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+
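+ /* Find the hwqp in this poll group that belongs to the same physical FC port
+ * as the new connection's association. */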
+ TAILQ_FOREACH(hwqp, &fgroup->hwqp_list, link) {
+ if (fc_conn->fc_assoc->tgtport->fc_port == hwqp->fc_port) {
+ hwqp_found = true;
+ break;
+ }
+ }
+
+ if (!hwqp_found) {
+ SPDK_ERRLOG("No valid hwqp found for new QP.\n");
+ goto err;
+ }
+
+ if (!nvmf_fc_assign_conn_to_hwqp(hwqp,
+ &fc_conn->conn_id,
+ fc_conn->max_queue_depth)) {
+ SPDK_ERRLOG("Failed to get a connection id for new QP.\n");
+ goto err;
+ }
+
+ fc_conn->hwqp = hwqp;
+
+ /* If this is for ADMIN connection, then update assoc ID. */
+ if (fc_conn->qpair.qid == 0) {
+ fc_conn->fc_assoc->assoc_id = fc_conn->conn_id;
+ }
+
+ api_data = &fc_conn->create_opd->u.add_conn;
+ nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION, &api_data->args);
+ return 0;
+err:
+ return -1;
+}
+
+static int
+nvmf_fc_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+ uint32_t count = 0;
+ struct spdk_nvmf_fc_poll_group *fgroup;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+
+ fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group);
+
+ TAILQ_FOREACH(hwqp, &fgroup->hwqp_list, link) {
+ if (hwqp->state == SPDK_FC_HWQP_ONLINE) {
+ count += nvmf_fc_process_queue(hwqp);
+ }
+ }
+
+ return (int) count;
+}
+
+static int
+nvmf_fc_request_free(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_fc_request *fc_req = nvmf_fc_get_fc_req(req);
+
+ if (!fc_req->is_aborted) {
+ nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_BDEV_ABORTED);
+ nvmf_fc_request_abort(fc_req, true, NULL, NULL);
+ } else {
+ nvmf_fc_request_abort_complete(fc_req);
+ }
+ return 0;
+}
+
+
+static void
+nvmf_fc_close_qpair(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+
+ if (fc_conn->conn_id == NVMF_FC_INVALID_CONN_ID) {
+ /* QP creation failure in the FC transport. Clean up. */
+ spdk_thread_send_msg(nvmf_fc_get_master_thread(),
+ nvmf_fc_handle_connection_failure, fc_conn);
+ } else if (fc_conn->fc_assoc->assoc_id == fc_conn->conn_id &&
+ fc_conn->fc_assoc->assoc_state != SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ /* Admin connection */
+ spdk_thread_send_msg(nvmf_fc_get_master_thread(),
+ nvmf_fc_handle_assoc_deletion, fc_conn);
+ }
+}
+
+static int
+nvmf_fc_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+ memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id));
+ return 0;
+}
+
+static int
+nvmf_fc_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+ memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id));
+ return 0;
+}
+
+static int
+nvmf_fc_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair);
+ memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id));
+ return 0;
+}
+
+static void
+nvmf_fc_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req)
+{
+ spdk_nvmf_request_complete(req);
+}
+
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_fc = {
+ .name = "FC",
+ .type = (enum spdk_nvme_transport_type) SPDK_NVMF_TRTYPE_FC,
+ .opts_init = nvmf_fc_opts_init,
+ .create = nvmf_fc_create,
+ .destroy = nvmf_fc_destroy,
+
+ .listen = nvmf_fc_listen,
+ .stop_listen = nvmf_fc_stop_listen,
+ .accept = nvmf_fc_accept,
+
+ .listener_discover = nvmf_fc_discover,
+
+ .poll_group_create = nvmf_fc_poll_group_create,
+ .poll_group_destroy = nvmf_fc_poll_group_destroy,
+ .poll_group_add = nvmf_fc_poll_group_add,
+ .poll_group_poll = nvmf_fc_poll_group_poll,
+
+ .req_complete = nvmf_fc_request_complete,
+ .req_free = nvmf_fc_request_free,
+ .qpair_fini = nvmf_fc_close_qpair,
+ .qpair_get_peer_trid = nvmf_fc_qpair_get_peer_trid,
+ .qpair_get_local_trid = nvmf_fc_qpair_get_local_trid,
+ .qpair_get_listen_trid = nvmf_fc_qpair_get_listen_trid,
+ .qpair_abort_request = nvmf_fc_qpair_abort_request,
+};
+
+/*
+ * Re-initialize the FC-Port after an offline event.
+ * Only the queue information needs to be populated. XCHG, lcore and other hwqp information remains
+ * unchanged after the first initialization.
+ *
+ */
+static int
+nvmf_fc_adm_hw_port_reinit_validate(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_hw_port_init_args *args)
+{
+ uint32_t i;
+
+ /* Verify that the port was previously in offline or quiesced state */
+ if (nvmf_fc_port_is_online(fc_port)) {
+ SPDK_ERRLOG("SPDK FC port %d already initialized and online.\n", args->port_handle);
+ return -EINVAL;
+ }
+
+ /* Reinit information in new LS queue from previous queue */
+ nvmf_fc_hwqp_reinit_poller_queues(&fc_port->ls_queue, args->ls_queue);
+
+ fc_port->fcp_rq_id = args->fcp_rq_id;
+
+ /* Initialize the LS queue */
+ fc_port->ls_queue.queues = args->ls_queue;
+ nvmf_fc_init_poller_queues(fc_port->ls_queue.queues);
+
+ for (i = 0; i < fc_port->num_io_queues; i++) {
+ /* Reinit information in new IO queue from previous queue */
+ nvmf_fc_hwqp_reinit_poller_queues(&fc_port->io_queues[i],
+ args->io_queues[i]);
+ fc_port->io_queues[i].queues = args->io_queues[i];
+ /* Initialize the IO queues */
+ nvmf_fc_init_poller_queues(fc_port->io_queues[i].queues);
+ }
+
+ fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE;
+
+ /* Validate the port information */
+ DEV_VERIFY(TAILQ_EMPTY(&fc_port->nport_list));
+ DEV_VERIFY(fc_port->num_nports == 0);
+ if (!TAILQ_EMPTY(&fc_port->nport_list) || (fc_port->num_nports != 0)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Initializes the data for the creation of a FC-Port object in the SPDK
+ * library. The spdk_nvmf_fc_port is a well-defined structure that is part of
+ * the API to the library. The contents added to this well-defined structure
+ * are private to each vendor's implementation.
+ */
+static int
+nvmf_fc_adm_hw_port_data_init(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_hw_port_init_args *args)
+{
+ /* A high hwqp_id is used for the LS HWQP so that it does not clash with the
+ * IO HWQPs and is immediately recognizable as the LS queue during tracing.
+ */
+ uint32_t i;
+
+ fc_port->port_hdl = args->port_handle;
+ fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE;
+ fc_port->fcp_rq_id = args->fcp_rq_id;
+ fc_port->num_io_queues = args->io_queue_cnt;
+
+ /*
+ * Set port context from init args. Used for FCP port stats.
+ */
+ fc_port->port_ctx = args->port_ctx;
+
+ /*
+ * Initialize the LS queue wherever needed.
+ */
+ fc_port->ls_queue.queues = args->ls_queue;
+ fc_port->ls_queue.thread = nvmf_fc_get_master_thread();
+ fc_port->ls_queue.hwqp_id = SPDK_MAX_NUM_OF_FC_PORTS * fc_port->num_io_queues;
+
+ /*
+ * Initialize the LS queue.
+ */
+ nvmf_fc_init_hwqp(fc_port, &fc_port->ls_queue);
+
+ /*
+ * Initialize the IO queues.
+ */
+ for (i = 0; i < args->io_queue_cnt; i++) {
+ struct spdk_nvmf_fc_hwqp *hwqp = &fc_port->io_queues[i];
+ hwqp->hwqp_id = i;
+ hwqp->queues = args->io_queues[i];
+ hwqp->rq_size = args->io_queue_size;
+ nvmf_fc_init_hwqp(fc_port, hwqp);
+ }
+
+ /*
+ * Initialize the LS processing for port
+ */
+ nvmf_fc_ls_init(fc_port);
+
+ /*
+ * Initialize the list of nport on this HW port.
+ */
+ TAILQ_INIT(&fc_port->nport_list);
+ fc_port->num_nports = 0;
+
+ return 0;
+}
+
+static void
+nvmf_fc_adm_port_hwqp_offline_del_poller(struct spdk_nvmf_fc_port *fc_port)
+{
+ struct spdk_nvmf_fc_hwqp *hwqp = NULL;
+ int i = 0;
+
+ hwqp = &fc_port->ls_queue;
+ (void)nvmf_fc_hwqp_set_offline(hwqp);
+
+ /* Remove poller for all the io queues. */
+ for (i = 0; i < (int)fc_port->num_io_queues; i++) {
+ hwqp = &fc_port->io_queues[i];
+ (void)nvmf_fc_hwqp_set_offline(hwqp);
+ nvmf_fc_poll_group_remove_hwqp(hwqp);
+ }
+}
+
+/*
+ * Callback function for HW port link break operation.
+ *
+ * Notice that this callback is being triggered when spdk_fc_nport_delete()
+ * completes, if that spdk_fc_nport_delete() call was issued by
+ * nvmf_fc_adm_evnt_hw_port_link_break().
+ *
+ * Since nvmf_fc_adm_evnt_hw_port_link_break() can invoke spdk_fc_nport_delete() multiple
+ * times (one per nport in the HW port's nport_list), a single call to
+ * nvmf_fc_adm_evnt_hw_port_link_break() can result in multiple calls to this callback function.
+ *
+ * As a result, this function invokes the callback of the caller of
+ * nvmf_fc_adm_evnt_hw_port_link_break() only when the HW port's nport_list is empty.
+ */
+static void
+nvmf_fc_adm_hw_port_link_break_cb(uint8_t port_handle,
+ enum spdk_fc_event event_type, void *cb_args, int spdk_err)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_port_link_break_cb_data *offline_cb_args = cb_args;
+ struct spdk_nvmf_hw_port_link_break_args *offline_args = NULL;
+ spdk_nvmf_fc_callback cb_func = NULL;
+ int err = 0;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ int num_nports = 0;
+ char log_str[256];
+
+ if (0 != spdk_err) {
+ DEV_VERIFY(!"port link break cb: spdk_err not success.");
+ SPDK_ERRLOG("port link break cb: spdk_err:%d.\n", spdk_err);
+ goto out;
+ }
+
+ if (!offline_cb_args) {
+ DEV_VERIFY(!"port link break cb: port_offline_args is NULL.");
+ err = -EINVAL;
+ goto out;
+ }
+
+ offline_args = offline_cb_args->args;
+ if (!offline_args) {
+ DEV_VERIFY(!"port link break cb: offline_args is NULL.");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (port_handle != offline_args->port_handle) {
+ DEV_VERIFY(!"port link break cb: port_handle mismatch.");
+ err = -EINVAL;
+ goto out;
+ }
+
+ cb_func = offline_cb_args->cb_func;
+ if (!cb_func) {
+ DEV_VERIFY(!"port link break cb: cb_func is NULL.");
+ err = -EINVAL;
+ goto out;
+ }
+
+ fc_port = nvmf_fc_port_lookup(port_handle);
+ if (!fc_port) {
+ DEV_VERIFY(!"port link break cb: fc_port is NULL.");
+ SPDK_ERRLOG("port link break cb: Unable to find port:%d\n",
+ offline_args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ num_nports = fc_port->num_nports;
+ if (!TAILQ_EMPTY(&fc_port->nport_list)) {
+ /*
+ * Don't call the callback unless all nports have been deleted.
+ */
+ goto out;
+ }
+
+ if (num_nports != 0) {
+ DEV_VERIFY(!"port link break cb: num_nports in non-zero.");
+ SPDK_ERRLOG("port link break cb: # of ports should be 0. Instead, num_nports:%d\n",
+ num_nports);
+ err = -EINVAL;
+ }
+
+ /*
+ * Mark the hwqps as offline and unregister the pollers.
+ */
+ (void)nvmf_fc_adm_port_hwqp_offline_del_poller(fc_port);
+
+ /*
+ * Since there are no more nports, execute the callback(s).
+ */
+ (void)cb_func(port_handle, SPDK_FC_LINK_BREAK,
+ (void *)offline_args->cb_ctx, spdk_err);
+
+out:
+ free(offline_cb_args);
+
+ snprintf(log_str, sizeof(log_str),
+ "port link break cb: port:%d evt_type:%d num_nports:%d err:%d spdk_err:%d.\n",
+ port_handle, event_type, num_nports, err, spdk_err);
+
+ if (err != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+ return;
+}
+
+/*
+ * FC port must have all its nports deleted before transitioning to offline state.
+ */
+static void
+nvmf_fc_adm_hw_port_offline_nport_delete(struct spdk_nvmf_fc_port *fc_port)
+{
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ /* All nports must have been deleted at this point for this fc port */
+ DEV_VERIFY(fc_port && TAILQ_EMPTY(&fc_port->nport_list));
+ DEV_VERIFY(fc_port->num_nports == 0);
+ /* Mark the nport states to be zombie, if they exist */
+ if (fc_port && !TAILQ_EMPTY(&fc_port->nport_list)) {
+ TAILQ_FOREACH(nport, &fc_port->nport_list, link) {
+ (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_ZOMBIE);
+ }
+ }
+}
+
+static void
+nvmf_fc_adm_i_t_delete_cb(void *args, uint32_t err)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_i_t_del_cb_data *cb_data = args;
+ struct spdk_nvmf_fc_nport *nport = cb_data->nport;
+ struct spdk_nvmf_fc_remote_port_info *rport = cb_data->rport;
+ spdk_nvmf_fc_callback cb_func = cb_data->fc_cb_func;
+ int spdk_err = 0;
+ uint8_t port_handle = cb_data->port_handle;
+ uint32_t s_id = rport->s_id;
+ uint32_t rpi = rport->rpi;
+ uint32_t assoc_count = rport->assoc_count;
+ uint32_t nport_hdl = nport->nport_hdl;
+ uint32_t d_id = nport->d_id;
+ char log_str[256];
+
+ /*
+ * Assert on any delete failure.
+ */
+ if (0 != err) {
+ DEV_VERIFY(!"Error in IT Delete callback.");
+ goto out;
+ }
+
+ if (cb_func != NULL) {
+ (void)cb_func(port_handle, SPDK_FC_IT_DELETE, cb_data->fc_cb_ctx, spdk_err);
+ }
+
+out:
+ free(cb_data);
+
+ snprintf(log_str, sizeof(log_str),
+ "IT delete assoc_cb on nport %d done, port_handle:%d s_id:%d d_id:%d rpi:%d rport_assoc_count:%d rc = %d.\n",
+ nport_hdl, port_handle, s_id, d_id, rpi, assoc_count, err);
+
+ if (err != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+}
+
+static void
+nvmf_fc_adm_i_t_delete_assoc_cb(void *args, uint32_t err)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data *cb_data = args;
+ struct spdk_nvmf_fc_nport *nport = cb_data->nport;
+ struct spdk_nvmf_fc_remote_port_info *rport = cb_data->rport;
+ spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func = cb_data->cb_func;
+ uint32_t s_id = rport->s_id;
+ uint32_t rpi = rport->rpi;
+ uint32_t assoc_count = rport->assoc_count;
+ uint32_t nport_hdl = nport->nport_hdl;
+ uint32_t d_id = nport->d_id;
+ char log_str[256];
+
+ /*
+ * Assert on any association delete failure. We continue to delete other
+ * associations in promoted builds.
+ */
+ if (0 != err) {
+ DEV_VERIFY(!"Nport's association delete callback returned error");
+ if (nport->assoc_count > 0) {
+ nport->assoc_count--;
+ }
+ if (rport->assoc_count > 0) {
+ rport->assoc_count--;
+ }
+ }
+
+ /*
+ * If this is the last association being deleted for the ITN,
+ * execute the callback(s).
+ */
+ if (0 == rport->assoc_count) {
+ /* Remove the rport from the remote port list. */
+ if (nvmf_fc_nport_remove_rem_port(nport, rport) != 0) {
+ SPDK_ERRLOG("Error while removing rport from list.\n");
+ DEV_VERIFY(!"Error while removing rport from list.");
+ }
+
+ if (cb_func != NULL) {
+ /*
+ * Callback function is provided by the caller
+ * of nvmf_fc_adm_i_t_delete_assoc().
+ */
+ (void)cb_func(cb_data->cb_ctx, 0);
+ }
+ free(rport);
+ free(args);
+ }
+
+ snprintf(log_str, sizeof(log_str),
+ "IT delete assoc_cb on nport %d done, s_id:%d d_id:%d rpi:%d rport_assoc_count:%d err = %d.\n",
+ nport_hdl, s_id, d_id, rpi, assoc_count, err);
+
+ if (err != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+}
+
+/**
+ * Process an IT delete: tear down all associations related to this ITN/remote port.
+ */
+static void
+nvmf_fc_adm_i_t_delete_assoc(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rport,
+ spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func,
+ void *cb_ctx)
+{
+ int err = 0;
+ struct spdk_nvmf_fc_association *assoc = NULL;
+ int assoc_err = 0;
+ uint32_t num_assoc = 0;
+ uint32_t num_assoc_del_scheduled = 0;
+ struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data *cb_data = NULL;
+ uint8_t port_hdl = nport->port_hdl;
+ uint32_t s_id = rport->s_id;
+ uint32_t rpi = rport->rpi;
+ uint32_t assoc_count = rport->assoc_count;
+ char log_str[256];
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "IT delete associations on nport:%d begin.\n",
+ nport->nport_hdl);
+
+ /*
+ * Allocate memory for callback data.
+ * This memory will be freed by the callback function.
+ */
+ cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data));
+ if (NULL == cb_data) {
+ SPDK_ERRLOG("Failed to allocate memory for cb_data on nport:%d.\n", nport->nport_hdl);
+ err = -ENOMEM;
+ goto out;
+ }
+ cb_data->nport = nport;
+ cb_data->rport = rport;
+ cb_data->port_handle = port_hdl;
+ cb_data->cb_func = cb_func;
+ cb_data->cb_ctx = cb_ctx;
+
+ /*
+ * Delete all associations, if any, related with this ITN/remote_port.
+ */
+ TAILQ_FOREACH(assoc, &nport->fc_associations, link) {
+ num_assoc++;
+ if (assoc->s_id == s_id) {
+ assoc_err = nvmf_fc_delete_association(nport,
+ assoc->assoc_id,
+ false /* send abts */, false,
+ nvmf_fc_adm_i_t_delete_assoc_cb, cb_data);
+ if (0 != assoc_err) {
+ /*
+ * Mark this association as zombie.
+ */
+ err = -EINVAL;
+ DEV_VERIFY(!"Error while deleting association");
+ (void)nvmf_fc_assoc_set_state(assoc, SPDK_NVMF_FC_OBJECT_ZOMBIE);
+ } else {
+ num_assoc_del_scheduled++;
+ }
+ }
+ }
+
+out:
+ if ((cb_data) && (num_assoc_del_scheduled == 0)) {
+ /*
+ * Since there are no association_delete calls
+ * successfully scheduled, the association_delete
+ * callback function will never be called.
+ * In this case, call the callback function now.
+ */
+ nvmf_fc_adm_i_t_delete_assoc_cb(cb_data, 0);
+ }
+
+ snprintf(log_str, sizeof(log_str),
+ "IT delete associations on nport:%d end. "
+ "s_id:%d rpi:%d assoc_count:%d assoc:%d assoc_del_scheduled:%d rc:%d.\n",
+ nport->nport_hdl, s_id, rpi, assoc_count, num_assoc, num_assoc_del_scheduled, err);
+
+ if (err == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ } else {
+ SPDK_ERRLOG("%s", log_str);
+ }
+}
+
+static void
+nvmf_fc_adm_queue_quiesce_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_poller_api_quiesce_queue_args *quiesce_api_data = NULL;
+ struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *port_quiesce_ctx = NULL;
+ struct spdk_nvmf_fc_hwqp *hwqp = NULL;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ int err = 0;
+
+ quiesce_api_data = (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *)cb_data;
+ hwqp = quiesce_api_data->hwqp;
+ fc_port = hwqp->fc_port;
+ port_quiesce_ctx = (struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *)quiesce_api_data->ctx;
+ spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func = port_quiesce_ctx->cb_func;
+
+ /*
+ * Decrement the callback/quiesced queue count.
+ */
+ port_quiesce_ctx->quiesce_count--;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Queue%d Quiesced\n", quiesce_api_data->hwqp->hwqp_id);
+
+ free(quiesce_api_data);
+ /*
+ * Wait for all callbacks, i.e. one per IO queue plus the LS queue.
+ */
+ if (port_quiesce_ctx->quiesce_count > 0) {
+ return;
+ }
+
+ if (fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) {
+ SPDK_ERRLOG("Port %d already in quiesced state.\n", fc_port->port_hdl);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d quiesced.\n", fc_port->port_hdl);
+ fc_port->hw_port_status = SPDK_FC_PORT_QUIESCED;
+ }
+
+ if (cb_func) {
+ /*
+ * Invoke the callback function provided by the caller of the quiesce.
+ */
+ cb_func(port_quiesce_ctx->ctx, err);
+ }
+
+ /*
+ * Free the context structure.
+ */
+ free(port_quiesce_ctx);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d quiesce done, rc = %d.\n", fc_port->port_hdl,
+ err);
+}
+
+static int
+nvmf_fc_adm_hw_queue_quiesce(struct spdk_nvmf_fc_hwqp *fc_hwqp, void *ctx,
+ spdk_nvmf_fc_poller_api_cb cb_func)
+{
+ struct spdk_nvmf_fc_poller_api_quiesce_queue_args *args;
+ enum spdk_nvmf_fc_poller_api_ret rc = SPDK_NVMF_FC_POLLER_API_SUCCESS;
+ int err = 0;
+
+ args = calloc(1, sizeof(struct spdk_nvmf_fc_poller_api_quiesce_queue_args));
+
+ if (args == NULL) {
+ err = -ENOMEM;
+ SPDK_ERRLOG("Failed to allocate memory for poller quiesce args, hwqp:%d\n", fc_hwqp->hwqp_id);
+ goto done;
+ }
+ args->hwqp = fc_hwqp;
+ args->ctx = ctx;
+ args->cb_info.cb_func = cb_func;
+ args->cb_info.cb_data = args;
+ args->cb_info.cb_thread = spdk_get_thread();
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Quiesce queue %d\n", fc_hwqp->hwqp_id);
+ rc = nvmf_fc_poller_api_func(fc_hwqp, SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE, args);
+ if (rc) {
+ free(args);
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+
+/*
+ * Hw port Quiesce
+ */
+static int
+nvmf_fc_adm_hw_port_quiesce(struct spdk_nvmf_fc_port *fc_port, void *ctx,
+ spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func)
+{
+ struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *port_quiesce_ctx = NULL;
+ uint32_t i = 0;
+ int err = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port:%d is being quiesced.\n", fc_port->port_hdl);
+
+ /*
+ * If the port is in an OFFLINE state, set the state to QUIESCED
+ * and execute the callback.
+ */
+ if (fc_port->hw_port_status == SPDK_FC_PORT_OFFLINE) {
+ fc_port->hw_port_status = SPDK_FC_PORT_QUIESCED;
+ }
+
+ if (fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Port %d already in quiesced state.\n",
+ fc_port->port_hdl);
+ /*
+ * Execute the callback function directly.
+ */
+ cb_func(ctx, err);
+ goto out;
+ }
+
+ port_quiesce_ctx = calloc(1, sizeof(struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx));
+
+ if (port_quiesce_ctx == NULL) {
+ err = -ENOMEM;
+ SPDK_ERRLOG("Failed to allocate memory for LS queue quiesce ctx, port:%d\n",
+ fc_port->port_hdl);
+ goto out;
+ }
+
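+ /* quiesce_count is incremented for every queue quiesce that is successfully
+ * issued; the poller callback decrements it and finishes the port quiesce
+ * when it reaches zero. */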
+ port_quiesce_ctx->quiesce_count = 0;
+ port_quiesce_ctx->ctx = ctx;
+ port_quiesce_ctx->cb_func = cb_func;
+
+ /*
+ * Quiesce the LS queue.
+ */
+ err = nvmf_fc_adm_hw_queue_quiesce(&fc_port->ls_queue, port_quiesce_ctx,
+ nvmf_fc_adm_queue_quiesce_cb);
+ if (err != 0) {
+ SPDK_ERRLOG("Failed to quiesce the LS queue.\n");
+ goto out;
+ }
+ port_quiesce_ctx->quiesce_count++;
+
+ /*
+ * Quiesce the IO queues.
+ */
+ for (i = 0; i < fc_port->num_io_queues; i++) {
+ err = nvmf_fc_adm_hw_queue_quiesce(&fc_port->io_queues[i],
+ port_quiesce_ctx,
+ nvmf_fc_adm_queue_quiesce_cb);
+ if (err != 0) {
+ DEV_VERIFY(0);
+ SPDK_ERRLOG("Failed to quiesce the IO queue:%d.\n", fc_port->io_queues[i].hwqp_id);
+ }
+ port_quiesce_ctx->quiesce_count++;
+ }
+
+out:
+ if (port_quiesce_ctx && err != 0) {
+ free(port_quiesce_ctx);
+ }
+ return err;
+}
+
+/*
+ * Initialize and add a HW port entry to the global
+ * HW port list.
+ */
+static void
+nvmf_fc_adm_evnt_hw_port_init(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_port_init_args *args = (struct spdk_nvmf_fc_hw_port_init_args *)
+ api_data->api_args;
+ int err = 0;
+
+ if (args->io_queue_cnt > spdk_env_get_core_count()) {
+ SPDK_ERRLOG("IO queues count greater than cores for %d.\n", args->port_handle);
+ err = -EINVAL;
+ goto abort_port_init;
+ }
+
+ /*
+ * 1. Check for duplicate initialization.
+ */
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port != NULL) {
+ /* Port already exists, check if it has to be re-initialized */
+ err = nvmf_fc_adm_hw_port_reinit_validate(fc_port, args);
+ if (err) {
+ /*
+ * In case of an error we do not want to free the fc_port
+ * so we set that pointer to NULL.
+ */
+ fc_port = NULL;
+ }
+ goto abort_port_init;
+ }
+
+ /*
+ * 2. Get the memory to instantiate a fc port.
+ */
+ fc_port = calloc(1, sizeof(struct spdk_nvmf_fc_port) +
+ (args->io_queue_cnt * sizeof(struct spdk_nvmf_fc_hwqp)));
+ if (fc_port == NULL) {
+ SPDK_ERRLOG("Failed to allocate memory for fc_port %d.\n", args->port_handle);
+ err = -ENOMEM;
+ goto abort_port_init;
+ }
+
+ /* assign the io_queues array */
+ fc_port->io_queues = (struct spdk_nvmf_fc_hwqp *)((uint8_t *)fc_port + sizeof(
+ struct spdk_nvmf_fc_port));
+
+ /*
+ * 3. Initialize the contents for the FC-port
+ */
+ err = nvmf_fc_adm_hw_port_data_init(fc_port, args);
+
+ if (err != 0) {
+ SPDK_ERRLOG("Data initialization failed for fc_port %d.\n", args->port_handle);
+ DEV_VERIFY(!"Data initialization failed for fc_port");
+ goto abort_port_init;
+ }
+
+ /*
+ * 4. Add this port to the global fc port list in the library.
+ */
+ nvmf_fc_port_add(fc_port);
+
+abort_port_init:
+ if (err && fc_port) {
+ free(fc_port);
+ }
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_INIT, args->cb_ctx, err);
+ }
+
+ free(arg);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d initialize done, rc = %d.\n",
+ args->port_handle, err);
+}
+
+/*
+ * Online a HW port.
+ */
+static void
+nvmf_fc_adm_evnt_hw_port_online(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ struct spdk_nvmf_fc_hwqp *hwqp = NULL;
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_port_online_args *args = (struct spdk_nvmf_fc_hw_port_online_args *)
+ api_data->api_args;
+ int i = 0;
+ int err = 0;
+
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port) {
+ /* Set the port state to online */
+ err = nvmf_fc_port_set_online(fc_port);
+ if (err != 0) {
+ SPDK_ERRLOG("Hw port %d online failed. err = %d\n", fc_port->port_hdl, err);
+ DEV_VERIFY(!"Hw port online failed");
+ goto out;
+ }
+
+ hwqp = &fc_port->ls_queue;
+ hwqp->context = NULL;
+ (void)nvmf_fc_hwqp_set_online(hwqp);
+
+ /* Cycle through all the io queues and setup a hwqp poller for each. */
+ for (i = 0; i < (int)fc_port->num_io_queues; i++) {
+ hwqp = &fc_port->io_queues[i];
+ hwqp->context = NULL;
+ (void)nvmf_fc_hwqp_set_online(hwqp);
+ nvmf_fc_poll_group_add_hwqp(hwqp);
+ }
+ } else {
+ SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle);
+ err = -EINVAL;
+ }
+
+out:
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_ONLINE, args->cb_ctx, err);
+ }
+
+ free(arg);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d online done, rc = %d.\n", args->port_handle,
+ err);
+}
+
+/*
+ * Offline a HW port.
+ */
+static void
+nvmf_fc_adm_evnt_hw_port_offline(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ struct spdk_nvmf_fc_hwqp *hwqp = NULL;
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_port_offline_args *args = (struct spdk_nvmf_fc_hw_port_offline_args *)
+ api_data->api_args;
+ int i = 0;
+ int err = 0;
+
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port) {
+ /* Set the port state to offline, if it is not already. */
+ err = nvmf_fc_port_set_offline(fc_port);
+ if (err != 0) {
+ SPDK_ERRLOG("Hw port %d already offline. err = %d\n", fc_port->port_hdl, err);
+ err = 0;
+ goto out;
+ }
+
+ hwqp = &fc_port->ls_queue;
+ (void)nvmf_fc_hwqp_set_offline(hwqp);
+
+ /* Remove poller for all the io queues. */
+ for (i = 0; i < (int)fc_port->num_io_queues; i++) {
+ hwqp = &fc_port->io_queues[i];
+ (void)nvmf_fc_hwqp_set_offline(hwqp);
+ nvmf_fc_poll_group_remove_hwqp(hwqp);
+ }
+
+ /*
+ * Delete all the nports. Ideally, the nports should have been purged
+ * before the offline event, in which case, only a validation is required.
+ */
+ nvmf_fc_adm_hw_port_offline_nport_delete(fc_port);
+ } else {
+ SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle);
+ err = -EINVAL;
+ }
+out:
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_OFFLINE, args->cb_ctx, err);
+ }
+
+ free(arg);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d offline done, rc = %d.\n", args->port_handle,
+ err);
+}
+
+struct nvmf_fc_add_rem_listener_ctx {
+ struct spdk_nvmf_subsystem *subsystem;
+ bool add_listener;
+ struct spdk_nvme_transport_id trid;
+};
+
+static void
+nvmf_fc_adm_subsystem_resume_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct nvmf_fc_add_rem_listener_ctx *ctx = (struct nvmf_fc_add_rem_listener_ctx *)cb_arg;
+ free(ctx);
+}
+
+static void
+nvmf_fc_adm_listen_done(void *cb_arg, int status)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct nvmf_fc_add_rem_listener_ctx *ctx = cb_arg;
+
+ if (spdk_nvmf_subsystem_resume(ctx->subsystem, nvmf_fc_adm_subsystem_resume_cb, ctx)) {
+ SPDK_ERRLOG("Failed to resume subsystem: %s\n", ctx->subsystem->subnqn);
+ free(ctx);
+ }
+}
+
+static void
+nvmf_fc_adm_subsystem_paused_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct nvmf_fc_add_rem_listener_ctx *ctx = (struct nvmf_fc_add_rem_listener_ctx *)cb_arg;
+
+ if (ctx->add_listener) {
+ spdk_nvmf_subsystem_add_listener(subsystem, &ctx->trid, nvmf_fc_adm_listen_done, ctx);
+ } else {
+ spdk_nvmf_subsystem_remove_listener(subsystem, &ctx->trid);
+ nvmf_fc_adm_listen_done(ctx, 0);
+ }
+}
+
+static int
+nvmf_fc_adm_add_rem_nport_listener(struct spdk_nvmf_fc_nport *nport, bool add)
+{
+ struct spdk_nvmf_tgt *tgt = nvmf_fc_get_tgt();
+ struct spdk_nvmf_subsystem *subsystem;
+
+ if (!tgt) {
+ SPDK_ERRLOG("No nvmf target defined\n");
+ return -EINVAL;
+ }
+
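+ /* Walk every subsystem on the target. Each subsystem that allows any listener
+ * gets the nport's address added (or removed) as a listener: the subsystem is
+ * paused, the listener is updated from the paused callback, and the subsystem
+ * is then resumed. */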
+ subsystem = spdk_nvmf_subsystem_get_first(tgt);
+ while (subsystem) {
+ struct nvmf_fc_add_rem_listener_ctx *ctx;
+
+ if (spdk_nvmf_subsytem_any_listener_allowed(subsystem) == true) {
+ ctx = calloc(1, sizeof(struct nvmf_fc_add_rem_listener_ctx));
+ if (ctx) {
+ ctx->add_listener = add;
+ ctx->subsystem = subsystem;
+ nvmf_fc_create_trid(&ctx->trid,
+ nport->fc_nodename.u.wwn,
+ nport->fc_portname.u.wwn);
+
+ if (spdk_nvmf_tgt_listen(subsystem->tgt, &ctx->trid)) {
+ SPDK_ERRLOG("Failed to add transport address %s to tgt listeners\n",
+ ctx->trid.traddr);
+ free(ctx);
+ } else if (spdk_nvmf_subsystem_pause(subsystem,
+ nvmf_fc_adm_subsystem_paused_cb,
+ ctx)) {
+ SPDK_ERRLOG("Failed to pause subsystem: %s\n",
+ subsystem->subnqn);
+ free(ctx);
+ }
+ }
+ }
+
+ subsystem = spdk_nvmf_subsystem_get_next(subsystem);
+ }
+
+ return 0;
+}
+
+/*
+ * Create a Nport.
+ */
+static void
+nvmf_fc_adm_evnt_nport_create(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_nport_create_args *args = (struct spdk_nvmf_fc_nport_create_args *)
+ api_data->api_args;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ int err = 0;
+
+ /*
+ * Get the physical port.
+ */
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Check for duplicate initialization.
+ */
+ nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle);
+ if (nport != NULL) {
+ SPDK_ERRLOG("Duplicate SPDK FC nport %d exists for FC port:%d.\n", args->nport_handle,
+ args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Get the memory to instantiate a fc nport.
+ */
+ nport = calloc(1, sizeof(struct spdk_nvmf_fc_nport));
+ if (nport == NULL) {
+ SPDK_ERRLOG("Failed to allocate memory for nport %d.\n",
+ args->nport_handle);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Initialize the contents for the nport
+ */
+ nport->nport_hdl = args->nport_handle;
+ nport->port_hdl = args->port_handle;
+ nport->nport_state = SPDK_NVMF_FC_OBJECT_CREATED;
+ nport->fc_nodename = args->fc_nodename;
+ nport->fc_portname = args->fc_portname;
+ nport->d_id = args->d_id;
+ nport->fc_port = nvmf_fc_port_lookup(args->port_handle);
+
+ (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_CREATED);
+ TAILQ_INIT(&nport->rem_port_list);
+ nport->rport_count = 0;
+ TAILQ_INIT(&nport->fc_associations);
+ nport->assoc_count = 0;
+
+ /*
+ * Populate the nport address (as listening address) to the nvmf subsystems.
+ */
+ err = nvmf_fc_adm_add_rem_nport_listener(nport, true);
+
+ (void)nvmf_fc_port_add_nport(fc_port, nport);
+out:
+ if (err && nport) {
+ free(nport);
+ }
+
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_NPORT_CREATE, args->cb_ctx, err);
+ }
+
+ free(arg);
+}
+
+static void
+nvmf_fc_adm_delete_nport_cb(uint8_t port_handle, enum spdk_fc_event event_type,
+ void *cb_args, int spdk_err)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_nport_del_cb_data *cb_data = cb_args;
+ struct spdk_nvmf_fc_nport *nport = cb_data->nport;
+ spdk_nvmf_fc_callback cb_func = cb_data->fc_cb_func;
+ int err = 0;
+ uint16_t nport_hdl = 0;
+ char log_str[256];
+
+ /*
+ * Assert on any delete failure.
+ */
+ if (nport == NULL) {
+ SPDK_ERRLOG("Nport delete callback returned null nport");
+ DEV_VERIFY(!"nport is null.");
+ goto out;
+ }
+
+ nport_hdl = nport->nport_hdl;
+ if (0 != spdk_err) {
+ SPDK_ERRLOG("Nport delete callback returned error. FC Port: "
+ "%d, Nport: %d\n",
+ nport->port_hdl, nport->nport_hdl);
+ DEV_VERIFY(!"nport delete callback error.");
+ }
+
+ /*
+ * Free the nport if this is the last rport being deleted and
+ * execute the callback(s).
+ */
+ if (nvmf_fc_nport_has_no_rport(nport)) {
+ if (0 != nport->assoc_count) {
+ SPDK_ERRLOG("association count != 0\n");
+ DEV_VERIFY(!"association count != 0");
+ }
+
+ err = nvmf_fc_port_remove_nport(nport->fc_port, nport);
+ if (0 != err) {
+ SPDK_ERRLOG("Nport delete callback: Failed to remove "
+ "nport from nport list. FC Port:%d Nport:%d\n",
+ nport->port_hdl, nport->nport_hdl);
+ }
+ /* Free the nport */
+ free(nport);
+
+ if (cb_func != NULL) {
+ (void)cb_func(cb_data->port_handle, SPDK_FC_NPORT_DELETE, cb_data->fc_cb_ctx, spdk_err);
+ }
+ free(cb_data);
+ }
+out:
+ snprintf(log_str, sizeof(log_str),
+ "port:%d nport:%d delete cb exit, evt_type:%d rc:%d.\n",
+ port_handle, nport_hdl, event_type, spdk_err);
+
+ if (err != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+}
+
+/*
+ * Delete Nport.
+ */
+static void
+nvmf_fc_adm_evnt_nport_delete(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_nport_delete_args *args = (struct spdk_nvmf_fc_nport_delete_args *)
+ api_data->api_args;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_adm_nport_del_cb_data *cb_data = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL;
+ int err = 0;
+ uint32_t rport_cnt = 0;
+ int rc = 0;
+
+ /*
+ * Make sure that the nport exists.
+ */
+ nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle);
+ if (nport == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC nport %d for FC Port: %d.\n", args->nport_handle,
+ args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Allocate memory for callback data.
+ */
+ cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_nport_del_cb_data));
+ if (NULL == cb_data) {
+ SPDK_ERRLOG("Failed to allocate memory for cb_data %d.\n", args->nport_handle);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ cb_data->nport = nport;
+ cb_data->port_handle = args->port_handle;
+ cb_data->fc_cb_func = api_data->cb_func;
+ cb_data->fc_cb_ctx = args->cb_ctx;
+
+ /*
+ * Begin nport tear down
+ */
+ if (nport->nport_state == SPDK_NVMF_FC_OBJECT_CREATED) {
+ (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_TO_BE_DELETED);
+ } else if (nport->nport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ /*
+ * Deletion of this nport already in progress. Register callback
+ * and return.
+ */
+ /* TODO: Register callback in callback vector. For now, set the error and return. */
+ err = -ENODEV;
+ goto out;
+ } else {
+ /* nport partially created/deleted */
+ DEV_VERIFY(nport->nport_state == SPDK_NVMF_FC_OBJECT_ZOMBIE);
+ DEV_VERIFY(0 != "Nport in zombie state");
+ err = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * Remove this nport from listening addresses across subsystems
+ */
+ rc = nvmf_fc_adm_add_rem_nport_listener(nport, false);
+
+ if (0 != rc) {
+ err = nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_ZOMBIE);
+ SPDK_ERRLOG("Unable to remove the listen addr in the subsystems for nport %d.\n",
+ nport->nport_hdl);
+ goto out;
+ }
+
+ /*
+ * Delete all the remote ports (if any) for the nport
+ */
+ /* TODO - Need to do this with a "first" and a "next" accessor function
+ * for completeness. Look at app-subsystem as examples.
+ */
+ if (nvmf_fc_nport_has_no_rport(nport)) {
+ /* No rports to delete. Complete the nport deletion. */
+ nvmf_fc_adm_delete_nport_cb(nport->port_hdl, SPDK_FC_NPORT_DELETE, cb_data, 0);
+ goto out;
+ }
+
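+ /* Queue an IT delete event per remote port; nvmf_fc_adm_delete_nport_cb
+ * finishes the nport deletion once the last rport is removed. */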
+ TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) {
+ struct spdk_nvmf_fc_hw_i_t_delete_args *it_del_args = calloc(
+ 1, sizeof(struct spdk_nvmf_fc_hw_i_t_delete_args));
+
+ if (it_del_args == NULL) {
+ err = -ENOMEM;
+ SPDK_ERRLOG("SPDK_FC_IT_DELETE no mem to delete rport with rpi:%d s_id:%d.\n",
+ rport_iter->rpi, rport_iter->s_id);
+ DEV_VERIFY(!"SPDK_FC_IT_DELETE failed, cannot allocate memory");
+ goto out;
+ }
+
+ rport_cnt++;
+ it_del_args->port_handle = nport->port_hdl;
+ it_del_args->nport_handle = nport->nport_hdl;
+ it_del_args->cb_ctx = (void *)cb_data;
+ it_del_args->rpi = rport_iter->rpi;
+ it_del_args->s_id = rport_iter->s_id;
+
+ nvmf_fc_master_enqueue_event(SPDK_FC_IT_DELETE, (void *)it_del_args,
+ nvmf_fc_adm_delete_nport_cb);
+ }
+
+out:
+ /* On failure, execute the callback function now */
+ if ((err != 0) || (rc != 0)) {
+ SPDK_ERRLOG("NPort %d delete failed, error:%d, fc port:%d, "
+ "rport_cnt:%d rc:%d.\n",
+ args->nport_handle, err, args->port_handle,
+ rport_cnt, rc);
+ if (cb_data) {
+ free(cb_data);
+ }
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_NPORT_DELETE, args->cb_ctx, err);
+ }
+
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API,
+			      "NPort %d delete done successfully, fc port:%d. "
+ "rport_cnt:%d\n",
+ args->nport_handle, args->port_handle, rport_cnt);
+ }
+
+ free(arg);
+}
+
+/*
+ * Process a PRLI/IT add.
+ */
+static void
+nvmf_fc_adm_evnt_i_t_add(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_i_t_add_args *args = (struct spdk_nvmf_fc_hw_i_t_add_args *)
+ api_data->api_args;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport = NULL;
+ int err = 0;
+
+ /*
+	 * Make sure the nport exists.
+ */
+ nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle);
+ if (nport == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC nport %d\n", args->nport_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Check for duplicate i_t_add.
+ */
+ TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) {
+ if ((rport_iter->s_id == args->s_id) && (rport_iter->rpi == args->rpi)) {
+ SPDK_ERRLOG("Duplicate rport found for FC nport %d: sid:%d rpi:%d\n",
+ args->nport_handle, rport_iter->s_id, rport_iter->rpi);
+ err = -EEXIST;
+ goto out;
+ }
+ }
+
+ /*
+ * Get the memory to instantiate the remote port
+ */
+ rport = calloc(1, sizeof(struct spdk_nvmf_fc_remote_port_info));
+ if (rport == NULL) {
+ SPDK_ERRLOG("Memory allocation for rem port failed.\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Initialize the contents for the rport
+ */
+ (void)nvmf_fc_rport_set_state(rport, SPDK_NVMF_FC_OBJECT_CREATED);
+ rport->s_id = args->s_id;
+ rport->rpi = args->rpi;
+ rport->fc_nodename = args->fc_nodename;
+ rport->fc_portname = args->fc_portname;
+
+ /*
+ * Add remote port to nport
+ */
+ if (nvmf_fc_nport_add_rem_port(nport, rport) != 0) {
+ DEV_VERIFY(!"Error while adding rport to list");
+	}
+
+ /*
+	 * TODO: Do we validate the initiator's service parameters?
+ */
+
+ /*
+	 * Get the target's service parameters from the library
+	 * to return to the driver.
+ */
+ args->target_prli_info = nvmf_fc_get_prli_service_params();
+
+out:
+ if (api_data->cb_func != NULL) {
+ /*
+ * Passing pointer to the args struct as the first argument.
+ * The cb_func should handle this appropriately.
+ */
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_IT_ADD, args->cb_ctx, err);
+ }
+
+ free(arg);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API,
+ "IT add on nport %d done, rc = %d.\n",
+ args->nport_handle, err);
+}
+
+/*
+ * Process an IT delete.
+ */
+static void
+nvmf_fc_adm_evnt_i_t_delete(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_i_t_delete_args *args = (struct spdk_nvmf_fc_hw_i_t_delete_args *)
+ api_data->api_args;
+ int rc = 0;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ struct spdk_nvmf_fc_adm_i_t_del_cb_data *cb_data = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL;
+ struct spdk_nvmf_fc_remote_port_info *rport = NULL;
+ uint32_t num_rport = 0;
+ char log_str[256];
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "IT delete on nport:%d begin.\n", args->nport_handle);
+
+ /*
+	 * Make sure the nport exists. If it does not, error out.
+ */
+ nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle);
+ if (nport == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC nport:%d\n", args->nport_handle);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Find this ITN / rport (remote port).
+ */
+ TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) {
+ num_rport++;
+ if ((rport_iter->s_id == args->s_id) &&
+ (rport_iter->rpi == args->rpi) &&
+ (rport_iter->rport_state == SPDK_NVMF_FC_OBJECT_CREATED)) {
+ rport = rport_iter;
+ break;
+ }
+ }
+
+ /*
+ * We should find either zero or exactly one rport.
+ *
+ * If we find zero rports, that means that a previous request has
+ * removed the rport by the time we reached here. In this case,
+ * simply return out.
+ */
+ if (rport == NULL) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * We have found exactly one rport. Allocate memory for callback data.
+ */
+ cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_i_t_del_cb_data));
+ if (NULL == cb_data) {
+ SPDK_ERRLOG("Failed to allocate memory for cb_data for nport:%d.\n", args->nport_handle);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ cb_data->nport = nport;
+ cb_data->rport = rport;
+ cb_data->port_handle = args->port_handle;
+ cb_data->fc_cb_func = api_data->cb_func;
+ cb_data->fc_cb_ctx = args->cb_ctx;
+
+ /*
+ * Validate rport object state.
+ */
+ if (rport->rport_state == SPDK_NVMF_FC_OBJECT_CREATED) {
+ (void)nvmf_fc_rport_set_state(rport, SPDK_NVMF_FC_OBJECT_TO_BE_DELETED);
+ } else if (rport->rport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ /*
+ * Deletion of this rport already in progress. Register callback
+ * and return.
+ */
+ /* TODO: Register callback in callback vector. For now, set the error and return. */
+ rc = -ENODEV;
+ goto out;
+ } else {
+ /* rport partially created/deleted */
+ DEV_VERIFY(rport->rport_state == SPDK_NVMF_FC_OBJECT_ZOMBIE);
+ DEV_VERIFY(!"Invalid rport_state");
+ rc = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * We have successfully found a rport to delete. Call
+ * nvmf_fc_i_t_delete_assoc(), which will perform further
+ * IT-delete processing as well as free the cb_data.
+ */
+ nvmf_fc_adm_i_t_delete_assoc(nport, rport, nvmf_fc_adm_i_t_delete_cb,
+ (void *)cb_data);
+
+out:
+ if (rc != 0) {
+ /*
+ * We have entered here because either we encountered an
+ * error, or we did not find a rport to delete.
+ * As a result, we will not call the function
+ * nvmf_fc_i_t_delete_assoc() for further IT-delete
+ * processing. Therefore, execute the callback function now.
+ */
+ if (cb_data) {
+ free(cb_data);
+ }
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_IT_DELETE, args->cb_ctx, rc);
+ }
+ }
+
+ snprintf(log_str, sizeof(log_str),
+ "IT delete on nport:%d end. num_rport:%d rc = %d.\n",
+ args->nport_handle, num_rport, rc);
+
+ if (rc != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+
+ free(arg);
+}
+
+/*
+ * Process a received ABTS.
+ */
+static void
+nvmf_fc_adm_evnt_abts_recv(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_abts_args *args = (struct spdk_nvmf_fc_abts_args *)api_data->api_args;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ int err = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "FC ABTS received. RPI:%d, oxid:%d, rxid:%d\n", args->rpi,
+ args->oxid, args->rxid);
+
+ /*
+	 * 1. Make sure the nport exists.
+ */
+ nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle);
+ if (nport == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC nport %d\n", args->nport_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * 2. If the nport is in the process of being deleted, drop the ABTS.
+ */
+ if (nport->nport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API,
+ "FC ABTS dropped because the nport is being deleted; RPI:%d, oxid:%d, rxid:%d\n",
+ args->rpi, args->oxid, args->rxid);
+ err = 0;
+ goto out;
+
+ }
+
+ /*
+ * 3. Pass the received ABTS-LS to the library for handling.
+ */
+ nvmf_fc_handle_abts_frame(nport, args->rpi, args->oxid, args->rxid);
+
+out:
+ if (api_data->cb_func != NULL) {
+ /*
+ * Passing pointer to the args struct as the first argument.
+ * The cb_func should handle this appropriately.
+ */
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_ABTS_RECV, args, err);
+ } else {
+ /* No callback set, free the args */
+ free(args);
+ }
+
+ free(arg);
+}
+
+/*
+ * Callback function for hw port quiesce.
+ */
+static void
+nvmf_fc_adm_hw_port_quiesce_reset_cb(void *ctx, int err)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_hw_port_reset_ctx *reset_ctx =
+ (struct spdk_nvmf_fc_adm_hw_port_reset_ctx *)ctx;
+ struct spdk_nvmf_fc_hw_port_reset_args *args = reset_ctx->reset_args;
+ spdk_nvmf_fc_callback cb_func = reset_ctx->reset_cb_func;
+ struct spdk_nvmf_fc_queue_dump_info dump_info;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ char *dump_buf = NULL;
+ uint32_t dump_buf_size = SPDK_FC_HW_DUMP_BUF_SIZE;
+
+ /*
+ * Free the callback context struct.
+ */
+ free(ctx);
+
+ if (err != 0) {
+ SPDK_ERRLOG("Port %d quiesce operation failed.\n", args->port_handle);
+ goto out;
+ }
+
+ if (args->dump_queues == false) {
+ /*
+ * Queues need not be dumped.
+ */
+ goto out;
+ }
+
+ SPDK_ERRLOG("Dumping queues for HW port %d\n", args->port_handle);
+
+ /*
+ * Get the fc port.
+ */
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Allocate memory for the dump buffer.
+ * This memory will be freed by FCT.
+ */
+ dump_buf = (char *)calloc(1, dump_buf_size);
+ if (dump_buf == NULL) {
+ err = -ENOMEM;
+ SPDK_ERRLOG("Memory allocation for dump buffer failed, SPDK FC port %d\n", args->port_handle);
+ goto out;
+ }
+ *args->dump_buf = (uint32_t *)dump_buf;
+ dump_info.buffer = dump_buf;
+ dump_info.offset = 0;
+
+ /*
+ * Add the dump reason to the top of the buffer.
+ */
+ nvmf_fc_dump_buf_print(&dump_info, "%s\n", args->reason);
+
+ /*
+ * Dump the hwqp.
+ */
+ nvmf_fc_dump_all_queues(&fc_port->ls_queue, fc_port->io_queues,
+ fc_port->num_io_queues, &dump_info);
+
+out:
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d reset done, queues_dumped = %d, rc = %d.\n",
+ args->port_handle, args->dump_queues, err);
+
+ if (cb_func != NULL) {
+ (void)cb_func(args->port_handle, SPDK_FC_HW_PORT_RESET, args->cb_ctx, err);
+ }
+}
+
+/*
+ * HW port reset.
+ */
+static void
+nvmf_fc_adm_evnt_hw_port_reset(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_fc_hw_port_reset_args *args = (struct spdk_nvmf_fc_hw_port_reset_args *)
+ api_data->api_args;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ struct spdk_nvmf_fc_adm_hw_port_reset_ctx *ctx = NULL;
+ int err = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d dump\n", args->port_handle);
+
+ /*
+ * Make sure the physical port exists.
+ */
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (fc_port == NULL) {
+ SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Save the reset event args and the callback in a context struct.
+ */
+ ctx = calloc(1, sizeof(struct spdk_nvmf_fc_adm_hw_port_reset_ctx));
+
+ if (ctx == NULL) {
+ err = -ENOMEM;
+ SPDK_ERRLOG("Memory allocation for reset ctx failed, SPDK FC port %d\n", args->port_handle);
+ goto fail;
+ }
+
+	ctx->reset_args = args;
+ ctx->reset_cb_func = api_data->cb_func;
+
+ /*
+ * Quiesce the hw port.
+ */
+ err = nvmf_fc_adm_hw_port_quiesce(fc_port, ctx, nvmf_fc_adm_hw_port_quiesce_reset_cb);
+ if (err != 0) {
+ goto fail;
+ }
+
+	/*
+	 * Once the port is successfully quiesced, the reset processing
+	 * continues in the callback function: nvmf_fc_adm_hw_port_quiesce_reset_cb
+	 */
+ return;
+fail:
+ free(ctx);
+
+out:
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d dump done, rc = %d.\n", args->port_handle,
+ err);
+
+ if (api_data->cb_func != NULL) {
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_RESET, args->cb_ctx, err);
+ }
+
+ free(arg);
+}
+
+/*
+ * Process a link break event on a HW port.
+ */
+static void
+nvmf_fc_adm_evnt_hw_port_link_break(void *arg)
+{
+ ASSERT_SPDK_FC_MASTER_THREAD();
+ struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg;
+ struct spdk_nvmf_hw_port_link_break_args *args = (struct spdk_nvmf_hw_port_link_break_args *)
+ api_data->api_args;
+ struct spdk_nvmf_fc_port *fc_port = NULL;
+ int err = 0;
+ struct spdk_nvmf_fc_adm_port_link_break_cb_data *cb_data = NULL;
+ struct spdk_nvmf_fc_nport *nport = NULL;
+ uint32_t nport_deletes_sent = 0;
+ uint32_t nport_deletes_skipped = 0;
+ struct spdk_nvmf_fc_nport_delete_args *nport_del_args = NULL;
+ char log_str[256];
+
+ /*
+ * Get the fc port using the port handle.
+ */
+ fc_port = nvmf_fc_port_lookup(args->port_handle);
+ if (!fc_port) {
+ SPDK_ERRLOG("port link break: Unable to find the SPDK FC port %d\n",
+ args->port_handle);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Set the port state to offline, if it is not already.
+ */
+ err = nvmf_fc_port_set_offline(fc_port);
+ if (err != 0) {
+ SPDK_ERRLOG("port link break: HW port %d already offline. rc = %d\n",
+ fc_port->port_hdl, err);
+ err = 0;
+ goto out;
+ }
+
+ /*
+ * Delete all the nports, if any.
+ */
+ if (!TAILQ_EMPTY(&fc_port->nport_list)) {
+ TAILQ_FOREACH(nport, &fc_port->nport_list, link) {
+			/* Skip nports that are not in the CREATED state */
+ if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED) {
+ nport_deletes_skipped++;
+ continue;
+ }
+
+ /* Allocate memory for callback data. */
+ cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_port_link_break_cb_data));
+ if (NULL == cb_data) {
+ SPDK_ERRLOG("port link break: Failed to allocate memory for cb_data %d.\n",
+ args->port_handle);
+ err = -ENOMEM;
+ goto out;
+ }
+ cb_data->args = args;
+ cb_data->cb_func = api_data->cb_func;
+ nport_del_args = &cb_data->nport_del_args;
+ nport_del_args->port_handle = args->port_handle;
+ nport_del_args->nport_handle = nport->nport_hdl;
+ nport_del_args->cb_ctx = cb_data;
+
+ nvmf_fc_master_enqueue_event(SPDK_FC_NPORT_DELETE,
+ (void *)nport_del_args,
+ nvmf_fc_adm_hw_port_link_break_cb);
+
+ nport_deletes_sent++;
+ }
+ }
+
+ if (nport_deletes_sent == 0 && err == 0) {
+ /*
+ * Mark the hwqps as offline and unregister the pollers.
+ */
+ (void)nvmf_fc_adm_port_hwqp_offline_del_poller(fc_port);
+ }
+
+out:
+ snprintf(log_str, sizeof(log_str),
+ "port link break done: port:%d nport_deletes_sent:%d nport_deletes_skipped:%d rc:%d.\n",
+ args->port_handle, nport_deletes_sent, nport_deletes_skipped, err);
+
+ if (err != 0) {
+ SPDK_ERRLOG("%s", log_str);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str);
+ }
+
+ if ((api_data->cb_func != NULL) && (nport_deletes_sent == 0)) {
+		/*
+		 * No nport deletes were sent, so nothing else will invoke the
+		 * port_link_break callback. Therefore, call it here.
+		 */
+ (void)api_data->cb_func(args->port_handle, SPDK_FC_LINK_BREAK, args->cb_ctx, err);
+ }
+
+ free(arg);
+}
+
+static inline void
+nvmf_fc_adm_run_on_master_thread(spdk_msg_fn fn, void *args)
+{
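+	/* If no FC master thread is available, the message is silently dropped. */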
+ if (nvmf_fc_get_master_thread()) {
+ spdk_thread_send_msg(nvmf_fc_get_master_thread(), fn, args);
+ }
+}
+
+/*
+ * Queue up an event in the SPDK master's event queue.
+ * Used by the FC driver to notify the SPDK master of FC-related events.
+ */
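+/*
+ * A minimal driver-side usage sketch (identifiers such as port_hdl, my_ctx and
+ * my_done_cb are illustrative only; the args struct must match the event type):
+ *
+ *   struct spdk_nvmf_fc_hw_port_reset_args *a = calloc(1, sizeof(*a));
+ *   a->port_handle = port_hdl;
+ *   a->dump_queues = false;
+ *   a->cb_ctx = my_ctx;
+ *   rc = nvmf_fc_master_enqueue_event(SPDK_FC_HW_PORT_RESET, a, my_done_cb);
+ */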
+int
+nvmf_fc_master_enqueue_event(enum spdk_fc_event event_type, void *args,
+ spdk_nvmf_fc_callback cb_func)
+{
+ int err = 0;
+ struct spdk_nvmf_fc_adm_api_data *api_data = NULL;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Enqueue event %d.\n", event_type);
+
+ if (event_type >= SPDK_FC_EVENT_MAX) {
+ SPDK_ERRLOG("Invalid spdk_fc_event_t %d.\n", event_type);
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (args == NULL) {
+ SPDK_ERRLOG("Null args for event %d.\n", event_type);
+ err = -EINVAL;
+ goto done;
+ }
+
+ api_data = calloc(1, sizeof(*api_data));
+
+ if (api_data == NULL) {
+ SPDK_ERRLOG("Failed to alloc api data for event %d.\n", event_type);
+ err = -ENOMEM;
+ goto done;
+ }
+
+ api_data->api_args = args;
+ api_data->cb_func = cb_func;
+
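+	/* Ownership of api_data passes to the event handler, which frees it after invoking cb_func. */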
+ switch (event_type) {
+ case SPDK_FC_HW_PORT_INIT:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_init,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_HW_PORT_ONLINE:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_online,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_HW_PORT_OFFLINE:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_offline,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_NPORT_CREATE:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_nport_create,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_NPORT_DELETE:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_nport_delete,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_IT_ADD:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_i_t_add,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_IT_DELETE:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_i_t_delete,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_ABTS_RECV:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_abts_recv,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_LINK_BREAK:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_link_break,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_HW_PORT_RESET:
+ nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_reset,
+ (void *)api_data);
+ break;
+
+ case SPDK_FC_UNRECOVERABLE_ERR:
+ default:
+ SPDK_ERRLOG("Invalid spdk_fc_event_t: %d\n", event_type);
+ err = -EINVAL;
+ break;
+ }
+
+done:
+
+ if (err == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Enqueue event %d done successfully\n", event_type);
+ } else {
+ SPDK_ERRLOG("Enqueue event %d failed, err = %d\n", event_type, err);
+ if (api_data) {
+ free(api_data);
+ }
+ }
+
+ return err;
+}
+
+SPDK_NVMF_TRANSPORT_REGISTER(fc, &spdk_nvmf_transport_fc);
+SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_adm_api", SPDK_LOG_NVMF_FC_ADM_API);
+SPDK_LOG_REGISTER_COMPONENT("nvmf_fc", SPDK_LOG_NVMF_FC)
diff --git a/src/spdk/lib/nvmf/fc_ls.c b/src/spdk/lib/nvmf/fc_ls.c
new file mode 100644
index 000000000..1aa06bd45
--- /dev/null
+++ b/src/spdk/lib/nvmf/fc_ls.c
@@ -0,0 +1,1678 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018-2019 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/env.h"
+#include "spdk/assert.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/util.h"
+#include "spdk/endian.h"
+#include "spdk_internal/log.h"
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "nvmf_fc.h"
+#include "fc_lld.h"
+
+/* set to 1 to send ls disconnect in response to ls disconnect from host (per standard) */
+#define NVMF_FC_LS_SEND_LS_DISCONNECT 0
+
+/* Validation Error indexes into the string table below */
+enum {
+ VERR_NO_ERROR = 0,
+ VERR_CR_ASSOC_LEN = 1,
+ VERR_CR_ASSOC_RQST_LEN = 2,
+ VERR_CR_ASSOC_CMD = 3,
+ VERR_CR_ASSOC_CMD_LEN = 4,
+ VERR_ERSP_RATIO = 5,
+ VERR_ASSOC_ALLOC_FAIL = 6,
+ VERR_CONN_ALLOC_FAIL = 7,
+ VERR_CR_CONN_LEN = 8,
+ VERR_CR_CONN_RQST_LEN = 9,
+ VERR_ASSOC_ID = 10,
+ VERR_ASSOC_ID_LEN = 11,
+ VERR_NO_ASSOC = 12,
+ VERR_CONN_ID = 13,
+ VERR_CONN_ID_LEN = 14,
+ VERR_NO_CONN = 15,
+ VERR_CR_CONN_CMD = 16,
+ VERR_CR_CONN_CMD_LEN = 17,
+ VERR_DISCONN_LEN = 18,
+ VERR_DISCONN_RQST_LEN = 19,
+ VERR_DISCONN_CMD = 20,
+ VERR_DISCONN_CMD_LEN = 21,
+ VERR_DISCONN_SCOPE = 22,
+ VERR_RS_LEN = 23,
+ VERR_RS_RQST_LEN = 24,
+ VERR_RS_CMD = 25,
+ VERR_RS_CMD_LEN = 26,
+ VERR_RS_RCTL = 27,
+ VERR_RS_RO = 28,
+ VERR_CONN_TOO_MANY = 29,
+ VERR_SUBNQN = 30,
+ VERR_HOSTNQN = 31,
+ VERR_SQSIZE = 32,
+ VERR_NO_RPORT = 33,
+ VERR_SUBLISTENER = 34,
+};
+
+static char *validation_errors[] = {
+ "OK",
+ "Bad CR_ASSOC Length",
+ "Bad CR_ASSOC Rqst Length",
+ "Not CR_ASSOC Cmd",
+ "Bad CR_ASSOC Cmd Length",
+ "Bad Ersp Ratio",
+ "Association Allocation Failed",
+ "Queue Allocation Failed",
+ "Bad CR_CONN Length",
+ "Bad CR_CONN Rqst Length",
+ "Not Association ID",
+ "Bad Association ID Length",
+ "No Association",
+ "Not Connection ID",
+ "Bad Connection ID Length",
+ "No Connection",
+ "Not CR_CONN Cmd",
+ "Bad CR_CONN Cmd Length",
+ "Bad DISCONN Length",
+ "Bad DISCONN Rqst Length",
+ "Not DISCONN Cmd",
+ "Bad DISCONN Cmd Length",
+ "Bad Disconnect Scope",
+ "Bad RS Length",
+ "Bad RS Rqst Length",
+ "Not RS Cmd",
+ "Bad RS Cmd Length",
+ "Bad RS R_CTL",
+ "Bad RS Relative Offset",
+ "Too many connections for association",
+ "Invalid subnqn or subsystem not found",
+ "Invalid hostnqn or subsystem doesn't allow host",
+ "SQ size = 0 or too big",
+ "No Remote Port",
+ "Bad Subsystem Port",
+};
+
+static inline void
+nvmf_fc_add_assoc_to_tgt_port(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_remote_port_info *rport);
+
+static inline FCNVME_BE32 cpu_to_be32(uint32_t in)
+{
+ uint32_t t;
+
+ to_be32(&t, in);
+ return (FCNVME_BE32)t;
+}
+
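+/*
+ * Descriptor length as carried on the wire: the total descriptor size minus
+ * the desc_tag and desc_len words themselves, converted to big-endian.
+ */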
+static inline FCNVME_BE32 nvmf_fc_lsdesc_len(size_t sz)
+{
+ uint32_t t;
+
+ to_be32(&t, sz - (2 * sizeof(uint32_t)));
+ return (FCNVME_BE32)t;
+}
+
+static void
+nvmf_fc_ls_format_rsp_hdr(void *buf, uint8_t ls_cmd, uint32_t desc_len,
+ uint8_t rqst_ls_cmd)
+{
+ struct spdk_nvmf_fc_ls_acc_hdr *acc_hdr = buf;
+
+ acc_hdr->w0.ls_cmd = ls_cmd;
+ acc_hdr->desc_list_len = desc_len;
+ to_be32(&acc_hdr->rqst.desc_tag, FCNVME_LSDESC_RQST);
+ acc_hdr->rqst.desc_len =
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_rqst));
+ acc_hdr->rqst.w0.ls_cmd = rqst_ls_cmd;
+}
+
+static int
+nvmf_fc_ls_format_rjt(void *buf, uint16_t buflen, uint8_t ls_cmd,
+ uint8_t reason, uint8_t explanation, uint8_t vendor)
+{
+ struct spdk_nvmf_fc_ls_rjt *rjt = buf;
+
+ bzero(buf, sizeof(struct spdk_nvmf_fc_ls_rjt));
+ nvmf_fc_ls_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_rjt)),
+ ls_cmd);
+ to_be32(&rjt->rjt.desc_tag, FCNVME_LSDESC_RJT);
+ rjt->rjt.desc_len = nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_rjt));
+ rjt->rjt.reason_code = reason;
+ rjt->rjt.reason_explanation = explanation;
+ rjt->rjt.vendor = vendor;
+
+ return sizeof(struct spdk_nvmf_fc_ls_rjt);
+}
+
+/* ************************************************** */
+/* Allocators/Deallocators (associations, connections, */
+/* poller API data) */
+
+static inline void
+nvmf_fc_ls_free_association(struct spdk_nvmf_fc_association *assoc)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ /* return the q slots of the conns for the association */
+ TAILQ_FOREACH(fc_conn, &assoc->avail_fc_conns, assoc_avail_link) {
+ if (fc_conn->conn_id != NVMF_FC_INVALID_CONN_ID) {
+ nvmf_fc_release_conn(fc_conn->hwqp, fc_conn->conn_id,
+ fc_conn->max_queue_depth);
+ }
+ }
+
+	/* free association's send disconnect buffer */
+ if (assoc->snd_disconn_bufs) {
+ nvmf_fc_free_srsr_bufs(assoc->snd_disconn_bufs);
+ }
+
+	/* free association's connections */
+ free(assoc->conns_buf);
+
+ /* free the association */
+ free(assoc);
+}
+
+static int
+nvmf_fc_ls_alloc_connections(struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_transport *nvmf_transport)
+{
+ uint32_t i;
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Pre-alloc %d qpairs for host NQN %s\n",
+ nvmf_transport->opts.max_qpairs_per_ctrlr, assoc->host_nqn);
+
+ /* allocate memory for all connections at once */
+ assoc->conns_buf = calloc(nvmf_transport->opts.max_qpairs_per_ctrlr + 1,
+ sizeof(struct spdk_nvmf_fc_conn));
+ if (assoc->conns_buf == NULL) {
+ SPDK_ERRLOG("Out of memory for connections for new association\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nvmf_transport->opts.max_qpairs_per_ctrlr; i++) {
+ fc_conn = assoc->conns_buf + (i * sizeof(struct spdk_nvmf_fc_conn));
+ fc_conn->conn_id = NVMF_FC_INVALID_CONN_ID;
+ fc_conn->qpair.state = SPDK_NVMF_QPAIR_UNINITIALIZED;
+ fc_conn->qpair.transport = nvmf_transport;
+
+ TAILQ_INSERT_TAIL(&assoc->avail_fc_conns, fc_conn, assoc_avail_link);
+ }
+
+ return 0;
+}
+
+static struct spdk_nvmf_fc_association *
+nvmf_fc_ls_new_association(uint32_t s_id,
+ struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_remote_port_info *rport,
+ struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd *a_cmd,
+ struct spdk_nvmf_subsystem *subsys,
+ uint16_t rpi,
+ struct spdk_nvmf_transport *nvmf_transport)
+{
+ struct spdk_nvmf_fc_association *assoc;
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "New Association request for port %d nport %d rpi 0x%x\n",
+ tgtport->fc_port->port_hdl, tgtport->nport_hdl, rpi);
+
+ assert(rport);
+ if (!rport) {
+ SPDK_ERRLOG("rport is null.\n");
+ return NULL;
+ }
+
+ assoc = calloc(1, sizeof(struct spdk_nvmf_fc_association));
+ if (!assoc) {
+ SPDK_ERRLOG("unable to allocate memory for new association\n");
+ return NULL;
+ }
+
+ /* initialize association */
+#if (NVMF_FC_LS_SEND_LS_DISCONNECT == 1)
+ /* allocate buffers to send LS disconnect command to host */
+ assoc->snd_disconn_bufs =
+ nvmf_fc_alloc_srsr_bufs(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst),
+ sizeof(struct spdk_nvmf_fc_ls_rjt));
+ if (!assoc->snd_disconn_bufs) {
+ SPDK_ERRLOG("no dma memory for association's ls disconnect bufs\n");
+ free(assoc);
+ return NULL;
+ }
+
+ assoc->snd_disconn_bufs->rpi = rpi;
+#endif
+ assoc->s_id = s_id;
+ assoc->tgtport = tgtport;
+ assoc->rport = rport;
+ assoc->subsystem = subsys;
+ assoc->assoc_state = SPDK_NVMF_FC_OBJECT_CREATED;
+ memcpy(assoc->host_id, a_cmd->hostid, FCNVME_ASSOC_HOSTID_LEN);
+ memcpy(assoc->host_nqn, a_cmd->hostnqn, SPDK_NVME_NQN_FIELD_SIZE);
+ memcpy(assoc->sub_nqn, a_cmd->subnqn, SPDK_NVME_NQN_FIELD_SIZE);
+ TAILQ_INIT(&assoc->fc_conns);
+ TAILQ_INIT(&assoc->avail_fc_conns);
+ assoc->ls_del_op_ctx = NULL;
+
+ /* allocate and assign connections for association */
+ rc = nvmf_fc_ls_alloc_connections(assoc, nvmf_transport);
+ if (rc != 0) {
+ nvmf_fc_ls_free_association(assoc);
+ return NULL;
+ }
+
+ /* add association to target port's association list */
+ nvmf_fc_add_assoc_to_tgt_port(tgtport, assoc, rport);
+ return assoc;
+}
+
+static inline void
+nvmf_fc_ls_append_del_cb_ctx(struct spdk_nvmf_fc_association *assoc,
+ struct nvmf_fc_ls_op_ctx *opd)
+{
+ /* append to delete assoc callback list */
+ if (!assoc->ls_del_op_ctx) {
+ assoc->ls_del_op_ctx = (void *)opd;
+ } else {
+ struct nvmf_fc_ls_op_ctx *nxt =
+ (struct nvmf_fc_ls_op_ctx *) assoc->ls_del_op_ctx;
+ while (nxt->next_op_ctx) {
+ nxt = nxt->next_op_ctx;
+ }
+ nxt->next_op_ctx = opd;
+ }
+}
+
+static struct spdk_nvmf_fc_conn *
+nvmf_fc_ls_new_connection(struct spdk_nvmf_fc_association *assoc, uint16_t qid,
+ uint16_t esrp_ratio, uint16_t rpi, uint16_t sq_size,
+ struct spdk_nvmf_fc_nport *tgtport)
+{
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ fc_conn = TAILQ_FIRST(&assoc->avail_fc_conns);
+ if (!fc_conn) {
+ SPDK_ERRLOG("out of connections for association %p\n", assoc);
+ return NULL;
+ }
+
+ /* Remove from avail list and add to in use. */
+ TAILQ_REMOVE(&assoc->avail_fc_conns, fc_conn, assoc_avail_link);
+ TAILQ_INSERT_TAIL(&assoc->fc_conns, fc_conn, assoc_link);
+
+ if (qid == 0) {
+ /* AdminQ connection. */
+ assoc->aq_conn = fc_conn;
+ }
+
+ fc_conn->qpair.qid = qid;
+ fc_conn->qpair.sq_head_max = sq_size;
+ TAILQ_INIT(&fc_conn->qpair.outstanding);
+ fc_conn->esrp_ratio = esrp_ratio;
+ fc_conn->fc_assoc = assoc;
+ fc_conn->rpi = rpi;
+ fc_conn->max_queue_depth = sq_size + 1;
+
+ /* save target port trid in connection (for subsystem
+ * listener validation in fabric connect command)
+ */
+ nvmf_fc_create_trid(&fc_conn->trid, tgtport->fc_nodename.u.wwn,
+ tgtport->fc_portname.u.wwn);
+
+ return fc_conn;
+}
+
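+/*
+ * "Freeing" a connection only returns it to the association's avail list;
+ * the backing memory (conns_buf) is released when the association is freed.
+ */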
+static inline void
+nvmf_fc_ls_free_connection(struct spdk_nvmf_fc_conn *fc_conn)
+{
+ TAILQ_INSERT_TAIL(&fc_conn->fc_assoc->avail_fc_conns, fc_conn, assoc_avail_link);
+}
+
+/* End - Allocators/Deallocators (associations, connections, */
+/* poller API data) */
+/* ******************************************************** */
+
+static inline struct spdk_nvmf_fc_association *
+nvmf_fc_ls_find_assoc(struct spdk_nvmf_fc_nport *tgtport, uint64_t assoc_id)
+{
+ struct spdk_nvmf_fc_association *assoc = NULL;
+
+ TAILQ_FOREACH(assoc, &tgtport->fc_associations, link) {
+ if (assoc->assoc_id == assoc_id) {
+ if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_ZOMBIE) {
+ assoc = NULL;
+ }
+ break;
+ }
+ }
+ return assoc;
+}
+
+static inline void
+nvmf_fc_add_assoc_to_tgt_port(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_remote_port_info *rport)
+{
+ TAILQ_INSERT_TAIL(&tgtport->fc_associations, assoc, link);
+ tgtport->assoc_count++;
+ rport->assoc_count++;
+}
+
+static inline void
+nvmf_fc_del_assoc_from_tgt_port(struct spdk_nvmf_fc_association *assoc)
+{
+ struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport;
+
+ TAILQ_REMOVE(&tgtport->fc_associations, assoc, link);
+ tgtport->assoc_count--;
+ assoc->rport->assoc_count--;
+}
+
+static void
+nvmf_fc_ls_rsp_fail_del_conn_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct nvmf_fc_ls_op_ctx *opd =
+ (struct nvmf_fc_ls_op_ctx *)cb_data;
+ struct spdk_nvmf_fc_ls_del_conn_api_data *dp = &opd->u.del_conn;
+ struct spdk_nvmf_fc_association *assoc = dp->assoc;
+ struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Delete Connection callback "
+ "for assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id,
+ fc_conn->conn_id);
+
+ if (dp->aq_conn) {
+ /* delete association */
+ nvmf_fc_del_assoc_from_tgt_port(assoc);
+ nvmf_fc_ls_free_association(assoc);
+ } else {
+ /* remove connection from association's connection list */
+ TAILQ_REMOVE(&assoc->fc_conns, fc_conn, assoc_link);
+ nvmf_fc_ls_free_connection(fc_conn);
+ }
+
+ free(opd);
+}
+
+static void
+nvmf_fc_handle_xmt_ls_rsp_failure(struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_conn *fc_conn,
+ bool aq_conn)
+{
+ struct spdk_nvmf_fc_ls_del_conn_api_data *api_data;
+ struct nvmf_fc_ls_op_ctx *opd = NULL;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Transmit LS response failure "
+ "for assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id,
+ fc_conn->conn_id);
+
+
+ /* create context for delete connection API */
+ opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx));
+ if (!opd) { /* hopefully this doesn't happen - if so, we leak the connection */
+ SPDK_ERRLOG("Mem alloc failed for del conn op data");
+ return;
+ }
+
+ api_data = &opd->u.del_conn;
+ api_data->assoc = assoc;
+ api_data->ls_rqst = NULL;
+ api_data->aq_conn = aq_conn;
+ api_data->args.fc_conn = fc_conn;
+ api_data->args.send_abts = false;
+ api_data->args.hwqp = fc_conn->hwqp;
+ api_data->args.cb_info.cb_thread = spdk_get_thread();
+ api_data->args.cb_info.cb_func = nvmf_fc_ls_rsp_fail_del_conn_cb;
+ api_data->args.cb_info.cb_data = opd;
+
+ nvmf_fc_poller_api_func(api_data->args.hwqp,
+ SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION,
+ &api_data->args);
+}
+
+/* callback from poller's ADD_Connection event */
+static void
+nvmf_fc_ls_add_conn_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct nvmf_fc_ls_op_ctx *opd =
+ (struct nvmf_fc_ls_op_ctx *)cb_data;
+ struct spdk_nvmf_fc_ls_add_conn_api_data *dp = &opd->u.add_conn;
+ struct spdk_nvmf_fc_association *assoc = dp->assoc;
+ struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport;
+ struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst = dp->ls_rqst;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "add_conn_cb: assoc_id = 0x%lx, conn_id = 0x%lx\n",
+ assoc->assoc_id, fc_conn->conn_id);
+
+ fc_conn->create_opd = NULL;
+
+ if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ /* association is already being deleted - don't continue */
+ free(opd);
+ return;
+ }
+
+ if (dp->aq_conn) {
+ struct spdk_nvmf_fc_ls_cr_assoc_acc *assoc_acc =
+ (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt;
+ /* put connection and association ID in response */
+ to_be64(&assoc_acc->conn_id.connection_id, fc_conn->conn_id);
+ assoc_acc->assoc_id.association_id = assoc_acc->conn_id.connection_id;
+ } else {
+ struct spdk_nvmf_fc_ls_cr_conn_acc *conn_acc =
+ (struct spdk_nvmf_fc_ls_cr_conn_acc *)ls_rqst->rspbuf.virt;
+ /* put connection ID in response */
+ to_be64(&conn_acc->conn_id.connection_id, fc_conn->conn_id);
+ }
+
+ /* send LS response */
+ if (nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst) != 0) {
+ SPDK_ERRLOG("Send LS response for %s failed - cleaning up\n",
+ dp->aq_conn ? "association" : "connection");
+ nvmf_fc_handle_xmt_ls_rsp_failure(assoc, fc_conn,
+ dp->aq_conn);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "LS response (conn_id 0x%lx) sent\n", fc_conn->conn_id);
+ }
+
+ free(opd);
+}
+
+void
+nvmf_fc_ls_add_conn_failure(
+ struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst,
+ struct spdk_nvmf_fc_conn *fc_conn,
+ bool aq_conn)
+{
+ struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst;
+ struct spdk_nvmf_fc_ls_cr_assoc_acc *acc;
+ struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport;
+
+ if (fc_conn->create_opd) {
+ free(fc_conn->create_opd);
+ fc_conn->create_opd = NULL;
+ }
+
+ rqst = (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt;
+ acc = (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt;
+
+ /* send failure response */
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc,
+ FCNVME_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_INSUFF_RES,
+ FCNVME_RJT_EXP_NONE, 0);
+
+ nvmf_fc_ls_free_connection(fc_conn);
+ if (aq_conn) {
+ nvmf_fc_del_assoc_from_tgt_port(assoc);
+ nvmf_fc_ls_free_association(assoc);
+ }
+
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+}
+
+
+static void
+nvmf_fc_ls_add_conn_to_poller(
+ struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst,
+ struct spdk_nvmf_fc_conn *fc_conn,
+ bool aq_conn)
+{
+ struct nvmf_fc_ls_op_ctx *opd;
+ struct spdk_nvmf_fc_ls_add_conn_api_data *api_data;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Add Connection to poller for "
+ "assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id,
+ fc_conn->conn_id);
+
+ opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx));
+ if (!opd) {
+ SPDK_ERRLOG("allocate api data for add conn op failed\n");
+ nvmf_fc_ls_add_conn_failure(assoc, ls_rqst, fc_conn, aq_conn);
+ return;
+ }
+
+ /* insert conn in association's connection list */
+ api_data = &opd->u.add_conn;
+ assoc->conn_count++;
+
+ api_data->args.fc_conn = fc_conn;
+ api_data->args.cb_info.cb_thread = spdk_get_thread();
+ api_data->args.cb_info.cb_func = nvmf_fc_ls_add_conn_cb;
+ api_data->args.cb_info.cb_data = (void *)opd;
+ api_data->assoc = assoc;
+ api_data->ls_rqst = ls_rqst;
+ api_data->aq_conn = aq_conn;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "New QP callback called.\n");
+
+ /* Let the nvmf_tgt decide which pollgroup to use. */
+ fc_conn->create_opd = opd;
+ spdk_nvmf_tgt_new_qpair(ls_rqst->nvmf_tgt, &fc_conn->qpair);
+}
+
+/* Delete association functions */
+
+static void
+nvmf_fc_do_del_assoc_cbs(struct nvmf_fc_ls_op_ctx *opd, int ret)
+{
+ struct nvmf_fc_ls_op_ctx *nxt;
+ struct spdk_nvmf_fc_delete_assoc_api_data *dp;
+
+ while (opd) {
+ dp = &opd->u.del_assoc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "performing delete assoc. callback\n");
+ dp->del_assoc_cb(dp->del_assoc_cb_data, ret);
+
+ nxt = opd->next_op_ctx;
+ free(opd);
+ opd = nxt;
+ }
+}
+
+static void
+nvmf_fs_send_ls_disconnect_cb(void *hwqp, int32_t status, void *args)
+{
+ if (args) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "free disconnect buffers\n");
+ nvmf_fc_free_srsr_bufs((struct spdk_nvmf_fc_srsr_bufs *)args);
+ }
+}
+
+static void
+nvmf_fc_del_all_conns_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data;
+ struct spdk_nvmf_fc_delete_assoc_api_data *dp = &opd->u.del_assoc;
+ struct spdk_nvmf_fc_association *assoc = dp->assoc;
+ struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn;
+
+	/* Assumption here is that there will be no error (i.e. ret == success).
+	 * Since connections are deleted in parallel, nothing can be done
+	 * anyway if there is an error, because we need to complete all
+	 * connection deletes and then call back to the caller. */
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "Delete all connections for assoc_id 0x%lx, conn_id = %lx\n",
+ assoc->assoc_id, fc_conn->conn_id);
+
+ /* remove connection from association's connection list */
+ TAILQ_REMOVE(&assoc->fc_conns, fc_conn, assoc_link);
+ nvmf_fc_ls_free_connection(fc_conn);
+
+ if (--assoc->conn_count == 0) {
+ /* last connection - remove association from target port's association list */
+ struct nvmf_fc_ls_op_ctx *cb_opd = (struct nvmf_fc_ls_op_ctx *)assoc->ls_del_op_ctx;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "remove assoc. %lx\n", assoc->assoc_id);
+ nvmf_fc_del_assoc_from_tgt_port(assoc);
+
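+		/* snd_disconn_bufs is only allocated when NVMF_FC_LS_SEND_LS_DISCONNECT is enabled. */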
+ if (assoc->snd_disconn_bufs &&
+ assoc->tgtport->fc_port->hw_port_status == SPDK_FC_PORT_ONLINE) {
+
+ struct spdk_nvmf_fc_ls_disconnect_rqst *dc_rqst;
+ struct spdk_nvmf_fc_srsr_bufs *srsr_bufs;
+
+ dc_rqst = (struct spdk_nvmf_fc_ls_disconnect_rqst *)
+ assoc->snd_disconn_bufs->rqst;
+
+ bzero(dc_rqst, sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst));
+
+ /* fill in request descriptor */
+ dc_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
+ to_be32(&dc_rqst->desc_list_len,
+ sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst) -
+ (2 * sizeof(uint32_t)));
+
+ /* fill in disconnect command descriptor */
+ to_be32(&dc_rqst->disconn_cmd.desc_tag, FCNVME_LSDESC_DISCONN_CMD);
+ to_be32(&dc_rqst->disconn_cmd.desc_len,
+ sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd) -
+ (2 * sizeof(uint32_t)));
+
+ /* fill in association id descriptor */
+			to_be32(&dc_rqst->assoc_id.desc_tag, FCNVME_LSDESC_ASSOC_ID);
+ to_be32(&dc_rqst->assoc_id.desc_len,
+ sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id) -
+ (2 * sizeof(uint32_t)));
+ to_be64(&dc_rqst->assoc_id.association_id, assoc->assoc_id);
+
+ srsr_bufs = assoc->snd_disconn_bufs;
+ assoc->snd_disconn_bufs = NULL;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Send LS disconnect\n");
+ if (nvmf_fc_xmt_srsr_req(&assoc->tgtport->fc_port->ls_queue,
+ srsr_bufs, nvmf_fs_send_ls_disconnect_cb,
+ (void *)srsr_bufs)) {
+ SPDK_ERRLOG("Error sending LS disconnect\n");
+ assoc->snd_disconn_bufs = srsr_bufs;
+ }
+ }
+
+ nvmf_fc_ls_free_association(assoc);
+
+ /* perform callbacks to all callers to delete association */
+ nvmf_fc_do_del_assoc_cbs(cb_opd, 0);
+
+ }
+
+ free(opd);
+}
+
+static void
+nvmf_fc_kill_io_del_all_conns_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Callback after killing outstanding ABTS.");
+ /*
+ * NOTE: We should not access any connection or association related data
+ * structures here.
+ */
+ free(opd);
+}
+
+
+/* Disconnect/delete (association) request functions */
+
+static int
+_nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport,
+ uint64_t assoc_id, bool send_abts, bool backend_initiated,
+ spdk_nvmf_fc_del_assoc_cb del_assoc_cb,
+ void *cb_data, bool from_ls_rqst)
+{
+
+ struct nvmf_fc_ls_op_ctx *opd, *opd_tail, *opd_head = NULL;
+ struct spdk_nvmf_fc_delete_assoc_api_data *api_data;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_association *assoc =
+ nvmf_fc_ls_find_assoc(tgtport, assoc_id);
+ struct spdk_nvmf_fc_port *fc_port = tgtport->fc_port;
+ enum spdk_nvmf_fc_object_state assoc_state;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Delete association, "
+ "assoc_id 0x%lx\n", assoc_id);
+
+ if (!assoc) {
+ SPDK_ERRLOG("Delete association failed: %s\n",
+ validation_errors[VERR_NO_ASSOC]);
+ return VERR_NO_ASSOC;
+ }
+
+ /* create cb context to put in association's list of
+ * callbacks to call when delete association is done */
+ opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx));
+ if (!opd) {
+ SPDK_ERRLOG("Mem alloc failed for del assoc cb data");
+ return -ENOMEM;
+ }
+
+ api_data = &opd->u.del_assoc;
+ api_data->assoc = assoc;
+ api_data->from_ls_rqst = from_ls_rqst;
+ api_data->del_assoc_cb = del_assoc_cb;
+ api_data->del_assoc_cb_data = cb_data;
+ api_data->args.cb_info.cb_data = opd;
+ nvmf_fc_ls_append_del_cb_ctx(assoc, opd);
+
+ assoc_state = assoc->assoc_state;
+ if ((assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) &&
+ (fc_port->hw_port_status != SPDK_FC_PORT_QUIESCED)) {
+ /* association already being deleted */
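+		/* The callback context was already appended above, so the caller is
+		 * notified when the in-progress delete completes. */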
+ return 0;
+ }
+
+ /* mark assoc. to be deleted */
+ assoc->assoc_state = SPDK_NVMF_FC_OBJECT_TO_BE_DELETED;
+
+	/* create a list of all connections to delete */
+ TAILQ_FOREACH(fc_conn, &assoc->fc_conns, assoc_link) {
+ opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx));
+ if (!opd) { /* hopefully this doesn't happen */
+ SPDK_ERRLOG("Mem alloc failed for del conn op data");
+ while (opd_head) { /* free any contexts already allocated */
+ opd = opd_head;
+ opd_head = opd->next_op_ctx;
+ free(opd);
+ }
+ return -ENOMEM;
+ }
+
+ api_data = &opd->u.del_assoc;
+ api_data->args.fc_conn = fc_conn;
+ api_data->assoc = assoc;
+ api_data->args.send_abts = send_abts;
+ api_data->args.backend_initiated = backend_initiated;
+ api_data->args.hwqp = nvmf_fc_get_hwqp_from_conn_id(
+ assoc->tgtport->fc_port->io_queues,
+ assoc->tgtport->fc_port->num_io_queues,
+ fc_conn->conn_id);
+ api_data->args.cb_info.cb_thread = spdk_get_thread();
+ if ((fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) &&
+ (assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED)) {
+			/*
+			 * If there are any connection deletes or IO ABTS that are
+			 * stuck because of a firmware reset, a second invocation of
+			 * SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION will result in
+			 * outstanding connections & requests being killed and
+			 * their corresponding callbacks being executed.
+			 */
+ api_data->args.cb_info.cb_func = nvmf_fc_kill_io_del_all_conns_cb;
+ } else {
+ api_data->args.cb_info.cb_func = nvmf_fc_del_all_conns_cb;
+ }
+ api_data->args.cb_info.cb_data = opd;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "conn_id = %lx\n", fc_conn->conn_id);
+
+ if (!opd_head) {
+ opd_head = opd;
+ } else {
+ opd_tail->next_op_ctx = opd;
+ }
+ opd_tail = opd;
+ }
+
+	/* make poller API calls to delete connections */
+ while (opd_head) {
+ opd = opd_head;
+ opd_head = opd->next_op_ctx;
+ api_data = &opd->u.del_assoc;
+ nvmf_fc_poller_api_func(api_data->args.hwqp,
+ SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION,
+ &api_data->args);
+ }
+
+ return 0;
+}
+
+static void
+nvmf_fc_ls_disconnect_assoc_cb(void *cb_data, uint32_t err)
+{
+ struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data;
+ struct spdk_nvmf_fc_ls_disconn_assoc_api_data *dp = &opd->u.disconn_assoc;
+ struct spdk_nvmf_fc_nport *tgtport = dp->tgtport;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst = dp->ls_rqst;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Disconnect association callback begin "
+ "nport %d\n", tgtport->nport_hdl);
+ if (err != 0) {
+ /* send failure response */
+ struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst =
+ (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt;
+ struct spdk_nvmf_fc_ls_cr_assoc_acc *acc =
+ (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt;
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc,
+ FCNVME_MAX_LS_BUFFER_SIZE,
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE,
+ 0);
+ }
+
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+
+ free(opd);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Disconnect association callback complete "
+ "nport %d err %d\n", tgtport->nport_hdl, err);
+}
+
+static void
+nvmf_fc_ls_disconnect_assoc(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst, uint64_t assoc_id)
+{
+ struct nvmf_fc_ls_op_ctx *opd;
+ struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst =
+ (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt;
+ struct spdk_nvmf_fc_ls_cr_assoc_acc *acc =
+ (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt;
+ struct spdk_nvmf_fc_ls_disconn_assoc_api_data *api_data;
+ int ret;
+ uint8_t reason = 0;
+
+ opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx));
+ if (!opd) {
+ /* send failure response */
+ SPDK_ERRLOG("Allocate disconn assoc op data failed\n");
+ reason = FCNVME_RJT_RC_INSUFF_RES;
+ goto send_rjt;
+ }
+
+ api_data = &opd->u.disconn_assoc;
+ api_data->tgtport = tgtport;
+ api_data->ls_rqst = ls_rqst;
+ ret = _nvmf_fc_delete_association(tgtport, assoc_id,
+ false, false,
+ nvmf_fc_ls_disconnect_assoc_cb,
+ api_data, true);
+ if (!ret) {
+ return;
+ }
+
+ /* delete association failed */
+ switch (ret) {
+ case VERR_NO_ASSOC:
+ reason = FCNVME_RJT_RC_INV_ASSOC;
+ break;
+ case -ENOMEM:
+ reason = FCNVME_RJT_RC_INSUFF_RES;
+ break;
+ default:
+ reason = FCNVME_RJT_RC_LOGIC;
+ }
+
+ free(opd);
+
+send_rjt:
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc,
+ FCNVME_MAX_LS_BUFFER_SIZE,
+ rqst->w0.ls_cmd, reason,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+}
+
+static int
+nvmf_fc_ls_validate_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+
+ if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/* **************************** */
+/* LS Request Handler Functions */
+
+static void
+nvmf_fc_ls_process_cass(uint32_t s_id,
+ struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst)
+{
+ struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst =
+ (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt;
+ struct spdk_nvmf_fc_ls_cr_assoc_acc *acc =
+ (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt;
+ struct spdk_nvmf_fc_association *assoc;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_subsystem *subsystem = NULL;
+ const char *hostnqn = (const char *)rqst->assoc_cmd.hostnqn;
+ int errmsg_ind = 0;
+ uint8_t rc = FCNVME_RJT_RC_NONE;
+ uint8_t ec = FCNVME_RJT_EXP_NONE;
+ struct spdk_nvmf_transport *transport = spdk_nvmf_tgt_get_transport(ls_rqst->nvmf_tgt,
+ SPDK_NVME_TRANSPORT_NAME_FC);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "LS_CASS: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d, sq_size=%d, "
+ "Subnqn: %s, Hostnqn: %s, Tgtport nn:%lx, pn:%lx\n",
+ ls_rqst->rqst_len, from_be32(&rqst->desc_list_len),
+ from_be32(&rqst->assoc_cmd.desc_len),
+ from_be32(&rqst->assoc_cmd.sqsize),
+ rqst->assoc_cmd.subnqn, hostnqn,
+ tgtport->fc_nodename.u.wwn, tgtport->fc_portname.u.wwn);
+
+ if (ls_rqst->rqst_len < FCNVME_LS_CA_CMD_MIN_LEN) {
+ SPDK_ERRLOG("assoc_cmd req len = %d, should be at least %d\n",
+ ls_rqst->rqst_len, FCNVME_LS_CA_CMD_MIN_LEN);
+ errmsg_ind = VERR_CR_ASSOC_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (from_be32(&rqst->desc_list_len) <
+ FCNVME_LS_CA_DESC_LIST_MIN_LEN) {
+ SPDK_ERRLOG("assoc_cmd desc list len = %d, should be at least %d\n",
+ from_be32(&rqst->desc_list_len),
+ FCNVME_LS_CA_DESC_LIST_MIN_LEN);
+ errmsg_ind = VERR_CR_ASSOC_RQST_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->assoc_cmd.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD)) {
+ errmsg_ind = VERR_CR_ASSOC_CMD;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ } else if (from_be32(&rqst->assoc_cmd.desc_len) <
+ FCNVME_LS_CA_DESC_MIN_LEN) {
+ SPDK_ERRLOG("assoc_cmd desc len = %d, should be at least %d\n",
+ from_be32(&rqst->assoc_cmd.desc_len),
+ FCNVME_LS_CA_DESC_MIN_LEN);
+ errmsg_ind = VERR_CR_ASSOC_CMD_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (!rqst->assoc_cmd.ersp_ratio ||
+ (from_be16(&rqst->assoc_cmd.ersp_ratio) >=
+ from_be16(&rqst->assoc_cmd.sqsize))) {
+ errmsg_ind = VERR_ERSP_RATIO;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_ESRP;
+ } else if (from_be16(&rqst->assoc_cmd.sqsize) == 0 ||
+ from_be16(&rqst->assoc_cmd.sqsize) > transport->opts.max_aq_depth) {
+ errmsg_ind = VERR_SQSIZE;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_SQ_SIZE;
+ }
+
+ if (rc != FCNVME_RJT_RC_NONE) {
+ goto rjt_cass;
+ }
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(ls_rqst->nvmf_tgt, rqst->assoc_cmd.subnqn);
+ if (subsystem == NULL) {
+ errmsg_ind = VERR_SUBNQN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_SUBNQN;
+ goto rjt_cass;
+ }
+
+ if (nvmf_fc_ls_validate_host(subsystem, hostnqn)) {
+ errmsg_ind = VERR_HOSTNQN;
+ rc = FCNVME_RJT_RC_INV_HOST;
+ ec = FCNVME_RJT_EXP_INV_HOSTNQN;
+ goto rjt_cass;
+ }
+
+ /* get new association */
+ assoc = nvmf_fc_ls_new_association(s_id, tgtport, ls_rqst->rport,
+ &rqst->assoc_cmd, subsystem,
+ ls_rqst->rpi, transport);
+ if (!assoc) {
+ errmsg_ind = VERR_ASSOC_ALLOC_FAIL;
+ rc = FCNVME_RJT_RC_INSUFF_RES;
+ ec = FCNVME_RJT_EXP_NONE;
+ goto rjt_cass;
+ }
+
+ /* alloc admin q (i.e. connection) */
+ fc_conn = nvmf_fc_ls_new_connection(assoc, 0,
+ from_be16(&rqst->assoc_cmd.ersp_ratio),
+ ls_rqst->rpi,
+ from_be16(&rqst->assoc_cmd.sqsize),
+ tgtport);
+ if (!fc_conn) {
+ nvmf_fc_ls_free_association(assoc);
+ errmsg_ind = VERR_CONN_ALLOC_FAIL;
+ rc = FCNVME_RJT_RC_INSUFF_RES;
+ ec = FCNVME_RJT_EXP_NONE;
+ goto rjt_cass;
+ }
+
+ /* format accept response */
+ bzero(acc, sizeof(*acc));
+ ls_rqst->rsp_len = sizeof(*acc);
+
+ nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvmf_fc_lsdesc_len(
+ sizeof(struct spdk_nvmf_fc_ls_cr_assoc_acc)),
+ FCNVME_LS_CREATE_ASSOCIATION);
+ to_be32(&acc->assoc_id.desc_tag, FCNVME_LSDESC_ASSOC_ID);
+ acc->assoc_id.desc_len =
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id));
+ to_be32(&acc->conn_id.desc_tag, FCNVME_LSDESC_CONN_ID);
+ acc->conn_id.desc_len =
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id));
+
+ /* assign connection to HWQP poller - also sends response */
+ nvmf_fc_ls_add_conn_to_poller(assoc, ls_rqst, fc_conn, true);
+
+ return;
+
+rjt_cass:
+ SPDK_ERRLOG("Create Association LS failed: %s\n", validation_errors[errmsg_ind]);
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE,
+ rqst->w0.ls_cmd, rc, ec, 0);
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+}
+
+static void
+nvmf_fc_ls_process_cioc(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst)
+{
+ struct spdk_nvmf_fc_ls_cr_conn_rqst *rqst =
+ (struct spdk_nvmf_fc_ls_cr_conn_rqst *)ls_rqst->rqstbuf.virt;
+ struct spdk_nvmf_fc_ls_cr_conn_acc *acc =
+ (struct spdk_nvmf_fc_ls_cr_conn_acc *)ls_rqst->rspbuf.virt;
+ struct spdk_nvmf_fc_association *assoc;
+ struct spdk_nvmf_fc_conn *fc_conn = NULL;
+ int errmsg_ind = 0;
+ uint8_t rc = FCNVME_RJT_RC_NONE;
+ uint8_t ec = FCNVME_RJT_EXP_NONE;
+ struct spdk_nvmf_transport *transport = spdk_nvmf_tgt_get_transport(ls_rqst->nvmf_tgt,
+ SPDK_NVME_TRANSPORT_NAME_FC);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "LS_CIOC: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d, "
+ "assoc_id=0x%lx, sq_size=%d, esrp=%d, Tgtport nn:%lx, pn:%lx\n",
+ ls_rqst->rqst_len, from_be32(&rqst->desc_list_len),
+ from_be32(&rqst->connect_cmd.desc_len),
+ from_be64(&rqst->assoc_id.association_id),
+ from_be32(&rqst->connect_cmd.sqsize),
+ from_be32(&rqst->connect_cmd.ersp_ratio),
+ tgtport->fc_nodename.u.wwn, tgtport->fc_portname.u.wwn);
+
+ if (ls_rqst->rqst_len < sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst)) {
+ errmsg_ind = VERR_CR_CONN_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->desc_list_len !=
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst))) {
+ errmsg_ind = VERR_CR_CONN_RQST_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->assoc_id.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) {
+ errmsg_ind = VERR_ASSOC_ID;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ } else if (rqst->assoc_id.desc_len !=
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id))) {
+ errmsg_ind = VERR_ASSOC_ID_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->connect_cmd.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD)) {
+ errmsg_ind = VERR_CR_CONN_CMD;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ } else if (rqst->connect_cmd.desc_len !=
+ nvmf_fc_lsdesc_len(
+ sizeof(struct spdk_nvmf_fc_lsdesc_cr_conn_cmd))) {
+ errmsg_ind = VERR_CR_CONN_CMD_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (!rqst->connect_cmd.ersp_ratio ||
+ (from_be16(&rqst->connect_cmd.ersp_ratio) >=
+ from_be16(&rqst->connect_cmd.sqsize))) {
+ errmsg_ind = VERR_ERSP_RATIO;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_ESRP;
+ } else if (from_be16(&rqst->connect_cmd.sqsize) == 0 ||
+ from_be16(&rqst->connect_cmd.sqsize) > transport->opts.max_queue_depth) {
+ errmsg_ind = VERR_SQSIZE;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_SQ_SIZE;
+ }
+
+ if (rc != FCNVME_RJT_RC_NONE) {
+ goto rjt_cioc;
+ }
+
+ /* find association */
+ assoc = nvmf_fc_ls_find_assoc(tgtport,
+ from_be64(&rqst->assoc_id.association_id));
+ if (!assoc) {
+ errmsg_ind = VERR_NO_ASSOC;
+ rc = FCNVME_RJT_RC_INV_ASSOC;
+ } else if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) {
+ /* association is being deleted - don't allow more connections */
+ errmsg_ind = VERR_NO_ASSOC;
+ rc = FCNVME_RJT_RC_INV_ASSOC;
+ } else if (assoc->conn_count >= transport->opts.max_qpairs_per_ctrlr) {
+ errmsg_ind = VERR_CONN_TOO_MANY;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_Q_ID;
+ }
+
+ if (rc != FCNVME_RJT_RC_NONE) {
+ goto rjt_cioc;
+ }
+
+ fc_conn = nvmf_fc_ls_new_connection(assoc, from_be16(&rqst->connect_cmd.qid),
+ from_be16(&rqst->connect_cmd.ersp_ratio),
+ ls_rqst->rpi,
+ from_be16(&rqst->connect_cmd.sqsize),
+ tgtport);
+ if (!fc_conn) {
+ errmsg_ind = VERR_CONN_ALLOC_FAIL;
+ rc = FCNVME_RJT_RC_INSUFF_RES;
+ ec = FCNVME_RJT_EXP_NONE;
+ goto rjt_cioc;
+ }
+
+ /* format accept response */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Formatting LS accept response for "
+ "assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id,
+ fc_conn->conn_id);
+ bzero(acc, sizeof(*acc));
+ ls_rqst->rsp_len = sizeof(*acc);
+ nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvmf_fc_lsdesc_len(
+ sizeof(struct spdk_nvmf_fc_ls_cr_conn_acc)),
+ FCNVME_LS_CREATE_CONNECTION);
+ to_be32(&acc->conn_id.desc_tag, FCNVME_LSDESC_CONN_ID);
+ acc->conn_id.desc_len =
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id));
+
+ /* assign connection to HWQP poller - also sends response */
+ nvmf_fc_ls_add_conn_to_poller(assoc, ls_rqst, fc_conn, false);
+
+ return;
+
+rjt_cioc:
+ SPDK_ERRLOG("Create Connection LS failed: %s\n", validation_errors[errmsg_ind]);
+
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE,
+ rqst->w0.ls_cmd, rc, ec, 0);
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+}
+
+static void
+nvmf_fc_ls_process_disc(struct spdk_nvmf_fc_nport *tgtport,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst)
+{
+ struct spdk_nvmf_fc_ls_disconnect_rqst *rqst =
+ (struct spdk_nvmf_fc_ls_disconnect_rqst *)ls_rqst->rqstbuf.virt;
+ struct spdk_nvmf_fc_ls_disconnect_acc *acc =
+ (struct spdk_nvmf_fc_ls_disconnect_acc *)ls_rqst->rspbuf.virt;
+ struct spdk_nvmf_fc_association *assoc;
+ int errmsg_ind = 0;
+ uint8_t rc = FCNVME_RJT_RC_NONE;
+ uint8_t ec = FCNVME_RJT_EXP_NONE;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS,
+ "LS_DISC: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d,"
+ "assoc_id=0x%lx\n",
+ ls_rqst->rqst_len, from_be32(&rqst->desc_list_len),
+ from_be32(&rqst->disconn_cmd.desc_len),
+ from_be64(&rqst->assoc_id.association_id));
+
+ if (ls_rqst->rqst_len < sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst)) {
+ errmsg_ind = VERR_DISCONN_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->desc_list_len !=
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst))) {
+ errmsg_ind = VERR_DISCONN_RQST_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->assoc_id.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) {
+ errmsg_ind = VERR_ASSOC_ID;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ } else if (rqst->assoc_id.desc_len !=
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id))) {
+ errmsg_ind = VERR_ASSOC_ID_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ } else if (rqst->disconn_cmd.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) {
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ errmsg_ind = VERR_DISCONN_CMD;
+ } else if (rqst->disconn_cmd.desc_len !=
+ nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd))) {
+ errmsg_ind = VERR_DISCONN_CMD_LEN;
+ rc = FCNVME_RJT_RC_INV_PARAM;
+ ec = FCNVME_RJT_EXP_INV_LEN;
+ }
+
+ if (rc != FCNVME_RJT_RC_NONE) {
+ goto rjt_disc;
+ }
+
+ /* match an active association */
+ assoc = nvmf_fc_ls_find_assoc(tgtport,
+ from_be64(&rqst->assoc_id.association_id));
+ if (!assoc) {
+ errmsg_ind = VERR_NO_ASSOC;
+ rc = FCNVME_RJT_RC_INV_ASSOC;
+ goto rjt_disc;
+ }
+
+ /* format response */
+ bzero(acc, sizeof(*acc));
+ ls_rqst->rsp_len = sizeof(*acc);
+
+ nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvmf_fc_lsdesc_len(
+ sizeof(struct spdk_nvmf_fc_ls_disconnect_acc)),
+ FCNVME_LS_DISCONNECT);
+
+ nvmf_fc_ls_disconnect_assoc(tgtport, ls_rqst, assoc->assoc_id);
+ return;
+
+rjt_disc:
+ SPDK_ERRLOG("Disconnect LS failed: %s\n", validation_errors[errmsg_ind]);
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE,
+ rqst->w0.ls_cmd, rc, ec, 0);
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+}
+
+/* ************************ */
+/* external functions */
+
+void
+nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port)
+{
+}
+
+void
+nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port)
+{
+}
+
+void
+nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst)
+{
+ struct spdk_nvmf_fc_ls_rqst_w0 *w0 =
+ (struct spdk_nvmf_fc_ls_rqst_w0 *)ls_rqst->rqstbuf.virt;
+ uint32_t s_id = ls_rqst->s_id;
+ struct spdk_nvmf_fc_nport *tgtport = ls_rqst->nport;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "LS cmd=%d\n", w0->ls_cmd);
+
+ switch (w0->ls_cmd) {
+ case FCNVME_LS_CREATE_ASSOCIATION:
+ nvmf_fc_ls_process_cass(s_id, tgtport, ls_rqst);
+ break;
+ case FCNVME_LS_CREATE_CONNECTION:
+ nvmf_fc_ls_process_cioc(tgtport, ls_rqst);
+ break;
+ case FCNVME_LS_DISCONNECT:
+ nvmf_fc_ls_process_disc(tgtport, ls_rqst);
+ break;
+ default:
+ SPDK_ERRLOG("Invalid LS cmd=%d\n", w0->ls_cmd);
+ ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(ls_rqst->rspbuf.virt,
+ FCNVME_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
+ FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
+ nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst);
+ }
+}
+
+int
+nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport,
+ uint64_t assoc_id, bool send_abts, bool backend_initiated,
+ spdk_nvmf_fc_del_assoc_cb del_assoc_cb,
+ void *cb_data)
+{
+ return _nvmf_fc_delete_association(tgtport, assoc_id, send_abts, backend_initiated,
+ del_assoc_cb, cb_data, false);
+}
+
+static void
+nvmf_fc_poller_api_cb_event(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_cb_info *cb_info =
+ (struct spdk_nvmf_fc_poller_api_cb_info *) arg;
+
+ assert(cb_info != NULL);
+ cb_info->cb_func(cb_info->cb_data, cb_info->ret);
+}
+
+static void
+nvmf_fc_poller_api_perform_cb(struct spdk_nvmf_fc_poller_api_cb_info *cb_info,
+ enum spdk_nvmf_fc_poller_api_ret ret)
+{
+ if (cb_info->cb_func && cb_info->cb_thread) {
+ cb_info->ret = ret;
+ /* callback to master thread */
+ spdk_thread_send_msg(cb_info->cb_thread, nvmf_fc_poller_api_cb_event,
+ (void *) cb_info);
+ }
+}
+
+static void
+nvmf_fc_poller_api_add_connection(void *arg)
+{
+ enum spdk_nvmf_fc_poller_api_ret ret = SPDK_NVMF_FC_POLLER_API_SUCCESS;
+ struct spdk_nvmf_fc_poller_api_add_connection_args *conn_args =
+ (struct spdk_nvmf_fc_poller_api_add_connection_args *)arg;
+ struct spdk_nvmf_fc_conn *fc_conn;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Poller add connection, conn_id 0x%lx\n",
+ conn_args->fc_conn->conn_id);
+
+ /* make sure connection is not already in poller's list */
+ fc_conn = nvmf_fc_hwqp_find_fc_conn(conn_args->fc_conn->hwqp,
+ conn_args->fc_conn->conn_id);
+ if (fc_conn) {
+		SPDK_ERRLOG("Duplicate connection found\n");
+		ret = SPDK_NVMF_FC_POLLER_API_DUP_CONN_ID;
+	} else {
+		SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API,
+			      "conn_id=%lx\n", conn_args->fc_conn->conn_id);
+ TAILQ_INSERT_TAIL(&conn_args->fc_conn->hwqp->connection_list,
+ conn_args->fc_conn, link);
+ }
+
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, ret);
+}
+
+static void
+nvmf_fc_poller_api_quiesce_queue(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_quiesce_queue_args *q_args =
+ (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *) arg;
+ struct spdk_nvmf_fc_request *fc_req = NULL, *tmp;
+
+	/* The queue should already be quiesced, but make sure it is marked offline */
+ q_args->hwqp->state = SPDK_FC_HWQP_OFFLINE;
+
+ /*
+ * Kill all the outstanding commands that are in the transfer state and
+ * in the process of being aborted.
+ * We can run into this situation if an adapter reset happens when an I_T Nexus delete
+ * is in progress.
+ */
+ TAILQ_FOREACH_SAFE(fc_req, &q_args->hwqp->in_use_reqs, link, tmp) {
+ if (nvmf_fc_req_in_xfer(fc_req) && fc_req->is_aborted == true) {
+ nvmf_fc_poller_api_func(q_args->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE,
+ (void *)fc_req);
+ }
+ }
+
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(&q_args->cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS);
+}
+
+static void
+nvmf_fc_poller_api_activate_queue(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_quiesce_queue_args *q_args =
+ (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *) arg;
+
+ q_args->hwqp->state = SPDK_FC_HWQP_ONLINE;
+
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(&q_args->cb_info, 0);
+}
+
+static void
+nvmf_fc_disconnect_qpair_cb(void *ctx)
+{
+ struct spdk_nvmf_fc_poller_api_cb_info *cb_info = ctx;
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS);
+}
+
+static void
+nvmf_fc_poller_conn_abort_done(void *hwqp, int32_t status, void *cb_args)
+{
+ struct spdk_nvmf_fc_poller_api_del_connection_args *conn_args = cb_args;
+
+ if (conn_args->fc_request_cnt) {
+ conn_args->fc_request_cnt -= 1;
+ }
+
+ if (!conn_args->fc_request_cnt) {
+ if (!TAILQ_EMPTY(&conn_args->hwqp->connection_list)) {
+ /* All the requests for this connection are aborted. */
+ TAILQ_REMOVE(&conn_args->hwqp->connection_list, conn_args->fc_conn, link);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Connection deleted, conn_id 0x%lx\n",
+ conn_args->fc_conn->conn_id);
+
+ if (!conn_args->backend_initiated) {
+ /* disconnect qpair from nvmf controller */
+ spdk_nvmf_qpair_disconnect(&conn_args->fc_conn->qpair,
+ nvmf_fc_disconnect_qpair_cb, &conn_args->cb_info);
+ }
+ } else {
+ /*
+ * Duplicate connection delete can happen if one is
+ * coming in via an association disconnect and the other
+ * is initiated by a port reset.
+ */
+			SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Duplicate conn delete.\n");
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS);
+ }
+ }
+}
+
+static void
+nvmf_fc_poller_api_del_connection(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_del_connection_args *conn_args =
+ (struct spdk_nvmf_fc_poller_api_del_connection_args *)arg;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_request *fc_req = NULL, *tmp;
+ struct spdk_nvmf_fc_hwqp *hwqp = conn_args->hwqp;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Poller delete connection, conn_id 0x%lx\n",
+ conn_args->fc_conn->conn_id);
+
+ /* find the connection in poller's list */
+ fc_conn = nvmf_fc_hwqp_find_fc_conn(hwqp, conn_args->fc_conn->conn_id);
+ if (!fc_conn) {
+ /* perform callback */
+ nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, SPDK_NVMF_FC_POLLER_API_NO_CONN_ID);
+ return;
+ }
+
+ conn_args->fc_request_cnt = 0;
+
+ TAILQ_FOREACH_SAFE(fc_req, &hwqp->in_use_reqs, link, tmp) {
+ if (fc_req->fc_conn->conn_id == fc_conn->conn_id) {
+ if (nvmf_qpair_is_admin_queue(&fc_conn->qpair) &&
+ (fc_req->req.cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST)) {
+ /* AER will be cleaned by spdk_nvmf_qpair_disconnect. */
+ continue;
+ }
+
+ conn_args->fc_request_cnt += 1;
+ nvmf_fc_request_abort(fc_req, conn_args->send_abts,
+ nvmf_fc_poller_conn_abort_done,
+ conn_args);
+ }
+ }
+
+ if (!conn_args->fc_request_cnt) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Connection deleted.\n");
+ TAILQ_REMOVE(&hwqp->connection_list, fc_conn, link);
+
+ if (!conn_args->backend_initiated) {
+ /* disconnect qpair from nvmf controller */
+ spdk_nvmf_qpair_disconnect(&fc_conn->qpair, nvmf_fc_disconnect_qpair_cb,
+ &conn_args->cb_info);
+ }
+ }
+}
+
+static void
+nvmf_fc_poller_abts_done(void *hwqp, int32_t status, void *cb_args)
+{
+ struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = cb_args;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API,
+ "ABTS poller done, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+ args->ctx->rpi, args->ctx->oxid, args->ctx->rxid);
+
+ nvmf_fc_poller_api_perform_cb(&args->cb_info,
+ SPDK_NVMF_FC_POLLER_API_SUCCESS);
+}
+
+static void
+nvmf_fc_poller_api_abts_received(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = arg;
+ struct spdk_nvmf_fc_request *fc_req = NULL;
+ struct spdk_nvmf_fc_hwqp *hwqp = args->hwqp;
+
+ TAILQ_FOREACH(fc_req, &hwqp->in_use_reqs, link) {
+ if ((fc_req->rpi == args->ctx->rpi) &&
+ (fc_req->oxid == args->ctx->oxid)) {
+ nvmf_fc_request_abort(fc_req, false,
+ nvmf_fc_poller_abts_done, args);
+ return;
+ }
+ }
+
+ nvmf_fc_poller_api_perform_cb(&args->cb_info,
+ SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND);
+}
+
+static void
+nvmf_fc_poller_api_queue_sync(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_queue_sync_args *args = arg;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API,
+ "HWQP sync requested for u_id = 0x%lx\n", args->u_id);
+
+	/* Add these args to the hwqp's sync_cbs list */
+ TAILQ_INSERT_TAIL(&args->hwqp->sync_cbs, args, link);
+}
+
+static void
+nvmf_fc_poller_api_queue_sync_done(void *arg)
+{
+ struct spdk_nvmf_fc_poller_api_queue_sync_done_args *args = arg;
+ struct spdk_nvmf_fc_hwqp *hwqp = args->hwqp;
+ uint64_t tag = args->tag;
+ struct spdk_nvmf_fc_poller_api_queue_sync_args *sync_args = NULL, *tmp = NULL;
+
+ assert(args != NULL);
+
+ TAILQ_FOREACH_SAFE(sync_args, &hwqp->sync_cbs, link, tmp) {
+ if (sync_args->u_id == tag) {
+ /* Queue successfully synced. Remove from cb list */
+ TAILQ_REMOVE(&hwqp->sync_cbs, sync_args, link);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API,
+ "HWQP sync done for u_id = 0x%lx\n", sync_args->u_id);
+
+ /* Return the status to poller */
+ nvmf_fc_poller_api_perform_cb(&sync_args->cb_info,
+ SPDK_NVMF_FC_POLLER_API_SUCCESS);
+ return;
+ }
+ }
+
+ free(arg);
+ /* note: no callback from this api */
+}
+
+static void
+nvmf_fc_poller_api_add_hwqp(void *arg)
+{
+ struct spdk_nvmf_fc_hwqp *hwqp = (struct spdk_nvmf_fc_hwqp *)arg;
+
+ hwqp->lcore_id = spdk_env_get_current_core(); /* for tracing purposes only */
+ TAILQ_INSERT_TAIL(&hwqp->fgroup->hwqp_list, hwqp, link);
+ /* note: no callback from this api */
+}
+
+static void
+nvmf_fc_poller_api_remove_hwqp(void *arg)
+{
+ struct spdk_nvmf_fc_hwqp *hwqp = (struct spdk_nvmf_fc_hwqp *)arg;
+ struct spdk_nvmf_fc_poll_group *fgroup = hwqp->fgroup;
+
+ TAILQ_REMOVE(&fgroup->hwqp_list, hwqp, link);
+ hwqp->fgroup = NULL;
+ /* note: no callback from this api */
+}
+
+enum spdk_nvmf_fc_poller_api_ret
+nvmf_fc_poller_api_func(struct spdk_nvmf_fc_hwqp *hwqp, enum spdk_nvmf_fc_poller_api api,
+			void *api_args)
+{
+	switch (api) {
+ case SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_add_connection, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_del_connection, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE:
+ /* quiesce q polling now, don't wait for poller to do it */
+ hwqp->state = SPDK_FC_HWQP_OFFLINE;
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_quiesce_queue, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_ACTIVATE_QUEUE:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_activate_queue, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_abts_received, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_request_abort_complete, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_queue_sync, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC_DONE:
+ spdk_thread_send_msg(hwqp->thread,
+ nvmf_fc_poller_api_queue_sync_done, api_args);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_ADD_HWQP:
+ spdk_thread_send_msg(hwqp->thread, nvmf_fc_poller_api_add_hwqp, (void *) hwqp);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_REMOVE_HWQP:
+ spdk_thread_send_msg(hwqp->thread, nvmf_fc_poller_api_remove_hwqp, (void *) hwqp);
+ break;
+
+ case SPDK_NVMF_FC_POLLER_API_ADAPTER_EVENT:
+ case SPDK_NVMF_FC_POLLER_API_AEN:
+ default:
+		SPDK_ERRLOG("Invalid poller API %d\n", api);
+ return SPDK_NVMF_FC_POLLER_API_INVALID_ARG;
+ }
+
+ return SPDK_NVMF_FC_POLLER_API_SUCCESS;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_poller_api", SPDK_LOG_NVMF_FC_POLLER_API)
+SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_ls", SPDK_LOG_NVMF_FC_LS)
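
For context, the poller API entry points above all follow the same cross-thread pattern: the argument structure is handed to the hwqp's SPDK thread with spdk_thread_send_msg(), the work runs on that thread, and the completion is bounced back to the requesting thread through the cb_info callback (see nvmf_fc_poller_api_perform_cb()). A minimal sketch of that round trip, with every example_* name invented purely for illustration:

    /* Illustrative only: a stripped-down version of the send-message/callback
     * round trip used by nvmf_fc_poller_api_func(). */
    #include "spdk/thread.h"

    struct example_api_args {
    	struct spdk_thread *cb_thread;		/* thread that issued the request */
    	void (*cb_func)(void *ctx, int ret);
    	void *cb_ctx;
    	int ret;
    };

    static void
    example_complete_on_caller(void *arg)
    {
    	struct example_api_args *args = arg;

    	/* Runs on the caller's thread, mirroring nvmf_fc_poller_api_cb_event(). */
    	args->cb_func(args->cb_ctx, args->ret);
    }

    static void
    example_do_work_on_poller(void *arg)
    {
    	struct example_api_args *args = arg;

    	/* Work that must run on the hwqp poller thread goes here. */
    	args->ret = 0;

    	/* Bounce the completion back to the thread that made the request. */
    	spdk_thread_send_msg(args->cb_thread, example_complete_on_caller, args);
    }

    static void
    example_poller_api_call(struct spdk_thread *poller_thread, struct example_api_args *args)
    {
    	args->cb_thread = spdk_get_thread();
    	spdk_thread_send_msg(poller_thread, example_do_work_on_poller, args);
    }
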
diff --git a/src/spdk/lib/nvmf/nvmf.c b/src/spdk/lib/nvmf/nvmf.c
new file mode 100644
index 000000000..73fa0742e
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf.c
@@ -0,0 +1,1457 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2018-2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/bit_array.h"
+#include "spdk/conf.h"
+#include "spdk/thread.h"
+#include "spdk/nvmf.h"
+#include "spdk/trace.h"
+#include "spdk/endian.h"
+#include "spdk/string.h"
+
+#include "spdk_internal/log.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
+
+#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
+
+static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
+
+typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status);
+static void nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf);
+
+/* supplied to a single call to nvmf_qpair_disconnect */
+struct nvmf_qpair_disconnect_ctx {
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ nvmf_qpair_disconnect_cb cb_fn;
+ struct spdk_thread *thread;
+ void *ctx;
+ uint16_t qid;
+};
+
+/*
+ * There are several cases where we need to iterate through the list of all qpairs and selectively delete them.
+ * To do this sequentially, without overlap, this context records where the iteration left off so that
+ * nvmf_qpair_disconnect can be called on the next desired qpair.
+ */
+struct nvmf_qpair_disconnect_many_ctx {
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_poll_group *group;
+ spdk_nvmf_poll_group_mod_done cpl_fn;
+ void *cpl_ctx;
+};
+
+static void
+nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair,
+ enum spdk_nvmf_qpair_state state)
+{
+ assert(qpair != NULL);
+ assert(qpair->group->thread == spdk_get_thread());
+
+ qpair->state = state;
+}
+
+static int
+nvmf_poll_group_poll(void *ctx)
+{
+ struct spdk_nvmf_poll_group *group = ctx;
+ int rc;
+ int count = 0;
+ struct spdk_nvmf_transport_poll_group *tgroup;
+
+ TAILQ_FOREACH(tgroup, &group->tgroups, link) {
+ rc = nvmf_transport_poll_group_poll(tgroup);
+ if (rc < 0) {
+ return SPDK_POLLER_BUSY;
+ }
+ count += rc;
+ }
+
+ return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
+}
+
+static int
+nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
+{
+ struct spdk_nvmf_tgt *tgt = io_device;
+ struct spdk_nvmf_poll_group *group = ctx_buf;
+ struct spdk_nvmf_transport *transport;
+ uint32_t sid;
+
+ TAILQ_INIT(&group->tgroups);
+ TAILQ_INIT(&group->qpairs);
+
+ TAILQ_FOREACH(transport, &tgt->transports, link) {
+ nvmf_poll_group_add_transport(group, transport);
+ }
+
+ group->num_sgroups = tgt->max_subsystems;
+ group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
+ if (!group->sgroups) {
+ return -ENOMEM;
+ }
+
+ for (sid = 0; sid < tgt->max_subsystems; sid++) {
+ struct spdk_nvmf_subsystem *subsystem;
+
+ subsystem = tgt->subsystems[sid];
+ if (!subsystem) {
+ continue;
+ }
+
+ if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
+ nvmf_tgt_destroy_poll_group(io_device, ctx_buf);
+ return -1;
+ }
+ }
+
+ pthread_mutex_lock(&tgt->mutex);
+ TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link);
+ pthread_mutex_unlock(&tgt->mutex);
+
+ group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0);
+ group->thread = spdk_get_thread();
+
+ return 0;
+}
+
+static void
+nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf)
+{
+ struct spdk_nvmf_tgt *tgt = io_device;
+ struct spdk_nvmf_poll_group *group = ctx_buf;
+ struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ uint32_t sid, nsid;
+
+ pthread_mutex_lock(&tgt->mutex);
+ TAILQ_REMOVE(&tgt->poll_groups, group, link);
+ pthread_mutex_unlock(&tgt->mutex);
+
+ TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
+ TAILQ_REMOVE(&group->tgroups, tgroup, link);
+ nvmf_transport_poll_group_destroy(tgroup);
+ }
+
+ for (sid = 0; sid < group->num_sgroups; sid++) {
+ sgroup = &group->sgroups[sid];
+
+ for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
+ if (sgroup->ns_info[nsid].channel) {
+ spdk_put_io_channel(sgroup->ns_info[nsid].channel);
+ sgroup->ns_info[nsid].channel = NULL;
+ }
+ }
+
+ free(sgroup->ns_info);
+ }
+
+ free(group->sgroups);
+
+ if (group->destroy_cb_fn) {
+ group->destroy_cb_fn(group->destroy_cb_arg, 0);
+ }
+}
+
+static void
+_nvmf_tgt_disconnect_next_qpair(void *ctx)
+{
+ struct spdk_nvmf_qpair *qpair;
+ struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_poll_group *group = qpair_ctx->group;
+ struct spdk_io_channel *ch;
+ int rc = 0;
+
+ qpair = TAILQ_FIRST(&group->qpairs);
+
+ if (qpair) {
+ rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx);
+ }
+
+ if (!qpair || rc != 0) {
+ /* When the refcount from the channels reaches 0, nvmf_tgt_destroy_poll_group will be called. */
+ ch = spdk_io_channel_from_ctx(group);
+ spdk_put_io_channel(ch);
+ free(qpair_ctx);
+ }
+}
+
+static void
+nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
+{
+ struct nvmf_qpair_disconnect_many_ctx *ctx;
+
+ ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
+
+ if (!ctx) {
+ SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n");
+ return;
+ }
+
+ spdk_poller_unregister(&group->poller);
+
+ ctx->group = group;
+ _nvmf_tgt_disconnect_next_qpair(ctx);
+}
+
+struct spdk_nvmf_tgt *
+spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
+{
+ struct spdk_nvmf_tgt *tgt, *tmp_tgt;
+
+ if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
+ SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
+ return NULL;
+ }
+
+ TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) {
+ if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) {
+ SPDK_ERRLOG("Provided target name must be unique.\n");
+ return NULL;
+ }
+ }
+
+ tgt = calloc(1, sizeof(*tgt));
+ if (!tgt) {
+ return NULL;
+ }
+
+ snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name);
+
+	/* opts is already dereferenced above, so only max_subsystems needs checking here */
+	if (!opts->max_subsystems) {
+ tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
+ } else {
+ tgt->max_subsystems = opts->max_subsystems;
+ }
+
+ tgt->discovery_genctr = 0;
+ TAILQ_INIT(&tgt->transports);
+ TAILQ_INIT(&tgt->poll_groups);
+
+ tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
+ if (!tgt->subsystems) {
+ free(tgt);
+ return NULL;
+ }
+
+ pthread_mutex_init(&tgt->mutex, NULL);
+
+ TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
+
+ spdk_io_device_register(tgt,
+ nvmf_tgt_create_poll_group,
+ nvmf_tgt_destroy_poll_group,
+ sizeof(struct spdk_nvmf_poll_group),
+ tgt->name);
+
+ return tgt;
+}
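
A minimal sketch of how a caller might use spdk_nvmf_tgt_create(); the target name and subsystem cap below are arbitrary illustration values, and example_create_tgt() is not part of the library:

    #include <stdio.h>

    #include "spdk/nvmf.h"

    static struct spdk_nvmf_tgt *
    example_create_tgt(void)
    {
    	struct spdk_nvmf_target_opts opts = {};

    	/* The name must be unique among targets and shorter than NVMF_TGT_NAME_MAX_LENGTH. */
    	snprintf(opts.name, sizeof(opts.name), "example_tgt");
    	opts.max_subsystems = 64;	/* 0 falls back to SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS (1024) */

    	return spdk_nvmf_tgt_create(&opts);
    }
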
+
+static void
+nvmf_tgt_destroy_cb(void *io_device)
+{
+ struct spdk_nvmf_tgt *tgt = io_device;
+ struct spdk_nvmf_transport *transport, *transport_tmp;
+ spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn;
+ void *destroy_cb_arg;
+ uint32_t i;
+
+ if (tgt->subsystems) {
+ for (i = 0; i < tgt->max_subsystems; i++) {
+ if (tgt->subsystems[i]) {
+ nvmf_subsystem_remove_all_listeners(tgt->subsystems[i], true);
+ spdk_nvmf_subsystem_destroy(tgt->subsystems[i]);
+ }
+ }
+ free(tgt->subsystems);
+ }
+
+ TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, transport_tmp) {
+ TAILQ_REMOVE(&tgt->transports, transport, link);
+ spdk_nvmf_transport_destroy(transport);
+ }
+
+ destroy_cb_fn = tgt->destroy_cb_fn;
+ destroy_cb_arg = tgt->destroy_cb_arg;
+
+ free(tgt);
+
+ if (destroy_cb_fn) {
+ destroy_cb_fn(destroy_cb_arg, 0);
+ }
+}
+
+void
+spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
+ spdk_nvmf_tgt_destroy_done_fn cb_fn,
+ void *cb_arg)
+{
+ tgt->destroy_cb_fn = cb_fn;
+ tgt->destroy_cb_arg = cb_arg;
+
+ TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
+
+ spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
+}
+
+const char *
+spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt)
+{
+ return tgt->name;
+}
+
+struct spdk_nvmf_tgt *
+spdk_nvmf_get_tgt(const char *name)
+{
+ struct spdk_nvmf_tgt *tgt;
+ uint32_t num_targets = 0;
+
+ TAILQ_FOREACH(tgt, &g_nvmf_tgts, link) {
+ if (name) {
+ if (!strncmp(tgt->name, name, NVMF_TGT_NAME_MAX_LENGTH)) {
+ return tgt;
+ }
+ }
+ num_targets++;
+ }
+
+	/*
+	 * Special case: if no name was specified and there is
+	 * exactly one target, return that target. If there is
+	 * more than one target, a name must be specified.
+	 */
+ if (!name && num_targets == 1) {
+ return TAILQ_FIRST(&g_nvmf_tgts);
+ }
+
+ return NULL;
+}
+
+struct spdk_nvmf_tgt *
+spdk_nvmf_get_first_tgt(void)
+{
+ return TAILQ_FIRST(&g_nvmf_tgts);
+}
+
+struct spdk_nvmf_tgt *
+spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev)
+{
+ return TAILQ_NEXT(prev, link);
+}
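
A small, hypothetical usage sketch of the iterator pair above (example_list_targets() is not part of the library):

    #include <stdio.h>

    #include "spdk/nvmf.h"

    static void
    example_list_targets(void)
    {
    	struct spdk_nvmf_tgt *tgt;

    	/* Walk every registered target and print its name. */
    	for (tgt = spdk_nvmf_get_first_tgt(); tgt != NULL;
    	     tgt = spdk_nvmf_get_next_tgt(tgt)) {
    		printf("nvmf target: %s\n", spdk_nvmf_tgt_get_name(tgt));
    	}
    }
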
+
+static void
+nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_host *host;
+ struct spdk_nvmf_subsystem_listener *listener;
+ const struct spdk_nvme_transport_id *trid;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_ns_opts ns_opts;
+ uint32_t max_namespaces;
+ char uuid_str[SPDK_UUID_STRING_LEN];
+ const char *adrfam;
+
+ if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) {
+ return;
+ }
+
+ /* { */
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_create_subsystem");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+ spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem));
+ spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
+ spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem));
+
+ max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
+ if (max_namespaces != 0) {
+ spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
+ }
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+
+ for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
+ listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
+ trid = spdk_nvmf_subsystem_listener_get_trid(listener);
+
+ adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+
+ /* "listen_address" : { */
+ spdk_json_write_named_object_begin(w, "listen_address");
+
+ spdk_json_write_named_string(w, "trtype", trid->trstring);
+ if (adrfam) {
+ spdk_json_write_named_string(w, "adrfam", adrfam);
+ }
+
+ spdk_json_write_named_string(w, "traddr", trid->traddr);
+ spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
+ /* } "listen_address" */
+ spdk_json_write_object_end(w);
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+
+ for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
+ host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+ spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host));
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns");
+
+ /* "params" : { */
+ spdk_json_write_named_object_begin(w, "params");
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+
+ /* "namespace" : { */
+ spdk_json_write_named_object_begin(w, "namespace");
+
+ spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns));
+ spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
+
+ if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch");
+ spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]),
+ from_be64(&ns_opts.nguid[8]));
+ }
+
+ if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch");
+ spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64));
+ }
+
+ if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+ }
+
+ /* "namespace" */
+ spdk_json_write_object_end(w);
+
+ /* } "params" */
+ spdk_json_write_object_end(w);
+
+ /* } */
+ spdk_json_write_object_end(w);
+ }
+}
+
+void
+spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt)
+{
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_transport *transport;
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_set_max_subsystems");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_uint32(w, "max_subsystems", tgt->max_subsystems);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ /* write transports */
+ TAILQ_FOREACH(transport, &tgt->transports, link) {
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "nvmf_create_transport");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "trtype", spdk_nvme_transport_id_trtype_str(transport->ops->type));
+ spdk_json_write_named_uint32(w, "max_queue_depth", transport->opts.max_queue_depth);
+ spdk_json_write_named_uint32(w, "max_io_qpairs_per_ctrlr",
+ transport->opts.max_qpairs_per_ctrlr - 1);
+ spdk_json_write_named_uint32(w, "in_capsule_data_size", transport->opts.in_capsule_data_size);
+ spdk_json_write_named_uint32(w, "max_io_size", transport->opts.max_io_size);
+ spdk_json_write_named_uint32(w, "io_unit_size", transport->opts.io_unit_size);
+ spdk_json_write_named_uint32(w, "max_aq_depth", transport->opts.max_aq_depth);
+ if (transport->ops->type == SPDK_NVME_TRANSPORT_RDMA) {
+ spdk_json_write_named_uint32(w, "max_srq_depth", transport->opts.max_srq_depth);
+ }
+ spdk_json_write_named_uint32(w, "abort_timeout_sec", transport->opts.abort_timeout_sec);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ }
+
+ subsystem = spdk_nvmf_subsystem_get_first(tgt);
+ while (subsystem) {
+ nvmf_write_subsystem_config_json(w, subsystem);
+ subsystem = spdk_nvmf_subsystem_get_next(subsystem);
+ }
+}
+
+int
+spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_transport *transport;
+ const char *trtype;
+ int rc;
+
+ transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
+ if (!transport) {
+ trtype = spdk_nvme_transport_id_trtype_str(trid->trtype);
+ if (trtype != NULL) {
+ SPDK_ERRLOG("Unable to listen on transport %s. The transport must be created first.\n", trtype);
+ } else {
+ SPDK_ERRLOG("The specified trtype %d is unknown. Please make sure that it is properly registered.\n",
+ trid->trtype);
+ }
+
+ return -EINVAL;
+ }
+
+ rc = spdk_nvmf_transport_listen(transport, trid);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr);
+ }
+
+ return rc;
+}
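
As a hedged illustration, a caller that has already created and added a TCP transport to the target might start listening as shown below; the address and port are placeholders, and example_listen_tcp() is invented for this sketch:

    #include <stdio.h>

    #include "spdk/nvmf.h"

    static int
    example_listen_tcp(struct spdk_nvmf_tgt *tgt)
    {
    	struct spdk_nvme_transport_id trid = {};

    	trid.trtype = SPDK_NVME_TRANSPORT_TCP;
    	trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
    	snprintf(trid.trstring, sizeof(trid.trstring), "TCP");
    	snprintf(trid.traddr, sizeof(trid.traddr), "192.168.0.10");
    	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "4420");

    	/* Fails with -EINVAL if no transport named "TCP" was added to the target. */
    	return spdk_nvmf_tgt_listen(tgt, &trid);
    }
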
+
+int
+spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_transport *transport;
+ const char *trtype;
+ int rc;
+
+ transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
+ if (!transport) {
+ trtype = spdk_nvme_transport_id_trtype_str(trid->trtype);
+ if (trtype != NULL) {
+ SPDK_ERRLOG("Unable to stop listen on transport %s. The transport must be created first.\n",
+ trtype);
+ } else {
+ SPDK_ERRLOG("The specified trtype %d is unknown. Please make sure that it is properly registered.\n",
+ trid->trtype);
+ }
+ return -EINVAL;
+ }
+
+ rc = spdk_nvmf_transport_stop_listen(transport, trid);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to stop listening on address '%s'\n", trid->traddr);
+ return rc;
+ }
+ return 0;
+}
+
+struct spdk_nvmf_tgt_add_transport_ctx {
+ struct spdk_nvmf_tgt *tgt;
+ struct spdk_nvmf_transport *transport;
+ spdk_nvmf_tgt_add_transport_done_fn cb_fn;
+ void *cb_arg;
+};
+
+static void
+_nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ ctx->cb_fn(ctx->cb_arg, status);
+
+ free(ctx);
+}
+
+static void
+_nvmf_tgt_add_transport(struct spdk_io_channel_iter *i)
+{
+ struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ rc = nvmf_poll_group_add_transport(group, ctx->transport);
+ spdk_for_each_channel_continue(i, rc);
+}
+
+void
+spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
+			    struct spdk_nvmf_transport *transport,
+			    spdk_nvmf_tgt_add_transport_done_fn cb_fn,
+			    void *cb_arg)
+{
+ struct spdk_nvmf_tgt_add_transport_ctx *ctx;
+
+ if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->name)) {
+ cb_fn(cb_arg, -EEXIST);
+ return; /* transport already created */
+ }
+
+ transport->tgt = tgt;
+ TAILQ_INSERT_TAIL(&tgt->transports, transport, link);
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ ctx->tgt = tgt;
+ ctx->transport = transport;
+ ctx->cb_fn = cb_fn;
+ ctx->cb_arg = cb_arg;
+
+ spdk_for_each_channel(tgt,
+ _nvmf_tgt_add_transport,
+ ctx,
+ _nvmf_tgt_add_transport_done);
+}
+
+struct spdk_nvmf_subsystem *
+spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn)
+{
+ struct spdk_nvmf_subsystem *subsystem;
+ uint32_t sid;
+
+ if (!subnqn) {
+ return NULL;
+ }
+
+ /* Ensure that subnqn is null terminated */
+ if (!memchr(subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
+ SPDK_ERRLOG("Connect SUBNQN is not null terminated\n");
+ return NULL;
+ }
+
+ for (sid = 0; sid < tgt->max_subsystems; sid++) {
+ subsystem = tgt->subsystems[sid];
+ if (subsystem == NULL) {
+ continue;
+ }
+
+ if (strcmp(subnqn, subsystem->subnqn) == 0) {
+ return subsystem;
+ }
+ }
+
+ return NULL;
+}
+
+struct spdk_nvmf_transport *
+spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, const char *transport_name)
+{
+ struct spdk_nvmf_transport *transport;
+
+ TAILQ_FOREACH(transport, &tgt->transports, link) {
+ if (!strncasecmp(transport->ops->name, transport_name, SPDK_NVMF_TRSTRING_MAX_LEN)) {
+ return transport;
+ }
+ }
+ return NULL;
+}
+
+struct nvmf_new_qpair_ctx {
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_poll_group *group;
+};
+
+static void
+_nvmf_poll_group_add(void *_ctx)
+{
+ struct nvmf_new_qpair_ctx *ctx = _ctx;
+ struct spdk_nvmf_qpair *qpair = ctx->qpair;
+ struct spdk_nvmf_poll_group *group = ctx->group;
+
+ free(_ctx);
+
+ if (spdk_nvmf_poll_group_add(group, qpair) != 0) {
+ SPDK_ERRLOG("Unable to add the qpair to a poll group.\n");
+ spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
+ }
+}
+
+void
+spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_poll_group *group;
+ struct nvmf_new_qpair_ctx *ctx;
+
+ group = spdk_nvmf_get_optimal_poll_group(qpair);
+ if (group == NULL) {
+ if (tgt->next_poll_group == NULL) {
+ tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups);
+ if (tgt->next_poll_group == NULL) {
+ SPDK_ERRLOG("No poll groups exist.\n");
+ spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
+ return;
+ }
+ }
+ group = tgt->next_poll_group;
+ tgt->next_poll_group = TAILQ_NEXT(group, link);
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+		SPDK_ERRLOG("Unable to allocate context to add the qpair to a poll group.\n");
+ spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
+ return;
+ }
+
+ ctx->qpair = qpair;
+ ctx->group = group;
+
+ spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
+}
+
+uint32_t
+spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt)
+{
+ struct spdk_nvmf_transport *transport, *tmp;
+ uint32_t count = 0;
+
+ TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
+ count += nvmf_transport_accept(transport);
+ }
+
+ return count;
+}
+
+struct spdk_nvmf_poll_group *
+spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
+{
+ struct spdk_io_channel *ch;
+
+ ch = spdk_get_io_channel(tgt);
+ if (!ch) {
+ SPDK_ERRLOG("Unable to get I/O channel for target\n");
+ return NULL;
+ }
+
+ return spdk_io_channel_get_ctx(ch);
+}
+
+void
+spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group,
+ spdk_nvmf_poll_group_destroy_done_fn cb_fn,
+ void *cb_arg)
+{
+ assert(group->destroy_cb_fn == NULL);
+ group->destroy_cb_fn = cb_fn;
+ group->destroy_cb_arg = cb_arg;
+
+ /* This function will put the io_channel associated with this poll group */
+ nvmf_tgt_destroy_poll_group_qpairs(group);
+}
+
+int
+spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ int rc = -1;
+ struct spdk_nvmf_transport_poll_group *tgroup;
+
+ TAILQ_INIT(&qpair->outstanding);
+ qpair->group = group;
+
+ TAILQ_FOREACH(tgroup, &group->tgroups, link) {
+ if (tgroup->transport == qpair->transport) {
+ rc = nvmf_transport_poll_group_add(tgroup, qpair);
+ break;
+ }
+ }
+
+	/* Add the qpair to the group only if it was successfully added to the tgroup */
+ if (rc == 0) {
+ TAILQ_INSERT_TAIL(&group->qpairs, qpair, link);
+ nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE);
+ }
+
+ return rc;
+}
+
+static void
+_nvmf_ctrlr_destruct(void *ctx)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = ctx;
+
+ nvmf_ctrlr_destruct(ctrlr);
+}
+
+static void
+_nvmf_transport_qpair_fini(void *ctx)
+{
+ struct spdk_nvmf_qpair *qpair = ctx;
+
+ nvmf_transport_qpair_fini(qpair);
+}
+
+static void
+_nvmf_ctrlr_free_from_qpair(void *ctx)
+{
+ struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr;
+ uint32_t count;
+
+ spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid);
+ count = spdk_bit_array_count_set(ctrlr->qpair_mask);
+ if (count == 0) {
+ spdk_bit_array_free(&ctrlr->qpair_mask);
+
+ spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
+ }
+
+ spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx->qpair);
+ if (qpair_ctx->cb_fn) {
+ spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
+ }
+ free(qpair_ctx);
+}
+
+void
+spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+ struct spdk_nvmf_transport_poll_group *tgroup;
+ struct spdk_nvmf_request *req, *tmp;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ int rc;
+
+ nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ERROR);
+
+ /* Find the tgroup and remove the qpair from the tgroup */
+ TAILQ_FOREACH(tgroup, &qpair->group->tgroups, link) {
+ if (tgroup->transport == qpair->transport) {
+ rc = nvmf_transport_poll_group_remove(tgroup, qpair);
+ if (rc && (rc != ENOTSUP)) {
+ SPDK_ERRLOG("Cannot remove qpair=%p from transport group=%p\n",
+ qpair, tgroup);
+ }
+ break;
+ }
+ }
+
+ if (ctrlr) {
+ sgroup = &qpair->group->sgroups[ctrlr->subsys->id];
+ TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
+ if (req->qpair == qpair) {
+ TAILQ_REMOVE(&sgroup->queued, req, link);
+ if (nvmf_transport_req_free(req)) {
+ SPDK_ERRLOG("Transport request free error!\n");
+ }
+ }
+ }
+ }
+
+ TAILQ_REMOVE(&qpair->group->qpairs, qpair, link);
+ qpair->group = NULL;
+}
+
+static void
+_nvmf_qpair_destroy(void *ctx, int status)
+{
+ struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
+ struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
+
+ assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
+ qpair_ctx->qid = qpair->qid;
+
+ spdk_nvmf_poll_group_remove(qpair);
+
+ if (!ctrlr || !ctrlr->thread) {
+ nvmf_transport_qpair_fini(qpair);
+ if (qpair_ctx->cb_fn) {
+ spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
+ }
+ free(qpair_ctx);
+ return;
+ }
+
+ qpair_ctx->ctrlr = ctrlr;
+ spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
+}
+
+int
+spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
+{
+ struct nvmf_qpair_disconnect_ctx *qpair_ctx;
+
+ /* If we get a qpair in the uninitialized state, we can just destroy it immediately */
+ if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
+ nvmf_transport_qpair_fini(qpair);
+ if (cb_fn) {
+ cb_fn(ctx);
+ }
+ return 0;
+ }
+
+ /* The queue pair must be disconnected from the thread that owns it */
+ assert(qpair->group->thread == spdk_get_thread());
+
+ if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
+ /* This can occur if the connection is killed by the target,
+ * which results in a notification that the connection
+ * died. Send a message to defer the processing of this
+ * callback. This allows the stack to unwind in the case
+ * where a bunch of connections are disconnected in
+ * a loop. */
+ if (cb_fn) {
+ spdk_thread_send_msg(qpair->group->thread, cb_fn, ctx);
+ }
+ return 0;
+ }
+
+ assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
+ nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING);
+
+ qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
+ if (!qpair_ctx) {
+ SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
+ return -ENOMEM;
+ }
+
+ qpair_ctx->qpair = qpair;
+ qpair_ctx->cb_fn = cb_fn;
+ qpair_ctx->thread = qpair->group->thread;
+ qpair_ctx->ctx = ctx;
+
+ /* Check for outstanding I/O */
+ if (!TAILQ_EMPTY(&qpair->outstanding)) {
+ qpair->state_cb = _nvmf_qpair_destroy;
+ qpair->state_cb_arg = qpair_ctx;
+ nvmf_qpair_free_aer(qpair);
+ return 0;
+ }
+
+ _nvmf_qpair_destroy(qpair_ctx, 0);
+
+ return 0;
+}
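
A minimal sketch of the intended calling pattern, assuming it runs on the thread that owns the qpair's poll group (the example_* names are illustrative, not library code):

    #include "spdk/log.h"
    #include "spdk/nvmf.h"

    static void
    example_disconnect_done(void *ctx)
    {
    	struct spdk_nvmf_qpair *qpair = ctx;

    	SPDK_NOTICELOG("qpair %p fully torn down\n", qpair);
    }

    static void
    example_disconnect(struct spdk_nvmf_qpair *qpair)
    {
    	/* Must run on qpair->group->thread; the completion fires once all
    	 * outstanding requests on the qpair have been released. */
    	spdk_nvmf_qpair_disconnect(qpair, example_disconnect_done, qpair);
    }
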
+
+int
+spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_transport_qpair_get_peer_trid(qpair, trid);
+}
+
+int
+spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_transport_qpair_get_local_trid(qpair, trid);
+}
+
+int
+spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_transport_qpair_get_listen_trid(qpair, trid);
+}
+
+int
+nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_transport_poll_group *tgroup;
+
+ TAILQ_FOREACH(tgroup, &group->tgroups, link) {
+ if (tgroup->transport == transport) {
+ /* Transport already in the poll group */
+ return 0;
+ }
+ }
+
+ tgroup = nvmf_transport_poll_group_create(transport);
+ if (!tgroup) {
+ SPDK_ERRLOG("Unable to create poll group for transport\n");
+ return -1;
+ }
+
+ tgroup->group = group;
+ TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link);
+
+ return 0;
+}
+
+static int
+poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ uint32_t new_num_ns, old_num_ns;
+ uint32_t i, j;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_registrant *reg, *tmp;
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
+ struct spdk_nvmf_ctrlr *ctrlr;
+ bool ns_changed;
+
+ /* Make sure our poll group has memory for this subsystem allocated */
+ if (subsystem->id >= group->num_sgroups) {
+ return -ENOMEM;
+ }
+
+ sgroup = &group->sgroups[subsystem->id];
+
+ /* Make sure the array of namespace information is the correct size */
+ new_num_ns = subsystem->max_nsid;
+ old_num_ns = sgroup->num_ns;
+
+ ns_changed = false;
+
+ if (old_num_ns == 0) {
+ if (new_num_ns > 0) {
+ /* First allocation */
+ sgroup->ns_info = calloc(new_num_ns, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
+ if (!sgroup->ns_info) {
+ return -ENOMEM;
+ }
+ }
+ } else if (new_num_ns > old_num_ns) {
+ void *buf;
+
+ /* Make the array larger */
+ buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
+ if (!buf) {
+ return -ENOMEM;
+ }
+
+ sgroup->ns_info = buf;
+
+ /* Null out the new namespace information slots */
+ for (i = old_num_ns; i < new_num_ns; i++) {
+ memset(&sgroup->ns_info[i], 0, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
+ }
+ } else if (new_num_ns < old_num_ns) {
+ void *buf;
+
+ /* Free the extra I/O channels */
+ for (i = new_num_ns; i < old_num_ns; i++) {
+ ns_info = &sgroup->ns_info[i];
+
+ if (ns_info->channel) {
+ spdk_put_io_channel(ns_info->channel);
+ ns_info->channel = NULL;
+ }
+ }
+
+ /* Make the array smaller */
+ if (new_num_ns > 0) {
+ buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
+ if (!buf) {
+ return -ENOMEM;
+ }
+ sgroup->ns_info = buf;
+ } else {
+ free(sgroup->ns_info);
+ sgroup->ns_info = NULL;
+ }
+ }
+
+ sgroup->num_ns = new_num_ns;
+
+ /* Detect bdevs that were added or removed */
+ for (i = 0; i < sgroup->num_ns; i++) {
+ ns = subsystem->ns[i];
+ ns_info = &sgroup->ns_info[i];
+ ch = ns_info->channel;
+
+ if (ns == NULL && ch == NULL) {
+ /* Both NULL. Leave empty */
+ } else if (ns == NULL && ch != NULL) {
+ /* There was a channel here, but the namespace is gone. */
+ ns_changed = true;
+ spdk_put_io_channel(ch);
+ ns_info->channel = NULL;
+ } else if (ns != NULL && ch == NULL) {
+ /* A namespace appeared but there is no channel yet */
+ ns_changed = true;
+ ch = spdk_bdev_get_io_channel(ns->desc);
+ if (ch == NULL) {
+ SPDK_ERRLOG("Could not allocate I/O channel.\n");
+ return -ENOMEM;
+ }
+ ns_info->channel = ch;
+ } else if (spdk_uuid_compare(&ns_info->uuid, spdk_bdev_get_uuid(ns->bdev)) != 0) {
+ /* A namespace was here before, but was replaced by a new one. */
+ ns_changed = true;
+ spdk_put_io_channel(ns_info->channel);
+ memset(ns_info, 0, sizeof(*ns_info));
+
+ ch = spdk_bdev_get_io_channel(ns->desc);
+ if (ch == NULL) {
+ SPDK_ERRLOG("Could not allocate I/O channel.\n");
+ return -ENOMEM;
+ }
+ ns_info->channel = ch;
+ } else if (ns_info->num_blocks != spdk_bdev_get_num_blocks(ns->bdev)) {
+ /* Namespace is still there but size has changed */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Namespace resized: subsystem_id %d,"
+ " nsid %u, pg %p, old %lu, new %lu\n",
+ subsystem->id,
+ ns->nsid,
+ group,
+ ns_info->num_blocks,
+ spdk_bdev_get_num_blocks(ns->bdev));
+ ns_changed = true;
+ }
+
+ if (ns == NULL) {
+ memset(ns_info, 0, sizeof(*ns_info));
+ } else {
+ ns_info->uuid = *spdk_bdev_get_uuid(ns->bdev);
+ ns_info->num_blocks = spdk_bdev_get_num_blocks(ns->bdev);
+ ns_info->crkey = ns->crkey;
+ ns_info->rtype = ns->rtype;
+ if (ns->holder) {
+ ns_info->holder_id = ns->holder->hostid;
+ }
+
+ memset(&ns_info->reg_hostid, 0, SPDK_NVMF_MAX_NUM_REGISTRANTS * sizeof(struct spdk_uuid));
+ j = 0;
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ if (j >= SPDK_NVMF_MAX_NUM_REGISTRANTS) {
+				SPDK_ERRLOG("A maximum of %u registrants is supported.\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
+ return -EINVAL;
+ }
+ ns_info->reg_hostid[j++] = reg->hostid;
+ }
+ }
+ }
+
+ if (ns_changed) {
+ TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
+ if (ctrlr->admin_qpair->group == group) {
+ nvmf_ctrlr_async_event_ns_notice(ctrlr);
+ }
+ }
+ }
+
+ return 0;
+}
+
+int
+nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem)
+{
+ return poll_group_update_subsystem(group, subsystem);
+}
+
+int
+nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+ int rc = 0;
+ struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
+
+ TAILQ_INIT(&sgroup->queued);
+
+ rc = poll_group_update_subsystem(group, subsystem);
+ if (rc) {
+ nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL);
+ goto fini;
+ }
+
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+fini:
+ if (cb_fn) {
+ cb_fn(cb_arg, rc);
+ }
+
+ return rc;
+}
+
+static void
+_nvmf_poll_group_remove_subsystem_cb(void *ctx, int status)
+{
+ struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_poll_group *group;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ spdk_nvmf_poll_group_mod_done cpl_fn = NULL;
+ void *cpl_ctx = NULL;
+ uint32_t nsid;
+
+ group = qpair_ctx->group;
+ subsystem = qpair_ctx->subsystem;
+ cpl_fn = qpair_ctx->cpl_fn;
+ cpl_ctx = qpair_ctx->cpl_ctx;
+ sgroup = &group->sgroups[subsystem->id];
+
+ if (status) {
+ goto fini;
+ }
+
+ for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
+ if (sgroup->ns_info[nsid].channel) {
+ spdk_put_io_channel(sgroup->ns_info[nsid].channel);
+ sgroup->ns_info[nsid].channel = NULL;
+ }
+ }
+
+ sgroup->num_ns = 0;
+ free(sgroup->ns_info);
+ sgroup->ns_info = NULL;
+fini:
+ free(qpair_ctx);
+ if (cpl_fn) {
+ cpl_fn(cpl_ctx, status);
+ }
+}
+
+static void
+_nvmf_subsystem_disconnect_next_qpair(void *ctx)
+{
+ struct spdk_nvmf_qpair *qpair;
+ struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_poll_group *group;
+ int rc = 0;
+
+ group = qpair_ctx->group;
+ subsystem = qpair_ctx->subsystem;
+
+ TAILQ_FOREACH(qpair, &group->qpairs, link) {
+ if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
+ break;
+ }
+ }
+
+ if (qpair) {
+ rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, qpair_ctx);
+ }
+
+ if (!qpair || rc != 0) {
+ _nvmf_poll_group_remove_subsystem_cb(ctx, rc);
+ }
+ return;
+}
+
+void
+nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ struct nvmf_qpair_disconnect_many_ctx *ctx;
+ int rc = 0;
+
+ ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
+
+ if (!ctx) {
+		SPDK_ERRLOG("Unable to allocate memory for the context to remove the subsystem from the poll group\n");
+ goto fini;
+ }
+
+ ctx->group = group;
+ ctx->subsystem = subsystem;
+ ctx->cpl_fn = cb_fn;
+ ctx->cpl_ctx = cb_arg;
+
+ sgroup = &group->sgroups[subsystem->id];
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+
+ TAILQ_FOREACH(qpair, &group->qpairs, link) {
+ if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
+ break;
+ }
+ }
+
+ if (qpair) {
+ rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, ctx);
+ } else {
+ /* call the callback immediately. It will handle any channel iteration */
+ _nvmf_poll_group_remove_subsystem_cb(ctx, 0);
+ }
+
+ if (rc != 0) {
+ free(ctx);
+ goto fini;
+ }
+
+ return;
+fini:
+ if (cb_fn) {
+ cb_fn(cb_arg, rc);
+ }
+}
+
+void
+nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ int rc = 0;
+
+ if (subsystem->id >= group->num_sgroups) {
+ rc = -1;
+ goto fini;
+ }
+
+ sgroup = &group->sgroups[subsystem->id];
+ if (sgroup == NULL) {
+ rc = -1;
+ goto fini;
+ }
+
+ assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE);
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
+
+ if (sgroup->io_outstanding > 0) {
+ sgroup->cb_fn = cb_fn;
+ sgroup->cb_arg = cb_arg;
+ return;
+ }
+
+ assert(sgroup->io_outstanding == 0);
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
+fini:
+ if (cb_fn) {
+ cb_fn(cb_arg, rc);
+ }
+}
+
+void
+nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
+{
+ struct spdk_nvmf_request *req, *tmp;
+ struct spdk_nvmf_subsystem_poll_group *sgroup;
+ int rc = 0;
+
+ if (subsystem->id >= group->num_sgroups) {
+ rc = -1;
+ goto fini;
+ }
+
+ sgroup = &group->sgroups[subsystem->id];
+
+ assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
+
+ rc = poll_group_update_subsystem(group, subsystem);
+ if (rc) {
+ goto fini;
+ }
+
+ sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+
+ /* Release all queued requests */
+ TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
+ TAILQ_REMOVE(&sgroup->queued, req, link);
+ spdk_nvmf_request_exec(req);
+ }
+fini:
+ if (cb_fn) {
+ cb_fn(cb_arg, rc);
+ }
+}
+
+struct spdk_nvmf_poll_group *
+spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_transport_poll_group *tgroup;
+
+ tgroup = nvmf_transport_get_optimal_poll_group(qpair->transport, qpair);
+
+ if (tgroup == NULL) {
+ return NULL;
+ }
+
+ return tgroup->group;
+}
+
+int
+spdk_nvmf_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_poll_group_stat *stat)
+{
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_poll_group *group;
+
+ if (tgt == NULL || stat == NULL) {
+ return -EINVAL;
+ }
+
+ ch = spdk_get_io_channel(tgt);
+ group = spdk_io_channel_get_ctx(ch);
+ *stat = group->stat;
+ spdk_put_io_channel(ch);
+ return 0;
+}
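
To round out the target-level API, a hedged sketch of reading the per-thread poll group statistics via spdk_nvmf_poll_group_get_stat(); the admin_qpairs and io_qpairs fields are assumed from the public spdk_nvmf_poll_group_stat definition in this SPDK release, and example_dump_poll_group_stat() is invented for illustration:

    #include "spdk/log.h"
    #include "spdk/nvmf.h"

    static void
    example_dump_poll_group_stat(struct spdk_nvmf_tgt *tgt)
    {
    	struct spdk_nvmf_poll_group_stat stat = {};

    	/* Must run on an SPDK thread; the call gets and puts an I/O channel
    	 * to the target, i.e. the calling thread's own poll group. */
    	if (spdk_nvmf_poll_group_get_stat(tgt, &stat) == 0) {
    		SPDK_NOTICELOG("admin qpairs: %u, io qpairs: %u\n",
    			       stat.admin_qpairs, stat.io_qpairs);
    	}
    }
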
diff --git a/src/spdk/lib/nvmf/nvmf_fc.h b/src/spdk/lib/nvmf/nvmf_fc.h
new file mode 100644
index 000000000..10d3ef9cf
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf_fc.h
@@ -0,0 +1,999 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018-2019 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_FC_H__
+#define __NVMF_FC_H__
+
+#include "spdk/nvme.h"
+#include "spdk/nvmf.h"
+#include "spdk/assert.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/nvmf_fc_spec.h"
+#include "spdk/thread.h"
+#include "nvmf_internal.h"
+
+#define SPDK_NVMF_FC_TR_ADDR_LEN 64
+#define NVMF_FC_INVALID_CONN_ID UINT64_MAX
+
+#define SPDK_FC_HW_DUMP_REASON_STR_MAX_SIZE 256
+#define SPDK_MAX_NUM_OF_FC_PORTS 32
+#define SPDK_NVMF_PORT_ID_MAX_LEN 32
+
+/*
+ * FC HWQP pointer
+ */
+typedef void *spdk_nvmf_fc_lld_hwqp_t;
+
+/*
+ * FC HW port states.
+ */
+enum spdk_fc_port_state {
+ SPDK_FC_PORT_OFFLINE = 0,
+ SPDK_FC_PORT_ONLINE = 1,
+ SPDK_FC_PORT_QUIESCED = 2,
+};
+
+enum spdk_fc_hwqp_state {
+ SPDK_FC_HWQP_OFFLINE = 0,
+ SPDK_FC_HWQP_ONLINE = 1,
+};
+
+/*
+ * NVMF FC Object state
+ * Add all the generic states of the object here.
+ * Specific object states can be added separately
+ */
+enum spdk_nvmf_fc_object_state {
+ SPDK_NVMF_FC_OBJECT_CREATED = 0,
+ SPDK_NVMF_FC_OBJECT_TO_BE_DELETED = 1,
+ SPDK_NVMF_FC_OBJECT_ZOMBIE = 2, /* Partial Create or Delete */
+};
+
+/*
+ * FC request state
+ */
+enum spdk_nvmf_fc_request_state {
+ SPDK_NVMF_FC_REQ_INIT = 0,
+ SPDK_NVMF_FC_REQ_READ_BDEV,
+ SPDK_NVMF_FC_REQ_READ_XFER,
+ SPDK_NVMF_FC_REQ_READ_RSP,
+ SPDK_NVMF_FC_REQ_WRITE_BUFFS,
+ SPDK_NVMF_FC_REQ_WRITE_XFER,
+ SPDK_NVMF_FC_REQ_WRITE_BDEV,
+ SPDK_NVMF_FC_REQ_WRITE_RSP,
+ SPDK_NVMF_FC_REQ_NONE_BDEV,
+ SPDK_NVMF_FC_REQ_NONE_RSP,
+ SPDK_NVMF_FC_REQ_SUCCESS,
+ SPDK_NVMF_FC_REQ_FAILED,
+ SPDK_NVMF_FC_REQ_ABORTED,
+ SPDK_NVMF_FC_REQ_BDEV_ABORTED,
+ SPDK_NVMF_FC_REQ_PENDING,
+ SPDK_NVMF_FC_REQ_MAX_STATE,
+};
+
+/*
+ * Generic DMA buffer descriptor
+ */
+struct spdk_nvmf_fc_buffer_desc {
+ void *virt;
+ uint64_t phys;
+ size_t len;
+
+ /* Internal */
+ uint32_t buf_index;
+};
+
+/*
+ * ABTS handling context
+ */
+struct spdk_nvmf_fc_abts_ctx {
+ bool handled;
+ uint16_t hwqps_responded;
+ uint16_t rpi;
+ uint16_t oxid;
+ uint16_t rxid;
+ struct spdk_nvmf_fc_nport *nport;
+ uint16_t nport_hdl;
+ uint8_t port_hdl;
+ void *abts_poller_args;
+ void *sync_poller_args;
+ int num_hwqps;
+ bool queue_synced;
+ uint64_t u_id;
+ struct spdk_nvmf_fc_hwqp *ls_hwqp;
+ uint16_t fcp_rq_id;
+};
+
+/*
+ * NVME FC transport errors
+ */
+struct spdk_nvmf_fc_errors {
+ uint32_t no_xchg;
+ uint32_t nport_invalid;
+ uint32_t unknown_frame;
+ uint32_t wqe_cmplt_err;
+ uint32_t wqe_write_err;
+ uint32_t rq_status_err;
+ uint32_t rq_buf_len_err;
+ uint32_t rq_id_err;
+ uint32_t rq_index_err;
+ uint32_t invalid_cq_type;
+ uint32_t invalid_cq_id;
+ uint32_t fc_req_buf_err;
+ uint32_t buf_alloc_err;
+ uint32_t unexpected_err;
+ uint32_t nvme_cmd_iu_err;
+ uint32_t nvme_cmd_xfer_err;
+ uint32_t queue_entry_invalid;
+ uint32_t invalid_conn_err;
+ uint32_t fcp_rsp_failure;
+ uint32_t write_failed;
+ uint32_t read_failed;
+ uint32_t rport_invalid;
+ uint32_t num_aborted;
+ uint32_t num_abts_sent;
+};
+
+/*
+ * Send Single Request/Response Sequence.
+ */
+struct spdk_nvmf_fc_srsr_bufs {
+ void *rqst;
+ size_t rqst_len;
+ void *rsp;
+ size_t rsp_len;
+ uint16_t rpi;
+};
+
+/*
+ * Struct representing a nport
+ */
+struct spdk_nvmf_fc_nport {
+
+ uint16_t nport_hdl;
+ uint8_t port_hdl;
+ uint32_t d_id;
+ enum spdk_nvmf_fc_object_state nport_state;
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+
+ /* list of remote ports (i.e. initiators) connected to nport */
+ TAILQ_HEAD(, spdk_nvmf_fc_remote_port_info) rem_port_list;
+ uint32_t rport_count;
+
+ void *vendor_data; /* available for vendor use */
+
+ /* list of associations to nport */
+ TAILQ_HEAD(, spdk_nvmf_fc_association) fc_associations;
+ uint32_t assoc_count;
+ struct spdk_nvmf_fc_port *fc_port;
+ TAILQ_ENTRY(spdk_nvmf_fc_nport) link; /* list of nports on a hw port. */
+};
+
+/*
+ * NVMF FC Connection
+ */
+struct spdk_nvmf_fc_conn {
+ struct spdk_nvmf_qpair qpair;
+ struct spdk_nvme_transport_id trid;
+
+ uint64_t conn_id;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ uint16_t esrp_ratio;
+ uint16_t rsp_count;
+ uint32_t rsn;
+
+ /* The maximum number of I/O outstanding on this connection at one time */
+ uint16_t max_queue_depth;
+ uint16_t max_rw_depth;
+ /* The current number of I/O outstanding on this connection. This number
+ * includes all I/O from the time the capsule is first received until it is
+ * completed.
+ */
+ uint16_t cur_queue_depth;
+
+ /* number of read/write requests that are outstanding */
+ uint16_t cur_fc_rw_depth;
+
+ struct spdk_nvmf_fc_association *fc_assoc;
+
+ uint16_t rpi;
+
+ /* for association's connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_link;
+
+	/* for association's available connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_avail_link;
+
+ /* for hwqp's connection list */
+ TAILQ_ENTRY(spdk_nvmf_fc_conn) link;
+
+ /* New QP create context. */
+ struct nvmf_fc_ls_op_ctx *create_opd;
+};
+
+/*
+ * Structure for maintaining the FC exchanges
+ */
+struct spdk_nvmf_fc_xchg {
+ uint32_t xchg_id; /* The actual xchg identifier */
+
+ /* Internal */
+ TAILQ_ENTRY(spdk_nvmf_fc_xchg) link;
+ bool active;
+ bool aborted;
+	bool send_abts; /* Valid if aborted is set. */
+};
+
+/*
+ * FC poll group structure
+ */
+struct spdk_nvmf_fc_poll_group {
+ struct spdk_nvmf_transport_poll_group group;
+ struct spdk_nvmf_tgt *nvmf_tgt;
+	uint32_t hwqp_count; /* number of hwqps assigned to this poll group */
+ TAILQ_HEAD(, spdk_nvmf_fc_hwqp) hwqp_list;
+
+ TAILQ_ENTRY(spdk_nvmf_fc_poll_group) link;
+};
+
+/*
+ * HWQP poller structure passed from Master thread
+ */
+struct spdk_nvmf_fc_hwqp {
+ enum spdk_fc_hwqp_state state; /* queue state (for poller) */
+ uint32_t lcore_id; /* core hwqp is running on (for tracing purposes only) */
+ struct spdk_thread *thread; /* thread hwqp is running on */
+ uint32_t hwqp_id; /* A unique id (per physical port) for a hwqp */
+ uint32_t rq_size; /* receive queue size */
+ spdk_nvmf_fc_lld_hwqp_t queues; /* vendor HW queue set */
+ struct spdk_nvmf_fc_port *fc_port; /* HW port structure for these queues */
+ struct spdk_nvmf_fc_poll_group *fgroup;
+
+ /* qpair (fc_connection) list */
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) connection_list;
+	uint32_t num_conns; /* number of connections assigned to this queue */
+
+ struct spdk_nvmf_fc_request *fc_reqs_buf;
+ TAILQ_HEAD(, spdk_nvmf_fc_request) free_reqs;
+ TAILQ_HEAD(, spdk_nvmf_fc_request) in_use_reqs;
+
+ struct spdk_nvmf_fc_errors counters;
+
+ /* Pending LS request waiting for FC resource */
+ TAILQ_HEAD(, spdk_nvmf_fc_ls_rqst) ls_pending_queue;
+
+ /* Sync req list */
+ TAILQ_HEAD(, spdk_nvmf_fc_poller_api_queue_sync_args) sync_cbs;
+
+ TAILQ_ENTRY(spdk_nvmf_fc_hwqp) link;
+
+ void *context; /* Vendor specific context data */
+};
+
+/*
+ * FC HW port.
+ */
+struct spdk_nvmf_fc_port {
+ uint8_t port_hdl;
+ enum spdk_fc_port_state hw_port_status;
+ uint16_t fcp_rq_id;
+ struct spdk_nvmf_fc_hwqp ls_queue;
+
+ uint32_t num_io_queues;
+ struct spdk_nvmf_fc_hwqp *io_queues;
+ /*
+ * List of nports on this HW port.
+ */
+	TAILQ_HEAD(, spdk_nvmf_fc_nport) nport_list;
+ int num_nports;
+ TAILQ_ENTRY(spdk_nvmf_fc_port) link;
+
+ struct spdk_mempool *io_resource_pool; /* Pools to store bdev_io's for this port */
+ void *port_ctx;
+};
+
+/*
+ * NVMF FC Request
+ */
+struct spdk_nvmf_fc_request {
+ struct spdk_nvmf_request req;
+ struct spdk_nvmf_fc_ersp_iu ersp;
+ uint32_t poller_lcore; /* for tracing purposes only */
+ struct spdk_thread *poller_thread;
+ uint16_t buf_index;
+ struct spdk_nvmf_fc_xchg *xchg;
+ uint16_t oxid;
+ uint16_t rpi;
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ int state;
+ uint32_t transfered_len;
+ bool is_aborted;
+ uint32_t magic;
+ uint32_t s_id;
+ uint32_t d_id;
+ TAILQ_ENTRY(spdk_nvmf_fc_request) link;
+ STAILQ_ENTRY(spdk_nvmf_fc_request) pending_link;
+ TAILQ_HEAD(, spdk_nvmf_fc_caller_ctx) abort_cbs;
+};
+
+SPDK_STATIC_ASSERT(!offsetof(struct spdk_nvmf_fc_request, req),
+ "FC request and NVMF request address don't match.");
+
+
+/*
+ * NVMF FC Association
+ */
+struct spdk_nvmf_fc_association {
+ uint64_t assoc_id;
+ uint32_t s_id;
+ struct spdk_nvmf_fc_nport *tgtport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ struct spdk_nvmf_subsystem *subsystem;
+ enum spdk_nvmf_fc_object_state assoc_state;
+
+ char host_id[FCNVME_ASSOC_HOSTID_LEN];
+ char host_nqn[SPDK_NVME_NQN_FIELD_SIZE];
+ char sub_nqn[SPDK_NVME_NQN_FIELD_SIZE];
+
+ struct spdk_nvmf_fc_conn *aq_conn; /* connection for admin queue */
+
+ uint16_t conn_count;
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) fc_conns;
+
+ void *conns_buf;
+ TAILQ_HEAD(, spdk_nvmf_fc_conn) avail_fc_conns;
+
+ TAILQ_ENTRY(spdk_nvmf_fc_association) link;
+
+ /* for port's association free list */
+ TAILQ_ENTRY(spdk_nvmf_fc_association) port_free_assoc_list_link;
+
+ void *ls_del_op_ctx; /* delete assoc. callback list */
+
+ /* disconnect cmd buffers (sent to initiator) */
+ struct spdk_nvmf_fc_srsr_bufs *snd_disconn_bufs;
+};
+
+/*
+ * FC Remote Port
+ */
+struct spdk_nvmf_fc_remote_port_info {
+ uint32_t s_id;
+ uint32_t rpi;
+ uint32_t assoc_count;
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+ enum spdk_nvmf_fc_object_state rport_state;
+ TAILQ_ENTRY(spdk_nvmf_fc_remote_port_info) link;
+};
+
+/*
+ * Poller API error codes
+ */
+enum spdk_nvmf_fc_poller_api_ret {
+ SPDK_NVMF_FC_POLLER_API_SUCCESS = 0,
+ SPDK_NVMF_FC_POLLER_API_ERROR,
+ SPDK_NVMF_FC_POLLER_API_INVALID_ARG,
+ SPDK_NVMF_FC_POLLER_API_NO_CONN_ID,
+ SPDK_NVMF_FC_POLLER_API_DUP_CONN_ID,
+ SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND,
+};
+
+/*
+ * Poller API definitions
+ */
+enum spdk_nvmf_fc_poller_api {
+ SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION,
+ SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION,
+ SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE,
+ SPDK_NVMF_FC_POLLER_API_ACTIVATE_QUEUE,
+ SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED,
+ SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE,
+ SPDK_NVMF_FC_POLLER_API_ADAPTER_EVENT,
+ SPDK_NVMF_FC_POLLER_API_AEN,
+ SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC,
+ SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC_DONE,
+ SPDK_NVMF_FC_POLLER_API_ADD_HWQP,
+ SPDK_NVMF_FC_POLLER_API_REMOVE_HWQP,
+};
+
+/*
+ * Poller API callback function proto
+ */
+typedef void (*spdk_nvmf_fc_poller_api_cb)(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret);
+
+/*
+ * Poller API callback data
+ */
+struct spdk_nvmf_fc_poller_api_cb_info {
+ struct spdk_thread *cb_thread;
+ spdk_nvmf_fc_poller_api_cb cb_func;
+ void *cb_data;
+ enum spdk_nvmf_fc_poller_api_ret ret;
+};
+
+/*
+ * Poller API structures
+ */
+struct spdk_nvmf_fc_poller_api_add_connection_args {
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+struct spdk_nvmf_fc_poller_api_del_connection_args {
+ struct spdk_nvmf_fc_conn *fc_conn;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+ bool send_abts;
+ /* internal */
+ int fc_request_cnt;
+ bool backend_initiated;
+};
+
+struct spdk_nvmf_fc_poller_api_quiesce_queue_args {
+ void *ctx;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+struct spdk_nvmf_fc_poller_api_activate_queue_args {
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+struct spdk_nvmf_fc_poller_api_abts_recvd_args {
+ struct spdk_nvmf_fc_abts_ctx *ctx;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+};
+
+struct spdk_nvmf_fc_poller_api_queue_sync_done_args {
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+ uint64_t tag;
+};
+
+/*
+ * NVMF LS request structure
+ */
+struct spdk_nvmf_fc_ls_rqst {
+ struct spdk_nvmf_fc_buffer_desc rqstbuf;
+ struct spdk_nvmf_fc_buffer_desc rspbuf;
+ uint32_t rqst_len;
+ uint32_t rsp_len;
+ uint32_t rpi;
+ struct spdk_nvmf_fc_xchg *xchg;
+ uint16_t oxid;
+ void *private_data; /* for LLD only (LS does not touch) */
+ TAILQ_ENTRY(spdk_nvmf_fc_ls_rqst) ls_pending_link;
+ uint32_t s_id;
+ uint32_t d_id;
+ struct spdk_nvmf_fc_nport *nport;
+ struct spdk_nvmf_fc_remote_port_info *rport;
+ struct spdk_nvmf_tgt *nvmf_tgt;
+};
+
+/*
+ * RQ Buffer LS Overlay Structure
+ */
+#define FCNVME_LS_RSVD_SIZE (FCNVME_MAX_LS_BUFFER_SIZE - \
+ (sizeof(struct spdk_nvmf_fc_ls_rqst) + FCNVME_MAX_LS_REQ_SIZE + FCNVME_MAX_LS_RSP_SIZE))
+
+struct spdk_nvmf_fc_rq_buf_ls_request {
+ uint8_t rqst[FCNVME_MAX_LS_REQ_SIZE];
+ uint8_t resp[FCNVME_MAX_LS_RSP_SIZE];
+ struct spdk_nvmf_fc_ls_rqst ls_rqst;
+ uint8_t rsvd[FCNVME_LS_RSVD_SIZE];
+};
+
+SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_rq_buf_ls_request) ==
+ FCNVME_MAX_LS_BUFFER_SIZE, "LS RQ Buffer overflow");
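+
+/*
+ * Sizing sketch: FCNVME_LS_RSVD_SIZE is defined so that, assuming the
+ * compiler inserts no padding between members (the SPDK_STATIC_ASSERT above
+ * catches the case where it does),
+ *
+ *   FCNVME_MAX_LS_REQ_SIZE + FCNVME_MAX_LS_RSP_SIZE +
+ *   sizeof(struct spdk_nvmf_fc_ls_rqst) + FCNVME_LS_RSVD_SIZE
+ *     == FCNVME_MAX_LS_BUFFER_SIZE
+ *
+ * so a receive-queue buffer can be overlaid directly with a
+ * struct spdk_nvmf_fc_rq_buf_ls_request.
+ */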
+
+/* Poller API structures (arguments and callback data) */
+typedef void (*spdk_nvmf_fc_del_assoc_cb)(void *arg, uint32_t err);
+
+struct spdk_nvmf_fc_ls_add_conn_api_data {
+ struct spdk_nvmf_fc_poller_api_add_connection_args args;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst;
+ struct spdk_nvmf_fc_association *assoc;
+ bool aq_conn; /* true if adding connection for new association */
+};
+
+/* Disconnect (connection) request functions */
+struct spdk_nvmf_fc_ls_del_conn_api_data {
+ struct spdk_nvmf_fc_poller_api_del_connection_args args;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst;
+ struct spdk_nvmf_fc_association *assoc;
+ bool aq_conn; /* true if deleting AQ connection */
+};
+
+/* used by LS disconnect association cmd handling */
+struct spdk_nvmf_fc_ls_disconn_assoc_api_data {
+ struct spdk_nvmf_fc_nport *tgtport;
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst;
+};
+
+/* used by delete association call */
+struct spdk_nvmf_fc_delete_assoc_api_data {
+ struct spdk_nvmf_fc_poller_api_del_connection_args args;
+ struct spdk_nvmf_fc_association *assoc;
+	bool from_ls_rqst; /* true if the request came from an LS request */
+ spdk_nvmf_fc_del_assoc_cb del_assoc_cb;
+ void *del_assoc_cb_data;
+};
+
+struct nvmf_fc_ls_op_ctx {
+ union {
+ struct spdk_nvmf_fc_ls_add_conn_api_data add_conn;
+ struct spdk_nvmf_fc_ls_del_conn_api_data del_conn;
+ struct spdk_nvmf_fc_ls_disconn_assoc_api_data disconn_assoc;
+ struct spdk_nvmf_fc_delete_assoc_api_data del_assoc;
+ } u;
+ struct nvmf_fc_ls_op_ctx *next_op_ctx;
+};
+
+struct spdk_nvmf_fc_poller_api_queue_sync_args {
+ uint64_t u_id;
+ struct spdk_nvmf_fc_hwqp *hwqp;
+ struct spdk_nvmf_fc_poller_api_cb_info cb_info;
+
+ /* Used internally by poller */
+ TAILQ_ENTRY(spdk_nvmf_fc_poller_api_queue_sync_args) link;
+};
+
+/**
+ * The following defines and structures are used to pass messages between the
+ * master thread and the FCT driver.
+ */
+enum spdk_fc_event {
+ SPDK_FC_HW_PORT_INIT,
+ SPDK_FC_HW_PORT_ONLINE,
+ SPDK_FC_HW_PORT_OFFLINE,
+ SPDK_FC_HW_PORT_RESET,
+ SPDK_FC_NPORT_CREATE,
+ SPDK_FC_NPORT_DELETE,
+ SPDK_FC_IT_ADD, /* PRLI */
+ SPDK_FC_IT_DELETE, /* PRLI */
+ SPDK_FC_ABTS_RECV,
+ SPDK_FC_LINK_BREAK,
+ SPDK_FC_HW_PORT_DUMP,
+ SPDK_FC_UNRECOVERABLE_ERR,
+ SPDK_FC_EVENT_MAX,
+};
+
+/**
+ * Arguments for dumping an association ID.
+ */
+struct spdk_nvmf_fc_dump_assoc_id_args {
+ uint8_t pport_handle;
+ uint16_t nport_handle;
+ uint32_t assoc_id;
+};
+
+/**
+ * Arguments for HW port init event.
+ */
+struct spdk_nvmf_fc_hw_port_init_args {
+ uint32_t ls_queue_size;
+ spdk_nvmf_fc_lld_hwqp_t ls_queue;
+ uint32_t io_queue_size;
+ uint32_t io_queue_cnt;
+ spdk_nvmf_fc_lld_hwqp_t *io_queues;
+ void *cb_ctx;
+ void *port_ctx;
+ uint8_t port_handle;
+ uint8_t nvme_aq_index; /* io_queue used for nvme admin queue */
+ uint16_t fcp_rq_id; /* Base rq ID of SCSI queue */
+};
+
+/**
+ * Arguments for HW port link break event.
+ */
+struct spdk_nvmf_hw_port_link_break_args {
+ uint8_t port_handle;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for HW port online event.
+ */
+struct spdk_nvmf_fc_hw_port_online_args {
+ uint8_t port_handle;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for HW port offline event.
+ */
+struct spdk_nvmf_fc_hw_port_offline_args {
+ uint8_t port_handle;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for n-port add event.
+ */
+struct spdk_nvmf_fc_nport_create_args {
+ uint8_t port_handle;
+ uint16_t nport_handle;
+ struct spdk_uuid container_uuid; /* UUID of the nports container */
+ struct spdk_uuid nport_uuid; /* Unique UUID for the nport */
+ uint32_t d_id;
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+	uint32_t subsys_id; /* Subsystem id */
+ char port_id[SPDK_NVMF_PORT_ID_MAX_LEN];
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for n-port delete event.
+ */
+struct spdk_nvmf_fc_nport_delete_args {
+ uint8_t port_handle;
+ uint32_t nport_handle;
+ uint32_t subsys_id; /* Subsystem id */
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for I_T add event.
+ */
+struct spdk_nvmf_fc_hw_i_t_add_args {
+ uint8_t port_handle;
+ uint32_t nport_handle;
+ uint16_t itn_handle;
+ uint32_t rpi;
+ uint32_t s_id;
+ uint32_t initiator_prli_info;
+ uint32_t target_prli_info; /* populated by the SPDK master */
+ struct spdk_nvmf_fc_wwn fc_nodename;
+ struct spdk_nvmf_fc_wwn fc_portname;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for I_T delete event.
+ */
+struct spdk_nvmf_fc_hw_i_t_delete_args {
+ uint8_t port_handle;
+ uint32_t nport_handle;
+ uint16_t itn_handle; /* Only used by FC LLD driver; unused in SPDK */
+ uint32_t rpi;
+ uint32_t s_id;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for ABTS event.
+ */
+struct spdk_nvmf_fc_abts_args {
+ uint8_t port_handle;
+ uint32_t nport_handle;
+ uint32_t rpi;
+ uint16_t oxid, rxid;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for link break event.
+ */
+struct spdk_nvmf_fc_link_break_args {
+ uint8_t port_handle;
+};
+
+/**
+ * Arguments for port reset event.
+ */
+struct spdk_nvmf_fc_hw_port_reset_args {
+ uint8_t port_handle;
+ bool dump_queues;
+ char reason[SPDK_FC_HW_DUMP_REASON_STR_MAX_SIZE];
+ uint32_t **dump_buf;
+ void *cb_ctx;
+};
+
+/**
+ * Arguments for unrecoverable error event
+ */
+struct spdk_nvmf_fc_unrecoverable_error_event_args {
+};
+
+/**
+ * Callback function to the FCT driver.
+ */
+typedef void (*spdk_nvmf_fc_callback)(uint8_t port_handle,
+ enum spdk_fc_event event_type,
+ void *arg, int err);
+
+/**
+ * Enqueue an FCT event to master thread
+ *
+ * \param event_type Type of the event.
+ * \param args Pointer to the argument structure.
+ * \param cb_func Callback function into fc driver.
+ *
+ * \return 0 on success, non-zero on failure.
+ */
+int
+nvmf_fc_master_enqueue_event(enum spdk_fc_event event_type,
+ void *args,
+ spdk_nvmf_fc_callback cb_func);
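+
+/*
+ * Usage sketch (illustrative only; the callback name and the port handle
+ * value are hypothetical, and ownership/freeing of the args structure is an
+ * assumption rather than something this header defines):
+ *
+ *   static void
+ *   port_online_done(uint8_t port_handle, enum spdk_fc_event event_type,
+ *                    void *arg, int err)
+ *   {
+ *           // 'arg' is the args pointer given to the enqueue call;
+ *           // free it here if it was heap-allocated by the caller
+ *   }
+ *
+ * and then, from the LLD's port-up path:
+ *
+ *   struct spdk_nvmf_fc_hw_port_online_args *args = calloc(1, sizeof(*args));
+ *
+ *   if (args) {
+ *           args->port_handle = 0;    // hypothetical HW port handle
+ *           if (nvmf_fc_master_enqueue_event(SPDK_FC_HW_PORT_ONLINE, args,
+ *                                            port_online_done) != 0) {
+ *                   free(args);
+ *           }
+ *   }
+ */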
+
+/*
+ * dump info
+ */
+struct spdk_nvmf_fc_queue_dump_info {
+ char *buffer;
+ int offset;
+};
+#define SPDK_FC_HW_DUMP_BUF_SIZE (10 * 4096)
+
+static inline void
+nvmf_fc_dump_buf_print(struct spdk_nvmf_fc_queue_dump_info *dump_info, char *fmt, ...)
+{
+ uint64_t buffer_size = SPDK_FC_HW_DUMP_BUF_SIZE;
+ int32_t avail = (int32_t)(buffer_size - dump_info->offset);
+
+ if (avail > 0) {
+ va_list ap;
+ int32_t written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(dump_info->buffer + dump_info->offset, avail, fmt, ap);
+ if (written >= avail) {
+ dump_info->offset += avail;
+ } else {
+ dump_info->offset += written;
+ }
+ va_end(ap);
+ }
+}
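+
+/*
+ * Usage sketch (illustrative; the buffer allocation and the fields printed
+ * are assumptions, not fixed by this header):
+ *
+ *   struct spdk_nvmf_fc_queue_dump_info dump_info = {
+ *           .buffer = calloc(1, SPDK_FC_HW_DUMP_BUF_SIZE),
+ *           .offset = 0,
+ *   };
+ *
+ *   if (dump_info.buffer) {
+ *           nvmf_fc_dump_buf_print(&dump_info, "hwqp %u state %d\n",
+ *                                  hwqp->hwqp_id, hwqp->state);
+ *   }
+ *
+ * Output beyond SPDK_FC_HW_DUMP_BUF_SIZE bytes is silently dropped.
+ */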
+
+/*
+ * NVMF FC caller callback definitions
+ */
+typedef void (*spdk_nvmf_fc_caller_cb)(void *hwqp, int32_t status, void *args);
+
+struct spdk_nvmf_fc_caller_ctx {
+ void *ctx;
+ spdk_nvmf_fc_caller_cb cb;
+ void *cb_args;
+ TAILQ_ENTRY(spdk_nvmf_fc_caller_ctx) link;
+};
+
+/*
+ * NVMF FC Exchange Info (for debug)
+ */
+struct spdk_nvmf_fc_xchg_info {
+ uint32_t xchg_base;
+ uint32_t xchg_total_count;
+ uint32_t xchg_avail_count;
+ uint32_t send_frame_xchg_id;
+ uint8_t send_frame_seqid;
+};
+
+/*
+ * NVMF FC inline and function prototypes
+ */
+
+static inline struct spdk_nvmf_fc_request *
+nvmf_fc_get_fc_req(struct spdk_nvmf_request *req)
+{
+ return (struct spdk_nvmf_fc_request *)
+ ((uintptr_t)req - offsetof(struct spdk_nvmf_fc_request, req));
+}
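+
+/*
+ * Note: this is the usual container-of pattern. Because 'req' is the first
+ * member of struct spdk_nvmf_fc_request (enforced by the SPDK_STATIC_ASSERT
+ * earlier in this header), the offsetof() subtraction is zero and the
+ * conversion is equivalent to
+ *
+ *   (struct spdk_nvmf_fc_request *)req;
+ */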
+
+static inline bool
+nvmf_fc_is_port_dead(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+ switch (hwqp->fc_port->hw_port_status) {
+ case SPDK_FC_PORT_QUIESCED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+nvmf_fc_req_in_xfer(struct spdk_nvmf_fc_request *fc_req)
+{
+ switch (fc_req->state) {
+ case SPDK_NVMF_FC_REQ_READ_XFER:
+ case SPDK_NVMF_FC_REQ_READ_RSP:
+ case SPDK_NVMF_FC_REQ_WRITE_XFER:
+ case SPDK_NVMF_FC_REQ_WRITE_RSP:
+ case SPDK_NVMF_FC_REQ_NONE_RSP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline void
+nvmf_fc_create_trid(struct spdk_nvme_transport_id *trid, uint64_t n_wwn, uint64_t p_wwn)
+{
+ spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_FC);
+ trid->adrfam = SPDK_NVMF_ADRFAM_FC;
+ snprintf(trid->trsvcid, sizeof(trid->trsvcid), "none");
+ snprintf(trid->traddr, sizeof(trid->traddr), "nn-0x%lx:pn-0x%lx", n_wwn, p_wwn);
+}
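+
+/*
+ * Usage sketch (illustrative; the WWN values below are hypothetical):
+ *
+ *   struct spdk_nvme_transport_id trid;
+ *
+ *   nvmf_fc_create_trid(&trid, 0x20000090fa7331e8ULL, 0x10000090fa7331e8ULL);
+ *   // trid.traddr  == "nn-0x20000090fa7331e8:pn-0x10000090fa7331e8"
+ *   // trid.trsvcid == "none", trid.adrfam == SPDK_NVMF_ADRFAM_FC
+ */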
+
+void nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port);
+
+void nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port);
+
+void nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst);
+void nvmf_fc_ls_add_conn_failure(
+ struct spdk_nvmf_fc_association *assoc,
+ struct spdk_nvmf_fc_ls_rqst *ls_rqst,
+ struct spdk_nvmf_fc_conn *fc_conn,
+ bool aq_conn);
+
+void nvmf_fc_init_hwqp(struct spdk_nvmf_fc_port *fc_port, struct spdk_nvmf_fc_hwqp *hwqp);
+
+void nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp);
+
+struct spdk_nvmf_fc_conn *nvmf_fc_hwqp_find_fc_conn(struct spdk_nvmf_fc_hwqp *hwqp,
+ uint64_t conn_id);
+
+void nvmf_fc_hwqp_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp, void *queues_curr);
+
+struct spdk_nvmf_fc_port *nvmf_fc_port_lookup(uint8_t port_hdl);
+
+bool nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port);
+
+int nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port);
+
+bool nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port);
+
+int nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port);
+
+int nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport,
+ enum spdk_nvmf_fc_object_state state);
+
+void nvmf_fc_port_add(struct spdk_nvmf_fc_port *fc_port);
+
+int nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport);
+
+int nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port,
+ struct spdk_nvmf_fc_nport *nport);
+
+struct spdk_nvmf_fc_nport *nvmf_fc_nport_find(uint8_t port_hdl, uint16_t nport_hdl);
+
+int nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport,
+ enum spdk_nvmf_fc_object_state state);
+
+bool nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port);
+
+bool nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport,
+ struct spdk_nvmf_fc_remote_port_info *rem_port);
+
+bool nvmf_fc_nport_has_no_rport(struct spdk_nvmf_fc_nport *nport);
+
+int nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc,
+ enum spdk_nvmf_fc_object_state state);
+
+int nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport,
+ uint64_t assoc_id, bool send_abts, bool backend_initiated,
+ spdk_nvmf_fc_del_assoc_cb del_assoc_cb,
+ void *cb_data);
+
+bool nvmf_ctrlr_is_on_nport(uint8_t port_hdl, uint16_t nport_hdl,
+ struct spdk_nvmf_ctrlr *ctrlr);
+
+void nvmf_fc_assign_queue_to_master_thread(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void nvmf_fc_poll_group_add_hwqp(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void nvmf_fc_poll_group_remove_hwqp(struct spdk_nvmf_fc_hwqp *hwqp);
+
+int nvmf_fc_hwqp_set_online(struct spdk_nvmf_fc_hwqp *hwqp);
+
+int nvmf_fc_hwqp_set_offline(struct spdk_nvmf_fc_hwqp *hwqp);
+
+uint32_t nvmf_fc_get_prli_service_params(void);
+
+void nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport, uint16_t rpi, uint16_t oxid,
+ uint16_t rxid);
+
+void nvmf_fc_request_abort(struct spdk_nvmf_fc_request *fc_req, bool send_abts,
+ spdk_nvmf_fc_caller_cb cb, void *cb_args);
+
+struct spdk_nvmf_tgt *nvmf_fc_get_tgt(void);
+
+struct spdk_thread *nvmf_fc_get_master_thread(void);
+
+/*
+ * These functions are called by low level FC driver
+ */
+
+static inline struct spdk_nvmf_fc_conn *
+nvmf_fc_get_conn(struct spdk_nvmf_qpair *qpair)
+{
+ return (struct spdk_nvmf_fc_conn *)
+ ((uintptr_t)qpair - offsetof(struct spdk_nvmf_fc_conn, qpair));
+}
+
+static inline uint16_t
+nvmf_fc_advance_conn_sqhead(struct spdk_nvmf_qpair *qpair)
+{
+ /* advance sq_head pointer - wrap if needed */
+ qpair->sq_head = (qpair->sq_head == qpair->sq_head_max) ?
+ 0 : (qpair->sq_head + 1);
+ return qpair->sq_head;
+}
+
+static inline bool
+nvmf_fc_use_send_frame(struct spdk_nvmf_request *req)
+{
+ /* For now use for only keepalives. */
+ if (req->qpair->qid == 0 &&
+ (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_KEEP_ALIVE)) {
+ return true;
+ }
+ return false;
+}
+
+enum spdk_nvmf_fc_poller_api_ret nvmf_fc_poller_api_func(
+ struct spdk_nvmf_fc_hwqp *hwqp,
+ enum spdk_nvmf_fc_poller_api api,
+ void *api_args);
+
+int nvmf_fc_hwqp_process_frame(struct spdk_nvmf_fc_hwqp *hwqp, uint32_t buff_idx,
+ struct spdk_nvmf_fc_frame_hdr *frame,
+ struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen);
+
+void nvmf_fc_hwqp_process_pending_reqs(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void nvmf_fc_hwqp_process_pending_ls_rqsts(struct spdk_nvmf_fc_hwqp *hwqp);
+
+void nvmf_fc_request_set_state(struct spdk_nvmf_fc_request *fc_req,
+ enum spdk_nvmf_fc_request_state state);
+
+char *nvmf_fc_request_get_state_str(int state);
+
+void _nvmf_fc_request_free(struct spdk_nvmf_fc_request *fc_req);
+
+void nvmf_fc_request_abort_complete(void *arg1);
+
+bool nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req,
+ uint32_t rsp_cnt, uint32_t xfer_len);
+
+int nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *req);
+
+#endif
diff --git a/src/spdk/lib/nvmf/nvmf_internal.h b/src/spdk/lib/nvmf/nvmf_internal.h
new file mode 100644
index 000000000..f1f3837d5
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf_internal.h
@@ -0,0 +1,371 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_INTERNAL_H__
+#define __NVMF_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/likely.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_cmd.h"
+#include "spdk/nvmf_transport.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/assert.h"
+#include "spdk/bdev.h"
+#include "spdk/queue.h"
+#include "spdk/util.h"
+#include "spdk/thread.h"
+
+#define NVMF_MAX_ASYNC_EVENTS (4)
+
+enum spdk_nvmf_subsystem_state {
+ SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
+ SPDK_NVMF_SUBSYSTEM_ACTIVATING,
+ SPDK_NVMF_SUBSYSTEM_ACTIVE,
+ SPDK_NVMF_SUBSYSTEM_PAUSING,
+ SPDK_NVMF_SUBSYSTEM_PAUSED,
+ SPDK_NVMF_SUBSYSTEM_RESUMING,
+ SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
+};
+
+struct spdk_nvmf_tgt {
+ char name[NVMF_TGT_NAME_MAX_LENGTH];
+
+ pthread_mutex_t mutex;
+
+ uint64_t discovery_genctr;
+
+ uint32_t max_subsystems;
+
+ /* Array of subsystem pointers of size max_subsystems indexed by sid */
+ struct spdk_nvmf_subsystem **subsystems;
+
+ TAILQ_HEAD(, spdk_nvmf_transport) transports;
+ TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups;
+
+ /* Used for round-robin assignment of connections to poll groups */
+ struct spdk_nvmf_poll_group *next_poll_group;
+
+ spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn;
+ void *destroy_cb_arg;
+
+ TAILQ_ENTRY(spdk_nvmf_tgt) link;
+};
+
+struct spdk_nvmf_host {
+ char nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
+ TAILQ_ENTRY(spdk_nvmf_host) link;
+};
+
+struct spdk_nvmf_subsystem_listener {
+ struct spdk_nvmf_subsystem *subsystem;
+ spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
+ void *cb_arg;
+ struct spdk_nvme_transport_id *trid;
+ struct spdk_nvmf_transport *transport;
+ TAILQ_ENTRY(spdk_nvmf_subsystem_listener) link;
+};
+
+/* Maximum number of registrants supported per namespace */
+#define SPDK_NVMF_MAX_NUM_REGISTRANTS 16
+
+struct spdk_nvmf_registrant_info {
+ uint64_t rkey;
+ char host_uuid[SPDK_UUID_STRING_LEN];
+};
+
+struct spdk_nvmf_reservation_info {
+ bool ptpl_activated;
+ enum spdk_nvme_reservation_type rtype;
+ uint64_t crkey;
+ char bdev_uuid[SPDK_UUID_STRING_LEN];
+ char holder_uuid[SPDK_UUID_STRING_LEN];
+ uint32_t num_regs;
+ struct spdk_nvmf_registrant_info registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+};
+
+struct spdk_nvmf_subsystem_pg_ns_info {
+ struct spdk_io_channel *channel;
+ struct spdk_uuid uuid;
+ /* current reservation key, no reservation if the value is 0 */
+ uint64_t crkey;
+ /* reservation type */
+ enum spdk_nvme_reservation_type rtype;
+ /* Host ID which holds the reservation */
+ struct spdk_uuid holder_id;
+ /* Host ID for the registrants with the namespace */
+ struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint64_t num_blocks;
+};
+
+typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
+
+struct spdk_nvmf_subsystem_poll_group {
+ /* Array of namespace information for each namespace indexed by nsid - 1 */
+ struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
+ uint32_t num_ns;
+
+ uint64_t io_outstanding;
+ spdk_nvmf_poll_group_mod_done cb_fn;
+ void *cb_arg;
+
+ enum spdk_nvmf_subsystem_state state;
+
+ TAILQ_HEAD(, spdk_nvmf_request) queued;
+};
+
+struct spdk_nvmf_registrant {
+ TAILQ_ENTRY(spdk_nvmf_registrant) link;
+ struct spdk_uuid hostid;
+ /* Registration key */
+ uint64_t rkey;
+};
+
+struct spdk_nvmf_ns {
+ uint32_t nsid;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ struct spdk_nvmf_ns_opts opts;
+	/* reservation notification mask */
+ uint32_t mask;
+ /* generation code */
+ uint32_t gen;
+ /* registrants head */
+ TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
+ /* current reservation key */
+ uint64_t crkey;
+ /* reservation type */
+ enum spdk_nvme_reservation_type rtype;
+ /* current reservation holder, only valid if reservation type can only have one holder */
+ struct spdk_nvmf_registrant *holder;
+ /* Persist Through Power Loss file which contains the persistent reservation */
+ char *ptpl_file;
+ /* Persist Through Power Loss feature is enabled */
+ bool ptpl_activated;
+};
+
+struct spdk_nvmf_ctrlr_feat {
+ union spdk_nvme_feat_arbitration arbitration;
+ union spdk_nvme_feat_power_management power_management;
+ union spdk_nvme_feat_error_recovery error_recovery;
+ union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
+ union spdk_nvme_feat_number_of_queues number_of_queues;
+ union spdk_nvme_feat_write_atomicity write_atomicity;
+ union spdk_nvme_feat_async_event_configuration async_event_configuration;
+ union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
+};
+
+/*
+ * NVMf reservation notification log page.
+ */
+struct spdk_nvmf_reservation_log {
+ struct spdk_nvme_reservation_notification_log log;
+ TAILQ_ENTRY(spdk_nvmf_reservation_log) link;
+ struct spdk_nvmf_ctrlr *ctrlr;
+};
+
+/*
+ * This structure represents an NVMe-oF controller,
+ * which is like a "session" in networking terms.
+ */
+struct spdk_nvmf_ctrlr {
+ uint16_t cntlid;
+ char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
+ struct spdk_nvmf_subsystem *subsys;
+
+ struct spdk_nvmf_ctrlr_data cdata;
+
+ struct spdk_nvmf_registers vcprop;
+
+ struct spdk_nvmf_ctrlr_feat feat;
+
+ struct spdk_nvmf_qpair *admin_qpair;
+ struct spdk_thread *thread;
+ struct spdk_bit_array *qpair_mask;
+
+ struct spdk_nvmf_request *aer_req[NVMF_MAX_ASYNC_EVENTS];
+ union spdk_nvme_async_event_completion notice_event;
+ union spdk_nvme_async_event_completion reservation_event;
+ uint8_t nr_aer_reqs;
+ struct spdk_uuid hostid;
+
+ uint16_t changed_ns_list_count;
+ struct spdk_nvme_ns_list changed_ns_list;
+ uint64_t log_page_count;
+ uint8_t num_avail_log_pages;
+ TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;
+
+	/* Time to trigger keep-alive: poller_time = now_tick + period */
+ uint64_t last_keep_alive_tick;
+ struct spdk_poller *keep_alive_poller;
+
+ bool dif_insert_or_strip;
+
+ TAILQ_ENTRY(spdk_nvmf_ctrlr) link;
+};
+
+struct spdk_nvmf_subsystem {
+ struct spdk_thread *thread;
+ uint32_t id;
+ enum spdk_nvmf_subsystem_state state;
+
+ char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
+ enum spdk_nvmf_subtype subtype;
+ uint16_t next_cntlid;
+ bool allow_any_host;
+ bool allow_any_listener;
+
+ struct spdk_nvmf_tgt *tgt;
+
+ char sn[SPDK_NVME_CTRLR_SN_LEN + 1];
+ char mn[SPDK_NVME_CTRLR_MN_LEN + 1];
+
+ /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
+ struct spdk_nvmf_ns **ns;
+ uint32_t max_nsid;
+	/* The maximum nsid allowed for this subsystem */
+ uint32_t max_allowed_nsid;
+
+ TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs;
+ TAILQ_HEAD(, spdk_nvmf_host) hosts;
+ TAILQ_HEAD(, spdk_nvmf_subsystem_listener) listeners;
+
+ TAILQ_ENTRY(spdk_nvmf_subsystem) entries;
+};
+
+int nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_transport *transport);
+int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem);
+int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
+ struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
+
+void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn,
+ struct iovec *iov,
+ uint32_t iovcnt, uint64_t offset, uint32_t length);
+
+void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
+int nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req);
+int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
+int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
+bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
+bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
+void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
+
+void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
+ bool dif_insert_or_strip);
+int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req);
+int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
+bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
+ struct spdk_dif_ctx *dif_ctx);
+
+int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr);
+void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr);
+void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
+ bool stop);
+struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ uint16_t cntlid);
+struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener(
+ struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid);
+struct spdk_nvmf_listener *nvmf_transport_find_listener(
+ struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid);
+
+int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
+void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);
+void nvmf_ns_reservation_request(void *ctx);
+void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_ns *ns,
+ enum spdk_nvme_reservation_notification_log_page_type type);
+
+/*
+ * Abort aer is sent on a per controller basis and sends a completion for the aer to the host.
+ * This function should be called when attempting to recover in error paths when it is OK for
+ * the host to send a subsequent AER.
+ */
+void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);
+
+/*
+ * Free aer simply frees the rdma resources for the aer without informing the host.
+ * This function should be called when deleting a qpair when one wants to make sure
+ * the qpair is completely empty before freeing the request. The reason we free the
+ * AER without sending a completion is to prevent the host from sending another AER.
+ */
+void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);
+
+int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req);
+
+static inline struct spdk_nvmf_ns *
+_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+ /* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
+ if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
+ return NULL;
+ }
+
+ return subsystem->ns[nsid - 1];
+}
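+
+/*
+ * Usage sketch (illustrative): command handlers resolve the nsid from the
+ * capsule and must tolerate both invalid and inactive namespaces:
+ *
+ *   struct spdk_nvmf_ns *ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
+ *
+ *   if (ns == NULL || ns->bdev == NULL) {
+ *           // nsid out of range or the namespace has no bdev
+ *           // attached; fail the command
+ *   }
+ */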
+
+static inline bool
+nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
+{
+ return qpair->qid == 0;
+}
+
+#endif /* __NVMF_INTERNAL_H__ */
diff --git a/src/spdk/lib/nvmf/nvmf_rpc.c b/src/spdk/lib/nvmf/nvmf_rpc.c
new file mode 100644
index 000000000..5dc9f42f0
--- /dev/null
+++ b/src/spdk/lib/nvmf/nvmf_rpc.c
@@ -0,0 +1,2012 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2018-2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/bdev.h"
+#include "spdk/log.h"
+#include "spdk/rpc.h"
+#include "spdk/env.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+#include "spdk_internal/assert.h"
+
+#include "nvmf_internal.h"
+
+static int
+json_write_hex_str(struct spdk_json_write_ctx *w, const void *data, size_t size)
+{
+ static const char hex_char[16] = "0123456789ABCDEF";
+ const uint8_t *buf = data;
+ char *str, *out;
+ int rc;
+
+ str = malloc(size * 2 + 1);
+ if (str == NULL) {
+ return -1;
+ }
+
+ out = str;
+ while (size--) {
+ unsigned byte = *buf++;
+
+ out[0] = hex_char[(byte >> 4) & 0xF];
+ out[1] = hex_char[byte & 0xF];
+
+ out += 2;
+ }
+ *out = '\0';
+
+ rc = spdk_json_write_string(w, str);
+ free(str);
+
+ return rc;
+}
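+
+/*
+ * Usage sketch (illustrative; ns_opts is assumed to be a populated
+ * struct spdk_nvmf_ns_opts, as in dump_nvmf_subsystem() below):
+ *
+ *   spdk_json_write_name(w, "nguid");
+ *   json_write_hex_str(w, ns_opts.nguid, sizeof(ns_opts.nguid));
+ *   // emits a JSON string such as "ABCDEF0123456789ABCDEF0123456789"
+ *   // (hypothetical value)
+ */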
+
+static int
+hex_nybble_to_num(char c)
+{
+ if (c >= '0' && c <= '9') {
+ return c - '0';
+ }
+
+ if (c >= 'a' && c <= 'f') {
+ return c - 'a' + 0xA;
+ }
+
+ if (c >= 'A' && c <= 'F') {
+ return c - 'A' + 0xA;
+ }
+
+ return -1;
+}
+
+static int
+hex_byte_to_num(const char *str)
+{
+ int hi, lo;
+
+ hi = hex_nybble_to_num(str[0]);
+ if (hi < 0) {
+ return hi;
+ }
+
+ lo = hex_nybble_to_num(str[1]);
+ if (lo < 0) {
+ return lo;
+ }
+
+ return hi * 16 + lo;
+}
+
+static int
+decode_hex_string_be(const char *str, uint8_t *out, size_t size)
+{
+ size_t i;
+
+ /* Decode a string in "ABCDEF012345" format to its binary representation */
+ for (i = 0; i < size; i++) {
+ int num = hex_byte_to_num(str);
+
+ if (num < 0) {
+ /* Invalid hex byte or end of string */
+ return -1;
+ }
+
+ out[i] = (uint8_t)num;
+ str += 2;
+ }
+
+ if (i != size || *str != '\0') {
+ /* Length mismatch */
+ return -1;
+ }
+
+ return 0;
+}
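+
+/*
+ * Usage sketch (illustrative; the NGUID string is a made-up value):
+ *
+ *   uint8_t nguid[16];
+ *
+ *   if (decode_hex_string_be("ABCDEF0123456789ABCDEF0123456789",
+ *                            nguid, sizeof(nguid)) == 0) {
+ *           // nguid[0] == 0xAB, nguid[1] == 0xCD, ..., nguid[15] == 0x89
+ *   }
+ *
+ * The string must contain exactly 2 * size hex digits; anything shorter,
+ * longer, or non-hex makes the function return -1.
+ */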
+
+static int
+decode_ns_nguid(const struct spdk_json_val *val, void *out)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &str);
+ if (rc == 0) {
+ /* 16-byte NGUID */
+ rc = decode_hex_string_be(str, out, 16);
+ }
+
+ free(str);
+ return rc;
+}
+
+static int
+decode_ns_eui64(const struct spdk_json_val *val, void *out)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &str);
+ if (rc == 0) {
+ /* 8-byte EUI-64 */
+ rc = decode_hex_string_be(str, out, 8);
+ }
+
+ free(str);
+ return rc;
+}
+
+static int
+decode_ns_uuid(const struct spdk_json_val *val, void *out)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &str);
+ if (rc == 0) {
+ rc = spdk_uuid_parse(out, str);
+ }
+
+ free(str);
+ return rc;
+}
+
+struct rpc_get_subsystem {
+ char *tgt_name;
+};
+
+static const struct spdk_json_object_decoder rpc_get_subsystem_decoders[] = {
+ {"tgt_name", offsetof(struct rpc_get_subsystem, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_host *host;
+ struct spdk_nvmf_subsystem_listener *listener;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
+ spdk_json_write_name(w, "subtype");
+ if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) {
+ spdk_json_write_string(w, "NVMe");
+ } else {
+ spdk_json_write_string(w, "Discovery");
+ }
+
+ spdk_json_write_named_array_begin(w, "listen_addresses");
+
+ for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
+ listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
+ const struct spdk_nvme_transport_id *trid;
+ const char *adrfam;
+
+ trid = spdk_nvmf_subsystem_listener_get_trid(listener);
+
+ spdk_json_write_object_begin(w);
+ adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
+ if (adrfam == NULL) {
+ adrfam = "unknown";
+ }
+ /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */
+ spdk_json_write_named_string(w, "transport", trid->trstring);
+ spdk_json_write_named_string(w, "trtype", trid->trstring);
+ spdk_json_write_named_string(w, "adrfam", adrfam);
+ spdk_json_write_named_string(w, "traddr", trid->traddr);
+ spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
+ spdk_json_write_object_end(w);
+ }
+ spdk_json_write_array_end(w);
+
+ spdk_json_write_named_bool(w, "allow_any_host",
+ spdk_nvmf_subsystem_get_allow_any_host(subsystem));
+
+ spdk_json_write_named_array_begin(w, "hosts");
+
+ for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
+ host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "nqn", spdk_nvmf_host_get_nqn(host));
+ spdk_json_write_object_end(w);
+ }
+ spdk_json_write_array_end(w);
+
+ if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) {
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_ns_opts ns_opts;
+ uint32_t max_namespaces;
+
+ spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
+
+ spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem));
+
+ max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
+ if (max_namespaces != 0) {
+ spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
+ }
+
+ spdk_json_write_named_array_begin(w, "namespaces");
+ for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+ ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+ spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_int32(w, "nsid", spdk_nvmf_ns_get_id(ns));
+ spdk_json_write_named_string(w, "bdev_name",
+ spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
+ /* NOTE: "name" is kept for compatibility only - new code should use bdev_name. */
+ spdk_json_write_named_string(w, "name",
+ spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
+
+ if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
+ spdk_json_write_name(w, "nguid");
+ json_write_hex_str(w, ns_opts.nguid, sizeof(ns_opts.nguid));
+ }
+
+ if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
+ spdk_json_write_name(w, "eui64");
+ json_write_hex_str(w, ns_opts.eui64, sizeof(ns_opts.eui64));
+ }
+
+ if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
+ char uuid_str[SPDK_UUID_STRING_LEN];
+
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+ }
+
+ spdk_json_write_object_end(w);
+ }
+ spdk_json_write_array_end(w);
+ }
+ spdk_json_write_object_end(w);
+}
+
+static void
+rpc_nvmf_get_subsystems(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_subsystem req = { 0 };
+ struct spdk_json_write_ctx *w;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ if (params) {
+ if (spdk_json_decode_object(params, rpc_get_subsystem_decoders,
+ SPDK_COUNTOF(rpc_get_subsystem_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ return;
+ }
+ }
+
+ tgt = spdk_nvmf_get_tgt(req.tgt_name);
+ if (!tgt) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ free(req.tgt_name);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_array_begin(w);
+ subsystem = spdk_nvmf_subsystem_get_first(tgt);
+ while (subsystem) {
+ dump_nvmf_subsystem(w, subsystem);
+ subsystem = spdk_nvmf_subsystem_get_next(subsystem);
+ }
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+ free(req.tgt_name);
+}
+SPDK_RPC_REGISTER("nvmf_get_subsystems", rpc_nvmf_get_subsystems, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_get_subsystems, get_nvmf_subsystems)
+
+struct rpc_subsystem_create {
+ char *nqn;
+ char *serial_number;
+ char *model_number;
+ char *tgt_name;
+ uint32_t max_namespaces;
+ bool allow_any_host;
+};
+
+static const struct spdk_json_object_decoder rpc_subsystem_create_decoders[] = {
+ {"nqn", offsetof(struct rpc_subsystem_create, nqn), spdk_json_decode_string},
+ {"serial_number", offsetof(struct rpc_subsystem_create, serial_number), spdk_json_decode_string, true},
+ {"model_number", offsetof(struct rpc_subsystem_create, model_number), spdk_json_decode_string, true},
+ {"tgt_name", offsetof(struct rpc_subsystem_create, tgt_name), spdk_json_decode_string, true},
+ {"max_namespaces", offsetof(struct rpc_subsystem_create, max_namespaces), spdk_json_decode_uint32, true},
+ {"allow_any_host", offsetof(struct rpc_subsystem_create, allow_any_host), spdk_json_decode_bool, true},
+};
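+
+/*
+ * Example "nvmf_create_subsystem" parameters accepted by the decoder above
+ * (illustrative; the NQN, serial number and model number are hypothetical):
+ *
+ *   {
+ *     "nqn": "nqn.2016-06.io.spdk:cnode1",
+ *     "allow_any_host": false,
+ *     "serial_number": "SPDK00000000000001",
+ *     "model_number": "SPDK bdev Controller",
+ *     "max_namespaces": 32
+ *   }
+ */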
+
+static void
+rpc_nvmf_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (!status) {
+ struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ } else {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Subsystem %s start failed",
+ subsystem->subnqn);
+ spdk_nvmf_subsystem_destroy(subsystem);
+ }
+}
+
+static void
+rpc_nvmf_create_subsystem(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_subsystem_create *req;
+ struct spdk_nvmf_subsystem *subsystem = NULL;
+ struct spdk_nvmf_tgt *tgt;
+ int rc = -1;
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ SPDK_ERRLOG("Memory allocation failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation failed");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, rpc_subsystem_create_decoders,
+ SPDK_COUNTOF(rpc_subsystem_create_decoders),
+ req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto cleanup;
+ }
+
+ tgt = spdk_nvmf_get_tgt(req->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find target %s\n", req->tgt_name);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find target %s", req->tgt_name);
+ goto cleanup;
+ }
+
+ subsystem = spdk_nvmf_subsystem_create(tgt, req->nqn, SPDK_NVMF_SUBTYPE_NVME,
+ req->max_namespaces);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to create subsystem %s\n", req->nqn);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to create subsystem %s", req->nqn);
+ goto cleanup;
+ }
+
+ if (req->serial_number) {
+ if (spdk_nvmf_subsystem_set_sn(subsystem, req->serial_number)) {
+ SPDK_ERRLOG("Subsystem %s: invalid serial number '%s'\n", req->nqn, req->serial_number);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid SN %s", req->serial_number);
+ goto cleanup;
+ }
+ }
+
+ if (req->model_number) {
+ if (spdk_nvmf_subsystem_set_mn(subsystem, req->model_number)) {
+ SPDK_ERRLOG("Subsystem %s: invalid model number '%s'\n", req->nqn, req->model_number);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid MN %s", req->model_number);
+ goto cleanup;
+ }
+ }
+
+ spdk_nvmf_subsystem_set_allow_any_host(subsystem, req->allow_any_host);
+
+ rc = spdk_nvmf_subsystem_start(subsystem,
+ rpc_nvmf_subsystem_started,
+ request);
+
+cleanup:
+ free(req->nqn);
+ free(req->tgt_name);
+ free(req->serial_number);
+ free(req->model_number);
+ free(req);
+
+ if (rc && subsystem) {
+ spdk_nvmf_subsystem_destroy(subsystem);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_create_subsystem", rpc_nvmf_create_subsystem, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_create_subsystem, nvmf_subsystem_create)
+
+struct rpc_delete_subsystem {
+ char *nqn;
+ char *tgt_name;
+};
+
+static void
+free_rpc_delete_subsystem(struct rpc_delete_subsystem *r)
+{
+ free(r->nqn);
+ free(r->tgt_name);
+}
+
+static void
+rpc_nvmf_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ nvmf_subsystem_remove_all_listeners(subsystem, true);
+ spdk_nvmf_subsystem_destroy(subsystem);
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_subsystem_decoders[] = {
+ {"nqn", offsetof(struct rpc_delete_subsystem, nqn), spdk_json_decode_string},
+ {"tgt_name", offsetof(struct rpc_delete_subsystem, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_nvmf_delete_subsystem(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_subsystem req = { 0 };
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ if (spdk_json_decode_object(params, rpc_delete_subsystem_decoders,
+ SPDK_COUNTOF(rpc_delete_subsystem_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.nqn == NULL) {
+ SPDK_ERRLOG("missing name param\n");
+ goto invalid;
+ }
+
+ tgt = spdk_nvmf_get_tgt(req.tgt_name);
+ if (!tgt) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ goto invalid_custom_response;
+ }
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, req.nqn);
+ if (!subsystem) {
+ goto invalid;
+ }
+
+ free_rpc_delete_subsystem(&req);
+
+ spdk_nvmf_subsystem_stop(subsystem,
+ rpc_nvmf_subsystem_stopped,
+ request);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+invalid_custom_response:
+ free_rpc_delete_subsystem(&req);
+}
+SPDK_RPC_REGISTER("nvmf_delete_subsystem", rpc_nvmf_delete_subsystem, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_delete_subsystem, delete_nvmf_subsystem)
+
+struct rpc_listen_address {
+ char *transport;
+ char *adrfam;
+ char *traddr;
+ char *trsvcid;
+};
+
+#define RPC_MAX_LISTEN_ADDRESSES 255
+#define RPC_MAX_NAMESPACES 255
+
+struct rpc_listen_addresses {
+ size_t num_listen_address;
+ struct rpc_listen_address addresses[RPC_MAX_LISTEN_ADDRESSES];
+};
+
+static const struct spdk_json_object_decoder rpc_listen_address_decoders[] = {
+ /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */
+ {"transport", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true},
+ {"trtype", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true},
+ {"adrfam", offsetof(struct rpc_listen_address, adrfam), spdk_json_decode_string, true},
+ {"traddr", offsetof(struct rpc_listen_address, traddr), spdk_json_decode_string},
+ {"trsvcid", offsetof(struct rpc_listen_address, trsvcid), spdk_json_decode_string},
+};
+
+static int
+decode_rpc_listen_address(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_listen_address *req = (struct rpc_listen_address *)out;
+ if (spdk_json_decode_object(val, rpc_listen_address_decoders,
+ SPDK_COUNTOF(rpc_listen_address_decoders),
+ req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+free_rpc_listen_address(struct rpc_listen_address *r)
+{
+ free(r->transport);
+ free(r->adrfam);
+ free(r->traddr);
+ free(r->trsvcid);
+}
+
+enum nvmf_rpc_listen_op {
+ NVMF_RPC_LISTEN_ADD,
+ NVMF_RPC_LISTEN_REMOVE,
+};
+
+struct nvmf_rpc_listener_ctx {
+ char *nqn;
+ char *tgt_name;
+ struct spdk_nvmf_tgt *tgt;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct rpc_listen_address address;
+
+ struct spdk_jsonrpc_request *request;
+ struct spdk_nvme_transport_id trid;
+ enum nvmf_rpc_listen_op op;
+ bool response_sent;
+};
+
+static const struct spdk_json_object_decoder nvmf_rpc_listener_decoder[] = {
+ {"nqn", offsetof(struct nvmf_rpc_listener_ctx, nqn), spdk_json_decode_string},
+ {"listen_address", offsetof(struct nvmf_rpc_listener_ctx, address), decode_rpc_listen_address},
+ {"tgt_name", offsetof(struct nvmf_rpc_listener_ctx, tgt_name), spdk_json_decode_string, true},
+};
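+
+/*
+ * Example listener RPC parameters (e.g. for nvmf_subsystem_add_listener)
+ * accepted by the decoder above (illustrative; the address values are
+ * hypothetical):
+ *
+ *   {
+ *     "nqn": "nqn.2016-06.io.spdk:cnode1",
+ *     "listen_address": {
+ *       "trtype": "TCP",
+ *       "adrfam": "IPv4",
+ *       "traddr": "127.0.0.1",
+ *       "trsvcid": "4420"
+ *     }
+ *   }
+ */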
+
+static void
+nvmf_rpc_listener_ctx_free(struct nvmf_rpc_listener_ctx *ctx)
+{
+ free(ctx->nqn);
+ free(ctx->tgt_name);
+ free_rpc_listen_address(&ctx->address);
+ free(ctx);
+}
+
+static void
+nvmf_rpc_listen_resumed(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_listener_ctx *ctx = cb_arg;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_json_write_ctx *w;
+
+ request = ctx->request;
+ if (ctx->response_sent) {
+ /* If an error occurred, the response has already been sent. */
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ nvmf_rpc_listener_ctx_free(ctx);
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+nvmf_rpc_subsystem_listen(void *cb_arg, int status)
+{
+ struct nvmf_rpc_listener_ctx *ctx = cb_arg;
+
+ if (status) {
+ /* Destroy the listener that we just created. Ignore the error code because
+ * the RPC is failing already anyway. */
+ spdk_nvmf_tgt_stop_listen(ctx->tgt, &ctx->trid);
+
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ }
+
+ if (spdk_nvmf_subsystem_resume(ctx->subsystem, nvmf_rpc_listen_resumed, ctx)) {
+ if (!ctx->response_sent) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ }
+ nvmf_rpc_listener_ctx_free(ctx);
+ /* Can't really do anything to recover here - subsystem will remain paused. */
+ }
+}
+
+static void
+nvmf_rpc_listen_paused(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_listener_ctx *ctx = cb_arg;
+ int rc;
+
+ if (ctx->op == NVMF_RPC_LISTEN_ADD) {
+ if (!nvmf_subsystem_find_listener(subsystem, &ctx->trid)) {
+ rc = spdk_nvmf_tgt_listen(ctx->tgt, &ctx->trid);
+ if (rc == 0) {
+ spdk_nvmf_subsystem_add_listener(ctx->subsystem, &ctx->trid, nvmf_rpc_subsystem_listen, ctx);
+ return;
+ }
+
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ }
+ } else if (ctx->op == NVMF_RPC_LISTEN_REMOVE) {
+ if (spdk_nvmf_subsystem_remove_listener(subsystem, &ctx->trid)) {
+ SPDK_ERRLOG("Unable to remove listener.\n");
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ }
+ spdk_nvmf_tgt_stop_listen(ctx->tgt, &ctx->trid);
+ } else {
+ SPDK_UNREACHABLE();
+ }
+
+ if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_listen_resumed, ctx)) {
+ if (!ctx->response_sent) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ }
+ nvmf_rpc_listener_ctx_free(ctx);
+ /* Can't really do anything to recover here - subsystem will remain paused. */
+ }
+}
+
+static int
+rpc_listen_address_to_trid(const struct rpc_listen_address *address,
+ struct spdk_nvme_transport_id *trid)
+{
+ size_t len;
+
+ memset(trid, 0, sizeof(*trid));
+
+ if (spdk_nvme_transport_id_populate_trstring(trid, address->transport)) {
+ SPDK_ERRLOG("Invalid transport string: %s\n", address->transport);
+ return -EINVAL;
+ }
+
+ if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, address->transport)) {
+ SPDK_ERRLOG("Invalid transport type: %s\n", address->transport);
+ return -EINVAL;
+ }
+
+ if (address->adrfam) {
+ if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, address->adrfam)) {
+ SPDK_ERRLOG("Invalid adrfam: %s\n", address->adrfam);
+ return -EINVAL;
+ }
+ } else {
+ trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
+ }
+
+ len = strlen(address->traddr);
+ if (len > sizeof(trid->traddr) - 1) {
+ SPDK_ERRLOG("Transport address longer than %zu characters: %s\n",
+ sizeof(trid->traddr) - 1, address->traddr);
+ return -EINVAL;
+ }
+ memcpy(trid->traddr, address->traddr, len + 1);
+
+ len = strlen(address->trsvcid);
+ if (len > sizeof(trid->trsvcid) - 1) {
+ SPDK_ERRLOG("Transport service id longer than %zu characters: %s\n",
+ sizeof(trid->trsvcid) - 1, address->trsvcid);
+ return -EINVAL;
+ }
+ memcpy(trid->trsvcid, address->trsvcid, len + 1);
+
+ return 0;
+}
+
+static void
+rpc_nvmf_subsystem_add_listener(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_listener_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ ctx->request = request;
+
+ if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder,
+ SPDK_COUNTOF(nvmf_rpc_listener_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+ ctx->tgt = tgt;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ ctx->subsystem = subsystem;
+
+ if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ ctx->op = NVMF_RPC_LISTEN_ADD;
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_listener_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_add_listener", rpc_nvmf_subsystem_add_listener,
+ SPDK_RPC_RUNTIME);
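+
+/*
+ * Illustrative example (not part of the original source; all values are
+ * placeholders): a "params" object accepted by nvmf_rpc_listener_decoder and
+ * decode_rpc_listen_address above could look like this:
+ *
+ *   {
+ *     "nqn": "nqn.2016-06.io.spdk:cnode1",
+ *     "listen_address": {
+ *       "trtype": "TCP",
+ *       "adrfam": "IPv4",
+ *       "traddr": "127.0.0.1",
+ *       "trsvcid": "4420"
+ *     }
+ *   }
+ */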
+
+static void
+rpc_nvmf_subsystem_remove_listener(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_listener_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ ctx->request = request;
+
+ if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder,
+ SPDK_COUNTOF(nvmf_rpc_listener_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+ ctx->tgt = tgt;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ ctx->subsystem = subsystem;
+
+ if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ nvmf_rpc_listener_ctx_free(ctx);
+ return;
+ }
+
+ ctx->op = NVMF_RPC_LISTEN_REMOVE;
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_listener_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_remove_listener", rpc_nvmf_subsystem_remove_listener,
+ SPDK_RPC_RUNTIME);
+
+struct spdk_nvmf_ns_params {
+ char *bdev_name;
+ char *ptpl_file;
+ uint32_t nsid;
+ char nguid[16];
+ char eui64[8];
+ struct spdk_uuid uuid;
+};
+
+struct rpc_namespaces {
+ size_t num_ns;
+ struct spdk_nvmf_ns_params ns_params[RPC_MAX_NAMESPACES];
+};
+
+static const struct spdk_json_object_decoder rpc_ns_params_decoders[] = {
+ {"nsid", offsetof(struct spdk_nvmf_ns_params, nsid), spdk_json_decode_uint32, true},
+ {"bdev_name", offsetof(struct spdk_nvmf_ns_params, bdev_name), spdk_json_decode_string},
+ {"ptpl_file", offsetof(struct spdk_nvmf_ns_params, ptpl_file), spdk_json_decode_string, true},
+ {"nguid", offsetof(struct spdk_nvmf_ns_params, nguid), decode_ns_nguid, true},
+ {"eui64", offsetof(struct spdk_nvmf_ns_params, eui64), decode_ns_eui64, true},
+ {"uuid", offsetof(struct spdk_nvmf_ns_params, uuid), decode_ns_uuid, true},
+};
+
+static int
+decode_rpc_ns_params(const struct spdk_json_val *val, void *out)
+{
+ struct spdk_nvmf_ns_params *ns_params = out;
+
+ return spdk_json_decode_object(val, rpc_ns_params_decoders,
+ SPDK_COUNTOF(rpc_ns_params_decoders),
+ ns_params);
+}
+
+struct nvmf_rpc_ns_ctx {
+ char *nqn;
+ char *tgt_name;
+ struct spdk_nvmf_ns_params ns_params;
+
+ struct spdk_jsonrpc_request *request;
+ bool response_sent;
+};
+
+static const struct spdk_json_object_decoder nvmf_rpc_subsystem_ns_decoder[] = {
+ {"nqn", offsetof(struct nvmf_rpc_ns_ctx, nqn), spdk_json_decode_string},
+ {"namespace", offsetof(struct nvmf_rpc_ns_ctx, ns_params), decode_rpc_ns_params},
+ {"tgt_name", offsetof(struct nvmf_rpc_ns_ctx, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+nvmf_rpc_ns_ctx_free(struct nvmf_rpc_ns_ctx *ctx)
+{
+ free(ctx->nqn);
+ free(ctx->tgt_name);
+ free(ctx->ns_params.bdev_name);
+ free(ctx->ns_params.ptpl_file);
+ free(ctx);
+}
+
+static void
+nvmf_rpc_ns_resumed(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_ns_ctx *ctx = cb_arg;
+ struct spdk_jsonrpc_request *request = ctx->request;
+ uint32_t nsid = ctx->ns_params.nsid;
+ bool response_sent = ctx->response_sent;
+ struct spdk_json_write_ctx *w;
+
+ nvmf_rpc_ns_ctx_free(ctx);
+
+ if (response_sent) {
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_uint32(w, nsid);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+nvmf_rpc_ns_paused(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_ns_ctx *ctx = cb_arg;
+ struct spdk_nvmf_ns_opts ns_opts;
+ struct spdk_bdev *bdev;
+
+ bdev = spdk_bdev_get_by_name(ctx->ns_params.bdev_name);
+ if (!bdev) {
+ SPDK_ERRLOG("No bdev with name %s\n", ctx->ns_params.bdev_name);
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ goto resume;
+ }
+
+ spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts));
+ ns_opts.nsid = ctx->ns_params.nsid;
+
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(ctx->ns_params.nguid), "size mismatch");
+ memcpy(ns_opts.nguid, ctx->ns_params.nguid, sizeof(ns_opts.nguid));
+
+ SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(ctx->ns_params.eui64), "size mismatch");
+ memcpy(ns_opts.eui64, ctx->ns_params.eui64, sizeof(ns_opts.eui64));
+
+ if (!spdk_mem_all_zero(&ctx->ns_params.uuid, sizeof(ctx->ns_params.uuid))) {
+ ns_opts.uuid = ctx->ns_params.uuid;
+ }
+
+ ctx->ns_params.nsid = spdk_nvmf_subsystem_add_ns(subsystem, bdev, &ns_opts, sizeof(ns_opts),
+ ctx->ns_params.ptpl_file);
+ if (ctx->ns_params.nsid == 0) {
+ SPDK_ERRLOG("Unable to add namespace\n");
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ goto resume;
+ }
+
+resume:
+ if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_ns_resumed, ctx)) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_ns_ctx_free(ctx);
+ }
+}
+
+static void
+rpc_nvmf_subsystem_add_ns(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_ns_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_subsystem_ns_decoder,
+ SPDK_COUNTOF(nvmf_rpc_subsystem_ns_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_ns_ctx_free(ctx);
+ return;
+ }
+
+ ctx->request = request;
+ ctx->response_sent = false;
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_ns_ctx_free(ctx);
+ return;
+ }
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_ns_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_ns_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_ns_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_add_ns", rpc_nvmf_subsystem_add_ns, SPDK_RPC_RUNTIME)
+
+struct nvmf_rpc_remove_ns_ctx {
+ char *nqn;
+ char *tgt_name;
+ uint32_t nsid;
+
+ struct spdk_jsonrpc_request *request;
+ bool response_sent;
+};
+
+static const struct spdk_json_object_decoder nvmf_rpc_subsystem_remove_ns_decoder[] = {
+ {"nqn", offsetof(struct nvmf_rpc_remove_ns_ctx, nqn), spdk_json_decode_string},
+ {"nsid", offsetof(struct nvmf_rpc_remove_ns_ctx, nsid), spdk_json_decode_uint32},
+ {"tgt_name", offsetof(struct nvmf_rpc_remove_ns_ctx, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+nvmf_rpc_remove_ns_ctx_free(struct nvmf_rpc_remove_ns_ctx *ctx)
+{
+ free(ctx->nqn);
+ free(ctx->tgt_name);
+ free(ctx);
+}
+
+static void
+nvmf_rpc_remove_ns_resumed(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg;
+ struct spdk_jsonrpc_request *request = ctx->request;
+ bool response_sent = ctx->response_sent;
+ struct spdk_json_write_ctx *w;
+
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+
+ if (response_sent) {
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+nvmf_rpc_remove_ns_paused(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg;
+ int ret;
+
+ ret = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
+ if (ret < 0) {
+ SPDK_ERRLOG("Unable to remove namespace ID %u\n", ctx->nsid);
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ ctx->response_sent = true;
+ }
+
+ if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_remove_ns_resumed, ctx)) {
+ if (!ctx->response_sent) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ }
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+ }
+}
+
+static void
+rpc_nvmf_subsystem_remove_ns(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_remove_ns_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_subsystem_remove_ns_decoder,
+ SPDK_COUNTOF(nvmf_rpc_subsystem_remove_ns_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+ return;
+ }
+
+ ctx->request = request;
+ ctx->response_sent = false;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_remove_ns_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_remove_ns_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_remove_ns", rpc_nvmf_subsystem_remove_ns, SPDK_RPC_RUNTIME)
+
+enum nvmf_rpc_host_op {
+ NVMF_RPC_HOST_ADD,
+ NVMF_RPC_HOST_REMOVE,
+ NVMF_RPC_HOST_ALLOW_ANY,
+};
+
+struct nvmf_rpc_host_ctx {
+ struct spdk_jsonrpc_request *request;
+
+ char *nqn;
+ char *host;
+ char *tgt_name;
+
+ enum nvmf_rpc_host_op op;
+
+ bool allow_any_host;
+
+ bool response_sent;
+};
+
+static const struct spdk_json_object_decoder nvmf_rpc_subsystem_host_decoder[] = {
+ {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string},
+ {"host", offsetof(struct nvmf_rpc_host_ctx, host), spdk_json_decode_string},
+ {"tgt_name", offsetof(struct nvmf_rpc_host_ctx, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+nvmf_rpc_host_ctx_free(struct nvmf_rpc_host_ctx *ctx)
+{
+ free(ctx->nqn);
+ free(ctx->host);
+ free(ctx->tgt_name);
+ free(ctx);
+}
+
+static void
+nvmf_rpc_host_resumed(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_host_ctx *ctx = cb_arg;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_json_write_ctx *w;
+ bool response_sent = ctx->response_sent;
+
+ request = ctx->request;
+ nvmf_rpc_host_ctx_free(ctx);
+
+ if (response_sent) {
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+nvmf_rpc_host_paused(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct nvmf_rpc_host_ctx *ctx = cb_arg;
+ int rc = -1;
+
+ switch (ctx->op) {
+ case NVMF_RPC_HOST_ADD:
+ rc = spdk_nvmf_subsystem_add_host(subsystem, ctx->host);
+ break;
+ case NVMF_RPC_HOST_REMOVE:
+ rc = spdk_nvmf_subsystem_remove_host(subsystem, ctx->host);
+ break;
+ case NVMF_RPC_HOST_ALLOW_ANY:
+ rc = spdk_nvmf_subsystem_set_allow_any_host(subsystem, ctx->allow_any_host);
+ break;
+ }
+
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ ctx->response_sent = true;
+ }
+
+ if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_host_resumed, ctx)) {
+ if (!ctx->response_sent) {
+ spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ }
+ nvmf_rpc_host_ctx_free(ctx);
+ }
+}
+
+static void
+rpc_nvmf_subsystem_add_host(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_host_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder,
+ SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ ctx->request = request;
+ ctx->op = NVMF_RPC_HOST_ADD;
+ ctx->response_sent = false;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_host_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_add_host", rpc_nvmf_subsystem_add_host, SPDK_RPC_RUNTIME)
+
+static void
+rpc_nvmf_subsystem_remove_host(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_host_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder,
+ SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ ctx->request = request;
+ ctx->op = NVMF_RPC_HOST_REMOVE;
+ ctx->response_sent = false;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_host_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_remove_host", rpc_nvmf_subsystem_remove_host,
+ SPDK_RPC_RUNTIME)
+
+static const struct spdk_json_object_decoder nvmf_rpc_subsystem_any_host_decoder[] = {
+ {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string},
+ {"allow_any_host", offsetof(struct nvmf_rpc_host_ctx, allow_any_host), spdk_json_decode_bool},
+ {"tgt_name", offsetof(struct nvmf_rpc_host_ctx, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_nvmf_subsystem_allow_any_host(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_host_ctx *ctx;
+ struct spdk_nvmf_subsystem *subsystem;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_subsystem_any_host_decoder,
+ SPDK_COUNTOF(nvmf_rpc_subsystem_any_host_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ ctx->request = request;
+ ctx->op = NVMF_RPC_HOST_ALLOW_ANY;
+ ctx->response_sent = false;
+
+ subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn);
+ if (!subsystem) {
+ SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_host_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ nvmf_rpc_host_ctx_free(ctx);
+ }
+}
+SPDK_RPC_REGISTER("nvmf_subsystem_allow_any_host", rpc_nvmf_subsystem_allow_any_host,
+ SPDK_RPC_RUNTIME)
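+
+/*
+ * Illustrative example (placeholder NQN, not part of the original source):
+ * a "params" object that disables the host whitelist so any host may connect.
+ *
+ *   { "nqn": "nqn.2016-06.io.spdk:cnode1", "allow_any_host": true }
+ */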
+
+struct nvmf_rpc_target_ctx {
+ char *name;
+ uint32_t max_subsystems;
+};
+
+static const struct spdk_json_object_decoder nvmf_rpc_create_target_decoder[] = {
+ {"name", offsetof(struct nvmf_rpc_target_ctx, name), spdk_json_decode_string},
+ {"max_subsystems", offsetof(struct nvmf_rpc_target_ctx, max_subsystems), spdk_json_decode_uint32, true},
+};
+
+static void
+rpc_nvmf_create_target(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct spdk_nvmf_target_opts opts;
+ struct nvmf_rpc_target_ctx ctx = {0};
+ struct spdk_nvmf_tgt *tgt;
+ struct spdk_json_write_ctx *w;
+
+ /* Decode the target creation parameters. */
+ if (spdk_json_decode_object(params, nvmf_rpc_create_target_decoder,
+ SPDK_COUNTOF(nvmf_rpc_create_target_decoder),
+ &ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free(ctx.name);
+ return;
+ }
+
+ snprintf(opts.name, NVMF_TGT_NAME_MAX_LENGTH, "%s", ctx.name);
+ opts.max_subsystems = ctx.max_subsystems;
+
+ if (spdk_nvmf_get_tgt(opts.name) != NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Target already exists.");
+ free(ctx.name);
+ return;
+ }
+
+ tgt = spdk_nvmf_tgt_create(&opts);
+
+ if (tgt == NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to create the requested target.");
+ free(ctx.name);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_string(w, spdk_nvmf_tgt_get_name(tgt));
+ spdk_jsonrpc_end_result(request, w);
+ free(ctx.name);
+}
+SPDK_RPC_REGISTER("nvmf_create_target", rpc_nvmf_create_target, SPDK_RPC_RUNTIME);
+
+static const struct spdk_json_object_decoder nvmf_rpc_destroy_target_decoder[] = {
+ {"name", offsetof(struct nvmf_rpc_target_ctx, name), spdk_json_decode_string},
+};
+
+static void
+nvmf_rpc_destroy_target_done(void *ctx, int status)
+{
+ struct spdk_jsonrpc_request *request = ctx;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+rpc_nvmf_delete_target(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_target_ctx ctx = {0};
+ struct spdk_nvmf_tgt *tgt;
+
+ /* Decode the name of the target to delete. */
+ if (spdk_json_decode_object(params, nvmf_rpc_destroy_target_decoder,
+ SPDK_COUNTOF(nvmf_rpc_destroy_target_decoder),
+ &ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free(ctx.name);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx.name);
+
+ if (tgt == NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "The specified target doesn't exist, cannot delete it.");
+ free(ctx.name);
+ return;
+ }
+
+ spdk_nvmf_tgt_destroy(tgt, nvmf_rpc_destroy_target_done, request);
+ free(ctx.name);
+}
+SPDK_RPC_REGISTER("nvmf_delete_target", rpc_nvmf_delete_target, SPDK_RPC_RUNTIME);
+
+static void
+rpc_nvmf_get_targets(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_nvmf_tgt *tgt;
+ const char *name;
+
+ if (params != NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "nvmf_get_targets has no parameters.");
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_array_begin(w);
+
+ tgt = spdk_nvmf_get_first_tgt();
+
+ while (tgt != NULL) {
+ name = spdk_nvmf_tgt_get_name(tgt);
+ spdk_json_write_string(w, name);
+ tgt = spdk_nvmf_get_next_tgt(tgt);
+ }
+
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+}
+SPDK_RPC_REGISTER("nvmf_get_targets", rpc_nvmf_get_targets, SPDK_RPC_RUNTIME);
+
+struct nvmf_rpc_create_transport_ctx {
+ char *trtype;
+ char *tgt_name;
+ struct spdk_nvmf_transport_opts opts;
+ struct spdk_jsonrpc_request *request;
+};
+
+/**
+ * `max_qpairs_per_ctrlr` counts both the admin and the IO qpairs, which confuses
+ * users when they configure a transport via RPC. The `max_qpairs_per_ctrlr` RPC
+ * parameter is therefore deprecated in favor of `max_io_qpairs_per_ctrlr`.
+ * The internal logic remains unchanged, however, and SPDK still expects
+ * spdk_nvmf_transport_opts::max_qpairs_per_ctrlr to include the admin qpair.
+ * This function parses the number of IO qpairs and adds 1 for the admin qpair.
+ */
+static int
+nvmf_rpc_decode_max_io_qpairs(const struct spdk_json_val *val, void *out)
+{
+ uint16_t *i = out;
+ int rc;
+
+ rc = spdk_json_number_to_uint16(val, i);
+ if (rc == 0) {
+ (*i)++;
+ }
+
+ return rc;
+}
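+
+/*
+ * Illustrative note (not part of the original source): with the decoder above,
+ * an RPC that passes "max_io_qpairs_per_ctrlr": 8 results in
+ * opts.max_qpairs_per_ctrlr == 9, i.e. 8 IO qpairs plus 1 admin qpair.
+ */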
+
+/**
+ * This function parses the deprecated `max_qpairs_per_ctrlr` parameter and warns
+ * the user to use the new `max_io_qpairs_per_ctrlr` parameter instead.
+ */
+static int
+nvmf_rpc_decode_max_qpairs(const struct spdk_json_val *val, void *out)
+{
+ uint16_t *i = out;
+ int rc;
+
+ rc = spdk_json_number_to_uint16(val, i);
+ if (rc == 0) {
+ SPDK_WARNLOG("Parameter max_qpairs_per_ctrlr is deprecated, use max_io_qpairs_per_ctrlr instead.\n");
+ }
+
+ return rc;
+}
+
+static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[] = {
+ { "trtype", offsetof(struct nvmf_rpc_create_transport_ctx, trtype), spdk_json_decode_string},
+ {
+ "max_queue_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_queue_depth),
+ spdk_json_decode_uint16, true
+ },
+ {
+ "max_qpairs_per_ctrlr", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_qpairs_per_ctrlr),
+ nvmf_rpc_decode_max_qpairs, true
+ },
+ {
+ "max_io_qpairs_per_ctrlr", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_qpairs_per_ctrlr),
+ nvmf_rpc_decode_max_io_qpairs, true
+ },
+ {
+ "in_capsule_data_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.in_capsule_data_size),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "max_io_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_io_size),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "io_unit_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.io_unit_size),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "max_aq_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_aq_depth),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "num_shared_buffers", offsetof(struct nvmf_rpc_create_transport_ctx, opts.num_shared_buffers),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "buf_cache_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.buf_cache_size),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "max_srq_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_srq_depth),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "no_srq", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_srq),
+ spdk_json_decode_bool, true
+ },
+ {
+ "c2h_success", offsetof(struct nvmf_rpc_create_transport_ctx, opts.c2h_success),
+ spdk_json_decode_bool, true
+ },
+ {
+ "dif_insert_or_strip", offsetof(struct nvmf_rpc_create_transport_ctx, opts.dif_insert_or_strip),
+ spdk_json_decode_bool, true
+ },
+ {
+ "sock_priority", offsetof(struct nvmf_rpc_create_transport_ctx, opts.sock_priority),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "acceptor_backlog", offsetof(struct nvmf_rpc_create_transport_ctx, opts.acceptor_backlog),
+ spdk_json_decode_int32, true
+ },
+ {
+ "abort_timeout_sec", offsetof(struct nvmf_rpc_create_transport_ctx, opts.abort_timeout_sec),
+ spdk_json_decode_uint32, true
+ },
+ {
+ "tgt_name", offsetof(struct nvmf_rpc_create_transport_ctx, tgt_name),
+ spdk_json_decode_string, true
+ },
+};
+
+static void
+nvmf_rpc_create_transport_ctx_free(struct nvmf_rpc_create_transport_ctx *ctx)
+{
+ free(ctx->trtype);
+ free(ctx->tgt_name);
+ free(ctx);
+}
+
+static void
+nvmf_rpc_tgt_add_transport_done(void *cb_arg, int status)
+{
+ struct nvmf_rpc_create_transport_ctx *ctx = cb_arg;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_json_write_ctx *w;
+
+ request = ctx->request;
+ nvmf_rpc_create_transport_ctx_free(ctx);
+
+ if (status) {
+ SPDK_ERRLOG("Failed to add transport to tgt.(%d)\n", status);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to add transport to tgt.(%d)\n",
+ status);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+rpc_nvmf_create_transport(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct nvmf_rpc_create_transport_ctx *ctx;
+ enum spdk_nvme_transport_type trtype;
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_tgt *tgt;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory");
+ return;
+ }
+
+ /* Decode parameters the first time to get the transport type */
+ if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder,
+ SPDK_COUNTOF(nvmf_rpc_create_transport_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!tgt) {
+ SPDK_ERRLOG("Unable to find a target object.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvme_transport_id_parse_trtype(&trtype, ctx->trtype)) {
+ SPDK_ERRLOG("Invalid transport type '%s'\n", ctx->trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid transport type '%s'\n", ctx->trtype);
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ /* Initialize all the transport options (based on the transport type) and decode the
+ * parameters again to apply any options passed in the RPC create transport call.
+ */
+ if (!spdk_nvmf_transport_opts_init(ctx->trtype, &ctx->opts)) {
+ /* This can happen if the user specifies the PCIe transport type, which isn't
+ * valid for NVMe-oF.
+ */
+ SPDK_ERRLOG("Invalid transport type '%s'\n", ctx->trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid transport type '%s'\n", ctx->trtype);
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder,
+ SPDK_COUNTOF(nvmf_rpc_create_transport_decoder),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ if (spdk_nvmf_tgt_get_transport(tgt, ctx->trtype)) {
+ SPDK_ERRLOG("Transport type '%s' already exists\n", ctx->trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Transport type '%s' already exists\n", ctx->trtype);
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ transport = spdk_nvmf_transport_create(ctx->trtype, &ctx->opts);
+
+ if (!transport) {
+ SPDK_ERRLOG("Transport type '%s' create failed\n", ctx->trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Transport type '%s' create failed\n", ctx->trtype);
+ nvmf_rpc_create_transport_ctx_free(ctx);
+ return;
+ }
+
+ /* add transport to target */
+ ctx->request = request;
+ spdk_nvmf_tgt_add_transport(tgt, transport, nvmf_rpc_tgt_add_transport_done, ctx);
+}
+SPDK_RPC_REGISTER("nvmf_create_transport", rpc_nvmf_create_transport, SPDK_RPC_RUNTIME)
+
+static void
+dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *transport)
+{
+ const struct spdk_nvmf_transport_opts *opts = spdk_nvmf_get_transport_opts(transport);
+ spdk_nvme_transport_type_t type = spdk_nvmf_get_transport_type(transport);
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "trtype", spdk_nvmf_get_transport_name(transport));
+ spdk_json_write_named_uint32(w, "max_queue_depth", opts->max_queue_depth);
+ spdk_json_write_named_uint32(w, "max_io_qpairs_per_ctrlr", opts->max_qpairs_per_ctrlr - 1);
+ spdk_json_write_named_uint32(w, "in_capsule_data_size", opts->in_capsule_data_size);
+ spdk_json_write_named_uint32(w, "max_io_size", opts->max_io_size);
+ spdk_json_write_named_uint32(w, "io_unit_size", opts->io_unit_size);
+ spdk_json_write_named_uint32(w, "max_aq_depth", opts->max_aq_depth);
+ spdk_json_write_named_uint32(w, "num_shared_buffers", opts->num_shared_buffers);
+ spdk_json_write_named_uint32(w, "buf_cache_size", opts->buf_cache_size);
+ spdk_json_write_named_bool(w, "dif_insert_or_strip", opts->dif_insert_or_strip);
+ if (type == SPDK_NVME_TRANSPORT_RDMA) {
+ spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth);
+ spdk_json_write_named_bool(w, "no_srq", opts->no_srq);
+ spdk_json_write_named_int32(w, "acceptor_backlog", opts->acceptor_backlog);
+ } else if (type == SPDK_NVME_TRANSPORT_TCP) {
+ spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success);
+ spdk_json_write_named_uint32(w, "sock_priority", opts->sock_priority);
+ }
+ spdk_json_write_named_uint32(w, "abort_timeout_sec", opts->abort_timeout_sec);
+
+ spdk_json_write_object_end(w);
+}
+
+struct rpc_get_transport {
+ char *tgt_name;
+};
+
+static const struct spdk_json_object_decoder rpc_get_transport_decoders[] = {
+ {"tgt_name", offsetof(struct rpc_get_transport, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_nvmf_get_transports(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_transport req = { 0 };
+ struct spdk_json_write_ctx *w;
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_tgt *tgt;
+
+ if (params) {
+ if (spdk_json_decode_object(params, rpc_get_transport_decoders,
+ SPDK_COUNTOF(rpc_get_transport_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ return;
+ }
+ }
+
+ tgt = spdk_nvmf_get_tgt(req.tgt_name);
+ if (!tgt) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ free(req.tgt_name);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_array_begin(w);
+ transport = spdk_nvmf_transport_get_first(tgt);
+ while (transport) {
+ dump_nvmf_transport(w, transport);
+ transport = spdk_nvmf_transport_get_next(transport);
+ }
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+ free(req.tgt_name);
+}
+SPDK_RPC_REGISTER("nvmf_get_transports", rpc_nvmf_get_transports, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_get_transports, get_nvmf_transports)
+
+struct rpc_nvmf_get_stats_ctx {
+ char *tgt_name;
+ struct spdk_nvmf_tgt *tgt;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_json_write_ctx *w;
+};
+
+static const struct spdk_json_object_decoder rpc_get_stats_decoders[] = {
+ {"tgt_name", offsetof(struct rpc_nvmf_get_stats_ctx, tgt_name), spdk_json_decode_string, true},
+};
+
+static void
+free_get_stats_ctx(struct rpc_nvmf_get_stats_ctx *ctx)
+{
+ free(ctx->tgt_name);
+ free(ctx);
+}
+
+static void
+rpc_nvmf_get_stats_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct rpc_nvmf_get_stats_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ spdk_json_write_array_end(ctx->w);
+ spdk_json_write_object_end(ctx->w);
+ spdk_jsonrpc_end_result(ctx->request, ctx->w);
+ free_get_stats_ctx(ctx);
+}
+
+static void
+write_nvmf_transport_stats(struct spdk_json_write_ctx *w,
+ struct spdk_nvmf_transport_poll_group_stat *stat)
+{
+ uint64_t i;
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "trtype",
+ spdk_nvme_transport_id_trtype_str(stat->trtype));
+ switch (stat->trtype) {
+ case SPDK_NVME_TRANSPORT_RDMA:
+ spdk_json_write_named_uint64(w, "pending_data_buffer", stat->rdma.pending_data_buffer);
+ spdk_json_write_named_array_begin(w, "devices");
+ for (i = 0; i < stat->rdma.num_devices; ++i) {
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "name", stat->rdma.devices[i].name);
+ spdk_json_write_named_uint64(w, "polls", stat->rdma.devices[i].polls);
+ spdk_json_write_named_uint64(w, "completions", stat->rdma.devices[i].completions);
+ spdk_json_write_named_uint64(w, "requests",
+ stat->rdma.devices[i].requests);
+ spdk_json_write_named_uint64(w, "request_latency",
+ stat->rdma.devices[i].request_latency);
+ spdk_json_write_named_uint64(w, "pending_free_request",
+ stat->rdma.devices[i].pending_free_request);
+ spdk_json_write_named_uint64(w, "pending_rdma_read",
+ stat->rdma.devices[i].pending_rdma_read);
+ spdk_json_write_named_uint64(w, "pending_rdma_write",
+ stat->rdma.devices[i].pending_rdma_write);
+ spdk_json_write_object_end(w);
+ }
+ spdk_json_write_array_end(w);
+ break;
+ default:
+ break;
+ }
+ spdk_json_write_object_end(w);
+}
+
+static void
+_rpc_nvmf_get_stats(struct spdk_io_channel_iter *i)
+{
+ struct rpc_nvmf_get_stats_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_poll_group_stat stat;
+ struct spdk_nvmf_transport_poll_group_stat *trstat;
+ int rc;
+
+ if (0 == spdk_nvmf_poll_group_get_stat(ctx->tgt, &stat)) {
+ spdk_json_write_object_begin(ctx->w);
+ spdk_json_write_named_string(ctx->w, "name", spdk_thread_get_name(spdk_get_thread()));
+ spdk_json_write_named_uint32(ctx->w, "admin_qpairs", stat.admin_qpairs);
+ spdk_json_write_named_uint32(ctx->w, "io_qpairs", stat.io_qpairs);
+ spdk_json_write_named_uint64(ctx->w, "pending_bdev_io", stat.pending_bdev_io);
+
+ spdk_json_write_named_array_begin(ctx->w, "transports");
+ transport = spdk_nvmf_transport_get_first(ctx->tgt);
+ while (transport) {
+ rc = spdk_nvmf_transport_poll_group_get_stat(ctx->tgt, transport, &trstat);
+ if (0 == rc) {
+ write_nvmf_transport_stats(ctx->w, trstat);
+ spdk_nvmf_transport_poll_group_free_stat(transport, trstat);
+ } else if (-ENOTSUP != rc) {
+ SPDK_ERRLOG("Failed to get poll group statistics for transport %s, errno %d\n",
+ spdk_nvme_transport_id_trtype_str(spdk_nvmf_get_transport_type(transport)),
+ rc);
+ }
+ transport = spdk_nvmf_transport_get_next(transport);
+ }
+ spdk_json_write_array_end(ctx->w);
+ spdk_json_write_object_end(ctx->w);
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+rpc_nvmf_get_stats(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_nvmf_get_stats_ctx *ctx;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error");
+ return;
+ }
+ ctx->request = request;
+
+ if (params) {
+ if (spdk_json_decode_object(params, rpc_get_stats_decoders,
+ SPDK_COUNTOF(rpc_get_stats_decoders),
+ ctx)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_get_stats_ctx(ctx);
+ return;
+ }
+ }
+
+ ctx->tgt = spdk_nvmf_get_tgt(ctx->tgt_name);
+ if (!ctx->tgt) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Unable to find a target.");
+ free_get_stats_ctx(ctx);
+ return;
+ }
+
+ ctx->w = spdk_jsonrpc_begin_result(ctx->request);
+ spdk_json_write_object_begin(ctx->w);
+ spdk_json_write_named_uint64(ctx->w, "tick_rate", spdk_get_ticks_hz());
+ spdk_json_write_named_array_begin(ctx->w, "poll_groups");
+
+ spdk_for_each_channel(ctx->tgt,
+ _rpc_nvmf_get_stats,
+ ctx,
+ rpc_nvmf_get_stats_done);
+}
+
+SPDK_RPC_REGISTER("nvmf_get_stats", rpc_nvmf_get_stats, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/nvmf/rdma.c b/src/spdk/lib/nvmf/rdma.c
new file mode 100644
index 000000000..4a4de4374
--- /dev/null
+++ b/src/spdk/lib/nvmf/rdma.c
@@ -0,0 +1,4313 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/config.h"
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/nvmf_transport.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/assert.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/rdma.h"
+
+#include "nvmf_internal.h"
+
+struct spdk_nvme_rdma_hooks g_nvmf_hooks = {};
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma;
+
+/* RDMA Connection Resource Defaults */
+#define NVMF_DEFAULT_TX_SGE SPDK_NVMF_MAX_SGL_ENTRIES
+#define NVMF_DEFAULT_RSP_SGE 1
+#define NVMF_DEFAULT_RX_SGE 2
+
+/* The RDMA completion queue size */
+#define DEFAULT_NVMF_RDMA_CQ_SIZE 4096
+#define MAX_WR_PER_QP(queue_depth) (queue_depth * 3 + 2)
+
+/* Timeout for destroying defunct rqpairs */
+#define NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US 4000000
+
+static int g_spdk_nvmf_ibv_query_mask =
+ IBV_QP_STATE |
+ IBV_QP_PKEY_INDEX |
+ IBV_QP_PORT |
+ IBV_QP_ACCESS_FLAGS |
+ IBV_QP_AV |
+ IBV_QP_PATH_MTU |
+ IBV_QP_DEST_QPN |
+ IBV_QP_RQ_PSN |
+ IBV_QP_MAX_DEST_RD_ATOMIC |
+ IBV_QP_MIN_RNR_TIMER |
+ IBV_QP_SQ_PSN |
+ IBV_QP_TIMEOUT |
+ IBV_QP_RETRY_CNT |
+ IBV_QP_RNR_RETRY |
+ IBV_QP_MAX_QP_RD_ATOMIC;
+
+enum spdk_nvmf_rdma_request_state {
+ /* The request is not currently in use */
+ RDMA_REQUEST_STATE_FREE = 0,
+
+ /* Initial state when request first received */
+ RDMA_REQUEST_STATE_NEW,
+
+ /* The request is queued until a data buffer is available. */
+ RDMA_REQUEST_STATE_NEED_BUFFER,
+
+ /* The request is waiting on RDMA queue depth availability
+ * to transfer data from the host to the controller.
+ */
+ RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING,
+
+ /* The request is currently transferring data from the host to the controller. */
+ RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+
+ /* The request is ready to execute at the block device */
+ RDMA_REQUEST_STATE_READY_TO_EXECUTE,
+
+ /* The request is currently executing at the block device */
+ RDMA_REQUEST_STATE_EXECUTING,
+
+ /* The request finished executing at the block device */
+ RDMA_REQUEST_STATE_EXECUTED,
+
+ /* The request is waiting on RDMA queue depth availability
+ * to transfer data from the controller to the host.
+ */
+ RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING,
+
+ /* The request is ready to send a completion */
+ RDMA_REQUEST_STATE_READY_TO_COMPLETE,
+
+ /* The request is currently transferring data from the controller to the host. */
+ RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+
+ /* The request currently has an outstanding completion without an
+ * associated data transfer.
+ */
+ RDMA_REQUEST_STATE_COMPLETING,
+
+ /* The request completed and can be marked free. */
+ RDMA_REQUEST_STATE_COMPLETED,
+
+ /* Terminator */
+ RDMA_REQUEST_NUM_STATES,
+};
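+
+/*
+ * Illustrative summary (not part of the original source): a write
+ * (host-to-controller) request is expected to move through the states above
+ * roughly as NEW -> NEED_BUFFER -> DATA_TRANSFER_TO_CONTROLLER_PENDING ->
+ * TRANSFERRING_HOST_TO_CONTROLLER -> READY_TO_EXECUTE -> EXECUTING ->
+ * EXECUTED -> READY_TO_COMPLETE -> COMPLETING -> COMPLETED, while a read
+ * skips the host-to-controller transfer and instead passes through
+ * DATA_TRANSFER_TO_HOST_PENDING and TRANSFERRING_CONTROLLER_TO_HOST after
+ * execution.
+ */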
+
+#define OBJECT_NVMF_RDMA_IO 0x40
+
+#define TRACE_GROUP_NVMF_RDMA 0x4
+#define TRACE_RDMA_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x0)
+#define TRACE_RDMA_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x1)
+#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x2)
+#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x3)
+#define TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x4)
+#define TRACE_RDMA_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x5)
+#define TRACE_RDMA_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x6)
+#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x7)
+#define TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x8)
+#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x9)
+#define TRACE_RDMA_REQUEST_STATE_COMPLETING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xA)
+#define TRACE_RDMA_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xB)
+#define TRACE_RDMA_QP_CREATE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xC)
+#define TRACE_RDMA_IBV_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xD)
+#define TRACE_RDMA_CM_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xE)
+#define TRACE_RDMA_QP_STATE_CHANGE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xF)
+#define TRACE_RDMA_QP_DISCONNECT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x10)
+#define TRACE_RDMA_QP_DESTROY SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x11)
+
+SPDK_TRACE_REGISTER_FN(nvmf_trace, "nvmf_rdma", TRACE_GROUP_NVMF_RDMA)
+{
+ spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r');
+ spdk_trace_register_description("RDMA_REQ_NEW", TRACE_RDMA_REQUEST_STATE_NEW,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 1, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_NEED_BUFFER", TRACE_RDMA_REQUEST_STATE_NEED_BUFFER,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_TX_PENDING_C2H",
+ TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_TX_PENDING_H2C",
+ TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_TX_H2C",
+ TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_RDY_TO_EXECUTE",
+ TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_EXECUTING",
+ TRACE_RDMA_REQUEST_STATE_EXECUTING,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_EXECUTED",
+ TRACE_RDMA_REQUEST_STATE_EXECUTED,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPL",
+ TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_COMPLETING_C2H",
+ TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_COMPLETING",
+ TRACE_RDMA_REQUEST_STATE_COMPLETING,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+ spdk_trace_register_description("RDMA_REQ_COMPLETED",
+ TRACE_RDMA_REQUEST_STATE_COMPLETED,
+ OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: ");
+
+ spdk_trace_register_description("RDMA_QP_CREATE", TRACE_RDMA_QP_CREATE,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("RDMA_IBV_ASYNC_EVENT", TRACE_RDMA_IBV_ASYNC_EVENT,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "type: ");
+ spdk_trace_register_description("RDMA_CM_ASYNC_EVENT", TRACE_RDMA_CM_ASYNC_EVENT,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "type: ");
+ spdk_trace_register_description("RDMA_QP_STATE_CHANGE", TRACE_RDMA_QP_STATE_CHANGE,
+ OWNER_NONE, OBJECT_NONE, 0, 1, "state: ");
+ spdk_trace_register_description("RDMA_QP_DISCONNECT", TRACE_RDMA_QP_DISCONNECT,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("RDMA_QP_DESTROY", TRACE_RDMA_QP_DESTROY,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+}
+
+enum spdk_nvmf_rdma_wr_type {
+ RDMA_WR_TYPE_RECV,
+ RDMA_WR_TYPE_SEND,
+ RDMA_WR_TYPE_DATA,
+};
+
+struct spdk_nvmf_rdma_wr {
+ enum spdk_nvmf_rdma_wr_type type;
+};
+
+/* This structure holds commands as they are received off the wire.
+ * It must be dynamically paired with a full request object
+ * (spdk_nvmf_rdma_request) to service a request. It is separate
+ * from the request because RDMA does not appear to order
+ * completions, so occasionally we'll get a new incoming
+ * command when there aren't any free request objects.
+ */
+struct spdk_nvmf_rdma_recv {
+ struct ibv_recv_wr wr;
+ struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE];
+
+ struct spdk_nvmf_rdma_qpair *qpair;
+
+ /* In-capsule data buffer */
+ uint8_t *buf;
+
+ struct spdk_nvmf_rdma_wr rdma_wr;
+ uint64_t receive_tsc;
+
+ STAILQ_ENTRY(spdk_nvmf_rdma_recv) link;
+};
+
+struct spdk_nvmf_rdma_request_data {
+ struct spdk_nvmf_rdma_wr rdma_wr;
+ struct ibv_send_wr wr;
+ struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES];
+};
+
+struct spdk_nvmf_rdma_request {
+ struct spdk_nvmf_request req;
+
+ enum spdk_nvmf_rdma_request_state state;
+
+ struct spdk_nvmf_rdma_recv *recv;
+
+ struct {
+ struct spdk_nvmf_rdma_wr rdma_wr;
+ struct ibv_send_wr wr;
+ struct ibv_sge sgl[NVMF_DEFAULT_RSP_SGE];
+ } rsp;
+
+ struct spdk_nvmf_rdma_request_data data;
+
+ uint32_t iovpos;
+
+ uint32_t num_outstanding_data_wr;
+ uint64_t receive_tsc;
+
+ STAILQ_ENTRY(spdk_nvmf_rdma_request) state_link;
+};
+
+enum spdk_nvmf_rdma_qpair_disconnect_flags {
+ RDMA_QP_DISCONNECTING = 1,
+ RDMA_QP_RECV_DRAINED = 1 << 1,
+ RDMA_QP_SEND_DRAINED = 1 << 2
+};
+
+struct spdk_nvmf_rdma_resource_opts {
+ struct spdk_nvmf_rdma_qpair *qpair;
+ /* qp points either to an ibv_qp object or an ibv_srq object depending on the value of shared. */
+ void *qp;
+ struct ibv_pd *pd;
+ uint32_t max_queue_depth;
+ uint32_t in_capsule_data_size;
+ bool shared;
+};
+
+struct spdk_nvmf_send_wr_list {
+ struct ibv_send_wr *first;
+ struct ibv_send_wr *last;
+};
+
+struct spdk_nvmf_recv_wr_list {
+ struct ibv_recv_wr *first;
+ struct ibv_recv_wr *last;
+};
+
+struct spdk_nvmf_rdma_resources {
+ /* Array of size "max_queue_depth" containing RDMA requests. */
+ struct spdk_nvmf_rdma_request *reqs;
+
+ /* Array of size "max_queue_depth" containing RDMA recvs. */
+ struct spdk_nvmf_rdma_recv *recvs;
+
+ /* Array of size "max_queue_depth" containing 64 byte capsules
+ * used for receive.
+ */
+ union nvmf_h2c_msg *cmds;
+ struct ibv_mr *cmds_mr;
+
+ /* Array of size "max_queue_depth" containing 16 byte completions
+ * to be sent back to the user.
+ */
+ union nvmf_c2h_msg *cpls;
+ struct ibv_mr *cpls_mr;
+
+ /* Array of size "max_queue_depth * InCapsuleDataSize" containing
+ * buffers to be used for in capsule data.
+ */
+ void *bufs;
+ struct ibv_mr *bufs_mr;
+
+ /* The list of pending recvs to transfer */
+ struct spdk_nvmf_recv_wr_list recvs_to_post;
+
+ /* Receives that are waiting for a request object */
+ STAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue;
+
+ /* Queue to track free requests */
+ STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue;
+};
+
+typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair);
+
+struct spdk_nvmf_rdma_ibv_event_ctx {
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ spdk_nvmf_rdma_qpair_ibv_event cb_fn;
+ /* Link to other ibv events associated with this qpair */
+ STAILQ_ENTRY(spdk_nvmf_rdma_ibv_event_ctx) link;
+};
+
+struct spdk_nvmf_rdma_qpair {
+ struct spdk_nvmf_qpair qpair;
+
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poller *poller;
+
+ struct spdk_rdma_qp *rdma_qp;
+ struct rdma_cm_id *cm_id;
+ struct ibv_srq *srq;
+ struct rdma_cm_id *listen_id;
+
+ /* The maximum number of I/O outstanding on this connection at one time */
+ uint16_t max_queue_depth;
+
+ /* The maximum number of active RDMA READ and ATOMIC operations at one time */
+ uint16_t max_read_depth;
+
+ /* The maximum number of RDMA SEND operations at one time */
+ uint32_t max_send_depth;
+
+ /* The current number of outstanding WRs from this qpair's
+ * recv queue. Should not exceed device->attr.max_queue_depth.
+ */
+ uint16_t current_recv_depth;
+
+ /* The current number of active RDMA READ operations */
+ uint16_t current_read_depth;
+
+ /* The current number of posted WRs from this qpair's
+ * send queue. Should not exceed max_send_depth.
+ */
+ uint32_t current_send_depth;
+
+ /* The maximum number of SGEs per WR on the send queue */
+ uint32_t max_send_sge;
+
+ /* The maximum number of SGEs per WR on the recv queue */
+ uint32_t max_recv_sge;
+
+ struct spdk_nvmf_rdma_resources *resources;
+
+ STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_read_queue;
+
+ STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_write_queue;
+
+ /* Number of requests not in the free state */
+ uint32_t qd;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_qpair) link;
+
+ STAILQ_ENTRY(spdk_nvmf_rdma_qpair) recv_link;
+
+ STAILQ_ENTRY(spdk_nvmf_rdma_qpair) send_link;
+
+ /* IBV queue pair attributes: they are used to manage
+ * qp state and recover from errors.
+ */
+ enum ibv_qp_state ibv_state;
+
+ uint32_t disconnect_flags;
+
+ /* Poller registered in case the qpair doesn't properly
+ * complete the qpair destruct process and becomes defunct.
+ */
+ struct spdk_poller *destruct_poller;
+
+ /*
+ * io_channel which is used to destroy qpair when it is removed from poll group
+ */
+ struct spdk_io_channel *destruct_channel;
+
+ /* List of ibv async events */
+ STAILQ_HEAD(, spdk_nvmf_rdma_ibv_event_ctx) ibv_events;
+
+ /* There are several ways a disconnect can start on a qpair
+ * and they are not all mutually exclusive. It is important
+	 * that we only initiate one of these paths.
+ */
+ bool disconnect_started;
+ /* Lets us know that we have received the last_wqe event. */
+ bool last_wqe_reached;
+};
+
+struct spdk_nvmf_rdma_poller_stat {
+ uint64_t completions;
+ uint64_t polls;
+ uint64_t requests;
+ uint64_t request_latency;
+ uint64_t pending_free_request;
+ uint64_t pending_rdma_read;
+ uint64_t pending_rdma_write;
+};
+
+struct spdk_nvmf_rdma_poller {
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poll_group *group;
+
+ int num_cqe;
+ int required_num_wr;
+ struct ibv_cq *cq;
+
+ /* The maximum number of I/O outstanding on the shared receive queue at one time */
+ uint16_t max_srq_depth;
+
+ /* Shared receive queue */
+ struct ibv_srq *srq;
+
+ struct spdk_nvmf_rdma_resources *resources;
+ struct spdk_nvmf_rdma_poller_stat stat;
+
+ TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs;
+
+ STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_recv;
+
+ STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_send;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_poller) link;
+};
+
+struct spdk_nvmf_rdma_poll_group_stat {
+ uint64_t pending_data_buffer;
+};
+
+struct spdk_nvmf_rdma_poll_group {
+ struct spdk_nvmf_transport_poll_group group;
+ struct spdk_nvmf_rdma_poll_group_stat stat;
+ TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers;
+ TAILQ_ENTRY(spdk_nvmf_rdma_poll_group) link;
+ /*
+ * buffers which are split across multiple RDMA
+ * memory regions cannot be used by this transport.
+ */
+ STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) retired_bufs;
+};
+
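+/* Round-robin cursors used when assigning new qpairs to poll groups; admin and
+ * I/O qpairs are scheduled independently of each other.
+ */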
+struct spdk_nvmf_rdma_conn_sched {
+ struct spdk_nvmf_rdma_poll_group *next_admin_pg;
+ struct spdk_nvmf_rdma_poll_group *next_io_pg;
+};
+
+/* Assuming rdma_cm uses just one protection domain per ibv_context. */
+struct spdk_nvmf_rdma_device {
+ struct ibv_device_attr attr;
+ struct ibv_context *context;
+
+ struct spdk_mem_map *map;
+ struct ibv_pd *pd;
+
+ int num_srq;
+
+ TAILQ_ENTRY(spdk_nvmf_rdma_device) link;
+};
+
+struct spdk_nvmf_rdma_port {
+ const struct spdk_nvme_transport_id *trid;
+ struct rdma_cm_id *id;
+ struct spdk_nvmf_rdma_device *device;
+ TAILQ_ENTRY(spdk_nvmf_rdma_port) link;
+};
+
+struct spdk_nvmf_rdma_transport {
+ struct spdk_nvmf_transport transport;
+
+ struct spdk_nvmf_rdma_conn_sched conn_sched;
+
+ struct rdma_event_channel *event_channel;
+
+ struct spdk_mempool *data_wr_pool;
+
+ pthread_mutex_t lock;
+
+ /* fields used to poll RDMA/IB events */
+ nfds_t npoll_fds;
+ struct pollfd *poll_fds;
+
+ TAILQ_HEAD(, spdk_nvmf_rdma_device) devices;
+ TAILQ_HEAD(, spdk_nvmf_rdma_port) ports;
+ TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups;
+};
+
+static inline void
+nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair);
+
+static bool
+nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_request *rdma_req);
+
+static inline int
+nvmf_rdma_check_ibv_state(enum ibv_qp_state state)
+{
+ switch (state) {
+ case IBV_QPS_RESET:
+ case IBV_QPS_INIT:
+ case IBV_QPS_RTR:
+ case IBV_QPS_RTS:
+ case IBV_QPS_SQD:
+ case IBV_QPS_SQE:
+ case IBV_QPS_ERR:
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static inline enum spdk_nvme_media_error_status_code
+nvmf_rdma_dif_error_to_compl_status(uint8_t err_type)
+{
+ enum spdk_nvme_media_error_status_code result;
+	switch (err_type) {
+ case SPDK_DIF_REFTAG_ERROR:
+ result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
+ break;
+ case SPDK_DIF_APPTAG_ERROR:
+ result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
+ break;
+ case SPDK_DIF_GUARD_ERROR:
+ result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
+ break;
+ default:
+ SPDK_UNREACHABLE();
+ }
+
+ return result;
+}
+
+static enum ibv_qp_state
+nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ enum ibv_qp_state old_state, new_state;
+ struct ibv_qp_attr qp_attr;
+ struct ibv_qp_init_attr init_attr;
+ int rc;
+
+ old_state = rqpair->ibv_state;
+ rc = ibv_query_qp(rqpair->rdma_qp->qp, &qp_attr,
+ g_spdk_nvmf_ibv_query_mask, &init_attr);
+
+	if (rc) {
+ SPDK_ERRLOG("Failed to get updated RDMA queue pair state!\n");
+ return IBV_QPS_ERR + 1;
+ }
+
+ new_state = qp_attr.qp_state;
+ rqpair->ibv_state = new_state;
+ qp_attr.ah_attr.port_num = qp_attr.port_num;
+
+ rc = nvmf_rdma_check_ibv_state(new_state);
+	if (rc) {
+		SPDK_ERRLOG("QP#%d: bad state %u reported, possible hardware issue\n", rqpair->qpair.qid, new_state);
+ /*
+		 * IBV_QPS_UNKNOWN is undefined if the libibverbs version is older than 1.1.8;
+		 * it is the enum element immediately after IBV_QPS_ERR, hence IBV_QPS_ERR + 1.
+ */
+ return IBV_QPS_ERR + 1;
+ }
+
+	if (old_state != new_state) {
+ spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0,
+ (uintptr_t)rqpair->cm_id, new_state);
+ }
+ return new_state;
+}
+
+static void
+nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
+ struct spdk_nvmf_rdma_transport *rtransport)
+{
+ struct spdk_nvmf_rdma_request_data *data_wr;
+ struct ibv_send_wr *next_send_wr;
+ uint64_t req_wrid;
+
+ rdma_req->num_outstanding_data_wr = 0;
+ data_wr = &rdma_req->data;
+ req_wrid = data_wr->wr.wr_id;
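+	/* All data WRs chained to this request share the request's wr_id. Walk the
+	 * chain, clear each WR's SGL, and return every WR except the one embedded
+	 * in the request to the transport's data_wr_pool.
+	 */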
+ while (data_wr && data_wr->wr.wr_id == req_wrid) {
+ memset(data_wr->sgl, 0, sizeof(data_wr->wr.sg_list[0]) * data_wr->wr.num_sge);
+ data_wr->wr.num_sge = 0;
+ next_send_wr = data_wr->wr.next;
+ if (data_wr != &rdma_req->data) {
+ spdk_mempool_put(rtransport->data_wr_pool, data_wr);
+ }
+ data_wr = (!next_send_wr || next_send_wr == &rdma_req->rsp.wr) ? NULL :
+ SPDK_CONTAINEROF(next_send_wr, struct spdk_nvmf_rdma_request_data, wr);
+ }
+}
+
+static void
+nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
+{
+ SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", req->req.data_from_pool);
+ if (req->req.cmd) {
+ SPDK_ERRLOG("\t\tRequest opcode: %d\n", req->req.cmd->nvmf_cmd.opcode);
+ }
+ if (req->recv) {
+		SPDK_ERRLOG("\t\tRequest recv wr_id: %lu\n", req->recv->wr.wr_id);
+ }
+}
+
+static void
+nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ int i;
+
+ SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid);
+ for (i = 0; i < rqpair->max_queue_depth; i++) {
+ if (rqpair->resources->reqs[i].state != RDMA_REQUEST_STATE_FREE) {
+ nvmf_rdma_dump_request(&rqpair->resources->reqs[i]);
+ }
+ }
+}
+
+static void
+nvmf_rdma_resources_destroy(struct spdk_nvmf_rdma_resources *resources)
+{
+ if (resources->cmds_mr) {
+ ibv_dereg_mr(resources->cmds_mr);
+ }
+
+ if (resources->cpls_mr) {
+ ibv_dereg_mr(resources->cpls_mr);
+ }
+
+ if (resources->bufs_mr) {
+ ibv_dereg_mr(resources->bufs_mr);
+ }
+
+ spdk_free(resources->cmds);
+ spdk_free(resources->cpls);
+ spdk_free(resources->bufs);
+ free(resources->reqs);
+ free(resources->recvs);
+ free(resources);
+}
+
+static struct spdk_nvmf_rdma_resources *
+nvmf_rdma_resources_create(struct spdk_nvmf_rdma_resource_opts *opts)
+{
+ struct spdk_nvmf_rdma_resources *resources;
+ struct spdk_nvmf_rdma_request *rdma_req;
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+ struct ibv_qp *qp;
+ struct ibv_srq *srq;
+ uint32_t i;
+ int rc;
+
+ resources = calloc(1, sizeof(struct spdk_nvmf_rdma_resources));
+ if (!resources) {
+ SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
+ return NULL;
+ }
+
+ resources->reqs = calloc(opts->max_queue_depth, sizeof(*resources->reqs));
+ resources->recvs = calloc(opts->max_queue_depth, sizeof(*resources->recvs));
+ resources->cmds = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cmds),
+ 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ resources->cpls = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cpls),
+ 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+
+ if (opts->in_capsule_data_size > 0) {
+ resources->bufs = spdk_zmalloc(opts->max_queue_depth * opts->in_capsule_data_size,
+ 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY,
+ SPDK_MALLOC_DMA);
+ }
+
+ if (!resources->reqs || !resources->recvs || !resources->cmds ||
+ !resources->cpls || (opts->in_capsule_data_size && !resources->bufs)) {
+ SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n");
+ goto cleanup;
+ }
+
+ resources->cmds_mr = ibv_reg_mr(opts->pd, resources->cmds,
+ opts->max_queue_depth * sizeof(*resources->cmds),
+ IBV_ACCESS_LOCAL_WRITE);
+ resources->cpls_mr = ibv_reg_mr(opts->pd, resources->cpls,
+ opts->max_queue_depth * sizeof(*resources->cpls),
+ 0);
+
+ if (opts->in_capsule_data_size) {
+ resources->bufs_mr = ibv_reg_mr(opts->pd, resources->bufs,
+ opts->max_queue_depth *
+ opts->in_capsule_data_size,
+ IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+ }
+
+ if (!resources->cmds_mr || !resources->cpls_mr ||
+ (opts->in_capsule_data_size &&
+ !resources->bufs_mr)) {
+ goto cleanup;
+ }
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %lx LKey: %x\n",
+ resources->cmds, opts->max_queue_depth * sizeof(*resources->cmds),
+ resources->cmds_mr->lkey);
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %lx LKey: %x\n",
+ resources->cpls, opts->max_queue_depth * sizeof(*resources->cpls),
+ resources->cpls_mr->lkey);
+ if (resources->bufs && resources->bufs_mr) {
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %x LKey: %x\n",
+ resources->bufs, opts->max_queue_depth *
+ opts->in_capsule_data_size, resources->bufs_mr->lkey);
+ }
+
+ /* Initialize queues */
+ STAILQ_INIT(&resources->incoming_queue);
+ STAILQ_INIT(&resources->free_queue);
+
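+	/* Build and post one receive WR per queue slot: sgl[0] always points at the
+	 * command capsule, and sgl[1] (when in-capsule data is enabled) points at the
+	 * slot's in-capsule data buffer.
+	 */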
+ for (i = 0; i < opts->max_queue_depth; i++) {
+ struct ibv_recv_wr *bad_wr = NULL;
+
+ rdma_recv = &resources->recvs[i];
+ rdma_recv->qpair = opts->qpair;
+
+ /* Set up memory to receive commands */
+ if (resources->bufs) {
+ rdma_recv->buf = (void *)((uintptr_t)resources->bufs + (i *
+ opts->in_capsule_data_size));
+ }
+
+ rdma_recv->rdma_wr.type = RDMA_WR_TYPE_RECV;
+
+ rdma_recv->sgl[0].addr = (uintptr_t)&resources->cmds[i];
+ rdma_recv->sgl[0].length = sizeof(resources->cmds[i]);
+ rdma_recv->sgl[0].lkey = resources->cmds_mr->lkey;
+ rdma_recv->wr.num_sge = 1;
+
+ if (rdma_recv->buf && resources->bufs_mr) {
+ rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf;
+ rdma_recv->sgl[1].length = opts->in_capsule_data_size;
+ rdma_recv->sgl[1].lkey = resources->bufs_mr->lkey;
+ rdma_recv->wr.num_sge++;
+ }
+
+ rdma_recv->wr.wr_id = (uintptr_t)&rdma_recv->rdma_wr;
+ rdma_recv->wr.sg_list = rdma_recv->sgl;
+ if (opts->shared) {
+ srq = (struct ibv_srq *)opts->qp;
+ rc = ibv_post_srq_recv(srq, &rdma_recv->wr, &bad_wr);
+ } else {
+ qp = (struct ibv_qp *)opts->qp;
+ rc = ibv_post_recv(qp, &rdma_recv->wr, &bad_wr);
+ }
+ if (rc) {
+ goto cleanup;
+ }
+ }
+
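+	/* Initialize the request objects: each one gets a completion slot, a pre-built
+	 * response SEND WR, an embedded data WR for RDMA READ/WRITE, and starts out on
+	 * the free queue.
+	 */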
+ for (i = 0; i < opts->max_queue_depth; i++) {
+ rdma_req = &resources->reqs[i];
+
+ if (opts->qpair != NULL) {
+ rdma_req->req.qpair = &opts->qpair->qpair;
+ } else {
+ rdma_req->req.qpair = NULL;
+ }
+ rdma_req->req.cmd = NULL;
+
+ /* Set up memory to send responses */
+ rdma_req->req.rsp = &resources->cpls[i];
+
+ rdma_req->rsp.sgl[0].addr = (uintptr_t)&resources->cpls[i];
+ rdma_req->rsp.sgl[0].length = sizeof(resources->cpls[i]);
+ rdma_req->rsp.sgl[0].lkey = resources->cpls_mr->lkey;
+
+ rdma_req->rsp.rdma_wr.type = RDMA_WR_TYPE_SEND;
+ rdma_req->rsp.wr.wr_id = (uintptr_t)&rdma_req->rsp.rdma_wr;
+ rdma_req->rsp.wr.next = NULL;
+ rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+ rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED;
+ rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl;
+ rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl);
+
+ /* Set up memory for data buffers */
+ rdma_req->data.rdma_wr.type = RDMA_WR_TYPE_DATA;
+ rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data.rdma_wr;
+ rdma_req->data.wr.next = NULL;
+ rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED;
+ rdma_req->data.wr.sg_list = rdma_req->data.sgl;
+ rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl);
+
+ /* Initialize request state to FREE */
+ rdma_req->state = RDMA_REQUEST_STATE_FREE;
+ STAILQ_INSERT_TAIL(&resources->free_queue, rdma_req, state_link);
+ }
+
+ return resources;
+
+cleanup:
+ nvmf_rdma_resources_destroy(resources);
+ return NULL;
+}
+
+static void
+nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ struct spdk_nvmf_rdma_ibv_event_ctx *ctx, *tctx;
+ STAILQ_FOREACH_SAFE(ctx, &rqpair->ibv_events, link, tctx) {
+ ctx->rqpair = NULL;
+ /* Memory allocated for ctx is freed in nvmf_rdma_qpair_process_ibv_event */
+ STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
+ }
+}
+
+static void
+nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
+ struct ibv_recv_wr *bad_recv_wr = NULL;
+ int rc;
+
+ spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);
+
+ spdk_poller_unregister(&rqpair->destruct_poller);
+
+ if (rqpair->qd != 0) {
+ struct spdk_nvmf_qpair *qpair = &rqpair->qpair;
+ struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(qpair->transport,
+ struct spdk_nvmf_rdma_transport, transport);
+ struct spdk_nvmf_rdma_request *req;
+ uint32_t i, max_req_count = 0;
+
+ SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd);
+
+ if (rqpair->srq == NULL) {
+ nvmf_rdma_dump_qpair_contents(rqpair);
+ max_req_count = rqpair->max_queue_depth;
+ } else if (rqpair->poller && rqpair->resources) {
+ max_req_count = rqpair->poller->max_srq_depth;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Release incomplete requests\n");
+ for (i = 0; i < max_req_count; i++) {
+ req = &rqpair->resources->reqs[i];
+ if (req->req.qpair == qpair && req->state != RDMA_REQUEST_STATE_FREE) {
+ /* nvmf_rdma_request_process checks qpair ibv and internal state
+ * and completes a request */
+ nvmf_rdma_request_process(rtransport, req);
+ }
+ }
+ assert(rqpair->qd == 0);
+ }
+
+ if (rqpair->poller) {
+ TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
+
+ if (rqpair->srq != NULL && rqpair->resources != NULL) {
+ /* Drop all received but unprocessed commands for this queue and return them to SRQ */
+ STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) {
+ if (rqpair == rdma_recv->qpair) {
+ STAILQ_REMOVE(&rqpair->resources->incoming_queue, rdma_recv, spdk_nvmf_rdma_recv, link);
+ rc = ibv_post_srq_recv(rqpair->srq, &rdma_recv->wr, &bad_recv_wr);
+ if (rc) {
+ SPDK_ERRLOG("Unable to re-post rx descriptor\n");
+ }
+ }
+ }
+ }
+ }
+
+ if (rqpair->cm_id) {
+ if (rqpair->rdma_qp != NULL) {
+ spdk_rdma_qp_destroy(rqpair->rdma_qp);
+ rqpair->rdma_qp = NULL;
+ }
+ rdma_destroy_id(rqpair->cm_id);
+
+ if (rqpair->poller != NULL && rqpair->srq == NULL) {
+ rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth);
+ }
+ }
+
+ if (rqpair->srq == NULL && rqpair->resources != NULL) {
+ nvmf_rdma_resources_destroy(rqpair->resources);
+ }
+
+ nvmf_rdma_qpair_clean_ibv_events(rqpair);
+
+ if (rqpair->destruct_channel) {
+ spdk_put_io_channel(rqpair->destruct_channel);
+ rqpair->destruct_channel = NULL;
+ }
+
+ free(rqpair);
+}
+
+static int
+nvmf_rdma_resize_cq(struct spdk_nvmf_rdma_qpair *rqpair, struct spdk_nvmf_rdma_device *device)
+{
+ struct spdk_nvmf_rdma_poller *rpoller;
+ int rc, num_cqe, required_num_wr;
+
+ /* Enlarge CQ size dynamically */
+ rpoller = rqpair->poller;
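+	/* Each qpair may generate up to MAX_WR_PER_QP(max_queue_depth) completions on
+	 * the poller's shared CQ, so grow the CQ if the new total would not fit.
+	 */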
+ required_num_wr = rpoller->required_num_wr + MAX_WR_PER_QP(rqpair->max_queue_depth);
+ num_cqe = rpoller->num_cqe;
+ if (num_cqe < required_num_wr) {
+ num_cqe = spdk_max(num_cqe * 2, required_num_wr);
+ num_cqe = spdk_min(num_cqe, device->attr.max_cqe);
+ }
+
+ if (rpoller->num_cqe != num_cqe) {
+ if (required_num_wr > device->attr.max_cqe) {
+ SPDK_ERRLOG("RDMA CQE requirement (%d) exceeds device max_cqe limitation (%d)\n",
+ required_num_wr, device->attr.max_cqe);
+ return -1;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Resize RDMA CQ from %d to %d\n", rpoller->num_cqe, num_cqe);
+ rc = ibv_resize_cq(rpoller->cq, num_cqe);
+ if (rc) {
+ SPDK_ERRLOG("RDMA CQ resize failed: errno %d: %s\n", errno, spdk_strerror(errno));
+ return -1;
+ }
+
+ rpoller->num_cqe = num_cqe;
+ }
+
+ rpoller->required_num_wr = required_num_wr;
+ return 0;
+}
+
+static int
+nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_rdma_resource_opts opts;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_rdma_qp_init_attr qp_init_attr = {};
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ device = rqpair->device;
+
+ qp_init_attr.qp_context = rqpair;
+ qp_init_attr.pd = device->pd;
+ qp_init_attr.send_cq = rqpair->poller->cq;
+ qp_init_attr.recv_cq = rqpair->poller->cq;
+
+ if (rqpair->srq) {
+ qp_init_attr.srq = rqpair->srq;
+ } else {
+ qp_init_attr.cap.max_recv_wr = rqpair->max_queue_depth;
+ }
+
+ /* SEND, READ, and WRITE operations */
+ qp_init_attr.cap.max_send_wr = (uint32_t)rqpair->max_queue_depth * 2;
+ qp_init_attr.cap.max_send_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
+ qp_init_attr.cap.max_recv_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
+
+ if (rqpair->srq == NULL && nvmf_rdma_resize_cq(rqpair, device) < 0) {
+ SPDK_ERRLOG("Failed to resize the completion queue. Cannot initialize qpair.\n");
+ goto error;
+ }
+
+ rqpair->rdma_qp = spdk_rdma_qp_create(rqpair->cm_id, &qp_init_attr);
+ if (!rqpair->rdma_qp) {
+ goto error;
+ }
+
+ rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2),
+ qp_init_attr.cap.max_send_wr);
+ rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, qp_init_attr.cap.max_send_sge);
+ rqpair->max_recv_sge = spdk_min(NVMF_DEFAULT_RX_SGE, qp_init_attr.cap.max_recv_sge);
+ spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);
+
+ if (rqpair->poller->srq == NULL) {
+ rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+ transport = &rtransport->transport;
+
+ opts.qp = rqpair->rdma_qp->qp;
+ opts.pd = rqpair->cm_id->pd;
+ opts.qpair = rqpair;
+ opts.shared = false;
+ opts.max_queue_depth = rqpair->max_queue_depth;
+ opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
+
+ rqpair->resources = nvmf_rdma_resources_create(&opts);
+
+ if (!rqpair->resources) {
+ SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
+ rdma_destroy_qp(rqpair->cm_id);
+ goto error;
+ }
+ } else {
+ rqpair->resources = rqpair->poller->resources;
+ }
+
+ rqpair->current_recv_depth = 0;
+ STAILQ_INIT(&rqpair->pending_rdma_read_queue);
+ STAILQ_INIT(&rqpair->pending_rdma_write_queue);
+
+ return 0;
+
+error:
+ rdma_destroy_id(rqpair->cm_id);
+ rqpair->cm_id = NULL;
+ return -1;
+}
+
+/* Append the given recv wr structure to the resource struct's outstanding recvs list.
+ * This function accepts either a single wr or the first wr in a linked list.
+ */
+static void
+nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first)
+{
+ struct ibv_recv_wr *last;
+
+ last = first;
+ while (last->next != NULL) {
+ last = last->next;
+ }
+
+ if (rqpair->resources->recvs_to_post.first == NULL) {
+ rqpair->resources->recvs_to_post.first = first;
+ rqpair->resources->recvs_to_post.last = last;
+ if (rqpair->srq == NULL) {
+ STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_recv, rqpair, recv_link);
+ }
+ } else {
+ rqpair->resources->recvs_to_post.last->next = first;
+ rqpair->resources->recvs_to_post.last = last;
+ }
+}
+
+static int
+request_transfer_in(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_rdma_request *rdma_req;
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ qpair = req->qpair;
+ rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
+ assert(rdma_req != NULL);
+
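+	/* Queue the chain of RDMA READ WRs. If these are the first WRs queued on this
+	 * qpair, add the qpair to the poller's pending-send list so they are flushed
+	 * on the next poll.
+	 */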
+ if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, &rdma_req->data.wr)) {
+ STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link);
+ }
+
+ rqpair->current_read_depth += rdma_req->num_outstanding_data_wr;
+ rqpair->current_send_depth += rdma_req->num_outstanding_data_wr;
+ return 0;
+}
+
+static int
+request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
+{
+ int num_outstanding_data_wr = 0;
+ struct spdk_nvmf_rdma_request *rdma_req;
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvme_cpl *rsp;
+ struct ibv_send_wr *first = NULL;
+
+ *data_posted = 0;
+ qpair = req->qpair;
+ rsp = &req->rsp->nvme_cpl;
+ rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ /* Advance our sq_head pointer */
+ if (qpair->sq_head == qpair->sq_head_max) {
+ qpair->sq_head = 0;
+ } else {
+ qpair->sq_head++;
+ }
+ rsp->sqhd = qpair->sq_head;
+
+ /* queue the capsule for the recv buffer */
+ assert(rdma_req->recv != NULL);
+
+ nvmf_rdma_qpair_queue_recv_wrs(rqpair, &rdma_req->recv->wr);
+
+ rdma_req->recv = NULL;
+ assert(rqpair->current_recv_depth > 0);
+ rqpair->current_recv_depth--;
+
+ /* Build the response which consists of optional
+ * RDMA WRITEs to transfer data, plus an RDMA SEND
+ * containing the response.
+ */
+ first = &rdma_req->rsp.wr;
+
+ if (rsp->status.sc != SPDK_NVME_SC_SUCCESS) {
+ /* On failure, data was not read from the controller. So clear the
+ * number of outstanding data WRs to zero.
+ */
+ rdma_req->num_outstanding_data_wr = 0;
+ } else if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ first = &rdma_req->data.wr;
+ *data_posted = 1;
+ num_outstanding_data_wr = rdma_req->num_outstanding_data_wr;
+ }
+ if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, first)) {
+ STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link);
+ }
+
+ /* +1 for the rsp wr */
+ rqpair->current_send_depth += num_outstanding_data_wr + 1;
+
+ return 0;
+}
+
+static int
+nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ struct spdk_nvmf_rdma_accept_private_data accept_data;
+ struct rdma_conn_param ctrlr_event_data = {};
+ int rc;
+
+ accept_data.recfmt = 0;
+ accept_data.crqsize = rqpair->max_queue_depth;
+
+ ctrlr_event_data.private_data = &accept_data;
+ ctrlr_event_data.private_data_len = sizeof(accept_data);
+ if (id->ps == RDMA_PS_TCP) {
+ ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */
+ ctrlr_event_data.initiator_depth = rqpair->max_read_depth;
+ }
+
+ /* Configure infinite retries for the initiator side qpair.
+ * When using a shared receive queue on the target side,
+ * we need to pass this value to the initiator to prevent the
+ * initiator side NIC from completing SEND requests back to the
+ * initiator with status rnr_retry_count_exceeded. */
+ if (rqpair->srq != NULL) {
+ ctrlr_event_data.rnr_retry_count = 0x7;
+ }
+
+	/* When the qpair is created without using the rdma cm API, additional
+	 * information must be provided to the initiator in the connection response:
+	 * whether the qpair uses an SRQ, and its qp_num.
+	 * The fields below are ignored by rdma cm if the qpair was
+	 * created using the rdma cm API. */
+ ctrlr_event_data.srq = rqpair->srq ? 1 : 0;
+ ctrlr_event_data.qp_num = rqpair->rdma_qp->qp->qp_num;
+
+ rc = spdk_rdma_qp_accept(rqpair->rdma_qp, &ctrlr_event_data);
+ if (rc) {
+ SPDK_ERRLOG("Error %d on spdk_rdma_qp_accept\n", errno);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Sent back the accept\n");
+ }
+
+ return rc;
+}
+
+static void
+nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error)
+{
+ struct spdk_nvmf_rdma_reject_private_data rej_data;
+
+ rej_data.recfmt = 0;
+ rej_data.sts = error;
+
+ rdma_reject(id, &rej_data, sizeof(rej_data));
+}
+
+static int
+nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_qpair *rqpair = NULL;
+ struct spdk_nvmf_rdma_port *port;
+ struct rdma_conn_param *rdma_param = NULL;
+ const struct spdk_nvmf_rdma_request_private_data *private_data = NULL;
+ uint16_t max_queue_depth;
+ uint16_t max_read_depth;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ assert(event->id != NULL); /* Impossible. Can't even reject the connection. */
+ assert(event->id->verbs != NULL); /* Impossible. No way to handle this. */
+
+ rdma_param = &event->param.conn;
+ if (rdma_param->private_data == NULL ||
+ rdma_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) {
+ SPDK_ERRLOG("connect request: no private data provided\n");
+ nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH);
+ return -1;
+ }
+
+ private_data = rdma_param->private_data;
+ if (private_data->recfmt != 0) {
+ SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n");
+ nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT);
+ return -1;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Connect Recv on fabric intf name %s, dev_name %s\n",
+ event->id->verbs->device->name, event->id->verbs->device->dev_name);
+
+ port = event->listen_id->context;
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Listen Id was %p with verbs %p. ListenAddr: %p\n",
+ event->listen_id, event->listen_id->verbs, port);
+
+ /* Figure out the supported queue depth. This is a multi-step process
+ * that takes into account hardware maximums, host provided values,
+ * and our target's internal memory limits */
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Calculating Queue Depth\n");
+
+ /* Start with the maximum queue depth allowed by the target */
+ max_queue_depth = rtransport->transport.opts.max_queue_depth;
+ max_read_depth = rtransport->transport.opts.max_queue_depth;
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Target Max Queue Depth: %d\n",
+ rtransport->transport.opts.max_queue_depth);
+
+ /* Next check the local NIC's hardware limitations */
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA,
+ "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n",
+ port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom);
+ max_queue_depth = spdk_min(max_queue_depth, port->device->attr.max_qp_wr);
+ max_read_depth = spdk_min(max_read_depth, port->device->attr.max_qp_init_rd_atom);
+
+ /* Next check the remote NIC's hardware limitations */
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA,
+ "Host (Initiator) NIC Max Incoming RDMA R/W operations: %d Max Outgoing RDMA R/W operations: %d\n",
+ rdma_param->initiator_depth, rdma_param->responder_resources);
+ if (rdma_param->initiator_depth > 0) {
+ max_read_depth = spdk_min(max_read_depth, rdma_param->initiator_depth);
+ }
+
+ /* Finally check for the host software requested values, which are
+ * optional. */
+ if (rdma_param->private_data != NULL &&
+ rdma_param->private_data_len >= sizeof(struct spdk_nvmf_rdma_request_private_data)) {
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Receive Queue Size: %d\n", private_data->hrqsize);
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Send Queue Size: %d\n", private_data->hsqsize);
+ max_queue_depth = spdk_min(max_queue_depth, private_data->hrqsize);
+ max_queue_depth = spdk_min(max_queue_depth, private_data->hsqsize + 1);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n",
+ max_queue_depth, max_read_depth);
+
+ rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair));
+ if (rqpair == NULL) {
+ SPDK_ERRLOG("Could not allocate new connection.\n");
+ nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
+ return -1;
+ }
+
+ rqpair->device = port->device;
+ rqpair->max_queue_depth = max_queue_depth;
+ rqpair->max_read_depth = max_read_depth;
+ rqpair->cm_id = event->id;
+ rqpair->listen_id = event->listen_id;
+ rqpair->qpair.transport = transport;
+ STAILQ_INIT(&rqpair->ibv_events);
+	/* Use the qid from the private data to determine the qpair type;
+	 * the qid will be set to the appropriate value when the controller is created.
+	 */
+ rqpair->qpair.qid = private_data->qid;
+
+ event->id->context = &rqpair->qpair;
+
+ spdk_nvmf_tgt_new_qpair(transport->tgt, &rqpair->qpair);
+
+ return 0;
+}
+
+static int
+nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
+ enum spdk_mem_map_notify_action action,
+ void *vaddr, size_t size)
+{
+ struct ibv_pd *pd = cb_ctx;
+ struct ibv_mr *mr;
+ int rc;
+
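+	/* Register newly added memory with the RDMA device (or record the rkey from the
+	 * user-provided hook) and drop the translation again when the memory is
+	 * unregistered.
+	 */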
+ switch (action) {
+ case SPDK_MEM_MAP_NOTIFY_REGISTER:
+ if (!g_nvmf_hooks.get_rkey) {
+ mr = ibv_reg_mr(pd, vaddr, size,
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_REMOTE_WRITE);
+ if (mr == NULL) {
+ SPDK_ERRLOG("ibv_reg_mr() failed\n");
+ return -1;
+ } else {
+ rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
+ }
+ } else {
+ rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
+ g_nvmf_hooks.get_rkey(pd, vaddr, size));
+ }
+ break;
+ case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
+ if (!g_nvmf_hooks.get_rkey) {
+ mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
+ if (mr) {
+ ibv_dereg_mr(mr);
+ }
+ }
+ rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
+ break;
+ default:
+ SPDK_UNREACHABLE();
+ }
+
+ return rc;
+}
+
+static int
+nvmf_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
+{
+	/* Two contiguous mappings will point to the same address, which is the start of the RDMA MR. */
+ return addr_1 == addr_2;
+}
+
+static inline void
+nvmf_rdma_setup_wr(struct ibv_send_wr *wr, struct ibv_send_wr *next,
+ enum spdk_nvme_data_transfer xfer)
+{
+ if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ wr->opcode = IBV_WR_RDMA_WRITE;
+ wr->send_flags = 0;
+ wr->next = next;
+ } else if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ wr->opcode = IBV_WR_RDMA_READ;
+ wr->send_flags = IBV_SEND_SIGNALED;
+ wr->next = NULL;
+ } else {
+ assert(0);
+ }
+}
+
+static int
+nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_request *rdma_req,
+ uint32_t num_sgl_descriptors)
+{
+ struct spdk_nvmf_rdma_request_data *work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
+ struct spdk_nvmf_rdma_request_data *current_data_wr;
+ uint32_t i;
+
+ if (num_sgl_descriptors > SPDK_NVMF_MAX_SGL_ENTRIES) {
+		SPDK_ERRLOG("Requested too many entries (%u), the limit is %u\n",
+ num_sgl_descriptors, SPDK_NVMF_MAX_SGL_ENTRIES);
+ return -EINVAL;
+ }
+
+ if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
+ return -ENOMEM;
+ }
+
+ current_data_wr = &rdma_req->data;
+
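+	/* Chain the extra WRs onto the request's embedded data WR. They all share the
+	 * embedded WR's wr_id so nvmf_rdma_request_free_data can find and release them.
+	 */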
+ for (i = 0; i < num_sgl_descriptors; i++) {
+ nvmf_rdma_setup_wr(&current_data_wr->wr, &work_requests[i]->wr, rdma_req->req.xfer);
+ current_data_wr->wr.next = &work_requests[i]->wr;
+ current_data_wr = work_requests[i];
+ current_data_wr->wr.sg_list = current_data_wr->sgl;
+ current_data_wr->wr.wr_id = rdma_req->data.wr.wr_id;
+ }
+
+ nvmf_rdma_setup_wr(&current_data_wr->wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
+
+ return 0;
+}
+
+static inline void
+nvmf_rdma_setup_request(struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct ibv_send_wr *wr = &rdma_req->data.wr;
+ struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+
+ wr->wr.rdma.rkey = sgl->keyed.key;
+ wr->wr.rdma.remote_addr = sgl->address;
+ nvmf_rdma_setup_wr(wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
+}
+
+static inline void
+nvmf_rdma_update_remote_addr(struct spdk_nvmf_rdma_request *rdma_req, uint32_t num_wrs)
+{
+ struct ibv_send_wr *wr = &rdma_req->data.wr;
+ struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+ uint32_t i;
+ int j;
+ uint64_t remote_addr_offset = 0;
+
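+	/* Walk the WR chain and advance the remote address by the number of bytes
+	 * already covered by the preceding WRs' SGEs, so each WR targets the correct
+	 * offset within the host buffer.
+	 */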
+ for (i = 0; i < num_wrs; ++i) {
+ wr->wr.rdma.rkey = sgl->keyed.key;
+ wr->wr.rdma.remote_addr = sgl->address + remote_addr_offset;
+ for (j = 0; j < wr->num_sge; ++j) {
+ remote_addr_offset += wr->sg_list[j].length;
+ }
+ wr = wr->next;
+ }
+}
+
+/* This function is used in the rare case that we have a buffer split over multiple memory regions. */
+static int
+nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
+{
+ struct spdk_nvmf_transport_poll_group *group = &rgroup->group;
+ struct spdk_nvmf_transport *transport = group->transport;
+ struct spdk_nvmf_transport_pg_cache_buf *old_buf;
+ void *new_buf;
+
+ if (!(STAILQ_EMPTY(&group->buf_cache))) {
+ group->buf_cache_count--;
+ new_buf = STAILQ_FIRST(&group->buf_cache);
+ STAILQ_REMOVE_HEAD(&group->buf_cache, link);
+ assert(*buf != NULL);
+ } else {
+ new_buf = spdk_mempool_get(transport->data_buf_pool);
+ }
+
+	/* The replacement buffer may be NULL if the mempool is exhausted. */
+	if (new_buf == NULL) {
+		return -ENOMEM;
+	}
+
+ old_buf = *buf;
+ STAILQ_INSERT_HEAD(&rgroup->retired_bufs, old_buf, link);
+ *buf = new_buf;
+ return 0;
+}
+
+static bool
+nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
+ uint32_t *_lkey)
+{
+ uint64_t translation_len;
+ uint32_t lkey;
+
+ translation_len = iov->iov_len;
+
+ if (!g_nvmf_hooks.get_rkey) {
+ lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
+ (uint64_t)iov->iov_base, &translation_len))->lkey;
+ } else {
+ lkey = spdk_mem_map_translate(device->map,
+ (uint64_t)iov->iov_base, &translation_len);
+ }
+
+ if (spdk_unlikely(translation_len < iov->iov_len)) {
+ return false;
+ }
+
+ *_lkey = lkey;
+ return true;
+}
+
+static bool
+nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
+ struct iovec *iov, struct ibv_send_wr **_wr,
+ uint32_t *_remaining_data_block, uint32_t *_offset,
+ uint32_t *_num_extra_wrs,
+ const struct spdk_dif_ctx *dif_ctx)
+{
+ struct ibv_send_wr *wr = *_wr;
+ struct ibv_sge *sg_ele = &wr->sg_list[wr->num_sge];
+ uint32_t lkey = 0;
+ uint32_t remaining, data_block_size, md_size, sge_len;
+
+ if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &lkey))) {
+ /* This is a very rare case that can occur when using DPDK version < 19.05 */
+ SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions. Removing it from circulation.\n");
+ return false;
+ }
+
+ if (spdk_likely(!dif_ctx)) {
+ sg_ele->lkey = lkey;
+ sg_ele->addr = (uintptr_t)(iov->iov_base);
+ sg_ele->length = iov->iov_len;
+ wr->num_sge++;
+ } else {
+ remaining = iov->iov_len - *_offset;
+ data_block_size = dif_ctx->block_size - dif_ctx->md_size;
+ md_size = dif_ctx->md_size;
+
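+		/* With DIF insert/strip the local buffer holds extended LBAs (data blocks
+		 * interleaved with metadata), but only the data portions are transferred:
+		 * build one SGE per data block and skip the metadata regions, moving on to
+		 * the next chained WR once this one runs out of SGE slots.
+		 */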
+ while (remaining) {
+ if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) {
+ if (*_num_extra_wrs > 0 && wr->next) {
+ *_wr = wr->next;
+ wr = *_wr;
+ wr->num_sge = 0;
+ sg_ele = &wr->sg_list[wr->num_sge];
+ (*_num_extra_wrs)--;
+ } else {
+ break;
+ }
+ }
+ sg_ele->lkey = lkey;
+ sg_ele->addr = (uintptr_t)((char *)iov->iov_base + *_offset);
+ sge_len = spdk_min(remaining, *_remaining_data_block);
+ sg_ele->length = sge_len;
+ remaining -= sge_len;
+ *_remaining_data_block -= sge_len;
+ *_offset += sge_len;
+
+ sg_ele++;
+ wr->num_sge++;
+
+ if (*_remaining_data_block == 0) {
+ /* skip metadata */
+ *_offset += md_size;
+				/* Metadata that does not fit in this IO buffer will be carried over to the next IO buffer */
+ remaining -= spdk_min(remaining, md_size);
+ *_remaining_data_block = data_block_size;
+ }
+
+ if (remaining == 0) {
+ /* By subtracting the size of the last IOV from the offset, we ensure that we skip
+ the remaining metadata bits at the beginning of the next buffer */
+ *_offset -= iov->iov_len;
+ }
+ }
+ }
+
+ return true;
+}
+
+static int
+nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
+ struct spdk_nvmf_rdma_device *device,
+ struct spdk_nvmf_rdma_request *rdma_req,
+ struct ibv_send_wr *wr,
+ uint32_t length,
+ uint32_t num_extra_wrs)
+{
+ struct spdk_nvmf_request *req = &rdma_req->req;
+ struct spdk_dif_ctx *dif_ctx = NULL;
+ uint32_t remaining_data_block = 0;
+ uint32_t offset = 0;
+
+ if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) {
+ dif_ctx = &rdma_req->req.dif.dif_ctx;
+ remaining_data_block = dif_ctx->block_size - dif_ctx->md_size;
+ }
+
+ wr->num_sge = 0;
+
+ while (length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) {
+ while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], &wr,
+ &remaining_data_block, &offset, &num_extra_wrs, dif_ctx))) {
+ if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
+ return -ENOMEM;
+ }
+ req->iov[rdma_req->iovpos].iov_base = (void *)((uintptr_t)(req->buffers[rdma_req->iovpos] +
+ NVMF_DATA_BUFFER_MASK) &
+ ~NVMF_DATA_BUFFER_MASK);
+ }
+
+ length -= req->iov[rdma_req->iovpos].iov_len;
+ rdma_req->iovpos++;
+ }
+
+ if (length) {
+ SPDK_ERRLOG("Not enough SG entries to hold data buffer\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline uint32_t
+nvmf_rdma_calc_num_wrs(uint32_t length, uint32_t io_unit_size, uint32_t block_size)
+{
+ /* estimate the number of SG entries and WRs needed to process the request */
+ uint32_t num_sge = 0;
+ uint32_t i;
+ uint32_t num_buffers = SPDK_CEIL_DIV(length, io_unit_size);
+
+ for (i = 0; i < num_buffers && length > 0; i++) {
+ uint32_t buffer_len = spdk_min(length, io_unit_size);
+ uint32_t num_sge_in_block = SPDK_CEIL_DIV(buffer_len, block_size);
+
+ if (num_sge_in_block * block_size > buffer_len) {
+ ++num_sge_in_block;
+ }
+ num_sge += num_sge_in_block;
+ length -= buffer_len;
+ }
+ return SPDK_CEIL_DIV(num_sge, SPDK_NVMF_MAX_SGL_ENTRIES);
+}
+
+static int
+nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_device *device,
+ struct spdk_nvmf_rdma_request *rdma_req,
+ uint32_t length)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_request *req = &rdma_req->req;
+ struct ibv_send_wr *wr = &rdma_req->data.wr;
+ int rc;
+ uint32_t num_wrs = 1;
+
+ rqpair = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ rgroup = rqpair->poller->group;
+
+ /* rdma wr specifics */
+ nvmf_rdma_setup_request(rdma_req);
+
+ rc = spdk_nvmf_request_get_buffers(req, &rgroup->group, &rtransport->transport,
+ length);
+ if (rc != 0) {
+ return rc;
+ }
+
+ assert(req->iovcnt <= rqpair->max_send_sge);
+
+ rdma_req->iovpos = 0;
+
+ if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
+ num_wrs = nvmf_rdma_calc_num_wrs(length, rtransport->transport.opts.io_unit_size,
+ req->dif.dif_ctx.block_size);
+ if (num_wrs > 1) {
+ rc = nvmf_request_alloc_wrs(rtransport, rdma_req, num_wrs - 1);
+ if (rc != 0) {
+ goto err_exit;
+ }
+ }
+ }
+
+ rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length, num_wrs - 1);
+ if (spdk_unlikely(rc != 0)) {
+ goto err_exit;
+ }
+
+ if (spdk_unlikely(num_wrs > 1)) {
+ nvmf_rdma_update_remote_addr(rdma_req, num_wrs);
+ }
+
+ /* set the number of outstanding data WRs for this request. */
+ rdma_req->num_outstanding_data_wr = num_wrs;
+
+ return rc;
+
+err_exit:
+ spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport);
+ nvmf_rdma_request_free_data(rdma_req, rtransport);
+ req->iovcnt = 0;
+ return rc;
+}
+
+static int
+nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_device *device,
+ struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct ibv_send_wr *current_wr;
+ struct spdk_nvmf_request *req = &rdma_req->req;
+ struct spdk_nvme_sgl_descriptor *inline_segment, *desc;
+ uint32_t num_sgl_descriptors;
+ uint32_t lengths[SPDK_NVMF_MAX_SGL_ENTRIES];
+ uint32_t i;
+ int rc;
+
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ rgroup = rqpair->poller->group;
+
+ inline_segment = &req->cmd->nvme_cmd.dptr.sgl1;
+ assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
+ assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);
+
+ num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
+ assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES);
+
+ if (nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) {
+ return -ENOMEM;
+ }
+
+ desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
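+	/* First pass over the in-capsule SGL descriptors: record each segment's length
+	 * (extended to cover metadata when DIF insert/strip is enabled) so all buffers
+	 * can be requested in a single call.
+	 */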
+ for (i = 0; i < num_sgl_descriptors; i++) {
+ if (spdk_likely(!req->dif.dif_insert_or_strip)) {
+ lengths[i] = desc->keyed.length;
+ } else {
+ req->dif.orig_length += desc->keyed.length;
+ lengths[i] = spdk_dif_get_length_with_md(desc->keyed.length, &req->dif.dif_ctx);
+ req->dif.elba_length += lengths[i];
+ }
+ desc++;
+ }
+
+ rc = spdk_nvmf_request_get_buffers_multi(req, &rgroup->group, &rtransport->transport,
+ lengths, num_sgl_descriptors);
+ if (rc != 0) {
+ nvmf_rdma_request_free_data(rdma_req, rtransport);
+ return rc;
+ }
+
+ /* The first WR must always be the embedded data WR. This is how we unwind them later. */
+ current_wr = &rdma_req->data.wr;
+ assert(current_wr != NULL);
+
+ req->length = 0;
+ rdma_req->iovpos = 0;
+ desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
+ for (i = 0; i < num_sgl_descriptors; i++) {
+ /* The descriptors must be keyed data block descriptors with an address, not an offset. */
+ if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK ||
+ desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) {
+ rc = -EINVAL;
+ goto err_exit;
+ }
+
+ current_wr->num_sge = 0;
+
+ rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i], 0);
+ if (rc != 0) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ req->length += desc->keyed.length;
+ current_wr->wr.rdma.rkey = desc->keyed.key;
+ current_wr->wr.rdma.remote_addr = desc->address;
+ current_wr = current_wr->next;
+ desc++;
+ }
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+ /* Go back to the last descriptor in the list. */
+ desc--;
+ if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+ if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
+ rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
+ rdma_req->rsp.wr.imm_data = desc->keyed.key;
+ }
+ }
+#endif
+
+ rdma_req->num_outstanding_data_wr = num_sgl_descriptors;
+
+ return 0;
+
+err_exit:
+ spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport);
+ nvmf_rdma_request_free_data(rdma_req, rtransport);
+ return rc;
+}
+
+static int
+nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_device *device,
+ struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct spdk_nvmf_request *req = &rdma_req->req;
+ struct spdk_nvme_cpl *rsp;
+ struct spdk_nvme_sgl_descriptor *sgl;
+ int rc;
+ uint32_t length;
+
+ rsp = &req->rsp->nvme_cpl;
+ sgl = &req->cmd->nvme_cmd.dptr.sgl1;
+
+ if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
+ (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
+ sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
+
+ length = sgl->keyed.length;
+ if (length > rtransport->transport.opts.max_io_size) {
+ SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
+ length, rtransport->transport.opts.max_io_size);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+ if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+ if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
+ rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
+ rdma_req->rsp.wr.imm_data = sgl->keyed.key;
+ }
+ }
+#endif
+
+ /* fill request length and populate iovs */
+ req->length = length;
+
+ if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
+ req->dif.orig_length = length;
+ length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
+ req->dif.elba_length = length;
+ }
+
+ rc = nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req, length);
+ if (spdk_unlikely(rc < 0)) {
+ if (rc == -EINVAL) {
+ SPDK_ERRLOG("SGL length exceeds the max I/O size\n");
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+ /* No available buffers. Queue this request up. */
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
+ return 0;
+ }
+
+ /* backward compatible */
+ req->data = req->iov[0].iov_base;
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
+ req->iovcnt);
+
+ return 0;
+ } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+ uint64_t offset = sgl->address;
+ uint32_t max_len = rtransport->transport.opts.in_capsule_data_size;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
+ offset, sgl->unkeyed.length);
+
+ if (offset > max_len) {
+ SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
+ offset, max_len);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
+ return -1;
+ }
+ max_len -= (uint32_t)offset;
+
+ if (sgl->unkeyed.length > max_len) {
+ SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
+ sgl->unkeyed.length, max_len);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+
+ rdma_req->num_outstanding_data_wr = 0;
+ req->data = rdma_req->recv->buf + offset;
+ req->data_from_pool = false;
+ req->length = sgl->unkeyed.length;
+
+ req->iov[0].iov_base = req->data;
+ req->iov[0].iov_len = req->length;
+ req->iovcnt = 1;
+
+ return 0;
+ } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+
+ rc = nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req);
+ if (rc == -ENOMEM) {
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
+ return 0;
+ } else if (rc == -EINVAL) {
+ SPDK_ERRLOG("Multi SGL element request length exceeds the max I/O size\n");
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+
+ /* backward compatible */
+ req->data = req->iov[0].iov_base;
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
+ req->iovcnt);
+
+ return 0;
+ }
+
+ SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
+ sgl->generic.type, sgl->generic.subtype);
+ rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
+ return -1;
+}
+
+static void
+_nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
+ struct spdk_nvmf_rdma_transport *rtransport)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ if (rdma_req->req.data_from_pool) {
+ rgroup = rqpair->poller->group;
+
+ spdk_nvmf_request_free_buffers(&rdma_req->req, &rgroup->group, &rtransport->transport);
+ }
+ nvmf_rdma_request_free_data(rdma_req, rtransport);
+ rdma_req->req.length = 0;
+ rdma_req->req.iovcnt = 0;
+ rdma_req->req.data = NULL;
+ rdma_req->rsp.wr.next = NULL;
+ rdma_req->data.wr.next = NULL;
+ memset(&rdma_req->req.dif, 0, sizeof(rdma_req->req.dif));
+ rqpair->qd--;
+
+ STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link);
+ rdma_req->state = RDMA_REQUEST_STATE_FREE;
+}
+
+bool
+nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_request *rdma_req)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl;
+ int rc;
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+ enum spdk_nvmf_rdma_request_state prev_state;
+ bool progress = false;
+ int data_posted;
+ uint32_t num_blocks;
+
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ device = rqpair->device;
+ rgroup = rqpair->poller->group;
+
+ assert(rdma_req->state != RDMA_REQUEST_STATE_FREE);
+
+ /* If the queue pair is in an error state, force the request to the completed state
+ * to release resources. */
+ if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ if (rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER) {
+ STAILQ_REMOVE(&rgroup->group.pending_buf_queue, &rdma_req->req, spdk_nvmf_request, buf_link);
+ } else if (rdma_req->state == RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING) {
+ STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
+ } else if (rdma_req->state == RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING) {
+ STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
+ }
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ }
+
+ /* The loop here is to allow for several back-to-back state changes. */
+ do {
+ prev_state = rdma_req->state;
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p entering state %d\n", rdma_req, prev_state);
+
+ switch (rdma_req->state) {
+ case RDMA_REQUEST_STATE_FREE:
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_NEW:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ rdma_recv = rdma_req->recv;
+
+ /* The first element of the SGL is the NVMe command */
+ rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr;
+ memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp));
+
+ if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ break;
+ }
+
+ if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&rdma_req->req, &rdma_req->req.dif.dif_ctx))) {
+ rdma_req->req.dif.dif_insert_or_strip = true;
+ }
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+ rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+ rdma_req->rsp.wr.imm_data = 0;
+#endif
+
+ /* The next state transition depends on the data transfer needs of this request. */
+ rdma_req->req.xfer = spdk_nvmf_req_get_xfer(&rdma_req->req);
+
+ /* If no data to transfer, ready to execute. */
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) {
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
+ break;
+ }
+
+ rdma_req->state = RDMA_REQUEST_STATE_NEED_BUFFER;
+ STAILQ_INSERT_TAIL(&rgroup->group.pending_buf_queue, &rdma_req->req, buf_link);
+ break;
+ case RDMA_REQUEST_STATE_NEED_BUFFER:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE);
+
+ if (&rdma_req->req != STAILQ_FIRST(&rgroup->group.pending_buf_queue)) {
+ /* This request needs to wait in line to obtain a buffer */
+ break;
+ }
+
+ /* Try to get a data buffer */
+ rc = nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req);
+ if (rc < 0) {
+ STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link);
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ break;
+ }
+
+ if (!rdma_req->req.data) {
+ /* No buffers available. */
+ rgroup->stat.pending_data_buffer++;
+ break;
+ }
+
+ STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link);
+
+ /* If data is transferring from host to controller and the data didn't
+ * arrive using in capsule data, we need to do a transfer from the host.
+ */
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER &&
+ rdma_req->req.data_from_pool) {
+ STAILQ_INSERT_TAIL(&rqpair->pending_rdma_read_queue, rdma_req, state_link);
+ rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING;
+ break;
+ }
+
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
+ break;
+ case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_read_queue)) {
+ /* This request needs to wait in line to perform RDMA */
+ break;
+ }
+ if (rqpair->current_send_depth + rdma_req->num_outstanding_data_wr > rqpair->max_send_depth
+ || rqpair->current_read_depth + rdma_req->num_outstanding_data_wr > rqpair->max_read_depth) {
+				/* We can only have so many WRs outstanding; we have to wait until some finish. */
+ rqpair->poller->stat.pending_rdma_read++;
+ break;
+ }
+
+ /* We have already verified that this request is the head of the queue. */
+ STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_read_queue, state_link);
+
+ rc = request_transfer_in(&rdma_req->req);
+ if (!rc) {
+ rdma_req->state = RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER;
+ } else {
+ rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ }
+ break;
+ case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_READY_TO_EXECUTE:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) {
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ /* generate DIF for write operation */
+ num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size);
+ assert(num_blocks > 0);
+
+ rc = spdk_dif_generate(rdma_req->req.iov, rdma_req->req.iovcnt,
+ num_blocks, &rdma_req->req.dif.dif_ctx);
+ if (rc != 0) {
+ SPDK_ERRLOG("DIF generation failed\n");
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ nvmf_rdma_start_disconnect(rqpair);
+ break;
+ }
+ }
+
+ assert(rdma_req->req.dif.elba_length >= rdma_req->req.length);
+ /* set extended length before IO operation */
+ rdma_req->req.length = rdma_req->req.dif.elba_length;
+ }
+
+ rdma_req->state = RDMA_REQUEST_STATE_EXECUTING;
+ spdk_nvmf_request_exec(&rdma_req->req);
+ break;
+ case RDMA_REQUEST_STATE_EXECUTING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_EXECUTED:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
+ rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ STAILQ_INSERT_TAIL(&rqpair->pending_rdma_write_queue, rdma_req, state_link);
+ rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING;
+ } else {
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ }
+ if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) {
+ /* restore the original length */
+ rdma_req->req.length = rdma_req->req.dif.orig_length;
+
+ if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ struct spdk_dif_error error_blk;
+
+ num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size);
+
+ rc = spdk_dif_verify(rdma_req->req.iov, rdma_req->req.iovcnt, num_blocks,
+ &rdma_req->req.dif.dif_ctx, &error_blk);
+ if (rc) {
+ struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl;
+
+ SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", error_blk.err_type,
+ error_blk.err_offset);
+ rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
+ rsp->status.sc = nvmf_rdma_dif_error_to_compl_status(error_blk.err_type);
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
+ }
+ }
+ }
+ break;
+ case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_write_queue)) {
+ /* This request needs to wait in line to perform RDMA */
+ break;
+ }
+ if ((rqpair->current_send_depth + rdma_req->num_outstanding_data_wr + 1) >
+ rqpair->max_send_depth) {
+				/* We can only have so many WRs outstanding; we have to wait until some finish.
+				 * +1 since each request has an additional WR for the response. */
+ rqpair->poller->stat.pending_rdma_write++;
+ break;
+ }
+
+ /* We have already verified that this request is the head of the queue. */
+ STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_write_queue, state_link);
+
+ /* The data transfer will be kicked off from
+ * RDMA_REQUEST_STATE_READY_TO_COMPLETE state.
+ */
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ break;
+ case RDMA_REQUEST_STATE_READY_TO_COMPLETE:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ rc = request_transfer_out(&rdma_req->req, &data_posted);
+ assert(rc == 0); /* No good way to handle this currently */
+ if (rc) {
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ } else {
+ rdma_req->state = data_posted ? RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST :
+ RDMA_REQUEST_STATE_COMPLETING;
+ }
+ break;
+ case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_COMPLETING:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+ /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
+ * to escape this state. */
+ break;
+ case RDMA_REQUEST_STATE_COMPLETED:
+ spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0,
+ (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
+
+ rqpair->poller->stat.request_latency += spdk_get_ticks() - rdma_req->receive_tsc;
+ _nvmf_rdma_request_free(rdma_req, rtransport);
+ break;
+ case RDMA_REQUEST_NUM_STATES:
+ default:
+ assert(0);
+ break;
+ }
+
+ if (rdma_req->state != prev_state) {
+ progress = true;
+ }
+ } while (rdma_req->state != prev_state);
+
+ return progress;
+}
+
+/* Public API callbacks begin here */
+
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128
+#define SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH 4096
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
+#define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
+#define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072
+#define SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE (SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE / SPDK_NVMF_MAX_SGL_ENTRIES)
+#define SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS 4095
+#define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE 32
+#define SPDK_NVMF_RDMA_DEFAULT_NO_SRQ false
+#define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false
+#define SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG 100
+#define SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC 1
+
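+/* Populate a transport options structure with the RDMA transport defaults defined above. */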
+static void
+nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
+{
+ opts->max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH;
+ opts->max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR;
+ opts->in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE;
+ opts->max_io_size = SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE;
+ opts->io_unit_size = SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE;
+ opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH;
+ opts->num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS;
+ opts->buf_cache_size = SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE;
+ opts->max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH;
+ opts->no_srq = SPDK_NVMF_RDMA_DEFAULT_NO_SRQ;
+ opts->dif_insert_or_strip = SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP;
+ opts->acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG;
+ opts->abort_timeout_sec = SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC;
+}
+
+const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = {
+ .notify_cb = nvmf_rdma_mem_notify,
+ .are_contiguous = nvmf_rdma_check_contiguous_entries
+};
+
+static int nvmf_rdma_destroy(struct spdk_nvmf_transport *transport);
+
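+/* Create the RDMA transport: validate and adjust the supplied options, open the RDMA CM
+ * event channel, enumerate the verbs devices and set up a protection domain and memory map
+ * for each one, and build the poll fd array consumed by nvmf_rdma_accept(). */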
+static struct spdk_nvmf_transport *
+nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
+{
+ int rc;
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_device *device, *tmp;
+ struct ibv_context **contexts;
+ uint32_t i;
+ int flag;
+ uint32_t sge_count;
+ uint32_t min_shared_buffers;
+ int max_device_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
+ pthread_mutexattr_t attr;
+
+ rtransport = calloc(1, sizeof(*rtransport));
+ if (!rtransport) {
+ return NULL;
+ }
+
+ if (pthread_mutexattr_init(&attr)) {
+ SPDK_ERRLOG("pthread_mutexattr_init() failed\n");
+ free(rtransport);
+ return NULL;
+ }
+
+ if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) {
+ SPDK_ERRLOG("pthread_mutexattr_settype() failed\n");
+ pthread_mutexattr_destroy(&attr);
+ free(rtransport);
+ return NULL;
+ }
+
+ if (pthread_mutex_init(&rtransport->lock, &attr)) {
+ SPDK_ERRLOG("pthread_mutex_init() failed\n");
+ pthread_mutexattr_destroy(&attr);
+ free(rtransport);
+ return NULL;
+ }
+
+ pthread_mutexattr_destroy(&attr);
+
+ TAILQ_INIT(&rtransport->devices);
+ TAILQ_INIT(&rtransport->ports);
+ TAILQ_INIT(&rtransport->poll_groups);
+
+ rtransport->transport.ops = &spdk_nvmf_transport_rdma;
+
+ SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n"
+ " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
+ " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
+ " in_capsule_data_size=%d, max_aq_depth=%d,\n"
+ " num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d,"
+ " acceptor_backlog=%d, abort_timeout_sec=%d\n",
+ opts->max_queue_depth,
+ opts->max_io_size,
+ opts->max_qpairs_per_ctrlr - 1,
+ opts->io_unit_size,
+ opts->in_capsule_data_size,
+ opts->max_aq_depth,
+ opts->num_shared_buffers,
+ opts->max_srq_depth,
+ opts->no_srq,
+ opts->acceptor_backlog,
+ opts->abort_timeout_sec);
+
+ /* I/O unit size cannot be larger than max I/O size */
+ if (opts->io_unit_size > opts->max_io_size) {
+ opts->io_unit_size = opts->max_io_size;
+ }
+
+ if (opts->acceptor_backlog <= 0) {
+ SPDK_ERRLOG("The acceptor backlog cannot be less than 1, setting to the default value of (%d).\n",
+ SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG);
+ opts->acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG;
+ }
+
+ if (opts->num_shared_buffers < (SPDK_NVMF_MAX_SGL_ENTRIES * 2)) {
+ SPDK_ERRLOG("The number of shared data buffers (%d) is less than "
+ "the minimum number required to guarantee that forward progress can be made (%d)\n",
+ opts->num_shared_buffers, (SPDK_NVMF_MAX_SGL_ENTRIES * 2));
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
+ if (min_shared_buffers > opts->num_shared_buffers) {
+ SPDK_ERRLOG("There are not enough buffers to satisfy "
+ "per-poll group caches for each thread. (%" PRIu32 ") "
+ "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
+ SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ sge_count = opts->max_io_size / opts->io_unit_size;
+ if (sge_count > NVMF_DEFAULT_TX_SGE) {
+ SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ rtransport->event_channel = rdma_create_event_channel();
+ if (rtransport->event_channel == NULL) {
+ SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno));
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ flag = fcntl(rtransport->event_channel->fd, F_GETFL);
+ if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
+ SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n",
+ rtransport->event_channel->fd, spdk_strerror(errno));
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
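+ /* Pool of data work request contexts shared by all queue pairs, sized for the worst
+ * case of every command on a full queue using the maximum number of SGL entries. */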
+ rtransport->data_wr_pool = spdk_mempool_create("spdk_nvmf_rdma_wr_data",
+ opts->max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES,
+ sizeof(struct spdk_nvmf_rdma_request_data),
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!rtransport->data_wr_pool) {
+ SPDK_ERRLOG("Unable to allocate work request pool for poll group\n");
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ contexts = rdma_get_devices(NULL);
+ if (contexts == NULL) {
+ SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
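+ /* Walk every verbs device returned by rdma_get_devices() and prepare it for use. */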
+ i = 0;
+ rc = 0;
+ while (contexts[i] != NULL) {
+ device = calloc(1, sizeof(*device));
+ if (!device) {
+ SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n");
+ rc = -ENOMEM;
+ break;
+ }
+ device->context = contexts[i];
+ rc = ibv_query_device(device->context, &device->attr);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
+ free(device);
+ break;
+ }
+
+ max_device_sge = spdk_min(max_device_sge, device->attr.max_sge);
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+ if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) {
+ SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE, "
+ "but the device with vendor ID %u does not.\n", device->attr.vendor_id);
+ }
+
+ /**
+ * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE.
+ * The Soft-RoCE RXE driver does not currently support send with invalidate,
+ * but incorrectly reports that it does. There are changes making their way
+ * through the kernel now that will enable this feature. When they are merged,
+ * we can conditionally enable this feature.
+ *
+ * TODO: enable this for versions of the kernel rxe driver that support it.
+ */
+ if (device->attr.vendor_id == 0) {
+ device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS);
+ }
+#endif
+
+ /* set up device context async ev fd as NON_BLOCKING */
+ flag = fcntl(device->context->async_fd, F_GETFL);
+ rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n");
+ free(device);
+ break;
+ }
+
+ TAILQ_INSERT_TAIL(&rtransport->devices, device, link);
+ i++;
+
+ if (g_nvmf_hooks.get_ibv_pd) {
+ device->pd = g_nvmf_hooks.get_ibv_pd(NULL, device->context);
+ } else {
+ device->pd = ibv_alloc_pd(device->context);
+ }
+
+ if (!device->pd) {
+ SPDK_ERRLOG("Unable to allocate protection domain.\n");
+ rc = -ENOMEM;
+ break;
+ }
+
+ assert(device->map == NULL);
+
+ device->map = spdk_mem_map_alloc(0, &g_nvmf_rdma_map_ops, device->pd);
+ if (!device->map) {
+ SPDK_ERRLOG("Unable to allocate memory map for listen address\n");
+ rc = -ENOMEM;
+ break;
+ }
+
+ assert(device->map != NULL);
+ assert(device->pd != NULL);
+ }
+ rdma_free_devices(contexts);
+
+ if (opts->io_unit_size * max_device_sge < opts->max_io_size) {
+ /* divide and round up. */
+ opts->io_unit_size = (opts->max_io_size + max_device_sge - 1) / max_device_sge;
+
+ /* round up to the nearest 4k. */
+ opts->io_unit_size = (opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT - 1) & ~NVMF_DATA_BUFFER_MASK;
+
+ opts->io_unit_size = spdk_max(opts->io_unit_size, SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE);
+ SPDK_NOTICELOG("Adjusting the io unit size to fit the device's maximum I/O size. New I/O unit size %u\n",
+ opts->io_unit_size);
+ }
+
+ if (rc < 0) {
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ /* Set up poll descriptor array to monitor events from RDMA and IB
+ * in a single poll syscall
+ */
+ rtransport->npoll_fds = i + 1;
+ i = 0;
+ rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd));
+ if (rtransport->poll_fds == NULL) {
+ SPDK_ERRLOG("poll_fds allocation failed\n");
+ nvmf_rdma_destroy(&rtransport->transport);
+ return NULL;
+ }
+
+ rtransport->poll_fds[i].fd = rtransport->event_channel->fd;
+ rtransport->poll_fds[i++].events = POLLIN;
+
+ TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
+ rtransport->poll_fds[i].fd = device->context->async_fd;
+ rtransport->poll_fds[i++].events = POLLIN;
+ }
+
+ return &rtransport->transport;
+}
+
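+/* Tear down the transport: remove all listeners, release per-device protection domains and
+ * memory maps, free the shared data WR pool, and finally free the transport itself. */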
+static int
+nvmf_rdma_destroy(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_port *port, *port_tmp;
+ struct spdk_nvmf_rdma_device *device, *device_tmp;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) {
+ TAILQ_REMOVE(&rtransport->ports, port, link);
+ rdma_destroy_id(port->id);
+ free(port);
+ }
+
+ if (rtransport->poll_fds != NULL) {
+ free(rtransport->poll_fds);
+ }
+
+ if (rtransport->event_channel != NULL) {
+ rdma_destroy_event_channel(rtransport->event_channel);
+ }
+
+ TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
+ TAILQ_REMOVE(&rtransport->devices, device, link);
+ if (device->map) {
+ spdk_mem_map_free(&device->map);
+ }
+ if (device->pd) {
+ if (!g_nvmf_hooks.get_ibv_pd) {
+ ibv_dealloc_pd(device->pd);
+ }
+ }
+ free(device);
+ }
+
+ if (rtransport->data_wr_pool != NULL) {
+ if (spdk_mempool_count(rtransport->data_wr_pool) !=
+ (transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES)) {
+ SPDK_ERRLOG("transport wr pool count is %zu but should be %u\n",
+ spdk_mempool_count(rtransport->data_wr_pool),
+ transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES);
+ }
+ }
+
+ spdk_mempool_free(rtransport->data_wr_pool);
+
+ pthread_mutex_destroy(&rtransport->lock);
+ free(rtransport);
+
+ return 0;
+}
+
+static int
+nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
+ struct spdk_nvme_transport_id *trid,
+ bool peer);
+
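+/* Start listening on the given transport ID: resolve the address, bind an rdma_cm id to it
+ * on the transport's event channel, and listen with the configured acceptor backlog. */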
+static int
+nvmf_rdma_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_port *port;
+ struct addrinfo *res;
+ struct addrinfo hints;
+ int family;
+ int rc;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+ assert(rtransport->event_channel != NULL);
+
+ pthread_mutex_lock(&rtransport->lock);
+ port = calloc(1, sizeof(*port));
+ if (!port) {
+ SPDK_ERRLOG("Port allocation failed\n");
+ pthread_mutex_unlock(&rtransport->lock);
+ return -ENOMEM;
+ }
+
+ port->trid = trid;
+
+ switch (trid->adrfam) {
+ case SPDK_NVMF_ADRFAM_IPV4:
+ family = AF_INET;
+ break;
+ case SPDK_NVMF_ADRFAM_IPV6:
+ family = AF_INET6;
+ break;
+ default:
+ SPDK_ERRLOG("Unhandled ADRFAM %d\n", trid->adrfam);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return -EINVAL;
+ }
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = family;
+ hints.ai_flags = AI_NUMERICSERV;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = 0;
+
+ rc = getaddrinfo(trid->traddr, trid->trsvcid, &hints, &res);
+ if (rc) {
+ SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return -EINVAL;
+ }
+
+ rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP);
+ if (rc < 0) {
+ SPDK_ERRLOG("rdma_create_id() failed\n");
+ freeaddrinfo(res);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return rc;
+ }
+
+ rc = rdma_bind_addr(port->id, res->ai_addr);
+ freeaddrinfo(res);
+
+ if (rc < 0) {
+ SPDK_ERRLOG("rdma_bind_addr() failed\n");
+ rdma_destroy_id(port->id);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return rc;
+ }
+
+ if (!port->id->verbs) {
+ SPDK_ERRLOG("ibv_context is null\n");
+ rdma_destroy_id(port->id);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return -1;
+ }
+
+ rc = rdma_listen(port->id, transport->opts.acceptor_backlog);
+ if (rc < 0) {
+ SPDK_ERRLOG("rdma_listen() failed\n");
+ rdma_destroy_id(port->id);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return rc;
+ }
+
+ TAILQ_FOREACH(device, &rtransport->devices, link) {
+ if (device->context == port->id->verbs) {
+ port->device = device;
+ break;
+ }
+ }
+ if (!port->device) {
+ SPDK_ERRLOG("Bound the listener to verbs context %p, but unable to find a corresponding device.\n",
+ port->id->verbs);
+ rdma_destroy_id(port->id);
+ free(port);
+ pthread_mutex_unlock(&rtransport->lock);
+ return -EINVAL;
+ }
+
+ SPDK_NOTICELOG("*** NVMe/RDMA Target Listening on %s port %s ***\n",
+ trid->traddr, trid->trsvcid);
+
+ TAILQ_INSERT_TAIL(&rtransport->ports, port, link);
+ pthread_mutex_unlock(&rtransport->lock);
+ return 0;
+}
+
+static void
+nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_port *port, *tmp;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ pthread_mutex_lock(&rtransport->lock);
+ TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) {
+ if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) {
+ TAILQ_REMOVE(&rtransport->ports, port, link);
+ rdma_destroy_id(port->id);
+ free(port);
+ break;
+ }
+ }
+
+ pthread_mutex_unlock(&rtransport->lock);
+}
+
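+/* Drive forward requests on this qpair that are waiting for resources: pending RDMA reads
+ * and writes, requests waiting for data buffers, and newly received commands waiting for a
+ * free request structure. When drain is true, keep going even if a request cannot progress. */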
+static void
+nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_qpair *rqpair, bool drain)
+{
+ struct spdk_nvmf_request *req, *tmp;
+ struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+ struct spdk_nvmf_rdma_resources *resources;
+
+ /* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */
+ STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) {
+ if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
+ break;
+ }
+ }
+
+ /* Then RDMA writes since reads have stronger restrictions than writes */
+ STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_write_queue, state_link, req_tmp) {
+ if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
+ break;
+ }
+ }
+
+ /* The second highest priority is I/O waiting on memory buffers. */
+ STAILQ_FOREACH_SAFE(req, &rqpair->poller->group->group.pending_buf_queue, buf_link, tmp) {
+ rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+ if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
+ break;
+ }
+ }
+
+ resources = rqpair->resources;
+ while (!STAILQ_EMPTY(&resources->free_queue) && !STAILQ_EMPTY(&resources->incoming_queue)) {
+ rdma_req = STAILQ_FIRST(&resources->free_queue);
+ STAILQ_REMOVE_HEAD(&resources->free_queue, state_link);
+ rdma_req->recv = STAILQ_FIRST(&resources->incoming_queue);
+ STAILQ_REMOVE_HEAD(&resources->incoming_queue, link);
+
+ if (rqpair->srq != NULL) {
+ rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
+ rdma_req->recv->qpair->qd++;
+ } else {
+ rqpair->qd++;
+ }
+
+ rdma_req->receive_tsc = rdma_req->recv->receive_tsc;
+ rdma_req->state = RDMA_REQUEST_STATE_NEW;
+ if (nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+ break;
+ }
+ }
+ if (!STAILQ_EMPTY(&resources->incoming_queue) && STAILQ_EMPTY(&resources->free_queue)) {
+ rqpair->poller->stat.pending_free_request++;
+ }
+}
+
+static void
+_nvmf_rdma_qpair_disconnect(void *ctx)
+{
+ struct spdk_nvmf_qpair *qpair = ctx;
+
+ spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
+}
+
+static void
+_nvmf_rdma_try_disconnect(void *ctx)
+{
+ struct spdk_nvmf_qpair *qpair = ctx;
+ struct spdk_nvmf_poll_group *group;
+
+ /* Read the group out of the qpair. This is normally set and accessed only from
+ * the thread that created the group. Here, we're not on that thread necessarily.
+ * The data member qpair->group begins its life as NULL and is later assigned
+ * a pointer that never changes. So fortunately reading it and checking for
+ * non-NULL is thread safe in the x86_64 memory model. */
+ group = qpair->group;
+
+ if (group == NULL) {
+ /* The qpair hasn't been assigned to a group yet, so we can't
+ * process a disconnect. Send a message to ourself and try again. */
+ spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_try_disconnect, qpair);
+ return;
+ }
+
+ spdk_thread_send_msg(group->thread, _nvmf_rdma_qpair_disconnect, qpair);
+}
+
+static inline void
+nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ if (!__atomic_test_and_set(&rqpair->disconnect_started, __ATOMIC_RELAXED)) {
+ _nvmf_rdma_try_disconnect(&rqpair->qpair);
+ }
+}
+
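+/* Destroy the qpair once it has fully drained: all sends have completed, all receives are
+ * accounted for (or, with an SRQ, the last WQE event has been seen), and the generic nvmf
+ * layer has moved the qpair to the error state. */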
+static void
+nvmf_rdma_destroy_drained_qpair(void *ctx)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair = ctx;
+ struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ struct spdk_nvmf_rdma_transport, transport);
+
+ /* In the non-SRQ path, we will reach rqpair->max_queue_depth. In the SRQ path, we will get the last_wqe event. */
+ if (rqpair->current_send_depth != 0) {
+ return;
+ }
+
+ if (rqpair->srq == NULL && rqpair->current_recv_depth != rqpair->max_queue_depth) {
+ return;
+ }
+
+ if (rqpair->srq != NULL && rqpair->last_wqe_reached == false) {
+ return;
+ }
+
+ nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
+
+ /* Qpair will be destroyed after nvmf layer closes this qpair */
+ if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
+ return;
+ }
+
+ nvmf_rdma_qpair_destroy(rqpair);
+}
+
+static int
+nvmf_rdma_disconnect(struct rdma_cm_event *evt)
+{
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ if (evt->id == NULL) {
+ SPDK_ERRLOG("disconnect request: missing cm_id\n");
+ return -1;
+ }
+
+ qpair = evt->id->context;
+ if (qpair == NULL) {
+ SPDK_ERRLOG("disconnect request: no active connection\n");
+ return -1;
+ }
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rqpair->cm_id, 0);
+
+ nvmf_rdma_start_disconnect(rqpair);
+
+ return 0;
+}
+
+#ifdef DEBUG
+static const char *CM_EVENT_STR[] = {
+ "RDMA_CM_EVENT_ADDR_RESOLVED",
+ "RDMA_CM_EVENT_ADDR_ERROR",
+ "RDMA_CM_EVENT_ROUTE_RESOLVED",
+ "RDMA_CM_EVENT_ROUTE_ERROR",
+ "RDMA_CM_EVENT_CONNECT_REQUEST",
+ "RDMA_CM_EVENT_CONNECT_RESPONSE",
+ "RDMA_CM_EVENT_CONNECT_ERROR",
+ "RDMA_CM_EVENT_UNREACHABLE",
+ "RDMA_CM_EVENT_REJECTED",
+ "RDMA_CM_EVENT_ESTABLISHED",
+ "RDMA_CM_EVENT_DISCONNECTED",
+ "RDMA_CM_EVENT_DEVICE_REMOVAL",
+ "RDMA_CM_EVENT_MULTICAST_JOIN",
+ "RDMA_CM_EVENT_MULTICAST_ERROR",
+ "RDMA_CM_EVENT_ADDR_CHANGE",
+ "RDMA_CM_EVENT_TIMEWAIT_EXIT"
+};
+#endif /* DEBUG */
+
+static void
+nvmf_rdma_disconnect_qpairs_on_port(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_port *port)
+{
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_poller *rpoller;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
+ TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
+ TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) {
+ if (rqpair->listen_id == port->id) {
+ nvmf_rdma_start_disconnect(rqpair);
+ }
+ }
+ }
+ }
+}
+
+static bool
+nvmf_rdma_handle_cm_event_addr_change(struct spdk_nvmf_transport *transport,
+ struct rdma_cm_event *event)
+{
+ const struct spdk_nvme_transport_id *trid;
+ struct spdk_nvmf_rdma_port *port;
+ struct spdk_nvmf_rdma_transport *rtransport;
+ bool event_acked = false;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+ TAILQ_FOREACH(port, &rtransport->ports, link) {
+ if (port->id == event->id) {
+ SPDK_ERRLOG("ADDR_CHANGE: IP %s:%s migrated\n", port->trid->traddr, port->trid->trsvcid);
+ rdma_ack_cm_event(event);
+ event_acked = true;
+ trid = port->trid;
+ break;
+ }
+ }
+
+ if (event_acked) {
+ nvmf_rdma_disconnect_qpairs_on_port(rtransport, port);
+
+ nvmf_rdma_stop_listen(transport, trid);
+ nvmf_rdma_listen(transport, trid);
+ }
+
+ return event_acked;
+}
+
+static void
+nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,
+ struct rdma_cm_event *event)
+{
+ struct spdk_nvmf_rdma_port *port;
+ struct spdk_nvmf_rdma_transport *rtransport;
+
+ port = event->id->context;
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ SPDK_NOTICELOG("Port %s:%s is being removed\n", port->trid->traddr, port->trid->trsvcid);
+
+ nvmf_rdma_disconnect_qpairs_on_port(rtransport, port);
+
+ rdma_ack_cm_event(event);
+
+ while (spdk_nvmf_transport_stop_listen(transport, port->trid) == 0) {
+ ;
+ }
+}
+
+static void
+nvmf_process_cm_event(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct rdma_cm_event *event;
+ int rc;
+ bool event_acked;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ if (rtransport->event_channel == NULL) {
+ return;
+ }
+
+ while (1) {
+ event_acked = false;
+ rc = rdma_get_cm_event(rtransport->event_channel, &event);
+ if (rc) {
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno));
+ }
+ break;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Acceptor Event: %s\n", CM_EVENT_STR[event->event]);
+
+ spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ /* No action required. The target never attempts to resolve routes. */
+ break;
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ rc = nvmf_rdma_connect(transport, event);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to process connect event. rc: %d\n", rc);
+ break;
+ }
+ break;
+ case RDMA_CM_EVENT_CONNECT_RESPONSE:
+ /* The target never initiates a new connection. So this will not occur. */
+ break;
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ /* Can this happen? The docs say it can, but not sure what causes it. */
+ break;
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_REJECTED:
+ /* These only occur on the client side. */
+ break;
+ case RDMA_CM_EVENT_ESTABLISHED:
+ /* TODO: Should we be waiting for this event anywhere? */
+ break;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ rc = nvmf_rdma_disconnect(event);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
+ break;
+ }
+ break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ /* In case of device removal, the kernel IB layer triggers IBV_EVENT_DEVICE_FATAL,
+ * which triggers RDMA_CM_EVENT_DEVICE_REMOVAL on all cma_ids.
+ * Once these events are delivered to SPDK, we must release all IB resources and
+ * must not call any ibv_query/modify/create functions. We may only call
+ * ibv_destroy* functions to release the user space memory allocated by IB. All kernel
+ * resources have already been cleaned up. */
+ if (event->id->qp) {
+ /* If rdma_cm event has a valid `qp` pointer then the event refers to the
+ * corresponding qpair. Otherwise the event refers to a listening device */
+ rc = nvmf_rdma_disconnect(event);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
+ break;
+ }
+ } else {
+ nvmf_rdma_handle_cm_event_port_removal(transport, event);
+ event_acked = true;
+ }
+ break;
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ /* Multicast is not used */
+ break;
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ event_acked = nvmf_rdma_handle_cm_event_addr_change(transport, event);
+ break;
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ /* For now, do nothing. The target never re-uses queue pairs. */
+ break;
+ default:
+ SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event);
+ break;
+ }
+ if (!event_acked) {
+ rdma_ack_cm_event(event);
+ }
+ }
+}
+
+static void
+nvmf_rdma_handle_qp_fatal(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ nvmf_rdma_update_ibv_state(rqpair);
+ nvmf_rdma_start_disconnect(rqpair);
+}
+
+static void
+nvmf_rdma_handle_last_wqe_reached(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ rqpair->last_wqe_reached = true;
+ nvmf_rdma_destroy_drained_qpair(rqpair);
+}
+
+static void
+nvmf_rdma_handle_sq_drained(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ nvmf_rdma_start_disconnect(rqpair);
+}
+
+static void
+nvmf_rdma_qpair_process_ibv_event(void *ctx)
+{
+ struct spdk_nvmf_rdma_ibv_event_ctx *event_ctx = ctx;
+
+ if (event_ctx->rqpair) {
+ STAILQ_REMOVE(&event_ctx->rqpair->ibv_events, event_ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
+ if (event_ctx->cb_fn) {
+ event_ctx->cb_fn(event_ctx->rqpair);
+ }
+ }
+ free(event_ctx);
+}
+
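+/* Dispatch an IBV async event handler to the thread that owns the qpair: its poll group
+ * thread, or the destruct channel's thread if the qpair has already left its group. */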
+static int
+nvmf_rdma_send_qpair_async_event(struct spdk_nvmf_rdma_qpair *rqpair,
+ spdk_nvmf_rdma_qpair_ibv_event fn)
+{
+ struct spdk_nvmf_rdma_ibv_event_ctx *ctx;
+ struct spdk_thread *thr = NULL;
+ int rc;
+
+ if (rqpair->qpair.group) {
+ thr = rqpair->qpair.group->thread;
+ } else if (rqpair->destruct_channel) {
+ thr = spdk_io_channel_get_thread(rqpair->destruct_channel);
+ }
+
+ if (!thr) {
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "rqpair %p has no thread\n", rqpair);
+ return -EINVAL;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ return -ENOMEM;
+ }
+
+ ctx->rqpair = rqpair;
+ ctx->cb_fn = fn;
+ STAILQ_INSERT_TAIL(&rqpair->ibv_events, ctx, link);
+
+ rc = spdk_thread_send_msg(thr, nvmf_rdma_qpair_process_ibv_event, ctx);
+ if (rc) {
+ STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
+ free(ctx);
+ }
+
+ return rc;
+}
+
+static void
+nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
+{
+ int rc;
+ struct spdk_nvmf_rdma_qpair *rqpair = NULL;
+ struct ibv_async_event event;
+
+ rc = ibv_get_async_event(device->context, &event);
+
+ if (rc) {
+ SPDK_ERRLOG("Failed to get async_event (%d): %s\n",
+ errno, spdk_strerror(errno));
+ return;
+ }
+
+ switch (event.event_type) {
+ case IBV_EVENT_QP_FATAL:
+ rqpair = event.element.qp->qp_context;
+ SPDK_ERRLOG("Fatal event received for rqpair %p\n", rqpair);
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_qp_fatal);
+ if (rc) {
+ SPDK_WARNLOG("Failed to send QP_FATAL event. rqpair %p, err %d\n", rqpair, rc);
+ nvmf_rdma_handle_qp_fatal(rqpair);
+ }
+ break;
+ case IBV_EVENT_QP_LAST_WQE_REACHED:
+ /* This event only occurs for shared receive queues. */
+ rqpair = event.element.qp->qp_context;
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Last WQE reached event received for rqpair %p\n", rqpair);
+ rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_last_wqe_reached);
+ if (rc) {
+ SPDK_WARNLOG("Failed to send LAST_WQE_REACHED event. rqpair %p, err %d\n", rqpair, rc);
+ rqpair->last_wqe_reached = true;
+ }
+ break;
+ case IBV_EVENT_SQ_DRAINED:
+ /* This event occurs frequently in both error and non-error states.
+ * Check if the qpair is in an error state before sending a message. */
+ rqpair = event.element.qp->qp_context;
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "SQ drained event received for rqpair %p\n", rqpair);
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ if (nvmf_rdma_update_ibv_state(rqpair) == IBV_QPS_ERR) {
+ rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_sq_drained);
+ if (rc) {
+ SPDK_WARNLOG("Failed to send SQ_DRAINED event. rqpair %p, err %d\n", rqpair, rc);
+ nvmf_rdma_handle_sq_drained(rqpair);
+ }
+ }
+ break;
+ case IBV_EVENT_QP_REQ_ERR:
+ case IBV_EVENT_QP_ACCESS_ERR:
+ case IBV_EVENT_COMM_EST:
+ case IBV_EVENT_PATH_MIG:
+ case IBV_EVENT_PATH_MIG_ERR:
+ SPDK_NOTICELOG("Async event: %s\n",
+ ibv_event_type_str(event.event_type));
+ rqpair = event.element.qp->qp_context;
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
+ (uintptr_t)rqpair->cm_id, event.event_type);
+ nvmf_rdma_update_ibv_state(rqpair);
+ break;
+ case IBV_EVENT_CQ_ERR:
+ case IBV_EVENT_DEVICE_FATAL:
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_PORT_ERR:
+ case IBV_EVENT_LID_CHANGE:
+ case IBV_EVENT_PKEY_CHANGE:
+ case IBV_EVENT_SM_CHANGE:
+ case IBV_EVENT_SRQ_ERR:
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+ case IBV_EVENT_CLIENT_REREGISTER:
+ case IBV_EVENT_GID_CHANGE:
+ default:
+ SPDK_NOTICELOG("Async event: %s\n",
+ ibv_event_type_str(event.event_type));
+ spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type);
+ break;
+ }
+ ibv_ack_async_event(&event);
+}
+
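+/* Acceptor poll routine: check the RDMA CM event channel fd and every device's async event
+ * fd with a single poll() call and process whichever descriptors are ready. */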
+static uint32_t
+nvmf_rdma_accept(struct spdk_nvmf_transport *transport)
+{
+ int nfds, i = 0;
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_device *device, *tmp;
+ uint32_t count;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+ count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);
+
+ if (nfds <= 0) {
+ return 0;
+ }
+
+ /* The first poll descriptor is RDMA CM event */
+ if (rtransport->poll_fds[i++].revents & POLLIN) {
+ nvmf_process_cm_event(transport);
+ nfds--;
+ }
+
+ if (nfds == 0) {
+ return count;
+ }
+
+ /* Second and subsequent poll descriptors are IB async events */
+ TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
+ if (rtransport->poll_fds[i++].revents & POLLIN) {
+ nvmf_process_ib_event(device);
+ nfds--;
+ }
+ }
+ /* Check that all flagged fds have been served. */
+ assert(nfds == 0);
+
+ return count;
+}
+
+static void
+nvmf_rdma_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr_data *cdata)
+{
+ cdata->nvmf_specific.msdbd = SPDK_NVMF_MAX_SGL_ENTRIES;
+
+ /* Disable in-capsule data transfer for the RDMA controller when dif_insert_or_strip is
+ * enabled, since in-capsule data only works with NVMe drives that support the SGL memory
+ * layout. */
+ if (transport->opts.dif_insert_or_strip) {
+ cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
+ }
+}
+
+static void
+nvmf_rdma_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+ entry->trtype = SPDK_NVMF_TRTYPE_RDMA;
+ entry->adrfam = trid->adrfam;
+ entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
+
+ spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
+ spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
+
+ entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED;
+ entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE;
+ entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM;
+}
+
+static void
+nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
+
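+/* Create a poll group containing one poller per RDMA device, each with its own completion
+ * queue. When SRQ is enabled and the device has capacity, a shared receive queue and its
+ * receive resources are created for the poller as well. */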
+static struct spdk_nvmf_transport_poll_group *
+nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_poller *poller;
+ struct spdk_nvmf_rdma_device *device;
+ struct ibv_srq_init_attr srq_init_attr;
+ struct spdk_nvmf_rdma_resource_opts opts;
+ int num_cqe;
+
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
+
+ rgroup = calloc(1, sizeof(*rgroup));
+ if (!rgroup) {
+ return NULL;
+ }
+
+ TAILQ_INIT(&rgroup->pollers);
+ STAILQ_INIT(&rgroup->retired_bufs);
+
+ pthread_mutex_lock(&rtransport->lock);
+ TAILQ_FOREACH(device, &rtransport->devices, link) {
+ poller = calloc(1, sizeof(*poller));
+ if (!poller) {
+ SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n");
+ nvmf_rdma_poll_group_destroy(&rgroup->group);
+ pthread_mutex_unlock(&rtransport->lock);
+ return NULL;
+ }
+
+ poller->device = device;
+ poller->group = rgroup;
+
+ TAILQ_INIT(&poller->qpairs);
+ STAILQ_INIT(&poller->qpairs_pending_send);
+ STAILQ_INIT(&poller->qpairs_pending_recv);
+
+ TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
+ if (transport->opts.no_srq == false && device->num_srq < device->attr.max_srq) {
+ poller->max_srq_depth = transport->opts.max_srq_depth;
+
+ device->num_srq++;
+ memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr));
+ srq_init_attr.attr.max_wr = poller->max_srq_depth;
+ srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
+ poller->srq = ibv_create_srq(device->pd, &srq_init_attr);
+ if (!poller->srq) {
+ SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno);
+ nvmf_rdma_poll_group_destroy(&rgroup->group);
+ pthread_mutex_unlock(&rtransport->lock);
+ return NULL;
+ }
+
+ opts.qp = poller->srq;
+ opts.pd = device->pd;
+ opts.qpair = NULL;
+ opts.shared = true;
+ opts.max_queue_depth = poller->max_srq_depth;
+ opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
+
+ poller->resources = nvmf_rdma_resources_create(&opts);
+ if (!poller->resources) {
+ SPDK_ERRLOG("Unable to allocate resources for shared receive queue.\n");
+ nvmf_rdma_poll_group_destroy(&rgroup->group);
+ pthread_mutex_unlock(&rtransport->lock);
+ return NULL;
+ }
+ }
+
+ /*
+ * When using an srq, we can limit the completion queue at startup.
+ * The following formula represents the calculation:
+ * num_cqe = num_recv + num_data_wr + num_send_wr.
+ * where num_recv = num_data_wr = num_send_wr = poller->max_srq_depth
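+ * For example, with the default max_srq_depth of 4096 this gives num_cqe = 3 * 4096 = 12288.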
+ */
+ if (poller->srq) {
+ num_cqe = poller->max_srq_depth * 3;
+ } else {
+ num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;
+ }
+
+ poller->cq = ibv_create_cq(device->context, num_cqe, poller, NULL, 0);
+ if (!poller->cq) {
+ SPDK_ERRLOG("Unable to create completion queue\n");
+ nvmf_rdma_poll_group_destroy(&rgroup->group);
+ pthread_mutex_unlock(&rtransport->lock);
+ return NULL;
+ }
+ poller->num_cqe = num_cqe;
+ }
+
+ TAILQ_INSERT_TAIL(&rtransport->poll_groups, rgroup, link);
+ if (rtransport->conn_sched.next_admin_pg == NULL) {
+ rtransport->conn_sched.next_admin_pg = rgroup;
+ rtransport->conn_sched.next_io_pg = rgroup;
+ }
+
+ pthread_mutex_unlock(&rtransport->lock);
+ return &rgroup->group;
+}
+
+static struct spdk_nvmf_transport_poll_group *
+nvmf_rdma_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_poll_group **pg;
+ struct spdk_nvmf_transport_poll_group *result;
+
+ rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+
+ pthread_mutex_lock(&rtransport->lock);
+
+ if (TAILQ_EMPTY(&rtransport->poll_groups)) {
+ pthread_mutex_unlock(&rtransport->lock);
+ return NULL;
+ }
+
+ if (qpair->qid == 0) {
+ pg = &rtransport->conn_sched.next_admin_pg;
+ } else {
+ pg = &rtransport->conn_sched.next_io_pg;
+ }
+
+ assert(*pg != NULL);
+
+ result = &(*pg)->group;
+
+ *pg = TAILQ_NEXT(*pg, link);
+ if (*pg == NULL) {
+ *pg = TAILQ_FIRST(&rtransport->poll_groups);
+ }
+
+ pthread_mutex_unlock(&rtransport->lock);
+
+ return result;
+}
+
+static void
+nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_rdma_poll_group *rgroup, *next_rgroup;
+ struct spdk_nvmf_rdma_poller *poller, *tmp;
+ struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair;
+ struct spdk_nvmf_transport_pg_cache_buf *buf, *tmp_buf;
+ struct spdk_nvmf_rdma_transport *rtransport;
+
+ rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+ if (!rgroup) {
+ return;
+ }
+
+ /* free all retired buffers back to the transport so we don't short the mempool. */
+ STAILQ_FOREACH_SAFE(buf, &rgroup->retired_bufs, link, tmp_buf) {
+ STAILQ_REMOVE(&rgroup->retired_bufs, buf, spdk_nvmf_transport_pg_cache_buf, link);
+ assert(group->transport != NULL);
+ spdk_mempool_put(group->transport->data_buf_pool, buf);
+ }
+
+ TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
+ TAILQ_REMOVE(&rgroup->pollers, poller, link);
+
+ TAILQ_FOREACH_SAFE(qpair, &poller->qpairs, link, tmp_qpair) {
+ nvmf_rdma_qpair_destroy(qpair);
+ }
+
+ if (poller->srq) {
+ if (poller->resources) {
+ nvmf_rdma_resources_destroy(poller->resources);
+ }
+ ibv_destroy_srq(poller->srq);
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Destroyed RDMA shared queue %p\n", poller->srq);
+ }
+
+ if (poller->cq) {
+ ibv_destroy_cq(poller->cq);
+ }
+
+ free(poller);
+ }
+
+ if (rgroup->group.transport == NULL) {
+ /* Transport can be NULL when nvmf_rdma_poll_group_create()
+ * calls this function directly in a failure path. */
+ free(rgroup);
+ return;
+ }
+
+ rtransport = SPDK_CONTAINEROF(rgroup->group.transport, struct spdk_nvmf_rdma_transport, transport);
+
+ pthread_mutex_lock(&rtransport->lock);
+ next_rgroup = TAILQ_NEXT(rgroup, link);
+ TAILQ_REMOVE(&rtransport->poll_groups, rgroup, link);
+ if (next_rgroup == NULL) {
+ next_rgroup = TAILQ_FIRST(&rtransport->poll_groups);
+ }
+ if (rtransport->conn_sched.next_admin_pg == rgroup) {
+ rtransport->conn_sched.next_admin_pg = next_rgroup;
+ }
+ if (rtransport->conn_sched.next_io_pg == rgroup) {
+ rtransport->conn_sched.next_io_pg = next_rgroup;
+ }
+ pthread_mutex_unlock(&rtransport->lock);
+
+ free(rgroup);
+}
+
+static void
+nvmf_rdma_qpair_reject_connection(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+ if (rqpair->cm_id != NULL) {
+ nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
+ }
+ nvmf_rdma_qpair_destroy(rqpair);
+}
+
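+/* Attach a newly accepted qpair to this poll group: find the poller that matches the
+ * qpair's device, initialize the RDMA qp, and accept the connection. */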
+static int
+nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_device *device;
+ struct spdk_nvmf_rdma_poller *poller;
+ int rc;
+
+ rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ device = rqpair->device;
+
+ TAILQ_FOREACH(poller, &rgroup->pollers, link) {
+ if (poller->device == device) {
+ break;
+ }
+ }
+
+ if (!poller) {
+ SPDK_ERRLOG("No poller found for device.\n");
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&poller->qpairs, rqpair, link);
+ rqpair->poller = poller;
+ rqpair->srq = rqpair->poller->srq;
+
+ rc = nvmf_rdma_qpair_initialize(qpair);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair);
+ return -1;
+ }
+
+ rc = nvmf_rdma_event_accept(rqpair->cm_id, rqpair);
+ if (rc) {
+ /* Try to reject, but we probably can't */
+ nvmf_rdma_qpair_reject_connection(rqpair);
+ return -1;
+ }
+
+ nvmf_rdma_update_ibv_state(rqpair);
+
+ return 0;
+}
+
+static int
+nvmf_rdma_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ assert(group->transport->tgt != NULL);
+
+ rqpair->destruct_channel = spdk_get_io_channel(group->transport->tgt);
+
+ if (!rqpair->destruct_channel) {
+ SPDK_WARNLOG("failed to get io_channel, qpair %p\n", qpair);
+ return 0;
+ }
+
+ /* Sanity check that we get io_channel on the correct thread */
+ if (qpair->group) {
+ assert(qpair->group->thread == spdk_io_channel_get_thread(rqpair->destruct_channel));
+ }
+
+ return 0;
+}
+
+static int
+nvmf_rdma_request_free(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+ struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport,
+ struct spdk_nvmf_rdma_transport, transport);
+ struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
+ struct spdk_nvmf_rdma_qpair, qpair);
+
+ /*
+ * AER requests are freed when a qpair is destroyed. The recv corresponding to that request
+ * needs to be returned to the shared receive queue or the poll group will eventually be
+ * starved of RECV structures.
+ */
+ if (rqpair->srq && rdma_req->recv) {
+ int rc;
+ struct ibv_recv_wr *bad_recv_wr;
+
+ rc = ibv_post_srq_recv(rqpair->srq, &rdma_req->recv->wr, &bad_recv_wr);
+ if (rc) {
+ SPDK_ERRLOG("Unable to re-post rx descriptor\n");
+ }
+ }
+
+ _nvmf_rdma_request_free(rdma_req, rtransport);
+ return 0;
+}
+
+static int
+nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport,
+ struct spdk_nvmf_rdma_transport, transport);
+ struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req,
+ struct spdk_nvmf_rdma_request, req);
+ struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
+ struct spdk_nvmf_rdma_qpair, qpair);
+
+ if (rqpair->ibv_state != IBV_QPS_ERR) {
+ /* The connection is alive, so process the request as normal */
+ rdma_req->state = RDMA_REQUEST_STATE_EXECUTED;
+ } else {
+ /* The connection is dead. Move the request directly to the completed state. */
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ }
+
+ nvmf_rdma_request_process(rtransport, rdma_req);
+
+ return 0;
+}
+
+static int
+nvmf_rdma_destroy_defunct_qpair(void *ctx)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair = ctx;
+ struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ struct spdk_nvmf_rdma_transport, transport);
+
+ SPDK_INFOLOG(SPDK_LOG_RDMA, "QP#%d hasn't been drained as expected; destroying it manually\n",
+ rqpair->qpair.qid);
+
+ nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
+ nvmf_rdma_qpair_destroy(rqpair);
+
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ if (rqpair->disconnect_flags & RDMA_QP_DISCONNECTING) {
+ return;
+ }
+
+ rqpair->disconnect_flags |= RDMA_QP_DISCONNECTING;
+
+ /* This happens only when the qpair is disconnected before
+ * it is added to the poll group. Since there is no poll group,
+ * the RDMA qp has not been initialized yet and the RDMA CM
+ * event has not yet been acknowledged, so we need to reject it.
+ */
+ if (rqpair->qpair.state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
+ nvmf_rdma_qpair_reject_connection(rqpair);
+ return;
+ }
+
+ if (rqpair->rdma_qp) {
+ spdk_rdma_qp_disconnect(rqpair->rdma_qp);
+ }
+
+ rqpair->destruct_poller = SPDK_POLLER_REGISTER(nvmf_rdma_destroy_defunct_qpair, (void *)rqpair,
+ NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US);
+}
+
+static struct spdk_nvmf_rdma_qpair *
+get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ /* @todo: improve QP search */
+ TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) {
+ if (wc->qp_num == rqpair->rdma_qp->qp->qp_num) {
+ return rqpair;
+ }
+ }
+ SPDK_ERRLOG("Didn't find QP with qp_num %u\n", wc->qp_num);
+ return NULL;
+}
+
+#ifdef DEBUG
+static int
+nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req)
+{
+ return rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST ||
+ rdma_req->state == RDMA_REQUEST_STATE_COMPLETING;
+}
+#endif
+
+static void
+_poller_reset_failed_recvs(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_recv_wr *bad_recv_wr,
+ int rc)
+{
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+ struct spdk_nvmf_rdma_wr *bad_rdma_wr;
+
+ SPDK_ERRLOG("Failed to post a recv for the poller %p with errno %d\n", rpoller, -rc);
+ while (bad_recv_wr != NULL) {
+ bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_recv_wr->wr_id;
+ rdma_recv = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
+
+ rdma_recv->qpair->current_recv_depth++;
+ bad_recv_wr = bad_recv_wr->next;
+ SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rdma_recv->qpair, -rc);
+ nvmf_rdma_start_disconnect(rdma_recv->qpair);
+ }
+}
+
+static void
+_qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *bad_recv_wr, int rc)
+{
+ SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rqpair, -rc);
+ while (bad_recv_wr != NULL) {
+ bad_recv_wr = bad_recv_wr->next;
+ rqpair->current_recv_depth++;
+ }
+ nvmf_rdma_start_disconnect(rqpair);
+}
+
+static void
+_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_poller *rpoller)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct ibv_recv_wr *bad_recv_wr;
+ int rc;
+
+ if (rpoller->srq) {
+ if (rpoller->resources->recvs_to_post.first != NULL) {
+ rc = ibv_post_srq_recv(rpoller->srq, rpoller->resources->recvs_to_post.first, &bad_recv_wr);
+ if (rc) {
+ _poller_reset_failed_recvs(rpoller, bad_recv_wr, rc);
+ }
+ rpoller->resources->recvs_to_post.first = NULL;
+ rpoller->resources->recvs_to_post.last = NULL;
+ }
+ } else {
+ while (!STAILQ_EMPTY(&rpoller->qpairs_pending_recv)) {
+ rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_recv);
+ assert(rqpair->resources->recvs_to_post.first != NULL);
+ rc = ibv_post_recv(rqpair->rdma_qp->qp, rqpair->resources->recvs_to_post.first, &bad_recv_wr);
+ if (rc) {
+ _qp_reset_failed_recvs(rqpair, bad_recv_wr, rc);
+ }
+ rqpair->resources->recvs_to_post.first = NULL;
+ rqpair->resources->recvs_to_post.last = NULL;
+ STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_recv, recv_link);
+ }
+ }
+}
+
+static void
+_qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *bad_wr, int rc)
+{
+ struct spdk_nvmf_rdma_wr *bad_rdma_wr;
+ struct spdk_nvmf_rdma_request *prev_rdma_req = NULL, *cur_rdma_req = NULL;
+
+ SPDK_ERRLOG("Failed to post a send for the qpair %p with errno %d\n", rqpair, -rc);
+ for (; bad_wr != NULL; bad_wr = bad_wr->next) {
+ bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_wr->wr_id;
+ assert(rqpair->current_send_depth > 0);
+ rqpair->current_send_depth--;
+ switch (bad_rdma_wr->type) {
+ case RDMA_WR_TYPE_DATA:
+ cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr);
+ if (bad_wr->opcode == IBV_WR_RDMA_READ) {
+ assert(rqpair->current_read_depth > 0);
+ rqpair->current_read_depth--;
+ }
+ break;
+ case RDMA_WR_TYPE_SEND:
+ cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr);
+ break;
+ default:
+ SPDK_ERRLOG("Found a RECV in the list of pending SEND requests for qpair %p\n", rqpair);
+ prev_rdma_req = cur_rdma_req;
+ continue;
+ }
+
+ if (prev_rdma_req == cur_rdma_req) {
+ /* This request was handled by an earlier WR, i.e. we were performing an NVMe read. */
+ /* We only have to check against the previous WR since each request's WRs are contiguous in this list. */
+ continue;
+ }
+
+ switch (cur_rdma_req->state) {
+ case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+ cur_rdma_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ cur_rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+ break;
+ case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+ case RDMA_REQUEST_STATE_COMPLETING:
+ cur_rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ break;
+ default:
+ SPDK_ERRLOG("Found a request in a bad state %d when draining pending SEND requests for qpair %p\n",
+ cur_rdma_req->state, rqpair);
+ continue;
+ }
+
+ nvmf_rdma_request_process(rtransport, cur_rdma_req);
+ prev_rdma_req = cur_rdma_req;
+ }
+
+ if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) {
+ /* Disconnect the connection. */
+ nvmf_rdma_start_disconnect(rqpair);
+ }
+}
+
+static void
+_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_poller *rpoller)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct ibv_send_wr *bad_wr = NULL;
+ int rc;
+
+ while (!STAILQ_EMPTY(&rpoller->qpairs_pending_send)) {
+ rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_send);
+ rc = spdk_rdma_qp_flush_send_wrs(rqpair->rdma_qp, &bad_wr);
+
+ /* bad wr always points to the first wr that failed. */
+ if (rc) {
+ _qp_reset_failed_sends(rtransport, rqpair, bad_wr, rc);
+ }
+ STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_send, send_link);
+ }
+}
+
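+/* Poll one completion queue, advance the request state machine for every completed send,
+ * receive, and RDMA data WR, then flush any receive and send WRs queued during processing. */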
+static int
+nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
+ struct spdk_nvmf_rdma_poller *rpoller)
+{
+ struct ibv_wc wc[32];
+ struct spdk_nvmf_rdma_wr *rdma_wr;
+ struct spdk_nvmf_rdma_request *rdma_req;
+ struct spdk_nvmf_rdma_recv *rdma_recv;
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ int reaped, i;
+ int count = 0;
+ bool error = false;
+ uint64_t poll_tsc = spdk_get_ticks();
+
+ /* Poll for completing operations. */
+ reaped = ibv_poll_cq(rpoller->cq, 32, wc);
+ if (reaped < 0) {
+ SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
+ errno, spdk_strerror(errno));
+ return -1;
+ }
+
+ rpoller->stat.polls++;
+ rpoller->stat.completions += reaped;
+
+ for (i = 0; i < reaped; i++) {
+
+ rdma_wr = (struct spdk_nvmf_rdma_wr *)wc[i].wr_id;
+
+ switch (rdma_wr->type) {
+ case RDMA_WR_TYPE_SEND:
+ rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr);
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ if (!wc[i].status) {
+ count++;
+ assert(wc[i].opcode == IBV_WC_SEND);
+ assert(nvmf_rdma_req_is_completing(rdma_req));
+ }
+
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ /* The response send (and any RDMA_WRITE operations chained before it) completed.
+ * +1 accounts for the response WR itself. */
+ rqpair->current_send_depth -= rdma_req->num_outstanding_data_wr + 1;
+ rdma_req->num_outstanding_data_wr = 0;
+
+ nvmf_rdma_request_process(rtransport, rdma_req);
+ break;
+ case RDMA_WR_TYPE_RECV:
+ /* rdma_recv->qpair will be invalid if using an SRQ. In that case we have to get the qpair from the wc. */
+ rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
+ if (rpoller->srq != NULL) {
+ rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
+ /* It is possible that there are still some completions for destroyed QP
+ * associated with SRQ. We just ignore these late completions and re-post
+ * receive WRs back to SRQ.
+ */
+ if (spdk_unlikely(NULL == rdma_recv->qpair)) {
+ struct ibv_recv_wr *bad_wr;
+ int rc;
+
+ rdma_recv->wr.next = NULL;
+ rc = ibv_post_srq_recv(rpoller->srq,
+ &rdma_recv->wr,
+ &bad_wr);
+ if (rc) {
+ SPDK_ERRLOG("Failed to re-post recv WR to SRQ, err %d\n", rc);
+ }
+ continue;
+ }
+ }
+ rqpair = rdma_recv->qpair;
+
+ assert(rqpair != NULL);
+ if (!wc[i].status) {
+ assert(wc[i].opcode == IBV_WC_RECV);
+ if (rqpair->current_recv_depth >= rqpair->max_queue_depth) {
+ nvmf_rdma_start_disconnect(rqpair);
+ break;
+ }
+ }
+
+ rdma_recv->wr.next = NULL;
+ rqpair->current_recv_depth++;
+ rdma_recv->receive_tsc = poll_tsc;
+ rpoller->stat.requests++;
+ STAILQ_INSERT_TAIL(&rqpair->resources->incoming_queue, rdma_recv, link);
+ break;
+ case RDMA_WR_TYPE_DATA:
+ rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr);
+ rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ assert(rdma_req->num_outstanding_data_wr > 0);
+
+ rqpair->current_send_depth--;
+ rdma_req->num_outstanding_data_wr--;
+ if (!wc[i].status) {
+ assert(wc[i].opcode == IBV_WC_RDMA_READ);
+ rqpair->current_read_depth--;
+ /* wait for all outstanding reads associated with the same rdma_req to complete before proceeding. */
+ if (rdma_req->num_outstanding_data_wr == 0) {
+ rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
+ nvmf_rdma_request_process(rtransport, rdma_req);
+ }
+ } else {
+ /* The data transfer failed; the error handling below will still force the qpair into the
+ * error state. If we were performing an RDMA_READ, we need to force the request into a
+ * completed state since it wasn't linked to a send. However, in the RDMA_WRITE
+ * case, we should wait for the SEND to complete. */
+ if (rdma_req->data.wr.opcode == IBV_WR_RDMA_READ) {
+ rqpair->current_read_depth--;
+ if (rdma_req->num_outstanding_data_wr == 0) {
+ rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+ }
+ }
+ }
+ break;
+ default:
+ SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode);
+ continue;
+ }
+
+ /* Handle error conditions */
+ if (wc[i].status) {
+ if ((rdma_wr->type == RDMA_WR_TYPE_RECV && !rpoller->srq)) {
+ /* When we don't use SRQ and close a qpair, we will receive completions with error
+ * status for all posted ibv_recv_wrs. This is expected and we don't want to log
+ * an error in that case. */
+ SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Error on CQ %p, request 0x%lx, type %d, status: (%d): %s\n",
+ rpoller->cq, wc[i].wr_id, rdma_wr->type, wc[i].status, ibv_wc_status_str(wc[i].status));
+ } else {
+ SPDK_ERRLOG("Error on CQ %p, request 0x%lx, type %d, status: (%d): %s\n",
+ rpoller->cq, wc[i].wr_id, rdma_wr->type, wc[i].status, ibv_wc_status_str(wc[i].status));
+ }
+
+ error = true;
+
+ if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) {
+ /* Disconnect the connection. */
+ nvmf_rdma_start_disconnect(rqpair);
+ } else {
+ nvmf_rdma_destroy_drained_qpair(rqpair);
+ }
+ continue;
+ }
+
+ nvmf_rdma_qpair_process_pending(rtransport, rqpair, false);
+
+ if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ nvmf_rdma_destroy_drained_qpair(rqpair);
+ }
+ }
+
+ if (error == true) {
+ return -1;
+ }
+
+ /* submit outstanding work requests. */
+ _poller_submit_recvs(rtransport, rpoller);
+ _poller_submit_sends(rtransport, rpoller);
+
+ return count;
+}
+
+static int
+nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_poller *rpoller;
+ int count, rc;
+
+ rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport);
+ rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
+
+ count = 0;
+ TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
+ rc = nvmf_rdma_poller_poll(rtransport, rpoller);
+ if (rc < 0) {
+ return rc;
+ }
+ count += rc;
+ }
+
+ return count;
+}
+
+static int
+nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
+ struct spdk_nvme_transport_id *trid,
+ bool peer)
+{
+ struct sockaddr *saddr;
+ uint16_t port;
+
+ spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_RDMA);
+
+ if (peer) {
+ saddr = rdma_get_peer_addr(id);
+ } else {
+ saddr = rdma_get_local_addr(id);
+ }
+ switch (saddr->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *saddr_in = (struct sockaddr_in *)saddr;
+
+ trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
+ inet_ntop(AF_INET, &saddr_in->sin_addr,
+ trid->traddr, sizeof(trid->traddr));
+ if (peer) {
+ port = ntohs(rdma_get_dst_port(id));
+ } else {
+ port = ntohs(rdma_get_src_port(id));
+ }
+ snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port);
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *saddr_in = (struct sockaddr_in6 *)saddr;
+ trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
+ inet_ntop(AF_INET6, &saddr_in->sin6_addr,
+ trid->traddr, sizeof(trid->traddr));
+ if (peer) {
+ port = ntohs(rdma_get_dst_port(id));
+ } else {
+ port = ntohs(rdma_get_src_port(id));
+ }
+ snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port);
+ break;
+ }
+ default:
+ return -1;
+
+ }
+
+ return 0;
+}
+
+static int
+nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true);
+}
+
+static int
+nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false);
+}
+
+static int
+nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
+ return nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false);
+}
+
+void
+spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
+{
+ g_nvmf_hooks = *hooks;
+}
+
+static void
+nvmf_rdma_request_set_abort_status(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_rdma_request *rdma_req_to_abort)
+{
+ rdma_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ rdma_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
+
+ rdma_req_to_abort->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
+
+ req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
+}
+
+static int
+_nvmf_rdma_qpair_abort_request(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_rdma_request *rdma_req_to_abort = SPDK_CONTAINEROF(
+ req->req_to_abort, struct spdk_nvmf_rdma_request, req);
+ struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
+ struct spdk_nvmf_rdma_qpair, qpair);
+ int rc;
+
+ spdk_poller_unregister(&req->poller);
+
+ switch (rdma_req_to_abort->state) {
+ case RDMA_REQUEST_STATE_EXECUTING:
+ rc = nvmf_ctrlr_abort_request(req);
+ if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
+ return SPDK_POLLER_BUSY;
+ }
+ break;
+
+ case RDMA_REQUEST_STATE_NEED_BUFFER:
+ STAILQ_REMOVE(&rqpair->poller->group->group.pending_buf_queue,
+ &rdma_req_to_abort->req, spdk_nvmf_request, buf_link);
+
+ nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort);
+ break;
+
+ case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
+ STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req_to_abort,
+ spdk_nvmf_rdma_request, state_link);
+
+ nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort);
+ break;
+
+ case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
+ STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req_to_abort,
+ spdk_nvmf_rdma_request, state_link);
+
+ nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort);
+ break;
+
+ case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+ if (spdk_get_ticks() < req->timeout_tsc) {
+ req->poller = SPDK_POLLER_REGISTER(_nvmf_rdma_qpair_abort_request, req, 0);
+ return SPDK_POLLER_BUSY;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ spdk_nvmf_request_complete(req);
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+nvmf_rdma_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_rdma_qpair *rqpair;
+ struct spdk_nvmf_rdma_transport *rtransport;
+ struct spdk_nvmf_transport *transport;
+ uint16_t cid;
+ uint32_t i;
+ struct spdk_nvmf_rdma_request *rdma_req_to_abort = NULL;
+
+ rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+ transport = &rtransport->transport;
+
+ cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
+
+	/* Only record the request when its CID actually matches, so that
+	 * rdma_req_to_abort stays NULL if no such request is outstanding. */
+	for (i = 0; i < rqpair->max_queue_depth; i++) {
+		if (rqpair->resources->reqs[i].state != RDMA_REQUEST_STATE_FREE &&
+		    rqpair->resources->reqs[i].req.cmd->nvme_cmd.cid == cid) {
+			rdma_req_to_abort = &rqpair->resources->reqs[i];
+			break;
+		}
+	}
+
+ if (rdma_req_to_abort == NULL) {
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ req->req_to_abort = &rdma_req_to_abort->req;
+ req->timeout_tsc = spdk_get_ticks() +
+ transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
+ req->poller = NULL;
+
+ _nvmf_rdma_qpair_abort_request(req);
+}
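+
+/* Abort flow summary (derived from the two functions above):
+ * nvmf_rdma_qpair_abort_request() looks up the outstanding request whose CID
+ * matches cdw10.abort.cid, records it in req->req_to_abort, arms a deadline of
+ * transport->opts.abort_timeout_sec seconds, and calls
+ * _nvmf_rdma_qpair_abort_request(). That helper completes the abort immediately
+ * for requests still waiting for buffers or pending RDMA transfers, defers to
+ * nvmf_ctrlr_abort_request() for requests that are EXECUTING, and re-arms itself
+ * as a poller while a host-to-controller transfer is in flight until the
+ * deadline expires. */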
+
+static int
+nvmf_rdma_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_transport_poll_group_stat **stat)
+{
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_poll_group *group;
+ struct spdk_nvmf_transport_poll_group *tgroup;
+ struct spdk_nvmf_rdma_poll_group *rgroup;
+ struct spdk_nvmf_rdma_poller *rpoller;
+ struct spdk_nvmf_rdma_device_stat *device_stat;
+ uint64_t num_devices = 0;
+
+ if (tgt == NULL || stat == NULL) {
+ return -EINVAL;
+ }
+
+ ch = spdk_get_io_channel(tgt);
+	group = spdk_io_channel_get_ctx(ch);
+ spdk_put_io_channel(ch);
+ TAILQ_FOREACH(tgroup, &group->tgroups, link) {
+ if (SPDK_NVME_TRANSPORT_RDMA == tgroup->transport->ops->type) {
+ *stat = calloc(1, sizeof(struct spdk_nvmf_transport_poll_group_stat));
+ if (!*stat) {
+ SPDK_ERRLOG("Failed to allocate memory for NVMf RDMA statistics\n");
+ return -ENOMEM;
+ }
+ (*stat)->trtype = SPDK_NVME_TRANSPORT_RDMA;
+
+ rgroup = SPDK_CONTAINEROF(tgroup, struct spdk_nvmf_rdma_poll_group, group);
+ /* Count devices to allocate enough memory */
+ TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
+ ++num_devices;
+ }
+ (*stat)->rdma.devices = calloc(num_devices, sizeof(struct spdk_nvmf_rdma_device_stat));
+ if (!(*stat)->rdma.devices) {
+ SPDK_ERRLOG("Failed to allocate NVMf RDMA devices statistics\n");
+ free(*stat);
+ return -ENOMEM;
+ }
+
+ (*stat)->rdma.pending_data_buffer = rgroup->stat.pending_data_buffer;
+ (*stat)->rdma.num_devices = num_devices;
+ num_devices = 0;
+ TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
+ device_stat = &(*stat)->rdma.devices[num_devices++];
+ device_stat->name = ibv_get_device_name(rpoller->device->context->device);
+ device_stat->polls = rpoller->stat.polls;
+ device_stat->completions = rpoller->stat.completions;
+ device_stat->requests = rpoller->stat.requests;
+ device_stat->request_latency = rpoller->stat.request_latency;
+ device_stat->pending_free_request = rpoller->stat.pending_free_request;
+ device_stat->pending_rdma_read = rpoller->stat.pending_rdma_read;
+ device_stat->pending_rdma_write = rpoller->stat.pending_rdma_write;
+ }
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static void
+nvmf_rdma_poll_group_free_stat(struct spdk_nvmf_transport_poll_group_stat *stat)
+{
+ if (stat) {
+ free(stat->rdma.devices);
+ }
+ free(stat);
+}
+
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = {
+ .name = "RDMA",
+ .type = SPDK_NVME_TRANSPORT_RDMA,
+ .opts_init = nvmf_rdma_opts_init,
+ .create = nvmf_rdma_create,
+ .destroy = nvmf_rdma_destroy,
+
+ .listen = nvmf_rdma_listen,
+ .stop_listen = nvmf_rdma_stop_listen,
+ .accept = nvmf_rdma_accept,
+ .cdata_init = nvmf_rdma_cdata_init,
+
+ .listener_discover = nvmf_rdma_discover,
+
+ .poll_group_create = nvmf_rdma_poll_group_create,
+ .get_optimal_poll_group = nvmf_rdma_get_optimal_poll_group,
+ .poll_group_destroy = nvmf_rdma_poll_group_destroy,
+ .poll_group_add = nvmf_rdma_poll_group_add,
+ .poll_group_remove = nvmf_rdma_poll_group_remove,
+ .poll_group_poll = nvmf_rdma_poll_group_poll,
+
+ .req_free = nvmf_rdma_request_free,
+ .req_complete = nvmf_rdma_request_complete,
+
+ .qpair_fini = nvmf_rdma_close_qpair,
+ .qpair_get_peer_trid = nvmf_rdma_qpair_get_peer_trid,
+ .qpair_get_local_trid = nvmf_rdma_qpair_get_local_trid,
+ .qpair_get_listen_trid = nvmf_rdma_qpair_get_listen_trid,
+ .qpair_abort_request = nvmf_rdma_qpair_abort_request,
+
+ .poll_group_get_stat = nvmf_rdma_poll_group_get_stat,
+ .poll_group_free_stat = nvmf_rdma_poll_group_free_stat,
+};
+
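+/* The SPDK_NVMF_TRANSPORT_REGISTER() macro below adds this ops table to the
+ * generic transport layer's list of registered transports, so it can be looked
+ * up by its .name, e.g. via spdk_nvmf_transport_create("RDMA", &opts). */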
+SPDK_NVMF_TRANSPORT_REGISTER(rdma, &spdk_nvmf_transport_rdma);
+SPDK_LOG_REGISTER_COMPONENT("rdma", SPDK_LOG_RDMA)
diff --git a/src/spdk/lib/nvmf/spdk_nvmf.map b/src/spdk/lib/nvmf/spdk_nvmf.map
new file mode 100644
index 000000000..994e7437b
--- /dev/null
+++ b/src/spdk/lib/nvmf/spdk_nvmf.map
@@ -0,0 +1,118 @@
+{
+ global:
+
+ # public functions in nvmf.h
+ spdk_nvmf_tgt_create;
+ spdk_nvmf_tgt_destroy;
+ spdk_nvmf_tgt_get_name;
+ spdk_nvmf_get_tgt;
+ spdk_nvmf_get_first_tgt;
+ spdk_nvmf_get_next_tgt;
+ spdk_nvmf_tgt_write_config_json;
+ spdk_nvmf_tgt_listen;
+ spdk_nvmf_tgt_stop_listen;
+ spdk_nvmf_tgt_accept;
+ spdk_nvmf_poll_group_create;
+ spdk_nvmf_get_optimal_poll_group;
+ spdk_nvmf_poll_group_destroy;
+ spdk_nvmf_poll_group_add;
+ spdk_nvmf_poll_group_get_stat;
+ spdk_nvmf_qpair_disconnect;
+ spdk_nvmf_qpair_get_peer_trid;
+ spdk_nvmf_qpair_get_local_trid;
+ spdk_nvmf_qpair_get_listen_trid;
+ spdk_nvmf_subsystem_create;
+ spdk_nvmf_subsystem_destroy;
+ spdk_nvmf_subsystem_start;
+ spdk_nvmf_subsystem_stop;
+ spdk_nvmf_subsystem_pause;
+ spdk_nvmf_subsystem_resume;
+ spdk_nvmf_tgt_find_subsystem;
+ spdk_nvmf_subsystem_get_first;
+ spdk_nvmf_subsystem_get_next;
+ spdk_nvmf_subsystem_add_host;
+ spdk_nvmf_subsystem_remove_host;
+ spdk_nvmf_subsystem_set_allow_any_host;
+ spdk_nvmf_subsystem_get_allow_any_host;
+ spdk_nvmf_subsystem_host_allowed;
+ spdk_nvmf_subsystem_get_first_host;
+ spdk_nvmf_subsystem_get_next_host;
+ spdk_nvmf_host_get_nqn;
+ spdk_nvmf_subsystem_add_listener;
+ spdk_nvmf_subsystem_remove_listener;
+ spdk_nvmf_subsystem_listener_allowed;
+ spdk_nvmf_subsystem_get_first_listener;
+ spdk_nvmf_subsystem_get_next_listener;
+ spdk_nvmf_subsystem_listener_get_trid;
+ spdk_nvmf_subsystem_allow_any_listener;
+ spdk_nvmf_subsytem_any_listener_allowed;
+ spdk_nvmf_ns_opts_get_defaults;
+ spdk_nvmf_subsystem_add_ns;
+ spdk_nvmf_subsystem_remove_ns;
+ spdk_nvmf_subsystem_get_first_ns;
+ spdk_nvmf_subsystem_get_next_ns;
+ spdk_nvmf_subsystem_get_ns;
+ spdk_nvmf_subsystem_get_max_namespaces;
+ spdk_nvmf_ns_get_id;
+ spdk_nvmf_ns_get_bdev;
+ spdk_nvmf_ns_get_opts;
+ spdk_nvmf_subsystem_get_sn;
+ spdk_nvmf_subsystem_set_sn;
+ spdk_nvmf_subsystem_get_mn;
+ spdk_nvmf_subsystem_set_mn;
+ spdk_nvmf_subsystem_get_nqn;
+ spdk_nvmf_subsystem_get_type;
+ spdk_nvmf_subsystem_get_max_nsid;
+ spdk_nvmf_transport_opts_init;
+ spdk_nvmf_transport_create;
+ spdk_nvmf_transport_destroy;
+ spdk_nvmf_tgt_get_transport;
+ spdk_nvmf_transport_get_first;
+ spdk_nvmf_transport_get_next;
+ spdk_nvmf_get_transport_opts;
+ spdk_nvmf_get_transport_type;
+ spdk_nvmf_get_transport_name;
+ spdk_nvmf_tgt_add_transport;
+ spdk_nvmf_transport_listen;
+ spdk_nvmf_transport_stop_listen;
+ spdk_nvmf_transport_poll_group_get_stat;
+ spdk_nvmf_transport_poll_group_free_stat;
+ spdk_nvmf_rdma_init_hooks;
+
+ # public functions in nvmf_cmd.h
+ spdk_nvmf_ctrlr_identify_ctrlr;
+ spdk_nvmf_ctrlr_identify_ns;
+ spdk_nvmf_set_custom_admin_cmd_hdlr;
+ spdk_nvmf_set_passthru_admin_cmd;
+ spdk_nvmf_bdev_ctrlr_nvme_passthru_admin;
+ spdk_nvmf_request_get_bdev;
+ spdk_nvmf_request_get_ctrlr;
+ spdk_nvmf_request_get_subsystem;
+ spdk_nvmf_request_get_data;
+ spdk_nvmf_request_get_cmd;
+ spdk_nvmf_request_get_response;
+ spdk_nvmf_request_get_req_to_abort;
+ spdk_nvmf_bdev_ctrlr_abort_cmd;
+
+ # public functions in nvmf_transport.h
+ spdk_nvmf_transport_register;
+ spdk_nvmf_tgt_new_qpair;
+ spdk_nvmf_ctrlr_connect;
+ spdk_nvmf_ctrlr_data_init;
+ spdk_nvmf_ctrlr_get_regs;
+ spdk_nvmf_request_free_buffers;
+ spdk_nvmf_request_get_buffers;
+ spdk_nvmf_request_get_buffers_multi;
+ spdk_nvmf_request_get_dif_ctx;
+ spdk_nvmf_request_exec;
+ spdk_nvmf_request_exec_fabrics;
+ spdk_nvmf_request_free;
+ spdk_nvmf_request_complete;
+ spdk_nvmf_ctrlr_get_subsystem;
+ spdk_nvmf_ctrlr_get_id;
+ spdk_nvmf_req_get_xfer;
+ spdk_nvmf_poll_group_remove;
+
+
+ local: *;
+};
diff --git a/src/spdk/lib/nvmf/subsystem.c b/src/spdk/lib/nvmf/subsystem.c
new file mode 100644
index 000000000..ebe8d9a8e
--- /dev/null
+++ b/src/spdk/lib/nvmf/subsystem.c
@@ -0,0 +1,2515 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/uuid.h"
+#include "spdk/json.h"
+#include "spdk/file.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/utf.h"
+
+#define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
+
+/*
+ * States for parsing valid domains in NQNs according to RFC 1034
+ */
+enum spdk_nvmf_nqn_domain_states {
+ /* First character of a domain must be a letter */
+ SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
+
+ /* Subsequent characters can be any of letter, digit, or hyphen */
+ SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
+
+ /* A domain label must end with either a letter or digit */
+ SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
+};
+
+/* Returns true if is a valid ASCII string as defined by the NVMe spec */
+static bool
+nvmf_valid_ascii_string(const void *buf, size_t size)
+{
+ const uint8_t *str = buf;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (str[i] < 0x20 || str[i] > 0x7E) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool
+nvmf_valid_nqn(const char *nqn)
+{
+ size_t len;
+ struct spdk_uuid uuid_value;
+ uint32_t i;
+ int bytes_consumed;
+ uint32_t domain_label_length;
+ char *reverse_domain_end;
+ uint32_t reverse_domain_end_index;
+ enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
+
+ /* Check for length requirements */
+ len = strlen(nqn);
+ if (len > SPDK_NVMF_NQN_MAX_LEN) {
+ SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
+ return false;
+ }
+
+ /* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
+ if (len < SPDK_NVMF_NQN_MIN_LEN) {
+ SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
+ return false;
+ }
+
+ /* Check for discovery controller nqn */
+ if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
+ return true;
+ }
+
+ /* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
+ if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
+ if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
+ SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
+ return false;
+ }
+
+ if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
+ SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
+ return false;
+ }
+ return true;
+ }
+
+ /* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
+
+ if (strncmp(nqn, "nqn.", 4) != 0) {
+ SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
+ return false;
+ }
+
+ /* Check for yyyy-mm. */
+ if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
+ nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
+ SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
+ return false;
+ }
+
+ reverse_domain_end = strchr(nqn, ':');
+	reverse_domain_end = strchr(nqn, ':');
+	if (reverse_domain_end == NULL ||
+	    (reverse_domain_end_index = reverse_domain_end - nqn) >= len - 1) {
+		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
+			    nqn);
+		return false;
+	}
+
+ /* Check for valid reverse domain */
+ domain_label_length = 0;
+ for (i = 12; i < reverse_domain_end_index; i++) {
+ if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
+ return false;
+ }
+
+ switch (domain_state) {
+
+ case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
+ if (isalpha(nqn[i])) {
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+ domain_label_length++;
+ break;
+ } else {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
+ return false;
+ }
+ }
+
+ case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
+ if (isalpha(nqn[i]) || isdigit(nqn[i])) {
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+ domain_label_length++;
+ break;
+ } else if (nqn[i] == '-') {
+ if (i == reverse_domain_end_index - 1) {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+ nqn);
+ return false;
+ }
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
+ domain_label_length++;
+ break;
+ } else if (nqn[i] == '.') {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+ nqn);
+ return false;
+ } else {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
+ nqn);
+ return false;
+ }
+ }
+
+ case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
+ if (isalpha(nqn[i]) || isdigit(nqn[i])) {
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
+ domain_label_length++;
+ break;
+ } else if (nqn[i] == '-') {
+ if (i == reverse_domain_end_index - 1) {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
+ nqn);
+ return false;
+ }
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
+ domain_label_length++;
+ break;
+ } else if (nqn[i] == '.') {
+ domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
+ domain_label_length = 0;
+ break;
+ } else {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
+ nqn);
+ return false;
+ }
+ }
+ }
+ }
+
+ i = reverse_domain_end_index + 1;
+ while (i < len) {
+ bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
+ if (bytes_consumed <= 0) {
+ SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
+ return false;
+ }
+
+ i += bytes_consumed;
+ }
+ return true;
+}
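+
+/* Illustrative examples of NQNs against the checks above:
+ *
+ *   accepted: "nqn.2016-06.io.spdk:cnode1"
+ *   accepted: "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555"
+ *   rejected: "nqn.16-06.io.spdk:cnode1"     (malformed date code)
+ *   rejected: "nqn.2016-06.io.spdk"          (no ':' separated user string)
+ *   rejected: "nqn.2016-06.io.-spdk:cnode1"  (domain label starts with '-')
+ */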
+
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
+ const char *nqn,
+ enum spdk_nvmf_subtype type,
+ uint32_t num_ns)
+{
+ struct spdk_nvmf_subsystem *subsystem;
+ uint32_t sid;
+
+ if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
+ SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
+ return NULL;
+ }
+
+ if (!nvmf_valid_nqn(nqn)) {
+ return NULL;
+ }
+
+ if (type == SPDK_NVMF_SUBTYPE_DISCOVERY && num_ns != 0) {
+ SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
+ return NULL;
+ }
+
+ /* Find a free subsystem id (sid) */
+ for (sid = 0; sid < tgt->max_subsystems; sid++) {
+ if (tgt->subsystems[sid] == NULL) {
+ break;
+ }
+ }
+ if (sid >= tgt->max_subsystems) {
+ return NULL;
+ }
+
+ subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
+ if (subsystem == NULL) {
+ return NULL;
+ }
+
+ subsystem->thread = spdk_get_thread();
+ subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+ subsystem->tgt = tgt;
+ subsystem->id = sid;
+ subsystem->subtype = type;
+ subsystem->max_nsid = num_ns;
+ subsystem->max_allowed_nsid = num_ns;
+ subsystem->next_cntlid = 0;
+ snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
+ TAILQ_INIT(&subsystem->listeners);
+ TAILQ_INIT(&subsystem->hosts);
+ TAILQ_INIT(&subsystem->ctrlrs);
+
+ if (num_ns != 0) {
+ subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
+ if (subsystem->ns == NULL) {
+ SPDK_ERRLOG("Namespace memory allocation failed\n");
+ free(subsystem);
+ return NULL;
+ }
+ }
+
+ memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
+ subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';
+
+ snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
+ MODEL_NUMBER_DEFAULT);
+
+ tgt->subsystems[sid] = subsystem;
+ tgt->discovery_genctr++;
+
+ return subsystem;
+}
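+
+/* Typical creation flow (illustrative sketch only; error handling, trid setup and
+ * callbacks are omitted, and tgt, bdev, trid, *_cb, cb_arg are placeholders):
+ *
+ *   struct spdk_nvmf_subsystem *subsys;
+ *
+ *   subsys = spdk_nvmf_subsystem_create(tgt, "nqn.2016-06.io.spdk:cnode1",
+ *                                       SPDK_NVMF_SUBTYPE_NVME, 32);
+ *   spdk_nvmf_subsystem_set_sn(subsys, "SPDK00000000000001");
+ *   spdk_nvmf_subsystem_add_ns(subsys, bdev, NULL, 0, NULL);
+ *   spdk_nvmf_subsystem_add_listener(subsys, &trid, listen_done_cb, cb_arg);
+ *   spdk_nvmf_subsystem_start(subsys, start_done_cb, cb_arg);
+ */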
+
+static void
+nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
+{
+ TAILQ_REMOVE(&subsystem->hosts, host, link);
+ free(host);
+}
+
+static void
+_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_subsystem_listener *listener,
+ bool stop)
+{
+ struct spdk_nvmf_transport *transport;
+
+ if (stop) {
+ transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
+ if (transport != NULL) {
+ spdk_nvmf_transport_stop_listen(transport, listener->trid);
+ }
+ }
+
+ TAILQ_REMOVE(&subsystem->listeners, listener, link);
+ free(listener);
+}
+
+void
+spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
+{
+ struct spdk_nvmf_host *host, *host_tmp;
+ struct spdk_nvmf_ctrlr *ctrlr, *ctrlr_tmp;
+ struct spdk_nvmf_ns *ns;
+
+ if (!subsystem) {
+ return;
+ }
+
+ assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "subsystem is %p\n", subsystem);
+
+ nvmf_subsystem_remove_all_listeners(subsystem, false);
+
+ TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
+ nvmf_subsystem_remove_host(subsystem, host);
+ }
+
+ TAILQ_FOREACH_SAFE(ctrlr, &subsystem->ctrlrs, link, ctrlr_tmp) {
+ nvmf_ctrlr_destruct(ctrlr);
+ }
+
+ ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
+ while (ns != NULL) {
+ struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);
+
+ spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
+ ns = next_ns;
+ }
+
+ free(subsystem->ns);
+
+ subsystem->tgt->subsystems[subsystem->id] = NULL;
+ subsystem->tgt->discovery_genctr++;
+
+ free(subsystem);
+}
+
+static int
+nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
+ enum spdk_nvmf_subsystem_state state)
+{
+ enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
+ bool exchanged;
+
+ switch (state) {
+ case SPDK_NVMF_SUBSYSTEM_INACTIVE:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_ACTIVE:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_PAUSING:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_PAUSED:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_RESUMING:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
+ break;
+ default:
+ assert(false);
+ return -1;
+ }
+
+ actual_old_state = expected_old_state;
+ exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ if (spdk_unlikely(exchanged == false)) {
+ if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
+ state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
+ }
+ /* This is for the case when activating the subsystem fails. */
+ if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
+ state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
+ expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+ }
+ actual_old_state = expected_old_state;
+ __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ }
+ assert(actual_old_state == expected_old_state);
+ return actual_old_state - expected_old_state;
+}
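+
+/* Legal state transitions enforced by nvmf_subsystem_set_state() above:
+ *
+ *   INACTIVE -> ACTIVATING -> ACTIVE -> PAUSING -> PAUSED -> RESUMING -> ACTIVE
+ *   ACTIVE -> DEACTIVATING -> INACTIVE
+ *   ACTIVATING -> DEACTIVATING   (activation failure path)
+ */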
+
+struct subsystem_state_change_ctx {
+ struct spdk_nvmf_subsystem *subsystem;
+
+ enum spdk_nvmf_subsystem_state requested_state;
+
+ spdk_nvmf_subsystem_state_change_done cb_fn;
+ void *cb_arg;
+};
+
+static void
+subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ if (status == 0) {
+ status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
+ if (status) {
+ status = -1;
+ }
+ }
+
+ if (ctx->cb_fn) {
+ ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
+ }
+ free(ctx);
+}
+
+static void
+subsystem_state_change_continue(void *ctx, int status)
+{
+ struct spdk_io_channel_iter *i = ctx;
+ spdk_for_each_channel_continue(i, status);
+}
+
+static void
+subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
+{
+ struct subsystem_state_change_ctx *ctx;
+ struct spdk_io_channel *ch;
+ struct spdk_nvmf_poll_group *group;
+
+ ctx = spdk_io_channel_iter_get_ctx(i);
+ ch = spdk_io_channel_iter_get_channel(i);
+ group = spdk_io_channel_get_ctx(ch);
+
+ switch (ctx->requested_state) {
+ case SPDK_NVMF_SUBSYSTEM_INACTIVE:
+ nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+ break;
+ case SPDK_NVMF_SUBSYSTEM_ACTIVE:
+ if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
+ nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+ } else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
+ nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+ }
+ break;
+ case SPDK_NVMF_SUBSYSTEM_PAUSED:
+ nvmf_poll_group_pause_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+}
+
+static int
+nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
+ enum spdk_nvmf_subsystem_state requested_state,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ struct subsystem_state_change_ctx *ctx;
+ enum spdk_nvmf_subsystem_state intermediate_state;
+ int rc;
+
+ switch (requested_state) {
+ case SPDK_NVMF_SUBSYSTEM_INACTIVE:
+ intermediate_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
+ break;
+ case SPDK_NVMF_SUBSYSTEM_ACTIVE:
+ if (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
+ intermediate_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
+ } else {
+ intermediate_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
+ }
+ break;
+ case SPDK_NVMF_SUBSYSTEM_PAUSED:
+ intermediate_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
+ break;
+ default:
+ assert(false);
+ return -EINVAL;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ return -ENOMEM;
+ }
+
+ rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
+ if (rc) {
+ free(ctx);
+ return rc;
+ }
+
+ ctx->subsystem = subsystem;
+ ctx->requested_state = requested_state;
+ ctx->cb_fn = cb_fn;
+ ctx->cb_arg = cb_arg;
+
+ spdk_for_each_channel(subsystem->tgt,
+ subsystem_state_change_on_pg,
+ ctx,
+ subsystem_state_change_done);
+
+ return 0;
+}
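+
+/* Flow summary (derived from the code above): nvmf_subsystem_state_change() first
+ * moves the subsystem into the intermediate state, then iterates every poll group
+ * with spdk_for_each_channel(). Each group adds, pauses, resumes or removes the
+ * subsystem in subsystem_state_change_on_pg(), and subsystem_state_change_done()
+ * finally commits the requested state and invokes the caller's callback. */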
+
+int
+spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
+}
+
+int
+spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
+}
+
+int
+spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
+}
+
+int
+spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
+ spdk_nvmf_subsystem_state_change_done cb_fn,
+ void *cb_arg)
+{
+ return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
+}
+
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
+{
+ struct spdk_nvmf_subsystem *subsystem;
+ uint32_t sid;
+
+ for (sid = 0; sid < tgt->max_subsystems; sid++) {
+ subsystem = tgt->subsystems[sid];
+ if (subsystem) {
+ return subsystem;
+ }
+ }
+
+ return NULL;
+}
+
+struct spdk_nvmf_subsystem *
+spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
+{
+ uint32_t sid;
+ struct spdk_nvmf_tgt *tgt;
+
+ if (!subsystem) {
+ return NULL;
+ }
+
+ tgt = subsystem->tgt;
+
+ for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
+ subsystem = tgt->subsystems[sid];
+ if (subsystem) {
+ return subsystem;
+ }
+ }
+
+ return NULL;
+}
+
+static struct spdk_nvmf_host *
+nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+ struct spdk_nvmf_host *host = NULL;
+
+ TAILQ_FOREACH(host, &subsystem->hosts, link) {
+ if (strcmp(hostnqn, host->nqn) == 0) {
+ return host;
+ }
+ }
+
+ return NULL;
+}
+
+int
+spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+ struct spdk_nvmf_host *host;
+
+ if (!nvmf_valid_nqn(hostnqn)) {
+ return -EINVAL;
+ }
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return -EAGAIN;
+ }
+
+ if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
+ /* This subsystem already allows the specified host. */
+ return 0;
+ }
+
+ host = calloc(1, sizeof(*host));
+ if (!host) {
+ return -ENOMEM;
+ }
+
+ snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);
+
+ TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);
+ subsystem->tgt->discovery_genctr++;
+
+ return 0;
+}
+
+int
+spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+ struct spdk_nvmf_host *host;
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return -EAGAIN;
+ }
+
+ host = nvmf_subsystem_find_host(subsystem, hostnqn);
+ if (host == NULL) {
+ return -ENOENT;
+ }
+
+ nvmf_subsystem_remove_host(subsystem, host);
+ return 0;
+}
+
+int
+spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
+{
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return -EAGAIN;
+ }
+
+ subsystem->allow_any_host = allow_any_host;
+
+ return 0;
+}
+
+bool
+spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->allow_any_host;
+}
+
+bool
+spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
+{
+ if (!hostnqn) {
+ return false;
+ }
+
+ if (subsystem->allow_any_host) {
+ return true;
+ }
+
+ return nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
+}
+
+struct spdk_nvmf_host *
+spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
+{
+ return TAILQ_FIRST(&subsystem->hosts);
+}
+
+
+struct spdk_nvmf_host *
+spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_host *prev_host)
+{
+ return TAILQ_NEXT(prev_host, link);
+}
+
+const char *
+spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
+{
+ return host->nqn;
+}
+
+struct spdk_nvmf_subsystem_listener *
+nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_subsystem_listener *listener;
+
+ TAILQ_FOREACH(listener, &subsystem->listeners, link) {
+ if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
+ return listener;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Function to be called once the target is listening.
+ *
+ * \param ctx Context argument passed to this function.
+ * \param status 0 if it completed successfully, or negative errno if it failed.
+ */
+static void
+_nvmf_subsystem_add_listener_done(void *ctx, int status)
+{
+ struct spdk_nvmf_subsystem_listener *listener = ctx;
+
+ if (status) {
+ listener->cb_fn(listener->cb_arg, status);
+ free(listener);
+ return;
+ }
+
+ TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
+ listener->subsystem->tgt->discovery_genctr++;
+ listener->cb_fn(listener->cb_arg, status);
+}
+
+void
+spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvme_transport_id *trid,
+ spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
+ void *cb_arg)
+{
+ struct spdk_nvmf_transport *transport;
+ struct spdk_nvmf_subsystem_listener *listener;
+ struct spdk_nvmf_listener *tr_listener;
+
+ assert(cb_fn != NULL);
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ cb_fn(cb_arg, -EAGAIN);
+ return;
+ }
+
+ if (nvmf_subsystem_find_listener(subsystem, trid)) {
+ /* Listener already exists in this subsystem */
+ cb_fn(cb_arg, 0);
+ return;
+ }
+
+ transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
+ if (transport == NULL) {
+ SPDK_ERRLOG("Unknown transport type %d\n", trid->trtype);
+ cb_fn(cb_arg, -EINVAL);
+ return;
+ }
+
+ tr_listener = nvmf_transport_find_listener(transport, trid);
+ if (!tr_listener) {
+ SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
+ cb_fn(cb_arg, -EINVAL);
+ return;
+ }
+
+ listener = calloc(1, sizeof(*listener));
+ if (!listener) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ listener->trid = &tr_listener->trid;
+ listener->transport = transport;
+ listener->cb_fn = cb_fn;
+ listener->cb_arg = cb_arg;
+ listener->subsystem = subsystem;
+
+ if (transport->ops->listen_associate != NULL) {
+ transport->ops->listen_associate(transport, subsystem, trid,
+ _nvmf_subsystem_add_listener_done,
+ listener);
+ } else {
+ _nvmf_subsystem_add_listener_done(listener, 0);
+ }
+}
+
+int
+spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_subsystem_listener *listener;
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return -EAGAIN;
+ }
+
+ listener = nvmf_subsystem_find_listener(subsystem, trid);
+ if (listener == NULL) {
+ return -ENOENT;
+ }
+
+ _nvmf_subsystem_remove_listener(subsystem, listener, false);
+
+ return 0;
+}
+
+void
+nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
+ bool stop)
+{
+ struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;
+
+ TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
+ _nvmf_subsystem_remove_listener(subsystem, listener, stop);
+ }
+}
+
+bool
+spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_subsystem_listener *listener;
+
+ if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
+ return true;
+ }
+
+ TAILQ_FOREACH(listener, &subsystem->listeners, link) {
+ if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+struct spdk_nvmf_subsystem_listener *
+spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
+{
+ return TAILQ_FIRST(&subsystem->listeners);
+}
+
+struct spdk_nvmf_subsystem_listener *
+spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_subsystem_listener *prev_listener)
+{
+ return TAILQ_NEXT(prev_listener, link);
+}
+
+const struct spdk_nvme_transport_id *
+spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
+{
+ return listener->trid;
+}
+
+void
+spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
+ bool allow_any_listener)
+{
+ subsystem->allow_any_listener = allow_any_listener;
+}
+
+bool
+spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->allow_any_listener;
+}
+
+
+struct subsystem_update_ns_ctx {
+ struct spdk_nvmf_subsystem *subsystem;
+
+ spdk_nvmf_subsystem_state_change_done cb_fn;
+ void *cb_arg;
+};
+
+static void
+subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ if (ctx->cb_fn) {
+ ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
+ }
+ free(ctx);
+}
+
+static void
+subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
+{
+ int rc;
+ struct subsystem_update_ns_ctx *ctx;
+ struct spdk_nvmf_poll_group *group;
+ struct spdk_nvmf_subsystem *subsystem;
+
+ ctx = spdk_io_channel_iter_get_ctx(i);
+ group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
+ subsystem = ctx->subsystem;
+
+ rc = nvmf_poll_group_update_subsystem(group, subsystem);
+ spdk_for_each_channel_continue(i, rc);
+}
+
+static int
+nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
+ void *ctx)
+{
+ spdk_for_each_channel(subsystem->tgt,
+ subsystem_update_ns_on_pg,
+ ctx,
+ cpl);
+
+ return 0;
+}
+
+static void
+nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+ struct spdk_nvmf_ctrlr *ctrlr;
+
+ TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
+ nvmf_ctrlr_ns_changed(ctrlr, nsid);
+ }
+}
+
+int
+spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_registrant *reg, *reg_tmp;
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ assert(false);
+ return -1;
+ }
+
+ if (nsid == 0 || nsid > subsystem->max_nsid) {
+ return -1;
+ }
+
+ ns = subsystem->ns[nsid - 1];
+ if (!ns) {
+ return -1;
+ }
+
+ subsystem->ns[nsid - 1] = NULL;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
+ TAILQ_REMOVE(&ns->registrants, reg, link);
+ free(reg);
+ }
+ spdk_bdev_module_release_bdev(ns->bdev);
+ spdk_bdev_close(ns->desc);
+ if (ns->ptpl_file) {
+ free(ns->ptpl_file);
+ }
+ free(ns);
+
+ nvmf_subsystem_ns_changed(subsystem, nsid);
+
+ return 0;
+}
+
+static void
+_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct spdk_nvmf_ns *ns = cb_arg;
+ int rc;
+
+ rc = spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
+ if (rc != 0) {
+		SPDK_ERRLOG("Failed to make changes to NVMe-oF subsystem with id: %u\n", subsystem->id);
+ }
+
+ spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
+}
+
+static void
+nvmf_ns_hot_remove(void *remove_ctx)
+{
+ struct spdk_nvmf_ns *ns = remove_ctx;
+ int rc;
+
+ rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_hot_remove, ns);
+ if (rc) {
+ SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
+ }
+}
+
+static void
+_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
+{
+ struct spdk_nvmf_ns *ns = cb_arg;
+
+ nvmf_subsystem_ns_changed(subsystem, ns->opts.nsid);
+ spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
+}
+
+static void
+nvmf_ns_resize(void *event_ctx)
+{
+ struct spdk_nvmf_ns *ns = event_ctx;
+ int rc;
+
+ rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_resize, ns);
+ if (rc) {
+ SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
+ }
+}
+
+static void
+nvmf_ns_event(enum spdk_bdev_event_type type,
+ struct spdk_bdev *bdev,
+ void *event_ctx)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
+ type,
+ bdev->name,
+ ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
+ ((struct spdk_nvmf_ns *)event_ctx)->nsid);
+
+ switch (type) {
+ case SPDK_BDEV_EVENT_REMOVE:
+ nvmf_ns_hot_remove(event_ctx);
+ break;
+ case SPDK_BDEV_EVENT_RESIZE:
+ nvmf_ns_resize(event_ctx);
+ break;
+ default:
+ SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
+ break;
+ }
+}
+
+void
+spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
+{
+ /* All current fields are set to 0 by default. */
+ memset(opts, 0, opts_size);
+}
+
+/* Dummy bdev module used to claim bdevs. */
+static struct spdk_bdev_module ns_bdev_module = {
+ .name = "NVMe-oF Target",
+};
+
+static int
+nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
+static int
+nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
+
+uint32_t
+spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev,
+ const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
+ const char *ptpl_file)
+{
+ struct spdk_nvmf_ns_opts opts;
+ struct spdk_nvmf_ns *ns;
+ struct spdk_nvmf_reservation_info info = {0};
+ int rc;
+
+ if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
+ subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
+ return 0;
+ }
+
+ if (spdk_bdev_get_md_size(bdev) != 0 && !spdk_bdev_is_md_interleaved(bdev)) {
+ SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
+ return 0;
+ }
+
+ spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
+ if (user_opts) {
+ memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
+ }
+
+ if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
+ opts.uuid = *spdk_bdev_get_uuid(bdev);
+ }
+
+ if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
+ SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
+ return 0;
+ }
+
+ if (opts.nsid == 0) {
+ /*
+ * NSID not specified - find a free index.
+ *
+ * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
+ * expand max_nsid if possible.
+ */
+ for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
+ if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
+ break;
+ }
+ }
+ }
+
+ if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
+ SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
+ return 0;
+ }
+
+ if (opts.nsid > subsystem->max_nsid) {
+ struct spdk_nvmf_ns **new_ns_array;
+
+ /* If MaxNamespaces was specified, we can't extend max_nsid beyond it. */
+ if (subsystem->max_allowed_nsid > 0 && opts.nsid > subsystem->max_allowed_nsid) {
+ SPDK_ERRLOG("Can't extend NSID range above MaxNamespaces\n");
+ return 0;
+ }
+
+ /* If a controller is connected, we can't change NN. */
+ if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
+ SPDK_ERRLOG("Can't extend NSID range while controllers are connected\n");
+ return 0;
+ }
+
+ new_ns_array = realloc(subsystem->ns, sizeof(struct spdk_nvmf_ns *) * opts.nsid);
+ if (new_ns_array == NULL) {
+ SPDK_ERRLOG("Memory allocation error while resizing namespace array.\n");
+ return 0;
+ }
+
+ memset(new_ns_array + subsystem->max_nsid, 0,
+ sizeof(struct spdk_nvmf_ns *) * (opts.nsid - subsystem->max_nsid));
+ subsystem->ns = new_ns_array;
+ subsystem->max_nsid = opts.nsid;
+ }
+
+ ns = calloc(1, sizeof(*ns));
+ if (ns == NULL) {
+ SPDK_ERRLOG("Namespace allocation failed\n");
+ return 0;
+ }
+
+ ns->bdev = bdev;
+ ns->opts = opts;
+ ns->subsystem = subsystem;
+ rc = spdk_bdev_open_ext(bdev->name, true, nvmf_ns_event, ns, &ns->desc);
+ if (rc != 0) {
+ SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
+ subsystem->subnqn, spdk_bdev_get_name(bdev), rc);
+ free(ns);
+ return 0;
+ }
+ rc = spdk_bdev_module_claim_bdev(bdev, ns->desc, &ns_bdev_module);
+ if (rc != 0) {
+ spdk_bdev_close(ns->desc);
+ free(ns);
+ return 0;
+ }
+ subsystem->ns[opts.nsid - 1] = ns;
+ ns->nsid = opts.nsid;
+ TAILQ_INIT(&ns->registrants);
+
+ if (ptpl_file) {
+ rc = nvmf_ns_load_reservation(ptpl_file, &info);
+ if (!rc) {
+ rc = nvmf_ns_reservation_restore(ns, &info);
+ if (rc) {
+ SPDK_ERRLOG("Subsystem restore reservation failed\n");
+ subsystem->ns[opts.nsid - 1] = NULL;
+ spdk_bdev_close(ns->desc);
+ free(ns);
+ return 0;
+ }
+ }
+ ns->ptpl_file = strdup(ptpl_file);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
+ spdk_nvmf_subsystem_get_nqn(subsystem),
+ spdk_bdev_get_name(bdev),
+ opts.nsid);
+
+ nvmf_subsystem_ns_changed(subsystem, opts.nsid);
+
+ return opts.nsid;
+}
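+
+/* Usage note: spdk_nvmf_subsystem_add_ns() returns the assigned NSID on success
+ * and 0 on failure, and may only be called while the subsystem is INACTIVE or
+ * PAUSED (see the state check at the top of the function). An illustrative
+ * hot-add sketch (error handling omitted, names are placeholders):
+ *
+ *   static void
+ *   add_ns_paused(struct spdk_nvmf_subsystem *subsys, void *cb_arg, int status)
+ *   {
+ *           struct spdk_bdev *bdev = cb_arg;
+ *
+ *           spdk_nvmf_subsystem_add_ns(subsys, bdev, NULL, 0, NULL);
+ *           spdk_nvmf_subsystem_resume(subsys, NULL, NULL);
+ *   }
+ *
+ *   spdk_nvmf_subsystem_pause(subsys, add_ns_paused, bdev);
+ */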
+
+static uint32_t
+nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
+ uint32_t prev_nsid)
+{
+ uint32_t nsid;
+
+ if (prev_nsid >= subsystem->max_nsid) {
+ return 0;
+ }
+
+ for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
+ if (subsystem->ns[nsid - 1]) {
+ return nsid;
+ }
+ }
+
+ return 0;
+}
+
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
+{
+ uint32_t first_nsid;
+
+ first_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, 0);
+ return _nvmf_subsystem_get_ns(subsystem, first_nsid);
+}
+
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ns *prev_ns)
+{
+ uint32_t next_nsid;
+
+ next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
+ return _nvmf_subsystem_get_ns(subsystem, next_nsid);
+}
+
+struct spdk_nvmf_ns *
+spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
+{
+ return _nvmf_subsystem_get_ns(subsystem, nsid);
+}
+
+uint32_t
+spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
+{
+ return ns->opts.nsid;
+}
+
+struct spdk_bdev *
+spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
+{
+ return ns->bdev;
+}
+
+void
+spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
+ size_t opts_size)
+{
+ memset(opts, 0, opts_size);
+ memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
+}
+
+const char *
+spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->sn;
+}
+
+int
+spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
+{
+ size_t len, max_len;
+
+ max_len = sizeof(subsystem->sn) - 1;
+ len = strlen(sn);
+ if (len > max_len) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid sn \"%s\": length %zu > max %zu\n",
+ sn, len, max_len);
+ return -1;
+ }
+
+ if (!nvmf_valid_ascii_string(sn, len)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII sn\n");
+ SPDK_LOGDUMP(SPDK_LOG_NVMF, "sn", sn, len);
+ return -1;
+ }
+
+ snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
+
+ return 0;
+}
+
+const char *
+spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->mn;
+}
+
+int
+spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
+{
+ size_t len, max_len;
+
+ if (mn == NULL) {
+ mn = MODEL_NUMBER_DEFAULT;
+ }
+ max_len = sizeof(subsystem->mn) - 1;
+ len = strlen(mn);
+ if (len > max_len) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid mn \"%s\": length %zu > max %zu\n",
+ mn, len, max_len);
+ return -1;
+ }
+
+ if (!nvmf_valid_ascii_string(mn, len)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII mn\n");
+ SPDK_LOGDUMP(SPDK_LOG_NVMF, "mn", mn, len);
+ return -1;
+ }
+
+ snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
+
+ return 0;
+}
+
+const char *
+spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->subnqn;
+}
+
+enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->subtype;
+}
+
+uint32_t
+spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->max_nsid;
+}
+
+static uint16_t
+nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
+{
+ int count;
+
+ /*
+ * In the worst case, we might have to try all CNTLID values between 1 and 0xFFF0 - 1
+ * before we find one that is unused (or find that all values are in use).
+ */
+ for (count = 0; count < 0xFFF0 - 1; count++) {
+ subsystem->next_cntlid++;
+ if (subsystem->next_cntlid >= 0xFFF0) {
+ /* The spec reserves cntlid values in the range FFF0h to FFFFh. */
+ subsystem->next_cntlid = 1;
+ }
+
+ /* Check if a controller with this cntlid currently exists. */
+ if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
+ /* Found unused cntlid */
+ return subsystem->next_cntlid;
+ }
+ }
+
+ /* All valid cntlid values are in use. */
+ return 0xFFFF;
+}
+
+int
+nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
+{
+ ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
+ if (ctrlr->cntlid == 0xFFFF) {
+ /* Unable to get a cntlid */
+ SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
+ return -EBUSY;
+ }
+
+ TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);
+
+ return 0;
+}
+
+void
+nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ctrlr *ctrlr)
+{
+ assert(subsystem == ctrlr->subsys);
+ TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
+}
+
+struct spdk_nvmf_ctrlr *
+nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
+{
+ struct spdk_nvmf_ctrlr *ctrlr;
+
+ TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
+ if (ctrlr->cntlid == cntlid) {
+ return ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+uint32_t
+spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
+{
+ return subsystem->max_allowed_nsid;
+}
+
+struct _nvmf_ns_registrant {
+ uint64_t rkey;
+ char *host_uuid;
+};
+
+struct _nvmf_ns_registrants {
+ size_t num_regs;
+ struct _nvmf_ns_registrant reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+};
+
+struct _nvmf_ns_reservation {
+ bool ptpl_activated;
+ enum spdk_nvme_reservation_type rtype;
+ uint64_t crkey;
+ char *bdev_uuid;
+ char *holder_uuid;
+ struct _nvmf_ns_registrants regs;
+};
+
+static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
+ {"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
+ {"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
+};
+
+static int
+nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
+{
+ struct _nvmf_ns_registrant *reg = out;
+
+ return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
+ SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
+}
+
+static int
+nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
+{
+ struct _nvmf_ns_registrants *regs = out;
+
+ return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
+ SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
+ sizeof(struct _nvmf_ns_registrant));
+}
+
+static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
+ {"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
+ {"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
+ {"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
+ {"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
+ {"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
+ {"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
+};
+
+static int
+nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
+{
+ FILE *fd;
+ size_t json_size;
+ ssize_t values_cnt, rc;
+ void *json = NULL, *end;
+ struct spdk_json_val *values = NULL;
+ struct _nvmf_ns_reservation res = {};
+ uint32_t i;
+
+ fd = fopen(file, "r");
+ /* It's not an error if the file does not exist */
+ if (!fd) {
+ SPDK_NOTICELOG("File %s does not exist\n", file);
+ return -ENOENT;
+ }
+
+ /* Load all persist file contents into a local buffer */
+ json = spdk_posix_file_load(fd, &json_size);
+ fclose(fd);
+ if (!json) {
+		SPDK_ERRLOG("Failed to load persist file %s\n", file);
+ return -ENOMEM;
+ }
+
+ rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
+ if (rc < 0) {
+ SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
+ goto exit;
+ }
+
+ values_cnt = rc;
+ values = calloc(values_cnt, sizeof(struct spdk_json_val));
+ if (values == NULL) {
+ goto exit;
+ }
+
+ rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
+ if (rc != values_cnt) {
+ SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
+ goto exit;
+ }
+
+ /* Decode json */
+ if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
+ SPDK_COUNTOF(nvmf_ns_pr_decoders),
+ &res)) {
+ SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
+ rc = -EINVAL;
+ goto exit;
+ }
+
+ if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
+ SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
+ rc = -ERANGE;
+ goto exit;
+ }
+
+ rc = 0;
+ info->ptpl_activated = res.ptpl_activated;
+ info->rtype = res.rtype;
+ info->crkey = res.crkey;
+ snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
+ snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
+ info->num_regs = res.regs.num_regs;
+ for (i = 0; i < res.regs.num_regs; i++) {
+ info->registrants[i].rkey = res.regs.reg[i].rkey;
+ snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
+ res.regs.reg[i].host_uuid);
+ }
+
+exit:
+ free(json);
+ free(values);
+ free(res.bdev_uuid);
+ free(res.holder_uuid);
+ for (i = 0; i < res.regs.num_regs; i++) {
+ free(res.regs.reg[i].host_uuid);
+ }
+
+ return rc;
+}
+
+static bool
+nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
+
+static int
+nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
+{
+ uint32_t i;
+ struct spdk_nvmf_registrant *reg, *holder = NULL;
+ struct spdk_uuid bdev_uuid, holder_uuid;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "NSID %u, PTPL %u, Number of registrants %u\n",
+ ns->nsid, info->ptpl_activated, info->num_regs);
+
+ /* it's not an error */
+ if (!info->ptpl_activated || !info->num_regs) {
+ return 0;
+ }
+
+ spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
+ if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
+		SPDK_ERRLOG("Existing bdev UUID does not match the configuration file\n");
+ return -EINVAL;
+ }
+
+ ns->crkey = info->crkey;
+ ns->rtype = info->rtype;
+ ns->ptpl_activated = info->ptpl_activated;
+ spdk_uuid_parse(&holder_uuid, info->holder_uuid);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Bdev UUID %s\n", info->bdev_uuid);
+ if (info->rtype) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
+ info->holder_uuid, info->rtype, info->crkey);
+ }
+
+ for (i = 0; i < info->num_regs; i++) {
+ reg = calloc(1, sizeof(*reg));
+ if (!reg) {
+ return -ENOMEM;
+ }
+ spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
+ reg->rkey = info->registrants[i].rkey;
+ TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
+ if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
+ holder = reg;
+ }
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
+ info->registrants[i].rkey, info->registrants[i].host_uuid);
+ }
+
+ if (nvmf_ns_reservation_all_registrants_type(ns)) {
+ ns->holder = TAILQ_FIRST(&ns->registrants);
+ } else {
+ ns->holder = holder;
+ }
+
+ return 0;
+}
+
+static int
+nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
+{
+ char *file = cb_ctx;
+ size_t rc;
+ FILE *fd;
+
+ fd = fopen(file, "w");
+ if (!fd) {
+ SPDK_ERRLOG("Can't open file %s for write\n", file);
+ return -ENOENT;
+ }
+ rc = fwrite(data, 1, size, fd);
+ fclose(fd);
+
+ return rc == size ? 0 : -1;
+}
+
+static int
+nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
+{
+ struct spdk_json_write_ctx *w;
+ uint32_t i;
+ int rc = 0;
+
+ w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
+ if (w == NULL) {
+ return -ENOMEM;
+ }
+ /* clear the configuration file */
+ if (!info->ptpl_activated) {
+ goto exit;
+ }
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
+ spdk_json_write_named_uint32(w, "rtype", info->rtype);
+ spdk_json_write_named_uint64(w, "crkey", info->crkey);
+ spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
+ spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);
+
+ spdk_json_write_named_array_begin(w, "registrants");
+ for (i = 0; i < info->num_regs; i++) {
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
+ spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
+ spdk_json_write_object_end(w);
+ }
+ spdk_json_write_array_end(w);
+ spdk_json_write_object_end(w);
+
+exit:
+ rc = spdk_json_write_end(w);
+ return rc;
+}
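+
+/* For reference, a sketch of the persist (PTPL) file the writer above
+ * produces. Only the key names come from the code; the values here are
+ * purely illustrative:
+ *
+ * {
+ *   "ptpl": true,
+ *   "rtype": 1,
+ *   "crkey": 10,
+ *   "bdev_uuid": "11111111-...",
+ *   "holder_uuid": "22222222-...",
+ *   "registrants": [
+ *     { "rkey": 10, "host_uuid": "22222222-..." }
+ *   ]
+ * }
+ */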
+
+static int
+nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
+{
+ struct spdk_nvmf_reservation_info info;
+ struct spdk_nvmf_registrant *reg, *tmp;
+ uint32_t i = 0;
+
+ assert(ns != NULL);
+
+ if (!ns->bdev || !ns->ptpl_file) {
+ return 0;
+ }
+
+ memset(&info, 0, sizeof(info));
+ spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));
+
+ if (ns->rtype) {
+ info.rtype = ns->rtype;
+ info.crkey = ns->crkey;
+ if (!nvmf_ns_reservation_all_registrants_type(ns)) {
+ assert(ns->holder != NULL);
+ spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
+ }
+ }
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
+ &reg->hostid);
+ info.registrants[i++].rkey = reg->rkey;
+ }
+
+ info.num_regs = i;
+ info.ptpl_activated = ns->ptpl_activated;
+
+ return nvmf_ns_reservation_update(ns->ptpl_file, &info);
+}
+
+static struct spdk_nvmf_registrant *
+nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
+ struct spdk_uuid *uuid)
+{
+ struct spdk_nvmf_registrant *reg, *tmp;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ if (!spdk_uuid_compare(&reg->hostid, uuid)) {
+ return reg;
+ }
+ }
+
+ return NULL;
+}
+
+/* Generate reservation notice log to registered HostID controllers */
+static void
+nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
+ struct spdk_nvmf_ns *ns,
+ struct spdk_uuid *hostid_list,
+ uint32_t num_hostid,
+ enum spdk_nvme_reservation_notification_log_page_type type)
+{
+ struct spdk_nvmf_ctrlr *ctrlr;
+ uint32_t i;
+
+ for (i = 0; i < num_hostid; i++) {
+ TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
+ if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
+ nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
+ }
+ }
+ }
+}
+
+/* Get all registrants' hostid other than the controller who issued the command */
+static uint32_t
+nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
+ struct spdk_uuid *hostid_list,
+ uint32_t max_num_hostid,
+ struct spdk_uuid *current_hostid)
+{
+ struct spdk_nvmf_registrant *reg, *tmp;
+ uint32_t num_hostid = 0;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
+ if (num_hostid == max_num_hostid) {
+ assert(false);
+ return max_num_hostid;
+ }
+ hostid_list[num_hostid++] = reg->hostid;
+ }
+ }
+
+ return num_hostid;
+}
+
+/* Calculate the list of unregistered HostIDs by comparing the registrant
+ * list from before the preempt command was executed with the list that
+ * remains after it.
+ */
+static uint32_t
+nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
+ uint32_t old_num_hostid,
+ struct spdk_uuid *remaining_hostid_list,
+ uint32_t remaining_num_hostid)
+{
+ struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint32_t i, j, num_hostid = 0;
+ bool found;
+
+ if (!remaining_num_hostid) {
+ return old_num_hostid;
+ }
+
+ for (i = 0; i < old_num_hostid; i++) {
+ found = false;
+ for (j = 0; j < remaining_num_hostid; j++) {
+ if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
+ }
+ }
+
+ if (num_hostid) {
+ memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
+ }
+
+ return num_hostid;
+}
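+
+/* A small worked example of the helper above, with hypothetical host IDs:
+ * old list before the preempt      = { hostA, hostB, hostC }
+ * remaining list after the preempt = { hostB }
+ * The function rewrites old_hostid_list in place to { hostA, hostC } and
+ * returns 2; those are the hosts that later receive a registration
+ * preempted notification.
+ */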
+
+/* whether the current reservation type is an all-registrants type */
+static bool
+nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
+{
+ return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
+ ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
+}
+
+/* whether the given registrant is the reservation holder */
+static bool
+nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_registrant *reg)
+{
+ if (!reg) {
+ return false;
+ }
+
+ if (nvmf_ns_reservation_all_registrants_type(ns)) {
+ return true;
+ }
+
+ return (ns->holder == reg);
+}
+
+static int
+nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ uint64_t nrkey)
+{
+ struct spdk_nvmf_registrant *reg;
+
+ reg = calloc(1, sizeof(*reg));
+ if (!reg) {
+ return -ENOMEM;
+ }
+
+ reg->rkey = nrkey;
+ /* set hostid for the registrant */
+ spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
+ TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
+ ns->gen++;
+
+ return 0;
+}
+
+static void
+nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
+{
+ ns->rtype = 0;
+ ns->crkey = 0;
+ ns->holder = NULL;
+}
+
+/* release the reservation if the last registrant was removed */
+static void
+nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_registrant *reg)
+{
+ struct spdk_nvmf_registrant *next_reg;
+
+ /* no reservation holder */
+ if (!ns->holder) {
+ assert(ns->rtype == 0);
+ return;
+ }
+
+ next_reg = TAILQ_FIRST(&ns->registrants);
+ if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
+ /* the next valid registrant is the new holder now */
+ ns->holder = next_reg;
+ } else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
+ /* release the reservation */
+ nvmf_ns_reservation_release_reservation(ns);
+ }
+}
+
+static void
+nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_registrant *reg)
+{
+ TAILQ_REMOVE(&ns->registrants, reg, link);
+ nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
+ free(reg);
+ ns->gen++;
+ return;
+}
+
+static uint32_t
+nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
+ uint64_t rkey)
+{
+ struct spdk_nvmf_registrant *reg, *tmp;
+ uint32_t count = 0;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ if (reg->rkey == rkey) {
+ nvmf_ns_reservation_remove_registrant(ns, reg);
+ count++;
+ }
+ }
+ return count;
+}
+
+static uint32_t
+nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_registrant *reg)
+{
+ struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
+ uint32_t count = 0;
+
+ TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
+ if (reg_tmp != reg) {
+ nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
+ count++;
+ }
+ }
+ return count;
+}
+
+static uint32_t
+nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
+{
+ struct spdk_nvmf_registrant *reg, *reg_tmp;
+ uint32_t count = 0;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
+ nvmf_ns_reservation_remove_registrant(ns, reg);
+ count++;
+ }
+ return count;
+}
+
+static void
+nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
+ enum spdk_nvme_reservation_type rtype,
+ struct spdk_nvmf_registrant *holder)
+{
+ ns->rtype = rtype;
+ ns->crkey = rkey;
+ assert(ns->holder == NULL);
+ ns->holder = holder;
+}
+
+static bool
+nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ uint8_t rrega, iekey, cptpl, rtype;
+ struct spdk_nvme_reservation_register_data key;
+ struct spdk_nvmf_registrant *reg;
+ uint8_t status = SPDK_NVME_SC_SUCCESS;
+ bool update_sgroup = false;
+ struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint32_t num_hostid = 0;
+ int rc;
+
+ rrega = cmd->cdw10_bits.resv_register.rrega;
+ iekey = cmd->cdw10_bits.resv_register.iekey;
+ cptpl = cmd->cdw10_bits.resv_register.cptpl;
+
+ if (req->data && req->length >= sizeof(key)) {
+ memcpy(&key, req->data, sizeof(key));
+ } else {
+ SPDK_ERRLOG("No key provided. Failing request.\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
+		      "CRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
+ rrega, iekey, cptpl, key.crkey, key.nrkey);
+
+ if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
+		/* Turn PTPL to the OFF state; the configuration file needs to be updated */
+ if (ns->ptpl_activated) {
+ ns->ptpl_activated = 0;
+ update_sgroup = true;
+ }
+ } else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
+ if (ns->ptpl_file == NULL) {
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ } else if (ns->ptpl_activated == 0) {
+ ns->ptpl_activated = 1;
+ update_sgroup = true;
+ }
+ }
+
+	/* check whether the current Host Identifier already has a registrant */
+ reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
+
+ switch (rrega) {
+ case SPDK_NVME_RESERVE_REGISTER_KEY:
+ if (!reg) {
+ /* register new controller */
+ if (key.nrkey == 0) {
+ SPDK_ERRLOG("Can't register zeroed new key\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+ rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
+ if (rc < 0) {
+ status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ goto exit;
+ }
+ update_sgroup = true;
+ } else {
+ /* register with same key is not an error */
+ if (reg->rkey != key.nrkey) {
+				SPDK_ERRLOG("The same host has already registered a "
+ "key with 0x%"PRIx64"\n",
+ reg->rkey);
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+ }
+ break;
+ case SPDK_NVME_RESERVE_UNREGISTER_KEY:
+ if (!reg || (!iekey && reg->rkey != key.crkey)) {
+ SPDK_ERRLOG("No registrant or current key doesn't match "
+ "with existing registrant key\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+
+ rtype = ns->rtype;
+ num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
+ SPDK_NVMF_MAX_NUM_REGISTRANTS,
+ &ctrlr->hostid);
+
+ nvmf_ns_reservation_remove_registrant(ns, reg);
+
+ if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
+ rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
+ nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
+ hostid_list,
+ num_hostid,
+ SPDK_NVME_RESERVATION_RELEASED);
+ }
+ update_sgroup = true;
+ break;
+ case SPDK_NVME_RESERVE_REPLACE_KEY:
+ if (!reg || (!iekey && reg->rkey != key.crkey)) {
+ SPDK_ERRLOG("No registrant or current key doesn't match "
+ "with existing registrant key\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ goto exit;
+ }
+ if (key.nrkey == 0) {
+ SPDK_ERRLOG("Can't register zeroed new key\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+ reg->rkey = key.nrkey;
+ update_sgroup = true;
+ break;
+ default:
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+
+exit:
+ if (update_sgroup) {
+ rc = nvmf_ns_update_reservation_info(ns);
+ if (rc != 0) {
+ status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ }
+ }
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = status;
+ return update_sgroup;
+}
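+
+/* Note on the return value convention shared by the register/acquire/release
+ * handlers: they return true when the reservation state changed, which tells
+ * the caller (nvmf_ns_reservation_request()) to push the update out to the
+ * subsystem's poll groups before completing the request.
+ */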
+
+static bool
+nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ uint8_t racqa, iekey, rtype;
+ struct spdk_nvme_reservation_acquire_data key;
+ struct spdk_nvmf_registrant *reg;
+ bool all_regs = false;
+ uint32_t count = 0;
+ bool update_sgroup = true;
+ struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint32_t num_hostid = 0;
+ struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint32_t new_num_hostid = 0;
+ bool reservation_released = false;
+ uint8_t status = SPDK_NVME_SC_SUCCESS;
+
+ racqa = cmd->cdw10_bits.resv_acquire.racqa;
+ iekey = cmd->cdw10_bits.resv_acquire.iekey;
+ rtype = cmd->cdw10_bits.resv_acquire.rtype;
+
+ if (req->data && req->length >= sizeof(key)) {
+ memcpy(&key, req->data, sizeof(key));
+ } else {
+ SPDK_ERRLOG("No key provided. Failing request.\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
+		      "CRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
+ racqa, iekey, rtype, key.crkey, key.prkey);
+
+ if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
+		SPDK_ERRLOG("IEKEY is set or the reservation type is invalid\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ update_sgroup = false;
+ goto exit;
+ }
+
+ reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
+ /* must be registrant and CRKEY must match */
+ if (!reg || reg->rkey != key.crkey) {
+ SPDK_ERRLOG("No registrant or current key doesn't match "
+ "with existing registrant key\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ update_sgroup = false;
+ goto exit;
+ }
+
+ all_regs = nvmf_ns_reservation_all_registrants_type(ns);
+
+ switch (racqa) {
+ case SPDK_NVME_RESERVE_ACQUIRE:
+		/* it's not an error for the holder to acquire the same reservation type again */
+ if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
+ /* do nothing */
+ update_sgroup = false;
+ } else if (ns->holder == NULL) {
+			/* first time to acquire the reservation */
+ nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
+ } else {
+ SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ update_sgroup = false;
+ goto exit;
+ }
+ break;
+ case SPDK_NVME_RESERVE_PREEMPT:
+ /* no reservation holder */
+ if (!ns->holder) {
+ /* unregister with PRKEY */
+ nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
+ break;
+ }
+ num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
+ SPDK_NVMF_MAX_NUM_REGISTRANTS,
+ &ctrlr->hostid);
+
+		/* there is only one reservation holder and the reservation key is valid */
+ if (!all_regs) {
+ /* preempt itself */
+ if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
+ ns->crkey == key.prkey) {
+ ns->rtype = rtype;
+ reservation_released = true;
+ break;
+ }
+
+ if (ns->crkey == key.prkey) {
+ nvmf_ns_reservation_remove_registrant(ns, ns->holder);
+ nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
+ reservation_released = true;
+ } else if (key.prkey != 0) {
+ nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
+ } else {
+ /* PRKEY is zero */
+ SPDK_ERRLOG("Current PRKEY is zero\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ update_sgroup = false;
+ goto exit;
+ }
+ } else {
+ /* release all other registrants except for the current one */
+ if (key.prkey == 0) {
+ nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
+ assert(ns->holder == reg);
+ } else {
+ count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
+ if (count == 0) {
+ SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ update_sgroup = false;
+ goto exit;
+ }
+ }
+ }
+ break;
+ default:
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ update_sgroup = false;
+ break;
+ }
+
+exit:
+ if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
+ new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
+ SPDK_NVMF_MAX_NUM_REGISTRANTS,
+ &ctrlr->hostid);
+ /* Preempt notification occurs on the unregistered controllers
+ * other than the controller who issued the command.
+ */
+ num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
+ num_hostid,
+ new_hostid_list,
+ new_num_hostid);
+ if (num_hostid) {
+ nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
+ hostid_list,
+ num_hostid,
+ SPDK_NVME_REGISTRATION_PREEMPTED);
+
+ }
+ /* Reservation released notification occurs on the
+ * controllers which are the remaining registrants other than
+ * the controller who issued the command.
+ */
+ if (reservation_released && new_num_hostid) {
+ nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
+ new_hostid_list,
+ new_num_hostid,
+ SPDK_NVME_RESERVATION_RELEASED);
+
+ }
+ }
+ if (update_sgroup && ns->ptpl_activated) {
+ if (nvmf_ns_update_reservation_info(ns)) {
+ status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ }
+ }
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = status;
+ return update_sgroup;
+}
+
+static bool
+nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ uint8_t rrela, iekey, rtype;
+ struct spdk_nvmf_registrant *reg;
+ uint64_t crkey;
+ uint8_t status = SPDK_NVME_SC_SUCCESS;
+ bool update_sgroup = true;
+ struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
+ uint32_t num_hostid = 0;
+
+ rrela = cmd->cdw10_bits.resv_release.rrela;
+ iekey = cmd->cdw10_bits.resv_release.iekey;
+ rtype = cmd->cdw10_bits.resv_release.rtype;
+
+ if (req->data && req->length >= sizeof(crkey)) {
+ memcpy(&crkey, req->data, sizeof(crkey));
+ } else {
+ SPDK_ERRLOG("No key provided. Failing request.\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
+ "CRKEY 0x%"PRIx64"\n", rrela, iekey, rtype, crkey);
+
+ if (iekey) {
+ SPDK_ERRLOG("Ignore existing key field set to 1\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ update_sgroup = false;
+ goto exit;
+ }
+
+ reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
+ if (!reg || reg->rkey != crkey) {
+ SPDK_ERRLOG("No registrant or current key doesn't match "
+ "with existing registrant key\n");
+ status = SPDK_NVME_SC_RESERVATION_CONFLICT;
+ update_sgroup = false;
+ goto exit;
+ }
+
+ num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
+ SPDK_NVMF_MAX_NUM_REGISTRANTS,
+ &ctrlr->hostid);
+
+ switch (rrela) {
+ case SPDK_NVME_RESERVE_RELEASE:
+ if (!ns->holder) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF, "RELEASE: no holder\n");
+ update_sgroup = false;
+ goto exit;
+ }
+ if (ns->rtype != rtype) {
+ SPDK_ERRLOG("Type doesn't match\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ update_sgroup = false;
+ goto exit;
+ }
+ if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
+ /* not the reservation holder, this isn't an error */
+ update_sgroup = false;
+ goto exit;
+ }
+
+ rtype = ns->rtype;
+ nvmf_ns_reservation_release_reservation(ns);
+
+ if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
+ rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
+ nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
+ hostid_list,
+ num_hostid,
+ SPDK_NVME_RESERVATION_RELEASED);
+ }
+ break;
+ case SPDK_NVME_RESERVE_CLEAR:
+ nvmf_ns_reservation_clear_all_registrants(ns);
+ if (num_hostid) {
+ nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
+ hostid_list,
+ num_hostid,
+ SPDK_NVME_RESERVATION_PREEMPTED);
+ }
+ break;
+ default:
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ update_sgroup = false;
+ goto exit;
+ }
+
+exit:
+ if (update_sgroup && ns->ptpl_activated) {
+ if (nvmf_ns_update_reservation_info(ns)) {
+ status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ }
+ }
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = status;
+ return update_sgroup;
+}
+
+static void
+nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
+ struct spdk_nvmf_ctrlr *ctrlr,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
+ struct spdk_nvmf_ctrlr *ctrlr_tmp;
+ struct spdk_nvmf_registrant *reg, *tmp;
+ struct spdk_nvme_reservation_status_extended_data *status_data;
+ struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
+ uint8_t *payload;
+ uint32_t len, count = 0;
+ uint32_t regctl = 0;
+ uint8_t status = SPDK_NVME_SC_SUCCESS;
+
+ if (req->data == NULL) {
+		SPDK_ERRLOG("No data transfer specified for request. "
+			    "Unable to transfer back response.\n");
+ status = SPDK_NVME_SC_INVALID_FIELD;
+ goto exit;
+ }
+
+ if (!cmd->cdw11_bits.resv_report.eds) {
+		SPDK_ERRLOG("NVMe-oF uses the extended controller data structure; "
+			    "please set the EDS bit in cdw11 and try again\n");
+ status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
+ goto exit;
+ }
+
+	/* Get the number of registered controllers; one host may have more
+	 * than one controller, one per port.
+	 */
+ TAILQ_FOREACH(ctrlr_tmp, &subsystem->ctrlrs, link) {
+ reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr_tmp->hostid);
+ if (reg) {
+ regctl++;
+ }
+ }
+
+ len = sizeof(*status_data) + sizeof(*ctrlr_data) * regctl;
+ payload = calloc(1, len);
+ if (!payload) {
+ status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ goto exit;
+ }
+
+ status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
+ status_data->data.gen = ns->gen;
+ status_data->data.rtype = ns->rtype;
+ status_data->data.regctl = regctl;
+ status_data->data.ptpls = ns->ptpl_activated;
+
+ TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
+ assert(count <= regctl);
+ ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
+ (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * count);
+		/* Set to 0xffff for a dynamic controller */
+ ctrlr_data->cntlid = 0xffff;
+ ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
+ ctrlr_data->rkey = reg->rkey;
+ spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
+ count++;
+ }
+
+ memcpy(req->data, payload, spdk_min(len, (cmd->cdw10 + 1) * sizeof(uint32_t)));
+ free(payload);
+
+exit:
+ req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ req->rsp->nvme_cpl.status.sc = status;
+ return;
+}
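+
+/* Rough layout of the extended reservation report assembled above (a visual
+ * aid only; the struct definitions come from the SPDK NVMe spec headers):
+ *
+ *   [ spdk_nvme_reservation_status_extended_data ]   gen, rtype, regctl, ptpls
+ *   [ spdk_nvme_registered_ctrlr_extended_data #0 ]  cntlid=0xffff, rcsts, rkey, hostid
+ *   [ spdk_nvme_registered_ctrlr_extended_data #1 ]
+ *   ...                                              one entry per registered controller
+ *
+ * Only the first (cmd->cdw10 + 1) dwords are copied back into req->data.
+ */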
+
+static void
+nvmf_ns_reservation_complete(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+
+ spdk_nvmf_request_complete(req);
+}
+
+static void
+_nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
+ void *cb_arg, int status)
+{
+ struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
+ struct spdk_nvmf_poll_group *group = req->qpair->group;
+
+ spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
+}
+
+void
+nvmf_ns_reservation_request(void *ctx)
+{
+ struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
+ struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+ struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
+ struct subsystem_update_ns_ctx *update_ctx;
+ uint32_t nsid;
+ struct spdk_nvmf_ns *ns;
+ bool update_sgroup = false;
+
+ nsid = cmd->nsid;
+ ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
+ assert(ns != NULL);
+
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_RESERVATION_REGISTER:
+ update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
+ break;
+ case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
+ update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
+ break;
+ case SPDK_NVME_OPC_RESERVATION_RELEASE:
+ update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
+ break;
+ case SPDK_NVME_OPC_RESERVATION_REPORT:
+ nvmf_ns_reservation_report(ns, ctrlr, req);
+ break;
+ default:
+ break;
+ }
+
+ /* update reservation information to subsystem's poll group */
+ if (update_sgroup) {
+ update_ctx = calloc(1, sizeof(*update_ctx));
+ if (update_ctx == NULL) {
+ SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
+ goto update_done;
+ }
+ update_ctx->subsystem = ctrlr->subsys;
+ update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
+ update_ctx->cb_arg = req;
+
+ nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
+ return;
+ }
+
+update_done:
+ _nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
+}
diff --git a/src/spdk/lib/nvmf/tcp.c b/src/spdk/lib/nvmf/tcp.c
new file mode 100644
index 000000000..391d4bcf1
--- /dev/null
+++ b/src/spdk/lib/nvmf/tcp.c
@@ -0,0 +1,2631 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/crc32.h"
+#include "spdk/endian.h"
+#include "spdk/assert.h"
+#include "spdk/thread.h"
+#include "spdk/nvmf_transport.h"
+#include "spdk/sock.h"
+#include "spdk/string.h"
+#include "spdk/trace.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/assert.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/nvme_tcp.h"
+
+#include "nvmf_internal.h"
+
+#define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
+#define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
+
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp;
+
+/* spdk nvmf related structure */
+enum spdk_nvmf_tcp_req_state {
+
+ /* The request is not currently in use */
+ TCP_REQUEST_STATE_FREE = 0,
+
+ /* Initial state when request first received */
+ TCP_REQUEST_STATE_NEW,
+
+ /* The request is queued until a data buffer is available. */
+ TCP_REQUEST_STATE_NEED_BUFFER,
+
+ /* The request is currently transferring data from the host to the controller. */
+ TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+
+ /* The request is waiting for the R2T send acknowledgement. */
+ TCP_REQUEST_STATE_AWAITING_R2T_ACK,
+
+ /* The request is ready to execute at the block device */
+ TCP_REQUEST_STATE_READY_TO_EXECUTE,
+
+ /* The request is currently executing at the block device */
+ TCP_REQUEST_STATE_EXECUTING,
+
+ /* The request finished executing at the block device */
+ TCP_REQUEST_STATE_EXECUTED,
+
+ /* The request is ready to send a completion */
+ TCP_REQUEST_STATE_READY_TO_COMPLETE,
+
+ /* The request is currently transferring final pdus from the controller to the host. */
+ TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+
+ /* The request completed and can be marked free. */
+ TCP_REQUEST_STATE_COMPLETED,
+
+ /* Terminator */
+ TCP_REQUEST_NUM_STATES,
+};
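+
+/* A simplified sketch of how a request typically moves through the states
+ * above (error paths and the AWAITING_R2T_ACK detour are omitted):
+ *
+ *   read:  FREE -> NEW -> NEED_BUFFER -> READY_TO_EXECUTE -> EXECUTING ->
+ *          EXECUTED -> READY_TO_COMPLETE -> TRANSFERRING_CONTROLLER_TO_HOST ->
+ *          COMPLETED -> FREE
+ *   write: FREE -> NEW -> NEED_BUFFER -> TRANSFERRING_HOST_TO_CONTROLLER ->
+ *          READY_TO_EXECUTE -> EXECUTING -> EXECUTED -> READY_TO_COMPLETE ->
+ *          TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE
+ */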
+
+static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
+ "Invalid PDU Header Field",
+ "PDU Sequence Error",
+	"Header Digest Error",
+ "Data Transfer Out of Range",
+ "R2T Limit Exceeded",
+ "Unsupported parameter",
+};
+
+#define OBJECT_NVMF_TCP_IO 0x80
+
+#define TRACE_GROUP_NVMF_TCP 0x5
+#define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
+#define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
+#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
+#define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
+#define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
+#define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
+#define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
+#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
+#define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
+#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
+#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
+#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
+#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC)
+
+SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
+{
+ spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
+ spdk_trace_register_description("TCP_REQ_NEW",
+ TRACE_TCP_REQUEST_STATE_NEW,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
+ spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
+ TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
+ TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
+ TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_EXECUTING",
+ TRACE_TCP_REQUEST_STATE_EXECUTING,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_EXECUTED",
+ TRACE_TCP_REQUEST_STATE_EXECUTED,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
+ TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
+ TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_REQ_COMPLETED",
+ TRACE_TCP_REQUEST_STATE_COMPLETED,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+ spdk_trace_register_description("TCP_WRITE_START",
+ TRACE_TCP_FLUSH_WRITEBUF_START,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("TCP_WRITE_DONE",
+ TRACE_TCP_FLUSH_WRITEBUF_DONE,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("TCP_READ_DONE",
+ TRACE_TCP_READ_FROM_SOCKET_DONE,
+ OWNER_NONE, OBJECT_NONE, 0, 0, "");
+ spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
+ TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
+ OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
+}
+
+struct spdk_nvmf_tcp_req {
+ struct spdk_nvmf_request req;
+ struct spdk_nvme_cpl rsp;
+ struct spdk_nvme_cmd cmd;
+
+ /* A PDU that can be used for sending responses. This is
+ * not the incoming PDU! */
+ struct nvme_tcp_pdu *pdu;
+
+ /*
+ * The PDU for a request may be used multiple times in serial over
+ * the request's lifetime. For example, first to send an R2T, then
+ * to send a completion. To catch mistakes where the PDU is used
+ * twice at the same time, add a debug flag here for init/fini.
+ */
+ bool pdu_in_use;
+
+ /* In-capsule data buffer */
+ uint8_t *buf;
+
+ bool has_incapsule_data;
+
+ /* transfer_tag */
+ uint16_t ttag;
+
+ enum spdk_nvmf_tcp_req_state state;
+
+ /*
+ * h2c_offset is used when we receive the h2c_data PDU.
+ */
+ uint32_t h2c_offset;
+
+ STAILQ_ENTRY(spdk_nvmf_tcp_req) link;
+ TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link;
+};
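+
+/* Illustrative use of the per-request PDU guarded by pdu_in_use; the helper
+ * names are the ones defined later in this file:
+ *
+ *   struct nvme_tcp_pdu *pdu = nvmf_tcp_req_pdu_init(tcp_req); // asserts !pdu_in_use
+ *   ...fill in pdu->hdr, e.g. an R2T or a capsule response...
+ *   nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
+ *   ...and once the write completes, from the completion path:
+ *   nvmf_tcp_req_pdu_fini(tcp_req);                            // clears pdu_in_use
+ */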
+
+struct spdk_nvmf_tcp_qpair {
+ struct spdk_nvmf_qpair qpair;
+ struct spdk_nvmf_tcp_poll_group *group;
+ struct spdk_nvmf_tcp_port *port;
+ struct spdk_sock *sock;
+
+ enum nvme_tcp_pdu_recv_state recv_state;
+ enum nvme_tcp_qpair_state state;
+
+ /* PDU being actively received */
+ struct nvme_tcp_pdu pdu_in_progress;
+ uint32_t recv_buf_size;
+
+ /* This is a spare PDU used for sending special management
+ * operations. Primarily, this is used for the initial
+ * connection response and c2h termination request. */
+ struct nvme_tcp_pdu mgmt_pdu;
+
+ TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
+
+ /* Arrays of in-capsule buffers, requests, and pdus.
+ * Each array is 'resource_count' number of elements */
+ void *bufs;
+ struct spdk_nvmf_tcp_req *reqs;
+ struct nvme_tcp_pdu *pdus;
+ uint32_t resource_count;
+
+ /* Queues to track the requests in all states */
+ TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES];
+ /* Number of requests in each state */
+ uint32_t state_cntr[TCP_REQUEST_NUM_STATES];
+
+ uint8_t cpda;
+
+ bool host_hdgst_enable;
+ bool host_ddgst_enable;
+
+ /* IP address */
+ char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
+ char target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
+
+ /* IP port */
+ uint16_t initiator_port;
+ uint16_t target_port;
+
+	/* Timer used to destroy the qpair after a transport error is detected,
+	 * if the initiator does not close the connection.
+	 */
+ struct spdk_poller *timeout_poller;
+
+ TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link;
+};
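+
+/* Bookkeeping invariant for the per-state queues above: every allocated
+ * request sits in exactly one state_queue[] at any time, so the sum of
+ * state_cntr[] always equals resource_count. nvmf_tcp_qpair_destroy()
+ * relies on this when it checks that all requests have returned to
+ * TCP_REQUEST_STATE_FREE.
+ */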
+
+struct spdk_nvmf_tcp_poll_group {
+ struct spdk_nvmf_transport_poll_group group;
+ struct spdk_sock_group *sock_group;
+
+ TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs;
+ TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req;
+};
+
+struct spdk_nvmf_tcp_port {
+ const struct spdk_nvme_transport_id *trid;
+ struct spdk_sock *listen_sock;
+ TAILQ_ENTRY(spdk_nvmf_tcp_port) link;
+};
+
+struct spdk_nvmf_tcp_transport {
+ struct spdk_nvmf_transport transport;
+
+ pthread_mutex_t lock;
+
+ TAILQ_HEAD(, spdk_nvmf_tcp_port) ports;
+};
+
+static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_req *tcp_req);
+
+static void
+nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
+ enum spdk_nvmf_tcp_req_state state)
+{
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_tcp_qpair *tqpair;
+
+ qpair = tcp_req->req.qpair;
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+
+ TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
+ assert(tqpair->state_cntr[tcp_req->state] > 0);
+ tqpair->state_cntr[tcp_req->state]--;
+
+ TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
+ tqpair->state_cntr[state]++;
+
+ tcp_req->state = state;
+}
+
+static inline struct nvme_tcp_pdu *
+nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req)
+{
+ assert(tcp_req->pdu_in_use == false);
+ tcp_req->pdu_in_use = true;
+
+ memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu));
+ tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
+
+ return tcp_req->pdu;
+}
+
+static inline void
+nvmf_tcp_req_pdu_fini(struct spdk_nvmf_tcp_req *tcp_req)
+{
+ tcp_req->pdu_in_use = false;
+}
+
+static struct spdk_nvmf_tcp_req *
+nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+
+ tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
+ if (!tcp_req) {
+ return NULL;
+ }
+
+ memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
+ tcp_req->h2c_offset = 0;
+ tcp_req->has_incapsule_data = false;
+ tcp_req->req.dif.dif_insert_or_strip = false;
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
+ return tcp_req;
+}
+
+static void
+nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+
+ assert(tcp_req != NULL);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
+ ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
+ struct spdk_nvmf_tcp_transport, transport);
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+}
+
+static int
+nvmf_tcp_req_free(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
+
+ nvmf_tcp_request_free(tcp_req);
+
+ return 0;
+}
+
+static void
+nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
+ enum spdk_nvmf_tcp_req_state state)
+{
+ struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
+
+ TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
+ nvmf_tcp_request_free(tcp_req);
+ }
+}
+
+static void
+nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
+
+ assert(TAILQ_EMPTY(&tqpair->send_queue));
+
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
+
+ /* Wipe the requests waiting for buffer from the global list */
+ TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
+ req_tmp) {
+ STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
+ spdk_nvmf_request, buf_link);
+ }
+
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+ nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
+}
+
+static void
+nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ int i;
+ struct spdk_nvmf_tcp_req *tcp_req;
+
+ SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
+ for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
+ SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]);
+ TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
+ SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
+ SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
+ }
+ }
+}
+
+static void
+nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ int err = 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
+
+ err = spdk_sock_close(&tqpair->sock);
+ assert(err == 0);
+ nvmf_tcp_cleanup_all_states(tqpair);
+
+ if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) {
+ SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
+ tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
+ tqpair->resource_count);
+ err++;
+ }
+
+ if (err > 0) {
+ nvmf_tcp_dump_qpair_req_contents(tqpair);
+ }
+
+ spdk_dma_free(tqpair->pdus);
+ free(tqpair->reqs);
+ spdk_free(tqpair->bufs);
+ free(tqpair);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
+}
+
+static int
+nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+
+ assert(transport != NULL);
+ ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
+
+ pthread_mutex_destroy(&ttransport->lock);
+ free(ttransport);
+ return 0;
+}
+
+static struct spdk_nvmf_transport *
+nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+ uint32_t sge_count;
+ uint32_t min_shared_buffers;
+
+ ttransport = calloc(1, sizeof(*ttransport));
+ if (!ttransport) {
+ return NULL;
+ }
+
+ TAILQ_INIT(&ttransport->ports);
+
+ ttransport->transport.ops = &spdk_nvmf_transport_tcp;
+
+ SPDK_NOTICELOG("*** TCP Transport Init ***\n");
+
+ SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
+ " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
+ " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
+ " in_capsule_data_size=%d, max_aq_depth=%d\n"
+ " num_shared_buffers=%d, c2h_success=%d,\n"
+ " dif_insert_or_strip=%d, sock_priority=%d\n"
+ " abort_timeout_sec=%d\n",
+ opts->max_queue_depth,
+ opts->max_io_size,
+ opts->max_qpairs_per_ctrlr - 1,
+ opts->io_unit_size,
+ opts->in_capsule_data_size,
+ opts->max_aq_depth,
+ opts->num_shared_buffers,
+ opts->c2h_success,
+ opts->dif_insert_or_strip,
+ opts->sock_priority,
+ opts->abort_timeout_sec);
+
+ if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
+		SPDK_ERRLOG("Unsupported sock_priority=%d, the supported range is 0 to %d\n"
+			    "(see 'man 7 socket', SO_PRIORITY, for the valid range)\n",
+ opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
+ free(ttransport);
+ return NULL;
+ }
+
+ /* I/O unit size cannot be larger than max I/O size */
+ if (opts->io_unit_size > opts->max_io_size) {
+ opts->io_unit_size = opts->max_io_size;
+ }
+
+ sge_count = opts->max_io_size / opts->io_unit_size;
+ if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
+ SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
+ free(ttransport);
+ return NULL;
+ }
+
+ min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
+ if (min_shared_buffers > opts->num_shared_buffers) {
+		SPDK_ERRLOG("There are not enough buffers to satisfy "
+			    "per-poll group caches for each thread: (%" PRIu32 ") "
+			    "supplied, (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
+ SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
+ nvmf_tcp_destroy(&ttransport->transport);
+ return NULL;
+ }
+
+ pthread_mutex_init(&ttransport->lock, NULL);
+
+ return &ttransport->transport;
+}
+
+static int
+nvmf_tcp_trsvcid_to_int(const char *trsvcid)
+{
+ unsigned long long ull;
+ char *end = NULL;
+
+ ull = strtoull(trsvcid, &end, 10);
+ if (end == NULL || end == trsvcid || *end != '\0') {
+ return -1;
+ }
+
+ /* Valid TCP/IP port numbers are in [0, 65535] */
+ if (ull > 65535) {
+ return -1;
+ }
+
+ return (int)ull;
+}
+
+/**
+ * Canonicalize a listen address trid.
+ */
+static int
+nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
+ const struct spdk_nvme_transport_id *trid)
+{
+ int trsvcid_int;
+
+ trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
+ if (trsvcid_int < 0) {
+ return -EINVAL;
+ }
+
+ memset(canon_trid, 0, sizeof(*canon_trid));
+ spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP);
+ canon_trid->adrfam = trid->adrfam;
+ snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
+ snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
+
+ return 0;
+}
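+
+/* For example (hypothetical values), a trid with traddr "127.0.0.1" and
+ * trsvcid "04420" is canonicalized to trsvcid "4420": the service ID is
+ * parsed as an integer and re-printed, so different string spellings of the
+ * same port compare equal in nvmf_tcp_find_port() below.
+ */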
+
+/**
+ * Find an existing listening port.
+ *
+ * Caller must hold ttransport->lock.
+ */
+static struct spdk_nvmf_tcp_port *
+nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvme_transport_id canon_trid;
+ struct spdk_nvmf_tcp_port *port;
+
+ if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
+ return NULL;
+ }
+
+ TAILQ_FOREACH(port, &ttransport->ports, link) {
+ if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) {
+ return port;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+ struct spdk_nvmf_tcp_port *port;
+ int trsvcid_int;
+ uint8_t adrfam;
+ struct spdk_sock_opts opts;
+
+ ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
+
+ trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
+ if (trsvcid_int < 0) {
+ SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
+ return -EINVAL;
+ }
+
+ pthread_mutex_lock(&ttransport->lock);
+ port = calloc(1, sizeof(*port));
+ if (!port) {
+ SPDK_ERRLOG("Port allocation failed\n");
+ pthread_mutex_unlock(&ttransport->lock);
+ return -ENOMEM;
+ }
+
+ port->trid = trid;
+ opts.opts_size = sizeof(opts);
+ spdk_sock_get_default_opts(&opts);
+ opts.priority = transport->opts.sock_priority;
+ port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int,
+ NULL, &opts);
+ if (port->listen_sock == NULL) {
+		SPDK_ERRLOG("spdk_sock_listen_ext(%s, %d) failed: %s (%d)\n",
+ trid->traddr, trsvcid_int,
+ spdk_strerror(errno), errno);
+ free(port);
+ pthread_mutex_unlock(&ttransport->lock);
+ return -errno;
+ }
+
+ if (spdk_sock_is_ipv4(port->listen_sock)) {
+ adrfam = SPDK_NVMF_ADRFAM_IPV4;
+ } else if (spdk_sock_is_ipv6(port->listen_sock)) {
+ adrfam = SPDK_NVMF_ADRFAM_IPV6;
+ } else {
+ SPDK_ERRLOG("Unhandled socket type\n");
+ adrfam = 0;
+ }
+
+ if (adrfam != trid->adrfam) {
+ SPDK_ERRLOG("Socket address family mismatch\n");
+ spdk_sock_close(&port->listen_sock);
+ free(port);
+ pthread_mutex_unlock(&ttransport->lock);
+ return -EINVAL;
+ }
+
+ SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n",
+ trid->traddr, trid->trsvcid);
+
+ TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
+ pthread_mutex_unlock(&ttransport->lock);
+ return 0;
+}
+
+static void
+nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+ struct spdk_nvmf_tcp_port *port;
+
+ ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
+ trid->traddr, trid->trsvcid);
+
+ pthread_mutex_lock(&ttransport->lock);
+ port = nvmf_tcp_find_port(ttransport, trid);
+ if (port) {
+ TAILQ_REMOVE(&ttransport->ports, port, link);
+ spdk_sock_close(&port->listen_sock);
+ free(port);
+ }
+
+ pthread_mutex_unlock(&ttransport->lock);
+}
+
+static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
+ enum nvme_tcp_pdu_recv_state state);
+
+static void
+nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Disconnecting qpair %p\n", tqpair);
+
+ if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) {
+ tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
+ spdk_poller_unregister(&tqpair->timeout_poller);
+
+ /* This will end up calling nvmf_tcp_close_qpair */
+ spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
+ }
+}
+
+static void
+_pdu_write_done(void *_pdu, int err)
+{
+ struct nvme_tcp_pdu *pdu = _pdu;
+ struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
+
+ TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
+
+ if (err != 0) {
+ nvmf_tcp_qpair_disconnect(tqpair);
+ return;
+ }
+
+ assert(pdu->cb_fn != NULL);
+ pdu->cb_fn(pdu->cb_arg);
+}
+
+static void
+nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu,
+ nvme_tcp_qpair_xfer_complete_cb cb_fn,
+ void *cb_arg)
+{
+ int hlen;
+ uint32_t crc32c;
+ uint32_t mapped_length = 0;
+ ssize_t rc;
+
+ assert(&tqpair->pdu_in_progress != pdu);
+
+ hlen = pdu->hdr.common.hlen;
+
+ /* Header Digest */
+ if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
+ crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
+ MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
+ }
+
+ /* Data Digest */
+ if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
+ crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
+ MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
+ }
+
+ pdu->cb_fn = cb_fn;
+ pdu->cb_arg = cb_arg;
+
+ pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
+ tqpair->host_hdgst_enable, tqpair->host_ddgst_enable,
+ &mapped_length);
+ pdu->sock_req.cb_fn = _pdu_write_done;
+ pdu->sock_req.cb_arg = pdu;
+ TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
+ if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
+ pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
+ rc = spdk_sock_writev(tqpair->sock, pdu->iov, pdu->sock_req.iovcnt);
+ if (rc == mapped_length) {
+ _pdu_write_done(pdu, 0);
+ } else {
+ SPDK_ERRLOG("IC_RESP or TERM_REQ could not write to socket.\n");
+ _pdu_write_done(pdu, -1);
+ }
+ } else {
+ spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
+ }
+}
+
+static int
+nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ uint32_t i;
+ struct spdk_nvmf_transport_opts *opts;
+ uint32_t in_capsule_data_size;
+
+ opts = &tqpair->qpair.transport->opts;
+
+ in_capsule_data_size = opts->in_capsule_data_size;
+ if (opts->dif_insert_or_strip) {
+ in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
+ }
+
+ tqpair->resource_count = opts->max_queue_depth;
+
+ tqpair->mgmt_pdu.qpair = tqpair;
+
+ tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
+ if (!tqpair->reqs) {
+ SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
+ return -1;
+ }
+
+ if (in_capsule_data_size) {
+ tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
+ NULL, SPDK_ENV_LCORE_ID_ANY,
+ SPDK_MALLOC_DMA);
+ if (!tqpair->bufs) {
+ SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
+ return -1;
+ }
+ }
+
+ tqpair->pdus = spdk_dma_malloc(tqpair->resource_count * sizeof(*tqpair->pdus), 0x1000, NULL);
+ if (!tqpair->pdus) {
+		SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
+ return -1;
+ }
+
+ for (i = 0; i < tqpair->resource_count; i++) {
+ struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
+
+ tcp_req->ttag = i + 1;
+ tcp_req->req.qpair = &tqpair->qpair;
+
+ tcp_req->pdu = &tqpair->pdus[i];
+ tcp_req->pdu->qpair = tqpair;
+
+ /* Set up memory to receive commands */
+ if (tqpair->bufs) {
+ tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
+ }
+
+		/* Set the cmd and rsp */
+ tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
+ tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
+
+ /* Initialize request state to FREE */
+ tcp_req->state = TCP_REQUEST_STATE_FREE;
+ TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
+ tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
+ }
+
+ tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
+ SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+
+ return 0;
+}
+
+static int
+nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ int i;
+
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
+
+ TAILQ_INIT(&tqpair->send_queue);
+
+	/* Initialize the request state queues of the qpair */
+ for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
+ TAILQ_INIT(&tqpair->state_queue[i]);
+ }
+
+ tqpair->host_hdgst_enable = true;
+ tqpair->host_ddgst_enable = true;
+
+ return 0;
+}
+
+static int
+nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ int rc;
+
+ /* set low water mark */
+ rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
+ if (rc != 0) {
+ SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void
+nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_tcp_port *port,
+ struct spdk_sock *sock)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
+ port->trid->traddr, port->trid->trsvcid);
+
+ tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
+ if (tqpair == NULL) {
+ SPDK_ERRLOG("Could not allocate new connection.\n");
+ spdk_sock_close(&sock);
+ return;
+ }
+
+ tqpair->sock = sock;
+ tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
+ tqpair->port = port;
+ tqpair->qpair.transport = transport;
+
+ rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
+ sizeof(tqpair->target_addr), &tqpair->target_port,
+ tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
+ &tqpair->initiator_port);
+ if (rc < 0) {
+ SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair);
+ nvmf_tcp_qpair_destroy(tqpair);
+ return;
+ }
+
+ spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair);
+}
+
+static uint32_t
+nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port)
+{
+ struct spdk_sock *sock;
+ uint32_t count = 0;
+ int i;
+
+ for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
+ sock = spdk_sock_accept(port->listen_sock);
+ if (sock == NULL) {
+ break;
+ }
+ count++;
+ nvmf_tcp_handle_connect(transport, port, sock);
+ }
+
+ return count;
+}
+
+static uint32_t
+nvmf_tcp_accept(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+ struct spdk_nvmf_tcp_port *port;
+ uint32_t count = 0;
+
+ ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
+
+ TAILQ_FOREACH(port, &ttransport->ports, link) {
+ count += nvmf_tcp_port_accept(transport, port);
+ }
+
+ return count;
+}
+
+static void
+nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+ entry->trtype = SPDK_NVMF_TRTYPE_TCP;
+ entry->adrfam = trid->adrfam;
+ entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
+
+ spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
+ spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
+
+ entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
+}
+
+static struct spdk_nvmf_transport_poll_group *
+nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_tcp_poll_group *tgroup;
+
+ tgroup = calloc(1, sizeof(*tgroup));
+ if (!tgroup) {
+ return NULL;
+ }
+
+ tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
+ if (!tgroup->sock_group) {
+ goto cleanup;
+ }
+
+ TAILQ_INIT(&tgroup->qpairs);
+ TAILQ_INIT(&tgroup->await_req);
+
+ return &tgroup->group;
+
+cleanup:
+ free(tgroup);
+ return NULL;
+}
+
+static struct spdk_nvmf_transport_poll_group *
+nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ struct spdk_sock_group *group = NULL;
+ int rc;
+
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
+ if (!rc && group != NULL) {
+ return spdk_sock_group_get_ctx(group);
+ }
+
+ return NULL;
+}
+
+static void
+nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_tcp_poll_group *tgroup;
+
+ tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
+ spdk_sock_group_close(&tgroup->sock_group);
+
+ free(tgroup);
+}
+
+static void
+nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
+ enum nvme_tcp_pdu_recv_state state)
+{
+ if (tqpair->recv_state == state) {
+		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state (%d) to be set\n",
+ tqpair, state);
+ return;
+ }
+
+ if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
+ /* When leaving the await req state, move the qpair to the main list */
+ TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link);
+ TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
+ tqpair->recv_state = state;
+
+ switch (state) {
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
+ break;
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
+ TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link);
+ TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link);
+ break;
+ case NVME_TCP_PDU_RECV_STATE_ERROR:
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
+ memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
+ break;
+ default:
+ SPDK_ERRLOG("The state(%d) is invalid\n", state);
+ abort();
+ break;
+ }
+}
+
+static int
+nvmf_tcp_qpair_handle_timeout(void *ctx)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair = ctx;
+
+ assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
+
+	SPDK_ERRLOG("No PDU received on tqpair=%p within %d seconds\n", tqpair,
+ SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
+
+ nvmf_tcp_qpair_disconnect(tqpair);
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
+
+ if (!tqpair->timeout_poller) {
+ tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair,
+ SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
+ }
+}
+
+static void
+nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
+ enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
+{
+ struct nvme_tcp_pdu *rsp_pdu;
+ struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
+ uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
+ uint32_t copy_len;
+
+ rsp_pdu = &tqpair->mgmt_pdu;
+
+ c2h_term_req = &rsp_pdu->hdr.term_req;
+ c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
+ c2h_term_req->common.hlen = c2h_term_req_hdr_len;
+
+ if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
+ (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
+ DSET32(&c2h_term_req->fei, error_offset);
+ }
+
+ copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);
+
+ /* Copy the error info into the buffer */
+ memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
+ nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);
+
+ /* The plen includes the header of the offending PDU */
+ c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
+ nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_c2h_term_req_complete, tqpair);
+}
+
+static void
+nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+
+ assert(pdu->psh_valid_bytes == pdu->psh_len);
+ assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD);
+
+ tcp_req = nvmf_tcp_req_get(tqpair);
+ if (!tcp_req) {
+ /* Return now; the allocation will be retried when an outstanding request completes */
+ if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0) {
+ return;
+ }
+
+ /* The host sent more commands than the maximum queue depth. */
+ SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
+ nvmf_tcp_qpair_disconnect(tqpair);
+ return;
+ }
+
+ pdu->req = tcp_req;
+ assert(tcp_req->state == TCP_REQUEST_STATE_NEW);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+}
+
+static void
+nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+ struct spdk_nvme_tcp_cmd *capsule_cmd;
+ uint32_t error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+
+ capsule_cmd = &pdu->hdr.capsule_cmd;
+ tcp_req = pdu->req;
+ assert(tcp_req != NULL);
+ if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
+ SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n",
+ SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
+ goto err;
+ }
+
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+
+ return;
+err:
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+}
+
+static int
+nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair,
+ enum spdk_nvmf_tcp_req_state state,
+ uint16_t cid, uint16_t tag,
+ struct spdk_nvmf_tcp_req **req)
+{
+ struct spdk_nvmf_tcp_req *tcp_req = NULL;
+
+ TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) {
+ if (tcp_req->req.cmd->nvme_cmd.cid != cid) {
+ continue;
+ }
+
+ if (tcp_req->ttag == tag) {
+ *req = tcp_req;
+ return 0;
+ }
+
+ *req = NULL;
+ return -1;
+ }
+
+ /* Didn't find it, but not an error */
+ *req = NULL;
+ return 0;
+}
+
+static void
+nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+ uint32_t error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes = 0;
+ struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
+ int rc;
+
+ h2c_data = &pdu->hdr.h2c_data;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
+ tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
+
+ rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
+ h2c_data->cccid, h2c_data->ttag, &tcp_req);
+ if (rc == 0 && tcp_req == NULL) {
+ rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid,
+ h2c_data->ttag, &tcp_req);
+ }
+
+ if (!tcp_req) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
+ if (rc == 0) {
+ error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
+ } else {
+ error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
+ }
+ goto err;
+ }
+
+ if (tcp_req->h2c_offset != h2c_data->datao) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
+ "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n",
+ tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
+ goto err;
+ }
+
+ if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
+ "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) execeeds requested length=%u\n",
+ tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
+ goto err;
+ }
+
+ pdu->req = tcp_req;
+
+ if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
+ pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
+ }
+
+ nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
+ h2c_data->datao, h2c_data->datal);
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
+ return;
+
+err:
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+}
+
+static void
+nvmf_tcp_pdu_cmd_complete(void *cb_arg)
+{
+ struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
+ nvmf_tcp_request_free(tcp_req);
+}
+
+static void
+nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
+ struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ struct nvme_tcp_pdu *rsp_pdu;
+ struct spdk_nvme_tcp_rsp *capsule_resp;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
+
+ rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
+ assert(rsp_pdu != NULL);
+
+ capsule_resp = &rsp_pdu->hdr.capsule_resp;
+ capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
+ capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
+ capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
+ if (tqpair->host_hdgst_enable) {
+ capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
+ capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_cmd_complete, tcp_req);
+}
+
+static void
+nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
+{
+ struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
+ struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
+ struct spdk_nvmf_tcp_qpair, qpair);
+
+ assert(tqpair != NULL);
+ if (tqpair->qpair.transport->opts.c2h_success) {
+ nvmf_tcp_request_free(tcp_req);
+ } else {
+ nvmf_tcp_req_pdu_fini(tcp_req);
+ nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
+ }
+}
+
+static void
+nvmf_tcp_r2t_complete(void *cb_arg)
+{
+ struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
+ struct spdk_nvmf_tcp_transport *ttransport;
+
+ nvmf_tcp_req_pdu_fini(tcp_req);
+
+ ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
+ struct spdk_nvmf_tcp_transport, transport);
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+
+ if (tcp_req->h2c_offset == tcp_req->req.length) {
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+ }
+}
+
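+/*
+ * Send an R2T PDU asking the host to transfer the request's data. The request
+ * stays in AWAITING_R2T_ACK until the R2T transmission completes.
+ */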
+static void
+nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct spdk_nvmf_tcp_req *tcp_req)
+{
+ struct nvme_tcp_pdu *rsp_pdu;
+ struct spdk_nvme_tcp_r2t_hdr *r2t;
+
+ rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
+ assert(rsp_pdu != NULL);
+
+ r2t = &rsp_pdu->hdr.r2t;
+ r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
+ r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
+
+ if (tqpair->host_hdgst_enable) {
+ r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
+ r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
+ r2t->ttag = tcp_req->ttag;
+ r2t->r2to = tcp_req->h2c_offset;
+ r2t->r2tl = tcp_req->req.length;
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
+ "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
+ tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
+ nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_r2t_complete, tcp_req);
+}
+
+static void
+nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+
+ tcp_req = pdu->req;
+ assert(tcp_req != NULL);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
+
+ tcp_req->h2c_offset += pdu->data_len;
+
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
+
+ /* Wait for all of the data to arrive AND for the initial R2T PDU send to be
+ * acknowledged before moving on. */
+ if (tcp_req->h2c_offset == tcp_req->req.length &&
+ tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) {
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+ }
+}
+
+static void
+nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
+{
+ SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
+ spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
+ if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
+ (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
+ DGET32(h2c_term_req->fei));
+ }
+}
+
+static void
+nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
+ uint32_t error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+
+
+ if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
+ SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for h2c_term_req pdu=%p\n", pdu);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
+ goto end;
+ }
+
+ /* set the data buffer */
+ nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen,
+ h2c_term_req->common.plen - h2c_term_req->common.hlen);
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
+ return;
+end:
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+}
+
+static void
+nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
+
+ nvmf_tcp_h2c_term_req_dump(h2c_term_req);
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
+}
+
+static void
+nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct spdk_nvmf_tcp_transport *ttransport)
+{
+ int rc = 0;
+ struct nvme_tcp_pdu *pdu;
+ uint32_t crc32c, error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+
+ assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
+ pdu = &tqpair->pdu_in_progress;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
+ /* Check the data digest if needed */
+ if (pdu->ddgst_enable) {
+ crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
+ rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
+ if (rc == 0) {
+ SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+ return;
+
+ }
+ }
+
+ switch (pdu->hdr.common.pdu_type) {
+ case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
+ nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
+ break;
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
+ nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
+ break;
+
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
+ nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
+ break;
+
+ default:
+ /* This code path should be unreachable */
+ SPDK_ERRLOG("Unexpected PDU type in payload handling\n");
+ break;
+ }
+}
+
+static void
+nvmf_tcp_send_icresp_complete(void *cb_arg)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
+
+ tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
+}
+
+static void
+nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_qpair *tqpair,
+ struct nvme_tcp_pdu *pdu)
+{
+ struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
+ struct nvme_tcp_pdu *rsp_pdu;
+ struct spdk_nvme_tcp_ic_resp *ic_resp;
+ uint32_t error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+
+ /* Only PFV 0 is defined currently */
+ if (ic_req->pfv != 0) {
+ SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
+ goto end;
+ }
+
+ /* MAXR2T is 0's based */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
+
+ tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
+ if (!tqpair->host_hdgst_enable) {
+ tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+ }
+
+ tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
+ if (!tqpair->host_ddgst_enable) {
+ tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+ }
+
+ /* Now that we know whether digests are enabled, properly size the receive buffer */
+ if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) {
+ SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
+ tqpair,
+ tqpair->recv_buf_size);
+ /* Not fatal. */
+ }
+
+ tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
+
+ rsp_pdu = &tqpair->mgmt_pdu;
+
+ ic_resp = &rsp_pdu->hdr.ic_resp;
+ ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
+ ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp);
+ ic_resp->pfv = 0;
+ ic_resp->cpda = tqpair->cpda;
+ ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size;
+ ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
+ ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
+
+ tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
+ nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_icresp_complete, tqpair);
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
+ return;
+end:
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+}
+
+static void
+nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct spdk_nvmf_tcp_transport *ttransport)
+{
+ struct nvme_tcp_pdu *pdu;
+ int rc;
+ uint32_t crc32c, error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+
+ assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
+ pdu = &tqpair->pdu_in_progress;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
+ pdu->hdr.common.pdu_type);
+ /* check header digest if needed */
+ if (pdu->has_hdgst) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
+ crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
+ rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
+ if (rc == 0) {
+ SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+ return;
+
+ }
+ }
+
+ switch (pdu->hdr.common.pdu_type) {
+ case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
+ nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
+ break;
+ case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ);
+ break;
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
+ nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
+ break;
+
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
+ nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
+ break;
+
+ default:
+ SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = 1;
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+ break;
+ }
+}
+
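+/*
+ * Validate the common header of an incoming PDU: the PDU type must be legal
+ * for the current qpair state and hlen/pdo/plen must match the expectations
+ * for that type; any violation triggers a C2H termination request.
+ */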
+static void
+nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ struct nvme_tcp_pdu *pdu;
+ uint32_t error_offset = 0;
+ enum spdk_nvme_tcp_term_req_fes fes;
+ uint8_t expected_hlen, pdo;
+ bool plen_error = false, pdo_error = false;
+
+ assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
+ pdu = &tqpair->pdu_in_progress;
+
+ if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
+ if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
+ SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
+ goto err;
+ }
+ expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
+ if (pdu->hdr.common.plen != expected_hlen) {
+ plen_error = true;
+ }
+ } else {
+ if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
+ SPDK_ERRLOG("The TCP/IP connection is not negotitated\n");
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
+ goto err;
+ }
+
+ switch (pdu->hdr.common.pdu_type) {
+ case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
+ expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
+ pdo = pdu->hdr.common.pdo;
+ if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
+ pdo_error = true;
+ break;
+ }
+
+ if (pdu->hdr.common.plen < expected_hlen) {
+ plen_error = true;
+ }
+ break;
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
+ expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
+ pdo = pdu->hdr.common.pdo;
+ if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
+ pdo_error = true;
+ break;
+ }
+ if (pdu->hdr.common.plen < expected_hlen) {
+ plen_error = true;
+ }
+ break;
+
+ case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
+ expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
+ if ((pdu->hdr.common.plen <= expected_hlen) ||
+ (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
+ plen_error = true;
+ }
+ break;
+
+ default:
+ SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
+ goto err;
+ }
+ }
+
+ if (pdu->hdr.common.hlen != expected_hlen) {
+ SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n",
+ pdu->hdr.common.pdu_type,
+ expected_hlen, pdu->hdr.common.hlen, tqpair);
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
+ goto err;
+ } else if (pdo_error) {
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
+ } else if (plen_error) {
+ fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
+ error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
+ goto err;
+ } else {
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
+ nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
+ return;
+ }
+err:
+ nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
+}
+
+static int
+nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
+ int read_len)
+{
+ int rc;
+
+ rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
+ read_offset, read_len, pdu->dif_ctx);
+ if (rc != 0) {
+ SPDK_ERRLOG("DIF generate failed\n");
+ }
+
+ return rc;
+}
+
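+/*
+ * Drive the receive state machine for one qpair: read the common header, then
+ * the PDU-specific header, then the payload, looping as long as the state
+ * keeps changing so that back-to-back PDUs are handled in a single pass.
+ */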
+static int
+nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
+{
+ int rc = 0;
+ struct nvme_tcp_pdu *pdu;
+ enum nvme_tcp_pdu_recv_state prev_state;
+ uint32_t data_len;
+ struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
+ struct spdk_nvmf_tcp_transport, transport);
+
+ /* The loop here is to allow for several back-to-back state changes. */
+ do {
+ prev_state = tqpair->recv_state;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
+
+ pdu = &tqpair->pdu_in_progress;
+ switch (tqpair->recv_state) {
+ /* Wait for the common header */
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
+ if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
+ return rc;
+ }
+
+ rc = nvme_tcp_read_data(tqpair->sock,
+ sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
+ (void *)&pdu->hdr.common + pdu->ch_valid_bytes);
+ if (rc < 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
+ return NVME_TCP_PDU_FATAL;
+ } else if (rc > 0) {
+ pdu->ch_valid_bytes += rc;
+ spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
+ if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
+ }
+ }
+
+ if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
+ return NVME_TCP_PDU_IN_PROGRESS;
+ }
+
+ /* The command header of this PDU has now been read from the socket. */
+ nvmf_tcp_pdu_ch_handle(tqpair);
+ break;
+ /* Wait for the pdu specific header */
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
+ rc = nvme_tcp_read_data(tqpair->sock,
+ pdu->psh_len - pdu->psh_valid_bytes,
+ (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
+ if (rc < 0) {
+ return NVME_TCP_PDU_FATAL;
+ } else if (rc > 0) {
+ spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
+ 0, rc, 0, 0);
+ pdu->psh_valid_bytes += rc;
+ }
+
+ if (pdu->psh_valid_bytes < pdu->psh_len) {
+ return NVME_TCP_PDU_IN_PROGRESS;
+ }
+
+ /* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
+ nvmf_tcp_pdu_psh_handle(tqpair, ttransport);
+ break;
+ /* Wait for the req slot */
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
+ nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
+ break;
+ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
+ /* If no payload length has been set up yet, just return and keep waiting */
+ if (!pdu->data_len) {
+ return NVME_TCP_PDU_IN_PROGRESS;
+ }
+
+ data_len = pdu->data_len;
+ /* data digest */
+ if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
+ tqpair->host_ddgst_enable)) {
+ data_len += SPDK_NVME_TCP_DIGEST_LEN;
+ pdu->ddgst_enable = true;
+ }
+
+ rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
+ if (rc < 0) {
+ return NVME_TCP_PDU_FATAL;
+ }
+ pdu->readv_offset += rc;
+
+ if (spdk_unlikely(pdu->dif_ctx != NULL)) {
+ rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
+ if (rc != 0) {
+ return NVME_TCP_PDU_FATAL;
+ }
+ }
+
+ if (pdu->readv_offset < data_len) {
+ return NVME_TCP_PDU_IN_PROGRESS;
+ }
+
+ /* All of this PDU has now been read from the socket. */
+ nvmf_tcp_pdu_payload_handle(tqpair, ttransport);
+ break;
+ case NVME_TCP_PDU_RECV_STATE_ERROR:
+ if (!spdk_sock_is_connected(tqpair->sock)) {
+ return NVME_TCP_PDU_FATAL;
+ }
+ break;
+ default:
+ assert(0);
+ SPDK_ERRLOG("code should not come to here");
+ break;
+ }
+ } while (tqpair->recv_state != prev_state);
+
+ return rc;
+}
+
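+/*
+ * Parse the command's SGL. Transport data block SGLs are backed by buffers
+ * from the shared pool (the request is queued if none are available), while
+ * data block SGLs with an offset reference data carried inside the capsule.
+ */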
+static int
+nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
+ struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_request *req = &tcp_req->req;
+ struct spdk_nvme_cmd *cmd;
+ struct spdk_nvme_cpl *rsp;
+ struct spdk_nvme_sgl_descriptor *sgl;
+ uint32_t length;
+
+ cmd = &req->cmd->nvme_cmd;
+ rsp = &req->rsp->nvme_cpl;
+ sgl = &cmd->dptr.sgl1;
+
+ length = sgl->unkeyed.length;
+
+ if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
+ if (length > transport->opts.max_io_size) {
+ SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
+ length, transport->opts.max_io_size);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+
+ /* fill request length and populate iovs */
+ req->length = length;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
+
+ if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
+ req->dif.orig_length = length;
+ length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
+ req->dif.elba_length = length;
+ }
+
+ if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
+ /* No available buffers. Queue this request up. */
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
+ tcp_req);
+ return 0;
+ }
+
+ /* backward compatible */
+ req->data = req->iov[0].iov_base;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
+ tcp_req, req->iovcnt, req->data);
+
+ return 0;
+ } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+ sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+ uint64_t offset = sgl->address;
+ uint32_t max_len = transport->opts.in_capsule_data_size;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
+ offset, length);
+
+ if (offset > max_len) {
+ SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
+ offset, max_len);
+ rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
+ return -1;
+ }
+ max_len -= (uint32_t)offset;
+
+ if (length > max_len) {
+ SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
+ length, max_len);
+ rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ return -1;
+ }
+
+ req->data = tcp_req->buf + offset;
+ req->data_from_pool = false;
+ req->length = length;
+
+ if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
+ length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
+ req->dif.elba_length = length;
+ }
+
+ req->iov[0].iov_base = req->data;
+ req->iov[0].iov_len = length;
+ req->iovcnt = 1;
+
+ return 0;
+ }
+
+ SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
+ sgl->generic.type, sgl->generic.subtype);
+ rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
+ return -1;
+}
+
+static inline enum spdk_nvme_media_error_status_code
+nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
+ enum spdk_nvme_media_error_status_code result;
+
+ switch (err_type)
+ {
+ case SPDK_DIF_REFTAG_ERROR:
+ result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
+ break;
+ case SPDK_DIF_APPTAG_ERROR:
+ result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
+ break;
+ case SPDK_DIF_GUARD_ERROR:
+ result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
+ break;
+ default:
+ SPDK_UNREACHABLE();
+ break;
+ }
+
+ return result;
+}
+
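+/*
+ * Build and send the C2H data PDU for a completed read, applying the CPDA
+ * padding negotiated at connection time and, when enabled, header/data
+ * digests and the C2H success optimization.
+ */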
+static void
+nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct spdk_nvmf_tcp_req *tcp_req)
+{
+ struct nvme_tcp_pdu *rsp_pdu;
+ struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
+ uint32_t plen, pdo, alignment;
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
+
+ rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
+ assert(rsp_pdu != NULL);
+
+ c2h_data = &rsp_pdu->hdr.c2h_data;
+ c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
+ plen = c2h_data->common.hlen = sizeof(*c2h_data);
+
+ if (tqpair->host_hdgst_enable) {
+ plen += SPDK_NVME_TCP_DIGEST_LEN;
+ c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
+ }
+
+ /* set the psh */
+ c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
+ c2h_data->datal = tcp_req->req.length;
+ c2h_data->datao = 0;
+
+ /* set the padding */
+ rsp_pdu->padding_len = 0;
+ pdo = plen;
+ if (tqpair->cpda) {
+ alignment = (tqpair->cpda + 1) << 2;
+ if (alignment > plen) {
+ rsp_pdu->padding_len = alignment - plen;
+ pdo = plen = alignment;
+ }
+ }
+
+ c2h_data->common.pdo = pdo;
+ plen += c2h_data->datal;
+ if (tqpair->host_ddgst_enable) {
+ c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
+ plen += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ c2h_data->common.plen = plen;
+
+ if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
+ rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
+ }
+
+ nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
+ c2h_data->datao, c2h_data->datal);
+
+ if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
+ struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
+ struct spdk_dif_error err_blk = {};
+
+ rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
+ 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
+ if (rc != 0) {
+ SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
+ err_blk.err_type, err_blk.err_offset);
+ rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
+ rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
+ nvmf_tcp_req_pdu_fini(tcp_req);
+ nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
+ return;
+ }
+ }
+
+ c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
+ if (tqpair->qpair.transport->opts.c2h_success) {
+ c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
+ }
+
+ nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_c2h_data_complete, tcp_req);
+}
+
+static int
+request_transfer_out(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_tcp_req *tcp_req;
+ struct spdk_nvmf_qpair *qpair;
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ struct spdk_nvme_cpl *rsp;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
+
+ qpair = req->qpair;
+ rsp = &req->rsp->nvme_cpl;
+ tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
+
+ /* Advance our sq_head pointer */
+ if (qpair->sq_head == qpair->sq_head_max) {
+ qpair->sq_head = 0;
+ } else {
+ qpair->sq_head++;
+ }
+ rsp->sqhd = qpair->sq_head;
+
+ tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
+ if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ nvmf_tcp_send_c2h_data(tqpair, tcp_req);
+ } else {
+ nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
+ }
+
+ return 0;
+}
+
+static void
+nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
+ struct spdk_nvmf_tcp_req *tcp_req)
+{
+ struct nvme_tcp_pdu *pdu;
+ uint32_t plen = 0;
+
+ pdu = &tqpair->pdu_in_progress;
+ plen = pdu->hdr.common.hlen;
+
+ if (tqpair->host_hdgst_enable) {
+ plen += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ if (pdu->hdr.common.plen != plen) {
+ tcp_req->has_incapsule_data = true;
+ }
+}
+
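+/*
+ * Per-request state machine. Each call advances the request through as many
+ * states as possible (NEW -> NEED_BUFFER -> ... -> COMPLETED) and returns
+ * whether any progress was made.
+ */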
+static bool
+nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
+ struct spdk_nvmf_tcp_req *tcp_req)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ int rc;
+ enum spdk_nvmf_tcp_req_state prev_state;
+ bool progress = false;
+ struct spdk_nvmf_transport *transport = &ttransport->transport;
+ struct spdk_nvmf_transport_poll_group *group;
+
+ tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ group = &tqpair->group->group;
+ assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
+
+ /* If the qpair is not active, we need to abort the outstanding requests. */
+ if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
+ STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
+ }
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
+ }
+
+ /* The loop here is to allow for several back-to-back state changes. */
+ do {
+ prev_state = tcp_req->state;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
+ tqpair);
+
+ switch (tcp_req->state) {
+ case TCP_REQUEST_STATE_FREE:
+ /* Some external code must kick a request into TCP_REQUEST_STATE_NEW
+ * to escape this state. */
+ break;
+ case TCP_REQUEST_STATE_NEW:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
+
+ /* copy the cmd from the receive pdu */
+ tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe;
+
+ if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
+ tcp_req->req.dif.dif_insert_or_strip = true;
+ tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx;
+ }
+
+ /* The next state transition depends on the data transfer needs of this request. */
+ tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req);
+
+ /* If no data to transfer, ready to execute. */
+ if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
+ /* Reset the tqpair receiving pdu state */
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
+ break;
+ }
+
+ nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
+
+ if (!tcp_req->has_incapsule_data) {
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
+ }
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
+ STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
+ break;
+ case TCP_REQUEST_STATE_NEED_BUFFER:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
+
+ assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
+
+ if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
+ "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
+ tcp_req, tqpair);
+ /* This request needs to wait in line to obtain a buffer */
+ break;
+ }
+
+ /* Try to get a data buffer */
+ rc = nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
+ if (rc < 0) {
+ STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
+ /* Reset the tqpair receiving pdu state */
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
+ break;
+ }
+
+ if (!tcp_req->req.data) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)",
+ tcp_req, tqpair);
+ /* No buffers available. */
+ break;
+ }
+
+ STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
+
+ /* If data is transferring from host to controller, we need to do a transfer from the host. */
+ if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+ if (tcp_req->req.data_from_pool) {
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
+ nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
+ } else {
+ struct nvme_tcp_pdu *pdu;
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+
+ pdu = &tqpair->pdu_in_progress;
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
+ tqpair);
+ /* No need to send r2t, contained in the capsuled data */
+ nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
+ 0, tcp_req->req.length);
+ nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
+ }
+ break;
+ }
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
+ break;
+ case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0);
+ /* The R2T completion or the h2c data incoming will kick it out of this state. */
+ break;
+ case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
+ (uintptr_t)tcp_req, 0);
+ /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
+ * to escape this state. */
+ break;
+ case TCP_REQUEST_STATE_READY_TO_EXECUTE:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
+
+ if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
+ assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
+ tcp_req->req.length = tcp_req->req.dif.elba_length;
+ }
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
+ spdk_nvmf_request_exec(&tcp_req->req);
+ break;
+ case TCP_REQUEST_STATE_EXECUTING:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
+ /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
+ * to escape this state. */
+ break;
+ case TCP_REQUEST_STATE_EXECUTED:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
+
+ if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
+ tcp_req->req.length = tcp_req->req.dif.orig_length;
+ }
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
+ break;
+ case TCP_REQUEST_STATE_READY_TO_COMPLETE:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
+ rc = request_transfer_out(&tcp_req->req);
+ assert(rc == 0); /* No good way to handle this currently */
+ break;
+ case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
+ (uintptr_t)tcp_req,
+ 0);
+ /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
+ * to escape this state. */
+ break;
+ case TCP_REQUEST_STATE_COMPLETED:
+ spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
+ if (tcp_req->req.data_from_pool) {
+ spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
+ }
+ tcp_req->req.length = 0;
+ tcp_req->req.iovcnt = 0;
+ tcp_req->req.data = NULL;
+
+ nvmf_tcp_req_pdu_fini(tcp_req);
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
+ break;
+ case TCP_REQUEST_NUM_STATES:
+ default:
+ assert(0);
+ break;
+ }
+
+ if (tcp_req->state != prev_state) {
+ progress = true;
+ }
+ } while (tcp_req->state != prev_state);
+
+ return progress;
+}
+
+static void
+nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair = arg;
+ int rc;
+
+ assert(tqpair != NULL);
+ rc = nvmf_tcp_sock_process(tqpair);
+
+ /* If there was a new socket error, disconnect */
+ if (rc < 0) {
+ nvmf_tcp_qpair_disconnect(tqpair);
+ }
+}
+
+static int
+nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_tcp_poll_group *tgroup;
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ int rc;
+
+ tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+
+ rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
+ nvmf_tcp_sock_cb, tqpair);
+ if (rc != 0) {
+ SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
+ spdk_strerror(errno), errno);
+ return -1;
+ }
+
+ rc = nvmf_tcp_qpair_sock_init(tqpair);
+ if (rc != 0) {
+ SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
+ return -1;
+ }
+
+ rc = nvmf_tcp_qpair_init(&tqpair->qpair);
+ if (rc < 0) {
+ SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
+ return -1;
+ }
+
+ rc = nvmf_tcp_qpair_init_mem_resource(tqpair);
+ if (rc < 0) {
+ SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
+ return -1;
+ }
+
+ tqpair->group = tgroup;
+ tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
+ TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
+
+ return 0;
+}
+
+static int
+nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_tcp_poll_group *tgroup;
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ int rc;
+
+ tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+
+ assert(tqpair->group == tgroup);
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
+ if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
+ TAILQ_REMOVE(&tgroup->await_req, tqpair, link);
+ } else {
+ TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
+ }
+
+ rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
+ if (rc != 0) {
+ SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
+ spdk_strerror(errno), errno);
+ }
+
+ return rc;
+}
+
+static int
+nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_tcp_transport *ttransport;
+ struct spdk_nvmf_tcp_req *tcp_req;
+
+ ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
+ tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
+
+ nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
+ nvmf_tcp_req_process(ttransport, tcp_req);
+
+ return 0;
+}
+
+static void
+nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+
+ SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Qpair: %p\n", qpair);
+
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
+ nvmf_tcp_qpair_destroy(tqpair);
+}
+
+static int
+nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_tcp_poll_group *tgroup;
+ int rc;
+ struct spdk_nvmf_request *req, *req_tmp;
+ struct spdk_nvmf_tcp_req *tcp_req;
+ struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp;
+ struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
+ struct spdk_nvmf_tcp_transport, transport);
+
+ tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
+
+ if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) {
+ return 0;
+ }
+
+ STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
+ tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
+ if (nvmf_tcp_req_process(ttransport, tcp_req) == false) {
+ break;
+ }
+ }
+
+ rc = spdk_sock_group_poll(tgroup->sock_group);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
+ }
+
+ TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) {
+ nvmf_tcp_sock_process(tqpair);
+ }
+
+ return rc;
+}
+
+static int
+nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid, bool peer)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ uint16_t port;
+
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP);
+
+ if (peer) {
+ snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
+ port = tqpair->initiator_port;
+ } else {
+ snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
+ port = tqpair->target_port;
+ }
+
+ if (spdk_sock_is_ipv4(tqpair->sock)) {
+ trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
+ } else if (spdk_sock_is_ipv6(tqpair->sock)) {
+ trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
+ } else {
+ return -1;
+ }
+
+ snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
+ return 0;
+}
+
+static int
+nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
+}
+
+static int
+nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_tcp_qpair_get_trid(qpair, trid, 1);
+}
+
+static int
+nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
+}
+
+static void
+nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_tcp_req *tcp_req_to_abort)
+{
+ tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+ tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
+
+ nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE);
+
+ req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
+}
+
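+/*
+ * Try to abort an outstanding request. Requests waiting for a buffer or an
+ * R2T ack are completed immediately with an aborted status; requests still
+ * transferring data from the host are re-polled until the abort timeout.
+ */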
+static int
+_nvmf_tcp_qpair_abort_request(void *ctx)
+{
+ struct spdk_nvmf_request *req = ctx;
+ struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort,
+ struct spdk_nvmf_tcp_req, req);
+ struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
+ struct spdk_nvmf_tcp_qpair, qpair);
+ int rc;
+
+ spdk_poller_unregister(&req->poller);
+
+ switch (tcp_req_to_abort->state) {
+ case TCP_REQUEST_STATE_EXECUTING:
+ rc = nvmf_ctrlr_abort_request(req);
+ if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
+ return SPDK_POLLER_BUSY;
+ }
+ break;
+
+ case TCP_REQUEST_STATE_NEED_BUFFER:
+ STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue,
+ &tcp_req_to_abort->req, spdk_nvmf_request, buf_link);
+
+ nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
+ break;
+
+ case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
+ nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
+ break;
+
+ case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
+ if (spdk_get_ticks() < req->timeout_tsc) {
+ req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0);
+ return SPDK_POLLER_BUSY;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ spdk_nvmf_request_complete(req);
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req)
+{
+ struct spdk_nvmf_tcp_qpair *tqpair;
+ struct spdk_nvmf_tcp_transport *ttransport;
+ struct spdk_nvmf_transport *transport;
+ uint16_t cid;
+ uint32_t i;
+ struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL;
+
+ tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
+ ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
+ transport = &ttransport->transport;
+
+ cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
+
+ for (i = 0; i < tqpair->resource_count; i++) {
+ tcp_req_to_abort = &tqpair->reqs[i];
+
+ if (tcp_req_to_abort->state != TCP_REQUEST_STATE_FREE &&
+ tcp_req_to_abort->req.cmd->nvme_cmd.cid == cid) {
+ break;
+ }
+ }
+
+ if (tcp_req_to_abort == NULL) {
+ spdk_nvmf_request_complete(req);
+ return;
+ }
+
+ req->req_to_abort = &tcp_req_to_abort->req;
+ req->timeout_tsc = spdk_get_ticks() +
+ transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
+ req->poller = NULL;
+
+ _nvmf_tcp_qpair_abort_request(req);
+}
+
+#define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
+#define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
+#define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
+#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
+#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
+#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
+#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
+#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
+#define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
+#define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
+#define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1
+
+static void
+nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
+{
+ opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
+ opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
+ opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
+ opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
+ opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
+ opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
+ opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
+ opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
+ opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
+ opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
+ opts->sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
+ opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC;
+}
+
+const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
+ .name = "TCP",
+ .type = SPDK_NVME_TRANSPORT_TCP,
+ .opts_init = nvmf_tcp_opts_init,
+ .create = nvmf_tcp_create,
+ .destroy = nvmf_tcp_destroy,
+
+ .listen = nvmf_tcp_listen,
+ .stop_listen = nvmf_tcp_stop_listen,
+ .accept = nvmf_tcp_accept,
+
+ .listener_discover = nvmf_tcp_discover,
+
+ .poll_group_create = nvmf_tcp_poll_group_create,
+ .get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group,
+ .poll_group_destroy = nvmf_tcp_poll_group_destroy,
+ .poll_group_add = nvmf_tcp_poll_group_add,
+ .poll_group_remove = nvmf_tcp_poll_group_remove,
+ .poll_group_poll = nvmf_tcp_poll_group_poll,
+
+ .req_free = nvmf_tcp_req_free,
+ .req_complete = nvmf_tcp_req_complete,
+
+ .qpair_fini = nvmf_tcp_close_qpair,
+ .qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid,
+ .qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid,
+ .qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid,
+ .qpair_abort_request = nvmf_tcp_qpair_abort_request,
+};
+
+SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);
+SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
diff --git a/src/spdk/lib/nvmf/transport.c b/src/spdk/lib/nvmf/transport.c
new file mode 100644
index 000000000..11bb152df
--- /dev/null
+++ b/src/spdk/lib/nvmf/transport.c
@@ -0,0 +1,572 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2018-2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+#include "transport.h"
+
+#include "spdk/config.h"
+#include "spdk/log.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_transport.h"
+#include "spdk/queue.h"
+#include "spdk/util.h"
+
+#define MAX_MEMPOOL_NAME_LENGTH 40
+
+struct nvmf_transport_ops_list_element {
+ struct spdk_nvmf_transport_ops ops;
+ TAILQ_ENTRY(nvmf_transport_ops_list_element) link;
+};
+
+TAILQ_HEAD(nvmf_transport_ops_list, nvmf_transport_ops_list_element)
+g_spdk_nvmf_transport_ops = TAILQ_HEAD_INITIALIZER(g_spdk_nvmf_transport_ops);
+
+static inline const struct spdk_nvmf_transport_ops *
+nvmf_get_transport_ops(const char *transport_name)
+{
+ struct nvmf_transport_ops_list_element *ops;
+ TAILQ_FOREACH(ops, &g_spdk_nvmf_transport_ops, link) {
+ if (strcasecmp(transport_name, ops->ops.name) == 0) {
+ return &ops->ops;
+ }
+ }
+ return NULL;
+}
+
+void
+spdk_nvmf_transport_register(const struct spdk_nvmf_transport_ops *ops)
+{
+ struct nvmf_transport_ops_list_element *new_ops;
+
+ if (nvmf_get_transport_ops(ops->name) != NULL) {
+ SPDK_ERRLOG("Double registering nvmf transport type %s.\n", ops->name);
+ assert(false);
+ return;
+ }
+
+ new_ops = calloc(1, sizeof(*new_ops));
+ if (new_ops == NULL) {
+ SPDK_ERRLOG("Unable to allocate memory to register new transport type %s.\n", ops->name);
+ assert(false);
+ return;
+ }
+
+ new_ops->ops = *ops;
+
+ TAILQ_INSERT_TAIL(&g_spdk_nvmf_transport_ops, new_ops, link);
+}
+
+const struct spdk_nvmf_transport_opts *
+spdk_nvmf_get_transport_opts(struct spdk_nvmf_transport *transport)
+{
+ return &transport->opts;
+}
+
+spdk_nvme_transport_type_t
+spdk_nvmf_get_transport_type(struct spdk_nvmf_transport *transport)
+{
+ return transport->ops->type;
+}
+
+const char *
+spdk_nvmf_get_transport_name(struct spdk_nvmf_transport *transport)
+{
+ return transport->ops->name;
+}
+
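+/*
+ * Create a transport instance by name: look up the registered ops, clamp the
+ * admin queue depth to the spec minimum, and allocate the shared data buffer
+ * pool used by this transport's poll groups.
+ */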
+struct spdk_nvmf_transport *
+spdk_nvmf_transport_create(const char *transport_name, struct spdk_nvmf_transport_opts *opts)
+{
+ const struct spdk_nvmf_transport_ops *ops = NULL;
+ struct spdk_nvmf_transport *transport;
+ char spdk_mempool_name[MAX_MEMPOOL_NAME_LENGTH];
+ int chars_written;
+
+ ops = nvmf_get_transport_ops(transport_name);
+ if (!ops) {
+ SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
+ return NULL;
+ }
+
+ if (opts->max_aq_depth < SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE) {
+ SPDK_ERRLOG("max_aq_depth %u is less than minimum defined by NVMf spec, use min value\n",
+ opts->max_aq_depth);
+ opts->max_aq_depth = SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE;
+ }
+
+ transport = ops->create(opts);
+ if (!transport) {
+ SPDK_ERRLOG("Unable to create new transport of type %s\n", transport_name);
+ return NULL;
+ }
+
+ TAILQ_INIT(&transport->listeners);
+
+ transport->ops = ops;
+ transport->opts = *opts;
+ chars_written = snprintf(spdk_mempool_name, MAX_MEMPOOL_NAME_LENGTH, "%s_%s_%s", "spdk_nvmf",
+ transport_name, "data");
+ if (chars_written < 0) {
+ SPDK_ERRLOG("Unable to generate transport data buffer pool name.\n");
+ ops->destroy(transport);
+ return NULL;
+ }
+
+ transport->data_buf_pool = spdk_mempool_create(spdk_mempool_name,
+ opts->num_shared_buffers,
+ opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT,
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+
+ if (!transport->data_buf_pool) {
+ SPDK_ERRLOG("Unable to allocate buffer pool for poll group\n");
+ ops->destroy(transport);
+ return NULL;
+ }
+
+ return transport;
+}
+
+struct spdk_nvmf_transport *
+spdk_nvmf_transport_get_first(struct spdk_nvmf_tgt *tgt)
+{
+ return TAILQ_FIRST(&tgt->transports);
+}
+
+struct spdk_nvmf_transport *
+spdk_nvmf_transport_get_next(struct spdk_nvmf_transport *transport)
+{
+ return TAILQ_NEXT(transport, link);
+}
+
+int
+spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport)
+{
+ if (transport->data_buf_pool != NULL) {
+ if (spdk_mempool_count(transport->data_buf_pool) !=
+ transport->opts.num_shared_buffers) {
+ SPDK_ERRLOG("transport buffer pool count is %zu but should be %u\n",
+ spdk_mempool_count(transport->data_buf_pool),
+ transport->opts.num_shared_buffers);
+ }
+ }
+
+ spdk_mempool_free(transport->data_buf_pool);
+
+ return transport->ops->destroy(transport);
+}
+
+struct spdk_nvmf_listener *
+nvmf_transport_find_listener(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_listener *listener;
+
+ TAILQ_FOREACH(listener, &transport->listeners, link) {
+ if (spdk_nvme_transport_id_compare(&listener->trid, trid) == 0) {
+ return listener;
+ }
+ }
+
+ return NULL;
+}
+
+int
+spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_listener *listener;
+ int rc;
+
+ listener = nvmf_transport_find_listener(transport, trid);
+ if (!listener) {
+ listener = calloc(1, sizeof(*listener));
+ if (!listener) {
+ return -ENOMEM;
+ }
+
+ listener->ref = 1;
+ listener->trid = *trid;
+ TAILQ_INSERT_TAIL(&transport->listeners, listener, link);
+
+ rc = transport->ops->listen(transport, &listener->trid);
+ if (rc != 0) {
+ TAILQ_REMOVE(&transport->listeners, listener, link);
+ free(listener);
+ }
+ return rc;
+ }
+
+ ++listener->ref;
+
+ return 0;
+}
+
+int
+spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct spdk_nvmf_listener *listener;
+
+ listener = nvmf_transport_find_listener(transport, trid);
+ if (!listener) {
+ return -ENOENT;
+ }
+
+ if (--listener->ref == 0) {
+ TAILQ_REMOVE(&transport->listeners, listener, link);
+ transport->ops->stop_listen(transport, trid);
+ free(listener);
+ }
+
+ return 0;
+}
+
+uint32_t
+nvmf_transport_accept(struct spdk_nvmf_transport *transport)
+{
+ return transport->ops->accept(transport);
+}
+
+void
+nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry)
+{
+ transport->ops->listener_discover(transport, trid, entry);
+}
+
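+/*
+ * Create the transport-specific poll group and, if buf_cache_size is non-zero,
+ * pre-populate the per-group buffer cache from the shared data buffer pool.
+ * Falling short of buf_cache_size is not fatal; the cache is best effort.
+ */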
+struct spdk_nvmf_transport_poll_group *
+nvmf_transport_poll_group_create(struct spdk_nvmf_transport *transport)
+{
+ struct spdk_nvmf_transport_poll_group *group;
+ struct spdk_nvmf_transport_pg_cache_buf *buf;
+
+ group = transport->ops->poll_group_create(transport);
+ if (!group) {
+ return NULL;
+ }
+ group->transport = transport;
+
+ STAILQ_INIT(&group->pending_buf_queue);
+ STAILQ_INIT(&group->buf_cache);
+
+ if (transport->opts.buf_cache_size) {
+ group->buf_cache_count = 0;
+ group->buf_cache_size = transport->opts.buf_cache_size;
+ while (group->buf_cache_count < group->buf_cache_size) {
+ buf = (struct spdk_nvmf_transport_pg_cache_buf *)spdk_mempool_get(transport->data_buf_pool);
+ if (!buf) {
+				SPDK_NOTICELOG("Unable to reserve the full number of buffers for the poll group buffer cache.\n");
+ break;
+ }
+ STAILQ_INSERT_HEAD(&group->buf_cache, buf, link);
+ group->buf_cache_count++;
+ }
+ }
+ return group;
+}
+
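+/* Optional callback: transports that do not implement it simply return NULL here. */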
+struct spdk_nvmf_transport_poll_group *
+nvmf_transport_get_optimal_poll_group(struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_qpair *qpair)
+{
+ if (transport->ops->get_optimal_poll_group) {
+ return transport->ops->get_optimal_poll_group(qpair);
+ } else {
+ return NULL;
+ }
+}
+
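+/* Return any cached buffers to the shared pool before destroying the group. */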
+void
+nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
+{
+ struct spdk_nvmf_transport_pg_cache_buf *buf, *tmp;
+
+ if (!STAILQ_EMPTY(&group->pending_buf_queue)) {
+ SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n");
+ }
+
+ STAILQ_FOREACH_SAFE(buf, &group->buf_cache, link, tmp) {
+ STAILQ_REMOVE(&group->buf_cache, buf, spdk_nvmf_transport_pg_cache_buf, link);
+ spdk_mempool_put(group->transport->data_buf_pool, buf);
+ }
+ group->transport->ops->poll_group_destroy(group);
+}
+
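+/*
+ * Bind the qpair to this group's transport on first add; adding a qpair that
+ * already belongs to a different transport is rejected.
+ */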
+int
+nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ if (qpair->transport) {
+ assert(qpair->transport == group->transport);
+ if (qpair->transport != group->transport) {
+ return -1;
+ }
+ } else {
+ qpair->transport = group->transport;
+ }
+
+ return group->transport->ops->poll_group_add(group, qpair);
+}
+
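+/*
+ * poll_group_remove is optional; if the transport does not implement it, rc is
+ * left at ENOTSUP (note: positive, unlike the negative errno values returned
+ * elsewhere in this file).
+ */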
+int
+nvmf_transport_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair)
+{
+ int rc = ENOTSUP;
+
+ assert(qpair->transport == group->transport);
+ if (group->transport->ops->poll_group_remove) {
+ rc = group->transport->ops->poll_group_remove(group, qpair);
+ }
+
+ return rc;
+}
+
+int
+nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
+{
+ return group->transport->ops->poll_group_poll(group);
+}
+
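+/*
+ * The remaining nvmf_transport_* helpers are thin wrappers that dispatch
+ * straight to the owning transport's ops table.
+ */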
+int
+nvmf_transport_req_free(struct spdk_nvmf_request *req)
+{
+ return req->qpair->transport->ops->req_free(req);
+}
+
+int
+nvmf_transport_req_complete(struct spdk_nvmf_request *req)
+{
+ return req->qpair->transport->ops->req_complete(req);
+}
+
+void
+nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair)
+{
+ qpair->transport->ops->qpair_fini(qpair);
+}
+
+int
+nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return qpair->transport->ops->qpair_get_peer_trid(qpair, trid);
+}
+
+int
+nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return qpair->transport->ops->qpair_get_local_trid(qpair, trid);
+}
+
+int
+nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid)
+{
+ return qpair->transport->ops->qpair_get_listen_trid(qpair, trid);
+}
+
+void
+nvmf_transport_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req)
+{
+ qpair->transport->ops->qpair_abort_request(qpair, req);
+}
+
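+/*
+ * Fill *opts with the transport's default options. Typical call sequence
+ * (illustrative sketch only; the target handle, callback name, and
+ * spdk_nvmf_tgt_add_transport() step live outside this file):
+ *
+ *   struct spdk_nvmf_transport_opts opts;
+ *   struct spdk_nvmf_transport *transport;
+ *
+ *   if (!spdk_nvmf_transport_opts_init("TCP", &opts)) {
+ *       return -EINVAL;
+ *   }
+ *   transport = spdk_nvmf_transport_create("TCP", &opts);
+ *   if (transport == NULL) {
+ *       return -ENOMEM;
+ *   }
+ *   spdk_nvmf_tgt_add_transport(tgt, transport, add_transport_done, NULL);
+ */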
+bool
+spdk_nvmf_transport_opts_init(const char *transport_name,
+ struct spdk_nvmf_transport_opts *opts)
+{
+ const struct spdk_nvmf_transport_ops *ops;
+
+ ops = nvmf_get_transport_ops(transport_name);
+ if (!ops) {
+ SPDK_ERRLOG("Transport type %s unavailable.\n", transport_name);
+ return false;
+ }
+
+ ops->opts_init(opts);
+ return true;
+}
+
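+/* Poll group statistics hooks are optional: -ENOTSUP / no-op when absent. */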
+int
+spdk_nvmf_transport_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
+ struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_transport_poll_group_stat **stat)
+{
+ if (transport->ops->poll_group_get_stat) {
+ return transport->ops->poll_group_get_stat(tgt, stat);
+ } else {
+ return -ENOTSUP;
+ }
+}
+
+void
+spdk_nvmf_transport_poll_group_free_stat(struct spdk_nvmf_transport *transport,
+ struct spdk_nvmf_transport_poll_group_stat *stat)
+{
+ if (transport->ops->poll_group_free_stat) {
+ transport->ops->poll_group_free_stat(stat);
+ }
+}
+
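+/*
+ * Return each buffer of the request either to the poll group cache (while it
+ * is below buf_cache_size) or to the shared pool, and clear the iovec entries.
+ */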
+void
+spdk_nvmf_request_free_buffers(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport)
+{
+ uint32_t i;
+
+ for (i = 0; i < req->iovcnt; i++) {
+ if (group->buf_cache_count < group->buf_cache_size) {
+ STAILQ_INSERT_HEAD(&group->buf_cache,
+ (struct spdk_nvmf_transport_pg_cache_buf *)req->buffers[i],
+ link);
+ group->buf_cache_count++;
+ } else {
+ spdk_mempool_put(transport->data_buf_pool, req->buffers[i]);
+ }
+ req->iov[i].iov_base = NULL;
+ req->buffers[i] = NULL;
+ req->iov[i].iov_len = 0;
+ }
+ req->data_from_pool = false;
+}
+
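+/*
+ * Append one data buffer to the request's iovec, consuming up to io_unit_size
+ * bytes, and return the length that still remains to be covered.
+ */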
+static inline int
+nvmf_request_set_buffer(struct spdk_nvmf_request *req, void *buf, uint32_t length,
+ uint32_t io_unit_size)
+{
+ req->buffers[req->iovcnt] = buf;
+	/* Round the buffer address up to NVMF_DATA_BUFFER_ALIGNMENT; the pool
+	 * elements are over-allocated by one alignment unit to allow for this.
+	 */
+	req->iov[req->iovcnt].iov_base = (void *)(((uintptr_t)buf + NVMF_DATA_BUFFER_MASK) &
+					 ~NVMF_DATA_BUFFER_MASK);
+ req->iov[req->iovcnt].iov_len = spdk_min(length, io_unit_size);
+ length -= req->iov[req->iovcnt].iov_len;
+ req->iovcnt++;
+
+ return length;
+}
+
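+/*
+ * Fill the request's iovec for 'length' bytes, preferring buffers from the
+ * poll group cache and falling back to a bulk get from the shared pool.
+ * Returns -EINVAL if the I/O would need more than NVMF_REQ_MAX_BUFFERS and
+ * -ENOMEM if the pool cannot supply the remaining buffers.
+ */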
+static int
+nvmf_request_get_buffers(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport,
+ uint32_t length)
+{
+ uint32_t io_unit_size = transport->opts.io_unit_size;
+ uint32_t num_buffers;
+ uint32_t i = 0, j;
+ void *buffer, *buffers[NVMF_REQ_MAX_BUFFERS];
+
+ /* If the number of buffers is too large, then we know the I/O is larger than allowed.
+ * Fail it.
+ */
+ num_buffers = SPDK_CEIL_DIV(length, io_unit_size);
+ if (num_buffers + req->iovcnt > NVMF_REQ_MAX_BUFFERS) {
+ return -EINVAL;
+ }
+
+ while (i < num_buffers) {
+ if (!(STAILQ_EMPTY(&group->buf_cache))) {
+ group->buf_cache_count--;
+ buffer = STAILQ_FIRST(&group->buf_cache);
+ STAILQ_REMOVE_HEAD(&group->buf_cache, link);
+ assert(buffer != NULL);
+
+ length = nvmf_request_set_buffer(req, buffer, length, io_unit_size);
+ i++;
+ } else {
+ if (spdk_mempool_get_bulk(transport->data_buf_pool, buffers,
+ num_buffers - i)) {
+ return -ENOMEM;
+ }
+ for (j = 0; j < num_buffers - i; j++) {
+ length = nvmf_request_set_buffer(req, buffers[j], length, io_unit_size);
+ }
+ i += num_buffers - i;
+ }
+ }
+
+ assert(length == 0);
+
+ req->data_from_pool = true;
+ return 0;
+}
+
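+/*
+ * Public wrappers: on -ENOMEM any buffers already taken are released, so a
+ * caller can queue the request and retry once buffers are returned. The
+ * _multi variant acquires buffers for several lengths (e.g. multi-SGL
+ * requests) into one iovec.
+ */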
+int
+spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport,
+ uint32_t length)
+{
+ int rc;
+
+ req->iovcnt = 0;
+
+ rc = nvmf_request_get_buffers(req, group, transport, length);
+ if (rc == -ENOMEM) {
+ spdk_nvmf_request_free_buffers(req, group, transport);
+ }
+
+ return rc;
+}
+
+int
+spdk_nvmf_request_get_buffers_multi(struct spdk_nvmf_request *req,
+ struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_transport *transport,
+ uint32_t *lengths, uint32_t num_lengths)
+{
+ int rc = 0;
+ uint32_t i;
+
+ req->iovcnt = 0;
+
+ for (i = 0; i < num_lengths; i++) {
+ rc = nvmf_request_get_buffers(req, group, transport, lengths[i]);
+ if (rc != 0) {
+ goto err_exit;
+ }
+ }
+
+ return 0;
+
+err_exit:
+ spdk_nvmf_request_free_buffers(req, group, transport);
+ return rc;
+}
diff --git a/src/spdk/lib/nvmf/transport.h b/src/spdk/lib/nvmf/transport.h
new file mode 100644
index 000000000..38b5d8db3
--- /dev/null
+++ b/src/spdk/lib/nvmf/transport.h
@@ -0,0 +1,82 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVMF_TRANSPORT_H
+#define SPDK_NVMF_TRANSPORT_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/nvme.h"
+#include "spdk/nvmf.h"
+#include "spdk/nvmf_transport.h"
+
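+/*
+ * Internal dispatch helpers implemented in transport.c; each forwards to the
+ * corresponding callback in the transport's ops table.
+ */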
+uint32_t nvmf_transport_accept(struct spdk_nvmf_transport *transport);
+
+void nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport,
+ struct spdk_nvme_transport_id *trid,
+ struct spdk_nvmf_discovery_log_page_entry *entry);
+
+struct spdk_nvmf_transport_poll_group *nvmf_transport_poll_group_create(
+ struct spdk_nvmf_transport *transport);
+struct spdk_nvmf_transport_poll_group *nvmf_transport_get_optimal_poll_group(
+ struct spdk_nvmf_transport *transport, struct spdk_nvmf_qpair *qpair);
+
+void nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
+
+int nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+int nvmf_transport_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
+ struct spdk_nvmf_qpair *qpair);
+
+int nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group);
+
+int nvmf_transport_req_free(struct spdk_nvmf_request *req);
+
+int nvmf_transport_req_complete(struct spdk_nvmf_request *req);
+
+void nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair);
+
+int nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+int nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+int nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvme_transport_id *trid);
+
+void nvmf_transport_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
+ struct spdk_nvmf_request *req);
+
+#endif /* SPDK_NVMF_TRANSPORT_H */