diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/lib/nvmf | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds. (refs: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/nvmf')
-rw-r--r-- | src/spdk/lib/nvmf/Makefile | 75 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/ctrlr.c | 3224 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/ctrlr_bdev.c | 761 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/ctrlr_discovery.c | 159 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/fc.c | 3957 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/fc_ls.c | 1678 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/nvmf.c | 1457 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/nvmf_fc.h | 999 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/nvmf_internal.h | 371 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/nvmf_rpc.c | 2012 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/rdma.c | 4313 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/spdk_nvmf.map | 118 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/subsystem.c | 2515 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/tcp.c | 2631 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/transport.c | 572 | ||||
-rw-r--r-- | src/spdk/lib/nvmf/transport.h | 82 |
16 files changed, 24924 insertions, 0 deletions
diff --git a/src/spdk/lib/nvmf/Makefile b/src/spdk/lib/nvmf/Makefile new file mode 100644 index 000000000..b4556564a --- /dev/null +++ b/src/spdk/lib/nvmf/Makefile @@ -0,0 +1,75 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 5 +SO_MINOR := 0 + +C_SRCS = ctrlr.c ctrlr_discovery.c ctrlr_bdev.c \ + subsystem.c nvmf.c nvmf_rpc.c transport.c tcp.c + +C_SRCS-$(CONFIG_RDMA) += rdma.c +LIBNAME = nvmf +LOCAL_SYS_LIBS = -luuid +ifeq ($(CONFIG_RDMA),y) +LOCAL_SYS_LIBS += -libverbs -lrdmacm +#Attach only if FreeBSD and RDMA is specified with configure +ifeq ($(OS),FreeBSD) +# Mellanox - MLX4 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx4.*)","") +LOCAL_SYS_LIBS += -lmlx4 +endif +# Mellanox - MLX5 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx5.*)","") +LOCAL_SYS_LIBS += -lmlx5 +endif +# Chelsio HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libcxgb4.*)","") +LOCAL_SYS_LIBS += -lcxgb4 +endif +endif +endif + +ifeq ($(CONFIG_FC),y) +C_SRCS += fc.c fc_ls.c +CFLAGS += -I$(CURDIR) +ifneq ($(strip $(CONFIG_FC_PATH)),) +CFLAGS += -I$(CONFIG_FC_PATH) +endif +endif + +SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_nvmf.map) + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/nvmf/ctrlr.c b/src/spdk/lib/nvmf/ctrlr.c new file mode 100644 index 000000000..638cde9d2 --- /dev/null +++ b/src/spdk/lib/nvmf/ctrlr.c @@ -0,0 +1,3224 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/bit_array.h" +#include "spdk/endian.h" +#include "spdk/thread.h" +#include "spdk/trace.h" +#include "spdk/nvme_spec.h" +#include "spdk/nvmf_cmd.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/version.h" + +#include "spdk_internal/log.h" + +#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000 +#define NVMF_DISC_KATO_IN_MS 120000 +#define KAS_TIME_UNIT_IN_MS 100 +#define KAS_DEFAULT_VALUE (MIN_KEEP_ALIVE_TIMEOUT_IN_MS / KAS_TIME_UNIT_IN_MS) + +/* + * Report the SPDK version as the firmware revision. + * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts. 
+ */ +#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING + +/* + * Support for custom admin command handlers + */ +struct spdk_nvmf_custom_admin_cmd { + spdk_nvmf_custom_cmd_hdlr hdlr; + uint32_t nsid; /* nsid to forward */ +}; + +static struct spdk_nvmf_custom_admin_cmd g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_MAX_OPC + 1]; + +static void _nvmf_request_complete(void *ctx); + +static inline void +nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp, + uint8_t iattr, uint16_t ipo) +{ + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + rsp->status_code_specific.invalid.iattr = iattr; + rsp->status_code_specific.invalid.ipo = ipo; +} + +#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field) \ + nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field)) +#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \ + nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field)) + +static void +nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr) +{ + if (!ctrlr) { + SPDK_ERRLOG("Controller is NULL\n"); + return; + } + + if (ctrlr->keep_alive_poller == NULL) { + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Stop keep alive poller\n"); + spdk_poller_unregister(&ctrlr->keep_alive_poller); +} + +static void +nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status) +{ + if (status == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr disconnect qpairs complete successfully\n"); + } else { + SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n"); + } +} + +static int +_nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i, bool include_admin) +{ + int rc = 0; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_qpair *qpair, *temp_qpair; + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group; + + ctrlr = spdk_io_channel_iter_get_ctx(i); + ch = 
spdk_io_channel_iter_get_channel(i); + group = spdk_io_channel_get_ctx(ch); + + TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) { + if (qpair->ctrlr == ctrlr && (include_admin || !nvmf_qpair_is_admin_queue(qpair))) { + rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + if (rc) { + SPDK_ERRLOG("Qpair disconnect failed\n"); + return rc; + } + } + } + + return rc; +} + +static void +nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i) +{ + spdk_for_each_channel_continue(i, _nvmf_ctrlr_disconnect_qpairs_on_pg(i, true)); +} + +static void +nvmf_ctrlr_disconnect_io_qpairs_on_pg(struct spdk_io_channel_iter *i) +{ + spdk_for_each_channel_continue(i, _nvmf_ctrlr_disconnect_qpairs_on_pg(i, false)); +} + +static int +nvmf_ctrlr_keep_alive_poll(void *ctx) +{ + uint64_t keep_alive_timeout_tick; + uint64_t now = spdk_get_ticks(); + struct spdk_nvmf_ctrlr *ctrlr = ctx; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Polling ctrlr keep alive timeout\n"); + + /* If the Keep alive feature is in use and the timer expires */ + keep_alive_timeout_tick = ctrlr->last_keep_alive_tick + + ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000); + if (now > keep_alive_timeout_tick) { + SPDK_NOTICELOG("Disconnecting host from subsystem %s due to keep alive timeout.\n", + ctrlr->subsys->subnqn); + /* set the Controller Fatal Status bit to '1' */ + if (ctrlr->vcprop.csts.bits.cfs == 0) { + ctrlr->vcprop.csts.bits.cfs = 1; + + /* + * disconnect qpairs, terminate Transport connection + * destroy ctrlr, break the host to controller association + * disconnect qpairs with qpair->ctrlr == ctrlr + */ + spdk_for_each_channel(ctrlr->subsys->tgt, + nvmf_ctrlr_disconnect_qpairs_on_pg, + ctrlr, + nvmf_ctrlr_disconnect_qpairs_done); + } + } + + return SPDK_POLLER_BUSY; +} + +static void +nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr) +{ + if (!ctrlr) { + SPDK_ERRLOG("Controller is NULL\n"); + return; + } + + /* if cleared to 0 then the Keep Alive Timer 
is disabled */ + if (ctrlr->feat.keep_alive_timer.bits.kato != 0) { + + ctrlr->last_keep_alive_tick = spdk_get_ticks(); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Ctrlr add keep alive poller\n"); + ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr, + ctrlr->feat.keep_alive_timer.bits.kato * 1000); + } +} + +static void +ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_fabric_connect_rsp *rsp) +{ + assert(ctrlr->admin_qpair->group->thread == spdk_get_thread()); + + /* check if we would exceed ctrlr connection limit */ + if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) { + SPDK_ERRLOG("Requested QID %u but Max QID is %u\n", + qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return; + } + + if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) { + SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return; + } + + qpair->ctrlr = ctrlr; + spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid); + + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + rsp->status_code_specific.success.cntlid = ctrlr->cntlid; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "connect capsule response: cntlid = 0x%04x\n", + rsp->status_code_specific.success.cntlid); +} + +static void +_nvmf_ctrlr_add_admin_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + ctrlr->admin_qpair = qpair; + nvmf_ctrlr_start_keep_alive_timer(ctrlr); + ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp); + _nvmf_request_complete(req); +} + +static void +_nvmf_subsystem_add_ctrlr(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; 
+ struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + if (nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) { + SPDK_ERRLOG("Unable to add controller to subsystem\n"); + spdk_bit_array_free(&ctrlr->qpair_mask); + free(ctrlr); + qpair->ctrlr = NULL; + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_nvmf_request_complete(req); + return; + } + + spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_admin_qpair, req); +} + +static void +nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr_data *cdata) +{ + cdata->kas = KAS_DEFAULT_VALUE; + cdata->sgls.supported = 1; + cdata->sgls.keyed_sgl = 1; + cdata->sgls.sgl_offset = 1; + cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16; + cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16; + cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16; + cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */ + cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC; + cdata->nvmf_specific.msdbd = 1; + + if (transport->ops->cdata_init) { + transport->ops->cdata_init(transport, subsystem, cdata); + } +} + +static struct spdk_nvmf_ctrlr * +nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_request *req, + struct spdk_nvmf_fabric_connect_cmd *connect_cmd, + struct spdk_nvmf_fabric_connect_data *connect_data) +{ + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_transport *transport; + + ctrlr = calloc(1, sizeof(*ctrlr)); + if (ctrlr == NULL) { + SPDK_ERRLOG("Memory allocation failed\n"); + return NULL; + } + + TAILQ_INIT(&ctrlr->log_head); + ctrlr->subsys = subsystem; + ctrlr->thread = req->qpair->group->thread; + + transport = req->qpair->transport; + ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr); + if 
(!ctrlr->qpair_mask) { + SPDK_ERRLOG("Failed to allocate controller qpair mask\n"); + free(ctrlr); + return NULL; + } + + nvmf_ctrlr_cdata_init(transport, subsystem, &ctrlr->cdata); + + /* + * KAS: This field indicates the granularity of the Keep Alive Timer in 100ms units. + * If this field is cleared to 0h, then Keep Alive is not supported. + */ + if (ctrlr->cdata.kas) { + ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato, + KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) * + KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS; + } + + ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1; + ctrlr->feat.volatile_write_cache.bits.wce = 1; + + if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + /* + * If keep-alive timeout is not set, discovery controllers use some + * arbitrary high value in order to cleanup stale discovery sessions + * + * From the 1.0a nvme-of spec: + * "The Keep Alive command is reserved for + * Discovery controllers. A transport may specify a + * fixed Discovery controller activity timeout value + * (e.g., 2 minutes). If no commands are received + * by a Discovery controller within that time + * period, the controller may perform the + * actions for Keep Alive Timer expiration". + * kato is in millisecond. 
+ */ + if (ctrlr->feat.keep_alive_timer.bits.kato == 0) { + ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS; + } + } + + /* Subtract 1 for admin queue, 1 for 0's based */ + ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 - + 1; + ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 - + 1; + + spdk_uuid_copy(&ctrlr->hostid, (struct spdk_uuid *)connect_data->hostid); + memcpy(ctrlr->hostnqn, connect_data->hostnqn, sizeof(ctrlr->hostnqn)); + + ctrlr->vcprop.cap.raw = 0; + ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */ + ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth - + 1; /* max queue depth */ + ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */ + ctrlr->vcprop.cap.bits.to = 1; /* ready timeout - 500 msec units */ + ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */ + ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */ + ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */ + ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */ + + /* Version Supported: 1.3 */ + ctrlr->vcprop.vs.bits.mjr = 1; + ctrlr->vcprop.vs.bits.mnr = 3; + ctrlr->vcprop.vs.bits.ter = 0; + + ctrlr->vcprop.cc.raw = 0; + ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */ + + ctrlr->vcprop.csts.raw = 0; + ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */ + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "vs 0x%x\n", ctrlr->vcprop.vs.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cc 0x%x\n", ctrlr->vcprop.cc.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "csts 0x%x\n", ctrlr->vcprop.csts.raw); + + ctrlr->dif_insert_or_strip = transport->opts.dif_insert_or_strip; + + req->qpair->ctrlr = ctrlr; + spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_add_ctrlr, req); + + return ctrlr; +} + +static void +_nvmf_ctrlr_destruct(void *ctx) +{ + struct 
spdk_nvmf_ctrlr *ctrlr = ctx; + struct spdk_nvmf_reservation_log *log, *log_tmp; + + nvmf_ctrlr_stop_keep_alive_timer(ctrlr); + + TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) { + TAILQ_REMOVE(&ctrlr->log_head, log, link); + free(log); + } + free(ctrlr); +} + +void +nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr) +{ + nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr); + + spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr); +} + +static void +nvmf_ctrlr_add_io_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + /* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect. + * For error case, the value should be NULL. So set it to NULL at first. + */ + qpair->ctrlr = NULL; + + if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + SPDK_ERRLOG("I/O connect not allowed on discovery controller\n"); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (!ctrlr->vcprop.cc.bits.en) { + SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n"); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) { + SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n", + ctrlr->vcprop.cc.bits.iosqes); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) { + SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n", + ctrlr->vcprop.cc.bits.iocqes); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp); +end: + spdk_nvmf_request_complete(req); +} + +static void +_nvmf_ctrlr_add_io_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct 
spdk_nvmf_fabric_connect_data *data = req->data; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_qpair *admin_qpair; + struct spdk_nvmf_tgt *tgt = qpair->transport->tgt; + struct spdk_nvmf_subsystem *subsystem; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect I/O Queue for controller id 0x%x\n", data->cntlid); + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn); + /* We already checked this in spdk_nvmf_ctrlr_connect */ + assert(subsystem != NULL); + + ctrlr = nvmf_subsystem_get_ctrlr(subsystem, data->cntlid); + if (ctrlr == NULL) { + SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid); + spdk_nvmf_request_complete(req); + return; + } + + admin_qpair = ctrlr->admin_qpair; + qpair->ctrlr = ctrlr; + spdk_thread_send_msg(admin_qpair->group->thread, nvmf_ctrlr_add_io_qpair, req); +} + +static bool +nvmf_qpair_access_allowed(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_subsystem *subsystem, + const char *hostnqn) +{ + struct spdk_nvme_transport_id listen_trid = {}; + + if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) { + SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subsystem->subnqn, hostnqn); + return false; + } + + if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) { + SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n", + subsystem->subnqn); + return false; + } + + if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) { + SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n", + subsystem->subnqn, hostnqn); + return false; + } + + return true; +} + +static int +_nvmf_ctrlr_connect(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fabric_connect_data *data = req->data; + struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = 
req->qpair; + struct spdk_nvmf_transport *transport = qpair->transport; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_subsystem *subsystem; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "recfmt 0x%x qid %u sqsize %u\n", + cmd->recfmt, cmd->qid, cmd->sqsize); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect data:\n"); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " cntlid: 0x%04x\n", data->cntlid); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n", + ntohl(*(uint32_t *)&data->hostid[0]), + ntohs(*(uint16_t *)&data->hostid[4]), + ntohs(*(uint16_t *)&data->hostid[6]), + data->hostid[8], + data->hostid[9], + ntohs(*(uint16_t *)&data->hostid[10]), + ntohl(*(uint32_t *)&data->hostid[12])); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " subnqn: \"%s\"\n", data->subnqn); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostnqn: \"%s\"\n", data->hostnqn); + + subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn); + if (!subsystem) { + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (cmd->recfmt != 0) { + SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* + * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and + * strictly less than max_aq_depth (admin queues) or max_queue_depth (io queues). 
+ */ + if (cmd->sqsize == 0) { + SPDK_ERRLOG("Invalid SQSIZE = 0\n"); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (cmd->qid == 0) { + if (cmd->sqsize >= transport->opts.max_aq_depth) { + SPDK_ERRLOG("Invalid SQSIZE for admin queue %u (min 1, max %u)\n", + cmd->sqsize, transport->opts.max_aq_depth - 1); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + } else if (cmd->sqsize >= transport->opts.max_queue_depth) { + SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n", + cmd->sqsize, transport->opts.max_queue_depth - 1); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + qpair->sq_head_max = cmd->sqsize; + qpair->qid = cmd->qid; + + if (0 == qpair->qid) { + qpair->group->stat.admin_qpairs++; + } else { + qpair->group->stat.io_qpairs++; + } + + if (cmd->qid == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid); + + if (data->cntlid != 0xFFFF) { + /* This NVMf target only supports dynamic mode. 
*/ + SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Establish a new ctrlr */ + ctrlr = nvmf_ctrlr_create(subsystem, req, cmd, data); + if (!ctrlr) { + SPDK_ERRLOG("nvmf_ctrlr_create() failed\n"); + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } else { + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + } else { + spdk_thread_send_msg(subsystem->thread, _nvmf_ctrlr_add_io_qpair, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } +} + +static inline bool +nvmf_request_is_fabric_connect(struct spdk_nvmf_request *req) +{ + return req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC && + req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT; +} + +static struct spdk_nvmf_subsystem_poll_group * +nvmf_subsystem_pg_from_connect_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fabric_connect_data *data; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + assert(nvmf_request_is_fabric_connect(req)); + assert(req->qpair->ctrlr == NULL); + + data = req->data; + tgt = req->qpair->transport->tgt; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn); + if (subsystem == NULL) { + return NULL; + } + + return &req->qpair->group->sgroups[subsystem->id]; +} + +int +spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup; + enum spdk_nvmf_request_exec_status status; + + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + if (!sgroup) { + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn); + status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + goto out; + } + + sgroup->io_outstanding++; + TAILQ_INSERT_TAIL(&qpair->outstanding, req, link); + + status = 
_nvmf_ctrlr_connect(req); + +out: + if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + _nvmf_request_complete(req); + } + + return status; +} + +static int +nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fabric_connect_data *data = req->data; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_transport *transport = req->qpair->transport; + struct spdk_nvmf_subsystem *subsystem; + + if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) { + SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length); + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn); + if (!subsystem) { + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) || + (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) || + (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) || + (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) { + SPDK_ERRLOG("Subsystem '%s' is not ready\n", subsystem->subnqn); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Ensure that hostnqn is null terminated */ + if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) { + SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n"); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (!nvmf_qpair_access_allowed(req->qpair, subsystem, data->hostnqn)) { + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return _nvmf_ctrlr_connect(req); +} + +static void +nvmf_ctrlr_cc_reset_done(struct spdk_io_channel_iter *i, int status) +{ + struct 
spdk_nvmf_ctrlr *ctrlr = spdk_io_channel_iter_get_ctx(i); + + if (status < 0) { + SPDK_ERRLOG("Fail to disconnect io ctrlr qpairs\n"); + assert(false); + } + + /* Only a subset of the registers are cleared out on a reset */ + ctrlr->vcprop.cc.raw = 0; + ctrlr->vcprop.csts.raw = 0; + +} + +const struct spdk_nvmf_registers * +spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr) +{ + return &ctrlr->vcprop; +} + +static uint64_t +nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.cap.raw; +} + +static uint64_t +nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.vs.raw; +} + +static uint64_t +nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.cc.raw; +} + +static bool +nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + union spdk_nvme_cc_register cc, diff; + + cc.raw = value; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "new CC: 0x%08x\n", cc.raw); + + /* + * Calculate which bits changed between the current and new CC. + * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed. 
+ */ + diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw; + + if (diff.bits.en) { + if (cc.bits.en) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Enable!\n"); + ctrlr->vcprop.cc.bits.en = 1; + ctrlr->vcprop.csts.bits.rdy = 1; + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Disable!\n"); + ctrlr->vcprop.cc.bits.en = 0; + spdk_for_each_channel(ctrlr->subsys->tgt, + nvmf_ctrlr_disconnect_io_qpairs_on_pg, + ctrlr, + nvmf_ctrlr_cc_reset_done); + } + diff.bits.en = 0; + } + + if (diff.bits.shn) { + if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || + cc.bits.shn == SPDK_NVME_SHN_ABRUPT) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Shutdown %u%ub!\n", + cc.bits.shn >> 1, cc.bits.shn & 1); + ctrlr->vcprop.cc.bits.shn = cc.bits.shn; + ctrlr->vcprop.cc.bits.en = 0; + ctrlr->vcprop.csts.bits.rdy = 0; + ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; + } else if (cc.bits.shn == 0) { + ctrlr->vcprop.cc.bits.shn = 0; + } else { + SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n", + cc.bits.shn >> 1, cc.bits.shn & 1); + return false; + } + diff.bits.shn = 0; + } + + if (diff.bits.iosqes) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOSQES = %u (%u bytes)\n", + cc.bits.iosqes, 1u << cc.bits.iosqes); + ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes; + diff.bits.iosqes = 0; + } + + if (diff.bits.iocqes) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOCQES = %u (%u bytes)\n", + cc.bits.iocqes, 1u << cc.bits.iocqes); + ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes; + diff.bits.iocqes = 0; + } + + if (diff.bits.ams) { + SPDK_ERRLOG("Arbitration Mechanism Selected (AMS) 0x%x not supported!\n", cc.bits.ams); + return false; + } + + if (diff.bits.mps) { + SPDK_ERRLOG("Memory Page Size (MPS) %u KiB not supported!\n", (1 << (2 + cc.bits.mps))); + return false; + } + + if (diff.bits.css) { + SPDK_ERRLOG("I/O Command Set Selected (CSS) 0x%x not supported!\n", cc.bits.css); + return false; + } + + if (diff.raw != 0) { + SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", 
diff.raw); + return false; + } + + return true; +} + +static uint64_t +nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.csts.raw; +} + +static uint64_t +nvmf_prop_get_aqa(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.aqa.raw; +} + +static bool +nvmf_prop_set_aqa(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + union spdk_nvme_aqa_register aqa; + + aqa.raw = value; + + if (aqa.bits.asqs > ctrlr->vcprop.cap.bits.mqes || + aqa.bits.acqs > ctrlr->vcprop.cap.bits.mqes) { + return false; + } + + ctrlr->vcprop.aqa.raw = value; + + return true; +} + +static uint64_t +nvmf_prop_get_asq(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.asq; +} + +static bool +nvmf_prop_set_asq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + ctrlr->vcprop.asq = (ctrlr->vcprop.asq & (0xFFFFFFFFULL << 32ULL)) | value; + + return true; +} + +static bool +nvmf_prop_set_asq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + ctrlr->vcprop.asq = (ctrlr->vcprop.asq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL); + + return true; +} + +static uint64_t +nvmf_prop_get_acq(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.acq; +} + +static bool +nvmf_prop_set_acq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + ctrlr->vcprop.acq = (ctrlr->vcprop.acq & (0xFFFFFFFFULL << 32ULL)) | value; + + return true; +} + +static bool +nvmf_prop_set_acq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value) +{ + ctrlr->vcprop.acq = (ctrlr->vcprop.acq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL); + + return true; +} + +struct nvmf_prop { + uint32_t ofst; + uint8_t size; + char name[11]; + uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr); + bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value); + bool (*set_upper_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value); +}; + +#define PROP(field, size, get_cb, set_cb, set_upper_cb) \ + { \ + offsetof(struct spdk_nvme_registers, field), \ + size, \ + #field, \ + get_cb, set_cb, set_upper_cb 
\ + } + +static const struct nvmf_prop nvmf_props[] = { + PROP(cap, 8, nvmf_prop_get_cap, NULL, NULL), + PROP(vs, 4, nvmf_prop_get_vs, NULL, NULL), + PROP(cc, 4, nvmf_prop_get_cc, nvmf_prop_set_cc, NULL), + PROP(csts, 4, nvmf_prop_get_csts, NULL, NULL), + PROP(aqa, 4, nvmf_prop_get_aqa, nvmf_prop_set_aqa, NULL), + PROP(asq, 8, nvmf_prop_get_asq, nvmf_prop_set_asq_lower, nvmf_prop_set_asq_upper), + PROP(acq, 8, nvmf_prop_get_acq, nvmf_prop_set_acq_lower, nvmf_prop_set_acq_upper), +}; + +static const struct nvmf_prop * +find_prop(uint32_t ofst, uint8_t size) +{ + size_t i; + + for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) { + const struct nvmf_prop *prop = &nvmf_props[i]; + + if ((ofst >= prop->ofst) && (ofst + size <= prop->ofst + prop->size)) { + return prop; + } + } + + return NULL; +} + +static int +nvmf_property_get(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd; + struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp; + const struct nvmf_prop *prop; + uint8_t size; + + response->status.sc = 0; + response->value.u64 = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x\n", + cmd->attrib.size, cmd->ofst); + + switch (cmd->attrib.size) { + case SPDK_NVMF_PROP_SIZE_4: + size = 4; + break; + case SPDK_NVMF_PROP_SIZE_8: + size = 8; + break; + default: + SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + prop = find_prop(cmd->ofst, size); + if (prop == NULL || prop->get_cb == NULL) { + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name); + + response->value.u64 = prop->get_cb(ctrlr); + + 
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "response value: 0x%" PRIx64 "\n", response->value.u64); + + if (size != prop->size) { + /* The size must be 4 and the prop->size is 8. Figure out which part of the property to read. */ + assert(size == 4); + assert(prop->size == 8); + + if (cmd->ofst == prop->ofst) { + /* Keep bottom 4 bytes only */ + response->value.u64 &= 0xFFFFFFFF; + } else { + /* Keep top 4 bytes only */ + response->value.u64 >>= 32; + } + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_property_set(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + const struct nvmf_prop *prop; + uint64_t value; + uint8_t size; + bool ret; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x, value 0x%" PRIx64 "\n", + cmd->attrib.size, cmd->ofst, cmd->value.u64); + + switch (cmd->attrib.size) { + case SPDK_NVMF_PROP_SIZE_4: + size = 4; + break; + case SPDK_NVMF_PROP_SIZE_8: + size = 8; + break; + default: + SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + prop = find_prop(cmd->ofst, size); + if (prop == NULL || prop->set_cb == NULL) { + SPDK_ERRLOG("Invalid offset 0x%x\n", cmd->ofst); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name); + + value = cmd->value.u64; + + if (prop->size == 4) { + ret = prop->set_cb(ctrlr, (uint32_t)value); + } else if (size != prop->size) { + /* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. 
*/ + assert(size == 4); + assert(prop->size == 8); + + if (cmd->ofst == prop->ofst) { + ret = prop->set_cb(ctrlr, (uint32_t)value); + } else { + ret = prop->set_upper_cb(ctrlr, (uint32_t)value); + } + } else { + ret = prop->set_cb(ctrlr, (uint32_t)value); + if (ret) { + ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32)); + } + } + + if (!ret) { + SPDK_ERRLOG("prop set_cb failed\n"); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.arbitration.raw = cmd->cdw11; + ctrlr->feat.arbitration.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11); + + /* Only PS = 0 is allowed, since we report NPSS = 0 */ + if (cmd->cdw11_bits.feat_power_management.bits.ps != 0) { + SPDK_ERRLOG("Invalid power state %u\n", cmd->cdw11_bits.feat_power_management.bits.ps); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ctrlr->feat.power_management.raw = cmd->cdw11; + ctrlr->feat.power_management.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static bool +temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts) +{ + /* 
+ * Valid TMPSEL values: + * 0000b - 1000b: temperature sensors + * 1111b: set all implemented temperature sensors + */ + if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) { + /* 1001b - 1110b: reserved */ + SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel); + return false; + } + + /* + * Valid THSEL values: + * 00b: over temperature threshold + * 01b: under temperature threshold + */ + if (opts->bits.thsel > 1) { + /* 10b - 11b: reserved */ + SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel); + return false; + } + + return true; +} + +static int +nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11); + + if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* TODO: no sensors implemented - ignore new values */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11); + + if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* TODO: no sensors implemented - return 0 for all thresholds */ + rsp->cdw0 = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct 
spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11); + + if (cmd->cdw11_bits.feat_error_recovery.bits.dulbe) { + /* + * Host is not allowed to set this bit, since we don't advertise it in + * Identify Namespace. + */ + SPDK_ERRLOG("Host set unsupported DULBE bit\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ctrlr->feat.error_recovery.raw = cmd->cdw11; + ctrlr->feat.error_recovery.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.volatile_write_cache.raw = cmd->cdw11; + ctrlr->feat.volatile_write_cache.bits.reserved = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache %s\n", + ctrlr->feat.volatile_write_cache.bits.wce ? 
"Enabled" : "Disabled"); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.write_atomicity.raw = cmd->cdw11; + ctrlr->feat.write_atomicity.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + SPDK_ERRLOG("Set Features - Host Identifier not allowed\n"); + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Host Identifier\n"); + + if (!cmd->cdw11_bits.feat_host_identifier.bits.exhid) { + /* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */ + SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (req->data == NULL || req->length < sizeof(ctrlr->hostid)) { + SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + spdk_uuid_copy((struct spdk_uuid *)req->data, &ctrlr->hostid); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_get_features_reservation_notification_mask(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + 
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_ns *ns; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "get Features - Reservation Notificaton Mask\n"); + + if (cmd->nsid == 0xffffffffu) { + SPDK_ERRLOG("get Features - Invalid Namespace ID\n"); + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid); + if (ns == NULL) { + SPDK_ERRLOG("Set Features - Invalid Namespace ID\n"); + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + rsp->cdw0 = ns->mask; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_reservation_notification_mask(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_ns *ns; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Reservation Notificaton Mask\n"); + + if (cmd->nsid == 0xffffffffu) { + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + ns->mask = cmd->cdw11; + } + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid); + if (ns == NULL) { + SPDK_ERRLOG("Set Features - Invalid Namespace ID\n"); + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + ns->mask = cmd->cdw11; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_get_features_reservation_persistence(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + struct spdk_nvmf_ns *ns; + + 
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Reservation Persistence\n"); + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid); + /* NSID with 0xffffffffu also included */ + if (ns == NULL) { + SPDK_ERRLOG("Get Features - Invalid Namespace ID\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + response->cdw0 = ns->ptpl_activated; + + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_SUCCESS; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_reservation_persistence(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + struct spdk_nvmf_ns *ns; + bool ptpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Reservation Persistence\n"); + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid); + ptpl = cmd->cdw11_bits.feat_rsv_persistence.bits.ptpl; + + if (cmd->nsid != 0xffffffffu && ns && ns->ptpl_file) { + ns->ptpl_activated = ptpl; + } else if (cmd->nsid == 0xffffffffu) { + for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns && ns->ptpl_file; + ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) { + ns->ptpl_activated = ptpl; + } + } else { + SPDK_ERRLOG("Set Features - Invalid Namespace ID or Reservation Configuration\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* TODO: Feature not changeable for now */ + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = 
req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11); + + /* + * if attempts to disable keep alive by setting kato to 0h + * a status value of keep alive invalid shall be returned + */ + if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato == 0) { + rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID; + } else if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) { + ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS; + } else { + /* round up to milliseconds */ + ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up( + cmd->cdw11_bits.feat_keep_alive_timer.bits.kato, + KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) * + KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS; + } + + /* + * if change the keep alive timeout value successfully + * update the keep alive poller. + */ + if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato != 0) { + if (ctrlr->keep_alive_poller != NULL) { + spdk_poller_unregister(&ctrlr->keep_alive_poller); + } + ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr, + ctrlr->feat.keep_alive_timer.bits.kato * 1000); + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n", + ctrlr->feat.keep_alive_timer.bits.kato); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint32_t count; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Number of Queues, cdw11 0x%x\n", + req->cmd->nvme_cmd.cdw11); + + count = spdk_bit_array_count_set(ctrlr->qpair_mask); + /* verify that the controller is ready to process commands */ + if (count > 1) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Queue pairs already active!\n"); + 
rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + } else { + /* + * Ignore the value requested by the host - + * always return the pre-configured value based on max_qpairs_allowed. + */ + rsp->cdw0 = ctrlr->feat.number_of_queues.raw; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Async Event Configuration, cdw11 0x%08x\n", + cmd->cdw11); + ctrlr->feat.async_event_configuration.raw = cmd->cdw11; + ctrlr->feat.async_event_configuration.bits.reserved = 0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_subsystem_poll_group *sgroup; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n"); + + /* Four asynchronous events are supported for now */ + if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "AERL exceeded\n"); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (ctrlr->notice_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) { + rsp->cdw0 = ctrlr->notice_event.raw; + ctrlr->notice_event.raw = 0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (ctrlr->reservation_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_IO) { + rsp->cdw0 = ctrlr->reservation_event.raw; + ctrlr->reservation_event.raw = 0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* AER cmd is an exception */ + sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id]; + assert(sgroup != NULL); + sgroup->io_outstanding--; + + 
ctrlr->aer_req[ctrlr->nr_aer_reqs++] = req; + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static void +nvmf_get_firmware_slot_log_page(void *buffer, uint64_t offset, uint32_t length) +{ + struct spdk_nvme_firmware_page fw_page; + size_t copy_len; + + memset(&fw_page, 0, sizeof(fw_page)); + fw_page.afi.active_slot = 1; + fw_page.afi.next_reset_slot = 0; + spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' '); + + if (offset < sizeof(fw_page)) { + copy_len = spdk_min(sizeof(fw_page) - offset, length); + if (copy_len > 0) { + memcpy(buffer, (const char *)&fw_page + offset, copy_len); + } + } +} + +void +nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid) +{ + uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list); + uint16_t i; + bool found = false; + + for (i = 0; i < ctrlr->changed_ns_list_count; i++) { + if (ctrlr->changed_ns_list.ns_list[i] == nsid) { + /* nsid is already in the list */ + found = true; + break; + } + } + + if (!found) { + if (ctrlr->changed_ns_list_count == max_changes) { + /* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. 
*/ + ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu; + for (i = 1; i < max_changes; i++) { + ctrlr->changed_ns_list.ns_list[i] = 0; + } + } else { + ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid; + } + } +} + +static void +nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr, + void *buffer, uint64_t offset, uint32_t length) +{ + size_t copy_length; + + if (offset < sizeof(ctrlr->changed_ns_list)) { + copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset); + if (copy_length) { + memcpy(buffer, (char *)&ctrlr->changed_ns_list + offset, copy_length); + } + } + + /* Clear log page each time it is read */ + ctrlr->changed_ns_list_count = 0; + memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list)); +} + +/* The structure can be modified if we provide support for other commands in future */ +static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = { + .admin_cmds_supported = { + /* CSUPP, LBCC, NCC, NIC, CCC, CSE */ + /* Get Log Page */ + [SPDK_NVME_OPC_GET_LOG_PAGE] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Identify */ + [SPDK_NVME_OPC_IDENTIFY] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Abort */ + [SPDK_NVME_OPC_ABORT] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Set Features */ + [SPDK_NVME_OPC_SET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Get Features */ + [SPDK_NVME_OPC_GET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Async Event Request */ + [SPDK_NVME_OPC_ASYNC_EVENT_REQUEST] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Keep Alive */ + [SPDK_NVME_OPC_KEEP_ALIVE] = {1, 0, 0, 0, 0, 0, 0, 0}, + }, + .io_cmds_supported = { + /* FLUSH */ + [SPDK_NVME_OPC_FLUSH] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* WRITE */ + [SPDK_NVME_OPC_WRITE] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* READ */ + [SPDK_NVME_OPC_READ] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* WRITE ZEROES */ + [SPDK_NVME_OPC_WRITE_ZEROES] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* DATASET MANAGEMENT */ + [SPDK_NVME_OPC_DATASET_MANAGEMENT] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* COMPARE */ + 
[SPDK_NVME_OPC_COMPARE] = {1, 0, 0, 0, 0, 0, 0, 0}, + }, +}; + +static void +nvmf_get_cmds_and_effects_log_page(void *buffer, + uint64_t offset, uint32_t length) +{ + uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page); + size_t copy_len = 0; + size_t zero_len = length; + + if (offset < page_size) { + copy_len = spdk_min(page_size - offset, length); + zero_len -= copy_len; + memcpy(buffer, (char *)(&g_cmds_and_effect_log_page) + offset, copy_len); + } + + if (zero_len) { + memset((char *)buffer + copy_len, 0, zero_len); + } +} + +static void +nvmf_get_reservation_notification_log_page(struct spdk_nvmf_ctrlr *ctrlr, + void *data, uint64_t offset, uint32_t length) +{ + uint32_t unit_log_len, avail_log_len, next_pos, copy_len; + struct spdk_nvmf_reservation_log *log, *log_tmp; + uint8_t *buf = data; + + unit_log_len = sizeof(struct spdk_nvme_reservation_notification_log); + /* No available log, return 1 zeroed log page */ + if (!ctrlr->num_avail_log_pages) { + memset(buf, 0, spdk_min(length, unit_log_len)); + return; + } + + avail_log_len = ctrlr->num_avail_log_pages * unit_log_len; + if (offset >= avail_log_len) { + return; + } + + next_pos = copy_len = 0; + TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) { + TAILQ_REMOVE(&ctrlr->log_head, log, link); + ctrlr->num_avail_log_pages--; + + next_pos += unit_log_len; + if (next_pos > offset) { + copy_len = spdk_min(next_pos - offset, length); + memcpy(buf, &log->log, copy_len); + length -= copy_len; + offset += copy_len; + buf += copy_len; + } + free(log); + + if (length == 0) { + break; + } + } + return; +} + +static int +nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + uint64_t offset, len; + uint32_t numdl, numdu; + uint8_t lid; + + if (req->data == NULL) { + 
SPDK_ERRLOG("get log command with no buffer\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32); + if (offset & 3) { + SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + numdl = cmd->cdw10_bits.get_log_page.numdl; + numdu = cmd->cdw11_bits.get_log_page.numdu; + len = ((numdu << 16) + numdl + (uint64_t)1) * 4; + if (len > req->length) { + SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n", + len, req->length); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + lid = cmd->cdw10_bits.get_log_page.lid; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 "\n", + lid, offset, len); + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + switch (lid) { + case SPDK_NVME_LOG_DISCOVERY: + nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn, req->iov, req->iovcnt, offset, + len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + default: + goto invalid_log_page; + } + } else { + switch (lid) { + case SPDK_NVME_LOG_ERROR: + case SPDK_NVME_LOG_HEALTH_INFORMATION: + /* TODO: actually fill out log page data */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_FIRMWARE_SLOT: + nvmf_get_firmware_slot_log_page(req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG: + nvmf_get_cmds_and_effects_log_page(req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_CHANGED_NS_LIST: + nvmf_get_changed_ns_list_log_page(ctrlr, req->data, offset, len); + return 
SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_RESERVATION_NOTIFICATION: + nvmf_get_reservation_notification_log_page(ctrlr, req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + default: + goto invalid_log_page; + } + } + +invalid_log_page: + SPDK_ERRLOG("Unsupported Get Log Page 0x%02X\n", lid); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +int +spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + struct spdk_nvme_ns_data *nsdata) +{ + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvmf_ns *ns; + uint32_t max_num_blocks; + + if (cmd->nsid == 0 || cmd->nsid > subsystem->max_nsid) { + SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", cmd->nsid); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid); + if (ns == NULL || ns->bdev == NULL) { + /* + * Inactive namespaces should return a zero filled data structure. + * The data buffer is already zeroed by nvmf_ctrlr_process_admin_cmd(), + * so we can just return early here. 
+ */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Identify Namespace for inactive NSID %u\n", cmd->nsid); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip); + + /* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */ + max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size / + (1U << nsdata->lbaf[nsdata->flbas.format].lbads); + if (nsdata->noiob > max_num_blocks) { + nsdata->noiob = max_num_blocks; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static void +nvmf_ctrlr_populate_oacs(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvme_ctrlr_data *cdata) +{ + cdata->oacs.virtualization_management = + g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT].hdlr != NULL; + cdata->oacs.nvme_mi = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_SEND].hdlr != NULL + && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_RECEIVE].hdlr != NULL; + cdata->oacs.directives = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_SEND].hdlr != NULL + && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_RECEIVE].hdlr != NULL; + cdata->oacs.device_self_test = + g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DEVICE_SELF_TEST].hdlr != NULL; + cdata->oacs.ns_manage = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_MANAGEMENT].hdlr != NULL + && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_ATTACHMENT].hdlr != NULL; + cdata->oacs.firmware = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD].hdlr != + NULL + && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_COMMIT].hdlr != NULL; + cdata->oacs.format = + g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FORMAT_NVM].hdlr != NULL; + cdata->oacs.security = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_SEND].hdlr != NULL + && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_RECEIVE].hdlr != NULL; + 
cdata->oacs.get_lba_status = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_GET_LBA_STATUS].hdlr != + NULL; +} + +int +spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata) +{ + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport; + + /* + * Common fields for discovery and NVM subsystems + */ + spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' '); + assert((transport->opts.max_io_size % 4096) == 0); + cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096); + cdata->cntlid = ctrlr->cntlid; + cdata->ver = ctrlr->vcprop.vs; + cdata->aerl = NVMF_MAX_ASYNC_EVENTS - 1; + cdata->lpa.edlp = 1; + cdata->elpe = 127; + cdata->maxcmd = transport->opts.max_queue_depth; + cdata->sgls = ctrlr->cdata.sgls; + cdata->fuses.compare_and_write = 1; + cdata->acwu = 1; + spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0'); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "sgls data: 0x%x\n", from_le32(&cdata->sgls)); + + /* + * NVM subsystem fields (reserved for discovery subsystems) + */ + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) { + spdk_strcpy_pad(cdata->mn, spdk_nvmf_subsystem_get_mn(subsystem), sizeof(cdata->mn), ' '); + spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' '); + cdata->kas = ctrlr->cdata.kas; + + cdata->rab = 6; + cdata->cmic.multi_port = 1; + cdata->cmic.multi_host = 1; + cdata->oaes.ns_attribute_notices = 1; + cdata->ctratt.host_id_exhid_supported = 1; + /* TODO: Concurrent execution of multiple abort commands. 
*/ + cdata->acl = 0; + cdata->aerl = 0; + cdata->frmw.slot1_ro = 1; + cdata->frmw.num_slots = 1; + + cdata->lpa.celp = 1; /* Command Effects log page supported */ + + cdata->sqes.min = 6; + cdata->sqes.max = 6; + cdata->cqes.min = 4; + cdata->cqes.max = 4; + cdata->nn = subsystem->max_nsid; + cdata->vwc.present = 1; + cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED; + + cdata->nvmf_specific = ctrlr->cdata.nvmf_specific; + + cdata->oncs.dsm = nvmf_ctrlr_dsm_supported(ctrlr); + cdata->oncs.write_zeroes = nvmf_ctrlr_write_zeroes_supported(ctrlr); + cdata->oncs.reservations = 1; + + nvmf_ctrlr_populate_oacs(ctrlr, cdata); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ioccsz 0x%x\n", + cdata->nvmf_specific.ioccsz); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: iorcsz 0x%x\n", + cdata->nvmf_specific.iorcsz); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: icdoff 0x%x\n", + cdata->nvmf_specific.icdoff); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ctrattr 0x%x\n", + *(uint8_t *)&cdata->nvmf_specific.ctrattr); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: msdbd 0x%x\n", + cdata->nvmf_specific.msdbd); + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + struct spdk_nvme_ns_list *ns_list) +{ + struct spdk_nvmf_ns *ns; + uint32_t count = 0; + + if (cmd->nsid >= 0xfffffffeUL) { + SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid); + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + if (ns->opts.nsid <= cmd->nsid) { + continue; + } + + ns_list->ns_list[count++] = ns->opts.nsid; + if (count == SPDK_COUNTOF(ns_list->ns_list)) { + break; + } + } + + return 
SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static void +_add_ns_id_desc(void **buf_ptr, size_t *buf_remain, + enum spdk_nvme_nidt type, + const void *data, size_t data_size) +{ + struct spdk_nvme_ns_id_desc *desc; + size_t desc_size = sizeof(*desc) + data_size; + + /* + * These should never fail in practice, since all valid NS ID descriptors + * should be defined so that they fit in the available 4096-byte buffer. + */ + assert(data_size > 0); + assert(data_size <= UINT8_MAX); + assert(desc_size < *buf_remain); + if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) { + return; + } + + desc = *buf_ptr; + desc->nidt = type; + desc->nidl = data_size; + memcpy(desc->nid, data, data_size); + + *buf_ptr += desc_size; + *buf_remain -= desc_size; +} + +static int +nvmf_ctrlr_identify_ns_id_descriptor_list( + struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + void *id_desc_list, size_t id_desc_list_size) +{ + struct spdk_nvmf_ns *ns; + size_t buf_remain = id_desc_list_size; + void *buf_ptr = id_desc_list; + + ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid); + if (ns == NULL || ns->bdev == NULL) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + +#define ADD_ID_DESC(type, data, size) \ + do { \ + if (!spdk_mem_all_zero(data, size)) { \ + _add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \ + } \ + } while (0) + + ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64)); + ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid)); + ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid)); + + /* + * The list is automatically 0-terminated because controller to host buffers in + * admin commands always get zeroed in nvmf_ctrlr_process_admin_cmd(). 
+ */ + +#undef ADD_ID_DESC + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_identify(struct spdk_nvmf_request *req) +{ + uint8_t cns; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + + if (req->data == NULL || req->length < 4096) { + SPDK_ERRLOG("identify command with invalid buffer\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + cns = cmd->cdw10_bits.identify.cns; + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY && + cns != SPDK_NVME_IDENTIFY_CTRLR) { + /* Discovery controllers only support Identify Controller */ + goto invalid_cns; + } + + switch (cns) { + case SPDK_NVME_IDENTIFY_NS: + return spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, req->data); + case SPDK_NVME_IDENTIFY_CTRLR: + return spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, req->data); + case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST: + return nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, req->data); + case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST: + return nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp, req->data, req->length); + default: + goto invalid_cns; + } + +invalid_cns: + SPDK_ERRLOG("Identify command with unsupported CNS 0x%02x\n", cns); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static bool +nvmf_qpair_abort_aer(struct spdk_nvmf_qpair *qpair, uint16_t cid) +{ + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvmf_request *req; + int i; + + if (!nvmf_qpair_is_admin_queue(qpair)) { + return false; + } + + for (i = 0; i < ctrlr->nr_aer_reqs; i++) { + if (ctrlr->aer_req[i]->cmd->nvme_cmd.cid == cid) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Aborting AER request\n"); + req = 
ctrlr->aer_req[i]; + ctrlr->aer_req[i] = NULL; + ctrlr->nr_aer_reqs--; + + /* Move the last req into the slot just vacated so that aer_reqs + * stays contiguous + */ + if (i < ctrlr->nr_aer_reqs) { + ctrlr->aer_req[i] = ctrlr->aer_req[ctrlr->nr_aer_reqs]; + ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL; + } + + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + _nvmf_request_complete(req); + return true; + } + } + + return false; +} + +static void +nvmf_qpair_abort_request(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_request *req) +{ + uint16_t cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; + + if (nvmf_qpair_abort_aer(qpair, cid)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "abort ctrlr=%p sqid=%u cid=%u successful\n", + qpair->ctrlr, qpair->qid, cid); + req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command successfully aborted */ + + spdk_nvmf_request_complete(req); + return; + } + + nvmf_transport_qpair_abort_request(qpair, req); +} + +static void +nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i); + + if (status == 0) { + /* There was no qpair whose ID matches SQID of the abort command. + * Hence call _nvmf_request_complete() here. + */ + _nvmf_request_complete(req); + } +} + +static void +nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i) +{ + struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch); + uint16_t sqid = req->cmd->nvme_cmd.cdw10_bits.abort.sqid; + struct spdk_nvmf_qpair *qpair; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) { + /* Found the qpair */ + + nvmf_qpair_abort_request(qpair, req); + + /* Return -1 for the status so the iteration across threads stops. 
*/ + spdk_for_each_channel_continue(i, -1); + return; + } + } + + spdk_for_each_channel_continue(i, 0); +} + +static int +nvmf_ctrlr_abort(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = 1U; /* Command not aborted */ + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + + /* Send a message to each poll group, searching for this ctrlr, sqid, and command. */ + spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt, + nvmf_ctrlr_abort_on_pg, + req, + nvmf_ctrlr_abort_done + ); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +int +nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_request *req_to_abort = req->req_to_abort; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + int rc; + + assert(req_to_abort != NULL); + + if (g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr && + nvmf_qpair_is_admin_queue(req_to_abort->qpair)) { + return g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr(req); + } + + rc = spdk_nvmf_request_get_bdev(req_to_abort->cmd->nvme_cmd.nsid, req_to_abort, + &bdev, &desc, &ch); + if (rc != 0) { + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return spdk_nvmf_bdev_ctrlr_abort_cmd(bdev, desc, ch, req, req_to_abort); +} + +static int +get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = cdw0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_ctrlr_get_features(struct spdk_nvmf_request *req) +{ + uint8_t feature; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + feature = cmd->cdw10_bits.get_features.fid; + switch (feature) { + case SPDK_NVME_FEAT_ARBITRATION: + return get_features_generic(req, ctrlr->feat.arbitration.raw); + case 
SPDK_NVME_FEAT_POWER_MANAGEMENT: + return get_features_generic(req, ctrlr->feat.power_management.raw); + case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD: + return nvmf_ctrlr_get_features_temperature_threshold(req); + case SPDK_NVME_FEAT_ERROR_RECOVERY: + return get_features_generic(req, ctrlr->feat.error_recovery.raw); + case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE: + return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw); + case SPDK_NVME_FEAT_NUMBER_OF_QUEUES: + return get_features_generic(req, ctrlr->feat.number_of_queues.raw); + case SPDK_NVME_FEAT_WRITE_ATOMICITY: + return get_features_generic(req, ctrlr->feat.write_atomicity.raw); + case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION: + return get_features_generic(req, ctrlr->feat.async_event_configuration.raw); + case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER: + return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw); + case SPDK_NVME_FEAT_HOST_IDENTIFIER: + return nvmf_ctrlr_get_features_host_identifier(req); + case SPDK_NVME_FEAT_HOST_RESERVE_MASK: + return nvmf_ctrlr_get_features_reservation_notification_mask(req); + case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST: + return nvmf_ctrlr_get_features_reservation_persistence(req); + default: + SPDK_ERRLOG("Get Features command with unsupported feature ID 0x%02x\n", feature); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } +} + +static int +nvmf_ctrlr_set_features(struct spdk_nvmf_request *req) +{ + uint8_t feature, save; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + /* + * Features are not saveable by the controller as indicated by + * ONCS field of the Identify Controller data. 
+ * */ + save = cmd->cdw10_bits.set_features.sv; + if (save) { + response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE; + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + feature = cmd->cdw10_bits.set_features.fid; + switch (feature) { + case SPDK_NVME_FEAT_ARBITRATION: + return nvmf_ctrlr_set_features_arbitration(req); + case SPDK_NVME_FEAT_POWER_MANAGEMENT: + return nvmf_ctrlr_set_features_power_management(req); + case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD: + return nvmf_ctrlr_set_features_temperature_threshold(req); + case SPDK_NVME_FEAT_ERROR_RECOVERY: + return nvmf_ctrlr_set_features_error_recovery(req); + case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE: + return nvmf_ctrlr_set_features_volatile_write_cache(req); + case SPDK_NVME_FEAT_NUMBER_OF_QUEUES: + return nvmf_ctrlr_set_features_number_of_queues(req); + case SPDK_NVME_FEAT_WRITE_ATOMICITY: + return nvmf_ctrlr_set_features_write_atomicity(req); + case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION: + return nvmf_ctrlr_set_features_async_event_configuration(req); + case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER: + return nvmf_ctrlr_set_features_keep_alive_timer(req); + case SPDK_NVME_FEAT_HOST_IDENTIFIER: + return nvmf_ctrlr_set_features_host_identifier(req); + case SPDK_NVME_FEAT_HOST_RESERVE_MASK: + return nvmf_ctrlr_set_features_reservation_notification_mask(req); + case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST: + return nvmf_ctrlr_set_features_reservation_persistence(req); + default: + SPDK_ERRLOG("Set Features command with unsupported feature ID 0x%02x\n", feature); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } +} + +static int +nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n"); + /* + * To handle keep alive just clear or reset the + * ctrlr based keep alive duration counter. 
+ * When added, a separate timer based process + * will monitor if the time since last recorded + * keep alive has exceeded the max duration and + * take appropriate action. + */ + ctrlr->last_keep_alive_tick = spdk_get_ticks(); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +int +nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int rc; + + if (ctrlr == NULL) { + SPDK_ERRLOG("Admin command sent before CONNECT\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (ctrlr->vcprop.cc.bits.en != 1) { + SPDK_ERRLOG("Admin command sent to disabled controller\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (req->data && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + memset(req->data, 0, req->length); + } + + if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + /* Discovery controllers only support Get Log Page, Identify and Keep Alive. */ + switch (cmd->opc) { + case SPDK_NVME_OPC_IDENTIFY: + case SPDK_NVME_OPC_GET_LOG_PAGE: + case SPDK_NVME_OPC_KEEP_ALIVE: + break; + default: + goto invalid_opcode; + } + } + + /* Call a custom adm cmd handler if set. 
Aborts are handled in a different path (see nvmf_passthru_admin_cmd) */ + if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) { + rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req); + if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + /* The handler took care of this command */ + return rc; + } + } + + switch (cmd->opc) { + case SPDK_NVME_OPC_GET_LOG_PAGE: + return nvmf_ctrlr_get_log_page(req); + case SPDK_NVME_OPC_IDENTIFY: + return nvmf_ctrlr_identify(req); + case SPDK_NVME_OPC_ABORT: + return nvmf_ctrlr_abort(req); + case SPDK_NVME_OPC_GET_FEATURES: + return nvmf_ctrlr_get_features(req); + case SPDK_NVME_OPC_SET_FEATURES: + return nvmf_ctrlr_set_features(req); + case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST: + return nvmf_ctrlr_async_event_request(req); + case SPDK_NVME_OPC_KEEP_ALIVE: + return nvmf_ctrlr_keep_alive(req); + + case SPDK_NVME_OPC_CREATE_IO_SQ: + case SPDK_NVME_OPC_CREATE_IO_CQ: + case SPDK_NVME_OPC_DELETE_IO_SQ: + case SPDK_NVME_OPC_DELETE_IO_CQ: + /* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */ + goto invalid_opcode; + + default: + goto invalid_opcode; + } + +invalid_opcode: + SPDK_ERRLOG("Unsupported admin opcode 0x%x\n", cmd->opc); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +int +nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_capsule_cmd *cap_hdr; + + cap_hdr = &req->cmd->nvmf_cmd; + + if (qpair->ctrlr == NULL) { + /* No ctrlr established yet; the only valid command is Connect */ + if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) { + return nvmf_ctrlr_cmd_connect(req); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Got fctype 0x%x, expected Connect\n", + cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return 
SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + } else if (nvmf_qpair_is_admin_queue(qpair)) { + /* + * Controller session is established, and this is an admin queue. + * Disallow Connect and allow other fabrics commands. + */ + switch (cap_hdr->fctype) { + case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET: + return nvmf_property_set(req); + case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET: + return nvmf_property_get(req); + default: + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "unknown fctype 0x%02x\n", + cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + } else { + /* Controller session is established, and this is an I/O queue */ + /* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } +} + +static inline int +nvmf_ctrlr_async_event_notification(struct spdk_nvmf_ctrlr *ctrlr, + union spdk_nvme_async_event_completion *event) +{ + struct spdk_nvmf_request *req; + struct spdk_nvme_cpl *rsp; + + assert(ctrlr->nr_aer_reqs > 0); + + req = ctrlr->aer_req[--ctrlr->nr_aer_reqs]; + rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = event->raw; + + _nvmf_request_complete(req); + ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL; + + return 0; +} + +int +nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr) +{ + union spdk_nvme_async_event_completion event = {0}; + + /* Users may disable the event notification */ + if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) { + return 0; + } + + event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE; + event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED; + event.bits.log_page_identifier = 
SPDK_NVME_LOG_CHANGED_NS_LIST; + + /* If there is no outstanding AER request, queue the event. Then + * if an AER is later submitted, this event can be sent as a + * response. + */ + if (ctrlr->nr_aer_reqs == 0) { + if (ctrlr->notice_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) { + return 0; + } + + ctrlr->notice_event.raw = event.raw; + return 0; + } + + return nvmf_ctrlr_async_event_notification(ctrlr, &event); +} + +void +nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr) +{ + union spdk_nvme_async_event_completion event = {0}; + + if (!ctrlr->num_avail_log_pages) { + return; + } + event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_IO; + event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL; + event.bits.log_page_identifier = SPDK_NVME_LOG_RESERVATION_NOTIFICATION; + + /* If there is no outstanding AER request, queue the event. Then + * if an AER is later submitted, this event can be sent as a + * response. + */ + if (ctrlr->nr_aer_reqs == 0) { + if (ctrlr->reservation_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_IO) { + return; + } + + ctrlr->reservation_event.raw = event.raw; + return; + } + + nvmf_ctrlr_async_event_notification(ctrlr, &event); +} + +void +nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + int i; + + if (!nvmf_qpair_is_admin_queue(qpair)) { + return; + } + + for (i = 0; i < ctrlr->nr_aer_reqs; i++) { + spdk_nvmf_request_free(ctrlr->aer_req[i]); + ctrlr->aer_req[i] = NULL; + } + + ctrlr->nr_aer_reqs = 0; +} + +void +nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr) +{ + struct spdk_nvmf_request *req; + int i; + + for (i = 0; i < ctrlr->nr_aer_reqs; i++) { + req = ctrlr->aer_req[i]; + + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + _nvmf_request_complete(req); + + ctrlr->aer_req[i] = NULL; + } + + ctrlr->nr_aer_reqs = 0; +} + 
+static void +_nvmf_ctrlr_add_reservation_log(void *ctx) +{ + struct spdk_nvmf_reservation_log *log = (struct spdk_nvmf_reservation_log *)ctx; + struct spdk_nvmf_ctrlr *ctrlr = log->ctrlr; + + ctrlr->log_page_count++; + + /* Maximum number of queued log pages is 255 */ + if (ctrlr->num_avail_log_pages == 0xff) { + struct spdk_nvmf_reservation_log *entry; + entry = TAILQ_LAST(&ctrlr->log_head, log_page_head); + entry->log.log_page_count = ctrlr->log_page_count; + free(log); + return; + } + + log->log.log_page_count = ctrlr->log_page_count; + log->log.num_avail_log_pages = ctrlr->num_avail_log_pages++; + TAILQ_INSERT_TAIL(&ctrlr->log_head, log, link); + + nvmf_ctrlr_async_event_reservation_notification(ctrlr); +} + +void +nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_ns *ns, + enum spdk_nvme_reservation_notification_log_page_type type) +{ + struct spdk_nvmf_reservation_log *log; + + switch (type) { + case SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY: + return; + case SPDK_NVME_REGISTRATION_PREEMPTED: + if (ns->mask & SPDK_NVME_REGISTRATION_PREEMPTED_MASK) { + return; + } + break; + case SPDK_NVME_RESERVATION_RELEASED: + if (ns->mask & SPDK_NVME_RESERVATION_RELEASED_MASK) { + return; + } + break; + case SPDK_NVME_RESERVATION_PREEMPTED: + if (ns->mask & SPDK_NVME_RESERVATION_PREEMPTED_MASK) { + return; + } + break; + default: + return; + } + + log = calloc(1, sizeof(*log)); + if (!log) { + SPDK_ERRLOG("Alloc log page failed, ignore the log\n"); + return; + } + log->ctrlr = ctrlr; + log->log.type = type; + log->log.nsid = ns->nsid; + + spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_reservation_log, log); +} + +/* Check from subsystem poll group's namespace information data structure */ +static bool +nvmf_ns_info_ctrlr_is_registrant(struct spdk_nvmf_subsystem_pg_ns_info *ns_info, + struct spdk_nvmf_ctrlr *ctrlr) +{ + uint32_t i; + + for (i = 0; i < SPDK_NVMF_MAX_NUM_REGISTRANTS; i++) { + if (!spdk_uuid_compare(&ns_info->reg_hostid[i], 
&ctrlr->hostid)) { + return true; + } + } + + return false; +} + +/* + * Check whether the NVMe command is permitted for the current controller (host). + */ +static int +nvmf_ns_reservation_request_check(struct spdk_nvmf_subsystem_pg_ns_info *ns_info, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + enum spdk_nvme_reservation_type rtype = ns_info->rtype; + uint8_t status = SPDK_NVME_SC_SUCCESS; + uint8_t racqa; + bool is_registrant; + + /* No valid reservation */ + if (!rtype) { + return 0; + } + + is_registrant = nvmf_ns_info_ctrlr_is_registrant(ns_info, ctrlr); + /* All registrants type and current ctrlr is a valid registrant */ + if ((rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS || + rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && is_registrant) { + return 0; + } else if (!spdk_uuid_compare(&ns_info->holder_id, &ctrlr->hostid)) { + return 0; + } + + /* Non-holder for current controller */ + switch (cmd->opc) { + case SPDK_NVME_OPC_READ: + case SPDK_NVME_OPC_COMPARE: + if (rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + if ((rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY || + rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && !is_registrant) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + } + break; + case SPDK_NVME_OPC_FLUSH: + case SPDK_NVME_OPC_WRITE: + case SPDK_NVME_OPC_WRITE_UNCORRECTABLE: + case SPDK_NVME_OPC_WRITE_ZEROES: + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + if (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE || + rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + if (!is_registrant) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + } + break; + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + racqa = cmd->cdw10_bits.resv_acquire.racqa; + if (racqa == SPDK_NVME_RESERVE_ACQUIRE) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + 
if (!is_registrant) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + } + break; + case SPDK_NVME_OPC_RESERVATION_RELEASE: + if (!is_registrant) { + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + } + break; + default: + break; + } + +exit: + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = status; + if (status == SPDK_NVME_SC_RESERVATION_CONFLICT) { + return -EPERM; + } + + return 0; +} + +static int +nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev, + struct spdk_bdev_desc *desc, struct spdk_io_channel *ch) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req; + int rc; + + if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) { + /* first fused operation (should be compare) */ + if (first_fused_req != NULL) { + struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl; + + SPDK_ERRLOG("Wrong sequence of fused operations\n"); + + /* abort req->qpair->first_fused_request and continue with new fused command */ + fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; + fused_response->status.sct = SPDK_NVME_SCT_GENERIC; + _nvmf_request_complete(first_fused_req); + } else if (cmd->opc != SPDK_NVME_OPC_COMPARE) { + SPDK_ERRLOG("Wrong op code of fused operations\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + req->qpair->first_fused_req = req; + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) { + /* second fused operation (should be write) */ + if (first_fused_req == NULL) { + SPDK_ERRLOG("Wrong sequence of fused operations\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } else if (cmd->opc != 
SPDK_NVME_OPC_WRITE) { + struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl; + + SPDK_ERRLOG("Wrong op code of fused operations\n"); + + /* abort req->qpair->first_fused_request and fail current command */ + fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; + fused_response->status.sct = SPDK_NVME_SCT_GENERIC; + _nvmf_request_complete(first_fused_req); + + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE; + req->qpair->first_fused_req = NULL; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* save request of first command to generate response later */ + req->first_fused_req = first_fused_req; + req->qpair->first_fused_req = NULL; + } else { + SPDK_ERRLOG("Invalid fused command fuse field.\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req); + + if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + if (spdk_nvme_cpl_is_error(rsp)) { + struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl; + + fused_response->status = rsp->status; + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED; + /* Complete first of fused commands. 
Second will be completed by upper layer */ + _nvmf_request_complete(first_fused_req); + req->first_fused_req = NULL; + } + } + + return rc; +} + +int +nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req) +{ + uint32_t nsid; + struct spdk_nvmf_ns *ns; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group = req->qpair->group; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + struct spdk_nvmf_subsystem_pg_ns_info *ns_info; + + /* pre-set response details for this command */ + response->status.sc = SPDK_NVME_SC_SUCCESS; + nsid = cmd->nsid; + + if (spdk_unlikely(ctrlr == NULL)) { + SPDK_ERRLOG("I/O command sent before CONNECT\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) { + SPDK_ERRLOG("I/O command sent to disabled controller\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid); + if (ns == NULL || ns->bdev == NULL) { + SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid); + response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + response->status.dnr = 1; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* scan-build falsely reporting dereference of null pointer */ + assert(group != NULL && group->sgroups != NULL); + ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1]; + if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Reservation Conflict for nsid %u, opcode %u\n", + cmd->nsid, cmd->opc); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + bdev = ns->bdev; + desc = 
ns->desc; + ch = ns_info->channel; + + if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) { + return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch); + } else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) { + struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl; + + SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n"); + + /* abort req->qpair->first_fused_request and continue with new command */ + fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; + fused_response->status.sct = SPDK_NVME_SCT_GENERIC; + _nvmf_request_complete(req->qpair->first_fused_req); + req->qpair->first_fused_req = NULL; + } + + switch (cmd->opc) { + case SPDK_NVME_OPC_READ: + return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_WRITE: + return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_COMPARE: + return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_WRITE_ZEROES: + return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_FLUSH: + return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_RESERVATION_REGISTER: + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + case SPDK_NVME_OPC_RESERVATION_RELEASE: + case SPDK_NVME_OPC_RESERVATION_REPORT: + spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + default: + return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req); + } +} + +static void +nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair) +{ + if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) { + assert(qpair->state_cb != NULL); + + if (TAILQ_EMPTY(&qpair->outstanding)) { + qpair->state_cb(qpair->state_cb_arg, 0); + } + } +} + +int +spdk_nvmf_request_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair 
*qpair = req->qpair; + + TAILQ_REMOVE(&qpair->outstanding, req, link); + if (nvmf_transport_req_free(req)) { + SPDK_ERRLOG("Unable to free transport level request resources.\n"); + } + + nvmf_qpair_request_cleanup(qpair); + + return 0; +} + +static void +_nvmf_request_complete(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup = NULL; + bool is_aer = false; + + rsp->sqid = 0; + rsp->status.p = 0; + rsp->cid = req->cmd->nvme_cmd.cid; + + qpair = req->qpair; + if (qpair->ctrlr) { + sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id]; + assert(sgroup != NULL); + is_aer = req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; + } else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) { + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + } + + if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) { + spdk_nvme_print_completion(qpair->qid, rsp); + } + + TAILQ_REMOVE(&qpair->outstanding, req, link); + if (nvmf_transport_req_complete(req)) { + SPDK_ERRLOG("Transport request completion error!\n"); + } + + /* AER cmd is an exception */ + if (sgroup && !is_aer) { + assert(sgroup->io_outstanding > 0); + sgroup->io_outstanding--; + if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING && + sgroup->io_outstanding == 0) { + sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED; + sgroup->cb_fn(sgroup->cb_arg, 0); + } + } + + nvmf_qpair_request_cleanup(qpair); +} + +int +spdk_nvmf_request_complete(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + + if (spdk_likely(qpair->group->thread == spdk_get_thread())) { + _nvmf_request_complete(req); + } else { + spdk_thread_send_msg(qpair->group->thread, + _nvmf_request_complete, req); + } + + return 0; +} + +static void +_nvmf_request_exec(struct spdk_nvmf_request *req, + struct spdk_nvmf_subsystem_poll_group *sgroup) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + enum 
spdk_nvmf_request_exec_status status; + + if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) { + spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd); + } + + if (sgroup) { + sgroup->io_outstanding++; + } + + /* Place the request on the outstanding list so we can keep track of it */ + TAILQ_INSERT_TAIL(&qpair->outstanding, req, link); + + if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) { + status = nvmf_ctrlr_process_fabrics_cmd(req); + } else if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) { + status = nvmf_ctrlr_process_admin_cmd(req); + } else { + status = nvmf_ctrlr_process_io_cmd(req); + } + + if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + _nvmf_request_complete(req); + } +} + +void +spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup = NULL; + + assert(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC); + + if (qpair->ctrlr) { + sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id]; + assert(sgroup != NULL); + } else { + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + } + + _nvmf_request_exec(req, sgroup); +} + +void +spdk_nvmf_request_exec(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup = NULL; + + if (qpair->ctrlr) { + sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id]; + assert(sgroup != NULL); + } else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) { + sgroup = nvmf_subsystem_pg_from_connect_cmd(req); + } + + if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) { + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + /* Place the request on the outstanding list so we can keep track of it */ + TAILQ_INSERT_TAIL(&qpair->outstanding, req, link); + /* Still increment io_outstanding because request_complete decrements it */ + if (sgroup != NULL) { + 
sgroup->io_outstanding++; + } + _nvmf_request_complete(req); + return; + } + + /* Check if the subsystem is paused (if there is a subsystem) */ + if (sgroup != NULL) { + if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) { + /* The subsystem is not currently active. Queue this request. */ + TAILQ_INSERT_TAIL(&sgroup->queued, req, link); + return; + } + } + + _nvmf_request_exec(req, sgroup); +} + +static bool +nvmf_ctrlr_get_dif_ctx(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, + struct spdk_dif_ctx *dif_ctx) +{ + struct spdk_nvmf_ns *ns; + struct spdk_bdev *bdev; + + if (ctrlr == NULL || cmd == NULL) { + return false; + } + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid); + if (ns == NULL || ns->bdev == NULL) { + return false; + } + + bdev = ns->bdev; + + switch (cmd->opc) { + case SPDK_NVME_OPC_READ: + case SPDK_NVME_OPC_WRITE: + case SPDK_NVME_OPC_COMPARE: + return nvmf_bdev_ctrlr_get_dif_ctx(bdev, cmd, dif_ctx); + default: + break; + } + + return false; +} + +bool +spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + if (spdk_likely(ctrlr == NULL || !ctrlr->dif_insert_or_strip)) { + return false; + } + + if (spdk_unlikely(qpair->state != SPDK_NVMF_QPAIR_ACTIVE)) { + return false; + } + + if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) { + return false; + } + + if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) { + return false; + } + + return nvmf_ctrlr_get_dif_ctx(ctrlr, &req->cmd->nvme_cmd, dif_ctx); +} + +void +spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr) +{ + g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = hdlr; +} + +static int +nvmf_passthru_admin_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req); + struct 
spdk_nvme_cpl *response = spdk_nvmf_request_get_response(req); + uint32_t bdev_nsid; + int rc; + + if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid == 0) { + bdev_nsid = cmd->nsid; + } else { + bdev_nsid = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid; + } + + rc = spdk_nvmf_request_get_bdev(bdev_nsid, req, &bdev, &desc, &ch); + if (rc) { + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, NULL); +} + +void +spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid) +{ + g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = nvmf_passthru_admin_cmd; + g_nvmf_custom_admin_cmd_hdlrs[opc].nsid = forward_nsid; +} + +int +spdk_nvmf_request_get_bdev(uint32_t nsid, struct spdk_nvmf_request *req, + struct spdk_bdev **bdev, struct spdk_bdev_desc **desc, struct spdk_io_channel **ch) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_poll_group *group = req->qpair->group; + struct spdk_nvmf_subsystem_pg_ns_info *ns_info; + + *bdev = NULL; + *desc = NULL; + *ch = NULL; + + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid); + if (ns == NULL || ns->bdev == NULL) { + return -EINVAL; + } + + assert(group != NULL && group->sgroups != NULL); + ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1]; + *bdev = ns->bdev; + *desc = ns->desc; + *ch = ns_info->channel; + + return 0; +} + +struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req) +{ + return req->qpair->ctrlr; +} + +struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req) +{ + return &req->cmd->nvme_cmd; +} + +struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req) +{ + return &req->rsp->nvme_cpl; +} + +struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req) +{ + return 
req->qpair->ctrlr->subsys; +} + +void spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length) +{ + *data = req->data; + *length = req->length; +} + +struct spdk_nvmf_subsystem *spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->subsys; +} + +uint16_t spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->cntlid; +} + +struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req) +{ + return req->req_to_abort; +} diff --git a/src/spdk/lib/nvmf/ctrlr_bdev.c b/src/spdk/lib/nvmf/ctrlr_bdev.c new file mode 100644 index 000000000..13e0a4309 --- /dev/null +++ b/src/spdk/lib/nvmf/ctrlr_bdev.c @@ -0,0 +1,761 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "nvmf_internal.h"
+
+#include "spdk/bdev.h"
+#include "spdk/endian.h"
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/nvme.h"
+#include "spdk/nvmf_cmd.h"
+#include "spdk/nvmf_spec.h"
+#include "spdk/trace.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+
+/*
+ * Return true when every namespace of the subsystem that has a bdev attached
+ * supports io_type.  Namespaces without a bdev are skipped.
+ */
+static bool
+nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
+				      enum spdk_bdev_io_type io_type)
+{
+	struct spdk_nvmf_ns *ns;
+
+	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
+	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
+		if (ns->bdev == NULL) {
+			continue;
+		}
+
+		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
+			SPDK_DEBUGLOG(SPDK_LOG_NVMF,
+				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
+				      spdk_nvmf_subsystem_get_nqn(subsystem),
+				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
+			return false;
+		}
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n",
+		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
+	return true;
+}
+
+/* True when all bdev-backed namespaces of the controller's subsystem support UNMAP. */
+bool
+nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
+}
+
+/* True when all bdev-backed namespaces of the controller's subsystem support WRITE ZEROES. */
+bool
+nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
+}
+
+/*
+ * bdev completion callback shared by the I/O handlers below.
+ *
+ * Copies the NVMe status reported by the bdev layer into the request's
+ * completion, completes the request and releases the bdev_io.  When the
+ * request carries a first_fused_req, that request is filled in from the
+ * fused status and completed first.
+ */
+static void
+nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
+			     void *cb_arg)
+{
+	struct spdk_nvmf_request *req = cb_arg;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	int first_sc = 0, first_sct = 0, second_sc = 0, second_sct = 0;
+	uint32_t cdw0 = 0;
+	struct spdk_nvmf_request *first_req = req->first_fused_req;
+
+	if (spdk_unlikely(first_req != NULL)) {
+		/* fused commands - get status for both operations */
+		struct spdk_nvme_cpl *fused_response = &first_req->rsp->nvme_cpl;
+
+		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &second_sct, &second_sc, &first_sct, &first_sc);
+		fused_response->cdw0 = cdw0;
+		fused_response->status.sc = second_sc;
+		fused_response->status.sct = second_sct;
+
+		/* first request should be completed */
+		spdk_nvmf_request_complete(first_req);
+		req->first_fused_req = NULL;
+	} else {
+		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &first_sct, &first_sc);
+	}
+
+	response->cdw0 = cdw0;
+	response->status.sc = first_sc;
+	response->status.sct = first_sct;
+
+	spdk_nvmf_request_complete(req);
+	spdk_bdev_free_io(bdev_io);
+}
+
+/*
+ * bdev completion callback for admin passthru: invoke the optional
+ * per-request callback, then finish through the common completion path.
+ */
+static void
+nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
+				   void *cb_arg)
+{
+	struct spdk_nvmf_request *req = cb_arg;
+
+	if (req->cmd_cb_fn) {
+		req->cmd_cb_fn(req);
+	}
+
+	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
+}
+
+/*
+ * Fill the Identify Namespace data for ns from the properties of its bdev.
+ *
+ * With dif_insert_or_strip set, metadata is hidden from the host (ms = 0) and
+ * the LBA data size is derived from the bdev's data block size; otherwise the
+ * bdev's metadata size and DIF settings are reported.
+ */
+void
+nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
+			    bool dif_insert_or_strip)
+{
+	struct spdk_bdev *bdev = ns->bdev;
+	uint64_t num_blocks;
+
+	num_blocks = spdk_bdev_get_num_blocks(bdev);
+
+	nsdata->nsze = num_blocks;
+	nsdata->ncap = num_blocks;
+	nsdata->nuse = num_blocks;
+	nsdata->nlbaf = 0;
+	nsdata->flbas.format = 0;
+	nsdata->nacwu = spdk_bdev_get_acwu(bdev);
+	if (!dif_insert_or_strip) {
+		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
+		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
+		if (nsdata->lbaf[0].ms != 0) {
+			nsdata->flbas.extended = 1;
+			nsdata->mc.extended = 1;
+			nsdata->mc.pointer = 0;
+			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
+
+			switch (spdk_bdev_get_dif_type(bdev)) {
+			case SPDK_DIF_TYPE1:
+				nsdata->dpc.pit1 = 1;
+				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
+				break;
+			case SPDK_DIF_TYPE2:
+				nsdata->dpc.pit2 = 1;
+				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
+				break;
+			case SPDK_DIF_TYPE3:
+				nsdata->dpc.pit3 = 1;
+				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
+				break;
+			default:
+				SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Protection Disabled\n");
+				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
+				break;
+			}
+		}
+	} else {
+		nsdata->lbaf[0].ms = 0;
+		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
+	}
+	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
+	nsdata->nmic.can_share = 1;
+	if (ns->ptpl_file != NULL) {
+		nsdata->nsrescap.rescap.persist = 1;
+	}
+	nsdata->nsrescap.rescap.write_exclusive = 1;
+	nsdata->nsrescap.rescap.exclusive_access = 1;
+	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
+	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
+	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
+	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
+	nsdata->nsrescap.rescap.ignore_existing_key = 1;
+
+	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
+	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
+
+	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
+	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
+}
+
+/* Decode the starting LBA and the (1-based) block count of a read/write style command. */
+static void
+nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
+			      uint64_t *num_blocks)
+{
+	/* SLBA: CDW10 and CDW11 */
+	*start_lba = from_le64(&cmd->cdw10);
+
+	/* NLB: CDW12 bits 15:00, 0's based */
+	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
+}
+
+/*
+ * Return true when [io_start_lba, io_start_lba + io_num_blocks) lies within a
+ * device of bdev_num_blocks blocks.  The second comparison rejects ranges
+ * whose end wraps around the 64-bit LBA space.
+ */
+static bool
+nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
+			     uint64_t io_num_blocks)
+{
+	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
+	    io_start_lba + io_num_blocks < io_start_lba) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * io_wait callback: re-run an I/O command that was parked on -ENOMEM.
+ *
+ * The request is already being tracked by the time it is parked, so when the
+ * retry finishes synchronously it has to be completed here; nothing else will
+ * do it.
+ */
+static void
+nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
+{
+	struct spdk_nvmf_request *req = arg;
+	int rc;
+
+	rc = nvmf_ctrlr_process_io_cmd(req);
+	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+		spdk_nvmf_request_complete(req);
+	}
+}
+
+/*
+ * io_wait callback: re-run an admin command that was parked on -ENOMEM.
+ * As with the I/O variant, a synchronous result must be completed here.
+ */
+static void
+nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
+{
+	struct spdk_nvmf_request *req = arg;
+	int rc;
+
+	rc = nvmf_ctrlr_process_admin_cmd(req);
+	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+		spdk_nvmf_request_complete(req);
+	}
+}
+
+/*
+ * Park req on the bdev's io_wait queue so that cb_fn(cb_arg) runs once the
+ * bdev layer can accept I/O again, and account for it in the poll group's
+ * pending_bdev_io statistic.
+ */
+static void
+nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
+			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
+{
+	int rc;
+
+	req->bdev_io_wait.bdev = bdev;
+	req->bdev_io_wait.cb_fn = cb_fn;
+	req->bdev_io_wait.cb_arg = cb_arg;
+
+	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
+	if (rc != 0) {
+		assert(false);
+	}
+	req->qpair->group->stat.pending_bdev_io++;
+}
+
+/*
+ * Submit an NVMe Read to the bdev.
+ *
+ * Returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE with the status filled in
+ * when the command fails validation or submission, and
+ * SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS when the I/O was submitted or
+ * queued for retry after -ENOMEM.
+ */
+int
+nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+	uint32_t block_size = spdk_bdev_get_block_size(bdev);
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	uint64_t start_lba;
+	uint64_t num_blocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+		SPDK_ERRLOG("end of media\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(num_blocks * block_size > req->length)) {
+		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    num_blocks, block_size, req->length);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+				    nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/* Submit an NVMe Write to the bdev; same return convention as the read handler. */
+int
+nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+	uint32_t block_size = spdk_bdev_get_block_size(bdev);
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	uint64_t start_lba;
+	uint64_t num_blocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+		SPDK_ERRLOG("end of media\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(num_blocks * block_size > req->length)) {
+		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    num_blocks, block_size, req->length);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+				     nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/* Submit an NVMe Compare to the bdev; same return convention as the read handler. */
+int
+nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+	uint32_t block_size = spdk_bdev_get_block_size(bdev);
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	uint64_t start_lba;
+	uint64_t num_blocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+		SPDK_ERRLOG("end of media\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(num_blocks * block_size > req->length)) {
+		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    num_blocks, block_size, req->length);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
+				       nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Submit a fused Compare + Write pair to the bdev.
+ *
+ * Both commands must address the same LBA range.  Validation and submission
+ * errors are reported in write_req's completion only.
+ */
+int
+nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
+{
+	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+	uint32_t block_size = spdk_bdev_get_block_size(bdev);
+	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
+	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
+	uint64_t write_start_lba, cmp_start_lba;
+	uint64_t write_num_blocks, cmp_num_blocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
+	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
+
+	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
+		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
+			  write_num_blocks))) {
+		SPDK_ERRLOG("end of media\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
+		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
+			    write_num_blocks, block_size, write_req->length);
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
+			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
+			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Submit an NVMe Write Zeroes to the bdev.  No data buffer is involved, so
+ * only the LBA range is validated.
+ */
+int
+nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
+	uint64_t start_lba;
+	uint64_t num_blocks;
+	int rc;
+
+	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
+
+	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
+		SPDK_ERRLOG("end of media\n");
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
+					   nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/* Flush the whole bdev on behalf of an NVMe Flush command. */
+int
+nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	int rc;
+
+	/* As for NVMeoF controller, SPDK always sets the volatile write
+	 * cache bit to 1, so return success for those block devices
+	 * which can't support the FLUSH command.
+	 */
+	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_SUCCESS;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
+				    nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		/* Set the status code type together with the status code, as the other handlers do. */
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * State of one Dataset Management (deallocate) command that is being split
+ * into one bdev unmap per range.
+ */
+struct nvmf_bdev_ctrlr_unmap {
+	struct spdk_nvmf_request	*req;
+	/* Unmaps in flight, plus one while the context sits on the io_wait queue */
+	uint32_t			count;
+	struct spdk_bdev_desc		*desc;
+	struct spdk_bdev		*bdev;
+	struct spdk_io_channel		*ch;
+	/* Index of the next range to submit */
+	uint32_t			range_index;
+};
+
+/*
+ * Completion callback for a single unmap.  The first failing status is
+ * latched into the response; the request is completed and the context freed
+ * once no unmaps remain outstanding.
+ */
+static void
+nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
+			  void *cb_arg)
+{
+	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
+	struct spdk_nvmf_request	*req = unmap_ctx->req;
+	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
+	int				sc, sct;
+	uint32_t			cdw0;
+
+	unmap_ctx->count--;
+
+	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
+	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
+		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
+		response->cdw0 = cdw0;
+		response->status.sc = sc;
+		response->status.sct = sct;
+	}
+
+	if (unmap_ctx->count == 0) {
+		spdk_nvmf_request_complete(req);
+		free(unmap_ctx);
+	}
+	spdk_bdev_free_io(bdev_io);
+}
+
+static int
+nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
+
+/*
+ * io_wait callback: continue a partially submitted unmap sequence.
+ *
+ * If the retry ends synchronously (a submission error with no unmaps left in
+ * flight) the context has already been freed and no completion callback will
+ * run, so the request must be completed here.  req is read out of the
+ * context before the call for that reason.
+ */
+static void
+nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
+{
+	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
+	struct spdk_nvmf_request *req = unmap_ctx->req;
+	struct spdk_bdev_desc *desc = unmap_ctx->desc;
+	struct spdk_bdev *bdev = unmap_ctx->bdev;
+	struct spdk_io_channel *ch = unmap_ctx->ch;
+	int rc;
+
+	rc = nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
+	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
+		spdk_nvmf_request_complete(req);
+	}
+}
+
+/*
+ * Issue one bdev unmap per DSM range, starting at unmap_ctx->range_index.
+ *
+ * unmap_ctx is NULL on first submission (a context is allocated) and non-NULL
+ * when resuming after -ENOMEM.  Returns COMPLETE when nothing is left in
+ * flight (the context is freed), ASYNCHRONOUS otherwise.
+ */
+static int
+nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
+{
+	uint16_t nr, i;
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+	struct spdk_nvme_dsm_range *dsm_range;
+	uint64_t lba;
+	uint32_t lba_count;
+	int rc;
+
+	nr = cmd->cdw10_bits.dsm.nr + 1;
+	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
+		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	if (unmap_ctx == NULL) {
+		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
+		if (!unmap_ctx) {
+			response->status.sct = SPDK_NVME_SCT_GENERIC;
+			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+		}
+
+		unmap_ctx->req = req;
+		unmap_ctx->desc = desc;
+		unmap_ctx->ch = ch;
+		unmap_ctx->bdev = bdev;
+
+		response->status.sct = SPDK_NVME_SCT_GENERIC;
+		response->status.sc = SPDK_NVME_SC_SUCCESS;
+	} else {
+		unmap_ctx->count--;	/* dequeued */
+	}
+
+	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
+	for (i = unmap_ctx->range_index; i < nr; i++) {
+		lba = dsm_range[i].starting_lba;
+		lba_count = dsm_range[i].length;
+
+		unmap_ctx->count++;
+
+		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
+					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
+		if (rc) {
+			if (rc == -ENOMEM) {
+				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
+				/* Unmap was not yet submitted to bdev */
+				/* unmap_ctx->count will be decremented when the request is dequeued */
+				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+			}
+			/* Keep sct consistent with the sc written here */
+			response->status.sct = SPDK_NVME_SCT_GENERIC;
+			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+			unmap_ctx->count--;
+			/* We can't return here - we may have to wait for any other
+			 * unmaps already sent to complete */
+			break;
+		}
+		unmap_ctx->range_index++;
+	}
+
+	if (unmap_ctx->count == 0) {
+		free(unmap_ctx);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Handle NVMe Dataset Management.  Only the deallocate attribute is acted on
+ * (as bdev unmaps); any other DSM request completes successfully as a no-op.
+ */
+int
+nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
+	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
+
+	if (cmd->cdw11_bits.dsm.ad) {
+		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
+	}
+
+	response->status.sct = SPDK_NVME_SCT_GENERIC;
+	response->status.sc = SPDK_NVME_SC_SUCCESS;
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+}
+
+/*
+ * Pass an I/O command through to the bdev unmodified.  A submission failure
+ * other than -ENOMEM is reported to the host as Invalid Opcode.
+ */
+int
+nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
+{
+	int rc;
+
+	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
+					nvmf_bdev_ctrlr_complete_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Pass an admin command through to the bdev.  cb_fn (may be NULL) is stored
+ * on the request and invoked from the completion callback before the request
+ * is completed.
+ */
+int
+spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
+{
+	int rc;
+
+	req->cmd_cb_fn = cb_fn;
+
+	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
+					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
+			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+		}
+		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+
+	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+}
+
+/*
+ * Completion callback for an abort submitted to the bdev: on success clear
+ * bit 0 of cdw0 in the Abort command's completion, then complete it.
+ */
+static void
+nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct spdk_nvmf_request *req = cb_arg;
+
+	if (success) {
+		req->rsp->nvme_cpl.cdw0 &= ~1U;
+	}
+
+	spdk_nvmf_request_complete(req);
+	spdk_bdev_free_io(bdev_io);
+}
+
+/*
+ * Ask the bdev layer to abort req_to_abort on behalf of the Abort command
+ * req.  The caller must have set bit 0 of cdw0 in req's completion
+ * beforehand (asserted below).
+ */
+int
+spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
+			       struct spdk_nvmf_request *req_to_abort)
+{
+	int rc;
+
+	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
+
+	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
+	if (spdk_likely(rc == 0)) {
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	} else if (rc == -ENOMEM) {
+		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
+	} else {
+		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
+	}
+}
+
+/*
+ * Initialise dif_ctx for a command from the bdev's metadata/DIF settings.
+ * Returns false when the bdev has no metadata or the context cannot be
+ * initialised.
+ */
+bool
+nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
+			    struct spdk_dif_ctx *dif_ctx)
+{
+	uint32_t init_ref_tag, dif_check_flags = 0;
+	int rc;
+
+	if (spdk_bdev_get_md_size(bdev) == 0) {
+		return false;
+	}
+
+	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
+	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
+
+	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
+		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
+	}
+
+	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
+		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
+	}
+
+	rc = spdk_dif_ctx_init(dif_ctx,
+			       spdk_bdev_get_block_size(bdev),
+			       spdk_bdev_get_md_size(bdev),
+			       spdk_bdev_is_md_interleaved(bdev),
+			       spdk_bdev_is_dif_head_of_md(bdev),
+			       spdk_bdev_get_dif_type(bdev),
+			       dif_check_flags,
+			       init_ref_tag, 0, 0, 0, 0);
+
+	return rc == 0;
+}
diff --git a/src/spdk/lib/nvmf/ctrlr_discovery.c b/src/spdk/lib/nvmf/ctrlr_discovery.c
new file mode 100644
index 000000000..ab1c46ba1
--- /dev/null
+++ b/src/spdk/lib/nvmf/ctrlr_discovery.c
@@ -0,0 +1,159 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NVMe over Fabrics discovery service + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/nvmf_spec.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +static struct spdk_nvmf_discovery_log_page * +nvmf_generate_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn, size_t *log_page_size) +{ + uint64_t numrec = 0; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_subsystem_listener *listener; + struct spdk_nvmf_discovery_log_page_entry *entry; + struct spdk_nvmf_discovery_log_page *disc_log; + size_t cur_size; + uint32_t sid; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Generating log page for genctr %" PRIu64 "\n", + tgt->discovery_genctr); + + cur_size = sizeof(struct spdk_nvmf_discovery_log_page); + disc_log = calloc(1, cur_size); + if (disc_log == NULL) { + SPDK_ERRLOG("Discovery log page memory allocation error\n"); + return NULL; + } + + for (sid = 0; sid < tgt->max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if ((subsystem == NULL) || + (subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) || + (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) { + continue; + } + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + continue; + } + + if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) { + continue; + } + + for (listener = 
spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
+		     listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
+			size_t new_size = cur_size + sizeof(*entry);
+			void *new_log_page = realloc(disc_log, new_size);
+
+			if (new_log_page == NULL) {
+				SPDK_ERRLOG("Discovery log page memory allocation error\n");
+				break;
+			}
+
+			disc_log = new_log_page;
+			cur_size = new_size;
+
+			entry = &disc_log->entries[numrec];
+			memset(entry, 0, sizeof(*entry));
+			entry->portid = numrec;
+			entry->cntlid = 0xffff;
+			entry->asqsz = listener->transport->opts.max_aq_depth;
+			entry->subtype = subsystem->subtype;
+			snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn);
+
+			nvmf_transport_listener_discover(listener->transport, listener->trid, entry);
+
+			numrec++;
+		}
+	}
+
+	disc_log->numrec = numrec;
+	disc_log->genctr = tgt->discovery_genctr;
+	*log_page_size = cur_size;
+
+	return disc_log;
+}
+
+/*
+ * Copy the discovery log page generated for hostnqn into an iovec array.
+ *
+ * Up to length bytes of the log page, starting at byte offset, are copied
+ * into iov[0..iovcnt). Every payload byte that is not filled with log page
+ * data is zeroed. If offset is at or past the end of the log page, or the
+ * log page could not be generated, the whole payload is zeroed.
+ */
+void
+nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
+			    uint32_t iovcnt, uint64_t offset, uint32_t length)
+{
+	size_t copy_len;
+	size_t avail = 0;
+	struct iovec *tmp;
+	size_t log_page_size = 0;
+	struct spdk_nvmf_discovery_log_page *discovery_log_page;
+
+	discovery_log_page = nvmf_generate_discovery_log(tgt, hostnqn, &log_page_size);
+
+	/*
+	 * Number of log page bytes available at offset. Comparing first keeps
+	 * the unsigned subtraction from wrapping when offset lies past the end
+	 * of the log page, which would let memcpy() read beyond the allocation.
+	 */
+	if (discovery_log_page != NULL && offset < log_page_size) {
+		avail = log_page_size - offset;
+	}
+
+	for (tmp = iov; tmp < iov + iovcnt; tmp++) {
+		/* Copy the valid part of the discovery log page, if any */
+		copy_len = spdk_min(tmp->iov_len, length);
+		copy_len = spdk_min(avail, copy_len);
+		if (copy_len != 0) {
+			memcpy(tmp->iov_base, (char *)discovery_log_page + offset, copy_len);
+			offset += copy_len;
+			avail -= copy_len;
+			length -= copy_len;
+		}
+
+		/* Zero out the rest of the payload */
+		if (copy_len < tmp->iov_len) {
+			memset((char *)tmp->iov_base + copy_len, 0, tmp->iov_len - copy_len);
+		}
+	}
+
+	/* free(NULL) is a no-op, so this also covers the generation-failure case. */
+	free(discovery_log_page);
+}
diff --git a/src/spdk/lib/nvmf/fc.c b/src/spdk/lib/nvmf/fc.c
new file mode 100644
index 000000000..678cfc681
--- /dev/null
+++ b/src/spdk/lib/nvmf/fc.c
@@ -0,0 +1,3957 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2018-2019 Broadcom. All Rights Reserved.
+ * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NVMe_FC transport functions.
+ */ + +#include "spdk/env.h" +#include "spdk/assert.h" +#include "spdk/nvmf_transport.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/util.h" +#include "spdk/likely.h" +#include "spdk/endian.h" +#include "spdk/log.h" +#include "spdk/thread.h" + +#include "spdk_internal/log.h" + +#include "nvmf_fc.h" +#include "fc_lld.h" + +#ifndef DEV_VERIFY +#define DEV_VERIFY assert +#endif + +#ifndef ASSERT_SPDK_FC_MASTER_THREAD +#define ASSERT_SPDK_FC_MASTER_THREAD() \ + DEV_VERIFY(spdk_get_thread() == nvmf_fc_get_master_thread()); +#endif + +/* + * PRLI service parameters + */ +enum spdk_nvmf_fc_service_parameters { + SPDK_NVMF_FC_FIRST_BURST_SUPPORTED = 0x0001, + SPDK_NVMF_FC_DISCOVERY_SERVICE = 0x0008, + SPDK_NVMF_FC_TARGET_FUNCTION = 0x0010, + SPDK_NVMF_FC_INITIATOR_FUNCTION = 0x0020, + SPDK_NVMF_FC_CONFIRMED_COMPLETION_SUPPORTED = 0x0080, +}; + +static char *fc_req_state_strs[] = { + "SPDK_NVMF_FC_REQ_INIT", + "SPDK_NVMF_FC_REQ_READ_BDEV", + "SPDK_NVMF_FC_REQ_READ_XFER", + "SPDK_NVMF_FC_REQ_READ_RSP", + "SPDK_NVMF_FC_REQ_WRITE_BUFFS", + "SPDK_NVMF_FC_REQ_WRITE_XFER", + "SPDK_NVMF_FC_REQ_WRITE_BDEV", + "SPDK_NVMF_FC_REQ_WRITE_RSP", + "SPDK_NVMF_FC_REQ_NONE_BDEV", + "SPDK_NVMF_FC_REQ_NONE_RSP", + "SPDK_NVMF_FC_REQ_SUCCESS", + "SPDK_NVMF_FC_REQ_FAILED", + "SPDK_NVMF_FC_REQ_ABORTED", + "SPDK_NVMF_FC_REQ_BDEV_ABORTED", + "SPDK_NVMF_FC_REQ_PENDING" +}; + +#define OBJECT_NVMF_FC_IO 0xA0 + +#define TRACE_GROUP_NVMF_FC 0x8 +#define TRACE_FC_REQ_INIT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x01) +#define TRACE_FC_REQ_READ_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x02) +#define TRACE_FC_REQ_READ_XFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x03) +#define TRACE_FC_REQ_READ_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x04) +#define TRACE_FC_REQ_WRITE_BUFFS SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x05) +#define TRACE_FC_REQ_WRITE_XFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x06) +#define TRACE_FC_REQ_WRITE_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x07) +#define 
TRACE_FC_REQ_WRITE_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x08) +#define TRACE_FC_REQ_NONE_BDEV SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x09) +#define TRACE_FC_REQ_NONE_RSP SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0A) +#define TRACE_FC_REQ_SUCCESS SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0B) +#define TRACE_FC_REQ_FAILED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0C) +#define TRACE_FC_REQ_ABORTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0D) +#define TRACE_FC_REQ_BDEV_ABORTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0E) +#define TRACE_FC_REQ_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_FC, 0x0F) + +SPDK_TRACE_REGISTER_FN(nvmf_fc_trace, "nvmf_fc", TRACE_GROUP_NVMF_FC) +{ + spdk_trace_register_object(OBJECT_NVMF_FC_IO, 'r'); + spdk_trace_register_description("FC_REQ_NEW", + TRACE_FC_REQ_INIT, + OWNER_NONE, OBJECT_NVMF_FC_IO, 1, 1, ""); + spdk_trace_register_description("FC_REQ_READ_SUBMIT_TO_BDEV", + TRACE_FC_REQ_READ_BDEV, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_READ_XFER_DATA", + TRACE_FC_REQ_READ_XFER, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_READ_RSP", + TRACE_FC_REQ_READ_RSP, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_WRITE_NEED_BUFFER", + TRACE_FC_REQ_WRITE_BUFFS, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_WRITE_XFER_DATA", + TRACE_FC_REQ_WRITE_XFER, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_WRITE_SUBMIT_TO_BDEV", + TRACE_FC_REQ_WRITE_BDEV, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_WRITE_RSP", + TRACE_FC_REQ_WRITE_RSP, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_NONE_SUBMIT_TO_BDEV", + TRACE_FC_REQ_NONE_BDEV, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + spdk_trace_register_description("FC_REQ_NONE_RSP", + TRACE_FC_REQ_NONE_RSP, + OWNER_NONE, OBJECT_NVMF_FC_IO, 0, 1, ""); + 
spdk_trace_register_description("FC_REQ_SUCCESS", + TRACE_FC_REQ_SUCCESS, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("FC_REQ_FAILED", + TRACE_FC_REQ_FAILED, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("FC_REQ_ABORTED", + TRACE_FC_REQ_ABORTED, + OWNER_NONE, OBJECT_NONE, 0, 1, ""); + spdk_trace_register_description("FC_REQ_ABORTED_SUBMIT_TO_BDEV", + TRACE_FC_REQ_BDEV_ABORTED, + OWNER_NONE, OBJECT_NONE, 0, 1, ""); + spdk_trace_register_description("FC_REQ_PENDING", + TRACE_FC_REQ_PENDING, + OWNER_NONE, OBJECT_NONE, 0, 1, ""); +} + +/** + * The structure used by all fc adm functions + */ +struct spdk_nvmf_fc_adm_api_data { + void *api_args; + spdk_nvmf_fc_callback cb_func; +}; + +/** + * The callback structure for nport-delete + */ +struct spdk_nvmf_fc_adm_nport_del_cb_data { + struct spdk_nvmf_fc_nport *nport; + uint8_t port_handle; + spdk_nvmf_fc_callback fc_cb_func; + void *fc_cb_ctx; +}; + +/** + * The callback structure for it-delete + */ +struct spdk_nvmf_fc_adm_i_t_del_cb_data { + struct spdk_nvmf_fc_nport *nport; + struct spdk_nvmf_fc_remote_port_info *rport; + uint8_t port_handle; + spdk_nvmf_fc_callback fc_cb_func; + void *fc_cb_ctx; +}; + + +typedef void (*spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn)(void *arg, uint32_t err); + +/** + * The callback structure for the it-delete-assoc callback + */ +struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data { + struct spdk_nvmf_fc_nport *nport; + struct spdk_nvmf_fc_remote_port_info *rport; + uint8_t port_handle; + spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func; + void *cb_ctx; +}; + +/* + * Call back function pointer for HW port quiesce. 
+ */
+typedef void (*spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn)(void *ctx, int err);
+
+/**
+ * Context structure for quiescing a hardware port
+ */
+struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx {
+	int quiesce_count;
+	void *ctx;
+	spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func;
+};
+
+/**
+ * Context structure used to reset a hardware port
+ */
+struct spdk_nvmf_fc_adm_hw_port_reset_ctx {
+	void *reset_args;
+	spdk_nvmf_fc_callback reset_cb_func;
+};
+
+/**
+ * The callback structure for HW port link break event
+ */
+struct spdk_nvmf_fc_adm_port_link_break_cb_data {
+	struct spdk_nvmf_hw_port_link_break_args *args;
+	struct spdk_nvmf_fc_nport_delete_args nport_del_args;
+	spdk_nvmf_fc_callback cb_func;
+};
+
+struct spdk_nvmf_fc_transport {
+	struct spdk_nvmf_transport transport;
+	pthread_mutex_t lock;
+};
+
+static struct spdk_nvmf_fc_transport *g_nvmf_ftransport;
+
+static TAILQ_HEAD(, spdk_nvmf_fc_port) g_spdk_nvmf_fc_port_list =
+	TAILQ_HEAD_INITIALIZER(g_spdk_nvmf_fc_port_list);
+
+static struct spdk_thread *g_nvmf_fc_master_thread = NULL;
+
+static uint32_t g_nvmf_fgroup_count = 0;
+static TAILQ_HEAD(, spdk_nvmf_fc_poll_group) g_nvmf_fgroups =
+	TAILQ_HEAD_INITIALIZER(g_nvmf_fgroups);
+
+struct spdk_thread *
+nvmf_fc_get_master_thread(void)
+{
+	return g_nvmf_fc_master_thread;
+}
+
+/*
+ * Record the trace point that corresponds to a request state.
+ *
+ * Every spdk_nvmf_fc_request_state handled below maps to the TRACE_FC_REQ_*
+ * tracepoint of the same name. An unrecognized state trips the assert and
+ * leaves tpoint_id at SPDK_TRACE_MAX_TPOINT_ID, so nothing is recorded.
+ */
+static inline void
+nvmf_fc_record_req_trace_point(struct spdk_nvmf_fc_request *fc_req,
+			       enum spdk_nvmf_fc_request_state state)
+{
+	uint16_t tpoint_id = SPDK_TRACE_MAX_TPOINT_ID;
+
+	switch (state) {
+	case SPDK_NVMF_FC_REQ_INIT:
+		/* Start IO tracing */
+		tpoint_id = TRACE_FC_REQ_INIT;
+		break;
+	case SPDK_NVMF_FC_REQ_READ_BDEV:
+		tpoint_id = TRACE_FC_REQ_READ_BDEV;
+		break;
+	case SPDK_NVMF_FC_REQ_READ_XFER:
+		tpoint_id = TRACE_FC_REQ_READ_XFER;
+		break;
+	case SPDK_NVMF_FC_REQ_READ_RSP:
+		tpoint_id = TRACE_FC_REQ_READ_RSP;
+		break;
+	case SPDK_NVMF_FC_REQ_WRITE_BUFFS:
+		tpoint_id = TRACE_FC_REQ_WRITE_BUFFS;
+		break;
+	case SPDK_NVMF_FC_REQ_WRITE_XFER:
+		tpoint_id = TRACE_FC_REQ_WRITE_XFER;
+		break;
+	case SPDK_NVMF_FC_REQ_WRITE_BDEV:
+		tpoint_id = TRACE_FC_REQ_WRITE_BDEV;
+		break;
+	case SPDK_NVMF_FC_REQ_WRITE_RSP:
+		tpoint_id = TRACE_FC_REQ_WRITE_RSP;
+		break;
+	case SPDK_NVMF_FC_REQ_NONE_BDEV:
+		tpoint_id = TRACE_FC_REQ_NONE_BDEV;
+		break;
+	case SPDK_NVMF_FC_REQ_NONE_RSP:
+		tpoint_id = TRACE_FC_REQ_NONE_RSP;
+		break;
+	case SPDK_NVMF_FC_REQ_SUCCESS:
+		tpoint_id = TRACE_FC_REQ_SUCCESS;
+		break;
+	case SPDK_NVMF_FC_REQ_FAILED:
+		tpoint_id = TRACE_FC_REQ_FAILED;
+		break;
+	case SPDK_NVMF_FC_REQ_ABORTED:
+		tpoint_id = TRACE_FC_REQ_ABORTED;
+		break;
+	case SPDK_NVMF_FC_REQ_BDEV_ABORTED:
+		/* Has its own tracepoint; do not fold it into TRACE_FC_REQ_ABORTED. */
+		tpoint_id = TRACE_FC_REQ_BDEV_ABORTED;
+		break;
+	case SPDK_NVMF_FC_REQ_PENDING:
+		tpoint_id = TRACE_FC_REQ_PENDING;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+	if (tpoint_id != SPDK_TRACE_MAX_TPOINT_ID) {
+		spdk_trace_record(tpoint_id, fc_req->poller_lcore, 0,
+				  (uint64_t)(&fc_req->req), 0);
+	}
+}
+
+static void
+nvmf_fc_handle_connection_failure(void *arg)
+{
+	struct spdk_nvmf_fc_conn *fc_conn = arg;
+	struct spdk_nvmf_fc_ls_add_conn_api_data *api_data = NULL;
+
+	if (!fc_conn->create_opd) {
+		return;
+	}
+	api_data = &fc_conn->create_opd->u.add_conn;
+
+	nvmf_fc_ls_add_conn_failure(api_data->assoc, api_data->ls_rqst,
+				    api_data->args.fc_conn, api_data->aq_conn);
+}
+
+static void
+nvmf_fc_handle_assoc_deletion(void *arg)
+{
+	struct spdk_nvmf_fc_conn *fc_conn = arg;
+
+	nvmf_fc_delete_association(fc_conn->fc_assoc->tgtport,
+				   fc_conn->fc_assoc->assoc_id, false, true, NULL, NULL);
+}
+
+static int
+nvmf_fc_create_req_mempool(struct spdk_nvmf_fc_hwqp *hwqp)
+{
+	uint32_t i;
+	struct spdk_nvmf_fc_request *fc_req;
+
+	TAILQ_INIT(&hwqp->free_reqs);
+	TAILQ_INIT(&hwqp->in_use_reqs);
+
+	hwqp->fc_reqs_buf = calloc(hwqp->rq_size, sizeof(struct spdk_nvmf_fc_request));
+	if (hwqp->fc_reqs_buf == NULL) {
+		SPDK_ERRLOG("create fc request pool failed\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < hwqp->rq_size; i++) {
+		fc_req = hwqp->fc_reqs_buf + i;
+ + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_INIT); + TAILQ_INSERT_TAIL(&hwqp->free_reqs, fc_req, link); + } + + return 0; +} + +static inline struct spdk_nvmf_fc_request * +nvmf_fc_hwqp_alloc_fc_request(struct spdk_nvmf_fc_hwqp *hwqp) +{ + struct spdk_nvmf_fc_request *fc_req; + + if (TAILQ_EMPTY(&hwqp->free_reqs)) { + SPDK_ERRLOG("Alloc request buffer failed\n"); + return NULL; + } + + fc_req = TAILQ_FIRST(&hwqp->free_reqs); + TAILQ_REMOVE(&hwqp->free_reqs, fc_req, link); + + memset(fc_req, 0, sizeof(struct spdk_nvmf_fc_request)); + TAILQ_INSERT_TAIL(&hwqp->in_use_reqs, fc_req, link); + TAILQ_INIT(&fc_req->abort_cbs); + return fc_req; +} + +static inline void +nvmf_fc_hwqp_free_fc_request(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_request *fc_req) +{ + if (fc_req->state != SPDK_NVMF_FC_REQ_SUCCESS) { + /* Log an error for debug purpose. */ + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_FAILED); + } + + /* set the magic to mark req as no longer valid. */ + fc_req->magic = 0xDEADBEEF; + + TAILQ_REMOVE(&hwqp->in_use_reqs, fc_req, link); + TAILQ_INSERT_HEAD(&hwqp->free_reqs, fc_req, link); +} + +static inline bool +nvmf_fc_req_in_get_buff(struct spdk_nvmf_fc_request *fc_req) +{ + switch (fc_req->state) { + case SPDK_NVMF_FC_REQ_WRITE_BUFFS: + return true; + default: + return false; + } +} + +void +nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp) +{ + nvmf_fc_init_rqpair_buffers(hwqp); +} + +struct spdk_nvmf_fc_conn * +nvmf_fc_hwqp_find_fc_conn(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t conn_id) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + TAILQ_FOREACH(fc_conn, &hwqp->connection_list, link) { + if (fc_conn->conn_id == conn_id) { + return fc_conn; + } + } + + return NULL; +} + +void +nvmf_fc_hwqp_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp, void *queues_curr) +{ + struct spdk_nvmf_fc_abts_ctx *ctx; + struct spdk_nvmf_fc_poller_api_queue_sync_args *args = NULL, *tmp = NULL; + + /* Clean up any pending sync callbacks */ + 
TAILQ_FOREACH_SAFE(args, &hwqp->sync_cbs, link, tmp) { + TAILQ_REMOVE(&hwqp->sync_cbs, args, link); + ctx = args->cb_info.cb_data; + if (ctx) { + if (++ctx->hwqps_responded == ctx->num_hwqps) { + free(ctx->sync_poller_args); + free(ctx->abts_poller_args); + free(ctx); + } + } + } + + nvmf_fc_reinit_q(hwqp->queues, queues_curr); +} + +void +nvmf_fc_init_hwqp(struct spdk_nvmf_fc_port *fc_port, struct spdk_nvmf_fc_hwqp *hwqp) +{ + hwqp->fc_port = fc_port; + + /* clear counters */ + memset(&hwqp->counters, 0, sizeof(struct spdk_nvmf_fc_errors)); + + nvmf_fc_init_poller_queues(hwqp); + if (&fc_port->ls_queue != hwqp) { + nvmf_fc_create_req_mempool(hwqp); + } + + nvmf_fc_init_q(hwqp); + TAILQ_INIT(&hwqp->connection_list); + TAILQ_INIT(&hwqp->sync_cbs); + TAILQ_INIT(&hwqp->ls_pending_queue); +} + +static struct spdk_nvmf_fc_poll_group * +nvmf_fc_get_idlest_poll_group(void) +{ + uint32_t max_count = UINT32_MAX; + struct spdk_nvmf_fc_poll_group *fgroup; + struct spdk_nvmf_fc_poll_group *ret_fgroup = NULL; + + /* find poll group with least number of hwqp's assigned to it */ + TAILQ_FOREACH(fgroup, &g_nvmf_fgroups, link) { + if (fgroup->hwqp_count < max_count) { + ret_fgroup = fgroup; + max_count = fgroup->hwqp_count; + } + } + + return ret_fgroup; +} + +void +nvmf_fc_poll_group_add_hwqp(struct spdk_nvmf_fc_hwqp *hwqp) +{ + struct spdk_nvmf_fc_poll_group *fgroup = NULL; + + assert(hwqp); + if (hwqp == NULL) { + SPDK_ERRLOG("Error: hwqp is NULL\n"); + return; + } + + assert(g_nvmf_fgroup_count); + + fgroup = nvmf_fc_get_idlest_poll_group(); + if (!fgroup) { + SPDK_ERRLOG("Could not assign poll group for hwqp (%d)\n", hwqp->hwqp_id); + return; + } + + hwqp->thread = fgroup->group.group->thread; + hwqp->fgroup = fgroup; + fgroup->hwqp_count++; + nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_ADD_HWQP, NULL); +} + +void +nvmf_fc_poll_group_remove_hwqp(struct spdk_nvmf_fc_hwqp *hwqp) +{ + assert(hwqp); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, + "Remove hwqp from poller: for 
port: %d, hwqp: %d\n", + hwqp->fc_port->port_hdl, hwqp->hwqp_id); + + if (!hwqp->fgroup) { + SPDK_ERRLOG("HWQP (%d) not assigned to poll group\n", hwqp->hwqp_id); + } else { + hwqp->fgroup->hwqp_count--; + nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_REMOVE_HWQP, NULL); + } +} + +/* + * Note: This needs to be used only on master poller. + */ +static uint64_t +nvmf_fc_get_abts_unique_id(void) +{ + static uint32_t u_id = 0; + + return (uint64_t)(++u_id); +} + +static void +nvmf_fc_queue_synced_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + struct spdk_nvmf_fc_abts_ctx *ctx = cb_data; + struct spdk_nvmf_fc_poller_api_abts_recvd_args *args, *poller_arg; + + ctx->hwqps_responded++; + + if (ctx->hwqps_responded < ctx->num_hwqps) { + /* Wait for all pollers to complete. */ + return; + } + + /* Free the queue sync poller args. */ + free(ctx->sync_poller_args); + + /* Mark as queue synced */ + ctx->queue_synced = true; + + /* Reset the ctx values */ + ctx->hwqps_responded = 0; + ctx->handled = false; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, + "QueueSync(0x%lx) completed for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n", + ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid); + + /* Resend ABTS to pollers */ + args = ctx->abts_poller_args; + for (int i = 0; i < ctx->num_hwqps; i++) { + poller_arg = args + i; + nvmf_fc_poller_api_func(poller_arg->hwqp, + SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED, + poller_arg); + } +} + +static int +nvmf_fc_handle_abts_notfound(struct spdk_nvmf_fc_abts_ctx *ctx) +{ + struct spdk_nvmf_fc_poller_api_queue_sync_args *args, *poller_arg; + struct spdk_nvmf_fc_poller_api_abts_recvd_args *abts_args, *abts_poller_arg; + + /* check if FC driver supports queue sync */ + if (!nvmf_fc_q_sync_available()) { + return -EPERM; + } + + assert(ctx); + if (!ctx) { + SPDK_ERRLOG("NULL ctx pointer"); + return -EINVAL; + } + + /* Reset the ctx values */ + ctx->hwqps_responded = 0; + + args = calloc(ctx->num_hwqps, + sizeof(struct 
spdk_nvmf_fc_poller_api_queue_sync_args));
+	if (!args) {
+		SPDK_ERRLOG("QueueSync(0x%lx) failed for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+			    ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+		return -ENOMEM;
+	}
+	ctx->sync_poller_args = args;
+
+	abts_args = ctx->abts_poller_args;
+	for (int i = 0; i < ctx->num_hwqps; i++) {
+		abts_poller_arg = abts_args + i;
+		poller_arg = args + i;
+		poller_arg->u_id = ctx->u_id;
+		poller_arg->hwqp = abts_poller_arg->hwqp;
+		poller_arg->cb_info.cb_func = nvmf_fc_queue_synced_cb;
+		poller_arg->cb_info.cb_data = ctx;
+		poller_arg->cb_info.cb_thread = spdk_get_thread();
+
+		/* Send a Queue sync message to interested pollers */
+		nvmf_fc_poller_api_func(poller_arg->hwqp,
+					SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC,
+					poller_arg);
+	}
+
+	SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+		      "QueueSync(0x%lx) Sent for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+		      ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+
+	/* Post Marker to queue to track aborted request */
+	nvmf_fc_issue_q_sync(ctx->ls_hwqp, ctx->u_id, ctx->fcp_rq_id);
+
+	return 0;
+}
+
+/*
+ * Poller completion callback for an ABTS request.
+ *
+ * Called once for each hwqp the ABTS was posted to; nothing further happens
+ * until all ctx->num_hwqps pollers have responded. Then one of the following
+ * applies:
+ *  - the nport no longer matches the one recorded in ctx: no BLS response
+ *    is transmitted and the context is freed;
+ *  - no poller handled the exchange and a queue sync could be started: the
+ *    context is kept for the queue sync path and this function returns;
+ *  - otherwise a BLS accept (handled) or reject (not handled) is transmitted
+ *    and the context is freed.
+ */
+static void
+nvmf_fc_abts_handled_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret)
+{
+	struct spdk_nvmf_fc_abts_ctx *ctx = cb_data;
+	struct spdk_nvmf_fc_nport *nport = NULL;
+
+	if (ret != SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND) {
+		ctx->handled = true;
+	}
+
+	ctx->hwqps_responded++;
+
+	if (ctx->hwqps_responded < ctx->num_hwqps) {
+		/* Wait for all pollers to complete. */
+		return;
+	}
+
+	nport = nvmf_fc_nport_find(ctx->port_hdl, ctx->nport_hdl);
+
+	if (ctx->nport != nport) {
+		/* Nport can be deleted while this abort is being
+		 * processed by the pollers. ctx->nport must not be
+		 * dereferenced on this path; only the handle cached in
+		 * ctx is used, and no BLS response is transmitted.
+		 */
+		SPDK_NOTICELOG("nport_%d deleted while processing ABTS frame, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+			       ctx->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+	} else {
+		if (!ctx->handled) {
+			/* Try syncing the queues and try one more time */
+			if (!ctx->queue_synced && (nvmf_fc_handle_abts_notfound(ctx) == 0)) {
+				SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC,
+					      "QueueSync(0x%lx) for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+					      ctx->u_id, ctx->nport->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+				return;
+			} else {
+				/* Send Reject */
+				nvmf_fc_xmt_bls_rsp(&ctx->nport->fc_port->ls_queue,
+						    ctx->oxid, ctx->rxid, ctx->rpi, true,
+						    FCNVME_BLS_REJECT_EXP_INVALID_OXID, NULL, NULL);
+			}
+		} else {
+			/* Send Accept */
+			nvmf_fc_xmt_bls_rsp(&ctx->nport->fc_port->ls_queue,
+					    ctx->oxid, ctx->rxid, ctx->rpi, false,
+					    0, NULL, NULL);
+		}
+
+		/* Reported only on the path that actually transmitted a response. */
+		SPDK_NOTICELOG("BLS_%s sent for ABTS frame nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+			       (ctx->handled) ? "ACC" : "REJ", ctx->nport_hdl, ctx->rpi, ctx->oxid, ctx->rxid);
+	}
+
+	free(ctx->abts_poller_args);
+	free(ctx);
+}
+
+void
+nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport, uint16_t rpi,
+			  uint16_t oxid, uint16_t rxid)
+{
+	struct spdk_nvmf_fc_abts_ctx *ctx = NULL;
+	struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = NULL, *poller_arg;
+	struct spdk_nvmf_fc_association *assoc = NULL;
+	struct spdk_nvmf_fc_conn *conn = NULL;
+	uint32_t hwqp_cnt = 0;
+	bool skip_hwqp_cnt;
+	struct spdk_nvmf_fc_hwqp **hwqps = NULL;
+	uint32_t i;
+
+	SPDK_NOTICELOG("Handle ABTS frame for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n",
+		       nport->nport_hdl, rpi, oxid, rxid);
+
+	/* Allocate memory to track hwqp's with at least 1 active connection. */
+	hwqps = calloc(nport->fc_port->num_io_queues, sizeof(struct spdk_nvmf_fc_hwqp *));
+	if (hwqps == NULL) {
+		SPDK_ERRLOG("Unable to allocate temp.
hwqp array for abts processing!\n"); + goto bls_rej; + } + + TAILQ_FOREACH(assoc, &nport->fc_associations, link) { + TAILQ_FOREACH(conn, &assoc->fc_conns, assoc_link) { + if (conn->rpi != rpi) { + continue; + } + + skip_hwqp_cnt = false; + for (i = 0; i < hwqp_cnt; i++) { + if (hwqps[i] == conn->hwqp) { + /* Skip. This is already present */ + skip_hwqp_cnt = true; + break; + } + } + if (!skip_hwqp_cnt) { + assert(hwqp_cnt < nport->fc_port->num_io_queues); + hwqps[hwqp_cnt] = conn->hwqp; + hwqp_cnt++; + } + } + } + + if (!hwqp_cnt) { + goto bls_rej; + } + + args = calloc(hwqp_cnt, + sizeof(struct spdk_nvmf_fc_poller_api_abts_recvd_args)); + if (!args) { + goto bls_rej; + } + + ctx = calloc(1, sizeof(struct spdk_nvmf_fc_abts_ctx)); + if (!ctx) { + goto bls_rej; + } + ctx->rpi = rpi; + ctx->oxid = oxid; + ctx->rxid = rxid; + ctx->nport = nport; + ctx->nport_hdl = nport->nport_hdl; + ctx->port_hdl = nport->fc_port->port_hdl; + ctx->num_hwqps = hwqp_cnt; + ctx->ls_hwqp = &nport->fc_port->ls_queue; + ctx->fcp_rq_id = nport->fc_port->fcp_rq_id; + ctx->abts_poller_args = args; + + /* Get a unique context for this ABTS */ + ctx->u_id = nvmf_fc_get_abts_unique_id(); + + for (i = 0; i < hwqp_cnt; i++) { + poller_arg = args + i; + poller_arg->hwqp = hwqps[i]; + poller_arg->cb_info.cb_func = nvmf_fc_abts_handled_cb; + poller_arg->cb_info.cb_data = ctx; + poller_arg->cb_info.cb_thread = spdk_get_thread(); + poller_arg->ctx = ctx; + + nvmf_fc_poller_api_func(poller_arg->hwqp, + SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED, + poller_arg); + } + + free(hwqps); + + return; +bls_rej: + free(args); + free(hwqps); + + /* Send Reject */ + nvmf_fc_xmt_bls_rsp(&nport->fc_port->ls_queue, oxid, rxid, rpi, + true, FCNVME_BLS_REJECT_EXP_NOINFO, NULL, NULL); + SPDK_NOTICELOG("BLS_RJT for ABTS frame for nport: %d, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n", + nport->nport_hdl, rpi, oxid, rxid); + return; +} + +/*** Accessor functions for the FC structures - BEGIN */ +/* + * Returns true if the port is in 
offline state. + */ +bool +nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port) +{ + if (fc_port && (fc_port->hw_port_status == SPDK_FC_PORT_OFFLINE)) { + return true; + } + + return false; +} + +/* + * Returns true if the port is in online state. + */ +bool +nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port) +{ + if (fc_port && (fc_port->hw_port_status == SPDK_FC_PORT_ONLINE)) { + return true; + } + + return false; +} + +int +nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port) +{ + if (fc_port && (fc_port->hw_port_status != SPDK_FC_PORT_ONLINE)) { + fc_port->hw_port_status = SPDK_FC_PORT_ONLINE; + return 0; + } + + return -EPERM; +} + +int +nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port) +{ + if (fc_port && (fc_port->hw_port_status != SPDK_FC_PORT_OFFLINE)) { + fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE; + return 0; + } + + return -EPERM; +} + +int +nvmf_fc_hwqp_set_online(struct spdk_nvmf_fc_hwqp *hwqp) +{ + if (hwqp && (hwqp->state != SPDK_FC_HWQP_ONLINE)) { + hwqp->state = SPDK_FC_HWQP_ONLINE; + /* reset some queue counters */ + hwqp->num_conns = 0; + return nvmf_fc_set_q_online_state(hwqp, true); + } + + return -EPERM; +} + +int +nvmf_fc_hwqp_set_offline(struct spdk_nvmf_fc_hwqp *hwqp) +{ + if (hwqp && (hwqp->state != SPDK_FC_HWQP_OFFLINE)) { + hwqp->state = SPDK_FC_HWQP_OFFLINE; + return nvmf_fc_set_q_online_state(hwqp, false); + } + + return -EPERM; +} + +void +nvmf_fc_port_add(struct spdk_nvmf_fc_port *fc_port) +{ + TAILQ_INSERT_TAIL(&g_spdk_nvmf_fc_port_list, fc_port, link); +} + +struct spdk_nvmf_fc_port * +nvmf_fc_port_lookup(uint8_t port_hdl) +{ + struct spdk_nvmf_fc_port *fc_port = NULL; + + TAILQ_FOREACH(fc_port, &g_spdk_nvmf_fc_port_list, link) { + if (fc_port->port_hdl == port_hdl) { + return fc_port; + } + } + return NULL; +} + +static void +nvmf_fc_port_cleanup(void) +{ + struct spdk_nvmf_fc_port *fc_port, *tmp; + struct spdk_nvmf_fc_hwqp *hwqp; + uint32_t i; + + TAILQ_FOREACH_SAFE(fc_port, 
&g_spdk_nvmf_fc_port_list, link, tmp) { + TAILQ_REMOVE(&g_spdk_nvmf_fc_port_list, fc_port, link); + for (i = 0; i < fc_port->num_io_queues; i++) { + hwqp = &fc_port->io_queues[i]; + if (hwqp->fc_reqs_buf) { + free(hwqp->fc_reqs_buf); + } + } + free(fc_port); + } +} + +uint32_t +nvmf_fc_get_prli_service_params(void) +{ + return (SPDK_NVMF_FC_DISCOVERY_SERVICE | SPDK_NVMF_FC_TARGET_FUNCTION); +} + +int +nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport) +{ + if (fc_port) { + TAILQ_INSERT_TAIL(&fc_port->nport_list, nport, link); + fc_port->num_nports++; + return 0; + } + + return -EINVAL; +} + +int +nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport) +{ + if (fc_port && nport) { + TAILQ_REMOVE(&fc_port->nport_list, nport, link); + fc_port->num_nports--; + return 0; + } + + return -EINVAL; +} + +static struct spdk_nvmf_fc_nport * +nvmf_fc_nport_hdl_lookup(struct spdk_nvmf_fc_port *fc_port, uint16_t nport_hdl) +{ + struct spdk_nvmf_fc_nport *fc_nport = NULL; + + TAILQ_FOREACH(fc_nport, &fc_port->nport_list, link) { + if (fc_nport->nport_hdl == nport_hdl) { + return fc_nport; + } + } + + return NULL; +} + +struct spdk_nvmf_fc_nport * +nvmf_fc_nport_find(uint8_t port_hdl, uint16_t nport_hdl) +{ + struct spdk_nvmf_fc_port *fc_port = NULL; + + fc_port = nvmf_fc_port_lookup(port_hdl); + if (fc_port) { + return nvmf_fc_nport_hdl_lookup(fc_port, nport_hdl); + } + + return NULL; +} + +static inline int +nvmf_fc_hwqp_find_nport_and_rport(struct spdk_nvmf_fc_hwqp *hwqp, + uint32_t d_id, struct spdk_nvmf_fc_nport **nport, + uint32_t s_id, struct spdk_nvmf_fc_remote_port_info **rport) +{ + struct spdk_nvmf_fc_nport *n_port; + struct spdk_nvmf_fc_remote_port_info *r_port; + + assert(hwqp); + if (hwqp == NULL) { + SPDK_ERRLOG("Error: hwqp is NULL\n"); + return -EINVAL; + } + assert(nport); + if (nport == NULL) { + SPDK_ERRLOG("Error: nport is NULL\n"); + return -EINVAL; + } + assert(rport); + if 
(rport == NULL) { + SPDK_ERRLOG("Error: rport is NULL\n"); + return -EINVAL; + } + + TAILQ_FOREACH(n_port, &hwqp->fc_port->nport_list, link) { + if (n_port->d_id == d_id) { + TAILQ_FOREACH(r_port, &n_port->rem_port_list, link) { + if (r_port->s_id == s_id) { + *nport = n_port; + *rport = r_port; + return 0; + } + } + break; + } + } + + return -ENOENT; +} + +/* Returns true if the Nport is empty of all rem_ports */ +bool +nvmf_fc_nport_has_no_rport(struct spdk_nvmf_fc_nport *nport) +{ + if (nport && TAILQ_EMPTY(&nport->rem_port_list)) { + assert(nport->rport_count == 0); + return true; + } else { + return false; + } +} + +int +nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport, + enum spdk_nvmf_fc_object_state state) +{ + if (nport) { + nport->nport_state = state; + return 0; + } else { + return -EINVAL; + } +} + +bool +nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port) +{ + if (nport && rem_port) { + TAILQ_INSERT_TAIL(&nport->rem_port_list, rem_port, link); + nport->rport_count++; + return 0; + } else { + return -EINVAL; + } +} + +bool +nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port) +{ + if (nport && rem_port) { + TAILQ_REMOVE(&nport->rem_port_list, rem_port, link); + nport->rport_count--; + return 0; + } else { + return -EINVAL; + } +} + +int +nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport, + enum spdk_nvmf_fc_object_state state) +{ + if (rport) { + rport->rport_state = state; + return 0; + } else { + return -EINVAL; + } +} +int +nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc, + enum spdk_nvmf_fc_object_state state) +{ + if (assoc) { + assoc->assoc_state = state; + return 0; + } else { + return -EINVAL; + } +} + +static struct spdk_nvmf_fc_association * +nvmf_ctrlr_get_fc_assoc(struct spdk_nvmf_ctrlr *ctrlr) +{ + struct spdk_nvmf_qpair *qpair = ctrlr->admin_qpair; + struct spdk_nvmf_fc_conn 
*fc_conn; + + if (!qpair) { + SPDK_ERRLOG("Controller %d has no associations\n", ctrlr->cntlid); + return NULL; + } + + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + + return fc_conn->fc_assoc; +} + +bool +nvmf_ctrlr_is_on_nport(uint8_t port_hdl, uint16_t nport_hdl, + struct spdk_nvmf_ctrlr *ctrlr) +{ + struct spdk_nvmf_fc_nport *fc_nport = NULL; + struct spdk_nvmf_fc_association *assoc = NULL; + + if (!ctrlr) { + return false; + } + + fc_nport = nvmf_fc_nport_find(port_hdl, nport_hdl); + if (!fc_nport) { + return false; + } + + assoc = nvmf_ctrlr_get_fc_assoc(ctrlr); + if (assoc && assoc->tgtport == fc_nport) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, + "Controller: %d corresponding to association: %p(%lu:%d) is on port: %d nport: %d\n", + ctrlr->cntlid, assoc, assoc->assoc_id, assoc->assoc_state, port_hdl, + nport_hdl); + return true; + } + return false; +} + +static inline bool +nvmf_fc_req_in_bdev(struct spdk_nvmf_fc_request *fc_req) +{ + switch (fc_req->state) { + case SPDK_NVMF_FC_REQ_READ_BDEV: + case SPDK_NVMF_FC_REQ_WRITE_BDEV: + case SPDK_NVMF_FC_REQ_NONE_BDEV: + return true; + default: + return false; + } +} + +static inline bool +nvmf_fc_req_in_pending(struct spdk_nvmf_fc_request *fc_req) +{ + struct spdk_nvmf_request *tmp = NULL; + + STAILQ_FOREACH(tmp, &fc_req->hwqp->fgroup->group.pending_buf_queue, buf_link) { + if (tmp == &fc_req->req) { + return true; + } + } + return false; +} + +static void +nvmf_fc_req_bdev_abort(void *arg1) +{ + struct spdk_nvmf_fc_request *fc_req = arg1; + struct spdk_nvmf_ctrlr *ctrlr = fc_req->req.qpair->ctrlr; + int i; + + /* Initial release - we don't have to abort Admin Queue or + * Fabric commands. The AQ commands supported at this time are + * Get-Log-Page, + * Identify + * Set Features + * Get Features + * AER -> Special case and handled differently. + * Every one of the above Admin commands (except AER) run + * to completion and so an Abort of such commands doesn't + * make sense. 
+ */ + /* The Fabric commands supported are + * Property Set + * Property Get + * Connect -> Special case (async. handling). Not sure how to + * handle at this point. Let it run to completion. + */ + for (i = 0; i < NVMF_MAX_ASYNC_EVENTS; i++) { + if (ctrlr->aer_req[i] == &fc_req->req) { + SPDK_NOTICELOG("Abort AER request\n"); + nvmf_qpair_free_aer(fc_req->req.qpair); + } + } +} + +void +nvmf_fc_request_abort_complete(void *arg1) +{ + struct spdk_nvmf_fc_request *fc_req = + (struct spdk_nvmf_fc_request *)arg1; + struct spdk_nvmf_fc_caller_ctx *ctx = NULL, *tmp = NULL; + + /* Request abort completed. Notify all the callbacks */ + TAILQ_FOREACH_SAFE(ctx, &fc_req->abort_cbs, link, tmp) { + /* Notify */ + ctx->cb(fc_req->hwqp, 0, ctx->cb_args); + /* Remove */ + TAILQ_REMOVE(&fc_req->abort_cbs, ctx, link); + /* free */ + free(ctx); + } + + SPDK_NOTICELOG("FC Request(%p) in state :%s aborted\n", fc_req, + fc_req_state_strs[fc_req->state]); + + _nvmf_fc_request_free(fc_req); +} + +void +nvmf_fc_request_abort(struct spdk_nvmf_fc_request *fc_req, bool send_abts, + spdk_nvmf_fc_caller_cb cb, void *cb_args) +{ + struct spdk_nvmf_fc_caller_ctx *ctx = NULL; + bool kill_req = false; + + /* Add the cb to list */ + if (cb) { + ctx = calloc(1, sizeof(struct spdk_nvmf_fc_caller_ctx)); + if (!ctx) { + SPDK_ERRLOG("ctx alloc failed.\n"); + return; + } + ctx->cb = cb; + ctx->cb_args = cb_args; + + TAILQ_INSERT_TAIL(&fc_req->abort_cbs, ctx, link); + } + + if (!fc_req->is_aborted) { + /* Increment aborted command counter */ + fc_req->hwqp->counters.num_aborted++; + } + + /* If port is dead, skip abort wqe */ + kill_req = nvmf_fc_is_port_dead(fc_req->hwqp); + if (kill_req && nvmf_fc_req_in_xfer(fc_req)) { + fc_req->is_aborted = true; + goto complete; + } + + /* Check if the request is already marked for deletion */ + if (fc_req->is_aborted) { + return; + } + + /* Mark request as aborted */ + fc_req->is_aborted = true; + + /* If xchg is allocated, then save if we need to send abts or not. 
*/ + if (fc_req->xchg) { + fc_req->xchg->send_abts = send_abts; + fc_req->xchg->aborted = true; + } + + if (fc_req->state == SPDK_NVMF_FC_REQ_BDEV_ABORTED) { + /* Aborted by backend */ + goto complete; + } else if (nvmf_fc_req_in_bdev(fc_req)) { + /* Notify bdev */ + spdk_thread_send_msg(fc_req->hwqp->thread, + nvmf_fc_req_bdev_abort, (void *)fc_req); + } else if (nvmf_fc_req_in_xfer(fc_req)) { + /* Notify HBA to abort this exchange */ + nvmf_fc_issue_abort(fc_req->hwqp, fc_req->xchg, NULL, NULL); + } else if (nvmf_fc_req_in_get_buff(fc_req)) { + /* Will be completed by request_complete callback. */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Abort req when getting buffers.\n"); + } else if (nvmf_fc_req_in_pending(fc_req)) { + /* Remove from pending */ + STAILQ_REMOVE(&fc_req->hwqp->fgroup->group.pending_buf_queue, &fc_req->req, + spdk_nvmf_request, buf_link); + goto complete; + } else { + /* Should never happen */ + SPDK_ERRLOG("Request in invalid state\n"); + goto complete; + } + + return; +complete: + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_ABORTED); + nvmf_fc_poller_api_func(fc_req->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE, + (void *)fc_req); +} + +static int +nvmf_fc_request_alloc_buffers(struct spdk_nvmf_fc_request *fc_req) +{ + uint32_t length = fc_req->req.length; + struct spdk_nvmf_fc_poll_group *fgroup = fc_req->hwqp->fgroup; + struct spdk_nvmf_transport_poll_group *group = &fgroup->group; + struct spdk_nvmf_transport *transport = group->transport; + + if (spdk_nvmf_request_get_buffers(&fc_req->req, group, transport, length)) { + return -ENOMEM; + } + + return 0; +} + +static int +nvmf_fc_request_execute(struct spdk_nvmf_fc_request *fc_req) +{ + /* Allocate an XCHG if we dont use send frame for this command. 
*/ + if (!nvmf_fc_use_send_frame(&fc_req->req)) { + fc_req->xchg = nvmf_fc_get_xri(fc_req->hwqp); + if (!fc_req->xchg) { + fc_req->hwqp->counters.no_xchg++; + printf("NO XCHGs!\n"); + goto pending; + } + } + + if (fc_req->req.length) { + if (nvmf_fc_request_alloc_buffers(fc_req) < 0) { + fc_req->hwqp->counters.buf_alloc_err++; + goto pending; + } + fc_req->req.data = fc_req->req.iov[0].iov_base; + } + + if (fc_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "WRITE CMD.\n"); + + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_WRITE_XFER); + + if (nvmf_fc_recv_data(fc_req)) { + /* Dropped return success to caller */ + fc_req->hwqp->counters.unexpected_err++; + _nvmf_fc_request_free(fc_req); + } + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "READ/NONE CMD\n"); + + if (fc_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_BDEV); + } else { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_NONE_BDEV); + } + spdk_nvmf_request_exec(&fc_req->req); + } + + return 0; + +pending: + if (fc_req->xchg) { + nvmf_fc_put_xchg(fc_req->hwqp, fc_req->xchg); + fc_req->xchg = NULL; + } + + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_PENDING); + + return -EAGAIN; +} + +static int +nvmf_fc_hwqp_handle_request(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_frame_hdr *frame, + uint32_t buf_idx, struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen) +{ + uint16_t cmnd_len; + uint64_t rqst_conn_id; + struct spdk_nvmf_fc_request *fc_req = NULL; + struct spdk_nvmf_fc_cmnd_iu *cmd_iu = NULL; + struct spdk_nvmf_fc_conn *fc_conn = NULL; + enum spdk_nvme_data_transfer xfer; + + cmd_iu = buffer->virt; + cmnd_len = cmd_iu->cmnd_iu_len; + cmnd_len = from_be16(&cmnd_len); + + /* check for a valid cmnd_iu format */ + if ((cmd_iu->fc_id != FCNVME_CMND_IU_FC_ID) || + (cmd_iu->scsi_id != FCNVME_CMND_IU_SCSI_ID) || + (cmnd_len != sizeof(struct spdk_nvmf_fc_cmnd_iu) / 4)) { + SPDK_ERRLOG("IU 
CMD error\n"); + hwqp->counters.nvme_cmd_iu_err++; + return -ENXIO; + } + + xfer = spdk_nvme_opc_get_data_transfer(cmd_iu->flags); + if (xfer == SPDK_NVME_DATA_BIDIRECTIONAL) { + SPDK_ERRLOG("IU CMD xfer error\n"); + hwqp->counters.nvme_cmd_xfer_err++; + return -EPERM; + } + + rqst_conn_id = from_be64(&cmd_iu->conn_id); + + /* Check if conn id is valid */ + fc_conn = nvmf_fc_hwqp_find_fc_conn(hwqp, rqst_conn_id); + if (!fc_conn) { + SPDK_ERRLOG("IU CMD conn(%ld) invalid\n", rqst_conn_id); + hwqp->counters.invalid_conn_err++; + return -ENODEV; + } + + /* If association/connection is being deleted - return */ + if (fc_conn->fc_assoc->assoc_state != SPDK_NVMF_FC_OBJECT_CREATED) { + SPDK_ERRLOG("Association state not valid\n"); + return -EACCES; + } + + if (fc_conn->qpair.state == SPDK_NVMF_QPAIR_ERROR) { + return -EACCES; + } + + /* Make sure xfer len is according to mdts */ + if (from_be32(&cmd_iu->data_len) > + hwqp->fgroup->group.transport->opts.max_io_size) { + SPDK_ERRLOG("IO length requested is greater than MDTS\n"); + return -EINVAL; + } + + /* allocate a request buffer */ + fc_req = nvmf_fc_hwqp_alloc_fc_request(hwqp); + if (fc_req == NULL) { + /* Should not happen. 
Since fc_reqs == RQ buffers */ + return -ENOMEM; + } + + fc_req->req.length = from_be32(&cmd_iu->data_len); + fc_req->req.qpair = &fc_conn->qpair; + fc_req->req.cmd = (union nvmf_h2c_msg *)&cmd_iu->cmd; + fc_req->req.rsp = (union nvmf_c2h_msg *)&fc_req->ersp.rsp; + fc_req->oxid = frame->ox_id; + fc_req->oxid = from_be16(&fc_req->oxid); + fc_req->rpi = fc_conn->rpi; + fc_req->buf_index = buf_idx; + fc_req->poller_lcore = hwqp->lcore_id; + fc_req->poller_thread = hwqp->thread; + fc_req->hwqp = hwqp; + fc_req->fc_conn = fc_conn; + fc_req->req.xfer = xfer; + fc_req->s_id = (uint32_t)frame->s_id; + fc_req->d_id = (uint32_t)frame->d_id; + fc_req->s_id = from_be32(&fc_req->s_id) >> 8; + fc_req->d_id = from_be32(&fc_req->d_id) >> 8; + + nvmf_fc_record_req_trace_point(fc_req, SPDK_NVMF_FC_REQ_INIT); + if (nvmf_fc_request_execute(fc_req)) { + STAILQ_INSERT_TAIL(&hwqp->fgroup->group.pending_buf_queue, &fc_req->req, buf_link); + } + + return 0; +} + +/* + * These functions are called from the FC LLD + */ + +void +_nvmf_fc_request_free(struct spdk_nvmf_fc_request *fc_req) +{ + struct spdk_nvmf_fc_hwqp *hwqp = fc_req->hwqp; + struct spdk_nvmf_fc_poll_group *fgroup = hwqp->fgroup; + struct spdk_nvmf_transport_poll_group *group = &fgroup->group; + struct spdk_nvmf_transport *transport = group->transport; + + if (!fc_req) { + return; + } + + if (fc_req->xchg) { + nvmf_fc_put_xchg(hwqp, fc_req->xchg); + fc_req->xchg = NULL; + } + + /* Release IO buffers */ + if (fc_req->req.data_from_pool) { + spdk_nvmf_request_free_buffers(&fc_req->req, group, transport); + } + fc_req->req.data = NULL; + fc_req->req.iovcnt = 0; + + /* Release Q buffer */ + nvmf_fc_rqpair_buffer_release(hwqp, fc_req->buf_index); + + /* Free Fc request */ + nvmf_fc_hwqp_free_fc_request(hwqp, fc_req); +} + +void +nvmf_fc_request_set_state(struct spdk_nvmf_fc_request *fc_req, + enum spdk_nvmf_fc_request_state state) +{ + assert(fc_req->magic != 0xDEADBEEF); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, + "FC 
Request(%p):\n\tState Old:%s New:%s\n", fc_req, + nvmf_fc_request_get_state_str(fc_req->state), + nvmf_fc_request_get_state_str(state)); + nvmf_fc_record_req_trace_point(fc_req, state); + fc_req->state = state; +} + +char * +nvmf_fc_request_get_state_str(int state) +{ + static char *unk_str = "unknown"; + + return (state >= 0 && state < (int)(sizeof(fc_req_state_strs) / sizeof(char *)) ? + fc_req_state_strs[state] : unk_str); +} + +int +nvmf_fc_hwqp_process_frame(struct spdk_nvmf_fc_hwqp *hwqp, + uint32_t buff_idx, + struct spdk_nvmf_fc_frame_hdr *frame, + struct spdk_nvmf_fc_buffer_desc *buffer, + uint32_t plen) +{ + int rc = 0; + uint32_t s_id, d_id; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_remote_port_info *rport = NULL; + + s_id = (uint32_t)frame->s_id; + d_id = (uint32_t)frame->d_id; + s_id = from_be32(&s_id) >> 8; + d_id = from_be32(&d_id) >> 8; + + /* Note: In tracelog below, we directly do endian conversion on rx_id and. + * ox_id Since these are fields, we can't pass address to from_be16(). + * Since ox_id and rx_id are only needed for tracelog, assigning to local + * vars. and doing conversion is a waste of time in non-debug builds. */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, + "Process NVME frame s_id:0x%x d_id:0x%x oxid:0x%x rxid:0x%x.\n", + s_id, d_id, + ((frame->ox_id << 8) & 0xff00) | ((frame->ox_id >> 8) & 0xff), + ((frame->rx_id << 8) & 0xff00) | ((frame->rx_id >> 8) & 0xff)); + + rc = nvmf_fc_hwqp_find_nport_and_rport(hwqp, d_id, &nport, s_id, &rport); + if (rc) { + if (nport == NULL) { + SPDK_ERRLOG("Nport not found. Dropping\n"); + /* increment invalid nport counter */ + hwqp->counters.nport_invalid++; + } else if (rport == NULL) { + SPDK_ERRLOG("Rport not found. Dropping\n"); + /* increment invalid rport counter */ + hwqp->counters.rport_invalid++; + } + return rc; + } + + if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED || + rport->rport_state != SPDK_NVMF_FC_OBJECT_CREATED) { + SPDK_ERRLOG("%s state not created. 
Dropping\n", + nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ? + "Nport" : "Rport"); + return -EACCES; + } + + if ((frame->r_ctl == FCNVME_R_CTL_LS_REQUEST) && + (frame->type == FCNVME_TYPE_NVMF_DATA)) { + struct spdk_nvmf_fc_rq_buf_ls_request *req_buf = buffer->virt; + struct spdk_nvmf_fc_ls_rqst *ls_rqst; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Process LS NVME frame\n"); + + /* Use the RQ buffer for holding LS request. */ + ls_rqst = (struct spdk_nvmf_fc_ls_rqst *)&req_buf->ls_rqst; + + /* Fill in the LS request structure */ + ls_rqst->rqstbuf.virt = (void *)&req_buf->rqst; + ls_rqst->rqstbuf.phys = buffer->phys + + offsetof(struct spdk_nvmf_fc_rq_buf_ls_request, rqst); + ls_rqst->rqstbuf.buf_index = buff_idx; + ls_rqst->rqst_len = plen; + + ls_rqst->rspbuf.virt = (void *)&req_buf->resp; + ls_rqst->rspbuf.phys = buffer->phys + + offsetof(struct spdk_nvmf_fc_rq_buf_ls_request, resp); + ls_rqst->rsp_len = FCNVME_MAX_LS_RSP_SIZE; + + ls_rqst->private_data = (void *)hwqp; + ls_rqst->rpi = rport->rpi; + ls_rqst->oxid = (uint16_t)frame->ox_id; + ls_rqst->oxid = from_be16(&ls_rqst->oxid); + ls_rqst->s_id = s_id; + ls_rqst->d_id = d_id; + ls_rqst->nport = nport; + ls_rqst->rport = rport; + ls_rqst->nvmf_tgt = g_nvmf_ftransport->transport.tgt; + + ls_rqst->xchg = nvmf_fc_get_xri(hwqp); + if (ls_rqst->xchg) { + /* Handover the request to LS module */ + nvmf_fc_handle_ls_rqst(ls_rqst); + } else { + /* No XCHG available. Add to pending list. */ + hwqp->counters.no_xchg++; + TAILQ_INSERT_TAIL(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link); + } + } else if ((frame->r_ctl == FCNVME_R_CTL_CMD_REQ) && + (frame->type == FCNVME_TYPE_FC_EXCHANGE)) { + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Process IO NVME frame\n"); + rc = nvmf_fc_hwqp_handle_request(hwqp, frame, buff_idx, buffer, plen); + } else { + + SPDK_ERRLOG("Unknown frame received. 
Dropping\n"); + hwqp->counters.unknown_frame++; + rc = -EINVAL; + } + + return rc; +} + +void +nvmf_fc_hwqp_process_pending_reqs(struct spdk_nvmf_fc_hwqp *hwqp) +{ + struct spdk_nvmf_request *req = NULL, *tmp; + struct spdk_nvmf_fc_request *fc_req; + int budget = 64; + + if (!hwqp->fgroup) { + /* LS queue is tied to acceptor_poll group and LS pending requests + * are stagged and processed using hwqp->ls_pending_queue. + */ + return; + } + + STAILQ_FOREACH_SAFE(req, &hwqp->fgroup->group.pending_buf_queue, buf_link, tmp) { + fc_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_fc_request, req); + if (!nvmf_fc_request_execute(fc_req)) { + /* Succesfuly posted, Delete from pending. */ + STAILQ_REMOVE_HEAD(&hwqp->fgroup->group.pending_buf_queue, buf_link); + } + + if (budget) { + budget--; + } else { + return; + } + } +} + +void +nvmf_fc_hwqp_process_pending_ls_rqsts(struct spdk_nvmf_fc_hwqp *hwqp) +{ + struct spdk_nvmf_fc_ls_rqst *ls_rqst = NULL, *tmp; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_remote_port_info *rport = NULL; + + TAILQ_FOREACH_SAFE(ls_rqst, &hwqp->ls_pending_queue, ls_pending_link, tmp) { + /* lookup nport and rport again - make sure they are still valid */ + int rc = nvmf_fc_hwqp_find_nport_and_rport(hwqp, ls_rqst->d_id, &nport, ls_rqst->s_id, &rport); + if (rc) { + if (nport == NULL) { + SPDK_ERRLOG("Nport not found. Dropping\n"); + /* increment invalid nport counter */ + hwqp->counters.nport_invalid++; + } else if (rport == NULL) { + SPDK_ERRLOG("Rport not found. Dropping\n"); + /* increment invalid rport counter */ + hwqp->counters.rport_invalid++; + } + TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link); + /* Return buffer to chip */ + nvmf_fc_rqpair_buffer_release(hwqp, ls_rqst->rqstbuf.buf_index); + continue; + } + if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED || + rport->rport_state != SPDK_NVMF_FC_OBJECT_CREATED) { + SPDK_ERRLOG("%s state not created. 
Dropping\n", + nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED ? + "Nport" : "Rport"); + TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link); + /* Return buffer to chip */ + nvmf_fc_rqpair_buffer_release(hwqp, ls_rqst->rqstbuf.buf_index); + continue; + } + + ls_rqst->xchg = nvmf_fc_get_xri(hwqp); + if (ls_rqst->xchg) { + /* Got an XCHG */ + TAILQ_REMOVE(&hwqp->ls_pending_queue, ls_rqst, ls_pending_link); + /* Handover the request to LS module */ + nvmf_fc_handle_ls_rqst(ls_rqst); + } else { + /* No more XCHGs. Stop processing. */ + hwqp->counters.no_xchg++; + return; + } + } +} + +int +nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *fc_req) +{ + int rc = 0; + struct spdk_nvmf_request *req = &fc_req->req; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_fc_conn *fc_conn = nvmf_fc_get_conn(qpair); + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint16_t ersp_len = 0; + + /* set sq head value in resp */ + rsp->sqhd = nvmf_fc_advance_conn_sqhead(qpair); + + /* Increment connection responses */ + fc_conn->rsp_count++; + + if (nvmf_fc_send_ersp_required(fc_req, fc_conn->rsp_count, + fc_req->transfered_len)) { + /* Fill ERSP Len */ + to_be16(&ersp_len, (sizeof(struct spdk_nvmf_fc_ersp_iu) / + sizeof(uint32_t))); + fc_req->ersp.ersp_len = ersp_len; + + /* Fill RSN */ + to_be32(&fc_req->ersp.response_seq_no, fc_conn->rsn); + fc_conn->rsn++; + + /* Fill transfer length */ + to_be32(&fc_req->ersp.transferred_data_len, fc_req->transfered_len); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Posting ERSP.\n"); + rc = nvmf_fc_xmt_rsp(fc_req, (uint8_t *)&fc_req->ersp, + sizeof(struct spdk_nvmf_fc_ersp_iu)); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC, "Posting RSP.\n"); + rc = nvmf_fc_xmt_rsp(fc_req, NULL, 0); + } + + return rc; +} + +bool +nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req, + uint32_t rsp_cnt, uint32_t xfer_len) +{ + struct spdk_nvmf_request *req = &fc_req->req; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct 
spdk_nvmf_fc_conn *fc_conn = nvmf_fc_get_conn(qpair); + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint16_t status = *((uint16_t *)&rsp->status); + + /* + * Check if we need to send ERSP + * 1) For every N responses where N == ersp_ratio + * 2) Fabric commands. + * 3) Completion status failed or Completion dw0 or dw1 valid. + * 4) SQ == 90% full. + * 5) Transfer length not equal to CMD IU length + */ + + if (!(rsp_cnt % fc_conn->esrp_ratio) || + (cmd->opc == SPDK_NVME_OPC_FABRIC) || + (status & 0xFFFE) || rsp->cdw0 || rsp->rsvd1 || + (req->length != xfer_len)) { + return true; + } + return false; +} + +static int +nvmf_fc_request_complete(struct spdk_nvmf_request *req) +{ + int rc = 0; + struct spdk_nvmf_fc_request *fc_req = nvmf_fc_get_fc_req(req); + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + if (fc_req->is_aborted) { + /* Defer this to make sure we dont call io cleanup in same context. */ + nvmf_fc_poller_api_func(fc_req->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE, + (void *)fc_req); + } else if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && + req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_XFER); + + rc = nvmf_fc_send_data(fc_req); + } else { + if (req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_WRITE_RSP); + } else if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_READ_RSP); + } else { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_NONE_RSP); + } + + rc = nvmf_fc_handle_rsp(fc_req); + } + + if (rc) { + SPDK_ERRLOG("Error in request complete.\n"); + _nvmf_fc_request_free(fc_req); + } + return 0; +} + +struct spdk_nvmf_tgt * +nvmf_fc_get_tgt(void) +{ + if (g_nvmf_ftransport) { + return g_nvmf_ftransport->transport.tgt; + } + return NULL; +} + +/* + * FC Transport Public API begins here + */ + +#define 
SPDK_NVMF_FC_DEFAULT_MAX_QUEUE_DEPTH 128 +#define SPDK_NVMF_FC_DEFAULT_AQ_DEPTH 32 +#define SPDK_NVMF_FC_DEFAULT_MAX_QPAIRS_PER_CTRLR 5 +#define SPDK_NVMF_FC_DEFAULT_IN_CAPSULE_DATA_SIZE 0 +#define SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE 65536 +#define SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE 4096 +#define SPDK_NVMF_FC_DEFAULT_NUM_SHARED_BUFFERS 8192 +#define SPDK_NVMF_FC_DEFAULT_MAX_SGE (SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE / \ + SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE) + +static void +nvmf_fc_opts_init(struct spdk_nvmf_transport_opts *opts) +{ + opts->max_queue_depth = SPDK_NVMF_FC_DEFAULT_MAX_QUEUE_DEPTH; + opts->max_qpairs_per_ctrlr = SPDK_NVMF_FC_DEFAULT_MAX_QPAIRS_PER_CTRLR; + opts->in_capsule_data_size = SPDK_NVMF_FC_DEFAULT_IN_CAPSULE_DATA_SIZE; + opts->max_io_size = SPDK_NVMF_FC_DEFAULT_MAX_IO_SIZE; + opts->io_unit_size = SPDK_NVMF_FC_DEFAULT_IO_UNIT_SIZE; + opts->max_aq_depth = SPDK_NVMF_FC_DEFAULT_AQ_DEPTH; + opts->num_shared_buffers = SPDK_NVMF_FC_DEFAULT_NUM_SHARED_BUFFERS; +} + +static struct spdk_nvmf_transport * +nvmf_fc_create(struct spdk_nvmf_transport_opts *opts) +{ + uint32_t sge_count; + + SPDK_INFOLOG(SPDK_LOG_NVMF_FC, "*** FC Transport Init ***\n" + " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" + " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" + " max_aq_depth=%d\n", + opts->max_queue_depth, + opts->max_io_size, + opts->max_qpairs_per_ctrlr - 1, + opts->io_unit_size, + opts->max_aq_depth); + + if (g_nvmf_ftransport) { + SPDK_ERRLOG("Duplicate NVMF-FC transport create request!\n"); + return NULL; + } + + if (spdk_env_get_last_core() < 1) { + SPDK_ERRLOG("Not enough cores/threads (%d) to run NVMF-FC transport!\n", + spdk_env_get_last_core() + 1); + return NULL; + } + + sge_count = opts->max_io_size / opts->io_unit_size; + if (sge_count > SPDK_NVMF_FC_DEFAULT_MAX_SGE) { + SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); + return NULL; + } + + g_nvmf_fc_master_thread = spdk_get_thread(); + g_nvmf_fgroup_count = 0; + 
g_nvmf_ftransport = calloc(1, sizeof(*g_nvmf_ftransport)); + + if (!g_nvmf_ftransport) { + SPDK_ERRLOG("Failed to allocate NVMF-FC transport\n"); + return NULL; + } + + if (pthread_mutex_init(&g_nvmf_ftransport->lock, NULL)) { + SPDK_ERRLOG("pthread_mutex_init() failed\n"); + free(g_nvmf_ftransport); + g_nvmf_ftransport = NULL; + return NULL; + } + + /* initialize the low level FC driver */ + nvmf_fc_lld_init(); + + return &g_nvmf_ftransport->transport; +} + +static int +nvmf_fc_destroy(struct spdk_nvmf_transport *transport) +{ + if (transport) { + struct spdk_nvmf_fc_transport *ftransport; + struct spdk_nvmf_fc_poll_group *fgroup, *pg_tmp; + + ftransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_fc_transport, transport); + + free(ftransport); + + /* clean up any FC poll groups still around */ + TAILQ_FOREACH_SAFE(fgroup, &g_nvmf_fgroups, link, pg_tmp) { + TAILQ_REMOVE(&g_nvmf_fgroups, fgroup, link); + free(fgroup); + } + g_nvmf_fgroup_count = 0; + + /* low level FC driver clean up */ + nvmf_fc_lld_fini(); + + nvmf_fc_port_cleanup(); + } + + return 0; +} + +static int +nvmf_fc_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + return 0; +} + +static void +nvmf_fc_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *_trid) +{ +} + +static uint32_t +nvmf_fc_accept(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_fc_port *fc_port = NULL; + uint32_t count = 0; + static bool start_lld = false; + + if (spdk_unlikely(!start_lld)) { + start_lld = true; + nvmf_fc_lld_start(); + } + + /* poll the LS queue on each port */ + TAILQ_FOREACH(fc_port, &g_spdk_nvmf_fc_port_list, link) { + if (fc_port->hw_port_status == SPDK_FC_PORT_ONLINE) { + count += nvmf_fc_process_queue(&fc_port->ls_queue); + } + } + + return count; +} + +static void +nvmf_fc_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) 
+{ + entry->trtype = (enum spdk_nvme_transport_type) SPDK_NVMF_TRTYPE_FC; + entry->adrfam = trid->adrfam; + entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED; + + spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); + spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); +} + +static struct spdk_nvmf_transport_poll_group * +nvmf_fc_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_fc_poll_group *fgroup; + struct spdk_nvmf_fc_transport *ftransport = + SPDK_CONTAINEROF(transport, struct spdk_nvmf_fc_transport, transport); + + fgroup = calloc(1, sizeof(struct spdk_nvmf_fc_poll_group)); + if (!fgroup) { + SPDK_ERRLOG("Unable to alloc FC poll group\n"); + return NULL; + } + + TAILQ_INIT(&fgroup->hwqp_list); + + pthread_mutex_lock(&ftransport->lock); + TAILQ_INSERT_TAIL(&g_nvmf_fgroups, fgroup, link); + g_nvmf_fgroup_count++; + pthread_mutex_unlock(&ftransport->lock); + + return &fgroup->group; +} + +static void +nvmf_fc_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_fc_poll_group *fgroup; + struct spdk_nvmf_fc_transport *ftransport = + SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_fc_transport, transport); + + fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group); + pthread_mutex_lock(&ftransport->lock); + TAILQ_REMOVE(&g_nvmf_fgroups, fgroup, link); + g_nvmf_fgroup_count--; + pthread_mutex_unlock(&ftransport->lock); + + free(fgroup); +} + +static int +nvmf_fc_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_fc_poll_group *fgroup; + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_hwqp *hwqp = NULL; + struct spdk_nvmf_fc_ls_add_conn_api_data *api_data = NULL; + bool hwqp_found = false; + + fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group); + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + + 
TAILQ_FOREACH(hwqp, &fgroup->hwqp_list, link) { + if (fc_conn->fc_assoc->tgtport->fc_port == hwqp->fc_port) { + hwqp_found = true; + break; + } + } + + if (!hwqp_found) { + SPDK_ERRLOG("No valid hwqp found for new QP.\n"); + goto err; + } + + if (!nvmf_fc_assign_conn_to_hwqp(hwqp, + &fc_conn->conn_id, + fc_conn->max_queue_depth)) { + SPDK_ERRLOG("Failed to get a connection id for new QP.\n"); + goto err; + } + + fc_conn->hwqp = hwqp; + + /* If this is for ADMIN connection, then update assoc ID. */ + if (fc_conn->qpair.qid == 0) { + fc_conn->fc_assoc->assoc_id = fc_conn->conn_id; + } + + api_data = &fc_conn->create_opd->u.add_conn; + nvmf_fc_poller_api_func(hwqp, SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION, &api_data->args); + return 0; +err: + return -1; +} + +static int +nvmf_fc_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + uint32_t count = 0; + struct spdk_nvmf_fc_poll_group *fgroup; + struct spdk_nvmf_fc_hwqp *hwqp; + + fgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_fc_poll_group, group); + + TAILQ_FOREACH(hwqp, &fgroup->hwqp_list, link) { + if (hwqp->state == SPDK_FC_HWQP_ONLINE) { + count += nvmf_fc_process_queue(hwqp); + } + } + + return (int) count; +} + +static int +nvmf_fc_request_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fc_request *fc_req = nvmf_fc_get_fc_req(req); + + if (!fc_req->is_aborted) { + nvmf_fc_request_set_state(fc_req, SPDK_NVMF_FC_REQ_BDEV_ABORTED); + nvmf_fc_request_abort(fc_req, true, NULL, NULL); + } else { + nvmf_fc_request_abort_complete(fc_req); + } + return 0; +} + + +static void +nvmf_fc_close_qpair(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + + if (fc_conn->conn_id == NVMF_FC_INVALID_CONN_ID) { + /* QP creation failure in FC tranport. Cleanup. 
*/ + spdk_thread_send_msg(nvmf_fc_get_master_thread(), + nvmf_fc_handle_connection_failure, fc_conn); + } else if (fc_conn->fc_assoc->assoc_id == fc_conn->conn_id && + fc_conn->fc_assoc->assoc_state != SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + /* Admin connection */ + spdk_thread_send_msg(nvmf_fc_get_master_thread(), + nvmf_fc_handle_assoc_deletion, fc_conn); + } +} + +static int +nvmf_fc_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id)); + return 0; +} + +static int +nvmf_fc_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id)); + return 0; +} + +static int +nvmf_fc_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + fc_conn = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_fc_conn, qpair); + memcpy(trid, &fc_conn->trid, sizeof(struct spdk_nvme_transport_id)); + return 0; +} + +static void +nvmf_fc_qpair_abort_request(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_request *req) +{ + spdk_nvmf_request_complete(req); +} + +const struct spdk_nvmf_transport_ops spdk_nvmf_transport_fc = { + .name = "FC", + .type = (enum spdk_nvme_transport_type) SPDK_NVMF_TRTYPE_FC, + .opts_init = nvmf_fc_opts_init, + .create = nvmf_fc_create, + .destroy = nvmf_fc_destroy, + + .listen = nvmf_fc_listen, + .stop_listen = nvmf_fc_stop_listen, + .accept = nvmf_fc_accept, + + .listener_discover = nvmf_fc_discover, + + .poll_group_create = nvmf_fc_poll_group_create, + .poll_group_destroy = nvmf_fc_poll_group_destroy, + .poll_group_add = nvmf_fc_poll_group_add, + .poll_group_poll = 
nvmf_fc_poll_group_poll, + + .req_complete = nvmf_fc_request_complete, + .req_free = nvmf_fc_request_free, + .qpair_fini = nvmf_fc_close_qpair, + .qpair_get_peer_trid = nvmf_fc_qpair_get_peer_trid, + .qpair_get_local_trid = nvmf_fc_qpair_get_local_trid, + .qpair_get_listen_trid = nvmf_fc_qpair_get_listen_trid, + .qpair_abort_request = nvmf_fc_qpair_abort_request, +}; + +/* + * Re-initialize the FC-Port after an offline event. + * Only the queue information needs to be populated. XCHG, lcore and other hwqp information remains + * unchanged after the first initialization. + * + */ +static int +nvmf_fc_adm_hw_port_reinit_validate(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_hw_port_init_args *args) +{ + uint32_t i; + + /* Verify that the port was previously in offline or quiesced state */ + if (nvmf_fc_port_is_online(fc_port)) { + SPDK_ERRLOG("SPDK FC port %d already initialized and online.\n", args->port_handle); + return -EINVAL; + } + + /* Reinit information in new LS queue from previous queue */ + nvmf_fc_hwqp_reinit_poller_queues(&fc_port->ls_queue, args->ls_queue); + + fc_port->fcp_rq_id = args->fcp_rq_id; + + /* Initialize the LS queue */ + fc_port->ls_queue.queues = args->ls_queue; + nvmf_fc_init_poller_queues(fc_port->ls_queue.queues); + + for (i = 0; i < fc_port->num_io_queues; i++) { + /* Reinit information in new IO queue from previous queue */ + nvmf_fc_hwqp_reinit_poller_queues(&fc_port->io_queues[i], + args->io_queues[i]); + fc_port->io_queues[i].queues = args->io_queues[i]; + /* Initialize the IO queues */ + nvmf_fc_init_poller_queues(fc_port->io_queues[i].queues); + } + + fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE; + + /* Validate the port information */ + DEV_VERIFY(TAILQ_EMPTY(&fc_port->nport_list)); + DEV_VERIFY(fc_port->num_nports == 0); + if (!TAILQ_EMPTY(&fc_port->nport_list) || (fc_port->num_nports != 0)) { + return -EINVAL; + } + + return 0; +} + +/* Initializes the data for the creation of a FC-Port object in the SPDK + * 
library. The spdk_nvmf_fc_port is a well defined structure that is part of + * the API to the library. The contents added to this well defined structure + * is private to each vendors implementation. + */ +static int +nvmf_fc_adm_hw_port_data_init(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_hw_port_init_args *args) +{ + /* Used a high number for the LS HWQP so that it does not clash with the + * IO HWQP's and immediately shows a LS queue during tracing. + */ + uint32_t i; + + fc_port->port_hdl = args->port_handle; + fc_port->hw_port_status = SPDK_FC_PORT_OFFLINE; + fc_port->fcp_rq_id = args->fcp_rq_id; + fc_port->num_io_queues = args->io_queue_cnt; + + /* + * Set port context from init args. Used for FCP port stats. + */ + fc_port->port_ctx = args->port_ctx; + + /* + * Initialize the LS queue wherever needed. + */ + fc_port->ls_queue.queues = args->ls_queue; + fc_port->ls_queue.thread = nvmf_fc_get_master_thread(); + fc_port->ls_queue.hwqp_id = SPDK_MAX_NUM_OF_FC_PORTS * fc_port->num_io_queues; + + /* + * Initialize the LS queue. + */ + nvmf_fc_init_hwqp(fc_port, &fc_port->ls_queue); + + /* + * Initialize the IO queues. + */ + for (i = 0; i < args->io_queue_cnt; i++) { + struct spdk_nvmf_fc_hwqp *hwqp = &fc_port->io_queues[i]; + hwqp->hwqp_id = i; + hwqp->queues = args->io_queues[i]; + hwqp->rq_size = args->io_queue_size; + nvmf_fc_init_hwqp(fc_port, hwqp); + } + + /* + * Initialize the LS processing for port + */ + nvmf_fc_ls_init(fc_port); + + /* + * Initialize the list of nport on this HW port. + */ + TAILQ_INIT(&fc_port->nport_list); + fc_port->num_nports = 0; + + return 0; +} + +static void +nvmf_fc_adm_port_hwqp_offline_del_poller(struct spdk_nvmf_fc_port *fc_port) +{ + struct spdk_nvmf_fc_hwqp *hwqp = NULL; + int i = 0; + + hwqp = &fc_port->ls_queue; + (void)nvmf_fc_hwqp_set_offline(hwqp); + + /* Remove poller for all the io queues. 
*/ + for (i = 0; i < (int)fc_port->num_io_queues; i++) { + hwqp = &fc_port->io_queues[i]; + (void)nvmf_fc_hwqp_set_offline(hwqp); + nvmf_fc_poll_group_remove_hwqp(hwqp); + } +} + +/* + * Callback function for HW port link break operation. + * + * Notice that this callback is being triggered when spdk_fc_nport_delete() + * completes, if that spdk_fc_nport_delete() called is issued by + * nvmf_fc_adm_evnt_hw_port_link_break(). + * + * Since nvmf_fc_adm_evnt_hw_port_link_break() can invoke spdk_fc_nport_delete() multiple + * times (one per nport in the HW port's nport_list), a single call to + * nvmf_fc_adm_evnt_hw_port_link_break() can result in multiple calls to this callback function. + * + * As a result, this function only invokes a callback to the caller of + * nvmf_fc_adm_evnt_hw_port_link_break() only when the HW port's nport_list is empty. + */ +static void +nvmf_fc_adm_hw_port_link_break_cb(uint8_t port_handle, + enum spdk_fc_event event_type, void *cb_args, int spdk_err) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_port_link_break_cb_data *offline_cb_args = cb_args; + struct spdk_nvmf_hw_port_link_break_args *offline_args = NULL; + spdk_nvmf_fc_callback cb_func = NULL; + int err = 0; + struct spdk_nvmf_fc_port *fc_port = NULL; + int num_nports = 0; + char log_str[256]; + + if (0 != spdk_err) { + DEV_VERIFY(!"port link break cb: spdk_err not success."); + SPDK_ERRLOG("port link break cb: spdk_err:%d.\n", spdk_err); + goto out; + } + + if (!offline_cb_args) { + DEV_VERIFY(!"port link break cb: port_offline_args is NULL."); + err = -EINVAL; + goto out; + } + + offline_args = offline_cb_args->args; + if (!offline_args) { + DEV_VERIFY(!"port link break cb: offline_args is NULL."); + err = -EINVAL; + goto out; + } + + if (port_handle != offline_args->port_handle) { + DEV_VERIFY(!"port link break cb: port_handle mismatch."); + err = -EINVAL; + goto out; + } + + cb_func = offline_cb_args->cb_func; + if (!cb_func) { + DEV_VERIFY(!"port link break 
cb: cb_func is NULL."); + err = -EINVAL; + goto out; + } + + fc_port = nvmf_fc_port_lookup(port_handle); + if (!fc_port) { + DEV_VERIFY(!"port link break cb: fc_port is NULL."); + SPDK_ERRLOG("port link break cb: Unable to find port:%d\n", + offline_args->port_handle); + err = -EINVAL; + goto out; + } + + num_nports = fc_port->num_nports; + if (!TAILQ_EMPTY(&fc_port->nport_list)) { + /* + * Don't call the callback unless all nports have been deleted. + */ + goto out; + } + + if (num_nports != 0) { + DEV_VERIFY(!"port link break cb: num_nports in non-zero."); + SPDK_ERRLOG("port link break cb: # of ports should be 0. Instead, num_nports:%d\n", + num_nports); + err = -EINVAL; + } + + /* + * Mark the hwqps as offline and unregister the pollers. + */ + (void)nvmf_fc_adm_port_hwqp_offline_del_poller(fc_port); + + /* + * Since there are no more nports, execute the callback(s). + */ + (void)cb_func(port_handle, SPDK_FC_LINK_BREAK, + (void *)offline_args->cb_ctx, spdk_err); + +out: + free(offline_cb_args); + + snprintf(log_str, sizeof(log_str), + "port link break cb: port:%d evt_type:%d num_nports:%d err:%d spdk_err:%d.\n", + port_handle, event_type, num_nports, err, spdk_err); + + if (err != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } + return; +} + +/* + * FC port must have all its nports deleted before transitioning to offline state. 
+ */ +static void +nvmf_fc_adm_hw_port_offline_nport_delete(struct spdk_nvmf_fc_port *fc_port) +{ + struct spdk_nvmf_fc_nport *nport = NULL; + /* All nports must have been deleted at this point for this fc port */ + DEV_VERIFY(fc_port && TAILQ_EMPTY(&fc_port->nport_list)); + DEV_VERIFY(fc_port->num_nports == 0); + /* Mark the nport states to be zombie, if they exist */ + if (fc_port && !TAILQ_EMPTY(&fc_port->nport_list)) { + TAILQ_FOREACH(nport, &fc_port->nport_list, link) { + (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_ZOMBIE); + } + } +} + +static void +nvmf_fc_adm_i_t_delete_cb(void *args, uint32_t err) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_i_t_del_cb_data *cb_data = args; + struct spdk_nvmf_fc_nport *nport = cb_data->nport; + struct spdk_nvmf_fc_remote_port_info *rport = cb_data->rport; + spdk_nvmf_fc_callback cb_func = cb_data->fc_cb_func; + int spdk_err = 0; + uint8_t port_handle = cb_data->port_handle; + uint32_t s_id = rport->s_id; + uint32_t rpi = rport->rpi; + uint32_t assoc_count = rport->assoc_count; + uint32_t nport_hdl = nport->nport_hdl; + uint32_t d_id = nport->d_id; + char log_str[256]; + + /* + * Assert on any delete failure. 
+ */ + if (0 != err) { + DEV_VERIFY(!"Error in IT Delete callback."); + goto out; + } + + if (cb_func != NULL) { + (void)cb_func(port_handle, SPDK_FC_IT_DELETE, cb_data->fc_cb_ctx, spdk_err); + } + +out: + free(cb_data); + + snprintf(log_str, sizeof(log_str), + "IT delete assoc_cb on nport %d done, port_handle:%d s_id:%d d_id:%d rpi:%d rport_assoc_count:%d rc = %d.\n", + nport_hdl, port_handle, s_id, d_id, rpi, assoc_count, err); + + if (err != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } +} + +static void +nvmf_fc_adm_i_t_delete_assoc_cb(void *args, uint32_t err) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data *cb_data = args; + struct spdk_nvmf_fc_nport *nport = cb_data->nport; + struct spdk_nvmf_fc_remote_port_info *rport = cb_data->rport; + spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func = cb_data->cb_func; + uint32_t s_id = rport->s_id; + uint32_t rpi = rport->rpi; + uint32_t assoc_count = rport->assoc_count; + uint32_t nport_hdl = nport->nport_hdl; + uint32_t d_id = nport->d_id; + char log_str[256]; + + /* + * Assert on any association delete failure. We continue to delete other + * associations in promoted builds. + */ + if (0 != err) { + DEV_VERIFY(!"Nport's association delete callback returned error"); + if (nport->assoc_count > 0) { + nport->assoc_count--; + } + if (rport->assoc_count > 0) { + rport->assoc_count--; + } + } + + /* + * If this is the last association being deleted for the ITN, + * execute the callback(s). + */ + if (0 == rport->assoc_count) { + /* Remove the rport from the remote port list. */ + if (nvmf_fc_nport_remove_rem_port(nport, rport) != 0) { + SPDK_ERRLOG("Error while removing rport from list.\n"); + DEV_VERIFY(!"Error while removing rport from list."); + } + + if (cb_func != NULL) { + /* + * Callback function is provided by the caller + * of nvmf_fc_adm_i_t_delete_assoc(). 
+ */ + (void)cb_func(cb_data->cb_ctx, 0); + } + free(rport); + free(args); + } + + snprintf(log_str, sizeof(log_str), + "IT delete assoc_cb on nport %d done, s_id:%d d_id:%d rpi:%d rport_assoc_count:%d err = %d.\n", + nport_hdl, s_id, d_id, rpi, assoc_count, err); + + if (err != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } +} + +/** + * Process a IT delete. + */ +static void +nvmf_fc_adm_i_t_delete_assoc(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rport, + spdk_nvmf_fc_adm_i_t_delete_assoc_cb_fn cb_func, + void *cb_ctx) +{ + int err = 0; + struct spdk_nvmf_fc_association *assoc = NULL; + int assoc_err = 0; + uint32_t num_assoc = 0; + uint32_t num_assoc_del_scheduled = 0; + struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data *cb_data = NULL; + uint8_t port_hdl = nport->port_hdl; + uint32_t s_id = rport->s_id; + uint32_t rpi = rport->rpi; + uint32_t assoc_count = rport->assoc_count; + char log_str[256]; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "IT delete associations on nport:%d begin.\n", + nport->nport_hdl); + + /* + * Allocate memory for callback data. + * This memory will be freed by the callback function. + */ + cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_i_t_del_assoc_cb_data)); + if (NULL == cb_data) { + SPDK_ERRLOG("Failed to allocate memory for cb_data on nport:%d.\n", nport->nport_hdl); + err = -ENOMEM; + goto out; + } + cb_data->nport = nport; + cb_data->rport = rport; + cb_data->port_handle = port_hdl; + cb_data->cb_func = cb_func; + cb_data->cb_ctx = cb_ctx; + + /* + * Delete all associations, if any, related with this ITN/remote_port. + */ + TAILQ_FOREACH(assoc, &nport->fc_associations, link) { + num_assoc++; + if (assoc->s_id == s_id) { + assoc_err = nvmf_fc_delete_association(nport, + assoc->assoc_id, + false /* send abts */, false, + nvmf_fc_adm_i_t_delete_assoc_cb, cb_data); + if (0 != assoc_err) { + /* + * Mark this association as zombie. 
+ */ + err = -EINVAL; + DEV_VERIFY(!"Error while deleting association"); + (void)nvmf_fc_assoc_set_state(assoc, SPDK_NVMF_FC_OBJECT_ZOMBIE); + } else { + num_assoc_del_scheduled++; + } + } + } + +out: + if ((cb_data) && (num_assoc_del_scheduled == 0)) { + /* + * Since there are no association_delete calls + * successfully scheduled, the association_delete + * callback function will never be called. + * In this case, call the callback function now. + */ + nvmf_fc_adm_i_t_delete_assoc_cb(cb_data, 0); + } + + snprintf(log_str, sizeof(log_str), + "IT delete associations on nport:%d end. " + "s_id:%d rpi:%d assoc_count:%d assoc:%d assoc_del_scheduled:%d rc:%d.\n", + nport->nport_hdl, s_id, rpi, assoc_count, num_assoc, num_assoc_del_scheduled, err); + + if (err == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } else { + SPDK_ERRLOG("%s", log_str); + } +} + +static void +nvmf_fc_adm_queue_quiesce_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_poller_api_quiesce_queue_args *quiesce_api_data = NULL; + struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *port_quiesce_ctx = NULL; + struct spdk_nvmf_fc_hwqp *hwqp = NULL; + struct spdk_nvmf_fc_port *fc_port = NULL; + int err = 0; + + quiesce_api_data = (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *)cb_data; + hwqp = quiesce_api_data->hwqp; + fc_port = hwqp->fc_port; + port_quiesce_ctx = (struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *)quiesce_api_data->ctx; + spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func = port_quiesce_ctx->cb_func; + + /* + * Decrement the callback/quiesced queue count. + */ + port_quiesce_ctx->quiesce_count--; + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Queue%d Quiesced\n", quiesce_api_data->hwqp->hwqp_id); + + free(quiesce_api_data); + /* + * Wait for call backs i.e. max_ioq_queues + LS QUEUE. 
+ */ + if (port_quiesce_ctx->quiesce_count > 0) { + return; + } + + if (fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) { + SPDK_ERRLOG("Port %d already in quiesced state.\n", fc_port->port_hdl); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d quiesced.\n", fc_port->port_hdl); + fc_port->hw_port_status = SPDK_FC_PORT_QUIESCED; + } + + if (cb_func) { + /* + * Callback function for the called of quiesce. + */ + cb_func(port_quiesce_ctx->ctx, err); + } + + /* + * Free the context structure. + */ + free(port_quiesce_ctx); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d quiesce done, rc = %d.\n", fc_port->port_hdl, + err); +} + +static int +nvmf_fc_adm_hw_queue_quiesce(struct spdk_nvmf_fc_hwqp *fc_hwqp, void *ctx, + spdk_nvmf_fc_poller_api_cb cb_func) +{ + struct spdk_nvmf_fc_poller_api_quiesce_queue_args *args; + enum spdk_nvmf_fc_poller_api_ret rc = SPDK_NVMF_FC_POLLER_API_SUCCESS; + int err = 0; + + args = calloc(1, sizeof(struct spdk_nvmf_fc_poller_api_quiesce_queue_args)); + + if (args == NULL) { + err = -ENOMEM; + SPDK_ERRLOG("Failed to allocate memory for poller quiesce args, hwqp:%d\n", fc_hwqp->hwqp_id); + goto done; + } + args->hwqp = fc_hwqp; + args->ctx = ctx; + args->cb_info.cb_func = cb_func; + args->cb_info.cb_data = args; + args->cb_info.cb_thread = spdk_get_thread(); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Quiesce queue %d\n", fc_hwqp->hwqp_id); + rc = nvmf_fc_poller_api_func(fc_hwqp, SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE, args); + if (rc) { + free(args); + err = -EINVAL; + } + +done: + return err; +} + +/* + * Hw port Quiesce + */ +static int +nvmf_fc_adm_hw_port_quiesce(struct spdk_nvmf_fc_port *fc_port, void *ctx, + spdk_nvmf_fc_adm_hw_port_quiesce_cb_fn cb_func) +{ + struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx *port_quiesce_ctx = NULL; + uint32_t i = 0; + int err = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port:%d is being quiesced.\n", fc_port->port_hdl); + + /* + * If the port is in an OFFLINE state, set 
the state to QUIESCED + * and execute the callback. + */ + if (fc_port->hw_port_status == SPDK_FC_PORT_OFFLINE) { + fc_port->hw_port_status = SPDK_FC_PORT_QUIESCED; + } + + if (fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Port %d already in quiesced state.\n", + fc_port->port_hdl); + /* + * Execute the callback function directly. + */ + cb_func(ctx, err); + goto out; + } + + port_quiesce_ctx = calloc(1, sizeof(struct spdk_nvmf_fc_adm_hw_port_quiesce_ctx)); + + if (port_quiesce_ctx == NULL) { + err = -ENOMEM; + SPDK_ERRLOG("Failed to allocate memory for LS queue quiesce ctx, port:%d\n", + fc_port->port_hdl); + goto out; + } + + port_quiesce_ctx->quiesce_count = 0; + port_quiesce_ctx->ctx = ctx; + port_quiesce_ctx->cb_func = cb_func; + + /* + * Quiesce the LS queue. + */ + err = nvmf_fc_adm_hw_queue_quiesce(&fc_port->ls_queue, port_quiesce_ctx, + nvmf_fc_adm_queue_quiesce_cb); + if (err != 0) { + SPDK_ERRLOG("Failed to quiesce the LS queue.\n"); + goto out; + } + port_quiesce_ctx->quiesce_count++; + + /* + * Quiesce the IO queues. + */ + for (i = 0; i < fc_port->num_io_queues; i++) { + err = nvmf_fc_adm_hw_queue_quiesce(&fc_port->io_queues[i], + port_quiesce_ctx, + nvmf_fc_adm_queue_quiesce_cb); + if (err != 0) { + DEV_VERIFY(0); + SPDK_ERRLOG("Failed to quiesce the IO queue:%d.\n", fc_port->io_queues[i].hwqp_id); + } + port_quiesce_ctx->quiesce_count++; + } + +out: + if (port_quiesce_ctx && err != 0) { + free(port_quiesce_ctx); + } + return err; +} + +/* + * Initialize and add a HW port entry to the global + * HW port list. 
+ */ +static void +nvmf_fc_adm_evnt_hw_port_init(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_port *fc_port = NULL; + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_port_init_args *args = (struct spdk_nvmf_fc_hw_port_init_args *) + api_data->api_args; + int err = 0; + + if (args->io_queue_cnt > spdk_env_get_core_count()) { + SPDK_ERRLOG("IO queues count greater than cores for %d.\n", args->port_handle); + err = EINVAL; + goto abort_port_init; + } + + /* + * 1. Check for duplicate initialization. + */ + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port != NULL) { + /* Port already exists, check if it has to be re-initialized */ + err = nvmf_fc_adm_hw_port_reinit_validate(fc_port, args); + if (err) { + /* + * In case of an error we do not want to free the fc_port + * so we set that pointer to NULL. + */ + fc_port = NULL; + } + goto abort_port_init; + } + + /* + * 2. Get the memory to instantiate a fc port. + */ + fc_port = calloc(1, sizeof(struct spdk_nvmf_fc_port) + + (args->io_queue_cnt * sizeof(struct spdk_nvmf_fc_hwqp))); + if (fc_port == NULL) { + SPDK_ERRLOG("Failed to allocate memory for fc_port %d.\n", args->port_handle); + err = -ENOMEM; + goto abort_port_init; + } + + /* assign the io_queues array */ + fc_port->io_queues = (struct spdk_nvmf_fc_hwqp *)((uint8_t *)fc_port + sizeof( + struct spdk_nvmf_fc_port)); + + /* + * 3. Initialize the contents for the FC-port + */ + err = nvmf_fc_adm_hw_port_data_init(fc_port, args); + + if (err != 0) { + SPDK_ERRLOG("Data initialization failed for fc_port %d.\n", args->port_handle); + DEV_VERIFY(!"Data initialization failed for fc_port"); + goto abort_port_init; + } + + /* + * 4. Add this port to the global fc port list in the library. 
+ */ + nvmf_fc_port_add(fc_port); + +abort_port_init: + if (err && fc_port) { + free(fc_port); + } + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_INIT, args->cb_ctx, err); + } + + free(arg); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d initialize done, rc = %d.\n", + args->port_handle, err); +} + +/* + * Online a HW port. + */ +static void +nvmf_fc_adm_evnt_hw_port_online(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_port *fc_port = NULL; + struct spdk_nvmf_fc_hwqp *hwqp = NULL; + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_port_online_args *args = (struct spdk_nvmf_fc_hw_port_online_args *) + api_data->api_args; + int i = 0; + int err = 0; + + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port) { + /* Set the port state to online */ + err = nvmf_fc_port_set_online(fc_port); + if (err != 0) { + SPDK_ERRLOG("Hw port %d online failed. err = %d\n", fc_port->port_hdl, err); + DEV_VERIFY(!"Hw port online failed"); + goto out; + } + + hwqp = &fc_port->ls_queue; + hwqp->context = NULL; + (void)nvmf_fc_hwqp_set_online(hwqp); + + /* Cycle through all the io queues and setup a hwqp poller for each. */ + for (i = 0; i < (int)fc_port->num_io_queues; i++) { + hwqp = &fc_port->io_queues[i]; + hwqp->context = NULL; + (void)nvmf_fc_hwqp_set_online(hwqp); + nvmf_fc_poll_group_add_hwqp(hwqp); + } + } else { + SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle); + err = -EINVAL; + } + +out: + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_ONLINE, args->cb_ctx, err); + } + + free(arg); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d online done, rc = %d.\n", args->port_handle, + err); +} + +/* + * Offline a HW port. 
+ */ +static void +nvmf_fc_adm_evnt_hw_port_offline(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_port *fc_port = NULL; + struct spdk_nvmf_fc_hwqp *hwqp = NULL; + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_port_offline_args *args = (struct spdk_nvmf_fc_hw_port_offline_args *) + api_data->api_args; + int i = 0; + int err = 0; + + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port) { + /* Set the port state to offline, if it is not already. */ + err = nvmf_fc_port_set_offline(fc_port); + if (err != 0) { + SPDK_ERRLOG("Hw port %d already offline. err = %d\n", fc_port->port_hdl, err); + err = 0; + goto out; + } + + hwqp = &fc_port->ls_queue; + (void)nvmf_fc_hwqp_set_offline(hwqp); + + /* Remove poller for all the io queues. */ + for (i = 0; i < (int)fc_port->num_io_queues; i++) { + hwqp = &fc_port->io_queues[i]; + (void)nvmf_fc_hwqp_set_offline(hwqp); + nvmf_fc_poll_group_remove_hwqp(hwqp); + } + + /* + * Delete all the nports. Ideally, the nports should have been purged + * before the offline event, in which case, only a validation is required. 
+ */ + nvmf_fc_adm_hw_port_offline_nport_delete(fc_port); + } else { + SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle); + err = -EINVAL; + } +out: + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_OFFLINE, args->cb_ctx, err); + } + + free(arg); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d offline done, rc = %d.\n", args->port_handle, + err); +} + +struct nvmf_fc_add_rem_listener_ctx { + struct spdk_nvmf_subsystem *subsystem; + bool add_listener; + struct spdk_nvme_transport_id trid; +}; + +static void +nvmf_fc_adm_subsystem_resume_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct nvmf_fc_add_rem_listener_ctx *ctx = (struct nvmf_fc_add_rem_listener_ctx *)cb_arg; + free(ctx); +} + +static void +nvmf_fc_adm_listen_done(void *cb_arg, int status) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct nvmf_fc_add_rem_listener_ctx *ctx = cb_arg; + + if (spdk_nvmf_subsystem_resume(ctx->subsystem, nvmf_fc_adm_subsystem_resume_cb, ctx)) { + SPDK_ERRLOG("Failed to resume subsystem: %s\n", ctx->subsystem->subnqn); + free(ctx); + } +} + +static void +nvmf_fc_adm_subsystem_paused_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct nvmf_fc_add_rem_listener_ctx *ctx = (struct nvmf_fc_add_rem_listener_ctx *)cb_arg; + + if (ctx->add_listener) { + spdk_nvmf_subsystem_add_listener(subsystem, &ctx->trid, nvmf_fc_adm_listen_done, ctx); + } else { + spdk_nvmf_subsystem_remove_listener(subsystem, &ctx->trid); + nvmf_fc_adm_listen_done(ctx, 0); + } +} + +static int +nvmf_fc_adm_add_rem_nport_listener(struct spdk_nvmf_fc_nport *nport, bool add) +{ + struct spdk_nvmf_tgt *tgt = nvmf_fc_get_tgt(); + struct spdk_nvmf_subsystem *subsystem; + + if (!tgt) { + SPDK_ERRLOG("No nvmf target defined\n"); + return -EINVAL; + } + + subsystem = spdk_nvmf_subsystem_get_first(tgt); + while (subsystem) { + struct 
nvmf_fc_add_rem_listener_ctx *ctx; + + if (spdk_nvmf_subsytem_any_listener_allowed(subsystem) == true) { + ctx = calloc(1, sizeof(struct nvmf_fc_add_rem_listener_ctx)); + if (ctx) { + ctx->add_listener = add; + ctx->subsystem = subsystem; + nvmf_fc_create_trid(&ctx->trid, + nport->fc_nodename.u.wwn, + nport->fc_portname.u.wwn); + + if (spdk_nvmf_tgt_listen(subsystem->tgt, &ctx->trid)) { + SPDK_ERRLOG("Failed to add transport address %s to tgt listeners\n", + ctx->trid.traddr); + free(ctx); + } else if (spdk_nvmf_subsystem_pause(subsystem, + nvmf_fc_adm_subsystem_paused_cb, + ctx)) { + SPDK_ERRLOG("Failed to pause subsystem: %s\n", + subsystem->subnqn); + free(ctx); + } + } + } + + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + } + + return 0; +} + +/* + * Create a Nport. + */ +static void +nvmf_fc_adm_evnt_nport_create(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_nport_create_args *args = (struct spdk_nvmf_fc_nport_create_args *) + api_data->api_args; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_port *fc_port = NULL; + int err = 0; + + /* + * Get the physical port. + */ + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port == NULL) { + err = -EINVAL; + goto out; + } + + /* + * Check for duplicate initialization. + */ + nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle); + if (nport != NULL) { + SPDK_ERRLOG("Duplicate SPDK FC nport %d exists for FC port:%d.\n", args->nport_handle, + args->port_handle); + err = -EINVAL; + goto out; + } + + /* + * Get the memory to instantiate a fc nport. 
+ */ + nport = calloc(1, sizeof(struct spdk_nvmf_fc_nport)); + if (nport == NULL) { + SPDK_ERRLOG("Failed to allocate memory for nport %d.\n", + args->nport_handle); + err = -ENOMEM; + goto out; + } + + /* + * Initialize the contents for the nport + */ + nport->nport_hdl = args->nport_handle; + nport->port_hdl = args->port_handle; + nport->nport_state = SPDK_NVMF_FC_OBJECT_CREATED; + nport->fc_nodename = args->fc_nodename; + nport->fc_portname = args->fc_portname; + nport->d_id = args->d_id; + nport->fc_port = nvmf_fc_port_lookup(args->port_handle); + + (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_CREATED); + TAILQ_INIT(&nport->rem_port_list); + nport->rport_count = 0; + TAILQ_INIT(&nport->fc_associations); + nport->assoc_count = 0; + + /* + * Populate the nport address (as listening address) to the nvmf subsystems. + */ + err = nvmf_fc_adm_add_rem_nport_listener(nport, true); + + (void)nvmf_fc_port_add_nport(fc_port, nport); +out: + if (err && nport) { + free(nport); + } + + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_NPORT_CREATE, args->cb_ctx, err); + } + + free(arg); +} + +static void +nvmf_fc_adm_delete_nport_cb(uint8_t port_handle, enum spdk_fc_event event_type, + void *cb_args, int spdk_err) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_nport_del_cb_data *cb_data = cb_args; + struct spdk_nvmf_fc_nport *nport = cb_data->nport; + spdk_nvmf_fc_callback cb_func = cb_data->fc_cb_func; + int err = 0; + uint16_t nport_hdl = 0; + char log_str[256]; + + /* + * Assert on any delete failure. + */ + if (nport == NULL) { + SPDK_ERRLOG("Nport delete callback returned null nport"); + DEV_VERIFY(!"nport is null."); + goto out; + } + + nport_hdl = nport->nport_hdl; + if (0 != spdk_err) { + SPDK_ERRLOG("Nport delete callback returned error. 
FC Port: " + "%d, Nport: %d\n", + nport->port_hdl, nport->nport_hdl); + DEV_VERIFY(!"nport delete callback error."); + } + + /* + * Free the nport if this is the last rport being deleted and + * execute the callback(s). + */ + if (nvmf_fc_nport_has_no_rport(nport)) { + if (0 != nport->assoc_count) { + SPDK_ERRLOG("association count != 0\n"); + DEV_VERIFY(!"association count != 0"); + } + + err = nvmf_fc_port_remove_nport(nport->fc_port, nport); + if (0 != err) { + SPDK_ERRLOG("Nport delete callback: Failed to remove " + "nport from nport list. FC Port:%d Nport:%d\n", + nport->port_hdl, nport->nport_hdl); + } + /* Free the nport */ + free(nport); + + if (cb_func != NULL) { + (void)cb_func(cb_data->port_handle, SPDK_FC_NPORT_DELETE, cb_data->fc_cb_ctx, spdk_err); + } + free(cb_data); + } +out: + snprintf(log_str, sizeof(log_str), + "port:%d nport:%d delete cb exit, evt_type:%d rc:%d.\n", + port_handle, nport_hdl, event_type, spdk_err); + + if (err != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } +} + +/* + * Delete Nport. + */ +static void +nvmf_fc_adm_evnt_nport_delete(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_nport_delete_args *args = (struct spdk_nvmf_fc_nport_delete_args *) + api_data->api_args; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_adm_nport_del_cb_data *cb_data = NULL; + struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL; + int err = 0; + uint32_t rport_cnt = 0; + int rc = 0; + + /* + * Make sure that the nport exists. + */ + nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle); + if (nport == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC nport %d for FC Port: %d.\n", args->nport_handle, + args->port_handle); + err = -EINVAL; + goto out; + } + + /* + * Allocate memory for callback data. 
+ */ + cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_nport_del_cb_data)); + if (NULL == cb_data) { + SPDK_ERRLOG("Failed to allocate memory for cb_data %d.\n", args->nport_handle); + err = -ENOMEM; + goto out; + } + + cb_data->nport = nport; + cb_data->port_handle = args->port_handle; + cb_data->fc_cb_func = api_data->cb_func; + cb_data->fc_cb_ctx = args->cb_ctx; + + /* + * Begin nport tear down + */ + if (nport->nport_state == SPDK_NVMF_FC_OBJECT_CREATED) { + (void)nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_TO_BE_DELETED); + } else if (nport->nport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + /* + * Deletion of this nport already in progress. Register callback + * and return. + */ + /* TODO: Register callback in callback vector. For now, set the error and return. */ + err = -ENODEV; + goto out; + } else { + /* nport partially created/deleted */ + DEV_VERIFY(nport->nport_state == SPDK_NVMF_FC_OBJECT_ZOMBIE); + DEV_VERIFY(0 != "Nport in zombie state"); + err = -ENODEV; + goto out; + } + + /* + * Remove this nport from listening addresses across subsystems + */ + rc = nvmf_fc_adm_add_rem_nport_listener(nport, false); + + if (0 != rc) { + err = nvmf_fc_nport_set_state(nport, SPDK_NVMF_FC_OBJECT_ZOMBIE); + SPDK_ERRLOG("Unable to remove the listen addr in the subsystems for nport %d.\n", + nport->nport_hdl); + goto out; + } + + /* + * Delete all the remote ports (if any) for the nport + */ + /* TODO - Need to do this with a "first" and a "next" accessor function + * for completeness. Look at app-subsystem as examples. + */ + if (nvmf_fc_nport_has_no_rport(nport)) { + /* No rports to delete. Complete the nport deletion. 
*/ + nvmf_fc_adm_delete_nport_cb(nport->port_hdl, SPDK_FC_NPORT_DELETE, cb_data, 0); + goto out; + } + + TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) { + struct spdk_nvmf_fc_hw_i_t_delete_args *it_del_args = calloc( + 1, sizeof(struct spdk_nvmf_fc_hw_i_t_delete_args)); + + if (it_del_args == NULL) { + err = -ENOMEM; + SPDK_ERRLOG("SPDK_FC_IT_DELETE no mem to delete rport with rpi:%d s_id:%d.\n", + rport_iter->rpi, rport_iter->s_id); + DEV_VERIFY(!"SPDK_FC_IT_DELETE failed, cannot allocate memory"); + goto out; + } + + rport_cnt++; + it_del_args->port_handle = nport->port_hdl; + it_del_args->nport_handle = nport->nport_hdl; + it_del_args->cb_ctx = (void *)cb_data; + it_del_args->rpi = rport_iter->rpi; + it_del_args->s_id = rport_iter->s_id; + + nvmf_fc_master_enqueue_event(SPDK_FC_IT_DELETE, (void *)it_del_args, + nvmf_fc_adm_delete_nport_cb); + } + +out: + /* On failure, execute the callback function now */ + if ((err != 0) || (rc != 0)) { + SPDK_ERRLOG("NPort %d delete failed, error:%d, fc port:%d, " + "rport_cnt:%d rc:%d.\n", + args->nport_handle, err, args->port_handle, + rport_cnt, rc); + if (cb_data) { + free(cb_data); + } + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_NPORT_DELETE, args->cb_ctx, err); + } + + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, + "NPort %d delete done succesfully, fc port:%d. " + "rport_cnt:%d\n", + args->nport_handle, args->port_handle, rport_cnt); + } + + free(arg); +} + +/* + * Process an PRLI/IT add. 
+ */ +static void +nvmf_fc_adm_evnt_i_t_add(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_i_t_add_args *args = (struct spdk_nvmf_fc_hw_i_t_add_args *) + api_data->api_args; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL; + struct spdk_nvmf_fc_remote_port_info *rport = NULL; + int err = 0; + + /* + * Make sure the nport port exists. + */ + nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle); + if (nport == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC nport %d\n", args->nport_handle); + err = -EINVAL; + goto out; + } + + /* + * Check for duplicate i_t_add. + */ + TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) { + if ((rport_iter->s_id == args->s_id) && (rport_iter->rpi == args->rpi)) { + SPDK_ERRLOG("Duplicate rport found for FC nport %d: sid:%d rpi:%d\n", + args->nport_handle, rport_iter->s_id, rport_iter->rpi); + err = -EEXIST; + goto out; + } + } + + /* + * Get the memory to instantiate the remote port + */ + rport = calloc(1, sizeof(struct spdk_nvmf_fc_remote_port_info)); + if (rport == NULL) { + SPDK_ERRLOG("Memory allocation for rem port failed.\n"); + err = -ENOMEM; + goto out; + } + + /* + * Initialize the contents for the rport + */ + (void)nvmf_fc_rport_set_state(rport, SPDK_NVMF_FC_OBJECT_CREATED); + rport->s_id = args->s_id; + rport->rpi = args->rpi; + rport->fc_nodename = args->fc_nodename; + rport->fc_portname = args->fc_portname; + + /* + * Add remote port to nport + */ + if (nvmf_fc_nport_add_rem_port(nport, rport) != 0) { + DEV_VERIFY(!"Error while adding rport to list"); + }; + + /* + * TODO: Do we validate the initiators service parameters? + */ + + /* + * Get the targets service parameters from the library + * to return back to the driver. 
+ */ + args->target_prli_info = nvmf_fc_get_prli_service_params(); + +out: + if (api_data->cb_func != NULL) { + /* + * Passing pointer to the args struct as the first argument. + * The cb_func should handle this appropriately. + */ + (void)api_data->cb_func(args->port_handle, SPDK_FC_IT_ADD, args->cb_ctx, err); + } + + free(arg); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, + "IT add on nport %d done, rc = %d.\n", + args->nport_handle, err); +} + +/** + * Process a IT delete. + */ +static void +nvmf_fc_adm_evnt_i_t_delete(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_i_t_delete_args *args = (struct spdk_nvmf_fc_hw_i_t_delete_args *) + api_data->api_args; + int rc = 0; + struct spdk_nvmf_fc_nport *nport = NULL; + struct spdk_nvmf_fc_adm_i_t_del_cb_data *cb_data = NULL; + struct spdk_nvmf_fc_remote_port_info *rport_iter = NULL; + struct spdk_nvmf_fc_remote_port_info *rport = NULL; + uint32_t num_rport = 0; + char log_str[256]; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "IT delete on nport:%d begin.\n", args->nport_handle); + + /* + * Make sure the nport port exists. If it does not, error out. + */ + nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle); + if (nport == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC nport:%d\n", args->nport_handle); + rc = -EINVAL; + goto out; + } + + /* + * Find this ITN / rport (remote port). + */ + TAILQ_FOREACH(rport_iter, &nport->rem_port_list, link) { + num_rport++; + if ((rport_iter->s_id == args->s_id) && + (rport_iter->rpi == args->rpi) && + (rport_iter->rport_state == SPDK_NVMF_FC_OBJECT_CREATED)) { + rport = rport_iter; + break; + } + } + + /* + * We should find either zero or exactly one rport. + * + * If we find zero rports, that means that a previous request has + * removed the rport by the time we reached here. In this case, + * simply return out. 
+ */ + if (rport == NULL) { + rc = -ENODEV; + goto out; + } + + /* + * We have found exactly one rport. Allocate memory for callback data. + */ + cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_i_t_del_cb_data)); + if (NULL == cb_data) { + SPDK_ERRLOG("Failed to allocate memory for cb_data for nport:%d.\n", args->nport_handle); + rc = -ENOMEM; + goto out; + } + + cb_data->nport = nport; + cb_data->rport = rport; + cb_data->port_handle = args->port_handle; + cb_data->fc_cb_func = api_data->cb_func; + cb_data->fc_cb_ctx = args->cb_ctx; + + /* + * Validate rport object state. + */ + if (rport->rport_state == SPDK_NVMF_FC_OBJECT_CREATED) { + (void)nvmf_fc_rport_set_state(rport, SPDK_NVMF_FC_OBJECT_TO_BE_DELETED); + } else if (rport->rport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + /* + * Deletion of this rport already in progress. Register callback + * and return. + */ + /* TODO: Register callback in callback vector. For now, set the error and return. */ + rc = -ENODEV; + goto out; + } else { + /* rport partially created/deleted */ + DEV_VERIFY(rport->rport_state == SPDK_NVMF_FC_OBJECT_ZOMBIE); + DEV_VERIFY(!"Invalid rport_state"); + rc = -ENODEV; + goto out; + } + + /* + * We have successfully found a rport to delete. Call + * nvmf_fc_i_t_delete_assoc(), which will perform further + * IT-delete processing as well as free the cb_data. + */ + nvmf_fc_adm_i_t_delete_assoc(nport, rport, nvmf_fc_adm_i_t_delete_cb, + (void *)cb_data); + +out: + if (rc != 0) { + /* + * We have entered here because either we encountered an + * error, or we did not find a rport to delete. + * As a result, we will not call the function + * nvmf_fc_i_t_delete_assoc() for further IT-delete + * processing. Therefore, execute the callback function now. + */ + if (cb_data) { + free(cb_data); + } + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_IT_DELETE, args->cb_ctx, rc); + } + } + + snprintf(log_str, sizeof(log_str), + "IT delete on nport:%d end. 
num_rport:%d rc = %d.\n", + args->nport_handle, num_rport, rc); + + if (rc != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } + + free(arg); +} + +/* + * Process ABTS received + */ +static void +nvmf_fc_adm_evnt_abts_recv(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_abts_args *args = (struct spdk_nvmf_fc_abts_args *)api_data->api_args; + struct spdk_nvmf_fc_nport *nport = NULL; + int err = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "FC ABTS received. RPI:%d, oxid:%d, rxid:%d\n", args->rpi, + args->oxid, args->rxid); + + /* + * 1. Make sure the nport port exists. + */ + nport = nvmf_fc_nport_find(args->port_handle, args->nport_handle); + if (nport == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC nport %d\n", args->nport_handle); + err = -EINVAL; + goto out; + } + + /* + * 2. If the nport is in the process of being deleted, drop the ABTS. + */ + if (nport->nport_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, + "FC ABTS dropped because the nport is being deleted; RPI:%d, oxid:%d, rxid:%d\n", + args->rpi, args->oxid, args->rxid); + err = 0; + goto out; + + } + + /* + * 3. Pass the received ABTS-LS to the library for handling. + */ + nvmf_fc_handle_abts_frame(nport, args->rpi, args->oxid, args->rxid); + +out: + if (api_data->cb_func != NULL) { + /* + * Passing pointer to the args struct as the first argument. + * The cb_func should handle this appropriately. + */ + (void)api_data->cb_func(args->port_handle, SPDK_FC_ABTS_RECV, args, err); + } else { + /* No callback set, free the args */ + free(args); + } + + free(arg); +} + +/* + * Callback function for hw port quiesce. 
+ */ +static void +nvmf_fc_adm_hw_port_quiesce_reset_cb(void *ctx, int err) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_hw_port_reset_ctx *reset_ctx = + (struct spdk_nvmf_fc_adm_hw_port_reset_ctx *)ctx; + struct spdk_nvmf_fc_hw_port_reset_args *args = reset_ctx->reset_args; + spdk_nvmf_fc_callback cb_func = reset_ctx->reset_cb_func; + struct spdk_nvmf_fc_queue_dump_info dump_info; + struct spdk_nvmf_fc_port *fc_port = NULL; + char *dump_buf = NULL; + uint32_t dump_buf_size = SPDK_FC_HW_DUMP_BUF_SIZE; + + /* + * Free the callback context struct. + */ + free(ctx); + + if (err != 0) { + SPDK_ERRLOG("Port %d quiesce operation failed.\n", args->port_handle); + goto out; + } + + if (args->dump_queues == false) { + /* + * Queues need not be dumped. + */ + goto out; + } + + SPDK_ERRLOG("Dumping queues for HW port %d\n", args->port_handle); + + /* + * Get the fc port. + */ + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle); + err = -EINVAL; + goto out; + } + + /* + * Allocate memory for the dump buffer. + * This memory will be freed by FCT. + */ + dump_buf = (char *)calloc(1, dump_buf_size); + if (dump_buf == NULL) { + err = -ENOMEM; + SPDK_ERRLOG("Memory allocation for dump buffer failed, SPDK FC port %d\n", args->port_handle); + goto out; + } + *args->dump_buf = (uint32_t *)dump_buf; + dump_info.buffer = dump_buf; + dump_info.offset = 0; + + /* + * Add the dump reason to the top of the buffer. + */ + nvmf_fc_dump_buf_print(&dump_info, "%s\n", args->reason); + + /* + * Dump the hwqp. 
+ */ + nvmf_fc_dump_all_queues(&fc_port->ls_queue, fc_port->io_queues, + fc_port->num_io_queues, &dump_info); + +out: + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d reset done, queues_dumped = %d, rc = %d.\n", + args->port_handle, args->dump_queues, err); + + if (cb_func != NULL) { + (void)cb_func(args->port_handle, SPDK_FC_HW_PORT_RESET, args->cb_ctx, err); + } +} + +/* + * HW port reset + + */ +static void +nvmf_fc_adm_evnt_hw_port_reset(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_fc_hw_port_reset_args *args = (struct spdk_nvmf_fc_hw_port_reset_args *) + api_data->api_args; + struct spdk_nvmf_fc_port *fc_port = NULL; + struct spdk_nvmf_fc_adm_hw_port_reset_ctx *ctx = NULL; + int err = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d dump\n", args->port_handle); + + /* + * Make sure the physical port exists. + */ + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (fc_port == NULL) { + SPDK_ERRLOG("Unable to find the SPDK FC port %d\n", args->port_handle); + err = -EINVAL; + goto out; + } + + /* + * Save the reset event args and the callback in a context struct. + */ + ctx = calloc(1, sizeof(struct spdk_nvmf_fc_adm_hw_port_reset_ctx)); + + if (ctx == NULL) { + err = -ENOMEM; + SPDK_ERRLOG("Memory allocation for reset ctx failed, SPDK FC port %d\n", args->port_handle); + goto fail; + } + + ctx->reset_args = arg; + ctx->reset_cb_func = api_data->cb_func; + + /* + * Quiesce the hw port. 
+ */ + err = nvmf_fc_adm_hw_port_quiesce(fc_port, ctx, nvmf_fc_adm_hw_port_quiesce_reset_cb); + if (err != 0) { + goto fail; + } + + /* + * Once the ports are successfully quiesced the reset processing + * will continue in the callback function: spdk_fc_port_quiesce_reset_cb + */ + return; +fail: + free(ctx); + +out: + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "HW port %d dump done, rc = %d.\n", args->port_handle, + err); + + if (api_data->cb_func != NULL) { + (void)api_data->cb_func(args->port_handle, SPDK_FC_HW_PORT_RESET, args->cb_ctx, err); + } + + free(arg); +} + +/* + * Process a link break event on a HW port. + */ +static void +nvmf_fc_adm_evnt_hw_port_link_break(void *arg) +{ + ASSERT_SPDK_FC_MASTER_THREAD(); + struct spdk_nvmf_fc_adm_api_data *api_data = (struct spdk_nvmf_fc_adm_api_data *)arg; + struct spdk_nvmf_hw_port_link_break_args *args = (struct spdk_nvmf_hw_port_link_break_args *) + api_data->api_args; + struct spdk_nvmf_fc_port *fc_port = NULL; + int err = 0; + struct spdk_nvmf_fc_adm_port_link_break_cb_data *cb_data = NULL; + struct spdk_nvmf_fc_nport *nport = NULL; + uint32_t nport_deletes_sent = 0; + uint32_t nport_deletes_skipped = 0; + struct spdk_nvmf_fc_nport_delete_args *nport_del_args = NULL; + char log_str[256]; + + /* + * Get the fc port using the port handle. + */ + fc_port = nvmf_fc_port_lookup(args->port_handle); + if (!fc_port) { + SPDK_ERRLOG("port link break: Unable to find the SPDK FC port %d\n", + args->port_handle); + err = -EINVAL; + goto out; + } + + /* + * Set the port state to offline, if it is not already. + */ + err = nvmf_fc_port_set_offline(fc_port); + if (err != 0) { + SPDK_ERRLOG("port link break: HW port %d already offline. rc = %d\n", + fc_port->port_hdl, err); + err = 0; + goto out; + } + + /* + * Delete all the nports, if any. 
+ */ + if (!TAILQ_EMPTY(&fc_port->nport_list)) { + TAILQ_FOREACH(nport, &fc_port->nport_list, link) { + /* Skipped the nports that are not in CREATED state */ + if (nport->nport_state != SPDK_NVMF_FC_OBJECT_CREATED) { + nport_deletes_skipped++; + continue; + } + + /* Allocate memory for callback data. */ + cb_data = calloc(1, sizeof(struct spdk_nvmf_fc_adm_port_link_break_cb_data)); + if (NULL == cb_data) { + SPDK_ERRLOG("port link break: Failed to allocate memory for cb_data %d.\n", + args->port_handle); + err = -ENOMEM; + goto out; + } + cb_data->args = args; + cb_data->cb_func = api_data->cb_func; + nport_del_args = &cb_data->nport_del_args; + nport_del_args->port_handle = args->port_handle; + nport_del_args->nport_handle = nport->nport_hdl; + nport_del_args->cb_ctx = cb_data; + + nvmf_fc_master_enqueue_event(SPDK_FC_NPORT_DELETE, + (void *)nport_del_args, + nvmf_fc_adm_hw_port_link_break_cb); + + nport_deletes_sent++; + } + } + + if (nport_deletes_sent == 0 && err == 0) { + /* + * Mark the hwqps as offline and unregister the pollers. + */ + (void)nvmf_fc_adm_port_hwqp_offline_del_poller(fc_port); + } + +out: + snprintf(log_str, sizeof(log_str), + "port link break done: port:%d nport_deletes_sent:%d nport_deletes_skipped:%d rc:%d.\n", + args->port_handle, nport_deletes_sent, nport_deletes_skipped, err); + + if (err != 0) { + SPDK_ERRLOG("%s", log_str); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "%s", log_str); + } + + if ((api_data->cb_func != NULL) && (nport_deletes_sent == 0)) { + /* + * No nport_deletes are sent, which would have eventually + * called the port_link_break callback. Therefore, call the + * port_link_break callback here. 
+ */ + (void)api_data->cb_func(args->port_handle, SPDK_FC_LINK_BREAK, args->cb_ctx, err); + } + + free(arg); +} + +static inline void +nvmf_fc_adm_run_on_master_thread(spdk_msg_fn fn, void *args) +{ + if (nvmf_fc_get_master_thread()) { + spdk_thread_send_msg(nvmf_fc_get_master_thread(), fn, args); + } +} + +/* + * Queue up an event in the SPDK masters event queue. + * Used by the FC driver to notify the SPDK master of FC related events. + */ +int +nvmf_fc_master_enqueue_event(enum spdk_fc_event event_type, void *args, + spdk_nvmf_fc_callback cb_func) +{ + int err = 0; + struct spdk_nvmf_fc_adm_api_data *api_data = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Enqueue event %d.\n", event_type); + + if (event_type >= SPDK_FC_EVENT_MAX) { + SPDK_ERRLOG("Invalid spdk_fc_event_t %d.\n", event_type); + err = -EINVAL; + goto done; + } + + if (args == NULL) { + SPDK_ERRLOG("Null args for event %d.\n", event_type); + err = -EINVAL; + goto done; + } + + api_data = calloc(1, sizeof(*api_data)); + + if (api_data == NULL) { + SPDK_ERRLOG("Failed to alloc api data for event %d.\n", event_type); + err = -ENOMEM; + goto done; + } + + api_data->api_args = args; + api_data->cb_func = cb_func; + + switch (event_type) { + case SPDK_FC_HW_PORT_INIT: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_init, + (void *)api_data); + break; + + case SPDK_FC_HW_PORT_ONLINE: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_online, + (void *)api_data); + break; + + case SPDK_FC_HW_PORT_OFFLINE: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_offline, + (void *)api_data); + break; + + case SPDK_FC_NPORT_CREATE: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_nport_create, + (void *)api_data); + break; + + case SPDK_FC_NPORT_DELETE: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_nport_delete, + (void *)api_data); + break; + + case SPDK_FC_IT_ADD: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_i_t_add, + (void *)api_data); + break; + + case 
SPDK_FC_IT_DELETE: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_i_t_delete, + (void *)api_data); + break; + + case SPDK_FC_ABTS_RECV: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_abts_recv, + (void *)api_data); + break; + + case SPDK_FC_LINK_BREAK: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_link_break, + (void *)api_data); + break; + + case SPDK_FC_HW_PORT_RESET: + nvmf_fc_adm_run_on_master_thread(nvmf_fc_adm_evnt_hw_port_reset, + (void *)api_data); + break; + + case SPDK_FC_UNRECOVERABLE_ERR: + default: + SPDK_ERRLOG("Invalid spdk_fc_event_t: %d\n", event_type); + err = -EINVAL; + break; + } + +done: + + if (err == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_ADM_API, "Enqueue event %d done successfully\n", event_type); + } else { + SPDK_ERRLOG("Enqueue event %d failed, err = %d\n", event_type, err); + if (api_data) { + free(api_data); + } + } + + return err; +} + +SPDK_NVMF_TRANSPORT_REGISTER(fc, &spdk_nvmf_transport_fc); +SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_adm_api", SPDK_LOG_NVMF_FC_ADM_API); +SPDK_LOG_REGISTER_COMPONENT("nvmf_fc", SPDK_LOG_NVMF_FC) diff --git a/src/spdk/lib/nvmf/fc_ls.c b/src/spdk/lib/nvmf/fc_ls.c new file mode 100644 index 000000000..1aa06bd45 --- /dev/null +++ b/src/spdk/lib/nvmf/fc_ls.c @@ -0,0 +1,1678 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2018-2019 Broadcom. All Rights Reserved. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/env.h" +#include "spdk/assert.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_spec.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/util.h" +#include "spdk/endian.h" +#include "spdk_internal/log.h" +#include "nvmf_internal.h" +#include "transport.h" + +#include "nvmf_fc.h" +#include "fc_lld.h" + +/* set to 1 to send ls disconnect in response to ls disconnect from host (per standard) */ +#define NVMF_FC_LS_SEND_LS_DISCONNECT 0 + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_CR_ASSOC_LEN = 1, + VERR_CR_ASSOC_RQST_LEN = 2, + VERR_CR_ASSOC_CMD = 3, + VERR_CR_ASSOC_CMD_LEN = 4, + VERR_ERSP_RATIO = 5, + VERR_ASSOC_ALLOC_FAIL = 6, + VERR_CONN_ALLOC_FAIL = 7, + VERR_CR_CONN_LEN = 8, + VERR_CR_CONN_RQST_LEN = 9, + VERR_ASSOC_ID = 10, + VERR_ASSOC_ID_LEN = 11, + VERR_NO_ASSOC = 12, + VERR_CONN_ID = 13, + VERR_CONN_ID_LEN = 14, + VERR_NO_CONN = 15, + VERR_CR_CONN_CMD = 16, + VERR_CR_CONN_CMD_LEN = 17, + VERR_DISCONN_LEN = 18, + VERR_DISCONN_RQST_LEN = 19, + VERR_DISCONN_CMD = 20, + VERR_DISCONN_CMD_LEN = 21, + VERR_DISCONN_SCOPE = 22, + VERR_RS_LEN = 23, + VERR_RS_RQST_LEN = 24, + VERR_RS_CMD = 25, + VERR_RS_CMD_LEN = 26, + VERR_RS_RCTL = 27, + VERR_RS_RO = 28, + VERR_CONN_TOO_MANY = 29, + VERR_SUBNQN = 30, + VERR_HOSTNQN = 31, + VERR_SQSIZE = 32, + VERR_NO_RPORT = 33, + VERR_SUBLISTENER = 34, +}; + +static char *validation_errors[] = { + "OK", + "Bad CR_ASSOC Length", + "Bad CR_ASSOC Rqst Length", + "Not CR_ASSOC Cmd", + "Bad CR_ASSOC Cmd Length", + "Bad Ersp Ratio", + "Association Allocation Failed", + "Queue Allocation Failed", + "Bad CR_CONN Length", + "Bad CR_CONN Rqst Length", + "Not Association ID", + "Bad Association ID Length", + "No Association", + "Not Connection ID", + "Bad Connection ID Length", + "No Connection", + "Not CR_CONN Cmd", + "Bad CR_CONN Cmd Length", + "Bad DISCONN Length", + "Bad DISCONN Rqst Length", + "Not DISCONN Cmd", + "Bad DISCONN Cmd Length", + "Bad 
Disconnect Scope", + "Bad RS Length", + "Bad RS Rqst Length", + "Not RS Cmd", + "Bad RS Cmd Length", + "Bad RS R_CTL", + "Bad RS Relative Offset", + "Too many connections for association", + "Invalid subnqn or subsystem not found", + "Invalid hostnqn or subsystem doesn't allow host", + "SQ size = 0 or too big", + "No Remote Port", + "Bad Subsystem Port", +}; + +static inline void +nvmf_fc_add_assoc_to_tgt_port(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_remote_port_info *rport); + +static inline FCNVME_BE32 cpu_to_be32(uint32_t in) +{ + uint32_t t; + + to_be32(&t, in); + return (FCNVME_BE32)t; +} + +static inline FCNVME_BE32 nvmf_fc_lsdesc_len(size_t sz) +{ + uint32_t t; + + to_be32(&t, sz - (2 * sizeof(uint32_t))); + return (FCNVME_BE32)t; +} + +static void +nvmf_fc_ls_format_rsp_hdr(void *buf, uint8_t ls_cmd, uint32_t desc_len, + uint8_t rqst_ls_cmd) +{ + struct spdk_nvmf_fc_ls_acc_hdr *acc_hdr = buf; + + acc_hdr->w0.ls_cmd = ls_cmd; + acc_hdr->desc_list_len = desc_len; + to_be32(&acc_hdr->rqst.desc_tag, FCNVME_LSDESC_RQST); + acc_hdr->rqst.desc_len = + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_rqst)); + acc_hdr->rqst.w0.ls_cmd = rqst_ls_cmd; +} + +static int +nvmf_fc_ls_format_rjt(void *buf, uint16_t buflen, uint8_t ls_cmd, + uint8_t reason, uint8_t explanation, uint8_t vendor) +{ + struct spdk_nvmf_fc_ls_rjt *rjt = buf; + + bzero(buf, sizeof(struct spdk_nvmf_fc_ls_rjt)); + nvmf_fc_ls_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_rjt)), + ls_cmd); + to_be32(&rjt->rjt.desc_tag, FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; + + return sizeof(struct spdk_nvmf_fc_ls_rjt); +} + +/* ************************************************** */ +/* Allocators/Deallocators (assocations, connections, */ +/* 
poller API data) */ + +static inline void +nvmf_fc_ls_free_association(struct spdk_nvmf_fc_association *assoc) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + /* return the q slots of the conns for the association */ + TAILQ_FOREACH(fc_conn, &assoc->avail_fc_conns, assoc_avail_link) { + if (fc_conn->conn_id != NVMF_FC_INVALID_CONN_ID) { + nvmf_fc_release_conn(fc_conn->hwqp, fc_conn->conn_id, + fc_conn->max_queue_depth); + } + } + + /* free assocation's send disconnect buffer */ + if (assoc->snd_disconn_bufs) { + nvmf_fc_free_srsr_bufs(assoc->snd_disconn_bufs); + } + + /* free assocation's connections */ + free(assoc->conns_buf); + + /* free the association */ + free(assoc); +} + +static int +nvmf_fc_ls_alloc_connections(struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_transport *nvmf_transport) +{ + uint32_t i; + struct spdk_nvmf_fc_conn *fc_conn; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Pre-alloc %d qpairs for host NQN %s\n", + nvmf_transport->opts.max_qpairs_per_ctrlr, assoc->host_nqn); + + /* allocate memory for all connections at once */ + assoc->conns_buf = calloc(nvmf_transport->opts.max_qpairs_per_ctrlr + 1, + sizeof(struct spdk_nvmf_fc_conn)); + if (assoc->conns_buf == NULL) { + SPDK_ERRLOG("Out of memory for connections for new association\n"); + return -ENOMEM; + } + + for (i = 0; i < nvmf_transport->opts.max_qpairs_per_ctrlr; i++) { + fc_conn = assoc->conns_buf + (i * sizeof(struct spdk_nvmf_fc_conn)); + fc_conn->conn_id = NVMF_FC_INVALID_CONN_ID; + fc_conn->qpair.state = SPDK_NVMF_QPAIR_UNINITIALIZED; + fc_conn->qpair.transport = nvmf_transport; + + TAILQ_INSERT_TAIL(&assoc->avail_fc_conns, fc_conn, assoc_avail_link); + } + + return 0; +} + +static struct spdk_nvmf_fc_association * +nvmf_fc_ls_new_association(uint32_t s_id, + struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_remote_port_info *rport, + struct spdk_nvmf_fc_lsdesc_cr_assoc_cmd *a_cmd, + struct spdk_nvmf_subsystem *subsys, + uint16_t rpi, + struct spdk_nvmf_transport 
*nvmf_transport) +{ + struct spdk_nvmf_fc_association *assoc; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "New Association request for port %d nport %d rpi 0x%x\n", + tgtport->fc_port->port_hdl, tgtport->nport_hdl, rpi); + + assert(rport); + if (!rport) { + SPDK_ERRLOG("rport is null.\n"); + return NULL; + } + + assoc = calloc(1, sizeof(struct spdk_nvmf_fc_association)); + if (!assoc) { + SPDK_ERRLOG("unable to allocate memory for new association\n"); + return NULL; + } + + /* initialize association */ +#if (NVMF_FC_LS_SEND_LS_DISCONNECT == 1) + /* allocate buffers to send LS disconnect command to host */ + assoc->snd_disconn_bufs = + nvmf_fc_alloc_srsr_bufs(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst), + sizeof(struct spdk_nvmf_fc_ls_rjt)); + if (!assoc->snd_disconn_bufs) { + SPDK_ERRLOG("no dma memory for association's ls disconnect bufs\n"); + free(assoc); + return NULL; + } + + assoc->snd_disconn_bufs->rpi = rpi; +#endif + assoc->s_id = s_id; + assoc->tgtport = tgtport; + assoc->rport = rport; + assoc->subsystem = subsys; + assoc->assoc_state = SPDK_NVMF_FC_OBJECT_CREATED; + memcpy(assoc->host_id, a_cmd->hostid, FCNVME_ASSOC_HOSTID_LEN); + memcpy(assoc->host_nqn, a_cmd->hostnqn, SPDK_NVME_NQN_FIELD_SIZE); + memcpy(assoc->sub_nqn, a_cmd->subnqn, SPDK_NVME_NQN_FIELD_SIZE); + TAILQ_INIT(&assoc->fc_conns); + TAILQ_INIT(&assoc->avail_fc_conns); + assoc->ls_del_op_ctx = NULL; + + /* allocate and assign connections for association */ + rc = nvmf_fc_ls_alloc_connections(assoc, nvmf_transport); + if (rc != 0) { + nvmf_fc_ls_free_association(assoc); + return NULL; + } + + /* add association to target port's association list */ + nvmf_fc_add_assoc_to_tgt_port(tgtport, assoc, rport); + return assoc; +} + +static inline void +nvmf_fc_ls_append_del_cb_ctx(struct spdk_nvmf_fc_association *assoc, + struct nvmf_fc_ls_op_ctx *opd) +{ + /* append to delete assoc callback list */ + if (!assoc->ls_del_op_ctx) { + assoc->ls_del_op_ctx = (void *)opd; + } else { + struct 
nvmf_fc_ls_op_ctx *nxt = + (struct nvmf_fc_ls_op_ctx *) assoc->ls_del_op_ctx; + while (nxt->next_op_ctx) { + nxt = nxt->next_op_ctx; + } + nxt->next_op_ctx = opd; + } +} + +static struct spdk_nvmf_fc_conn * +nvmf_fc_ls_new_connection(struct spdk_nvmf_fc_association *assoc, uint16_t qid, + uint16_t esrp_ratio, uint16_t rpi, uint16_t sq_size, + struct spdk_nvmf_fc_nport *tgtport) +{ + struct spdk_nvmf_fc_conn *fc_conn; + + fc_conn = TAILQ_FIRST(&assoc->avail_fc_conns); + if (!fc_conn) { + SPDK_ERRLOG("out of connections for association %p\n", assoc); + return NULL; + } + + /* Remove from avail list and add to in use. */ + TAILQ_REMOVE(&assoc->avail_fc_conns, fc_conn, assoc_avail_link); + TAILQ_INSERT_TAIL(&assoc->fc_conns, fc_conn, assoc_link); + + if (qid == 0) { + /* AdminQ connection. */ + assoc->aq_conn = fc_conn; + } + + fc_conn->qpair.qid = qid; + fc_conn->qpair.sq_head_max = sq_size; + TAILQ_INIT(&fc_conn->qpair.outstanding); + fc_conn->esrp_ratio = esrp_ratio; + fc_conn->fc_assoc = assoc; + fc_conn->rpi = rpi; + fc_conn->max_queue_depth = sq_size + 1; + + /* save target port trid in connection (for subsystem + * listener validation in fabric connect command) + */ + nvmf_fc_create_trid(&fc_conn->trid, tgtport->fc_nodename.u.wwn, + tgtport->fc_portname.u.wwn); + + return fc_conn; +} + +static inline void +nvmf_fc_ls_free_connection(struct spdk_nvmf_fc_conn *fc_conn) +{ + TAILQ_INSERT_TAIL(&fc_conn->fc_assoc->avail_fc_conns, fc_conn, assoc_avail_link); +} + +/* End - Allocators/Deallocators (assocations, connections, */ +/* poller API data) */ +/* ******************************************************** */ + +static inline struct spdk_nvmf_fc_association * +nvmf_fc_ls_find_assoc(struct spdk_nvmf_fc_nport *tgtport, uint64_t assoc_id) +{ + struct spdk_nvmf_fc_association *assoc = NULL; + + TAILQ_FOREACH(assoc, &tgtport->fc_associations, link) { + if (assoc->assoc_id == assoc_id) { + if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_ZOMBIE) { + assoc = NULL; + } + 
break; + } + } + return assoc; +} + +static inline void +nvmf_fc_add_assoc_to_tgt_port(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_remote_port_info *rport) +{ + TAILQ_INSERT_TAIL(&tgtport->fc_associations, assoc, link); + tgtport->assoc_count++; + rport->assoc_count++; +} + +static inline void +nvmf_fc_del_assoc_from_tgt_port(struct spdk_nvmf_fc_association *assoc) +{ + struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport; + + TAILQ_REMOVE(&tgtport->fc_associations, assoc, link); + tgtport->assoc_count--; + assoc->rport->assoc_count--; +} + +static void +nvmf_fc_ls_rsp_fail_del_conn_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + struct nvmf_fc_ls_op_ctx *opd = + (struct nvmf_fc_ls_op_ctx *)cb_data; + struct spdk_nvmf_fc_ls_del_conn_api_data *dp = &opd->u.del_conn; + struct spdk_nvmf_fc_association *assoc = dp->assoc; + struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Delete Connection callback " + "for assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id, + fc_conn->conn_id); + + if (dp->aq_conn) { + /* delete association */ + nvmf_fc_del_assoc_from_tgt_port(assoc); + nvmf_fc_ls_free_association(assoc); + } else { + /* remove connection from association's connection list */ + TAILQ_REMOVE(&assoc->fc_conns, fc_conn, assoc_link); + nvmf_fc_ls_free_connection(fc_conn); + } + + free(opd); +} + +static void +nvmf_fc_handle_xmt_ls_rsp_failure(struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_conn *fc_conn, + bool aq_conn) +{ + struct spdk_nvmf_fc_ls_del_conn_api_data *api_data; + struct nvmf_fc_ls_op_ctx *opd = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Transmit LS response failure " + "for assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id, + fc_conn->conn_id); + + + /* create context for delete connection API */ + opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx)); + if (!opd) { /* hopefully this doesn't happen - if so, we leak the connection */ + 
SPDK_ERRLOG("Mem alloc failed for del conn op data"); + return; + } + + api_data = &opd->u.del_conn; + api_data->assoc = assoc; + api_data->ls_rqst = NULL; + api_data->aq_conn = aq_conn; + api_data->args.fc_conn = fc_conn; + api_data->args.send_abts = false; + api_data->args.hwqp = fc_conn->hwqp; + api_data->args.cb_info.cb_thread = spdk_get_thread(); + api_data->args.cb_info.cb_func = nvmf_fc_ls_rsp_fail_del_conn_cb; + api_data->args.cb_info.cb_data = opd; + + nvmf_fc_poller_api_func(api_data->args.hwqp, + SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION, + &api_data->args); +} + +/* callback from poller's ADD_Connection event */ +static void +nvmf_fc_ls_add_conn_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + struct nvmf_fc_ls_op_ctx *opd = + (struct nvmf_fc_ls_op_ctx *)cb_data; + struct spdk_nvmf_fc_ls_add_conn_api_data *dp = &opd->u.add_conn; + struct spdk_nvmf_fc_association *assoc = dp->assoc; + struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport; + struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn; + struct spdk_nvmf_fc_ls_rqst *ls_rqst = dp->ls_rqst; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "add_conn_cb: assoc_id = 0x%lx, conn_id = 0x%lx\n", + assoc->assoc_id, fc_conn->conn_id); + + fc_conn->create_opd = NULL; + + if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + /* association is already being deleted - don't continue */ + free(opd); + return; + } + + if (dp->aq_conn) { + struct spdk_nvmf_fc_ls_cr_assoc_acc *assoc_acc = + (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt; + /* put connection and association ID in response */ + to_be64(&assoc_acc->conn_id.connection_id, fc_conn->conn_id); + assoc_acc->assoc_id.association_id = assoc_acc->conn_id.connection_id; + } else { + struct spdk_nvmf_fc_ls_cr_conn_acc *conn_acc = + (struct spdk_nvmf_fc_ls_cr_conn_acc *)ls_rqst->rspbuf.virt; + /* put connection ID in response */ + to_be64(&conn_acc->conn_id.connection_id, fc_conn->conn_id); + } + + /* send LS response */ + if 
(nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst) != 0) { + SPDK_ERRLOG("Send LS response for %s failed - cleaning up\n", + dp->aq_conn ? "association" : "connection"); + nvmf_fc_handle_xmt_ls_rsp_failure(assoc, fc_conn, + dp->aq_conn); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "LS response (conn_id 0x%lx) sent\n", fc_conn->conn_id); + } + + free(opd); +} + +void +nvmf_fc_ls_add_conn_failure( + struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_ls_rqst *ls_rqst, + struct spdk_nvmf_fc_conn *fc_conn, + bool aq_conn) +{ + struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst; + struct spdk_nvmf_fc_ls_cr_assoc_acc *acc; + struct spdk_nvmf_fc_nport *tgtport = assoc->tgtport; + + if (fc_conn->create_opd) { + free(fc_conn->create_opd); + fc_conn->create_opd = NULL; + } + + rqst = (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt; + acc = (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt; + + /* send failure response */ + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, + FCNVME_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + FCNVME_RJT_RC_INSUFF_RES, + FCNVME_RJT_EXP_NONE, 0); + + nvmf_fc_ls_free_connection(fc_conn); + if (aq_conn) { + nvmf_fc_del_assoc_from_tgt_port(assoc); + nvmf_fc_ls_free_association(assoc); + } + + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); +} + + +static void +nvmf_fc_ls_add_conn_to_poller( + struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_ls_rqst *ls_rqst, + struct spdk_nvmf_fc_conn *fc_conn, + bool aq_conn) +{ + struct nvmf_fc_ls_op_ctx *opd; + struct spdk_nvmf_fc_ls_add_conn_api_data *api_data; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Add Connection to poller for " + "assoc_id 0x%lx conn_id 0x%lx\n", assoc->assoc_id, + fc_conn->conn_id); + + opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx)); + if (!opd) { + SPDK_ERRLOG("allocate api data for add conn op failed\n"); + nvmf_fc_ls_add_conn_failure(assoc, ls_rqst, fc_conn, aq_conn); + return; + } + + /* insert conn in association's connection list */ + api_data = &opd->u.add_conn; + 
assoc->conn_count++; + + api_data->args.fc_conn = fc_conn; + api_data->args.cb_info.cb_thread = spdk_get_thread(); + api_data->args.cb_info.cb_func = nvmf_fc_ls_add_conn_cb; + api_data->args.cb_info.cb_data = (void *)opd; + api_data->assoc = assoc; + api_data->ls_rqst = ls_rqst; + api_data->aq_conn = aq_conn; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "New QP callback called.\n"); + + /* Let the nvmf_tgt decide which pollgroup to use. */ + fc_conn->create_opd = opd; + spdk_nvmf_tgt_new_qpair(ls_rqst->nvmf_tgt, &fc_conn->qpair); +} + +/* Delete association functions */ + +static void +nvmf_fc_do_del_assoc_cbs(struct nvmf_fc_ls_op_ctx *opd, int ret) +{ + struct nvmf_fc_ls_op_ctx *nxt; + struct spdk_nvmf_fc_delete_assoc_api_data *dp; + + while (opd) { + dp = &opd->u.del_assoc; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "performing delete assoc. callback\n"); + dp->del_assoc_cb(dp->del_assoc_cb_data, ret); + + nxt = opd->next_op_ctx; + free(opd); + opd = nxt; + } +} + +static void +nvmf_fs_send_ls_disconnect_cb(void *hwqp, int32_t status, void *args) +{ + if (args) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "free disconnect buffers\n"); + nvmf_fc_free_srsr_bufs((struct spdk_nvmf_fc_srsr_bufs *)args); + } +} + +static void +nvmf_fc_del_all_conns_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data; + struct spdk_nvmf_fc_delete_assoc_api_data *dp = &opd->u.del_assoc; + struct spdk_nvmf_fc_association *assoc = dp->assoc; + struct spdk_nvmf_fc_conn *fc_conn = dp->args.fc_conn; + + /* Assumption here is that there will be no error (i.e. ret=success). 
+ * Since connections are deleted in parallel, nothing can be + * done anyway if there is an error because we need to complete + * all connection deletes and callback to caller */ + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "Delete all connections for assoc_id 0x%lx, conn_id = %lx\n", + assoc->assoc_id, fc_conn->conn_id); + + /* remove connection from association's connection list */ + TAILQ_REMOVE(&assoc->fc_conns, fc_conn, assoc_link); + nvmf_fc_ls_free_connection(fc_conn); + + if (--assoc->conn_count == 0) { + /* last connection - remove association from target port's association list */ + struct nvmf_fc_ls_op_ctx *cb_opd = (struct nvmf_fc_ls_op_ctx *)assoc->ls_del_op_ctx; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "remove assoc. %lx\n", assoc->assoc_id); + nvmf_fc_del_assoc_from_tgt_port(assoc); + + if (assoc->snd_disconn_bufs && + assoc->tgtport->fc_port->hw_port_status == SPDK_FC_PORT_ONLINE) { + + struct spdk_nvmf_fc_ls_disconnect_rqst *dc_rqst; + struct spdk_nvmf_fc_srsr_bufs *srsr_bufs; + + dc_rqst = (struct spdk_nvmf_fc_ls_disconnect_rqst *) + assoc->snd_disconn_bufs->rqst; + + bzero(dc_rqst, sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst)); + + /* fill in request descriptor */ + dc_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; + to_be32(&dc_rqst->desc_list_len, + sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst) - + (2 * sizeof(uint32_t))); + + /* fill in disconnect command descriptor */ + to_be32(&dc_rqst->disconn_cmd.desc_tag, FCNVME_LSDESC_DISCONN_CMD); + to_be32(&dc_rqst->disconn_cmd.desc_len, + sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd) - + (2 * sizeof(uint32_t))); + + /* fill in association id descriptor */ + to_be32(&dc_rqst->assoc_id.desc_tag, FCNVME_LSDESC_ASSOC_ID), + to_be32(&dc_rqst->assoc_id.desc_len, + sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id) - + (2 * sizeof(uint32_t))); + to_be64(&dc_rqst->assoc_id.association_id, assoc->assoc_id); + + srsr_bufs = assoc->snd_disconn_bufs; + assoc->snd_disconn_bufs = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, 
"Send LS disconnect\n"); + if (nvmf_fc_xmt_srsr_req(&assoc->tgtport->fc_port->ls_queue, + srsr_bufs, nvmf_fs_send_ls_disconnect_cb, + (void *)srsr_bufs)) { + SPDK_ERRLOG("Error sending LS disconnect\n"); + assoc->snd_disconn_bufs = srsr_bufs; + } + } + + nvmf_fc_ls_free_association(assoc); + + /* perform callbacks to all callers to delete association */ + nvmf_fc_do_del_assoc_cbs(cb_opd, 0); + + } + + free(opd); +} + +static void +nvmf_fc_kill_io_del_all_conns_cb(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret) +{ + struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Callback after killing outstanding ABTS."); + /* + * NOTE: We should not access any connection or association related data + * structures here. + */ + free(opd); +} + + +/* Disconnect/delete (association) request functions */ + +static int +_nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport, + uint64_t assoc_id, bool send_abts, bool backend_initiated, + spdk_nvmf_fc_del_assoc_cb del_assoc_cb, + void *cb_data, bool from_ls_rqst) +{ + + struct nvmf_fc_ls_op_ctx *opd, *opd_tail, *opd_head = NULL; + struct spdk_nvmf_fc_delete_assoc_api_data *api_data; + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_association *assoc = + nvmf_fc_ls_find_assoc(tgtport, assoc_id); + struct spdk_nvmf_fc_port *fc_port = tgtport->fc_port; + enum spdk_nvmf_fc_object_state assoc_state; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Delete association, " + "assoc_id 0x%lx\n", assoc_id); + + if (!assoc) { + SPDK_ERRLOG("Delete association failed: %s\n", + validation_errors[VERR_NO_ASSOC]); + return VERR_NO_ASSOC; + } + + /* create cb context to put in association's list of + * callbacks to call when delete association is done */ + opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx)); + if (!opd) { + SPDK_ERRLOG("Mem alloc failed for del assoc cb data"); + return -ENOMEM; + } + + api_data = &opd->u.del_assoc; + api_data->assoc = assoc; + api_data->from_ls_rqst 
= from_ls_rqst; + api_data->del_assoc_cb = del_assoc_cb; + api_data->del_assoc_cb_data = cb_data; + api_data->args.cb_info.cb_data = opd; + nvmf_fc_ls_append_del_cb_ctx(assoc, opd); + + assoc_state = assoc->assoc_state; + if ((assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) && + (fc_port->hw_port_status != SPDK_FC_PORT_QUIESCED)) { + /* association already being deleted */ + return 0; + } + + /* mark assoc. to be deleted */ + assoc->assoc_state = SPDK_NVMF_FC_OBJECT_TO_BE_DELETED; + + /* create a list of all connection to delete */ + TAILQ_FOREACH(fc_conn, &assoc->fc_conns, assoc_link) { + opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx)); + if (!opd) { /* hopefully this doesn't happen */ + SPDK_ERRLOG("Mem alloc failed for del conn op data"); + while (opd_head) { /* free any contexts already allocated */ + opd = opd_head; + opd_head = opd->next_op_ctx; + free(opd); + } + return -ENOMEM; + } + + api_data = &opd->u.del_assoc; + api_data->args.fc_conn = fc_conn; + api_data->assoc = assoc; + api_data->args.send_abts = send_abts; + api_data->args.backend_initiated = backend_initiated; + api_data->args.hwqp = nvmf_fc_get_hwqp_from_conn_id( + assoc->tgtport->fc_port->io_queues, + assoc->tgtport->fc_port->num_io_queues, + fc_conn->conn_id); + api_data->args.cb_info.cb_thread = spdk_get_thread(); + if ((fc_port->hw_port_status == SPDK_FC_PORT_QUIESCED) && + (assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED)) { + /* + * If there are any connections deletes or IO abts that are + * stuck because of firmware reset, a second invocation of + * SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION will result in + * outstanding connections & requests being killed and + * their corresponding callbacks being executed. 
+ */ + api_data->args.cb_info.cb_func = nvmf_fc_kill_io_del_all_conns_cb; + } else { + api_data->args.cb_info.cb_func = nvmf_fc_del_all_conns_cb; + } + api_data->args.cb_info.cb_data = opd; + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "conn_id = %lx\n", fc_conn->conn_id); + + if (!opd_head) { + opd_head = opd; + } else { + opd_tail->next_op_ctx = opd; + } + opd_tail = opd; + } + + /* make poller api calls to delete connetions */ + while (opd_head) { + opd = opd_head; + opd_head = opd->next_op_ctx; + api_data = &opd->u.del_assoc; + nvmf_fc_poller_api_func(api_data->args.hwqp, + SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION, + &api_data->args); + } + + return 0; +} + +static void +nvmf_fc_ls_disconnect_assoc_cb(void *cb_data, uint32_t err) +{ + struct nvmf_fc_ls_op_ctx *opd = (struct nvmf_fc_ls_op_ctx *)cb_data; + struct spdk_nvmf_fc_ls_disconn_assoc_api_data *dp = &opd->u.disconn_assoc; + struct spdk_nvmf_fc_nport *tgtport = dp->tgtport; + struct spdk_nvmf_fc_ls_rqst *ls_rqst = dp->ls_rqst; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Disconnect association callback begin " + "nport %d\n", tgtport->nport_hdl); + if (err != 0) { + /* send failure response */ + struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst = + (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt; + struct spdk_nvmf_fc_ls_cr_assoc_acc *acc = + (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt; + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, + FCNVME_MAX_LS_BUFFER_SIZE, + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, + 0); + } + + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); + + free(opd); + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Disconnect association callback complete " + "nport %d err %d\n", tgtport->nport_hdl, err); +} + +static void +nvmf_fc_ls_disconnect_assoc(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_ls_rqst *ls_rqst, uint64_t assoc_id) +{ + struct nvmf_fc_ls_op_ctx *opd; + struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst = + (struct spdk_nvmf_fc_ls_cr_assoc_rqst 
*)ls_rqst->rqstbuf.virt; + struct spdk_nvmf_fc_ls_cr_assoc_acc *acc = + (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt; + struct spdk_nvmf_fc_ls_disconn_assoc_api_data *api_data; + int ret; + uint8_t reason = 0; + + opd = calloc(1, sizeof(struct nvmf_fc_ls_op_ctx)); + if (!opd) { + /* send failure response */ + SPDK_ERRLOG("Allocate disconn assoc op data failed\n"); + reason = FCNVME_RJT_RC_INSUFF_RES; + goto send_rjt; + } + + api_data = &opd->u.disconn_assoc; + api_data->tgtport = tgtport; + api_data->ls_rqst = ls_rqst; + /* + * nvmf_fc_ls_disconnect_assoc_cb() casts its cb_data back to the + * op context and frees it, so hand it the op context itself rather + * than the address of a member inside it. + */ + ret = _nvmf_fc_delete_association(tgtport, assoc_id, + false, false, + nvmf_fc_ls_disconnect_assoc_cb, + opd, true); + if (!ret) { + /* opd is now owned by the delete-association callback */ + return; + } + + /* delete association failed */ + switch (ret) { + case VERR_NO_ASSOC: + reason = FCNVME_RJT_RC_INV_ASSOC; + break; + case -ENOMEM: + reason = FCNVME_RJT_RC_INSUFF_RES; + break; + default: + reason = FCNVME_RJT_RC_LOGIC; + } + + free(opd); + +send_rjt: + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, + FCNVME_MAX_LS_BUFFER_SIZE, + rqst->w0.ls_cmd, reason, + FCNVME_RJT_EXP_NONE, 0); + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); +} + +static int +nvmf_fc_ls_validate_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + + if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) { + return -EPERM; + } + + return 0; +} + +/* **************************** */ +/* LS Request Handler Functions */ + +static void +nvmf_fc_ls_process_cass(uint32_t s_id, + struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_ls_rqst *ls_rqst) +{ + struct spdk_nvmf_fc_ls_cr_assoc_rqst *rqst = + (struct spdk_nvmf_fc_ls_cr_assoc_rqst *)ls_rqst->rqstbuf.virt; + struct spdk_nvmf_fc_ls_cr_assoc_acc *acc = + (struct spdk_nvmf_fc_ls_cr_assoc_acc *)ls_rqst->rspbuf.virt; + struct spdk_nvmf_fc_association *assoc; + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_subsystem *subsystem = NULL; + const char *hostnqn = (const char *)rqst->assoc_cmd.hostnqn; + int errmsg_ind = 0; + uint8_t rc = 
FCNVME_RJT_RC_NONE; + uint8_t ec = FCNVME_RJT_EXP_NONE; + struct spdk_nvmf_transport *transport = spdk_nvmf_tgt_get_transport(ls_rqst->nvmf_tgt, + SPDK_NVME_TRANSPORT_NAME_FC); + + /* sqsize is read as a 16-bit big-endian field, matching the validation below */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "LS_CASS: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d, sq_size=%d, " + "Subnqn: %s, Hostnqn: %s, Tgtport nn:%lx, pn:%lx\n", + ls_rqst->rqst_len, from_be32(&rqst->desc_list_len), + from_be32(&rqst->assoc_cmd.desc_len), + from_be16(&rqst->assoc_cmd.sqsize), + rqst->assoc_cmd.subnqn, hostnqn, + tgtport->fc_nodename.u.wwn, tgtport->fc_portname.u.wwn); + + if (ls_rqst->rqst_len < FCNVME_LS_CA_CMD_MIN_LEN) { + SPDK_ERRLOG("assoc_cmd req len = %d, should be at least %d\n", + ls_rqst->rqst_len, FCNVME_LS_CA_CMD_MIN_LEN); + errmsg_ind = VERR_CR_ASSOC_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (from_be32(&rqst->desc_list_len) < + FCNVME_LS_CA_DESC_LIST_MIN_LEN) { + SPDK_ERRLOG("assoc_cmd desc list len = %d, should be at least %d\n", + from_be32(&rqst->desc_list_len), + FCNVME_LS_CA_DESC_LIST_MIN_LEN); + errmsg_ind = VERR_CR_ASSOC_RQST_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->assoc_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD)) { + errmsg_ind = VERR_CR_ASSOC_CMD; + rc = FCNVME_RJT_RC_INV_PARAM; + } else if (from_be32(&rqst->assoc_cmd.desc_len) < + FCNVME_LS_CA_DESC_MIN_LEN) { + SPDK_ERRLOG("assoc_cmd desc len = %d, should be at least %d\n", + from_be32(&rqst->assoc_cmd.desc_len), + FCNVME_LS_CA_DESC_MIN_LEN); + errmsg_ind = VERR_CR_ASSOC_CMD_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (!rqst->assoc_cmd.ersp_ratio || + (from_be16(&rqst->assoc_cmd.ersp_ratio) >= + from_be16(&rqst->assoc_cmd.sqsize))) { + errmsg_ind = VERR_ERSP_RATIO; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_ESRP; + } else if (from_be16(&rqst->assoc_cmd.sqsize) == 0 || + from_be16(&rqst->assoc_cmd.sqsize) > transport->opts.max_aq_depth) { + 
errmsg_ind = VERR_SQSIZE; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_SQ_SIZE; + } + + if (rc != FCNVME_RJT_RC_NONE) { + goto rjt_cass; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(ls_rqst->nvmf_tgt, rqst->assoc_cmd.subnqn); + if (subsystem == NULL) { + errmsg_ind = VERR_SUBNQN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_SUBNQN; + goto rjt_cass; + } + + if (nvmf_fc_ls_validate_host(subsystem, hostnqn)) { + errmsg_ind = VERR_HOSTNQN; + rc = FCNVME_RJT_RC_INV_HOST; + ec = FCNVME_RJT_EXP_INV_HOSTNQN; + goto rjt_cass; + } + + /* get new association */ + assoc = nvmf_fc_ls_new_association(s_id, tgtport, ls_rqst->rport, + &rqst->assoc_cmd, subsystem, + ls_rqst->rpi, transport); + if (!assoc) { + errmsg_ind = VERR_ASSOC_ALLOC_FAIL; + rc = FCNVME_RJT_RC_INSUFF_RES; + ec = FCNVME_RJT_EXP_NONE; + goto rjt_cass; + } + + /* alloc admin q (i.e. connection) */ + fc_conn = nvmf_fc_ls_new_connection(assoc, 0, + from_be16(&rqst->assoc_cmd.ersp_ratio), + ls_rqst->rpi, + from_be16(&rqst->assoc_cmd.sqsize), + tgtport); + if (!fc_conn) { + nvmf_fc_ls_free_association(assoc); + errmsg_ind = VERR_CONN_ALLOC_FAIL; + rc = FCNVME_RJT_RC_INSUFF_RES; + ec = FCNVME_RJT_EXP_NONE; + goto rjt_cass; + } + + /* format accept response */ + bzero(acc, sizeof(*acc)); + ls_rqst->rsp_len = sizeof(*acc); + + nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvmf_fc_lsdesc_len( + sizeof(struct spdk_nvmf_fc_ls_cr_assoc_acc)), + FCNVME_LS_CREATE_ASSOCIATION); + to_be32(&acc->assoc_id.desc_tag, FCNVME_LSDESC_ASSOC_ID); + acc->assoc_id.desc_len = + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id)); + to_be32(&acc->conn_id.desc_tag, FCNVME_LSDESC_CONN_ID); + acc->conn_id.desc_len = + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id)); + + /* assign connection to HWQP poller - also sends response */ + nvmf_fc_ls_add_conn_to_poller(assoc, ls_rqst, fc_conn, true); + + return; + +rjt_cass: + SPDK_ERRLOG("Create Association LS failed: %s\n", 
validation_errors[errmsg_ind]); + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE, + rqst->w0.ls_cmd, rc, ec, 0); + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); +} + +static void +nvmf_fc_ls_process_cioc(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_ls_rqst *ls_rqst) +{ + struct spdk_nvmf_fc_ls_cr_conn_rqst *rqst = + (struct spdk_nvmf_fc_ls_cr_conn_rqst *)ls_rqst->rqstbuf.virt; + struct spdk_nvmf_fc_ls_cr_conn_acc *acc = + (struct spdk_nvmf_fc_ls_cr_conn_acc *)ls_rqst->rspbuf.virt; + struct spdk_nvmf_fc_association *assoc; + struct spdk_nvmf_fc_conn *fc_conn = NULL; + int errmsg_ind = 0; + uint8_t rc = FCNVME_RJT_RC_NONE; + uint8_t ec = FCNVME_RJT_EXP_NONE; + struct spdk_nvmf_transport *transport = spdk_nvmf_tgt_get_transport(ls_rqst->nvmf_tgt, + SPDK_NVME_TRANSPORT_NAME_FC); + + /* sqsize and ersp_ratio are read as 16-bit big-endian fields, as the validation of this request does */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "LS_CIOC: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d, " + "assoc_id=0x%lx, sq_size=%d, esrp=%d, Tgtport nn:%lx, pn:%lx\n", + ls_rqst->rqst_len, from_be32(&rqst->desc_list_len), + from_be32(&rqst->connect_cmd.desc_len), + from_be64(&rqst->assoc_id.association_id), + from_be16(&rqst->connect_cmd.sqsize), + from_be16(&rqst->connect_cmd.ersp_ratio), + tgtport->fc_nodename.u.wwn, tgtport->fc_portname.u.wwn); + + if (ls_rqst->rqst_len < sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst)) { + errmsg_ind = VERR_CR_CONN_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->desc_list_len != + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_cr_conn_rqst))) { + errmsg_ind = VERR_CR_CONN_RQST_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->assoc_id.desc_tag != + cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) { + errmsg_ind = VERR_ASSOC_ID; + rc = FCNVME_RJT_RC_INV_PARAM; + } else if (rqst->assoc_id.desc_len != + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id))) { + errmsg_ind = VERR_ASSOC_ID_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } 
else if (rqst->connect_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD)) { + errmsg_ind = VERR_CR_CONN_CMD; + rc = FCNVME_RJT_RC_INV_PARAM; + } else if (rqst->connect_cmd.desc_len != + nvmf_fc_lsdesc_len( + sizeof(struct spdk_nvmf_fc_lsdesc_cr_conn_cmd))) { + errmsg_ind = VERR_CR_CONN_CMD_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (!rqst->connect_cmd.ersp_ratio || + (from_be16(&rqst->connect_cmd.ersp_ratio) >= + from_be16(&rqst->connect_cmd.sqsize))) { + errmsg_ind = VERR_ERSP_RATIO; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_ESRP; + } else if (from_be16(&rqst->connect_cmd.sqsize) == 0 || + from_be16(&rqst->connect_cmd.sqsize) > transport->opts.max_queue_depth) { + errmsg_ind = VERR_SQSIZE; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_SQ_SIZE; + } + + if (rc != FCNVME_RJT_RC_NONE) { + goto rjt_cioc; + } + + /* find association */ + assoc = nvmf_fc_ls_find_assoc(tgtport, + from_be64(&rqst->assoc_id.association_id)); + if (!assoc) { + errmsg_ind = VERR_NO_ASSOC; + rc = FCNVME_RJT_RC_INV_ASSOC; + } else if (assoc->assoc_state == SPDK_NVMF_FC_OBJECT_TO_BE_DELETED) { + /* association is being deleted - don't allow more connections */ + errmsg_ind = VERR_NO_ASSOC; + rc = FCNVME_RJT_RC_INV_ASSOC; + } else if (assoc->conn_count >= transport->opts.max_qpairs_per_ctrlr) { + errmsg_ind = VERR_CONN_TOO_MANY; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_Q_ID; + } + + if (rc != FCNVME_RJT_RC_NONE) { + goto rjt_cioc; + } + + fc_conn = nvmf_fc_ls_new_connection(assoc, from_be16(&rqst->connect_cmd.qid), + from_be16(&rqst->connect_cmd.ersp_ratio), + ls_rqst->rpi, + from_be16(&rqst->connect_cmd.sqsize), + tgtport); + if (!fc_conn) { + errmsg_ind = VERR_CONN_ALLOC_FAIL; + rc = FCNVME_RJT_RC_INSUFF_RES; + ec = FCNVME_RJT_EXP_NONE; + goto rjt_cioc; + } + + /* format accept response */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "Formatting LS accept response for " + "assoc_id 0x%lx conn_id 0x%lx\n", 
assoc->assoc_id, + fc_conn->conn_id); + bzero(acc, sizeof(*acc)); + ls_rqst->rsp_len = sizeof(*acc); + nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvmf_fc_lsdesc_len( + sizeof(struct spdk_nvmf_fc_ls_cr_conn_acc)), + FCNVME_LS_CREATE_CONNECTION); + to_be32(&acc->conn_id.desc_tag, FCNVME_LSDESC_CONN_ID); + acc->conn_id.desc_len = + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_conn_id)); + + /* assign connection to HWQP poller - also sends response */ + nvmf_fc_ls_add_conn_to_poller(assoc, ls_rqst, fc_conn, false); + + return; + +rjt_cioc: + SPDK_ERRLOG("Create Connection LS failed: %s\n", validation_errors[errmsg_ind]); + + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE, + rqst->w0.ls_cmd, rc, ec, 0); + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); +} + +static void +nvmf_fc_ls_process_disc(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_ls_rqst *ls_rqst) +{ + struct spdk_nvmf_fc_ls_disconnect_rqst *rqst = + (struct spdk_nvmf_fc_ls_disconnect_rqst *)ls_rqst->rqstbuf.virt; + struct spdk_nvmf_fc_ls_disconnect_acc *acc = + (struct spdk_nvmf_fc_ls_disconnect_acc *)ls_rqst->rspbuf.virt; + struct spdk_nvmf_fc_association *assoc; + int errmsg_ind = 0; + uint8_t rc = FCNVME_RJT_RC_NONE; + uint8_t ec = FCNVME_RJT_EXP_NONE; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, + "LS_DISC: ls_rqst_len=%d, desc_list_len=%d, cmd_len=%d," + "assoc_id=0x%lx\n", + ls_rqst->rqst_len, from_be32(&rqst->desc_list_len), + from_be32(&rqst->disconn_cmd.desc_len), + from_be64(&rqst->assoc_id.association_id)); + + if (ls_rqst->rqst_len < sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst)) { + errmsg_ind = VERR_DISCONN_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->desc_list_len != + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_ls_disconnect_rqst))) { + errmsg_ind = VERR_DISCONN_RQST_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->assoc_id.desc_tag != + cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) { + 
errmsg_ind = VERR_ASSOC_ID; + rc = FCNVME_RJT_RC_INV_PARAM; + } else if (rqst->assoc_id.desc_len != + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_assoc_id))) { + errmsg_ind = VERR_ASSOC_ID_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } else if (rqst->disconn_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) { + rc = FCNVME_RJT_RC_INV_PARAM; + errmsg_ind = VERR_DISCONN_CMD; + } else if (rqst->disconn_cmd.desc_len != + nvmf_fc_lsdesc_len(sizeof(struct spdk_nvmf_fc_lsdesc_disconn_cmd))) { + errmsg_ind = VERR_DISCONN_CMD_LEN; + rc = FCNVME_RJT_RC_INV_PARAM; + ec = FCNVME_RJT_EXP_INV_LEN; + } + + if (rc != FCNVME_RJT_RC_NONE) { + goto rjt_disc; + } + + /* match an active association */ + assoc = nvmf_fc_ls_find_assoc(tgtport, + from_be64(&rqst->assoc_id.association_id)); + if (!assoc) { + errmsg_ind = VERR_NO_ASSOC; + rc = FCNVME_RJT_RC_INV_ASSOC; + goto rjt_disc; + } + + /* format response */ + bzero(acc, sizeof(*acc)); + ls_rqst->rsp_len = sizeof(*acc); + + nvmf_fc_ls_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvmf_fc_lsdesc_len( + sizeof(struct spdk_nvmf_fc_ls_disconnect_acc)), + FCNVME_LS_DISCONNECT); + + nvmf_fc_ls_disconnect_assoc(tgtport, ls_rqst, assoc->assoc_id); + return; + +rjt_disc: + SPDK_ERRLOG("Disconnect LS failed: %s\n", validation_errors[errmsg_ind]); + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(acc, FCNVME_MAX_LS_BUFFER_SIZE, + rqst->w0.ls_cmd, rc, ec, 0); + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); +} + +/* ************************ */ +/* external functions */ + +void +nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port) +{ +} + +void +nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port) +{ +} + +void +nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst) +{ + struct spdk_nvmf_fc_ls_rqst_w0 *w0 = + (struct spdk_nvmf_fc_ls_rqst_w0 *)ls_rqst->rqstbuf.virt; + uint32_t s_id = ls_rqst->s_id; + struct spdk_nvmf_fc_nport *tgtport = ls_rqst->nport; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_LS, "LS cmd=%d\n", w0->ls_cmd); + + switch 
(w0->ls_cmd) { + case FCNVME_LS_CREATE_ASSOCIATION: + nvmf_fc_ls_process_cass(s_id, tgtport, ls_rqst); + break; + case FCNVME_LS_CREATE_CONNECTION: + nvmf_fc_ls_process_cioc(tgtport, ls_rqst); + break; + case FCNVME_LS_DISCONNECT: + nvmf_fc_ls_process_disc(tgtport, ls_rqst); + break; + default: + SPDK_ERRLOG("Invalid LS cmd=%d\n", w0->ls_cmd); + ls_rqst->rsp_len = nvmf_fc_ls_format_rjt(ls_rqst->rspbuf.virt, + FCNVME_MAX_LS_BUFFER_SIZE, w0->ls_cmd, + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); + nvmf_fc_xmt_ls_rsp(tgtport, ls_rqst); + } +} + +int +nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport, + uint64_t assoc_id, bool send_abts, bool backend_initiated, + spdk_nvmf_fc_del_assoc_cb del_assoc_cb, + void *cb_data) +{ + return _nvmf_fc_delete_association(tgtport, assoc_id, send_abts, backend_initiated, + del_assoc_cb, cb_data, false); +} + +static void +nvmf_fc_poller_api_cb_event(void *arg) +{ + struct spdk_nvmf_fc_poller_api_cb_info *cb_info = + (struct spdk_nvmf_fc_poller_api_cb_info *) arg; + + assert(cb_info != NULL); + cb_info->cb_func(cb_info->cb_data, cb_info->ret); +} + +static void +nvmf_fc_poller_api_perform_cb(struct spdk_nvmf_fc_poller_api_cb_info *cb_info, + enum spdk_nvmf_fc_poller_api_ret ret) +{ + if (cb_info->cb_func && cb_info->cb_thread) { + cb_info->ret = ret; + /* callback to master thread */ + spdk_thread_send_msg(cb_info->cb_thread, nvmf_fc_poller_api_cb_event, + (void *) cb_info); + } +} + +static void +nvmf_fc_poller_api_add_connection(void *arg) +{ + enum spdk_nvmf_fc_poller_api_ret ret = SPDK_NVMF_FC_POLLER_API_SUCCESS; + struct spdk_nvmf_fc_poller_api_add_connection_args *conn_args = + (struct spdk_nvmf_fc_poller_api_add_connection_args *)arg; + struct spdk_nvmf_fc_conn *fc_conn; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Poller add connection, conn_id 0x%lx\n", + conn_args->fc_conn->conn_id); + + /* make sure connection is not already in poller's list */ + fc_conn = nvmf_fc_hwqp_find_fc_conn(conn_args->fc_conn->hwqp, 
+ conn_args->fc_conn->conn_id); + if (fc_conn) { + SPDK_ERRLOG("duplicate connection found\n"); + ret = SPDK_NVMF_FC_POLLER_API_DUP_CONN_ID; + } else { + /* the lookup returned NULL here, so log the id of the connection being added */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, + "conn_id=%lx\n", conn_args->fc_conn->conn_id); + TAILQ_INSERT_TAIL(&conn_args->fc_conn->hwqp->connection_list, + conn_args->fc_conn, link); + } + + /* perform callback */ + nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, ret); +} + +static void +nvmf_fc_poller_api_quiesce_queue(void *arg) +{ + struct spdk_nvmf_fc_poller_api_quiesce_queue_args *q_args = + (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *) arg; + struct spdk_nvmf_fc_request *fc_req = NULL, *tmp; + + /* should be already, but make sure queue is quiesced */ + q_args->hwqp->state = SPDK_FC_HWQP_OFFLINE; + + /* + * Kill all the outstanding commands that are in the transfer state and + * in the process of being aborted. + * We can run into this situation if an adapter reset happens when an I_T Nexus delete + * is in progress. + */ + TAILQ_FOREACH_SAFE(fc_req, &q_args->hwqp->in_use_reqs, link, tmp) { + if (nvmf_fc_req_in_xfer(fc_req) && fc_req->is_aborted == true) { + nvmf_fc_poller_api_func(q_args->hwqp, SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE, + (void *)fc_req); + } + } + + /* perform callback */ + nvmf_fc_poller_api_perform_cb(&q_args->cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS); +} + +static void +nvmf_fc_poller_api_activate_queue(void *arg) +{ + struct spdk_nvmf_fc_poller_api_quiesce_queue_args *q_args = + (struct spdk_nvmf_fc_poller_api_quiesce_queue_args *) arg; + + q_args->hwqp->state = SPDK_FC_HWQP_ONLINE; + + /* perform callback */ + nvmf_fc_poller_api_perform_cb(&q_args->cb_info, 0); +} + +static void +nvmf_fc_disconnect_qpair_cb(void *ctx) +{ + struct spdk_nvmf_fc_poller_api_cb_info *cb_info = ctx; + /* perform callback */ + nvmf_fc_poller_api_perform_cb(cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS); +} + +static void +nvmf_fc_poller_conn_abort_done(void *hwqp, int32_t status, void *cb_args) +{ + struct 
spdk_nvmf_fc_poller_api_del_connection_args *conn_args = cb_args; + + if (conn_args->fc_request_cnt) { + conn_args->fc_request_cnt -= 1; + } + + if (!conn_args->fc_request_cnt) { + if (!TAILQ_EMPTY(&conn_args->hwqp->connection_list)) { + /* All the requests for this connection are aborted. */ + TAILQ_REMOVE(&conn_args->hwqp->connection_list, conn_args->fc_conn, link); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Connection deleted, conn_id 0x%lx\n", + conn_args->fc_conn->conn_id); + + if (!conn_args->backend_initiated) { + /* disconnect qpair from nvmf controller */ + spdk_nvmf_qpair_disconnect(&conn_args->fc_conn->qpair, + nvmf_fc_disconnect_qpair_cb, &conn_args->cb_info); + } + } else { + /* + * Duplicate connection delete can happen if one is + * coming in via an association disconnect and the other + * is initiated by a port reset. + */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Duplicate conn delete."); + /* perform callback */ + nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, SPDK_NVMF_FC_POLLER_API_SUCCESS); + } + } +} + +static void +nvmf_fc_poller_api_del_connection(void *arg) +{ + struct spdk_nvmf_fc_poller_api_del_connection_args *conn_args = + (struct spdk_nvmf_fc_poller_api_del_connection_args *)arg; + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_request *fc_req = NULL, *tmp; + struct spdk_nvmf_fc_hwqp *hwqp = conn_args->hwqp; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Poller delete connection, conn_id 0x%lx\n", + conn_args->fc_conn->conn_id); + + /* find the connection in poller's list */ + fc_conn = nvmf_fc_hwqp_find_fc_conn(hwqp, conn_args->fc_conn->conn_id); + if (!fc_conn) { + /* perform callback */ + nvmf_fc_poller_api_perform_cb(&conn_args->cb_info, SPDK_NVMF_FC_POLLER_API_NO_CONN_ID); + return; + } + + conn_args->fc_request_cnt = 0; + + TAILQ_FOREACH_SAFE(fc_req, &hwqp->in_use_reqs, link, tmp) { + if (fc_req->fc_conn->conn_id == fc_conn->conn_id) { + if (nvmf_qpair_is_admin_queue(&fc_conn->qpair) && + 
(fc_req->req.cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST)) { + /* AER will be cleaned by spdk_nvmf_qpair_disconnect. */ + continue; + } + + conn_args->fc_request_cnt += 1; + nvmf_fc_request_abort(fc_req, conn_args->send_abts, + nvmf_fc_poller_conn_abort_done, + conn_args); + } + } + + if (!conn_args->fc_request_cnt) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, "Connection deleted.\n"); + TAILQ_REMOVE(&hwqp->connection_list, fc_conn, link); + + if (!conn_args->backend_initiated) { + /* disconnect qpair from nvmf controller */ + spdk_nvmf_qpair_disconnect(&fc_conn->qpair, nvmf_fc_disconnect_qpair_cb, + &conn_args->cb_info); + } + } +} + +static void +nvmf_fc_poller_abts_done(void *hwqp, int32_t status, void *cb_args) +{ + struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = cb_args; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, + "ABTS poller done, rpi: 0x%x, oxid: 0x%x, rxid: 0x%x\n", + args->ctx->rpi, args->ctx->oxid, args->ctx->rxid); + + nvmf_fc_poller_api_perform_cb(&args->cb_info, + SPDK_NVMF_FC_POLLER_API_SUCCESS); +} + +static void +nvmf_fc_poller_api_abts_received(void *arg) +{ + struct spdk_nvmf_fc_poller_api_abts_recvd_args *args = arg; + struct spdk_nvmf_fc_request *fc_req = NULL; + struct spdk_nvmf_fc_hwqp *hwqp = args->hwqp; + + TAILQ_FOREACH(fc_req, &hwqp->in_use_reqs, link) { + if ((fc_req->rpi == args->ctx->rpi) && + (fc_req->oxid == args->ctx->oxid)) { + nvmf_fc_request_abort(fc_req, false, + nvmf_fc_poller_abts_done, args); + return; + } + } + + nvmf_fc_poller_api_perform_cb(&args->cb_info, + SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND); +} + +static void +nvmf_fc_poller_api_queue_sync(void *arg) +{ + struct spdk_nvmf_fc_poller_api_queue_sync_args *args = arg; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, + "HWQP sync requested for u_id = 0x%lx\n", args->u_id); + + /* Add this args to hwqp sync_cb list */ + TAILQ_INSERT_TAIL(&args->hwqp->sync_cbs, args, link); +} + +static void +nvmf_fc_poller_api_queue_sync_done(void *arg) +{ + struct 
spdk_nvmf_fc_poller_api_queue_sync_done_args *args = arg; + struct spdk_nvmf_fc_hwqp *hwqp; + uint64_t tag; + struct spdk_nvmf_fc_poller_api_queue_sync_args *sync_args = NULL, *tmp = NULL; + + /* check the argument before its first dereference */ + assert(args != NULL); + hwqp = args->hwqp; + tag = args->tag; + + TAILQ_FOREACH_SAFE(sync_args, &hwqp->sync_cbs, link, tmp) { + if (sync_args->u_id == tag) { + /* Queue successfully synced. Remove from cb list */ + TAILQ_REMOVE(&hwqp->sync_cbs, sync_args, link); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_FC_POLLER_API, + "HWQP sync done for u_id = 0x%lx\n", sync_args->u_id); + + /* Return the status to poller */ + nvmf_fc_poller_api_perform_cb(&sync_args->cb_info, + SPDK_NVMF_FC_POLLER_API_SUCCESS); + break; + } + } + + /* + * This function frees the sync-done args; the callback above only + * receives sync_args->cb_info, so release arg whether or not a + * matching sync request was found. + */ + free(arg); + /* note: no callback from this api */ +} + +static void +nvmf_fc_poller_api_add_hwqp(void *arg) +{ + struct spdk_nvmf_fc_hwqp *hwqp = (struct spdk_nvmf_fc_hwqp *)arg; + + hwqp->lcore_id = spdk_env_get_current_core(); /* for tracing purposes only */ + TAILQ_INSERT_TAIL(&hwqp->fgroup->hwqp_list, hwqp, link); + /* note: no callback from this api */ +} + +static void +nvmf_fc_poller_api_remove_hwqp(void *arg) +{ + struct spdk_nvmf_fc_hwqp *hwqp = (struct spdk_nvmf_fc_hwqp *)arg; + struct spdk_nvmf_fc_poll_group *fgroup = hwqp->fgroup; + + TAILQ_REMOVE(&fgroup->hwqp_list, hwqp, link); + hwqp->fgroup = NULL; + /* note: no callback from this api */ +} + +enum spdk_nvmf_fc_poller_api_ret +nvmf_fc_poller_api_func(struct spdk_nvmf_fc_hwqp *hwqp, enum spdk_nvmf_fc_poller_api api, + void *api_args) { + switch (api) + { + case SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_add_connection, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_del_connection, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE: + /* quiesce q polling now, don't wait for poller to do it */ + hwqp->state = SPDK_FC_HWQP_OFFLINE; + spdk_thread_send_msg(hwqp->thread, + 
nvmf_fc_poller_api_quiesce_queue, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_ACTIVATE_QUEUE: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_activate_queue, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_abts_received, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_REQ_ABORT_COMPLETE: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_request_abort_complete, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_queue_sync, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC_DONE: + spdk_thread_send_msg(hwqp->thread, + nvmf_fc_poller_api_queue_sync_done, api_args); + break; + + case SPDK_NVMF_FC_POLLER_API_ADD_HWQP: + spdk_thread_send_msg(hwqp->thread, nvmf_fc_poller_api_add_hwqp, (void *) hwqp); + break; + + case SPDK_NVMF_FC_POLLER_API_REMOVE_HWQP: + spdk_thread_send_msg(hwqp->thread, nvmf_fc_poller_api_remove_hwqp, (void *) hwqp); + break; + + case SPDK_NVMF_FC_POLLER_API_ADAPTER_EVENT: + case SPDK_NVMF_FC_POLLER_API_AEN: + default: + SPDK_ERRLOG("BAD ARG!"); + return SPDK_NVMF_FC_POLLER_API_INVALID_ARG; + } + + return SPDK_NVMF_FC_POLLER_API_SUCCESS; +} + +SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_poller_api", SPDK_LOG_NVMF_FC_POLLER_API) +SPDK_LOG_REGISTER_COMPONENT("nvmf_fc_ls", SPDK_LOG_NVMF_FC_LS) diff --git a/src/spdk/lib/nvmf/nvmf.c b/src/spdk/lib/nvmf/nvmf.c new file mode 100644 index 000000000..73fa0742e --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf.c @@ -0,0 +1,1457 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2018-2019 Mellanox Technologies LTD. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/bit_array.h" +#include "spdk/conf.h" +#include "spdk/thread.h" +#include "spdk/nvmf.h" +#include "spdk/trace.h" +#include "spdk/endian.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#include "nvmf_internal.h" +#include "transport.h" + +SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF) + +#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024 + +static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts); + +typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status); +static void nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf); + +/* supplied to a single call to nvmf_qpair_disconnect */ +struct nvmf_qpair_disconnect_ctx { + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_ctrlr *ctrlr; + nvmf_qpair_disconnect_cb cb_fn; + struct spdk_thread *thread; + void *ctx; + uint16_t qid; +}; + +/* + * There are several times when we need to iterate through the list of all qpairs and selectively delete them. + * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from + * to enable calling nvmf_qpair_disconnect on the next desired qpair. + */ +struct nvmf_qpair_disconnect_many_ctx { + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + spdk_nvmf_poll_group_mod_done cpl_fn; + void *cpl_ctx; +}; + +static void +nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair, + enum spdk_nvmf_qpair_state state) +{ + assert(qpair != NULL); + assert(qpair->group->thread == spdk_get_thread()); + + qpair->state = state; +} + +static int +nvmf_poll_group_poll(void *ctx) +{ + struct spdk_nvmf_poll_group *group = ctx; + int rc; + int count = 0; + struct spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + rc = nvmf_transport_poll_group_poll(tgroup); + if (rc < 0) { + return SPDK_POLLER_BUSY; + } + count += rc; + } + + return count > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; +} + +static int +nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_tgt *tgt = io_device; + struct spdk_nvmf_poll_group *group = ctx_buf; + struct spdk_nvmf_transport *transport; + uint32_t sid; + + TAILQ_INIT(&group->tgroups); + TAILQ_INIT(&group->qpairs); + + TAILQ_FOREACH(transport, &tgt->transports, link) { + nvmf_poll_group_add_transport(group, transport); + } + + group->num_sgroups = tgt->max_subsystems; + group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group)); + if (!group->sgroups) { + return -ENOMEM; + } + + for (sid = 0; sid < tgt->max_subsystems; sid++) { + struct spdk_nvmf_subsystem *subsystem; + + subsystem = tgt->subsystems[sid]; + if (!subsystem) { + continue; + } + + if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) { + nvmf_tgt_destroy_poll_group(io_device, ctx_buf); + return -1; + } + } + + pthread_mutex_lock(&tgt->mutex); + TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link); + pthread_mutex_unlock(&tgt->mutex); + + group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0); + group->thread = spdk_get_thread(); + + return 0; +} + +static void +nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_tgt *tgt = io_device; + struct spdk_nvmf_poll_group *group = ctx_buf; + struct spdk_nvmf_transport_poll_group *tgroup, *tmp; + struct spdk_nvmf_subsystem_poll_group *sgroup; + uint32_t sid, nsid; + + pthread_mutex_lock(&tgt->mutex); + TAILQ_REMOVE(&tgt->poll_groups, group, link); + pthread_mutex_unlock(&tgt->mutex); + + TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) { + TAILQ_REMOVE(&group->tgroups, tgroup, link); + nvmf_transport_poll_group_destroy(tgroup); + } + + for (sid = 0; sid < group->num_sgroups; sid++) { + sgroup = &group->sgroups[sid]; + + for (nsid = 0; nsid < sgroup->num_ns; nsid++) { + if (sgroup->ns_info[nsid].channel) { + spdk_put_io_channel(sgroup->ns_info[nsid].channel); 
+ sgroup->ns_info[nsid].channel = NULL; + } + } + + free(sgroup->ns_info); + } + + free(group->sgroups); + + if (group->destroy_cb_fn) { + group->destroy_cb_fn(group->destroy_cb_arg, 0); + } +} + +static void +_nvmf_tgt_disconnect_next_qpair(void *ctx) +{ + struct spdk_nvmf_qpair *qpair; + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_poll_group *group = qpair_ctx->group; + struct spdk_io_channel *ch; + int rc = 0; + + qpair = TAILQ_FIRST(&group->qpairs); + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx); + } + + if (!qpair || rc != 0) { + /* When the refcount from the channels reaches 0, nvmf_tgt_destroy_poll_group will be called. */ + ch = spdk_io_channel_from_ctx(group); + spdk_put_io_channel(ch); + free(qpair_ctx); + } +} + +static void +nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group) +{ + struct nvmf_qpair_disconnect_many_ctx *ctx; + + ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx)); + + if (!ctx) { + SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n"); + return; + } + + spdk_poller_unregister(&group->poller); + + ctx->group = group; + _nvmf_tgt_disconnect_next_qpair(ctx); +} + +struct spdk_nvmf_tgt * +spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts) +{ + struct spdk_nvmf_tgt *tgt, *tmp_tgt; + + if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) { + SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH); + return NULL; + } + + TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) { + if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) { + SPDK_ERRLOG("Provided target name must be unique.\n"); + return NULL; + } + } + + tgt = calloc(1, sizeof(*tgt)); + if (!tgt) { + return NULL; + } + + snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name); + + if (!opts || !opts->max_subsystems) { + tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS; + } 
else { + tgt->max_subsystems = opts->max_subsystems; + } + + tgt->discovery_genctr = 0; + TAILQ_INIT(&tgt->transports); + TAILQ_INIT(&tgt->poll_groups); + + tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *)); + if (!tgt->subsystems) { + free(tgt); + return NULL; + } + + pthread_mutex_init(&tgt->mutex, NULL); + + TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link); + + spdk_io_device_register(tgt, + nvmf_tgt_create_poll_group, + nvmf_tgt_destroy_poll_group, + sizeof(struct spdk_nvmf_poll_group), + tgt->name); + + return tgt; +} + +static void +nvmf_tgt_destroy_cb(void *io_device) +{ + struct spdk_nvmf_tgt *tgt = io_device; + struct spdk_nvmf_transport *transport, *transport_tmp; + spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; + void *destroy_cb_arg; + uint32_t i; + + if (tgt->subsystems) { + for (i = 0; i < tgt->max_subsystems; i++) { + if (tgt->subsystems[i]) { + nvmf_subsystem_remove_all_listeners(tgt->subsystems[i], true); + spdk_nvmf_subsystem_destroy(tgt->subsystems[i]); + } + } + free(tgt->subsystems); + } + + TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, transport_tmp) { + TAILQ_REMOVE(&tgt->transports, transport, link); + spdk_nvmf_transport_destroy(transport); + } + + destroy_cb_fn = tgt->destroy_cb_fn; + destroy_cb_arg = tgt->destroy_cb_arg; + + free(tgt); + + if (destroy_cb_fn) { + destroy_cb_fn(destroy_cb_arg, 0); + } +} + +void +spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt, + spdk_nvmf_tgt_destroy_done_fn cb_fn, + void *cb_arg) +{ + tgt->destroy_cb_fn = cb_fn; + tgt->destroy_cb_arg = cb_arg; + + TAILQ_REMOVE(&g_nvmf_tgts, tgt, link); + + spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb); +} + +const char * +spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt) +{ + return tgt->name; +} + +struct spdk_nvmf_tgt * +spdk_nvmf_get_tgt(const char *name) +{ + struct spdk_nvmf_tgt *tgt; + uint32_t num_targets = 0; + + TAILQ_FOREACH(tgt, &g_nvmf_tgts, link) { + if (name) { + if (!strncmp(tgt->name, name, 
NVMF_TGT_NAME_MAX_LENGTH)) { + return tgt; + } + } + num_targets++; + } + + /* + * special case. If there is only one target and + * no name was specified, return the only available + * target. If there is more than one target, name must + * be specified. + */ + if (!name && num_targets == 1) { + return TAILQ_FIRST(&g_nvmf_tgts); + } + + return NULL; +} + +struct spdk_nvmf_tgt * +spdk_nvmf_get_first_tgt(void) +{ + return TAILQ_FIRST(&g_nvmf_tgts); +} + +struct spdk_nvmf_tgt * +spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev) +{ + return TAILQ_NEXT(prev, link); +} + +static void +nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w, + struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_host *host; + struct spdk_nvmf_subsystem_listener *listener; + const struct spdk_nvme_transport_id *trid; + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_ns_opts ns_opts; + uint32_t max_namespaces; + char uuid_str[SPDK_UUID_STRING_LEN]; + const char *adrfam; + + if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) { + return; + } + + /* { */ + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_create_subsystem"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem)); + spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem)); + spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem)); + + max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem); + if (max_namespaces != 0) { + spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces); + } + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + + for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL; + listener = 
spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) { + trid = spdk_nvmf_subsystem_listener_get_trid(listener); + + adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + + /* "listen_address" : { */ + spdk_json_write_named_object_begin(w, "listen_address"); + + spdk_json_write_named_string(w, "trtype", trid->trstring); + if (adrfam) { + spdk_json_write_named_string(w, "adrfam", adrfam); + } + + spdk_json_write_named_string(w, "traddr", trid->traddr); + spdk_json_write_named_string(w, "trsvcid", trid->trsvcid); + /* } "listen_address" */ + spdk_json_write_object_end(w); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } + + for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL; + host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) { + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host)); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } + + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts)); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + + 
/* "namespace" : { */ + spdk_json_write_named_object_begin(w, "namespace"); + + spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns)); + spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + + if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) { + SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch"); + spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]), + from_be64(&ns_opts.nguid[8])); + } + + if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) { + SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch"); + spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64)); + } + + if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) { + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + } + + /* "namespace" */ + spdk_json_write_object_end(w); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } +} + +void +spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_transport *transport; + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_set_max_subsystems"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_uint32(w, "max_subsystems", tgt->max_subsystems); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + /* write transports */ + TAILQ_FOREACH(transport, &tgt->transports, link) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_create_transport"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "trtype", spdk_nvme_transport_id_trtype_str(transport->ops->type)); + spdk_json_write_named_uint32(w, 
"max_queue_depth", transport->opts.max_queue_depth); + spdk_json_write_named_uint32(w, "max_io_qpairs_per_ctrlr", + transport->opts.max_qpairs_per_ctrlr - 1); + spdk_json_write_named_uint32(w, "in_capsule_data_size", transport->opts.in_capsule_data_size); + spdk_json_write_named_uint32(w, "max_io_size", transport->opts.max_io_size); + spdk_json_write_named_uint32(w, "io_unit_size", transport->opts.io_unit_size); + spdk_json_write_named_uint32(w, "max_aq_depth", transport->opts.max_aq_depth); + if (transport->ops->type == SPDK_NVME_TRANSPORT_RDMA) { + spdk_json_write_named_uint32(w, "max_srq_depth", transport->opts.max_srq_depth); + } + spdk_json_write_named_uint32(w, "abort_timeout_sec", transport->opts.abort_timeout_sec); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + subsystem = spdk_nvmf_subsystem_get_first(tgt); + while (subsystem) { + nvmf_write_subsystem_config_json(w, subsystem); + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + } +} + +int +spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_transport *transport; + const char *trtype; + int rc; + + transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring); + if (!transport) { + trtype = spdk_nvme_transport_id_trtype_str(trid->trtype); + if (trtype != NULL) { + SPDK_ERRLOG("Unable to listen on transport %s. The transport must be created first.\n", trtype); + } else { + SPDK_ERRLOG("The specified trtype %d is unknown. 
Please make sure that it is properly registered.\n", + trid->trtype); + } + + return -EINVAL; + } + + rc = spdk_nvmf_transport_listen(transport, trid); + if (rc < 0) { + SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr); + } + + return rc; +} + +int +spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_transport *transport; + const char *trtype; + int rc; + + transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring); + if (!transport) { + trtype = spdk_nvme_transport_id_trtype_str(trid->trtype); + if (trtype != NULL) { + SPDK_ERRLOG("Unable to stop listen on transport %s. The transport must be created first.\n", + trtype); + } else { + SPDK_ERRLOG("The specified trtype %d is unknown. Please make sure that it is properly registered.\n", + trid->trtype); + } + return -EINVAL; + } + + rc = spdk_nvmf_transport_stop_listen(transport, trid); + if (rc < 0) { + SPDK_ERRLOG("Failed to stop listening on address '%s'\n", trid->traddr); + return rc; + } + return 0; +} + +struct spdk_nvmf_tgt_add_transport_ctx { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_transport *transport; + spdk_nvmf_tgt_add_transport_done_fn cb_fn; + void *cb_arg; +}; + +static void +_nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + ctx->cb_fn(ctx->cb_arg, status); + + free(ctx); +} + +static void +_nvmf_tgt_add_transport(struct spdk_io_channel_iter *i) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch); + int rc; + + rc = nvmf_poll_group_add_transport(group, ctx->transport); + spdk_for_each_channel_continue(i, rc); +} + +void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport *transport, + 
spdk_nvmf_tgt_add_transport_done_fn cb_fn, + void *cb_arg) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx; + + if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->name)) { + cb_fn(cb_arg, -EEXIST); + return; /* transport already created */ + } + + transport->tgt = tgt; + TAILQ_INSERT_TAIL(&tgt->transports, transport, link); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->tgt = tgt; + ctx->transport = transport; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_for_each_channel(tgt, + _nvmf_tgt_add_transport, + ctx, + _nvmf_tgt_add_transport_done); +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + if (!subnqn) { + return NULL; + } + + /* Ensure that subnqn is null terminated */ + if (!memchr(subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) { + SPDK_ERRLOG("Connect SUBNQN is not null terminated\n"); + return NULL; + } + + for (sid = 0; sid < tgt->max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem == NULL) { + continue; + } + + if (strcmp(subnqn, subsystem->subnqn) == 0) { + return subsystem; + } + } + + return NULL; +} + +struct spdk_nvmf_transport * +spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, const char *transport_name) +{ + struct spdk_nvmf_transport *transport; + + TAILQ_FOREACH(transport, &tgt->transports, link) { + if (!strncasecmp(transport->ops->name, transport_name, SPDK_NVMF_TRSTRING_MAX_LEN)) { + return transport; + } + } + return NULL; +} + +struct nvmf_new_qpair_ctx { + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_poll_group *group; +}; + +static void +_nvmf_poll_group_add(void *_ctx) +{ + struct nvmf_new_qpair_ctx *ctx = _ctx; + struct spdk_nvmf_qpair *qpair = ctx->qpair; + struct spdk_nvmf_poll_group *group = ctx->group; + + free(_ctx); + + if (spdk_nvmf_poll_group_add(group, qpair) != 0) { + SPDK_ERRLOG("Unable to add the qpair to a 
poll group.\n"); + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + } +} + +void +spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_poll_group *group; + struct nvmf_new_qpair_ctx *ctx; + + group = spdk_nvmf_get_optimal_poll_group(qpair); + if (group == NULL) { + if (tgt->next_poll_group == NULL) { + tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups); + if (tgt->next_poll_group == NULL) { + SPDK_ERRLOG("No poll groups exist.\n"); + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + return; + } + } + group = tgt->next_poll_group; + tgt->next_poll_group = TAILQ_NEXT(group, link); + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + SPDK_ERRLOG("Unable to send message to poll group.\n"); + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + return; + } + + ctx->qpair = qpair; + ctx->group = group; + + spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx); +} + +uint32_t +spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt) +{ + struct spdk_nvmf_transport *transport, *tmp; + uint32_t count = 0; + + TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) { + count += nvmf_transport_accept(transport); + } + + return count; +} + +struct spdk_nvmf_poll_group * +spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt) +{ + struct spdk_io_channel *ch; + + ch = spdk_get_io_channel(tgt); + if (!ch) { + SPDK_ERRLOG("Unable to get I/O channel for target\n"); + return NULL; + } + + return spdk_io_channel_get_ctx(ch); +} + +void +spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group, + spdk_nvmf_poll_group_destroy_done_fn cb_fn, + void *cb_arg) +{ + assert(group->destroy_cb_fn == NULL); + group->destroy_cb_fn = cb_fn; + group->destroy_cb_arg = cb_arg; + + /* This function will put the io_channel associated with this poll group */ + nvmf_tgt_destroy_poll_group_qpairs(group); +} + +int +spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + int rc = -1; + struct 
spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_INIT(&qpair->outstanding); + qpair->group = group; + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + if (tgroup->transport == qpair->transport) { + rc = nvmf_transport_poll_group_add(tgroup, qpair); + break; + } + } + + /* We add the qpair to the group only it is succesfully added into the tgroup */ + if (rc == 0) { + TAILQ_INSERT_TAIL(&group->qpairs, qpair, link); + nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE); + } + + return rc; +} + +static +void _nvmf_ctrlr_destruct(void *ctx) +{ + struct spdk_nvmf_ctrlr *ctrlr = ctx; + + nvmf_ctrlr_destruct(ctrlr); +} + +static void +_nvmf_transport_qpair_fini(void *ctx) +{ + struct spdk_nvmf_qpair *qpair = ctx; + + nvmf_transport_qpair_fini(qpair); +} + +static void +_nvmf_ctrlr_free_from_qpair(void *ctx) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; + struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr; + uint32_t count; + + spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid); + count = spdk_bit_array_count_set(ctrlr->qpair_mask); + if (count == 0) { + spdk_bit_array_free(&ctrlr->qpair_mask); + + spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr); + } + + spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx->qpair); + if (qpair_ctx->cb_fn) { + spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx); + } + free(qpair_ctx); +} + +void +spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvmf_transport_poll_group *tgroup; + struct spdk_nvmf_request *req, *tmp; + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc; + + nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ERROR); + + /* Find the tgroup and remove the qpair from the tgroup */ + TAILQ_FOREACH(tgroup, &qpair->group->tgroups, link) { + if (tgroup->transport == qpair->transport) { + rc = nvmf_transport_poll_group_remove(tgroup, qpair); + if (rc && (rc != ENOTSUP)) { + 
SPDK_ERRLOG("Cannot remove qpair=%p from transport group=%p\n", + qpair, tgroup); + } + break; + } + } + + if (ctrlr) { + sgroup = &qpair->group->sgroups[ctrlr->subsys->id]; + TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) { + if (req->qpair == qpair) { + TAILQ_REMOVE(&sgroup->queued, req, link); + if (nvmf_transport_req_free(req)) { + SPDK_ERRLOG("Transport request free error!\n"); + } + } + } + } + + TAILQ_REMOVE(&qpair->group->qpairs, qpair, link); + qpair->group = NULL; +} + +static void +_nvmf_qpair_destroy(void *ctx, int status) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; + struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING); + qpair_ctx->qid = qpair->qid; + + spdk_nvmf_poll_group_remove(qpair); + + if (!ctrlr || !ctrlr->thread) { + nvmf_transport_qpair_fini(qpair); + if (qpair_ctx->cb_fn) { + spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx); + } + free(qpair_ctx); + return; + } + + qpair_ctx->ctrlr = ctrlr; + spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx); +} + +int +spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx; + + /* If we get a qpair in the uninitialized state, we can just destroy it immediately */ + if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) { + nvmf_transport_qpair_fini(qpair); + if (cb_fn) { + cb_fn(ctx); + } + return 0; + } + + /* The queue pair must be disconnected from the thread that owns it */ + assert(qpair->group->thread == spdk_get_thread()); + + if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) { + /* This can occur if the connection is killed by the target, + * which results in a notification that the connection + * died. Send a message to defer the processing of this + * callback. 
This allows the stack to unwind in the case + * where a bunch of connections are disconnected in + * a loop. */ + if (cb_fn) { + spdk_thread_send_msg(qpair->group->thread, cb_fn, ctx); + } + return 0; + } + + assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE); + nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING); + + qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx)); + if (!qpair_ctx) { + SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n"); + return -ENOMEM; + } + + qpair_ctx->qpair = qpair; + qpair_ctx->cb_fn = cb_fn; + qpair_ctx->thread = qpair->group->thread; + qpair_ctx->ctx = ctx; + + /* Check for outstanding I/O */ + if (!TAILQ_EMPTY(&qpair->outstanding)) { + qpair->state_cb = _nvmf_qpair_destroy; + qpair->state_cb_arg = qpair_ctx; + nvmf_qpair_free_aer(qpair); + return 0; + } + + _nvmf_qpair_destroy(qpair_ctx, 0); + + return 0; +} + +int +spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return nvmf_transport_qpair_get_peer_trid(qpair, trid); +} + +int +spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return nvmf_transport_qpair_get_local_trid(qpair, trid); +} + +int +spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return nvmf_transport_qpair_get_listen_trid(qpair, trid); +} + +int +nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + if (tgroup->transport == transport) { + /* Transport already in the poll group */ + return 0; + } + } + + tgroup = nvmf_transport_poll_group_create(transport); + if (!tgroup) { + SPDK_ERRLOG("Unable to create poll group for transport\n"); + return -1; + } + + tgroup->group = group; + TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link); + + return 0; +} + +static int 
+poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_subsystem_poll_group *sgroup; + uint32_t new_num_ns, old_num_ns; + uint32_t i, j; + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_registrant *reg, *tmp; + struct spdk_io_channel *ch; + struct spdk_nvmf_subsystem_pg_ns_info *ns_info; + struct spdk_nvmf_ctrlr *ctrlr; + bool ns_changed; + + /* Make sure our poll group has memory for this subsystem allocated */ + if (subsystem->id >= group->num_sgroups) { + return -ENOMEM; + } + + sgroup = &group->sgroups[subsystem->id]; + + /* Make sure the array of namespace information is the correct size */ + new_num_ns = subsystem->max_nsid; + old_num_ns = sgroup->num_ns; + + ns_changed = false; + + if (old_num_ns == 0) { + if (new_num_ns > 0) { + /* First allocation */ + sgroup->ns_info = calloc(new_num_ns, sizeof(struct spdk_nvmf_subsystem_pg_ns_info)); + if (!sgroup->ns_info) { + return -ENOMEM; + } + } + } else if (new_num_ns > old_num_ns) { + void *buf; + + /* Make the array larger */ + buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info)); + if (!buf) { + return -ENOMEM; + } + + sgroup->ns_info = buf; + + /* Null out the new namespace information slots */ + for (i = old_num_ns; i < new_num_ns; i++) { + memset(&sgroup->ns_info[i], 0, sizeof(struct spdk_nvmf_subsystem_pg_ns_info)); + } + } else if (new_num_ns < old_num_ns) { + void *buf; + + /* Free the extra I/O channels */ + for (i = new_num_ns; i < old_num_ns; i++) { + ns_info = &sgroup->ns_info[i]; + + if (ns_info->channel) { + spdk_put_io_channel(ns_info->channel); + ns_info->channel = NULL; + } + } + + /* Make the array smaller */ + if (new_num_ns > 0) { + buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info)); + if (!buf) { + return -ENOMEM; + } + sgroup->ns_info = buf; + } else { + free(sgroup->ns_info); + sgroup->ns_info = NULL; + } + } + + sgroup->num_ns = new_num_ns; + + 
/* Detect bdevs that were added or removed */ + for (i = 0; i < sgroup->num_ns; i++) { + ns = subsystem->ns[i]; + ns_info = &sgroup->ns_info[i]; + ch = ns_info->channel; + + if (ns == NULL && ch == NULL) { + /* Both NULL. Leave empty */ + } else if (ns == NULL && ch != NULL) { + /* There was a channel here, but the namespace is gone. */ + ns_changed = true; + spdk_put_io_channel(ch); + ns_info->channel = NULL; + } else if (ns != NULL && ch == NULL) { + /* A namespace appeared but there is no channel yet */ + ns_changed = true; + ch = spdk_bdev_get_io_channel(ns->desc); + if (ch == NULL) { + SPDK_ERRLOG("Could not allocate I/O channel.\n"); + return -ENOMEM; + } + ns_info->channel = ch; + } else if (spdk_uuid_compare(&ns_info->uuid, spdk_bdev_get_uuid(ns->bdev)) != 0) { + /* A namespace was here before, but was replaced by a new one. */ + ns_changed = true; + spdk_put_io_channel(ns_info->channel); + memset(ns_info, 0, sizeof(*ns_info)); + + ch = spdk_bdev_get_io_channel(ns->desc); + if (ch == NULL) { + SPDK_ERRLOG("Could not allocate I/O channel.\n"); + return -ENOMEM; + } + ns_info->channel = ch; + } else if (ns_info->num_blocks != spdk_bdev_get_num_blocks(ns->bdev)) { + /* Namespace is still there but size has changed */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Namespace resized: subsystem_id %d," + " nsid %u, pg %p, old %lu, new %lu\n", + subsystem->id, + ns->nsid, + group, + ns_info->num_blocks, + spdk_bdev_get_num_blocks(ns->bdev)); + ns_changed = true; + } + + if (ns == NULL) { + memset(ns_info, 0, sizeof(*ns_info)); + } else { + ns_info->uuid = *spdk_bdev_get_uuid(ns->bdev); + ns_info->num_blocks = spdk_bdev_get_num_blocks(ns->bdev); + ns_info->crkey = ns->crkey; + ns_info->rtype = ns->rtype; + if (ns->holder) { + ns_info->holder_id = ns->holder->hostid; + } + + memset(&ns_info->reg_hostid, 0, SPDK_NVMF_MAX_NUM_REGISTRANTS * sizeof(struct spdk_uuid)); + j = 0; + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + if (j >= SPDK_NVMF_MAX_NUM_REGISTRANTS) { + 
SPDK_ERRLOG("Maximum %u registrants can support.\n", SPDK_NVMF_MAX_NUM_REGISTRANTS); + return -EINVAL; + } + ns_info->reg_hostid[j++] = reg->hostid; + } + } + } + + if (ns_changed) { + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + if (ctrlr->admin_qpair->group == group) { + nvmf_ctrlr_async_event_ns_notice(ctrlr); + } + } + } + + return 0; +} + +int +nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem) +{ + return poll_group_update_subsystem(group, subsystem); +} + +int +nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + int rc = 0; + struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id]; + + TAILQ_INIT(&sgroup->queued); + + rc = poll_group_update_subsystem(group, subsystem); + if (rc) { + nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL); + goto fini; + } + + sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } + + return rc; +} + +static void +_nvmf_poll_group_remove_subsystem_cb(void *ctx, int status) +{ + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + struct spdk_nvmf_subsystem_poll_group *sgroup; + spdk_nvmf_poll_group_mod_done cpl_fn = NULL; + void *cpl_ctx = NULL; + uint32_t nsid; + + group = qpair_ctx->group; + subsystem = qpair_ctx->subsystem; + cpl_fn = qpair_ctx->cpl_fn; + cpl_ctx = qpair_ctx->cpl_ctx; + sgroup = &group->sgroups[subsystem->id]; + + if (status) { + goto fini; + } + + for (nsid = 0; nsid < sgroup->num_ns; nsid++) { + if (sgroup->ns_info[nsid].channel) { + spdk_put_io_channel(sgroup->ns_info[nsid].channel); + sgroup->ns_info[nsid].channel = NULL; + } + } + + sgroup->num_ns = 0; + free(sgroup->ns_info); + sgroup->ns_info = NULL; +fini: + free(qpair_ctx); + if (cpl_fn) { + cpl_fn(cpl_ctx, status); + } +} + 
+static void +_nvmf_subsystem_disconnect_next_qpair(void *ctx) +{ + struct spdk_nvmf_qpair *qpair; + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + int rc = 0; + + group = qpair_ctx->group; + subsystem = qpair_ctx->subsystem; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) { + break; + } + } + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, qpair_ctx); + } + + if (!qpair || rc != 0) { + _nvmf_poll_group_remove_subsystem_cb(ctx, rc); + } + return; +} + +void +nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup; + struct nvmf_qpair_disconnect_many_ctx *ctx; + int rc = 0; + + ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx)); + + if (!ctx) { + SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n"); + goto fini; + } + + ctx->group = group; + ctx->subsystem = subsystem; + ctx->cpl_fn = cb_fn; + ctx->cpl_ctx = cb_arg; + + sgroup = &group->sgroups[subsystem->id]; + sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) { + break; + } + } + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, ctx); + } else { + /* call the callback immediately. 
It will handle any channel iteration */ + _nvmf_poll_group_remove_subsystem_cb(ctx, 0); + } + + if (rc != 0) { + free(ctx); + goto fini; + } + + return; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} + +void +nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc = 0; + + if (subsystem->id >= group->num_sgroups) { + rc = -1; + goto fini; + } + + sgroup = &group->sgroups[subsystem->id]; + if (sgroup == NULL) { + rc = -1; + goto fini; + } + + assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE); + sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING; + + if (sgroup->io_outstanding > 0) { + sgroup->cb_fn = cb_fn; + sgroup->cb_arg = cb_arg; + return; + } + + assert(sgroup->io_outstanding == 0); + sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} + +void +nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_request *req, *tmp; + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc = 0; + + if (subsystem->id >= group->num_sgroups) { + rc = -1; + goto fini; + } + + sgroup = &group->sgroups[subsystem->id]; + + assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED); + + rc = poll_group_update_subsystem(group, subsystem); + if (rc) { + goto fini; + } + + sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + + /* Release all queued requests */ + TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) { + TAILQ_REMOVE(&sgroup->queued, req, link); + spdk_nvmf_request_exec(req); + } +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} + + +struct spdk_nvmf_poll_group * +spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_transport_poll_group *tgroup; + + tgroup = nvmf_transport_get_optimal_poll_group(qpair->transport, qpair); + + 
if (tgroup == NULL) { + return NULL; + } + + return tgroup->group; +} + +int +spdk_nvmf_poll_group_get_stat(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_poll_group_stat *stat) +{ + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group; + + if (tgt == NULL || stat == NULL) { + return -EINVAL; + } + + ch = spdk_get_io_channel(tgt); + group = spdk_io_channel_get_ctx(ch); + *stat = group->stat; + spdk_put_io_channel(ch); + return 0; +} diff --git a/src/spdk/lib/nvmf/nvmf_fc.h b/src/spdk/lib/nvmf/nvmf_fc.h new file mode 100644 index 000000000..10d3ef9cf --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf_fc.h @@ -0,0 +1,999 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2018-2019 Broadcom. All Rights Reserved. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * FC HW port states.
 *
 * Stored in spdk_nvmf_fc_port.hw_port_status. nvmf_fc_is_port_dead() reports
 * a port as dead only in the QUIESCED state.
 */
enum spdk_fc_port_state {
	SPDK_FC_PORT_OFFLINE = 0,
	SPDK_FC_PORT_ONLINE = 1,
	SPDK_FC_PORT_QUIESCED = 2,
};

/*
 * FC hardware queue pair states, stored in spdk_nvmf_fc_hwqp.state
 * (described there as the queue state for the poller).
 */
enum spdk_fc_hwqp_state {
	SPDK_FC_HWQP_OFFLINE = 0,
	SPDK_FC_HWQP_ONLINE = 1,
};
/*
 * ABTS handling context
 *
 * rpi, oxid and rxid have the same names and widths as the arguments of
 * nvmf_fc_handle_abts_frame(); presumably they are copied from that call —
 * confirm in fc.c.
 */
struct spdk_nvmf_fc_abts_ctx {
	bool handled;
	uint16_t hwqps_responded;
	uint16_t rpi;
	uint16_t oxid;
	uint16_t rxid;
	struct spdk_nvmf_fc_nport *nport;
	uint16_t nport_hdl;
	uint8_t port_hdl;
	void *abts_poller_args;
	void *sync_poller_args;
	int num_hwqps;
	bool queue_synced;
	uint64_t u_id;
	struct spdk_nvmf_fc_hwqp *ls_hwqp;
	uint16_t fcp_rq_id;
};
/*
 * NVMF FC Connection
 *
 * Embeds the generic spdk_nvmf_qpair; nvmf_fc_get_conn() recovers the
 * connection from a qpair pointer using offsetof(), so the member must stay
 * named qpair.
 */
struct spdk_nvmf_fc_conn {
	struct spdk_nvmf_qpair qpair;
	struct spdk_nvme_transport_id trid;

	uint64_t conn_id;
	struct spdk_nvmf_fc_hwqp *hwqp;
	uint16_t esrp_ratio;
	uint16_t rsp_count;
	uint32_t rsn;

	/* The maximum number of I/O outstanding on this connection at one time */
	uint16_t max_queue_depth;
	uint16_t max_rw_depth;
	/* The current number of I/O outstanding on this connection. This number
	 * includes all I/O from the time the capsule is first received until it is
	 * completed.
	 */
	uint16_t cur_queue_depth;

	/* number of read/write requests that are outstanding */
	uint16_t cur_fc_rw_depth;

	struct spdk_nvmf_fc_association *fc_assoc;

	uint16_t rpi;

	/* for association's connection list */
	TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_link;

	/* for association's available connection list */
	TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_avail_link;

	/* for hwqp's connection list */
	TAILQ_ENTRY(spdk_nvmf_fc_conn) link;

	/* New QP create context. */
	struct nvmf_fc_ls_op_ctx *create_opd;
};

/*
 * Structure for maintaining the FC exchanges
 */
struct spdk_nvmf_fc_xchg {
	uint32_t xchg_id;   /* The actual xchg identifier */

	/* Internal */
	TAILQ_ENTRY(spdk_nvmf_fc_xchg) link;
	bool active;
	bool aborted;
	bool send_abts; /* Valid if aborted is set. */
};
/*
 * NVMF FC Request
 *
 * Wraps the generic spdk_nvmf_request. The generic request must remain the
 * first member: the static assert below enforces offset 0, and
 * nvmf_fc_get_fc_req() converts a generic request pointer back to this type.
 */
struct spdk_nvmf_fc_request {
	struct spdk_nvmf_request req;
	struct spdk_nvmf_fc_ersp_iu ersp;
	uint32_t poller_lcore; /* for tracing purposes only */
	struct spdk_thread *poller_thread;
	uint16_t buf_index;
	struct spdk_nvmf_fc_xchg *xchg;
	uint16_t oxid;
	uint16_t rpi;
	struct spdk_nvmf_fc_conn *fc_conn;
	struct spdk_nvmf_fc_hwqp *hwqp;
	/*
	 * Compared against enum spdk_nvmf_fc_request_state values in
	 * nvmf_fc_req_in_xfer(); declared as int rather than the enum type.
	 */
	int state;
	uint32_t transfered_len;
	bool is_aborted;
	uint32_t magic;
	uint32_t s_id;
	uint32_t d_id;
	TAILQ_ENTRY(spdk_nvmf_fc_request) link;
	STAILQ_ENTRY(spdk_nvmf_fc_request) pending_link;
	/* Callbacks of type spdk_nvmf_fc_caller_ctx queued on this request. */
	TAILQ_HEAD(, spdk_nvmf_fc_caller_ctx) abort_cbs;
};

/* Compile-time guarantee that req sits at offset 0 of the FC request. */
SPDK_STATIC_ASSERT(!offsetof(struct spdk_nvmf_fc_request, req),
		   "FC request and NVMF request address don't match.");
/*
 * FC Remote Port
 *
 * Entries are linked into spdk_nvmf_fc_nport.rem_port_list, which that struct
 * describes as the list of remote ports (i.e. initiators) connected to the
 * nport.
 */
struct spdk_nvmf_fc_remote_port_info {
	uint32_t s_id;
	/*
	 * NOTE(review): 32 bits wide here, while spdk_nvmf_fc_conn.rpi and
	 * spdk_nvmf_fc_request.rpi are uint16_t — confirm the intended width.
	 */
	uint32_t rpi;
	uint32_t assoc_count;
	struct spdk_nvmf_fc_wwn fc_nodename;
	struct spdk_nvmf_fc_wwn fc_portname;
	/* Takes the same enum type that nvmf_fc_rport_set_state() accepts. */
	enum spdk_nvmf_fc_object_state rport_state;
	TAILQ_ENTRY(spdk_nvmf_fc_remote_port_info) link;
};
/* Poller API structures (arguments and callback data) */

/*
 * Completion callback type accepted by nvmf_fc_delete_association(), which
 * takes it together with an opaque cb_data pointer.
 *
 * arg - opaque pointer; presumably the cb_data supplied with the callback —
 *       confirm in fc_ls.c
 * err - status value; presumably 0 on success — TODO confirm
 */
typedef void (*spdk_nvmf_fc_del_assoc_cb)(void *arg, uint32_t err);
/**
 * Arguments for dumping an association by id.
 */
struct spdk_nvmf_fc_dump_assoc_id_args {
	uint8_t pport_handle;
	uint16_t nport_handle;
	/*
	 * NOTE(review): 32 bits wide here, while spdk_nvmf_fc_association.assoc_id
	 * is uint64_t — confirm that truncation is intended.
	 */
	uint32_t assoc_id;
};
/**
 * Arguments for ABTS event.
 *
 * NOTE(review): nport_handle and rpi are uint32_t here, whereas
 * nvmf_fc_nport_find() takes a uint16_t nport_hdl and
 * nvmf_fc_handle_abts_frame() takes a uint16_t rpi — confirm that the values
 * always fit in 16 bits.
 */
struct spdk_nvmf_fc_abts_args {
	uint8_t port_handle;
	uint32_t nport_handle;
	uint32_t rpi;
	uint16_t oxid, rxid;
	/* Opaque pointer; its consumer is not visible in this header. */
	void *cb_ctx;
};
/*
 * dump info
 *
 * buffer - destination memory; nvmf_fc_dump_buf_print() treats it as
 *          SPDK_FC_HW_DUMP_BUF_SIZE bytes long
 * offset - number of bytes of buffer already consumed
 */
struct spdk_nvmf_fc_queue_dump_info {
	char *buffer;
	int offset;
};
#define SPDK_FC_HW_DUMP_BUF_SIZE (10 * 4096)

/*
 * Append printf-style formatted text to a dump buffer.
 *
 * dump_info - dump cursor; offset is advanced by the number of bytes consumed
 * fmt       - printf-style format string, followed by its arguments
 *
 * Output that does not fit is truncated and the buffer is then marked full
 * (offset == SPDK_FC_HW_DUMP_BUF_SIZE), so later calls become no-ops. The
 * call is also a no-op when offset is outside the buffer, and offset is left
 * unchanged when formatting itself fails.
 */
static inline void
nvmf_fc_dump_buf_print(struct spdk_nvmf_fc_queue_dump_info *dump_info, const char *fmt, ...)
{
	va_list ap;
	int avail;
	int written;

	/* A negative or past-the-end offset would make the write land outside the buffer. */
	if (dump_info->offset < 0 || dump_info->offset >= SPDK_FC_HW_DUMP_BUF_SIZE) {
		return;
	}

	avail = SPDK_FC_HW_DUMP_BUF_SIZE - dump_info->offset;

	va_start(ap, fmt);
	written = vsnprintf(dump_info->buffer + dump_info->offset, (size_t)avail, fmt, ap);
	va_end(ap);

	/* vsnprintf() returns a negative value on error; never move the cursor backwards. */
	if (written < 0) {
		return;
	}

	/* On truncation vsnprintf() reports the untruncated length; clamp to what was available. */
	dump_info->offset += (written >= avail) ? avail : written;
}
SPDK_NVMF_FC_REQ_NONE_RSP: + return true; + default: + return false; + } +} + +static inline void +nvmf_fc_create_trid(struct spdk_nvme_transport_id *trid, uint64_t n_wwn, uint64_t p_wwn) +{ + spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_FC); + trid->adrfam = SPDK_NVMF_ADRFAM_FC; + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "none"); + snprintf(trid->traddr, sizeof(trid->traddr), "nn-0x%lx:pn-0x%lx", n_wwn, p_wwn); +} + +void nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port); + +void nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port); + +void nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst); +void nvmf_fc_ls_add_conn_failure( + struct spdk_nvmf_fc_association *assoc, + struct spdk_nvmf_fc_ls_rqst *ls_rqst, + struct spdk_nvmf_fc_conn *fc_conn, + bool aq_conn); + +void nvmf_fc_init_hwqp(struct spdk_nvmf_fc_port *fc_port, struct spdk_nvmf_fc_hwqp *hwqp); + +void nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp); + +struct spdk_nvmf_fc_conn *nvmf_fc_hwqp_find_fc_conn(struct spdk_nvmf_fc_hwqp *hwqp, + uint64_t conn_id); + +void nvmf_fc_hwqp_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp, void *queues_curr); + +struct spdk_nvmf_fc_port *nvmf_fc_port_lookup(uint8_t port_hdl); + +bool nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port); + +int nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port); + +bool nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port); + +int nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port); + +int nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport, + enum spdk_nvmf_fc_object_state state); + +void nvmf_fc_port_add(struct spdk_nvmf_fc_port *fc_port); + +int nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport); + +int nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport); + +struct spdk_nvmf_fc_nport *nvmf_fc_nport_find(uint8_t port_hdl, uint16_t nport_hdl); + +int 
nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport, + enum spdk_nvmf_fc_object_state state); + +bool nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port); + +bool nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port); + +bool nvmf_fc_nport_has_no_rport(struct spdk_nvmf_fc_nport *nport); + +int nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc, + enum spdk_nvmf_fc_object_state state); + +int nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport, + uint64_t assoc_id, bool send_abts, bool backend_initiated, + spdk_nvmf_fc_del_assoc_cb del_assoc_cb, + void *cb_data); + +bool nvmf_ctrlr_is_on_nport(uint8_t port_hdl, uint16_t nport_hdl, + struct spdk_nvmf_ctrlr *ctrlr); + +void nvmf_fc_assign_queue_to_master_thread(struct spdk_nvmf_fc_hwqp *hwqp); + +void nvmf_fc_poll_group_add_hwqp(struct spdk_nvmf_fc_hwqp *hwqp); + +void nvmf_fc_poll_group_remove_hwqp(struct spdk_nvmf_fc_hwqp *hwqp); + +int nvmf_fc_hwqp_set_online(struct spdk_nvmf_fc_hwqp *hwqp); + +int nvmf_fc_hwqp_set_offline(struct spdk_nvmf_fc_hwqp *hwqp); + +uint32_t nvmf_fc_get_prli_service_params(void); + +void nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport, uint16_t rpi, uint16_t oxid, + uint16_t rxid); + +void nvmf_fc_request_abort(struct spdk_nvmf_fc_request *fc_req, bool send_abts, + spdk_nvmf_fc_caller_cb cb, void *cb_args); + +struct spdk_nvmf_tgt *nvmf_fc_get_tgt(void); + +struct spdk_thread *nvmf_fc_get_master_thread(void); + +/* + * These functions are called by low level FC driver + */ + +static inline struct spdk_nvmf_fc_conn * +nvmf_fc_get_conn(struct spdk_nvmf_qpair *qpair) +{ + return (struct spdk_nvmf_fc_conn *) + ((uintptr_t)qpair - offsetof(struct spdk_nvmf_fc_conn, qpair)); +} + +static inline uint16_t +nvmf_fc_advance_conn_sqhead(struct spdk_nvmf_qpair *qpair) +{ + /* advance sq_head pointer - wrap if needed */ + qpair->sq_head = 
(qpair->sq_head == qpair->sq_head_max) ? + 0 : (qpair->sq_head + 1); + return qpair->sq_head; +} + +static inline bool +nvmf_fc_use_send_frame(struct spdk_nvmf_request *req) +{ + /* For now use for only keepalives. */ + if (req->qpair->qid == 0 && + (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_KEEP_ALIVE)) { + return true; + } + return false; +} + +enum spdk_nvmf_fc_poller_api_ret nvmf_fc_poller_api_func( + struct spdk_nvmf_fc_hwqp *hwqp, + enum spdk_nvmf_fc_poller_api api, + void *api_args); + +int nvmf_fc_hwqp_process_frame(struct spdk_nvmf_fc_hwqp *hwqp, uint32_t buff_idx, + struct spdk_nvmf_fc_frame_hdr *frame, + struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen); + +void nvmf_fc_hwqp_process_pending_reqs(struct spdk_nvmf_fc_hwqp *hwqp); + +void nvmf_fc_hwqp_process_pending_ls_rqsts(struct spdk_nvmf_fc_hwqp *hwqp); + +void nvmf_fc_request_set_state(struct spdk_nvmf_fc_request *fc_req, + enum spdk_nvmf_fc_request_state state); + +char *nvmf_fc_request_get_state_str(int state); + +void _nvmf_fc_request_free(struct spdk_nvmf_fc_request *fc_req); + +void nvmf_fc_request_abort_complete(void *arg1); + +bool nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req, + uint32_t rsp_cnt, uint32_t xfer_len); + +int nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *req); + +#endif diff --git a/src/spdk/lib/nvmf/nvmf_internal.h b/src/spdk/lib/nvmf/nvmf_internal.h new file mode 100644 index 000000000..f1f3837d5 --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf_internal.h @@ -0,0 +1,371 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __NVMF_INTERNAL_H__ +#define __NVMF_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/likely.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_cmd.h" +#include "spdk/nvmf_transport.h" +#include "spdk/nvmf_spec.h" +#include "spdk/assert.h" +#include "spdk/bdev.h" +#include "spdk/queue.h" +#include "spdk/util.h" +#include "spdk/thread.h" + +#define NVMF_MAX_ASYNC_EVENTS (4) + +enum spdk_nvmf_subsystem_state { + SPDK_NVMF_SUBSYSTEM_INACTIVE = 0, + SPDK_NVMF_SUBSYSTEM_ACTIVATING, + SPDK_NVMF_SUBSYSTEM_ACTIVE, + SPDK_NVMF_SUBSYSTEM_PAUSING, + SPDK_NVMF_SUBSYSTEM_PAUSED, + SPDK_NVMF_SUBSYSTEM_RESUMING, + SPDK_NVMF_SUBSYSTEM_DEACTIVATING, +}; + +struct spdk_nvmf_tgt { + char name[NVMF_TGT_NAME_MAX_LENGTH]; + + pthread_mutex_t mutex; + + uint64_t discovery_genctr; + + uint32_t max_subsystems; + + /* Array of subsystem pointers of size max_subsystems indexed by sid */ + struct spdk_nvmf_subsystem **subsystems; + + TAILQ_HEAD(, spdk_nvmf_transport) transports; + TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups; + + /* Used for round-robin assignment of connections to poll groups */ + struct spdk_nvmf_poll_group *next_poll_group; + + spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; + void *destroy_cb_arg; + + TAILQ_ENTRY(spdk_nvmf_tgt) link; +}; + +struct spdk_nvmf_host { + char nqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + TAILQ_ENTRY(spdk_nvmf_host) link; +}; + +struct spdk_nvmf_subsystem_listener { + struct spdk_nvmf_subsystem *subsystem; + spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn; + void *cb_arg; + struct spdk_nvme_transport_id *trid; + struct spdk_nvmf_transport *transport; + TAILQ_ENTRY(spdk_nvmf_subsystem_listener) link; +}; + +/* Maximum number of registrants supported per namespace */ +#define SPDK_NVMF_MAX_NUM_REGISTRANTS 16 + +struct spdk_nvmf_registrant_info { + uint64_t rkey; + char host_uuid[SPDK_UUID_STRING_LEN]; +}; + +struct spdk_nvmf_reservation_info { + bool ptpl_activated; + enum spdk_nvme_reservation_type rtype; + uint64_t crkey; + char 
bdev_uuid[SPDK_UUID_STRING_LEN]; + char holder_uuid[SPDK_UUID_STRING_LEN]; + uint32_t num_regs; + struct spdk_nvmf_registrant_info registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS]; +}; + +struct spdk_nvmf_subsystem_pg_ns_info { + struct spdk_io_channel *channel; + struct spdk_uuid uuid; + /* current reservation key, no reservation if the value is 0 */ + uint64_t crkey; + /* reservation type */ + enum spdk_nvme_reservation_type rtype; + /* Host ID which holds the reservation */ + struct spdk_uuid holder_id; + /* Host ID for the registrants with the namespace */ + struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint64_t num_blocks; +}; + +typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status); + +struct spdk_nvmf_subsystem_poll_group { + /* Array of namespace information for each namespace indexed by nsid - 1 */ + struct spdk_nvmf_subsystem_pg_ns_info *ns_info; + uint32_t num_ns; + + uint64_t io_outstanding; + spdk_nvmf_poll_group_mod_done cb_fn; + void *cb_arg; + + enum spdk_nvmf_subsystem_state state; + + TAILQ_HEAD(, spdk_nvmf_request) queued; +}; + +struct spdk_nvmf_registrant { + TAILQ_ENTRY(spdk_nvmf_registrant) link; + struct spdk_uuid hostid; + /* Registration key */ + uint64_t rkey; +}; + +struct spdk_nvmf_ns { + uint32_t nsid; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_nvmf_ns_opts opts; + /* reservation notification mask */ + uint32_t mask; + /* generation code */ + uint32_t gen; + /* registrants head */ + TAILQ_HEAD(, spdk_nvmf_registrant) registrants; + /* current reservation key */ + uint64_t crkey; + /* reservation type */ + enum spdk_nvme_reservation_type rtype; + /* current reservation holder, only valid if reservation type can only have one holder */ + struct spdk_nvmf_registrant *holder; + /* Persist Through Power Loss file which contains the persistent reservation */ + char *ptpl_file; + /* Persist Through Power Loss feature is enabled */ + bool 
ptpl_activated; +}; + +struct spdk_nvmf_ctrlr_feat { + union spdk_nvme_feat_arbitration arbitration; + union spdk_nvme_feat_power_management power_management; + union spdk_nvme_feat_error_recovery error_recovery; + union spdk_nvme_feat_volatile_write_cache volatile_write_cache; + union spdk_nvme_feat_number_of_queues number_of_queues; + union spdk_nvme_feat_write_atomicity write_atomicity; + union spdk_nvme_feat_async_event_configuration async_event_configuration; + union spdk_nvme_feat_keep_alive_timer keep_alive_timer; +}; + +/* + * NVMf reservation notification log page. + */ +struct spdk_nvmf_reservation_log { + struct spdk_nvme_reservation_notification_log log; + TAILQ_ENTRY(spdk_nvmf_reservation_log) link; + struct spdk_nvmf_ctrlr *ctrlr; +}; + +/* + * This structure represents an NVMe-oF controller, + * which is like a "session" in networking terms. + */ +struct spdk_nvmf_ctrlr { + uint16_t cntlid; + char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + struct spdk_nvmf_subsystem *subsys; + + struct spdk_nvmf_ctrlr_data cdata; + + struct spdk_nvmf_registers vcprop; + + struct spdk_nvmf_ctrlr_feat feat; + + struct spdk_nvmf_qpair *admin_qpair; + struct spdk_thread *thread; + struct spdk_bit_array *qpair_mask; + + struct spdk_nvmf_request *aer_req[NVMF_MAX_ASYNC_EVENTS]; + union spdk_nvme_async_event_completion notice_event; + union spdk_nvme_async_event_completion reservation_event; + uint8_t nr_aer_reqs; + struct spdk_uuid hostid; + + uint16_t changed_ns_list_count; + struct spdk_nvme_ns_list changed_ns_list; + uint64_t log_page_count; + uint8_t num_avail_log_pages; + TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head; + + /* Time to trigger keep-alive--poller_time = now_tick + period */ + uint64_t last_keep_alive_tick; + struct spdk_poller *keep_alive_poller; + + bool dif_insert_or_strip; + + TAILQ_ENTRY(spdk_nvmf_ctrlr) link; +}; + +struct spdk_nvmf_subsystem { + struct spdk_thread *thread; + uint32_t id; + enum spdk_nvmf_subsystem_state state; + + char 
subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + enum spdk_nvmf_subtype subtype; + uint16_t next_cntlid; + bool allow_any_host; + bool allow_any_listener; + + struct spdk_nvmf_tgt *tgt; + + char sn[SPDK_NVME_CTRLR_SN_LEN + 1]; + char mn[SPDK_NVME_CTRLR_MN_LEN + 1]; + + /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */ + struct spdk_nvmf_ns **ns; + uint32_t max_nsid; + /* This is the maximum allowed nsid to a subsystem */ + uint32_t max_allowed_nsid; + + TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs; + TAILQ_HEAD(, spdk_nvmf_host) hosts; + TAILQ_HEAD(, spdk_nvmf_subsystem_listener) listeners; + + TAILQ_ENTRY(spdk_nvmf_subsystem) entries; +}; + +int nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_transport *transport); +int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem); +int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); + +void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, + struct iovec *iov, + uint32_t iovcnt, uint64_t offset, uint32_t length); + +void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr); +int nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req); +int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req); +int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req); +bool 
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr); +bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr); +void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid); + +void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata, + bool dif_insert_or_strip); +int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req); +int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req); +bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd, + struct spdk_dif_ctx *dif_ctx); + +int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr); +void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr); +void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem, + bool stop); +struct 
spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, + uint16_t cntlid); +struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener( + struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid); +struct spdk_nvmf_listener *nvmf_transport_find_listener( + struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + +int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr); +void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr); +void nvmf_ns_reservation_request(void *ctx); +void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_ns *ns, + enum spdk_nvme_reservation_notification_log_page_type type); + +/* + * Abort aer is sent on a per controller basis and sends a completion for the aer to the host. + * This function should be called when attempting to recover in error paths when it is OK for + * the host to send a subsequent AER. + */ +void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr); + +/* + * Free aer simply frees the rdma resources for the aer without informing the host. + * This function should be called when deleting a qpair when one wants to make sure + * the qpair is completely empty before freeing the request. The reason we free the + * AER without sending a completion is to prevent the host from sending another AER. + */ +void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair); + +int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req); + +static inline struct spdk_nvmf_ns * +_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + /* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. 
*/ + if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) { + return NULL; + } + + return subsystem->ns[nsid - 1]; +} + +static inline bool +nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair) +{ + return qpair->qid == 0; +} + +#endif /* __NVMF_INTERNAL_H__ */ diff --git a/src/spdk/lib/nvmf/nvmf_rpc.c b/src/spdk/lib/nvmf/nvmf_rpc.c new file mode 100644 index 000000000..5dc9f42f0 --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf_rpc.c @@ -0,0 +1,2012 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2018-2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Convert one hexadecimal digit (either case) to its value 0..15.
 * Returns -1 for any other character, including the terminating NUL.
 */
static int
hex_nybble_to_num(char c)
{
	int value = -1;

	if ('0' <= c && c <= '9') {
		value = c - '0';
	} else if ('a' <= c && c <= 'f') {
		value = 0xA + (c - 'a');
	} else if ('A' <= c && c <= 'F') {
		value = 0xA + (c - 'A');
	}

	return value;
}

/*
 * Convert the two hex digits at str[0] and str[1] to a byte value 0..255.
 * Returns a negative value if either character is not a hex digit.
 */
static int
hex_byte_to_num(const char *str)
{
	int high, low;

	high = hex_nybble_to_num(str[0]);
	if (high < 0) {
		/* Stop here: str[0] may be the terminator, so str[1] must not be read. */
		return high;
	}

	low = hex_nybble_to_num(str[1]);
	if (low < 0) {
		return low;
	}

	return (high << 4) | low;
}

/*
 * Decode a string in "ABCDEF012345" format to its binary representation.
 *
 * Exactly size bytes are written to out, first digit pair first. Returns 0 on
 * success and -1 if the string contains a non-hex character or does not
 * consist of exactly 2 * size digits.
 */
static int
decode_hex_string_be(const char *str, uint8_t *out, size_t size)
{
	const char *cursor = str;
	uint8_t *dst = out;
	size_t remaining;

	for (remaining = size; remaining > 0; remaining--) {
		int byte = hex_byte_to_num(cursor);

		if (byte < 0) {
			/* Invalid hex byte or end of string */
			return -1;
		}

		*dst++ = (uint8_t)byte;
		cursor += 2;
	}

	/* Anything left after the expected digits is a length mismatch */
	return (*cursor == '\0') ? 0 : -1;
}
spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) { + const struct spdk_nvme_transport_id *trid; + const char *adrfam; + + trid = spdk_nvmf_subsystem_listener_get_trid(listener); + + spdk_json_write_object_begin(w); + adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam); + if (adrfam == NULL) { + adrfam = "unknown"; + } + /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */ + spdk_json_write_named_string(w, "transport", trid->trstring); + spdk_json_write_named_string(w, "trtype", trid->trstring); + spdk_json_write_named_string(w, "adrfam", adrfam); + spdk_json_write_named_string(w, "traddr", trid->traddr); + spdk_json_write_named_string(w, "trsvcid", trid->trsvcid); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_json_write_named_bool(w, "allow_any_host", + spdk_nvmf_subsystem_get_allow_any_host(subsystem)); + + spdk_json_write_named_array_begin(w, "hosts"); + + for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL; + host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "nqn", spdk_nvmf_host_get_nqn(host)); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) { + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_ns_opts ns_opts; + uint32_t max_namespaces; + + spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem)); + + spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem)); + + max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem); + if (max_namespaces != 0) { + spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces); + } + + spdk_json_write_named_array_begin(w, "namespaces"); + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + spdk_nvmf_ns_get_opts(ns, 
&ns_opts, sizeof(ns_opts)); + spdk_json_write_object_begin(w); + spdk_json_write_named_int32(w, "nsid", spdk_nvmf_ns_get_id(ns)); + spdk_json_write_named_string(w, "bdev_name", + spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + /* NOTE: "name" is kept for compatibility only - new code should use bdev_name. */ + spdk_json_write_named_string(w, "name", + spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + + if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) { + spdk_json_write_name(w, "nguid"); + json_write_hex_str(w, ns_opts.nguid, sizeof(ns_opts.nguid)); + } + + if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) { + spdk_json_write_name(w, "eui64"); + json_write_hex_str(w, ns_opts.eui64, sizeof(ns_opts.eui64)); + } + + if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) { + char uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + } + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + } + spdk_json_write_object_end(w); +} + +static void +rpc_nvmf_get_subsystems(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_subsystem req = { 0 }; + struct spdk_json_write_ctx *w; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + if (params) { + if (spdk_json_decode_object(params, rpc_get_subsystem_decoders, + SPDK_COUNTOF(rpc_get_subsystem_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + return; + } + } + + tgt = spdk_nvmf_get_tgt(req.tgt_name); + if (!tgt) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + free(req.tgt_name); + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_array_begin(w); + subsystem = spdk_nvmf_subsystem_get_first(tgt); + while 
(subsystem) { + dump_nvmf_subsystem(w, subsystem); + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + free(req.tgt_name); +} +SPDK_RPC_REGISTER("nvmf_get_subsystems", rpc_nvmf_get_subsystems, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_get_subsystems, get_nvmf_subsystems) + +struct rpc_subsystem_create { + char *nqn; + char *serial_number; + char *model_number; + char *tgt_name; + uint32_t max_namespaces; + bool allow_any_host; +}; + +static const struct spdk_json_object_decoder rpc_subsystem_create_decoders[] = { + {"nqn", offsetof(struct rpc_subsystem_create, nqn), spdk_json_decode_string}, + {"serial_number", offsetof(struct rpc_subsystem_create, serial_number), spdk_json_decode_string, true}, + {"model_number", offsetof(struct rpc_subsystem_create, model_number), spdk_json_decode_string, true}, + {"tgt_name", offsetof(struct rpc_subsystem_create, tgt_name), spdk_json_decode_string, true}, + {"max_namespaces", offsetof(struct rpc_subsystem_create, max_namespaces), spdk_json_decode_uint32, true}, + {"allow_any_host", offsetof(struct rpc_subsystem_create, allow_any_host), spdk_json_decode_bool, true}, +}; + +static void +rpc_nvmf_subsystem_started(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + + if (!status) { + struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } else { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Subsystem %s start failed", + subsystem->subnqn); + spdk_nvmf_subsystem_destroy(subsystem); + } +} + +static void +rpc_nvmf_create_subsystem(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_subsystem_create *req; + struct spdk_nvmf_subsystem *subsystem = NULL; + struct spdk_nvmf_tgt *tgt; + int rc = -1; + + req = 
calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Memory allocation failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation failed"); + return; + } + + if (spdk_json_decode_object(params, rpc_subsystem_create_decoders, + SPDK_COUNTOF(rpc_subsystem_create_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + goto cleanup; + } + + tgt = spdk_nvmf_get_tgt(req->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find target %s\n", req->tgt_name); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find target %s", req->tgt_name); + goto cleanup; + } + + subsystem = spdk_nvmf_subsystem_create(tgt, req->nqn, SPDK_NVMF_SUBTYPE_NVME, + req->max_namespaces); + if (!subsystem) { + SPDK_ERRLOG("Unable to create subsystem %s\n", req->nqn); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to create subsystem %s", req->nqn); + goto cleanup; + } + + if (req->serial_number) { + if (spdk_nvmf_subsystem_set_sn(subsystem, req->serial_number)) { + SPDK_ERRLOG("Subsystem %s: invalid serial number '%s'\n", req->nqn, req->serial_number); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid SN %s", req->serial_number); + goto cleanup; + } + } + + if (req->model_number) { + if (spdk_nvmf_subsystem_set_mn(subsystem, req->model_number)) { + SPDK_ERRLOG("Subsystem %s: invalid model number '%s'\n", req->nqn, req->model_number); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid MN %s", req->model_number); + goto cleanup; + } + } + + spdk_nvmf_subsystem_set_allow_any_host(subsystem, req->allow_any_host); + + rc = spdk_nvmf_subsystem_start(subsystem, + rpc_nvmf_subsystem_started, + request); + +cleanup: + free(req->nqn); + free(req->tgt_name); + 
free(req->serial_number); + free(req->model_number); + free(req); + + if (rc && subsystem) { + spdk_nvmf_subsystem_destroy(subsystem); + } +} +SPDK_RPC_REGISTER("nvmf_create_subsystem", rpc_nvmf_create_subsystem, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_create_subsystem, nvmf_subsystem_create) + +struct rpc_delete_subsystem { + char *nqn; + char *tgt_name; +}; + +static void +free_rpc_delete_subsystem(struct rpc_delete_subsystem *r) +{ + free(r->nqn); + free(r->tgt_name); +} + +static void +rpc_nvmf_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + nvmf_subsystem_remove_all_listeners(subsystem, true); + spdk_nvmf_subsystem_destroy(subsystem); + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static const struct spdk_json_object_decoder rpc_delete_subsystem_decoders[] = { + {"nqn", offsetof(struct rpc_delete_subsystem, nqn), spdk_json_decode_string}, + {"tgt_name", offsetof(struct rpc_delete_subsystem, tgt_name), spdk_json_decode_string, true}, +}; + +static void +rpc_nvmf_delete_subsystem(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_subsystem req = { 0 }; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + if (spdk_json_decode_object(params, rpc_delete_subsystem_decoders, + SPDK_COUNTOF(rpc_delete_subsystem_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.nqn == NULL) { + SPDK_ERRLOG("missing name param\n"); + goto invalid; + } + + tgt = spdk_nvmf_get_tgt(req.tgt_name); + if (!tgt) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + goto invalid_custom_response; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, req.nqn); + if (!subsystem) { + goto invalid; + } 
+ + free_rpc_delete_subsystem(&req); + + spdk_nvmf_subsystem_stop(subsystem, + rpc_nvmf_subsystem_stopped, + request); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +invalid_custom_response: + free_rpc_delete_subsystem(&req); +} +SPDK_RPC_REGISTER("nvmf_delete_subsystem", rpc_nvmf_delete_subsystem, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_delete_subsystem, delete_nvmf_subsystem) + +struct rpc_listen_address { + char *transport; + char *adrfam; + char *traddr; + char *trsvcid; +}; + +#define RPC_MAX_LISTEN_ADDRESSES 255 +#define RPC_MAX_NAMESPACES 255 + +struct rpc_listen_addresses { + size_t num_listen_address; + struct rpc_listen_address addresses[RPC_MAX_LISTEN_ADDRESSES]; +}; + +static const struct spdk_json_object_decoder rpc_listen_address_decoders[] = { + /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */ + {"transport", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"trtype", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"adrfam", offsetof(struct rpc_listen_address, adrfam), spdk_json_decode_string, true}, + {"traddr", offsetof(struct rpc_listen_address, traddr), spdk_json_decode_string}, + {"trsvcid", offsetof(struct rpc_listen_address, trsvcid), spdk_json_decode_string}, +}; + +static int +decode_rpc_listen_address(const struct spdk_json_val *val, void *out) +{ + struct rpc_listen_address *req = (struct rpc_listen_address *)out; + if (spdk_json_decode_object(val, rpc_listen_address_decoders, + SPDK_COUNTOF(rpc_listen_address_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + return -1; + } + return 0; +} + +static void +free_rpc_listen_address(struct rpc_listen_address *r) +{ + free(r->transport); + free(r->adrfam); + free(r->traddr); + free(r->trsvcid); +} + +enum nvmf_rpc_listen_op { + NVMF_RPC_LISTEN_ADD, + NVMF_RPC_LISTEN_REMOVE, +}; 
+ +struct nvmf_rpc_listener_ctx { + char *nqn; + char *tgt_name; + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_subsystem *subsystem; + struct rpc_listen_address address; + + struct spdk_jsonrpc_request *request; + struct spdk_nvme_transport_id trid; + enum nvmf_rpc_listen_op op; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_listener_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_listener_ctx, nqn), spdk_json_decode_string}, + {"listen_address", offsetof(struct nvmf_rpc_listener_ctx, address), decode_rpc_listen_address}, + {"tgt_name", offsetof(struct nvmf_rpc_listener_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +nvmf_rpc_listener_ctx_free(struct nvmf_rpc_listener_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->tgt_name); + free_rpc_listen_address(&ctx->address); + free(ctx); +} + +static void +nvmf_rpc_listen_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; + + request = ctx->request; + if (ctx->response_sent) { + /* If an error occurred, the response has already been sent. */ + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + nvmf_rpc_listener_ctx_free(ctx); + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_subsystem_listen(void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + + if (status) { + /* Destroy the listener that we just created. Ignore the error code because + * the RPC is failing already anyway. 
*/ + spdk_nvmf_tgt_stop_listen(ctx->tgt, &ctx->trid); + + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(ctx->subsystem, nvmf_rpc_listen_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_listener_ctx_free(ctx); + /* Can't really do anything to recover here - subsystem will remain paused. */ + } +} + +static void +nvmf_rpc_listen_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + int rc; + + if (ctx->op == NVMF_RPC_LISTEN_ADD) { + if (!nvmf_subsystem_find_listener(subsystem, &ctx->trid)) { + rc = spdk_nvmf_tgt_listen(ctx->tgt, &ctx->trid); + if (rc == 0) { + spdk_nvmf_subsystem_add_listener(ctx->subsystem, &ctx->trid, nvmf_rpc_subsystem_listen, ctx); + return; + } + + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + } else if (ctx->op == NVMF_RPC_LISTEN_REMOVE) { + if (spdk_nvmf_subsystem_remove_listener(subsystem, &ctx->trid)) { + SPDK_ERRLOG("Unable to remove listener.\n"); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + spdk_nvmf_tgt_stop_listen(ctx->tgt, &ctx->trid); + } else { + SPDK_UNREACHABLE(); + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_listen_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_listener_ctx_free(ctx); + /* Can't really do anything to recover here - subsystem will remain paused. 
*/ + } +} + +static int +rpc_listen_address_to_trid(const struct rpc_listen_address *address, + struct spdk_nvme_transport_id *trid) +{ + size_t len; + + memset(trid, 0, sizeof(*trid)); + + if (spdk_nvme_transport_id_populate_trstring(trid, address->transport)) { + SPDK_ERRLOG("Invalid transport string: %s\n", address->transport); + return -EINVAL; + } + + if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, address->transport)) { + SPDK_ERRLOG("Invalid transport type: %s\n", address->transport); + return -EINVAL; + } + + if (address->adrfam) { + if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, address->adrfam)) { + SPDK_ERRLOG("Invalid adrfam: %s\n", address->adrfam); + return -EINVAL; + } + } else { + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + } + + len = strlen(address->traddr); + if (len > sizeof(trid->traddr) - 1) { + SPDK_ERRLOG("Transport address longer than %zu characters: %s\n", + sizeof(trid->traddr) - 1, address->traddr); + return -EINVAL; + } + memcpy(trid->traddr, address->traddr, len + 1); + + len = strlen(address->trsvcid); + if (len > sizeof(trid->trsvcid) - 1) { + SPDK_ERRLOG("Transport service id longer than %zu characters: %s\n", + sizeof(trid->trsvcid) - 1, address->trsvcid); + return -EINVAL; + } + memcpy(trid->trsvcid, address->trsvcid, len + 1); + + return 0; +} + +static void +rpc_nvmf_subsystem_add_listener(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_listener_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + ctx->request = request; + + if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder, + SPDK_COUNTOF(nvmf_rpc_listener_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid 
parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + ctx->tgt = tgt; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->subsystem = subsystem; + + if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->op = NVMF_RPC_LISTEN_ADD; + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_listener_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_listener", rpc_nvmf_subsystem_add_listener, + SPDK_RPC_RUNTIME); + +static void +rpc_nvmf_subsystem_remove_listener(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_listener_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + ctx->request = request; + + if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder, + SPDK_COUNTOF(nvmf_rpc_listener_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + 
return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + ctx->tgt = tgt; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->subsystem = subsystem; + + if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->op = NVMF_RPC_LISTEN_REMOVE; + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_listener_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_listener", rpc_nvmf_subsystem_remove_listener, + SPDK_RPC_RUNTIME); + +struct spdk_nvmf_ns_params { + char *bdev_name; + char *ptpl_file; + uint32_t nsid; + char nguid[16]; + char eui64[8]; + struct spdk_uuid uuid; +}; + +struct rpc_namespaces { + size_t num_ns; + struct spdk_nvmf_ns_params ns_params[RPC_MAX_NAMESPACES]; +}; + + +static const struct spdk_json_object_decoder rpc_ns_params_decoders[] = { + {"nsid", offsetof(struct spdk_nvmf_ns_params, nsid), spdk_json_decode_uint32, true}, + {"bdev_name", offsetof(struct spdk_nvmf_ns_params, bdev_name), spdk_json_decode_string}, + {"ptpl_file", offsetof(struct spdk_nvmf_ns_params, ptpl_file), spdk_json_decode_string, true}, + {"nguid", offsetof(struct spdk_nvmf_ns_params, nguid), decode_ns_nguid, true}, + {"eui64", offsetof(struct spdk_nvmf_ns_params, eui64), decode_ns_eui64, true}, + 
{"uuid", offsetof(struct spdk_nvmf_ns_params, uuid), decode_ns_uuid, true}, +}; + +static int +decode_rpc_ns_params(const struct spdk_json_val *val, void *out) +{ + struct spdk_nvmf_ns_params *ns_params = out; + + return spdk_json_decode_object(val, rpc_ns_params_decoders, + SPDK_COUNTOF(rpc_ns_params_decoders), + ns_params); +} + +struct nvmf_rpc_ns_ctx { + char *nqn; + char *tgt_name; + struct spdk_nvmf_ns_params ns_params; + + struct spdk_jsonrpc_request *request; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_ns_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_ns_ctx, nqn), spdk_json_decode_string}, + {"namespace", offsetof(struct nvmf_rpc_ns_ctx, ns_params), decode_rpc_ns_params}, + {"tgt_name", offsetof(struct nvmf_rpc_ns_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +nvmf_rpc_ns_ctx_free(struct nvmf_rpc_ns_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->tgt_name); + free(ctx->ns_params.bdev_name); + free(ctx->ns_params.ptpl_file); + free(ctx); +} + +static void +nvmf_rpc_ns_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_ns_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request = ctx->request; + uint32_t nsid = ctx->ns_params.nsid; + bool response_sent = ctx->response_sent; + struct spdk_json_write_ctx *w; + + nvmf_rpc_ns_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_uint32(w, nsid); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_ns_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_ns_ctx *ctx = cb_arg; + struct spdk_nvmf_ns_opts ns_opts; + struct spdk_bdev *bdev; + + bdev = spdk_bdev_get_by_name(ctx->ns_params.bdev_name); + if (!bdev) { + SPDK_ERRLOG("No bdev with name %s\n", ctx->ns_params.bdev_name); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + 
ctx->response_sent = true; + goto resume; + } + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts)); + ns_opts.nsid = ctx->ns_params.nsid; + + SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(ctx->ns_params.nguid), "size mismatch"); + memcpy(ns_opts.nguid, ctx->ns_params.nguid, sizeof(ns_opts.nguid)); + + SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(ctx->ns_params.eui64), "size mismatch"); + memcpy(ns_opts.eui64, ctx->ns_params.eui64, sizeof(ns_opts.eui64)); + + if (!spdk_mem_all_zero(&ctx->ns_params.uuid, sizeof(ctx->ns_params.uuid))) { + ns_opts.uuid = ctx->ns_params.uuid; + } + + ctx->ns_params.nsid = spdk_nvmf_subsystem_add_ns(subsystem, bdev, &ns_opts, sizeof(ns_opts), + ctx->ns_params.ptpl_file); + if (ctx->ns_params.nsid == 0) { + SPDK_ERRLOG("Unable to add namespace\n"); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + goto resume; + } + +resume: + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_ns_resumed, ctx)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_ns_ctx_free(ctx); + } +} + +static void +rpc_nvmf_subsystem_add_ns(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_ns_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_ns_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_ns_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->response_sent = false; + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if 
(!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_ns_ctx_free(ctx); + return; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_ns_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_ns_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_ns", rpc_nvmf_subsystem_add_ns, SPDK_RPC_RUNTIME) + +struct nvmf_rpc_remove_ns_ctx { + char *nqn; + char *tgt_name; + uint32_t nsid; + + struct spdk_jsonrpc_request *request; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_remove_ns_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_remove_ns_ctx, nqn), spdk_json_decode_string}, + {"nsid", offsetof(struct nvmf_rpc_remove_ns_ctx, nsid), spdk_json_decode_uint32}, + {"tgt_name", offsetof(struct nvmf_rpc_remove_ns_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +nvmf_rpc_remove_ns_ctx_free(struct nvmf_rpc_remove_ns_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->tgt_name); + free(ctx); +} + +static void +nvmf_rpc_remove_ns_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request = ctx->request; + bool response_sent = ctx->response_sent; + struct spdk_json_write_ctx *w; + + nvmf_rpc_remove_ns_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_remove_ns_paused(struct 
spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg; + int ret; + + ret = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid); + if (ret < 0) { + SPDK_ERRLOG("Unable to remove namespace ID %u\n", ctx->nsid); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_remove_ns_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_remove_ns_ctx_free(ctx); + } +} + +static void +rpc_nvmf_subsystem_remove_ns(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_remove_ns_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_remove_ns_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_remove_ns_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + 
nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_remove_ns_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_remove_ns_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_ns", rpc_nvmf_subsystem_remove_ns, SPDK_RPC_RUNTIME) + +enum nvmf_rpc_host_op { + NVMF_RPC_HOST_ADD, + NVMF_RPC_HOST_REMOVE, + NVMF_RPC_HOST_ALLOW_ANY, +}; + +struct nvmf_rpc_host_ctx { + struct spdk_jsonrpc_request *request; + + char *nqn; + char *host; + char *tgt_name; + + enum nvmf_rpc_host_op op; + + bool allow_any_host; + + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_host_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string}, + {"host", offsetof(struct nvmf_rpc_host_ctx, host), spdk_json_decode_string}, + {"tgt_name", offsetof(struct nvmf_rpc_host_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +nvmf_rpc_host_ctx_free(struct nvmf_rpc_host_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->host); + free(ctx->tgt_name); + free(ctx); +} + +static void +nvmf_rpc_host_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_host_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; + bool response_sent = ctx->response_sent; + + request = ctx->request; + nvmf_rpc_host_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_host_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_host_ctx *ctx = cb_arg; + int rc = -1; + + switch (ctx->op) { + case NVMF_RPC_HOST_ADD: + rc = spdk_nvmf_subsystem_add_host(subsystem, ctx->host); + break; + case NVMF_RPC_HOST_REMOVE: + rc = spdk_nvmf_subsystem_remove_host(subsystem, 
ctx->host); + break; + case NVMF_RPC_HOST_ALLOW_ANY: + rc = spdk_nvmf_subsystem_set_allow_any_host(subsystem, ctx->allow_any_host); + break; + } + + if (rc != 0) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_host_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_host_ctx_free(ctx); + } +} + +static void +rpc_nvmf_subsystem_add_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_ADD; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + 
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_host", rpc_nvmf_subsystem_add_host, SPDK_RPC_RUNTIME) + +static void +rpc_nvmf_subsystem_remove_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_REMOVE; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_host", rpc_nvmf_subsystem_remove_host, + SPDK_RPC_RUNTIME) + + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_any_host_decoder[] = 
{ + {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string}, + {"allow_any_host", offsetof(struct nvmf_rpc_host_ctx, allow_any_host), spdk_json_decode_bool}, + {"tgt_name", offsetof(struct nvmf_rpc_host_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +rpc_nvmf_subsystem_allow_any_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_any_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_any_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_ALLOW_ANY; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_allow_any_host", rpc_nvmf_subsystem_allow_any_host, + SPDK_RPC_RUNTIME) + +struct 
nvmf_rpc_target_ctx { + char *name; + uint32_t max_subsystems; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_create_target_decoder[] = { + {"name", offsetof(struct nvmf_rpc_target_ctx, name), spdk_json_decode_string}, + {"max_subsystems", offsetof(struct nvmf_rpc_target_ctx, max_subsystems), spdk_json_decode_uint32, true}, +}; + +static void +rpc_nvmf_create_target(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_nvmf_target_opts opts; + struct nvmf_rpc_target_ctx ctx = {0}; + struct spdk_nvmf_tgt *tgt; + struct spdk_json_write_ctx *w; + + /* Decode parameters the first time to get the transport type */ + if (spdk_json_decode_object(params, nvmf_rpc_create_target_decoder, + SPDK_COUNTOF(nvmf_rpc_create_target_decoder), + &ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free(ctx.name); + return; + } + + snprintf(opts.name, NVMF_TGT_NAME_MAX_LENGTH, "%s", ctx.name); + opts.max_subsystems = ctx.max_subsystems; + + if (spdk_nvmf_get_tgt(opts.name) != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Target already exists."); + free(ctx.name); + return; + } + + tgt = spdk_nvmf_tgt_create(&opts); + + if (tgt == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to create the requested target."); + free(ctx.name); + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_string(w, spdk_nvmf_tgt_get_name(tgt)); + spdk_jsonrpc_end_result(request, w); + free(ctx.name); +} +SPDK_RPC_REGISTER("nvmf_create_target", rpc_nvmf_create_target, SPDK_RPC_RUNTIME); + +static const struct spdk_json_object_decoder nvmf_rpc_destroy_target_decoder[] = { + {"name", offsetof(struct nvmf_rpc_target_ctx, name), spdk_json_decode_string}, +}; + +static void +nvmf_rpc_destroy_target_done(void *ctx, int status) +{ + struct 
spdk_jsonrpc_request *request = ctx; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +rpc_nvmf_delete_target(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_target_ctx ctx = {0}; + struct spdk_nvmf_tgt *tgt; + + /* Decode parameters the first time to get the transport type */ + if (spdk_json_decode_object(params, nvmf_rpc_destroy_target_decoder, + SPDK_COUNTOF(nvmf_rpc_destroy_target_decoder), + &ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free(ctx.name); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx.name); + + if (tgt == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "The specified target doesn't exist, cannot delete it."); + free(ctx.name); + return; + } + + spdk_nvmf_tgt_destroy(tgt, nvmf_rpc_destroy_target_done, request); + free(ctx.name); +} +SPDK_RPC_REGISTER("nvmf_delete_target", rpc_nvmf_delete_target, SPDK_RPC_RUNTIME); + +static void +rpc_nvmf_get_targets(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_nvmf_tgt *tgt; + const char *name; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "nvmf_get_targets has no parameters."); + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_array_begin(w); + + tgt = spdk_nvmf_get_first_tgt(); + + while (tgt != NULL) { + name = spdk_nvmf_tgt_get_name(tgt); + spdk_json_write_string(w, name); + tgt = spdk_nvmf_get_next_tgt(tgt); + } + + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("nvmf_get_targets", rpc_nvmf_get_targets, SPDK_RPC_RUNTIME); + +struct nvmf_rpc_create_transport_ctx { + char *trtype; + 
/**
 * Decode the `max_io_qpairs_per_ctrlr` RPC parameter.
 *
 * `max_qpairs_per_ctrlr` counts both the admin qpair and the IO qpairs, which
 * confused users configuring a transport through RPC. That RPC parameter was
 * therefore deprecated in favour of `max_io_qpairs_per_ctrlr`. The internal
 * logic is unchanged: SPDK still expects
 * spdk_nvmf_transport_opts::max_qpairs_per_ctrlr to include the admin qpair,
 * so this decoder parses the number of IO qpairs and adds one for it.
 *
 * \param val JSON number to decode.
 * \param out Points to the uint16_t that receives the IO qpair count plus one.
 *
 * \return 0 on success, the error returned by spdk_json_number_to_uint16() if
 * the number cannot be decoded, or -ERANGE if adding the admin qpair would
 * overflow uint16_t.
 */
static int
nvmf_rpc_decode_max_io_qpairs(const struct spdk_json_val *val, void *out)
{
	uint16_t *i = out;
	int rc;

	rc = spdk_json_number_to_uint16(val, i);
	if (rc != 0) {
		return rc;
	}

	/* Incrementing UINT16_MAX would wrap to 0 and silently configure a
	 * transport with no qpairs at all, so reject the value instead.
	 */
	if (*i == UINT16_MAX) {
		SPDK_ERRLOG("max_io_qpairs_per_ctrlr must be lower than 65535\n");
		return -ERANGE;
	}

	(*i)++;

	return 0;
}
"max_io_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_io_size), + spdk_json_decode_uint32, true + }, + { + "io_unit_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.io_unit_size), + spdk_json_decode_uint32, true + }, + { + "max_aq_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_aq_depth), + spdk_json_decode_uint32, true + }, + { + "num_shared_buffers", offsetof(struct nvmf_rpc_create_transport_ctx, opts.num_shared_buffers), + spdk_json_decode_uint32, true + }, + { + "buf_cache_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.buf_cache_size), + spdk_json_decode_uint32, true + }, + { + "max_srq_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_srq_depth), + spdk_json_decode_uint32, true + }, + { + "no_srq", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_srq), + spdk_json_decode_bool, true + }, + { + "c2h_success", offsetof(struct nvmf_rpc_create_transport_ctx, opts.c2h_success), + spdk_json_decode_bool, true + }, + { + "dif_insert_or_strip", offsetof(struct nvmf_rpc_create_transport_ctx, opts.dif_insert_or_strip), + spdk_json_decode_bool, true + }, + { + "sock_priority", offsetof(struct nvmf_rpc_create_transport_ctx, opts.sock_priority), + spdk_json_decode_uint32, true + }, + { + "acceptor_backlog", offsetof(struct nvmf_rpc_create_transport_ctx, opts.acceptor_backlog), + spdk_json_decode_int32, true + }, + { + "abort_timeout_sec", offsetof(struct nvmf_rpc_create_transport_ctx, opts.abort_timeout_sec), + spdk_json_decode_uint32, true + }, + { + "tgt_name", offsetof(struct nvmf_rpc_create_transport_ctx, tgt_name), + spdk_json_decode_string, true + }, +}; + +static void +nvmf_rpc_create_transport_ctx_free(struct nvmf_rpc_create_transport_ctx *ctx) +{ + free(ctx->trtype); + free(ctx->tgt_name); + free(ctx); +} + +static void +nvmf_rpc_tgt_add_transport_done(void *cb_arg, int status) +{ + struct nvmf_rpc_create_transport_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct 
spdk_json_write_ctx *w; + + request = ctx->request; + nvmf_rpc_create_transport_ctx_free(ctx); + + if (status) { + SPDK_ERRLOG("Failed to add transport to tgt.(%d)\n", status); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to add transport to tgt.(%d)\n", + status); + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +rpc_nvmf_create_transport(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_create_transport_ctx *ctx; + enum spdk_nvme_transport_type trtype; + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_tgt *tgt; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + /* Decode parameters the first time to get the transport type */ + if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder, + SPDK_COUNTOF(nvmf_rpc_create_transport_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!tgt) { + SPDK_ERRLOG("Unable to find a target object.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + if (spdk_nvme_transport_id_parse_trtype(&trtype, ctx->trtype)) { + SPDK_ERRLOG("Invalid transport type '%s'\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid transport type '%s'\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + /* Initialize all the transport options (based on transport type) and decode the + * parameters again 
to update any options passed in rpc create transport call. + */ + if (!spdk_nvmf_transport_opts_init(ctx->trtype, &ctx->opts)) { + /* This can happen if user specifies PCIE transport type which isn't valid for + * NVMe-oF. + */ + SPDK_ERRLOG("Invalid transport type '%s'\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid transport type '%s'\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder, + SPDK_COUNTOF(nvmf_rpc_create_transport_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + if (spdk_nvmf_tgt_get_transport(tgt, ctx->trtype)) { + SPDK_ERRLOG("Transport type '%s' already exists\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Transport type '%s' already exists\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + transport = spdk_nvmf_transport_create(ctx->trtype, &ctx->opts); + + if (!transport) { + SPDK_ERRLOG("Transport type '%s' create failed\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Transport type '%s' create failed\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + /* add transport to target */ + ctx->request = request; + spdk_nvmf_tgt_add_transport(tgt, transport, nvmf_rpc_tgt_add_transport_done, ctx); +} +SPDK_RPC_REGISTER("nvmf_create_transport", rpc_nvmf_create_transport, SPDK_RPC_RUNTIME) + +static void +dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *transport) +{ + const struct spdk_nvmf_transport_opts *opts = spdk_nvmf_get_transport_opts(transport); + spdk_nvme_transport_type_t type = 
spdk_nvmf_get_transport_type(transport); + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "trtype", spdk_nvmf_get_transport_name(transport)); + spdk_json_write_named_uint32(w, "max_queue_depth", opts->max_queue_depth); + spdk_json_write_named_uint32(w, "max_io_qpairs_per_ctrlr", opts->max_qpairs_per_ctrlr - 1); + spdk_json_write_named_uint32(w, "in_capsule_data_size", opts->in_capsule_data_size); + spdk_json_write_named_uint32(w, "max_io_size", opts->max_io_size); + spdk_json_write_named_uint32(w, "io_unit_size", opts->io_unit_size); + spdk_json_write_named_uint32(w, "max_aq_depth", opts->max_aq_depth); + spdk_json_write_named_uint32(w, "num_shared_buffers", opts->num_shared_buffers); + spdk_json_write_named_uint32(w, "buf_cache_size", opts->buf_cache_size); + spdk_json_write_named_bool(w, "dif_insert_or_strip", opts->dif_insert_or_strip); + if (type == SPDK_NVME_TRANSPORT_RDMA) { + spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth); + spdk_json_write_named_bool(w, "no_srq", opts->no_srq); + spdk_json_write_named_int32(w, "acceptor_backlog", opts->acceptor_backlog); + } else if (type == SPDK_NVME_TRANSPORT_TCP) { + spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success); + spdk_json_write_named_uint32(w, "sock_priority", opts->sock_priority); + } + spdk_json_write_named_uint32(w, "abort_timeout_sec", opts->abort_timeout_sec); + + spdk_json_write_object_end(w); +} + +struct rpc_get_transport { + char *tgt_name; +}; + +static const struct spdk_json_object_decoder rpc_get_transport_decoders[] = { + {"tgt_name", offsetof(struct rpc_get_transport, tgt_name), spdk_json_decode_string, true}, +}; + +static void +rpc_nvmf_get_transports(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_transport req = { 0 }; + struct spdk_json_write_ctx *w; + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_tgt *tgt; + + if (params) { + if (spdk_json_decode_object(params, 
rpc_get_transport_decoders, + SPDK_COUNTOF(rpc_get_transport_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + return; + } + } + + tgt = spdk_nvmf_get_tgt(req.tgt_name); + if (!tgt) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + free(req.tgt_name); + return; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_array_begin(w); + transport = spdk_nvmf_transport_get_first(tgt); + while (transport) { + dump_nvmf_transport(w, transport); + transport = spdk_nvmf_transport_get_next(transport); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + free(req.tgt_name); +} +SPDK_RPC_REGISTER("nvmf_get_transports", rpc_nvmf_get_transports, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(nvmf_get_transports, get_nvmf_transports) + +struct rpc_nvmf_get_stats_ctx { + char *tgt_name; + struct spdk_nvmf_tgt *tgt; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; +}; + +static const struct spdk_json_object_decoder rpc_get_stats_decoders[] = { + {"tgt_name", offsetof(struct rpc_nvmf_get_stats_ctx, tgt_name), spdk_json_decode_string, true}, +}; + +static void +free_get_stats_ctx(struct rpc_nvmf_get_stats_ctx *ctx) +{ + free(ctx->tgt_name); + free(ctx); +} + +static void +rpc_nvmf_get_stats_done(struct spdk_io_channel_iter *i, int status) +{ + struct rpc_nvmf_get_stats_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + spdk_json_write_array_end(ctx->w); + spdk_json_write_object_end(ctx->w); + spdk_jsonrpc_end_result(ctx->request, ctx->w); + free_get_stats_ctx(ctx); +} + +static void +write_nvmf_transport_stats(struct spdk_json_write_ctx *w, + struct spdk_nvmf_transport_poll_group_stat *stat) +{ + uint64_t i; + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "trtype", + 
spdk_nvme_transport_id_trtype_str(stat->trtype)); + switch (stat->trtype) { + case SPDK_NVME_TRANSPORT_RDMA: + spdk_json_write_named_uint64(w, "pending_data_buffer", stat->rdma.pending_data_buffer); + spdk_json_write_named_array_begin(w, "devices"); + for (i = 0; i < stat->rdma.num_devices; ++i) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "name", stat->rdma.devices[i].name); + spdk_json_write_named_uint64(w, "polls", stat->rdma.devices[i].polls); + spdk_json_write_named_uint64(w, "completions", stat->rdma.devices[i].completions); + spdk_json_write_named_uint64(w, "requests", + stat->rdma.devices[i].requests); + spdk_json_write_named_uint64(w, "request_latency", + stat->rdma.devices[i].request_latency); + spdk_json_write_named_uint64(w, "pending_free_request", + stat->rdma.devices[i].pending_free_request); + spdk_json_write_named_uint64(w, "pending_rdma_read", + stat->rdma.devices[i].pending_rdma_read); + spdk_json_write_named_uint64(w, "pending_rdma_write", + stat->rdma.devices[i].pending_rdma_write); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + break; + default: + break; + } + spdk_json_write_object_end(w); +} + +static void +_rpc_nvmf_get_stats(struct spdk_io_channel_iter *i) +{ + struct rpc_nvmf_get_stats_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_poll_group_stat stat; + struct spdk_nvmf_transport_poll_group_stat *trstat; + int rc; + + if (0 == spdk_nvmf_poll_group_get_stat(ctx->tgt, &stat)) { + spdk_json_write_object_begin(ctx->w); + spdk_json_write_named_string(ctx->w, "name", spdk_thread_get_name(spdk_get_thread())); + spdk_json_write_named_uint32(ctx->w, "admin_qpairs", stat.admin_qpairs); + spdk_json_write_named_uint32(ctx->w, "io_qpairs", stat.io_qpairs); + spdk_json_write_named_uint64(ctx->w, "pending_bdev_io", stat.pending_bdev_io); + + spdk_json_write_named_array_begin(ctx->w, "transports"); + transport = 
spdk_nvmf_transport_get_first(ctx->tgt); + while (transport) { + rc = spdk_nvmf_transport_poll_group_get_stat(ctx->tgt, transport, &trstat); + if (0 == rc) { + write_nvmf_transport_stats(ctx->w, trstat); + spdk_nvmf_transport_poll_group_free_stat(transport, trstat); + } else if (-ENOTSUP != rc) { + SPDK_ERRLOG("Failed to get poll group statistics for transport %s, errno %d\n", + spdk_nvme_transport_id_trtype_str(spdk_nvmf_get_transport_type(transport)), + rc); + } + transport = spdk_nvmf_transport_get_next(transport); + } + spdk_json_write_array_end(ctx->w); + spdk_json_write_object_end(ctx->w); + } + + spdk_for_each_channel_continue(i, 0); +} + + +static void +rpc_nvmf_get_stats(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_nvmf_get_stats_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error"); + return; + } + ctx->request = request; + + if (params) { + if (spdk_json_decode_object(params, rpc_get_stats_decoders, + SPDK_COUNTOF(rpc_get_stats_decoders), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_get_stats_ctx(ctx); + return; + } + } + + ctx->tgt = spdk_nvmf_get_tgt(ctx->tgt_name); + if (!ctx->tgt) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Unable to find a target."); + free_get_stats_ctx(ctx); + return; + } + + ctx->w = spdk_jsonrpc_begin_result(ctx->request); + spdk_json_write_object_begin(ctx->w); + spdk_json_write_named_uint64(ctx->w, "tick_rate", spdk_get_ticks_hz()); + spdk_json_write_named_array_begin(ctx->w, "poll_groups"); + + spdk_for_each_channel(ctx->tgt, + _rpc_nvmf_get_stats, + ctx, + rpc_nvmf_get_stats_done); +} + +SPDK_RPC_REGISTER("nvmf_get_stats", rpc_nvmf_get_stats, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/nvmf/rdma.c 
b/src/spdk/lib/nvmf/rdma.c new file mode 100644 index 000000000..4a4de4374 --- /dev/null +++ b/src/spdk/lib/nvmf/rdma.c @@ -0,0 +1,4313 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/* Number of work requests computed for a queue pair of the given depth:
 * three per queue entry plus two. The argument is parenthesized so that a
 * compound expression such as MAX_WR_PER_QP(a + b) is multiplied as a whole.
 */
#define MAX_WR_PER_QP(queue_depth)	((queue_depth) * 3 + 2)
*/ + RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + + /* The request is ready to execute at the block device */ + RDMA_REQUEST_STATE_READY_TO_EXECUTE, + + /* The request is currently executing at the block device */ + RDMA_REQUEST_STATE_EXECUTING, + + /* The request finished executing at the block device */ + RDMA_REQUEST_STATE_EXECUTED, + + /* The request is waiting on RDMA queue depth availability + * to transfer data from the controller to the host. + */ + RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, + + /* The request is ready to send a completion */ + RDMA_REQUEST_STATE_READY_TO_COMPLETE, + + /* The request is currently transferring data from the controller to the host. */ + RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + + /* The request currently has an outstanding completion without an + * associated data transfer. + */ + RDMA_REQUEST_STATE_COMPLETING, + + /* The request completed and can be marked free. */ + RDMA_REQUEST_STATE_COMPLETED, + + /* Terminator */ + RDMA_REQUEST_NUM_STATES, +}; + +#define OBJECT_NVMF_RDMA_IO 0x40 + +#define TRACE_GROUP_NVMF_RDMA 0x4 +#define TRACE_RDMA_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x0) +#define TRACE_RDMA_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x1) +#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x2) +#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x3) +#define TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x4) +#define TRACE_RDMA_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x5) +#define TRACE_RDMA_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x6) +#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x7) +#define TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x8) +#define 
TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x9) +#define TRACE_RDMA_REQUEST_STATE_COMPLETING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xA) +#define TRACE_RDMA_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xB) +#define TRACE_RDMA_QP_CREATE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xC) +#define TRACE_RDMA_IBV_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xD) +#define TRACE_RDMA_CM_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xE) +#define TRACE_RDMA_QP_STATE_CHANGE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xF) +#define TRACE_RDMA_QP_DISCONNECT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x10) +#define TRACE_RDMA_QP_DESTROY SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x11) + +SPDK_TRACE_REGISTER_FN(nvmf_trace, "nvmf_rdma", TRACE_GROUP_NVMF_RDMA) +{ + spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r'); + spdk_trace_register_description("RDMA_REQ_NEW", TRACE_RDMA_REQUEST_STATE_NEW, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 1, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_NEED_BUFFER", TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_TX_PENDING_C2H", + TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_TX_PENDING_H2C", + TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_TX_H2C", + TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_RDY_TO_EXECUTE", + TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_EXECUTING", + TRACE_RDMA_REQUEST_STATE_EXECUTING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + 
spdk_trace_register_description("RDMA_REQ_EXECUTED", + TRACE_RDMA_REQUEST_STATE_EXECUTED, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPL", + TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETING_C2H", + TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETING", + TRACE_RDMA_REQUEST_STATE_COMPLETING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETED", + TRACE_RDMA_REQUEST_STATE_COMPLETED, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + + spdk_trace_register_description("RDMA_QP_CREATE", TRACE_RDMA_QP_CREATE, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("RDMA_IBV_ASYNC_EVENT", TRACE_RDMA_IBV_ASYNC_EVENT, + OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); + spdk_trace_register_description("RDMA_CM_ASYNC_EVENT", TRACE_RDMA_CM_ASYNC_EVENT, + OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); + spdk_trace_register_description("RDMA_QP_STATE_CHANGE", TRACE_RDMA_QP_STATE_CHANGE, + OWNER_NONE, OBJECT_NONE, 0, 1, "state: "); + spdk_trace_register_description("RDMA_QP_DISCONNECT", TRACE_RDMA_QP_DISCONNECT, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("RDMA_QP_DESTROY", TRACE_RDMA_QP_DESTROY, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); +} + +enum spdk_nvmf_rdma_wr_type { + RDMA_WR_TYPE_RECV, + RDMA_WR_TYPE_SEND, + RDMA_WR_TYPE_DATA, +}; + +struct spdk_nvmf_rdma_wr { + enum spdk_nvmf_rdma_wr_type type; +}; + +/* This structure holds commands as they are received off the wire. + * It must be dynamically paired with a full request object + * (spdk_nvmf_rdma_request) to service a request. 
It is separate + * from the request because RDMA does not appear to order + * completions, so occasionally we'll get a new incoming + * command when there aren't any free request objects. + */ +struct spdk_nvmf_rdma_recv { + struct ibv_recv_wr wr; + struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE]; + + struct spdk_nvmf_rdma_qpair *qpair; + + /* In-capsule data buffer */ + uint8_t *buf; + + struct spdk_nvmf_rdma_wr rdma_wr; + uint64_t receive_tsc; + + STAILQ_ENTRY(spdk_nvmf_rdma_recv) link; +}; + +struct spdk_nvmf_rdma_request_data { + struct spdk_nvmf_rdma_wr rdma_wr; + struct ibv_send_wr wr; + struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES]; +}; + +struct spdk_nvmf_rdma_request { + struct spdk_nvmf_request req; + + enum spdk_nvmf_rdma_request_state state; + + struct spdk_nvmf_rdma_recv *recv; + + struct { + struct spdk_nvmf_rdma_wr rdma_wr; + struct ibv_send_wr wr; + struct ibv_sge sgl[NVMF_DEFAULT_RSP_SGE]; + } rsp; + + struct spdk_nvmf_rdma_request_data data; + + uint32_t iovpos; + + uint32_t num_outstanding_data_wr; + uint64_t receive_tsc; + + STAILQ_ENTRY(spdk_nvmf_rdma_request) state_link; +}; + +enum spdk_nvmf_rdma_qpair_disconnect_flags { + RDMA_QP_DISCONNECTING = 1, + RDMA_QP_RECV_DRAINED = 1 << 1, + RDMA_QP_SEND_DRAINED = 1 << 2 +}; + +struct spdk_nvmf_rdma_resource_opts { + struct spdk_nvmf_rdma_qpair *qpair; + /* qp points either to an ibv_qp object or an ibv_srq object depending on the value of shared. */ + void *qp; + struct ibv_pd *pd; + uint32_t max_queue_depth; + uint32_t in_capsule_data_size; + bool shared; +}; + +struct spdk_nvmf_send_wr_list { + struct ibv_send_wr *first; + struct ibv_send_wr *last; +}; + +struct spdk_nvmf_recv_wr_list { + struct ibv_recv_wr *first; + struct ibv_recv_wr *last; +}; + +struct spdk_nvmf_rdma_resources { + /* Array of size "max_queue_depth" containing RDMA requests. */ + struct spdk_nvmf_rdma_request *reqs; + + /* Array of size "max_queue_depth" containing RDMA recvs. 
*/ + struct spdk_nvmf_rdma_recv *recvs; + + /* Array of size "max_queue_depth" containing 64 byte capsules + * used for receive. + */ + union nvmf_h2c_msg *cmds; + struct ibv_mr *cmds_mr; + + /* Array of size "max_queue_depth" containing 16 byte completions + * to be sent back to the user. + */ + union nvmf_c2h_msg *cpls; + struct ibv_mr *cpls_mr; + + /* Array of size "max_queue_depth * InCapsuleDataSize" containing + * buffers to be used for in capsule data. + */ + void *bufs; + struct ibv_mr *bufs_mr; + + /* The list of pending recvs to transfer */ + struct spdk_nvmf_recv_wr_list recvs_to_post; + + /* Receives that are waiting for a request object */ + STAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue; + + /* Queue to track free requests */ + STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue; +}; + +typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair); + +struct spdk_nvmf_rdma_ibv_event_ctx { + struct spdk_nvmf_rdma_qpair *rqpair; + spdk_nvmf_rdma_qpair_ibv_event cb_fn; + /* Link to other ibv events associated with this qpair */ + STAILQ_ENTRY(spdk_nvmf_rdma_ibv_event_ctx) link; +}; + +struct spdk_nvmf_rdma_qpair { + struct spdk_nvmf_qpair qpair; + + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poller *poller; + + struct spdk_rdma_qp *rdma_qp; + struct rdma_cm_id *cm_id; + struct ibv_srq *srq; + struct rdma_cm_id *listen_id; + + /* The maximum number of I/O outstanding on this connection at one time */ + uint16_t max_queue_depth; + + /* The maximum number of active RDMA READ and ATOMIC operations at one time */ + uint16_t max_read_depth; + + /* The maximum number of RDMA SEND operations at one time */ + uint32_t max_send_depth; + + /* The current number of outstanding WRs from this qpair's + * recv queue. Should not exceed device->attr.max_queue_depth. 
+ */ + uint16_t current_recv_depth; + + /* The current number of active RDMA READ operations */ + uint16_t current_read_depth; + + /* The current number of posted WRs from this qpair's + * send queue. Should not exceed max_send_depth. + */ + uint32_t current_send_depth; + + /* The maximum number of SGEs per WR on the send queue */ + uint32_t max_send_sge; + + /* The maximum number of SGEs per WR on the recv queue */ + uint32_t max_recv_sge; + + struct spdk_nvmf_rdma_resources *resources; + + STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_read_queue; + + STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_write_queue; + + /* Number of requests not in the free state */ + uint32_t qd; + + TAILQ_ENTRY(spdk_nvmf_rdma_qpair) link; + + STAILQ_ENTRY(spdk_nvmf_rdma_qpair) recv_link; + + STAILQ_ENTRY(spdk_nvmf_rdma_qpair) send_link; + + /* IBV queue pair attributes: they are used to manage + * qp state and recover from errors. + */ + enum ibv_qp_state ibv_state; + + uint32_t disconnect_flags; + + /* Poller registered in case the qpair doesn't properly + * complete the qpair destruct process and becomes defunct. + */ + + struct spdk_poller *destruct_poller; + + /* + * io_channel which is used to destroy qpair when it is removed from poll group + */ + struct spdk_io_channel *destruct_channel; + + /* List of ibv async events */ + STAILQ_HEAD(, spdk_nvmf_rdma_ibv_event_ctx) ibv_events; + + /* There are several ways a disconnect can start on a qpair + * and they are not all mutually exclusive. It is important + * that we only initialize one of these paths. + */ + bool disconnect_started; + /* Lets us know that we have received the last_wqe event. 
*/ + bool last_wqe_reached; +}; + +struct spdk_nvmf_rdma_poller_stat { + uint64_t completions; + uint64_t polls; + uint64_t requests; + uint64_t request_latency; + uint64_t pending_free_request; + uint64_t pending_rdma_read; + uint64_t pending_rdma_write; +}; + +struct spdk_nvmf_rdma_poller { + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poll_group *group; + + int num_cqe; + int required_num_wr; + struct ibv_cq *cq; + + /* The maximum number of I/O outstanding on the shared receive queue at one time */ + uint16_t max_srq_depth; + + /* Shared receive queue */ + struct ibv_srq *srq; + + struct spdk_nvmf_rdma_resources *resources; + struct spdk_nvmf_rdma_poller_stat stat; + + TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs; + + STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_recv; + + STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_send; + + TAILQ_ENTRY(spdk_nvmf_rdma_poller) link; +}; + +struct spdk_nvmf_rdma_poll_group_stat { + uint64_t pending_data_buffer; +}; + +struct spdk_nvmf_rdma_poll_group { + struct spdk_nvmf_transport_poll_group group; + struct spdk_nvmf_rdma_poll_group_stat stat; + TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers; + TAILQ_ENTRY(spdk_nvmf_rdma_poll_group) link; + /* + * buffers which are split across multiple RDMA + * memory regions cannot be used by this transport. + */ + STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) retired_bufs; +}; + +struct spdk_nvmf_rdma_conn_sched { + struct spdk_nvmf_rdma_poll_group *next_admin_pg; + struct spdk_nvmf_rdma_poll_group *next_io_pg; +}; + +/* Assuming rdma_cm uses just one protection domain per ibv_context. 
*/ +struct spdk_nvmf_rdma_device { + struct ibv_device_attr attr; + struct ibv_context *context; + + struct spdk_mem_map *map; + struct ibv_pd *pd; + + int num_srq; + + TAILQ_ENTRY(spdk_nvmf_rdma_device) link; +}; + +struct spdk_nvmf_rdma_port { + const struct spdk_nvme_transport_id *trid; + struct rdma_cm_id *id; + struct spdk_nvmf_rdma_device *device; + TAILQ_ENTRY(spdk_nvmf_rdma_port) link; +}; + +struct spdk_nvmf_rdma_transport { + struct spdk_nvmf_transport transport; + + struct spdk_nvmf_rdma_conn_sched conn_sched; + + struct rdma_event_channel *event_channel; + + struct spdk_mempool *data_wr_pool; + + pthread_mutex_t lock; + + /* fields used to poll RDMA/IB events */ + nfds_t npoll_fds; + struct pollfd *poll_fds; + + TAILQ_HEAD(, spdk_nvmf_rdma_device) devices; + TAILQ_HEAD(, spdk_nvmf_rdma_port) ports; + TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups; +}; + +static inline void +nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair); + +static bool +nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_request *rdma_req); + +static inline int +nvmf_rdma_check_ibv_state(enum ibv_qp_state state) +{ + switch (state) { + case IBV_QPS_RESET: + case IBV_QPS_INIT: + case IBV_QPS_RTR: + case IBV_QPS_RTS: + case IBV_QPS_SQD: + case IBV_QPS_SQE: + case IBV_QPS_ERR: + return 0; + default: + return -1; + } +} + +static inline enum spdk_nvme_media_error_status_code +nvmf_rdma_dif_error_to_compl_status(uint8_t err_type) { + enum spdk_nvme_media_error_status_code result; + switch (err_type) + { + case SPDK_DIF_REFTAG_ERROR: + result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; + break; + case SPDK_DIF_APPTAG_ERROR: + result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; + break; + case SPDK_DIF_GUARD_ERROR: + result = SPDK_NVME_SC_GUARD_CHECK_ERROR; + break; + default: + SPDK_UNREACHABLE(); + } + + return result; +} + +static enum ibv_qp_state +nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) { + enum 
ibv_qp_state old_state, new_state; + struct ibv_qp_attr qp_attr; + struct ibv_qp_init_attr init_attr; + int rc; + + old_state = rqpair->ibv_state; + rc = ibv_query_qp(rqpair->rdma_qp->qp, &qp_attr, + g_spdk_nvmf_ibv_query_mask, &init_attr); + + if (rc) + { + SPDK_ERRLOG("Failed to get updated RDMA queue pair state!\n"); + return IBV_QPS_ERR + 1; + } + + new_state = qp_attr.qp_state; + rqpair->ibv_state = new_state; + qp_attr.ah_attr.port_num = qp_attr.port_num; + + rc = nvmf_rdma_check_ibv_state(new_state); + if (rc) + { + SPDK_ERRLOG("QP#%d: bad state updated: %u, maybe hardware issue\n", rqpair->qpair.qid, new_state); + /* + * IBV_QPS_UNKNOWN undefined if lib version smaller than libibverbs-1.1.8 + * IBV_QPS_UNKNOWN is the enum element after IBV_QPS_ERR + */ + return IBV_QPS_ERR + 1; + } + + if (old_state != new_state) + { + spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0, + (uintptr_t)rqpair->cm_id, new_state); + } + return new_state; +} + +static void +nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req, + struct spdk_nvmf_rdma_transport *rtransport) +{ + struct spdk_nvmf_rdma_request_data *data_wr; + struct ibv_send_wr *next_send_wr; + uint64_t req_wrid; + + rdma_req->num_outstanding_data_wr = 0; + data_wr = &rdma_req->data; + req_wrid = data_wr->wr.wr_id; + while (data_wr && data_wr->wr.wr_id == req_wrid) { + memset(data_wr->sgl, 0, sizeof(data_wr->wr.sg_list[0]) * data_wr->wr.num_sge); + data_wr->wr.num_sge = 0; + next_send_wr = data_wr->wr.next; + if (data_wr != &rdma_req->data) { + spdk_mempool_put(rtransport->data_wr_pool, data_wr); + } + data_wr = (!next_send_wr || next_send_wr == &rdma_req->rsp.wr) ? 
NULL : + SPDK_CONTAINEROF(next_send_wr, struct spdk_nvmf_rdma_request_data, wr); + } +} + +static void +nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req) +{ + SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", req->req.data_from_pool); + if (req->req.cmd) { + SPDK_ERRLOG("\t\tRequest opcode: %d\n", req->req.cmd->nvmf_cmd.opcode); + } + if (req->recv) { + SPDK_ERRLOG("\t\tRequest recv wr_id%lu\n", req->recv->wr.wr_id); + } +} + +static void +nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair) +{ + int i; + + SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid); + for (i = 0; i < rqpair->max_queue_depth; i++) { + if (rqpair->resources->reqs[i].state != RDMA_REQUEST_STATE_FREE) { + nvmf_rdma_dump_request(&rqpair->resources->reqs[i]); + } + } +} + +static void +nvmf_rdma_resources_destroy(struct spdk_nvmf_rdma_resources *resources) +{ + if (resources->cmds_mr) { + ibv_dereg_mr(resources->cmds_mr); + } + + if (resources->cpls_mr) { + ibv_dereg_mr(resources->cpls_mr); + } + + if (resources->bufs_mr) { + ibv_dereg_mr(resources->bufs_mr); + } + + spdk_free(resources->cmds); + spdk_free(resources->cpls); + spdk_free(resources->bufs); + free(resources->reqs); + free(resources->recvs); + free(resources); +} + + +static struct spdk_nvmf_rdma_resources * +nvmf_rdma_resources_create(struct spdk_nvmf_rdma_resource_opts *opts) +{ + struct spdk_nvmf_rdma_resources *resources; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_rdma_recv *rdma_recv; + struct ibv_qp *qp; + struct ibv_srq *srq; + uint32_t i; + int rc; + + resources = calloc(1, sizeof(struct spdk_nvmf_rdma_resources)); + if (!resources) { + SPDK_ERRLOG("Unable to allocate resources for receive queue.\n"); + return NULL; + } + + resources->reqs = calloc(opts->max_queue_depth, sizeof(*resources->reqs)); + resources->recvs = calloc(opts->max_queue_depth, sizeof(*resources->recvs)); + resources->cmds = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cmds), + 
0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + resources->cpls = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cpls), + 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + if (opts->in_capsule_data_size > 0) { + resources->bufs = spdk_zmalloc(opts->max_queue_depth * opts->in_capsule_data_size, + 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + } + + if (!resources->reqs || !resources->recvs || !resources->cmds || + !resources->cpls || (opts->in_capsule_data_size && !resources->bufs)) { + SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n"); + goto cleanup; + } + + resources->cmds_mr = ibv_reg_mr(opts->pd, resources->cmds, + opts->max_queue_depth * sizeof(*resources->cmds), + IBV_ACCESS_LOCAL_WRITE); + resources->cpls_mr = ibv_reg_mr(opts->pd, resources->cpls, + opts->max_queue_depth * sizeof(*resources->cpls), + 0); + + if (opts->in_capsule_data_size) { + resources->bufs_mr = ibv_reg_mr(opts->pd, resources->bufs, + opts->max_queue_depth * + opts->in_capsule_data_size, + IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + } + + if (!resources->cmds_mr || !resources->cpls_mr || + (opts->in_capsule_data_size && + !resources->bufs_mr)) { + goto cleanup; + } + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %lx LKey: %x\n", + resources->cmds, opts->max_queue_depth * sizeof(*resources->cmds), + resources->cmds_mr->lkey); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %lx LKey: %x\n", + resources->cpls, opts->max_queue_depth * sizeof(*resources->cpls), + resources->cpls_mr->lkey); + if (resources->bufs && resources->bufs_mr) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %x LKey: %x\n", + resources->bufs, opts->max_queue_depth * + opts->in_capsule_data_size, resources->bufs_mr->lkey); + } + + /* Initialize queues */ + STAILQ_INIT(&resources->incoming_queue); + STAILQ_INIT(&resources->free_queue); + + for (i = 0; i < opts->max_queue_depth; i++) { + struct ibv_recv_wr *bad_wr = 
NULL; + + rdma_recv = &resources->recvs[i]; + rdma_recv->qpair = opts->qpair; + + /* Set up memory to receive commands */ + if (resources->bufs) { + rdma_recv->buf = (void *)((uintptr_t)resources->bufs + (i * + opts->in_capsule_data_size)); + } + + rdma_recv->rdma_wr.type = RDMA_WR_TYPE_RECV; + + rdma_recv->sgl[0].addr = (uintptr_t)&resources->cmds[i]; + rdma_recv->sgl[0].length = sizeof(resources->cmds[i]); + rdma_recv->sgl[0].lkey = resources->cmds_mr->lkey; + rdma_recv->wr.num_sge = 1; + + if (rdma_recv->buf && resources->bufs_mr) { + rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf; + rdma_recv->sgl[1].length = opts->in_capsule_data_size; + rdma_recv->sgl[1].lkey = resources->bufs_mr->lkey; + rdma_recv->wr.num_sge++; + } + + rdma_recv->wr.wr_id = (uintptr_t)&rdma_recv->rdma_wr; + rdma_recv->wr.sg_list = rdma_recv->sgl; + if (opts->shared) { + srq = (struct ibv_srq *)opts->qp; + rc = ibv_post_srq_recv(srq, &rdma_recv->wr, &bad_wr); + } else { + qp = (struct ibv_qp *)opts->qp; + rc = ibv_post_recv(qp, &rdma_recv->wr, &bad_wr); + } + if (rc) { + goto cleanup; + } + } + + for (i = 0; i < opts->max_queue_depth; i++) { + rdma_req = &resources->reqs[i]; + + if (opts->qpair != NULL) { + rdma_req->req.qpair = &opts->qpair->qpair; + } else { + rdma_req->req.qpair = NULL; + } + rdma_req->req.cmd = NULL; + + /* Set up memory to send responses */ + rdma_req->req.rsp = &resources->cpls[i]; + + rdma_req->rsp.sgl[0].addr = (uintptr_t)&resources->cpls[i]; + rdma_req->rsp.sgl[0].length = sizeof(resources->cpls[i]); + rdma_req->rsp.sgl[0].lkey = resources->cpls_mr->lkey; + + rdma_req->rsp.rdma_wr.type = RDMA_WR_TYPE_SEND; + rdma_req->rsp.wr.wr_id = (uintptr_t)&rdma_req->rsp.rdma_wr; + rdma_req->rsp.wr.next = NULL; + rdma_req->rsp.wr.opcode = IBV_WR_SEND; + rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED; + rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl; + rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl); + + /* Set up memory for data buffers */ + 
rdma_req->data.rdma_wr.type = RDMA_WR_TYPE_DATA; + rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data.rdma_wr; + rdma_req->data.wr.next = NULL; + rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED; + rdma_req->data.wr.sg_list = rdma_req->data.sgl; + rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl); + + /* Initialize request state to FREE */ + rdma_req->state = RDMA_REQUEST_STATE_FREE; + STAILQ_INSERT_TAIL(&resources->free_queue, rdma_req, state_link); + } + + return resources; + +cleanup: + nvmf_rdma_resources_destroy(resources); + return NULL; +} + +static void +nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_ibv_event_ctx *ctx, *tctx; + STAILQ_FOREACH_SAFE(ctx, &rqpair->ibv_events, link, tctx) { + ctx->rqpair = NULL; + /* Memory allocated for ctx is freed in nvmf_rdma_qpair_process_ibv_event */ + STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link); + } +} + +static void +nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp; + struct ibv_recv_wr *bad_recv_wr = NULL; + int rc; + + spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0); + + spdk_poller_unregister(&rqpair->destruct_poller); + + if (rqpair->qd != 0) { + struct spdk_nvmf_qpair *qpair = &rqpair->qpair; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + struct spdk_nvmf_rdma_request *req; + uint32_t i, max_req_count = 0; + + SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd); + + if (rqpair->srq == NULL) { + nvmf_rdma_dump_qpair_contents(rqpair); + max_req_count = rqpair->max_queue_depth; + } else if (rqpair->poller && rqpair->resources) { + max_req_count = rqpair->poller->max_srq_depth; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Release incomplete requests\n"); + for (i = 0; i < max_req_count; i++) { + req = &rqpair->resources->reqs[i]; + if 
(req->req.qpair == qpair && req->state != RDMA_REQUEST_STATE_FREE) { + /* nvmf_rdma_request_process checks qpair ibv and internal state + * and completes a request */ + nvmf_rdma_request_process(rtransport, req); + } + } + assert(rqpair->qd == 0); + } + + if (rqpair->poller) { + TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link); + + if (rqpair->srq != NULL && rqpair->resources != NULL) { + /* Drop all received but unprocessed commands for this queue and return them to SRQ */ + STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) { + if (rqpair == rdma_recv->qpair) { + STAILQ_REMOVE(&rqpair->resources->incoming_queue, rdma_recv, spdk_nvmf_rdma_recv, link); + rc = ibv_post_srq_recv(rqpair->srq, &rdma_recv->wr, &bad_recv_wr); + if (rc) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + } + } + } + } + } + + if (rqpair->cm_id) { + if (rqpair->rdma_qp != NULL) { + spdk_rdma_qp_destroy(rqpair->rdma_qp); + rqpair->rdma_qp = NULL; + } + rdma_destroy_id(rqpair->cm_id); + + if (rqpair->poller != NULL && rqpair->srq == NULL) { + rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth); + } + } + + if (rqpair->srq == NULL && rqpair->resources != NULL) { + nvmf_rdma_resources_destroy(rqpair->resources); + } + + nvmf_rdma_qpair_clean_ibv_events(rqpair); + + if (rqpair->destruct_channel) { + spdk_put_io_channel(rqpair->destruct_channel); + rqpair->destruct_channel = NULL; + } + + free(rqpair); +} + +static int +nvmf_rdma_resize_cq(struct spdk_nvmf_rdma_qpair *rqpair, struct spdk_nvmf_rdma_device *device) +{ + struct spdk_nvmf_rdma_poller *rpoller; + int rc, num_cqe, required_num_wr; + + /* Enlarge CQ size dynamically */ + rpoller = rqpair->poller; + required_num_wr = rpoller->required_num_wr + MAX_WR_PER_QP(rqpair->max_queue_depth); + num_cqe = rpoller->num_cqe; + if (num_cqe < required_num_wr) { + num_cqe = spdk_max(num_cqe * 2, required_num_wr); + num_cqe = spdk_min(num_cqe, device->attr.max_cqe); + } + + if 
(rpoller->num_cqe != num_cqe) { + if (required_num_wr > device->attr.max_cqe) { + SPDK_ERRLOG("RDMA CQE requirement (%d) exceeds device max_cqe limitation (%d)\n", + required_num_wr, device->attr.max_cqe); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Resize RDMA CQ from %d to %d\n", rpoller->num_cqe, num_cqe); + rc = ibv_resize_cq(rpoller->cq, num_cqe); + if (rc) { + SPDK_ERRLOG("RDMA CQ resize failed: errno %d: %s\n", errno, spdk_strerror(errno)); + return -1; + } + + rpoller->num_cqe = num_cqe; + } + + rpoller->required_num_wr = required_num_wr; + return 0; +} + +static int +nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_rdma_resource_opts opts; + struct spdk_nvmf_rdma_device *device; + struct spdk_rdma_qp_init_attr qp_init_attr = {}; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + device = rqpair->device; + + qp_init_attr.qp_context = rqpair; + qp_init_attr.pd = device->pd; + qp_init_attr.send_cq = rqpair->poller->cq; + qp_init_attr.recv_cq = rqpair->poller->cq; + + if (rqpair->srq) { + qp_init_attr.srq = rqpair->srq; + } else { + qp_init_attr.cap.max_recv_wr = rqpair->max_queue_depth; + } + + /* SEND, READ, and WRITE operations */ + qp_init_attr.cap.max_send_wr = (uint32_t)rqpair->max_queue_depth * 2; + qp_init_attr.cap.max_send_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_TX_SGE); + qp_init_attr.cap.max_recv_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_RX_SGE); + + if (rqpair->srq == NULL && nvmf_rdma_resize_cq(rqpair, device) < 0) { + SPDK_ERRLOG("Failed to resize the completion queue. 
Cannot initialize qpair.\n"); + goto error; + } + + rqpair->rdma_qp = spdk_rdma_qp_create(rqpair->cm_id, &qp_init_attr); + if (!rqpair->rdma_qp) { + goto error; + } + + rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2), + qp_init_attr.cap.max_send_wr); + rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, qp_init_attr.cap.max_send_sge); + rqpair->max_recv_sge = spdk_min(NVMF_DEFAULT_RX_SGE, qp_init_attr.cap.max_recv_sge); + spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair); + + if (rqpair->poller->srq == NULL) { + rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport); + transport = &rtransport->transport; + + opts.qp = rqpair->rdma_qp->qp; + opts.pd = rqpair->cm_id->pd; + opts.qpair = rqpair; + opts.shared = false; + opts.max_queue_depth = rqpair->max_queue_depth; + opts.in_capsule_data_size = transport->opts.in_capsule_data_size; + + rqpair->resources = nvmf_rdma_resources_create(&opts); + + if (!rqpair->resources) { + SPDK_ERRLOG("Unable to allocate resources for receive queue.\n"); + rdma_destroy_qp(rqpair->cm_id); + goto error; + } + } else { + rqpair->resources = rqpair->poller->resources; + } + + rqpair->current_recv_depth = 0; + STAILQ_INIT(&rqpair->pending_rdma_read_queue); + STAILQ_INIT(&rqpair->pending_rdma_write_queue); + + return 0; + +error: + rdma_destroy_id(rqpair->cm_id); + rqpair->cm_id = NULL; + return -1; +} + +/* Append the given recv wr structure to the resource structs outstanding recvs list. */ +/* This function accepts either a single wr or the first wr in a linked list. 
*/ +static void +nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first) +{ + struct ibv_recv_wr *last; + + last = first; + while (last->next != NULL) { + last = last->next; + } + + if (rqpair->resources->recvs_to_post.first == NULL) { + rqpair->resources->recvs_to_post.first = first; + rqpair->resources->recvs_to_post.last = last; + if (rqpair->srq == NULL) { + STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_recv, rqpair, recv_link); + } + } else { + rqpair->resources->recvs_to_post.last->next = first; + rqpair->resources->recvs_to_post.last = last; + } +} + +static int +request_transfer_in(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + + qpair = req->qpair; + rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER); + assert(rdma_req != NULL); + + if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, &rdma_req->data.wr)) { + STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link); + } + + rqpair->current_read_depth += rdma_req->num_outstanding_data_wr; + rqpair->current_send_depth += rdma_req->num_outstanding_data_wr; + return 0; +} + +static int +request_transfer_out(struct spdk_nvmf_request *req, int *data_posted) +{ + int num_outstanding_data_wr = 0; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvme_cpl *rsp; + struct ibv_send_wr *first = NULL; + + *data_posted = 0; + qpair = req->qpair; + rsp = &req->rsp->nvme_cpl; + rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + /* Advance our sq_head pointer */ + if (qpair->sq_head == qpair->sq_head_max) { + qpair->sq_head = 0; + } else { + 
qpair->sq_head++; + } + rsp->sqhd = qpair->sq_head; + + /* queue the capsule for the recv buffer */ + assert(rdma_req->recv != NULL); + + nvmf_rdma_qpair_queue_recv_wrs(rqpair, &rdma_req->recv->wr); + + rdma_req->recv = NULL; + assert(rqpair->current_recv_depth > 0); + rqpair->current_recv_depth--; + + /* Build the response which consists of optional + * RDMA WRITEs to transfer data, plus an RDMA SEND + * containing the response. + */ + first = &rdma_req->rsp.wr; + + if (rsp->status.sc != SPDK_NVME_SC_SUCCESS) { + /* On failure, data was not read from the controller. So clear the + * number of outstanding data WRs to zero. + */ + rdma_req->num_outstanding_data_wr = 0; + } else if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + first = &rdma_req->data.wr; + *data_posted = 1; + num_outstanding_data_wr = rdma_req->num_outstanding_data_wr; + } + if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, first)) { + STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link); + } + + /* +1 for the rsp wr */ + rqpair->current_send_depth += num_outstanding_data_wr + 1; + + return 0; +} + +static int +nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_accept_private_data accept_data; + struct rdma_conn_param ctrlr_event_data = {}; + int rc; + + accept_data.recfmt = 0; + accept_data.crqsize = rqpair->max_queue_depth; + + ctrlr_event_data.private_data = &accept_data; + ctrlr_event_data.private_data_len = sizeof(accept_data); + if (id->ps == RDMA_PS_TCP) { + ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */ + ctrlr_event_data.initiator_depth = rqpair->max_read_depth; + } + + /* Configure infinite retries for the initiator side qpair. + * When using a shared receive queue on the target side, + * we need to pass this value to the initiator to prevent the + * initiator side NIC from completing SEND requests back to the + * initiator with status rnr_retry_count_exceeded. 
*/ + if (rqpair->srq != NULL) { + ctrlr_event_data.rnr_retry_count = 0x7; + } + + /* When qpair is created without use of rdma cm API, an additional + * information must be provided to initiator in the connection response: + * whether qpair is using SRQ and its qp_num + * Fields below are ignored by rdma cm if qpair has been + * created using rdma cm API. */ + ctrlr_event_data.srq = rqpair->srq ? 1 : 0; + ctrlr_event_data.qp_num = rqpair->rdma_qp->qp->qp_num; + + rc = spdk_rdma_qp_accept(rqpair->rdma_qp, &ctrlr_event_data); + if (rc) { + SPDK_ERRLOG("Error %d on spdk_rdma_qp_accept\n", errno); + } else { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Sent back the accept\n"); + } + + return rc; +} + +static void +nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error) +{ + struct spdk_nvmf_rdma_reject_private_data rej_data; + + rej_data.recfmt = 0; + rej_data.sts = error; + + rdma_reject(id, &rej_data, sizeof(rej_data)); +} + +static int +nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_qpair *rqpair = NULL; + struct spdk_nvmf_rdma_port *port; + struct rdma_conn_param *rdma_param = NULL; + const struct spdk_nvmf_rdma_request_private_data *private_data = NULL; + uint16_t max_queue_depth; + uint16_t max_read_depth; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + assert(event->id != NULL); /* Impossible. Can't even reject the connection. */ + assert(event->id->verbs != NULL); /* Impossible. No way to handle this. 
*/ + + rdma_param = &event->param.conn; + if (rdma_param->private_data == NULL || + rdma_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) { + SPDK_ERRLOG("connect request: no private data provided\n"); + nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH); + return -1; + } + + private_data = rdma_param->private_data; + if (private_data->recfmt != 0) { + SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n"); + nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Connect Recv on fabric intf name %s, dev_name %s\n", + event->id->verbs->device->name, event->id->verbs->device->dev_name); + + port = event->listen_id->context; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Listen Id was %p with verbs %p. ListenAddr: %p\n", + event->listen_id, event->listen_id->verbs, port); + + /* Figure out the supported queue depth. This is a multi-step process + * that takes into account hardware maximums, host provided values, + * and our target's internal memory limits */ + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Calculating Queue Depth\n"); + + /* Start with the maximum queue depth allowed by the target */ + max_queue_depth = rtransport->transport.opts.max_queue_depth; + max_read_depth = rtransport->transport.opts.max_queue_depth; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Target Max Queue Depth: %d\n", + rtransport->transport.opts.max_queue_depth); + + /* Next check the local NIC's hardware limitations */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, + "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n", + port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom); + max_queue_depth = spdk_min(max_queue_depth, port->device->attr.max_qp_wr); + max_read_depth = spdk_min(max_read_depth, port->device->attr.max_qp_init_rd_atom); + + /* Next check the remote NIC's hardware limitations */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, + "Host (Initiator) NIC Max Incoming RDMA R/W 
operations: %d Max Outgoing RDMA R/W operations: %d\n", + rdma_param->initiator_depth, rdma_param->responder_resources); + if (rdma_param->initiator_depth > 0) { + max_read_depth = spdk_min(max_read_depth, rdma_param->initiator_depth); + } + + /* Finally check for the host software requested values, which are + * optional. */ + if (rdma_param->private_data != NULL && + rdma_param->private_data_len >= sizeof(struct spdk_nvmf_rdma_request_private_data)) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Receive Queue Size: %d\n", private_data->hrqsize); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Send Queue Size: %d\n", private_data->hsqsize); + max_queue_depth = spdk_min(max_queue_depth, private_data->hrqsize); + max_queue_depth = spdk_min(max_queue_depth, private_data->hsqsize + 1); + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n", + max_queue_depth, max_read_depth); + + rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair)); + if (rqpair == NULL) { + SPDK_ERRLOG("Could not allocate new connection.\n"); + nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); + return -1; + } + + rqpair->device = port->device; + rqpair->max_queue_depth = max_queue_depth; + rqpair->max_read_depth = max_read_depth; + rqpair->cm_id = event->id; + rqpair->listen_id = event->listen_id; + rqpair->qpair.transport = transport; + STAILQ_INIT(&rqpair->ibv_events); + /* use qid from the private data to determine the qpair type + qid will be set to the appropriate value when the controller is created */ + rqpair->qpair.qid = private_data->qid; + + event->id->context = &rqpair->qpair; + + spdk_nvmf_tgt_new_qpair(transport->tgt, &rqpair->qpair); + + return 0; +} + +static int +nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct ibv_pd *pd = cb_ctx; + struct ibv_mr *mr; + int rc; + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + if (!g_nvmf_hooks.get_rkey) { + 
mr = ibv_reg_mr(pd, vaddr, size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (mr == NULL) { + SPDK_ERRLOG("ibv_reg_mr() failed\n"); + return -1; + } else { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); + } + } else { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, + g_nvmf_hooks.get_rkey(pd, vaddr, size)); + } + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: + if (!g_nvmf_hooks.get_rkey) { + mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); + if (mr) { + ibv_dereg_mr(mr); + } + } + rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); + break; + default: + SPDK_UNREACHABLE(); + } + + return rc; +} + +static int +nvmf_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2) +{ + /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */ + return addr_1 == addr_2; +} + +static inline void +nvmf_rdma_setup_wr(struct ibv_send_wr *wr, struct ibv_send_wr *next, + enum spdk_nvme_data_transfer xfer) +{ + if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + wr->opcode = IBV_WR_RDMA_WRITE; + wr->send_flags = 0; + wr->next = next; + } else if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + wr->opcode = IBV_WR_RDMA_READ; + wr->send_flags = IBV_SEND_SIGNALED; + wr->next = NULL; + } else { + assert(0); + } +} + +static int +nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_request *rdma_req, + uint32_t num_sgl_descriptors) +{ + struct spdk_nvmf_rdma_request_data *work_requests[SPDK_NVMF_MAX_SGL_ENTRIES]; + struct spdk_nvmf_rdma_request_data *current_data_wr; + uint32_t i; + + if (num_sgl_descriptors > SPDK_NVMF_MAX_SGL_ENTRIES) { + SPDK_ERRLOG("Requested too much entries (%u), the limit is %u\n", + num_sgl_descriptors, SPDK_NVMF_MAX_SGL_ENTRIES); + return -EINVAL; + } + + if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) { + 
return -ENOMEM; + } + + current_data_wr = &rdma_req->data; + + for (i = 0; i < num_sgl_descriptors; i++) { + nvmf_rdma_setup_wr(¤t_data_wr->wr, &work_requests[i]->wr, rdma_req->req.xfer); + current_data_wr->wr.next = &work_requests[i]->wr; + current_data_wr = work_requests[i]; + current_data_wr->wr.sg_list = current_data_wr->sgl; + current_data_wr->wr.wr_id = rdma_req->data.wr.wr_id; + } + + nvmf_rdma_setup_wr(¤t_data_wr->wr, &rdma_req->rsp.wr, rdma_req->req.xfer); + + return 0; +} + +static inline void +nvmf_rdma_setup_request(struct spdk_nvmf_rdma_request *rdma_req) +{ + struct ibv_send_wr *wr = &rdma_req->data.wr; + struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1; + + wr->wr.rdma.rkey = sgl->keyed.key; + wr->wr.rdma.remote_addr = sgl->address; + nvmf_rdma_setup_wr(wr, &rdma_req->rsp.wr, rdma_req->req.xfer); +} + +static inline void +nvmf_rdma_update_remote_addr(struct spdk_nvmf_rdma_request *rdma_req, uint32_t num_wrs) +{ + struct ibv_send_wr *wr = &rdma_req->data.wr; + struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1; + uint32_t i; + int j; + uint64_t remote_addr_offset = 0; + + for (i = 0; i < num_wrs; ++i) { + wr->wr.rdma.rkey = sgl->keyed.key; + wr->wr.rdma.remote_addr = sgl->address + remote_addr_offset; + for (j = 0; j < wr->num_sge; ++j) { + remote_addr_offset += wr->sg_list[j].length; + } + wr = wr->next; + } +} + +/* This function is used in the rare case that we have a buffer split over multiple memory regions. 
*/
+/* Swap *buf for a fresh data buffer (from the poll group cache, else from
+ * the transport pool) and park the old one on rgroup->retired_bufs.
+ * Returns 0 on success. Returns -ENOMEM, leaving *buf untouched, when *buf
+ * is NULL or no replacement buffer is available. */
+static int
+nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
+{
+	struct spdk_nvmf_transport_poll_group	*group = &rgroup->group;
+	struct spdk_nvmf_transport		*transport = group->transport;
+	struct spdk_nvmf_transport_pg_cache_buf	*old_buf;
+	void					*new_buf;
+
+	/* Nothing to retire; checked first so no buffer is taken and lost. */
+	if (*buf == NULL) {
+		return -ENOMEM;
+	}
+
+	if (!(STAILQ_EMPTY(&group->buf_cache))) {
+		group->buf_cache_count--;
+		new_buf = STAILQ_FIRST(&group->buf_cache);
+		STAILQ_REMOVE_HEAD(&group->buf_cache, link);
+	} else {
+		new_buf = spdk_mempool_get(transport->data_buf_pool);
+	}
+
+	/* The replacement is what can be missing (pool exhausted); never
+	 * store NULL into the caller's buffer slot. */
+	if (new_buf == NULL) {
+		return -ENOMEM;
+	}
+
+	old_buf = *buf;
+	STAILQ_INSERT_HEAD(&rgroup->retired_bufs, old_buf, link);
+	*buf = new_buf;
+	return 0;
+}
+
+/* Look up the local key covering `iov` in the device memory map.
+ * Returns true and stores the key in *_lkey when one translation covers the
+ * whole iov; returns false when it does not or no memory region is found. */
+static bool
+nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
+		   uint32_t *_lkey)
+{
+	uint64_t	translation_len;
+	uint32_t	lkey;
+
+	translation_len = iov->iov_len;
+
+	if (!g_nvmf_hooks.get_rkey) {
+		struct ibv_mr *mr;
+
+		/* Without the hook the translation is an ibv_mr pointer. Check it
+		 * before use, as nvmf_rdma_mem_notify() does on unregister. */
+		mr = (struct ibv_mr *)spdk_mem_map_translate(device->map,
+				(uint64_t)iov->iov_base, &translation_len);
+		if (spdk_unlikely(mr == NULL)) {
+			return false;
+		}
+		lkey = mr->lkey;
+	} else {
+		lkey = spdk_mem_map_translate(device->map,
+					      (uint64_t)iov->iov_base, &translation_len);
+	}
+
+	if (spdk_unlikely(translation_len < iov->iov_len)) {
+		return false;
+	}
+
+	*_lkey = lkey;
+	return true;
+}
+
+/* Add scatter-gather entries for one iov to the current WR (*_wr).
+ * Without a DIF context the iov becomes a single SGE. With one, only the
+ * data part of each block gets an SGE and the metadata is skipped, moving on
+ * to the next chained WR when the current one is full and extra WRs remain.
+ * *_remaining_data_block and *_offset carry block position across calls.
+ * Returns false only when no usable local key is found for the iov. */
+static bool
+nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
+		      struct iovec *iov, struct ibv_send_wr **_wr,
+		      uint32_t *_remaining_data_block, uint32_t *_offset,
+		      uint32_t *_num_extra_wrs,
+		      const struct spdk_dif_ctx *dif_ctx)
+{
+	struct ibv_send_wr *wr = *_wr;
+	struct ibv_sge	*sg_ele = &wr->sg_list[wr->num_sge];
+	uint32_t	lkey = 0;
+	uint32_t	remaining, data_block_size, md_size, sge_len;
+
+	if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &lkey))) {
+		/* This is a very rare case that can occur when using DPDK version < 19.05 */
+		SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions. Removing it from circulation.\n");
+		return false;
+	}
+
+	if (spdk_likely(!dif_ctx)) {
+		sg_ele->lkey = lkey;
+		sg_ele->addr = (uintptr_t)(iov->iov_base);
+		sg_ele->length = iov->iov_len;
+		wr->num_sge++;
+	} else {
+		remaining = iov->iov_len - *_offset;
+		data_block_size = dif_ctx->block_size - dif_ctx->md_size;
+		md_size = dif_ctx->md_size;
+
+		while (remaining) {
+			if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) {
+				if (*_num_extra_wrs > 0 && wr->next) {
+					*_wr = wr->next;
+					wr = *_wr;
+					wr->num_sge = 0;
+					sg_ele = &wr->sg_list[wr->num_sge];
+					(*_num_extra_wrs)--;
+				} else {
+					break;
+				}
+			}
+			sg_ele->lkey = lkey;
+			sg_ele->addr = (uintptr_t)((char *)iov->iov_base + *_offset);
+			sge_len = spdk_min(remaining, *_remaining_data_block);
+			sg_ele->length = sge_len;
+			remaining -= sge_len;
+			*_remaining_data_block -= sge_len;
+			*_offset += sge_len;
+
+			sg_ele++;
+			wr->num_sge++;
+
+			if (*_remaining_data_block == 0) {
+				/* skip metadata */
+				*_offset += md_size;
+				/* Metadata that do not fit this IO buffer will be included in the next IO buffer */
+				remaining -= spdk_min(remaining, md_size);
+				*_remaining_data_block = data_block_size;
+			}
+
+			if (remaining == 0) {
+				/* By subtracting the size of the last IOV from the offset, we ensure that we skip
+				   the remaining metadata bits at the beginning of the next buffer */
+				*_offset -= iov->iov_len;
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Build the SGL of `wr` (and chained extra WRs) for `length` bytes from the
+ * request's iovs, starting at rdma_req->iovpos. A buffer with no usable key
+ * is replaced and retried. Returns 0, -ENOMEM when a buffer cannot be
+ * replaced, or -EINVAL when the SG entries run out before `length` does. */
+static int
+nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
+		      struct spdk_nvmf_rdma_device *device,
+		      struct spdk_nvmf_rdma_request *rdma_req,
+		      struct ibv_send_wr *wr,
+		      uint32_t length,
+		      uint32_t num_extra_wrs)
+{
+	struct spdk_nvmf_request *req = &rdma_req->req;
+	struct spdk_dif_ctx *dif_ctx = NULL;
+	uint32_t remaining_data_block = 0;
+	uint32_t offset = 0;
+
+	if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) {
+		dif_ctx = &rdma_req->req.dif.dif_ctx;
+		remaining_data_block = dif_ctx->block_size - dif_ctx->md_size;
+	}
+
+	wr->num_sge = 0;
+
+	while (length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) {
+		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], &wr,
+				     &remaining_data_block, &offset, &num_extra_wrs, dif_ctx))) {
+			if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
+				return -ENOMEM;
+			}
+			/* Point the iov at the aligned start of the new buffer. */
+			req->iov[rdma_req->iovpos].iov_base = (void *)((uintptr_t)(req->buffers[rdma_req->iovpos] +
+							      NVMF_DATA_BUFFER_MASK) &
+							      ~NVMF_DATA_BUFFER_MASK);
+		}
+
+		length -= req->iov[rdma_req->iovpos].iov_len;
+		rdma_req->iovpos++;
+	}
+
+	if (length) {
+		SPDK_ERRLOG("Not enough SG entries to hold data buffer\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Estimate how many WRs are needed for `length` bytes held in buffers of
+ * `io_unit_size` bytes split into blocks of `block_size` bytes: one SGE per
+ * (possibly partial) block, plus one per buffer whose length is not a block
+ * multiple, packed SPDK_NVMF_MAX_SGL_ENTRIES per WR. */
+static inline uint32_t
+nvmf_rdma_calc_num_wrs(uint32_t length, uint32_t io_unit_size, uint32_t block_size)
+{
+	/* estimate the number of SG entries and WRs needed to process the request */
+	uint32_t num_sge = 0;
+	uint32_t i;
+	uint32_t num_buffers = SPDK_CEIL_DIV(length, io_unit_size);
+
+	for (i = 0; i < num_buffers && length > 0; i++) {
+		uint32_t buffer_len = spdk_min(length, io_unit_size);
+		uint32_t num_sge_in_block = SPDK_CEIL_DIV(buffer_len, block_size);
+
+		if (num_sge_in_block * block_size > buffer_len) {
+			++num_sge_in_block;
+		}
+		num_sge += num_sge_in_block;
+		length -= buffer_len;
+	}
+	return SPDK_CEIL_DIV(num_sge, SPDK_NVMF_MAX_SGL_ENTRIES);
+}
+
+static int
+nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
+			    struct spdk_nvmf_rdma_device *device,
+			    struct spdk_nvmf_rdma_request *rdma_req,
+			    uint32_t length)
+{
+	struct spdk_nvmf_rdma_qpair		*rqpair;
+	struct spdk_nvmf_rdma_poll_group	*rgroup;
+	struct spdk_nvmf_request		*req = &rdma_req->req;
+	struct ibv_send_wr			*wr = &rdma_req->data.wr;
+	int					rc;
+	uint32_t				num_wrs = 1;
+
+	rqpair = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	rgroup = rqpair->poller->group;
+
+	/* rdma wr specifics */
+	nvmf_rdma_setup_request(rdma_req);
+
+	rc = spdk_nvmf_request_get_buffers(req, &rgroup->group,
&rtransport->transport, + length); + if (rc != 0) { + return rc; + } + + assert(req->iovcnt <= rqpair->max_send_sge); + + rdma_req->iovpos = 0; + + if (spdk_unlikely(req->dif.dif_insert_or_strip)) { + num_wrs = nvmf_rdma_calc_num_wrs(length, rtransport->transport.opts.io_unit_size, + req->dif.dif_ctx.block_size); + if (num_wrs > 1) { + rc = nvmf_request_alloc_wrs(rtransport, rdma_req, num_wrs - 1); + if (rc != 0) { + goto err_exit; + } + } + } + + rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length, num_wrs - 1); + if (spdk_unlikely(rc != 0)) { + goto err_exit; + } + + if (spdk_unlikely(num_wrs > 1)) { + nvmf_rdma_update_remote_addr(rdma_req, num_wrs); + } + + /* set the number of outstanding data WRs for this request. */ + rdma_req->num_outstanding_data_wr = num_wrs; + + return rc; + +err_exit: + spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport); + nvmf_rdma_request_free_data(rdma_req, rtransport); + req->iovcnt = 0; + return rc; +} + +static int +nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, + struct spdk_nvmf_rdma_request *rdma_req) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct ibv_send_wr *current_wr; + struct spdk_nvmf_request *req = &rdma_req->req; + struct spdk_nvme_sgl_descriptor *inline_segment, *desc; + uint32_t num_sgl_descriptors; + uint32_t lengths[SPDK_NVMF_MAX_SGL_ENTRIES]; + uint32_t i; + int rc; + + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + rgroup = rqpair->poller->group; + + inline_segment = &req->cmd->nvme_cmd.dptr.sgl1; + assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT); + assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET); + + num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor); + assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES); + + if 
(nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) { + return -ENOMEM; + } + + desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address; + for (i = 0; i < num_sgl_descriptors; i++) { + if (spdk_likely(!req->dif.dif_insert_or_strip)) { + lengths[i] = desc->keyed.length; + } else { + req->dif.orig_length += desc->keyed.length; + lengths[i] = spdk_dif_get_length_with_md(desc->keyed.length, &req->dif.dif_ctx); + req->dif.elba_length += lengths[i]; + } + desc++; + } + + rc = spdk_nvmf_request_get_buffers_multi(req, &rgroup->group, &rtransport->transport, + lengths, num_sgl_descriptors); + if (rc != 0) { + nvmf_rdma_request_free_data(rdma_req, rtransport); + return rc; + } + + /* The first WR must always be the embedded data WR. This is how we unwind them later. */ + current_wr = &rdma_req->data.wr; + assert(current_wr != NULL); + + req->length = 0; + rdma_req->iovpos = 0; + desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address; + for (i = 0; i < num_sgl_descriptors; i++) { + /* The descriptors must be keyed data block descriptors with an address, not an offset. */ + if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK || + desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) { + rc = -EINVAL; + goto err_exit; + } + + current_wr->num_sge = 0; + + rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i], 0); + if (rc != 0) { + rc = -ENOMEM; + goto err_exit; + } + + req->length += desc->keyed.length; + current_wr->wr.rdma.rkey = desc->keyed.key; + current_wr->wr.rdma.remote_addr = desc->address; + current_wr = current_wr->next; + desc++; + } + +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + /* Go back to the last descriptor in the list. 
*/ + desc--; + if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) { + if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) { + rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV; + rdma_req->rsp.wr.imm_data = desc->keyed.key; + } + } +#endif + + rdma_req->num_outstanding_data_wr = num_sgl_descriptors; + + return 0; + +err_exit: + spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport); + nvmf_rdma_request_free_data(rdma_req, rtransport); + return rc; +} + +static int +nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, + struct spdk_nvmf_rdma_request *rdma_req) +{ + struct spdk_nvmf_request *req = &rdma_req->req; + struct spdk_nvme_cpl *rsp; + struct spdk_nvme_sgl_descriptor *sgl; + int rc; + uint32_t length; + + rsp = &req->rsp->nvme_cpl; + sgl = &req->cmd->nvme_cmd.dptr.sgl1; + + if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK && + (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS || + sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) { + + length = sgl->keyed.length; + if (length > rtransport->transport.opts.max_io_size) { + SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", + length, rtransport->transport.opts.max_io_size); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) { + if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) { + rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV; + rdma_req->rsp.wr.imm_data = sgl->keyed.key; + } + } +#endif + + /* fill request length and populate iovs */ + req->length = length; + + if (spdk_unlikely(req->dif.dif_insert_or_strip)) { + req->dif.orig_length = length; + length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); + req->dif.elba_length = length; + } + + rc = nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req, 
length); + if (spdk_unlikely(rc < 0)) { + if (rc == -EINVAL) { + SPDK_ERRLOG("SGL length exceeds the max I/O size\n"); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + /* No available buffers. Queue this request up. */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req); + return 0; + } + + /* backward compatible */ + req->data = req->iov[0].iov_base; + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req, + req->iovcnt); + + return 0; + } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + uint64_t offset = sgl->address; + uint32_t max_len = rtransport->transport.opts.in_capsule_data_size; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", + offset, sgl->unkeyed.length); + + if (offset > max_len) { + SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", + offset, max_len); + rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; + return -1; + } + max_len -= (uint32_t)offset; + + if (sgl->unkeyed.length > max_len) { + SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", + sgl->unkeyed.length, max_len); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + + rdma_req->num_outstanding_data_wr = 0; + req->data = rdma_req->recv->buf + offset; + req->data_from_pool = false; + req->length = sgl->unkeyed.length; + + req->iov[0].iov_base = req->data; + req->iov[0].iov_len = req->length; + req->iovcnt = 1; + + return 0; + } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT && + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + + rc = nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req); + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. 
Queueing request %p\n", rdma_req); + return 0; + } else if (rc == -EINVAL) { + SPDK_ERRLOG("Multi SGL element request length exceeds the max I/O size\n"); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + + /* backward compatible */ + req->data = req->iov[0].iov_base; + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req, + req->iovcnt); + + return 0; + } + + SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", + sgl->generic.type, sgl->generic.subtype); + rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; + return -1; +} + +static void +_nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req, + struct spdk_nvmf_rdma_transport *rtransport) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_poll_group *rgroup; + + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + if (rdma_req->req.data_from_pool) { + rgroup = rqpair->poller->group; + + spdk_nvmf_request_free_buffers(&rdma_req->req, &rgroup->group, &rtransport->transport); + } + nvmf_rdma_request_free_data(rdma_req, rtransport); + rdma_req->req.length = 0; + rdma_req->req.iovcnt = 0; + rdma_req->req.data = NULL; + rdma_req->rsp.wr.next = NULL; + rdma_req->data.wr.next = NULL; + memset(&rdma_req->req.dif, 0, sizeof(rdma_req->req.dif)); + rqpair->qd--; + + STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link); + rdma_req->state = RDMA_REQUEST_STATE_FREE; +} + +bool +nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_request *rdma_req) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl; + int rc; + struct spdk_nvmf_rdma_recv *rdma_recv; + enum spdk_nvmf_rdma_request_state prev_state; + bool progress = false; + int data_posted; + uint32_t num_blocks; + + rqpair = 
SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + device = rqpair->device; + rgroup = rqpair->poller->group; + + assert(rdma_req->state != RDMA_REQUEST_STATE_FREE); + + /* If the queue pair is in an error state, force the request to the completed state + * to release resources. */ + if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { + if (rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER) { + STAILQ_REMOVE(&rgroup->group.pending_buf_queue, &rdma_req->req, spdk_nvmf_request, buf_link); + } else if (rdma_req->state == RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING) { + STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link); + } else if (rdma_req->state == RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING) { + STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link); + } + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + } + + /* The loop here is to allow for several back-to-back state changes. */ + do { + prev_state = rdma_req->state; + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p entering state %d\n", rdma_req, prev_state); + + switch (rdma_req->state) { + case RDMA_REQUEST_STATE_FREE: + /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW + * to escape this state. 
*/ + break; + case RDMA_REQUEST_STATE_NEW: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + rdma_recv = rdma_req->recv; + + /* The first element of the SGL is the NVMe command */ + rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr; + memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp)); + + if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + break; + } + + if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&rdma_req->req, &rdma_req->req.dif.dif_ctx))) { + rdma_req->req.dif.dif_insert_or_strip = true; + } + +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + rdma_req->rsp.wr.opcode = IBV_WR_SEND; + rdma_req->rsp.wr.imm_data = 0; +#endif + + /* The next state transition depends on the data transfer needs of this request. */ + rdma_req->req.xfer = spdk_nvmf_req_get_xfer(&rdma_req->req); + + /* If no data to transfer, ready to execute. */ + if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) { + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; + break; + } + + rdma_req->state = RDMA_REQUEST_STATE_NEED_BUFFER; + STAILQ_INSERT_TAIL(&rgroup->group.pending_buf_queue, &rdma_req->req, buf_link); + break; + case RDMA_REQUEST_STATE_NEED_BUFFER: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE); + + if (&rdma_req->req != STAILQ_FIRST(&rgroup->group.pending_buf_queue)) { + /* This request needs to wait in line to obtain a buffer */ + break; + } + + /* Try to get a data buffer */ + rc = nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req); + if (rc < 0) { + STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link); + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + break; + } + + if (!rdma_req->req.data) { + /* No buffers available. 
*/ + rgroup->stat.pending_data_buffer++; + break; + } + + STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link); + + /* If data is transferring from host to controller and the data didn't + * arrive using in capsule data, we need to do a transfer from the host. + */ + if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && + rdma_req->req.data_from_pool) { + STAILQ_INSERT_TAIL(&rqpair->pending_rdma_read_queue, rdma_req, state_link); + rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING; + break; + } + + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; + break; + case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_read_queue)) { + /* This request needs to wait in line to perform RDMA */ + break; + } + if (rqpair->current_send_depth + rdma_req->num_outstanding_data_wr > rqpair->max_send_depth + || rqpair->current_read_depth + rdma_req->num_outstanding_data_wr > rqpair->max_read_depth) { + /* We can only have so many WRs outstanding. we have to wait until some finish. */ + rqpair->poller->stat.pending_rdma_read++; + break; + } + + /* We have already verified that this request is the head of the queue. 
*/ + STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_read_queue, state_link); + + rc = request_transfer_in(&rdma_req->req); + if (!rc) { + rdma_req->state = RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER; + } else { + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + } + break; + case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE + * to escape this state. */ + break; + case RDMA_REQUEST_STATE_READY_TO_EXECUTE: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) { + if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + /* generate DIF for write operation */ + num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size); + assert(num_blocks > 0); + + rc = spdk_dif_generate(rdma_req->req.iov, rdma_req->req.iovcnt, + num_blocks, &rdma_req->req.dif.dif_ctx); + if (rc != 0) { + SPDK_ERRLOG("DIF generation failed\n"); + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + nvmf_rdma_start_disconnect(rqpair); + break; + } + } + + assert(rdma_req->req.dif.elba_length >= rdma_req->req.length); + /* set extended length before IO operation */ + rdma_req->req.length = rdma_req->req.dif.elba_length; + } + + rdma_req->state = RDMA_REQUEST_STATE_EXECUTING; + spdk_nvmf_request_exec(&rdma_req->req); + break; + case RDMA_REQUEST_STATE_EXECUTING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED + * to escape this state. 
*/ + break; + case RDMA_REQUEST_STATE_EXECUTED: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && + rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + STAILQ_INSERT_TAIL(&rqpair->pending_rdma_write_queue, rdma_req, state_link); + rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING; + } else { + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + } + if (spdk_unlikely(rdma_req->req.dif.dif_insert_or_strip)) { + /* restore the original length */ + rdma_req->req.length = rdma_req->req.dif.orig_length; + + if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + struct spdk_dif_error error_blk; + + num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size); + + rc = spdk_dif_verify(rdma_req->req.iov, rdma_req->req.iovcnt, num_blocks, + &rdma_req->req.dif.dif_ctx, &error_blk); + if (rc) { + struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl; + + SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", error_blk.err_type, + error_blk.err_offset); + rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; + rsp->status.sc = nvmf_rdma_dif_error_to_compl_status(error_blk.err_type); + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link); + } + } + } + break; + case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_write_queue)) { + /* This request needs to wait in line to perform RDMA */ + break; + } + if ((rqpair->current_send_depth + rdma_req->num_outstanding_data_wr + 1) > + rqpair->max_send_depth) { + /* We can only have so many WRs outstanding. we have to wait until some finish. 
+ * +1 since each request has an additional wr in the resp. */ + rqpair->poller->stat.pending_rdma_write++; + break; + } + + /* We have already verified that this request is the head of the queue. */ + STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_write_queue, state_link); + + /* The data transfer will be kicked off from + * RDMA_REQUEST_STATE_READY_TO_COMPLETE state. + */ + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + break; + case RDMA_REQUEST_STATE_READY_TO_COMPLETE: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + rc = request_transfer_out(&rdma_req->req, &data_posted); + assert(rc == 0); /* No good way to handle this currently */ + if (rc) { + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + } else { + rdma_req->state = data_posted ? RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST : + RDMA_REQUEST_STATE_COMPLETING; + } + break; + case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED + * to escape this state. */ + break; + case RDMA_REQUEST_STATE_COMPLETING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED + * to escape this state. 
*/ + break; + case RDMA_REQUEST_STATE_COMPLETED: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + rqpair->poller->stat.request_latency += spdk_get_ticks() - rdma_req->receive_tsc; + _nvmf_rdma_request_free(rdma_req, rtransport); + break; + case RDMA_REQUEST_NUM_STATES: + default: + assert(0); + break; + } + + if (rdma_req->state != prev_state) { + progress = true; + } + } while (rdma_req->state != prev_state); + + return progress; +} + +/* Public API callbacks begin here */ + +#define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128 +#define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128 +#define SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH 4096 +#define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 +#define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 +#define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072 +#define SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE (SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE / SPDK_NVMF_MAX_SGL_ENTRIES) +#define SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS 4095 +#define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE 32 +#define SPDK_NVMF_RDMA_DEFAULT_NO_SRQ false +#define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false +#define SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG 100 +#define SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC 1 + +static void +nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts) +{ + opts->max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH; + opts->max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR; + opts->in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE; + opts->max_io_size = SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE; + opts->io_unit_size = SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE; + opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH; + opts->num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS; + opts->buf_cache_size = SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE; + opts->max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH; + opts->no_srq = SPDK_NVMF_RDMA_DEFAULT_NO_SRQ; + 
opts->dif_insert_or_strip = SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP; + opts->acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG; + opts->abort_timeout_sec = SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC; +} + +const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = { + .notify_cb = nvmf_rdma_mem_notify, + .are_contiguous = nvmf_rdma_check_contiguous_entries +}; + +static int nvmf_rdma_destroy(struct spdk_nvmf_transport *transport); + +static struct spdk_nvmf_transport * +nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) +{ + int rc; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device, *tmp; + struct ibv_context **contexts; + uint32_t i; + int flag; + uint32_t sge_count; + uint32_t min_shared_buffers; + int max_device_sge = SPDK_NVMF_MAX_SGL_ENTRIES; + pthread_mutexattr_t attr; + + rtransport = calloc(1, sizeof(*rtransport)); + if (!rtransport) { + return NULL; + } + + if (pthread_mutexattr_init(&attr)) { + SPDK_ERRLOG("pthread_mutexattr_init() failed\n"); + free(rtransport); + return NULL; + } + + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) { + SPDK_ERRLOG("pthread_mutexattr_settype() failed\n"); + pthread_mutexattr_destroy(&attr); + free(rtransport); + return NULL; + } + + if (pthread_mutex_init(&rtransport->lock, &attr)) { + SPDK_ERRLOG("pthread_mutex_init() failed\n"); + pthread_mutexattr_destroy(&attr); + free(rtransport); + return NULL; + } + + pthread_mutexattr_destroy(&attr); + + TAILQ_INIT(&rtransport->devices); + TAILQ_INIT(&rtransport->ports); + TAILQ_INIT(&rtransport->poll_groups); + + rtransport->transport.ops = &spdk_nvmf_transport_rdma; + + SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n" + " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" + " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" + " in_capsule_data_size=%d, max_aq_depth=%d,\n" + " num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d," + " acceptor_backlog=%d, abort_timeout_sec=%d\n", + opts->max_queue_depth, + opts->max_io_size, + 
opts->max_qpairs_per_ctrlr - 1, + opts->io_unit_size, + opts->in_capsule_data_size, + opts->max_aq_depth, + opts->num_shared_buffers, + opts->max_srq_depth, + opts->no_srq, + opts->acceptor_backlog, + opts->abort_timeout_sec); + + /* I/O unit size cannot be larger than max I/O size */ + if (opts->io_unit_size > opts->max_io_size) { + opts->io_unit_size = opts->max_io_size; + } + + if (opts->acceptor_backlog <= 0) { + SPDK_ERRLOG("The acceptor backlog cannot be less than 1, setting to the default value of (%d).\n", + SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG); + opts->acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG; + } + + if (opts->num_shared_buffers < (SPDK_NVMF_MAX_SGL_ENTRIES * 2)) { + SPDK_ERRLOG("The number of shared data buffers (%d) is less than" + "the minimum number required to guarantee that forward progress can be made (%d)\n", + opts->num_shared_buffers, (SPDK_NVMF_MAX_SGL_ENTRIES * 2)); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size; + if (min_shared_buffers > opts->num_shared_buffers) { + SPDK_ERRLOG("There are not enough buffers to satisfy" + "per-poll group caches for each thread. (%" PRIu32 ")" + "supplied. 
(%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); + SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + sge_count = opts->max_io_size / opts->io_unit_size; + if (sge_count > NVMF_DEFAULT_TX_SGE) { + SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->event_channel = rdma_create_event_channel(); + if (rtransport->event_channel == NULL) { + SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno)); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + flag = fcntl(rtransport->event_channel->fd, F_GETFL); + if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", + rtransport->event_channel->fd, spdk_strerror(errno)); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->data_wr_pool = spdk_mempool_create("spdk_nvmf_rdma_wr_data", + opts->max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES, + sizeof(struct spdk_nvmf_rdma_request_data), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!rtransport->data_wr_pool) { + SPDK_ERRLOG("Unable to allocate work request pool for poll group\n"); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + contexts = rdma_get_devices(NULL); + if (contexts == NULL) { + SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + i = 0; + rc = 0; + while (contexts[i] != NULL) { + device = calloc(1, sizeof(*device)); + if (!device) { + SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n"); + rc = -ENOMEM; + break; + } + device->context = contexts[i]; + rc = ibv_query_device(device->context, &device->attr); + if (rc < 0) { + SPDK_ERRLOG("Failed to query RDMA 
device attributes.\n"); + free(device); + break; + + } + + max_device_sge = spdk_min(max_device_sge, device->attr.max_sge); + +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) { + SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE,"); + SPDK_WARNLOG("but the device with vendor ID %u does not.\n", device->attr.vendor_id); + } + + /** + * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE. + * The Soft-RoCE RXE driver does not currently support send with invalidate, + * but incorrectly reports that it does. There are changes making their way + * through the kernel now that will enable this feature. When they are merged, + * we can conditionally enable this feature. + * + * TODO: enable this for versions of the kernel rxe driver that support it. + */ + if (device->attr.vendor_id == 0) { + device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS); + } +#endif + + /* set up device context async ev fd as NON_BLOCKING */ + flag = fcntl(device->context->async_fd, F_GETFL); + rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK); + if (rc < 0) { + SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n"); + free(device); + break; + } + + TAILQ_INSERT_TAIL(&rtransport->devices, device, link); + i++; + + if (g_nvmf_hooks.get_ibv_pd) { + device->pd = g_nvmf_hooks.get_ibv_pd(NULL, device->context); + } else { + device->pd = ibv_alloc_pd(device->context); + } + + if (!device->pd) { + SPDK_ERRLOG("Unable to allocate protection domain.\n"); + rc = -ENOMEM; + break; + } + + assert(device->map == NULL); + + device->map = spdk_mem_map_alloc(0, &g_nvmf_rdma_map_ops, device->pd); + if (!device->map) { + SPDK_ERRLOG("Unable to allocate memory map for listen address\n"); + rc = -ENOMEM; + break; + } + + assert(device->map != NULL); + assert(device->pd != NULL); + } + rdma_free_devices(contexts); + + if (opts->io_unit_size * max_device_sge < 
opts->max_io_size) { + /* divide and round up. */ + opts->io_unit_size = (opts->max_io_size + max_device_sge - 1) / max_device_sge; + + /* round up to the nearest 4k. */ + opts->io_unit_size = (opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT - 1) & ~NVMF_DATA_BUFFER_MASK; + + opts->io_unit_size = spdk_max(opts->io_unit_size, SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE); + SPDK_NOTICELOG("Adjusting the io unit size to fit the device's maximum I/O size. New I/O unit size %u\n", + opts->io_unit_size); + } + + if (rc < 0) { + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + /* Set up poll descriptor array to monitor events from RDMA and IB + * in a single poll syscall + */ + rtransport->npoll_fds = i + 1; + i = 0; + rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd)); + if (rtransport->poll_fds == NULL) { + SPDK_ERRLOG("poll_fds allocation failed\n"); + nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->poll_fds[i].fd = rtransport->event_channel->fd; + rtransport->poll_fds[i++].events = POLLIN; + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { + rtransport->poll_fds[i].fd = device->context->async_fd; + rtransport->poll_fds[i++].events = POLLIN; + } + + return &rtransport->transport; +} + +static int +nvmf_rdma_destroy(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_port *port, *port_tmp; + struct spdk_nvmf_rdma_device *device, *device_tmp; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) { + TAILQ_REMOVE(&rtransport->ports, port, link); + rdma_destroy_id(port->id); + free(port); + } + + if (rtransport->poll_fds != NULL) { + free(rtransport->poll_fds); + } + + if (rtransport->event_channel != NULL) { + rdma_destroy_event_channel(rtransport->event_channel); + } + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) 
{ + TAILQ_REMOVE(&rtransport->devices, device, link); + if (device->map) { + spdk_mem_map_free(&device->map); + } + if (device->pd) { + if (!g_nvmf_hooks.get_ibv_pd) { + ibv_dealloc_pd(device->pd); + } + } + free(device); + } + + if (rtransport->data_wr_pool != NULL) { + if (spdk_mempool_count(rtransport->data_wr_pool) != + (transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES)) { + SPDK_ERRLOG("transport wr pool count is %zu but should be %u\n", + spdk_mempool_count(rtransport->data_wr_pool), + transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES); + } + } + + spdk_mempool_free(rtransport->data_wr_pool); + + pthread_mutex_destroy(&rtransport->lock); + free(rtransport); + + return 0; +} + +static int +nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, + struct spdk_nvme_transport_id *trid, + bool peer); + +static int +nvmf_rdma_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_port *port; + struct addrinfo *res; + struct addrinfo hints; + int family; + int rc; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + assert(rtransport->event_channel != NULL); + + pthread_mutex_lock(&rtransport->lock); + port = calloc(1, sizeof(*port)); + if (!port) { + SPDK_ERRLOG("Port allocation failed\n"); + pthread_mutex_unlock(&rtransport->lock); + return -ENOMEM; + } + + port->trid = trid; + + switch (trid->adrfam) { + case SPDK_NVMF_ADRFAM_IPV4: + family = AF_INET; + break; + case SPDK_NVMF_ADRFAM_IPV6: + family = AF_INET6; + break; + default: + SPDK_ERRLOG("Unhandled ADRFAM %d\n", trid->adrfam); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = family; + hints.ai_flags = AI_NUMERICSERV; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + rc = getaddrinfo(trid->traddr, 
trid->trsvcid, &hints, &res); + if (rc) { + SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP); + if (rc < 0) { + SPDK_ERRLOG("rdma_create_id() failed\n"); + freeaddrinfo(res); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + rc = rdma_bind_addr(port->id, res->ai_addr); + freeaddrinfo(res); + + if (rc < 0) { + SPDK_ERRLOG("rdma_bind_addr() failed\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + if (!port->id->verbs) { + SPDK_ERRLOG("ibv_context is null\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -1; + } + + rc = rdma_listen(port->id, transport->opts.acceptor_backlog); + if (rc < 0) { + SPDK_ERRLOG("rdma_listen() failed\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + TAILQ_FOREACH(device, &rtransport->devices, link) { + if (device->context == port->id->verbs) { + port->device = device; + break; + } + } + if (!port->device) { + SPDK_ERRLOG("Accepted a connection with verbs %p, but unable to find a corresponding device.\n", + port->id->verbs); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + SPDK_NOTICELOG("*** NVMe/RDMA Target Listening on %s port %s ***\n", + trid->traddr, trid->trsvcid); + + TAILQ_INSERT_TAIL(&rtransport->ports, port, link); + pthread_mutex_unlock(&rtransport->lock); + return 0; +} + +static void +nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_port *port, *tmp; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + 
pthread_mutex_lock(&rtransport->lock); + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) { + if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) { + TAILQ_REMOVE(&rtransport->ports, port, link); + rdma_destroy_id(port->id); + free(port); + break; + } + } + + pthread_mutex_unlock(&rtransport->lock); +} + +static void +nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_qpair *rqpair, bool drain) +{ + struct spdk_nvmf_request *req, *tmp; + struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; + struct spdk_nvmf_rdma_resources *resources; + + /* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */ + STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) { + if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { + break; + } + } + + /* Then RDMA writes since reads have stronger restrictions than writes */ + STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_write_queue, state_link, req_tmp) { + if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { + break; + } + } + + /* The second highest priority is I/O waiting on memory buffers. 
*/ + STAILQ_FOREACH_SAFE(req, &rqpair->poller->group->group.pending_buf_queue, buf_link, tmp) { + rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { + break; + } + } + + resources = rqpair->resources; + while (!STAILQ_EMPTY(&resources->free_queue) && !STAILQ_EMPTY(&resources->incoming_queue)) { + rdma_req = STAILQ_FIRST(&resources->free_queue); + STAILQ_REMOVE_HEAD(&resources->free_queue, state_link); + rdma_req->recv = STAILQ_FIRST(&resources->incoming_queue); + STAILQ_REMOVE_HEAD(&resources->incoming_queue, link); + + if (rqpair->srq != NULL) { + rdma_req->req.qpair = &rdma_req->recv->qpair->qpair; + rdma_req->recv->qpair->qd++; + } else { + rqpair->qd++; + } + + rdma_req->receive_tsc = rdma_req->recv->receive_tsc; + rdma_req->state = RDMA_REQUEST_STATE_NEW; + if (nvmf_rdma_request_process(rtransport, rdma_req) == false) { + break; + } + } + if (!STAILQ_EMPTY(&resources->incoming_queue) && STAILQ_EMPTY(&resources->free_queue)) { + rqpair->poller->stat.pending_free_request++; + } +} + +static void +_nvmf_rdma_qpair_disconnect(void *ctx) +{ + struct spdk_nvmf_qpair *qpair = ctx; + + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); +} + +static void +_nvmf_rdma_try_disconnect(void *ctx) +{ + struct spdk_nvmf_qpair *qpair = ctx; + struct spdk_nvmf_poll_group *group; + + /* Read the group out of the qpair. This is normally set and accessed only from + * the thread that created the group. Here, we're not on that thread necessarily. + * The data member qpair->group begins it's life as NULL and then is assigned to + * a pointer and never changes. So fortunately reading this and checking for + * non-NULL is thread safe in the x86_64 memory model. */ + group = qpair->group; + + if (group == NULL) { + /* The qpair hasn't been assigned to a group yet, so we can't + * process a disconnect. Send a message to ourself and try again. 
*/ + spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_try_disconnect, qpair); + return; + } + + spdk_thread_send_msg(group->thread, _nvmf_rdma_qpair_disconnect, qpair); +} + +static inline void +nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair) +{ + if (!__atomic_test_and_set(&rqpair->disconnect_started, __ATOMIC_RELAXED)) { + _nvmf_rdma_try_disconnect(&rqpair->qpair); + } +} + +static void nvmf_rdma_destroy_drained_qpair(void *ctx) +{ + struct spdk_nvmf_rdma_qpair *rqpair = ctx; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, + struct spdk_nvmf_rdma_transport, transport); + + /* In non SRQ path, we will reach rqpair->max_queue_depth. In SRQ path, we will get the last_wqe event. */ + if (rqpair->current_send_depth != 0) { + return; + } + + if (rqpair->srq == NULL && rqpair->current_recv_depth != rqpair->max_queue_depth) { + return; + } + + if (rqpair->srq != NULL && rqpair->last_wqe_reached == false) { + return; + } + + nvmf_rdma_qpair_process_pending(rtransport, rqpair, true); + + /* Qpair will be destroyed after nvmf layer closes this qpair */ + if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) { + return; + } + + nvmf_rdma_qpair_destroy(rqpair); +} + + +static int +nvmf_rdma_disconnect(struct rdma_cm_event *evt) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + + if (evt->id == NULL) { + SPDK_ERRLOG("disconnect request: missing cm_id\n"); + return -1; + } + + qpair = evt->id->context; + if (qpair == NULL) { + SPDK_ERRLOG("disconnect request: no active connection\n"); + return -1; + } + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rqpair->cm_id, 0); + + nvmf_rdma_start_disconnect(rqpair); + + return 0; +} + +#ifdef DEBUG +static const char *CM_EVENT_STR[] = { + "RDMA_CM_EVENT_ADDR_RESOLVED", + "RDMA_CM_EVENT_ADDR_ERROR", + "RDMA_CM_EVENT_ROUTE_RESOLVED", + "RDMA_CM_EVENT_ROUTE_ERROR", + 
"RDMA_CM_EVENT_CONNECT_REQUEST", + "RDMA_CM_EVENT_CONNECT_RESPONSE", + "RDMA_CM_EVENT_CONNECT_ERROR", + "RDMA_CM_EVENT_UNREACHABLE", + "RDMA_CM_EVENT_REJECTED", + "RDMA_CM_EVENT_ESTABLISHED", + "RDMA_CM_EVENT_DISCONNECTED", + "RDMA_CM_EVENT_DEVICE_REMOVAL", + "RDMA_CM_EVENT_MULTICAST_JOIN", + "RDMA_CM_EVENT_MULTICAST_ERROR", + "RDMA_CM_EVENT_ADDR_CHANGE", + "RDMA_CM_EVENT_TIMEWAIT_EXIT" +}; +#endif /* DEBUG */ + +static void +nvmf_rdma_disconnect_qpairs_on_port(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_port *port) +{ + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *rpoller; + struct spdk_nvmf_rdma_qpair *rqpair; + + TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) { + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) { + if (rqpair->listen_id == port->id) { + nvmf_rdma_start_disconnect(rqpair); + } + } + } + } +} + +static bool +nvmf_rdma_handle_cm_event_addr_change(struct spdk_nvmf_transport *transport, + struct rdma_cm_event *event) +{ + const struct spdk_nvme_transport_id *trid; + struct spdk_nvmf_rdma_port *port; + struct spdk_nvmf_rdma_transport *rtransport; + bool event_acked = false; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + TAILQ_FOREACH(port, &rtransport->ports, link) { + if (port->id == event->id) { + SPDK_ERRLOG("ADDR_CHANGE: IP %s:%s migrated\n", port->trid->traddr, port->trid->trsvcid); + rdma_ack_cm_event(event); + event_acked = true; + trid = port->trid; + break; + } + } + + if (event_acked) { + nvmf_rdma_disconnect_qpairs_on_port(rtransport, port); + + nvmf_rdma_stop_listen(transport, trid); + nvmf_rdma_listen(transport, trid); + } + + return event_acked; +} + +static void +nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport, + struct rdma_cm_event *event) +{ + struct spdk_nvmf_rdma_port *port; + struct spdk_nvmf_rdma_transport *rtransport; + + port = event->id->context; 
+ rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + SPDK_NOTICELOG("Port %s:%s is being removed\n", port->trid->traddr, port->trid->trsvcid); + + nvmf_rdma_disconnect_qpairs_on_port(rtransport, port); + + rdma_ack_cm_event(event); + + while (spdk_nvmf_transport_stop_listen(transport, port->trid) == 0) { + ; + } +} + +static void +nvmf_process_cm_event(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct rdma_cm_event *event; + int rc; + bool event_acked; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + if (rtransport->event_channel == NULL) { + return; + } + + while (1) { + event_acked = false; + rc = rdma_get_cm_event(rtransport->event_channel, &event); + if (rc) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno)); + } + break; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Acceptor Event: %s\n", CM_EVENT_STR[event->event]); + + spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event); + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + case RDMA_CM_EVENT_ROUTE_ERROR: + /* No action required. The target never attempts to resolve routes. */ + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + rc = nvmf_rdma_connect(transport, event); + if (rc < 0) { + SPDK_ERRLOG("Unable to process connect event. rc: %d\n", rc); + break; + } + break; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + /* The target never initiates a new connection. So this will not occur. */ + break; + case RDMA_CM_EVENT_CONNECT_ERROR: + /* Can this happen? The docs say it can, but not sure what causes it. */ + break; + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + /* These only occur on the client side. */ + break; + case RDMA_CM_EVENT_ESTABLISHED: + /* TODO: Should we be waiting for this event anywhere? 
*/ + break; + case RDMA_CM_EVENT_DISCONNECTED: + rc = nvmf_rdma_disconnect(event); + if (rc < 0) { + SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); + break; + } + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* In case of device removal, kernel IB part triggers IBV_EVENT_DEVICE_FATAL + * which triggers RDMA_CM_EVENT_DEVICE_REMOVAL on all cma_id’s. + * Once these events are sent to SPDK, we should release all IB resources and + * don't make attempts to call any ibv_query/modify/create functions. We can only call + * ibv_destory* functions to release user space memory allocated by IB. All kernel + * resources are already cleaned. */ + if (event->id->qp) { + /* If rdma_cm event has a valid `qp` pointer then the event refers to the + * corresponding qpair. Otherwise the event refers to a listening device */ + rc = nvmf_rdma_disconnect(event); + if (rc < 0) { + SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); + break; + } + } else { + nvmf_rdma_handle_cm_event_port_removal(transport, event); + event_acked = true; + } + break; + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + /* Multicast is not used */ + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + event_acked = nvmf_rdma_handle_cm_event_addr_change(transport, event); + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + /* For now, do nothing. The target never re-uses queue pairs. 
*/ + break; + default: + SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event); + break; + } + if (!event_acked) { + rdma_ack_cm_event(event); + } + } +} + +static void +nvmf_rdma_handle_qp_fatal(struct spdk_nvmf_rdma_qpair *rqpair) +{ + nvmf_rdma_update_ibv_state(rqpair); + nvmf_rdma_start_disconnect(rqpair); +} + +static void +nvmf_rdma_handle_last_wqe_reached(struct spdk_nvmf_rdma_qpair *rqpair) +{ + rqpair->last_wqe_reached = true; + nvmf_rdma_destroy_drained_qpair(rqpair); +} + +static void +nvmf_rdma_handle_sq_drained(struct spdk_nvmf_rdma_qpair *rqpair) +{ + nvmf_rdma_start_disconnect(rqpair); +} + +static void +nvmf_rdma_qpair_process_ibv_event(void *ctx) +{ + struct spdk_nvmf_rdma_ibv_event_ctx *event_ctx = ctx; + + if (event_ctx->rqpair) { + STAILQ_REMOVE(&event_ctx->rqpair->ibv_events, event_ctx, spdk_nvmf_rdma_ibv_event_ctx, link); + if (event_ctx->cb_fn) { + event_ctx->cb_fn(event_ctx->rqpair); + } + } + free(event_ctx); +} + +static int +nvmf_rdma_send_qpair_async_event(struct spdk_nvmf_rdma_qpair *rqpair, + spdk_nvmf_rdma_qpair_ibv_event fn) +{ + struct spdk_nvmf_rdma_ibv_event_ctx *ctx; + struct spdk_thread *thr = NULL; + int rc; + + if (rqpair->qpair.group) { + thr = rqpair->qpair.group->thread; + } else if (rqpair->destruct_channel) { + thr = spdk_io_channel_get_thread(rqpair->destruct_channel); + } + + if (!thr) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "rqpair %p has no thread\n", rqpair); + return -EINVAL; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + return -ENOMEM; + } + + ctx->rqpair = rqpair; + ctx->cb_fn = fn; + STAILQ_INSERT_TAIL(&rqpair->ibv_events, ctx, link); + + rc = spdk_thread_send_msg(thr, nvmf_rdma_qpair_process_ibv_event, ctx); + if (rc) { + STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link); + free(ctx); + } + + return rc; +} + +static void +nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device) +{ + int rc; + struct spdk_nvmf_rdma_qpair *rqpair = NULL; + struct ibv_async_event event; + + rc 
= ibv_get_async_event(device->context, &event); + + if (rc) { + SPDK_ERRLOG("Failed to get async_event (%d): %s\n", + errno, spdk_strerror(errno)); + return; + } + + switch (event.event_type) { + case IBV_EVENT_QP_FATAL: + rqpair = event.element.qp->qp_context; + SPDK_ERRLOG("Fatal event received for rqpair %p\n", rqpair); + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_qp_fatal); + if (rc) { + SPDK_WARNLOG("Failed to send QP_FATAL event. rqpair %p, err %d\n", rqpair, rc); + nvmf_rdma_handle_qp_fatal(rqpair); + } + break; + case IBV_EVENT_QP_LAST_WQE_REACHED: + /* This event only occurs for shared receive queues. */ + rqpair = event.element.qp->qp_context; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Last WQE reached event received for rqpair %p\n", rqpair); + rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_last_wqe_reached); + if (rc) { + SPDK_WARNLOG("Failed to send LAST_WQE_REACHED event. rqpair %p, err %d\n", rqpair, rc); + rqpair->last_wqe_reached = true; + } + break; + case IBV_EVENT_SQ_DRAINED: + /* This event occurs frequently in both error and non-error states. + * Check if the qpair is in an error state before sending a message. */ + rqpair = event.element.qp->qp_context; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Last sq drained event received for rqpair %p\n", rqpair); + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + if (nvmf_rdma_update_ibv_state(rqpair) == IBV_QPS_ERR) { + rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_sq_drained); + if (rc) { + SPDK_WARNLOG("Failed to send SQ_DRAINED event. 
rqpair %p, err %d\n", rqpair, rc); + nvmf_rdma_handle_sq_drained(rqpair); + } + } + break; + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + SPDK_NOTICELOG("Async event: %s\n", + ibv_event_type_str(event.event_type)); + rqpair = event.element.qp->qp_context; + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + nvmf_rdma_update_ibv_state(rqpair); + break; + case IBV_EVENT_CQ_ERR: + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_CLIENT_REREGISTER: + case IBV_EVENT_GID_CHANGE: + default: + SPDK_NOTICELOG("Async event: %s\n", + ibv_event_type_str(event.event_type)); + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type); + break; + } + ibv_ack_async_event(&event); +} + +static uint32_t +nvmf_rdma_accept(struct spdk_nvmf_transport *transport) +{ + int nfds, i = 0; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device, *tmp; + uint32_t count; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0); + + if (nfds <= 0) { + return 0; + } + + /* The first poll descriptor is RDMA CM event */ + if (rtransport->poll_fds[i++].revents & POLLIN) { + nvmf_process_cm_event(transport); + nfds--; + } + + if (nfds == 0) { + return count; + } + + /* Second and subsequent poll descriptors are IB async events */ + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { + if (rtransport->poll_fds[i++].revents & POLLIN) { + nvmf_process_ib_event(device); + nfds--; + } + } + /* check all flagged fd's have been served */ + assert(nfds == 0); + + return count; +} + +static void 
+nvmf_rdma_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr_data *cdata) +{ + cdata->nvmf_specific.msdbd = SPDK_NVMF_MAX_SGL_ENTRIES; + + /* Disable in-capsule data transfer for RDMA controller when dif_insert_or_strip is enabled + since in-capsule data only works with NVME drives that support SGL memory layout */ + if (transport->opts.dif_insert_or_strip) { + cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16; + } +} + +static void +nvmf_rdma_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) +{ + entry->trtype = SPDK_NVMF_TRTYPE_RDMA; + entry->adrfam = trid->adrfam; + entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED; + + spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); + spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); + + entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED; + entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE; + entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM; +} + +static void +nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); + +static struct spdk_nvmf_transport_poll_group * +nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *poller; + struct spdk_nvmf_rdma_device *device; + struct ibv_srq_init_attr srq_init_attr; + struct spdk_nvmf_rdma_resource_opts opts; + int num_cqe; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + rgroup = calloc(1, sizeof(*rgroup)); + if (!rgroup) { + return NULL; + } + + TAILQ_INIT(&rgroup->pollers); + STAILQ_INIT(&rgroup->retired_bufs); + + pthread_mutex_lock(&rtransport->lock); + TAILQ_FOREACH(device, &rtransport->devices, link) { + 
poller = calloc(1, sizeof(*poller)); + if (!poller) { + SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n"); + nvmf_rdma_poll_group_destroy(&rgroup->group); + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + + poller->device = device; + poller->group = rgroup; + + TAILQ_INIT(&poller->qpairs); + STAILQ_INIT(&poller->qpairs_pending_send); + STAILQ_INIT(&poller->qpairs_pending_recv); + + TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link); + if (transport->opts.no_srq == false && device->num_srq < device->attr.max_srq) { + poller->max_srq_depth = transport->opts.max_srq_depth; + + device->num_srq++; + memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr)); + srq_init_attr.attr.max_wr = poller->max_srq_depth; + srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE); + poller->srq = ibv_create_srq(device->pd, &srq_init_attr); + if (!poller->srq) { + SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno); + nvmf_rdma_poll_group_destroy(&rgroup->group); + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + + opts.qp = poller->srq; + opts.pd = device->pd; + opts.qpair = NULL; + opts.shared = true; + opts.max_queue_depth = poller->max_srq_depth; + opts.in_capsule_data_size = transport->opts.in_capsule_data_size; + + poller->resources = nvmf_rdma_resources_create(&opts); + if (!poller->resources) { + SPDK_ERRLOG("Unable to allocate resources for shared receive queue.\n"); + nvmf_rdma_poll_group_destroy(&rgroup->group); + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + } + + /* + * When using an srq, we can limit the completion queue at startup. + * The following formula represents the calculation: + * num_cqe = num_recv + num_data_wr + num_send_wr. 
+ * where num_recv=num_data_wr=and num_send_wr=poller->max_srq_depth + */ + if (poller->srq) { + num_cqe = poller->max_srq_depth * 3; + } else { + num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE; + } + + poller->cq = ibv_create_cq(device->context, num_cqe, poller, NULL, 0); + if (!poller->cq) { + SPDK_ERRLOG("Unable to create completion queue\n"); + nvmf_rdma_poll_group_destroy(&rgroup->group); + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + poller->num_cqe = num_cqe; + } + + TAILQ_INSERT_TAIL(&rtransport->poll_groups, rgroup, link); + if (rtransport->conn_sched.next_admin_pg == NULL) { + rtransport->conn_sched.next_admin_pg = rgroup; + rtransport->conn_sched.next_io_pg = rgroup; + } + + pthread_mutex_unlock(&rtransport->lock); + return &rgroup->group; +} + +static struct spdk_nvmf_transport_poll_group * +nvmf_rdma_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group **pg; + struct spdk_nvmf_transport_poll_group *result; + + rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport); + + pthread_mutex_lock(&rtransport->lock); + + if (TAILQ_EMPTY(&rtransport->poll_groups)) { + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + + if (qpair->qid == 0) { + pg = &rtransport->conn_sched.next_admin_pg; + } else { + pg = &rtransport->conn_sched.next_io_pg; + } + + assert(*pg != NULL); + + result = &(*pg)->group; + + *pg = TAILQ_NEXT(*pg, link); + if (*pg == NULL) { + *pg = TAILQ_FIRST(&rtransport->poll_groups); + } + + pthread_mutex_unlock(&rtransport->lock); + + return result; +} + +static void +nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_rdma_poll_group *rgroup, *next_rgroup; + struct spdk_nvmf_rdma_poller *poller, *tmp; + struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair; + struct spdk_nvmf_transport_pg_cache_buf *buf, *tmp_buf; + struct spdk_nvmf_rdma_transport *rtransport; + + rgroup = 
SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + if (!rgroup) { + return; + } + + /* free all retired buffers back to the transport so we don't short the mempool. */ + STAILQ_FOREACH_SAFE(buf, &rgroup->retired_bufs, link, tmp_buf) { + STAILQ_REMOVE(&rgroup->retired_bufs, buf, spdk_nvmf_transport_pg_cache_buf, link); + assert(group->transport != NULL); + spdk_mempool_put(group->transport->data_buf_pool, buf); + } + + TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) { + TAILQ_REMOVE(&rgroup->pollers, poller, link); + + TAILQ_FOREACH_SAFE(qpair, &poller->qpairs, link, tmp_qpair) { + nvmf_rdma_qpair_destroy(qpair); + } + + if (poller->srq) { + if (poller->resources) { + nvmf_rdma_resources_destroy(poller->resources); + } + ibv_destroy_srq(poller->srq); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Destroyed RDMA shared queue %p\n", poller->srq); + } + + if (poller->cq) { + ibv_destroy_cq(poller->cq); + } + + free(poller); + } + + if (rgroup->group.transport == NULL) { + /* Transport can be NULL when nvmf_rdma_poll_group_create() + * calls this function directly in a failure path. 
*/ + free(rgroup); + return; + } + + rtransport = SPDK_CONTAINEROF(rgroup->group.transport, struct spdk_nvmf_rdma_transport, transport); + + pthread_mutex_lock(&rtransport->lock); + next_rgroup = TAILQ_NEXT(rgroup, link); + TAILQ_REMOVE(&rtransport->poll_groups, rgroup, link); + if (next_rgroup == NULL) { + next_rgroup = TAILQ_FIRST(&rtransport->poll_groups); + } + if (rtransport->conn_sched.next_admin_pg == rgroup) { + rtransport->conn_sched.next_admin_pg = next_rgroup; + } + if (rtransport->conn_sched.next_io_pg == rgroup) { + rtransport->conn_sched.next_io_pg = next_rgroup; + } + pthread_mutex_unlock(&rtransport->lock); + + free(rgroup); +} + +static void +nvmf_rdma_qpair_reject_connection(struct spdk_nvmf_rdma_qpair *rqpair) +{ + if (rqpair->cm_id != NULL) { + nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); + } + nvmf_rdma_qpair_destroy(rqpair); +} + +static int +nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poller *poller; + int rc; + + rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + device = rqpair->device; + + TAILQ_FOREACH(poller, &rgroup->pollers, link) { + if (poller->device == device) { + break; + } + } + + if (!poller) { + SPDK_ERRLOG("No poller found for device.\n"); + return -1; + } + + TAILQ_INSERT_TAIL(&poller->qpairs, rqpair, link); + rqpair->poller = poller; + rqpair->srq = rqpair->poller->srq; + + rc = nvmf_rdma_qpair_initialize(qpair); + if (rc < 0) { + SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair); + return -1; + } + + rc = nvmf_rdma_event_accept(rqpair->cm_id, rqpair); + if (rc) { + /* Try to reject, but we probably can't */ + nvmf_rdma_qpair_reject_connection(rqpair); + return -1; + 
} + + nvmf_rdma_update_ibv_state(rqpair); + + return 0; +} + +static int +nvmf_rdma_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + assert(group->transport->tgt != NULL); + + rqpair->destruct_channel = spdk_get_io_channel(group->transport->tgt); + + if (!rqpair->destruct_channel) { + SPDK_WARNLOG("failed to get io_channel, qpair %p\n", qpair); + return 0; + } + + /* Sanity check that we get io_channel on the correct thread */ + if (qpair->group) { + assert(qpair->group->thread == spdk_io_channel_get_thread(rqpair->destruct_channel)); + } + + return 0; +} + +static int +nvmf_rdma_request_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, + struct spdk_nvmf_rdma_qpair, qpair); + + /* + * AER requests are freed when a qpair is destroyed. The recv corresponding to that request + * needs to be returned to the shared receive queue or the poll group will eventually be + * starved of RECV structures. 
+ */ + if (rqpair->srq && rdma_req->recv) { + int rc; + struct ibv_recv_wr *bad_recv_wr; + + rc = ibv_post_srq_recv(rqpair->srq, &rdma_req->recv->wr, &bad_recv_wr); + if (rc) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + } + } + + _nvmf_rdma_request_free(rdma_req, rtransport); + return 0; +} + +static int +nvmf_rdma_request_complete(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, + struct spdk_nvmf_rdma_request, req); + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, + struct spdk_nvmf_rdma_qpair, qpair); + + if (rqpair->ibv_state != IBV_QPS_ERR) { + /* The connection is alive, so process the request as normal */ + rdma_req->state = RDMA_REQUEST_STATE_EXECUTED; + } else { + /* The connection is dead. Move the request directly to the completed state. */ + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + } + + nvmf_rdma_request_process(rtransport, rdma_req); + + return 0; +} + +static int +nvmf_rdma_destroy_defunct_qpair(void *ctx) +{ + struct spdk_nvmf_rdma_qpair *rqpair = ctx; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, + struct spdk_nvmf_rdma_transport, transport); + + SPDK_INFOLOG(SPDK_LOG_RDMA, "QP#%d hasn't been drained as expected, manually destroy it\n", + rqpair->qpair.qid); + + nvmf_rdma_qpair_process_pending(rtransport, rqpair, true); + nvmf_rdma_qpair_destroy(rqpair); + + return SPDK_POLLER_BUSY; +} + +static void +nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + if (rqpair->disconnect_flags & RDMA_QP_DISCONNECTING) { + return; + } + + rqpair->disconnect_flags |= RDMA_QP_DISCONNECTING; + + /* This happens only when the qpair is disconnected before + * it is added to the poll 
group. Since there is no poll group, + * the RDMA qp has not been initialized yet and the RDMA CM + * event has not yet been acknowledged, so we need to reject it. + */ + if (rqpair->qpair.state == SPDK_NVMF_QPAIR_UNINITIALIZED) { + nvmf_rdma_qpair_reject_connection(rqpair); + return; + } + + if (rqpair->rdma_qp) { + spdk_rdma_qp_disconnect(rqpair->rdma_qp); + } + + rqpair->destruct_poller = SPDK_POLLER_REGISTER(nvmf_rdma_destroy_defunct_qpair, (void *)rqpair, + NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US); +} + +static struct spdk_nvmf_rdma_qpair * +get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + /* @todo: improve QP search */ + TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) { + if (wc->qp_num == rqpair->rdma_qp->qp->qp_num) { + return rqpair; + } + } + SPDK_ERRLOG("Didn't find QP with qp_num %u\n", wc->qp_num); + return NULL; +} + +#ifdef DEBUG +static int +nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req) +{ + return rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST || + rdma_req->state == RDMA_REQUEST_STATE_COMPLETING; +} +#endif + +static void +_poller_reset_failed_recvs(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_recv_wr *bad_recv_wr, + int rc) +{ + struct spdk_nvmf_rdma_recv *rdma_recv; + struct spdk_nvmf_rdma_wr *bad_rdma_wr; + + SPDK_ERRLOG("Failed to post a recv for the poller %p with errno %d\n", rpoller, -rc); + while (bad_recv_wr != NULL) { + bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_recv_wr->wr_id; + rdma_recv = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr); + + rdma_recv->qpair->current_recv_depth++; + bad_recv_wr = bad_recv_wr->next; + SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rdma_recv->qpair, -rc); + nvmf_rdma_start_disconnect(rdma_recv->qpair); + } +} + +static void +_qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *bad_recv_wr, int rc) +{ + 
SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rqpair, -rc); + while (bad_recv_wr != NULL) { + bad_recv_wr = bad_recv_wr->next; + rqpair->current_recv_depth++; + } + nvmf_rdma_start_disconnect(rqpair); +} + +static void +_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct ibv_recv_wr *bad_recv_wr; + int rc; + + if (rpoller->srq) { + if (rpoller->resources->recvs_to_post.first != NULL) { + rc = ibv_post_srq_recv(rpoller->srq, rpoller->resources->recvs_to_post.first, &bad_recv_wr); + if (rc) { + _poller_reset_failed_recvs(rpoller, bad_recv_wr, rc); + } + rpoller->resources->recvs_to_post.first = NULL; + rpoller->resources->recvs_to_post.last = NULL; + } + } else { + while (!STAILQ_EMPTY(&rpoller->qpairs_pending_recv)) { + rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_recv); + assert(rqpair->resources->recvs_to_post.first != NULL); + rc = ibv_post_recv(rqpair->rdma_qp->qp, rqpair->resources->recvs_to_post.first, &bad_recv_wr); + if (rc) { + _qp_reset_failed_recvs(rqpair, bad_recv_wr, rc); + } + rqpair->resources->recvs_to_post.first = NULL; + rqpair->resources->recvs_to_post.last = NULL; + STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_recv, recv_link); + } + } +} + +static void +_qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *bad_wr, int rc) +{ + struct spdk_nvmf_rdma_wr *bad_rdma_wr; + struct spdk_nvmf_rdma_request *prev_rdma_req = NULL, *cur_rdma_req = NULL; + + SPDK_ERRLOG("Failed to post a send for the qpair %p with errno %d\n", rqpair, -rc); + for (; bad_wr != NULL; bad_wr = bad_wr->next) { + bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_wr->wr_id; + assert(rqpair->current_send_depth > 0); + rqpair->current_send_depth--; + switch (bad_rdma_wr->type) { + case RDMA_WR_TYPE_DATA: + cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, 
data.rdma_wr); + if (bad_wr->opcode == IBV_WR_RDMA_READ) { + assert(rqpair->current_read_depth > 0); + rqpair->current_read_depth--; + } + break; + case RDMA_WR_TYPE_SEND: + cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr); + break; + default: + SPDK_ERRLOG("Found a RECV in the list of pending SEND requests for qpair %p\n", rqpair); + prev_rdma_req = cur_rdma_req; + continue; + } + + if (prev_rdma_req == cur_rdma_req) { + /* this request was handled by an earlier wr. i.e. we were performing an nvme read. */ + /* We only have to check against prev_wr since each requests wrs are contiguous in this list. */ + continue; + } + + switch (cur_rdma_req->state) { + case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + cur_rdma_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + cur_rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + break; + case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: + case RDMA_REQUEST_STATE_COMPLETING: + cur_rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + break; + default: + SPDK_ERRLOG("Found a request in a bad state %d when draining pending SEND requests for qpair %p\n", + cur_rdma_req->state, rqpair); + continue; + } + + nvmf_rdma_request_process(rtransport, cur_rdma_req); + prev_rdma_req = cur_rdma_req; + } + + if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) { + /* Disconnect the connection. */ + nvmf_rdma_start_disconnect(rqpair); + } + +} + +static void +_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct ibv_send_wr *bad_wr = NULL; + int rc; + + while (!STAILQ_EMPTY(&rpoller->qpairs_pending_send)) { + rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_send); + rc = spdk_rdma_qp_flush_send_wrs(rqpair->rdma_qp, &bad_wr); + + /* bad wr always points to the first wr that failed. 
*/ + if (rc) { + _qp_reset_failed_sends(rtransport, rqpair, bad_wr, rc); + } + STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_send, send_link); + } +} + +static int +nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller) +{ + struct ibv_wc wc[32]; + struct spdk_nvmf_rdma_wr *rdma_wr; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_rdma_recv *rdma_recv; + struct spdk_nvmf_rdma_qpair *rqpair; + int reaped, i; + int count = 0; + bool error = false; + uint64_t poll_tsc = spdk_get_ticks(); + + /* Poll for completing operations. */ + reaped = ibv_poll_cq(rpoller->cq, 32, wc); + if (reaped < 0) { + SPDK_ERRLOG("Error polling CQ! (%d): %s\n", + errno, spdk_strerror(errno)); + return -1; + } + + rpoller->stat.polls++; + rpoller->stat.completions += reaped; + + for (i = 0; i < reaped; i++) { + + rdma_wr = (struct spdk_nvmf_rdma_wr *)wc[i].wr_id; + + switch (rdma_wr->type) { + case RDMA_WR_TYPE_SEND: + rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + if (!wc[i].status) { + count++; + assert(wc[i].opcode == IBV_WC_SEND); + assert(nvmf_rdma_req_is_completing(rdma_req)); + } + + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + /* RDMA_WRITE operation completed. +1 since it was chained with rsp WR */ + rqpair->current_send_depth -= rdma_req->num_outstanding_data_wr + 1; + rdma_req->num_outstanding_data_wr = 0; + + nvmf_rdma_request_process(rtransport, rdma_req); + break; + case RDMA_WR_TYPE_RECV: + /* rdma_recv->qpair will be invalid if using an SRQ. In that case we have to get the qpair from the wc. */ + rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr); + if (rpoller->srq != NULL) { + rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]); + /* It is possible that there are still some completions for destroyed QP + * associated with SRQ. 
We just ignore these late completions and re-post + * receive WRs back to SRQ. + */ + if (spdk_unlikely(NULL == rdma_recv->qpair)) { + struct ibv_recv_wr *bad_wr; + int rc; + + rdma_recv->wr.next = NULL; + rc = ibv_post_srq_recv(rpoller->srq, + &rdma_recv->wr, + &bad_wr); + if (rc) { + SPDK_ERRLOG("Failed to re-post recv WR to SRQ, err %d\n", rc); + } + continue; + } + } + rqpair = rdma_recv->qpair; + + assert(rqpair != NULL); + if (!wc[i].status) { + assert(wc[i].opcode == IBV_WC_RECV); + if (rqpair->current_recv_depth >= rqpair->max_queue_depth) { + nvmf_rdma_start_disconnect(rqpair); + break; + } + } + + rdma_recv->wr.next = NULL; + rqpair->current_recv_depth++; + rdma_recv->receive_tsc = poll_tsc; + rpoller->stat.requests++; + STAILQ_INSERT_TAIL(&rqpair->resources->incoming_queue, rdma_recv, link); + break; + case RDMA_WR_TYPE_DATA: + rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(rdma_req->num_outstanding_data_wr > 0); + + rqpair->current_send_depth--; + rdma_req->num_outstanding_data_wr--; + if (!wc[i].status) { + assert(wc[i].opcode == IBV_WC_RDMA_READ); + rqpair->current_read_depth--; + /* wait for all outstanding reads associated with the same rdma_req to complete before proceeding. */ + if (rdma_req->num_outstanding_data_wr == 0) { + rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; + nvmf_rdma_request_process(rtransport, rdma_req); + } + } else { + /* If the data transfer fails still force the queue into the error state, + * if we were performing an RDMA_READ, we need to force the request into a + * completed state since it wasn't linked to a send. However, in the RDMA_WRITE + * case, we should wait for the SEND to complete. 
*/ + if (rdma_req->data.wr.opcode == IBV_WR_RDMA_READ) { + rqpair->current_read_depth--; + if (rdma_req->num_outstanding_data_wr == 0) { + rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; + } + } + } + break; + default: + SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); + continue; + } + + /* Handle error conditions */ + if (wc[i].status) { + if ((rdma_wr->type == RDMA_WR_TYPE_RECV && !rpoller->srq)) { + /* When we don't use SRQ and close a qpair, we will receive completions with error + * status for all posted ibv_recv_wrs. This is expected and we don't want to log + * an error in that case. */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Error on CQ %p, request 0x%lu, type %d, status: (%d): %s\n", + rpoller->cq, wc[i].wr_id, rdma_wr->type, wc[i].status, ibv_wc_status_str(wc[i].status)); + } else { + SPDK_ERRLOG("Error on CQ %p, request 0x%lu, type %d, status: (%d): %s\n", + rpoller->cq, wc[i].wr_id, rdma_wr->type, wc[i].status, ibv_wc_status_str(wc[i].status)); + } + + error = true; + + if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) { + /* Disconnect the connection. */ + nvmf_rdma_start_disconnect(rqpair); + } else { + nvmf_rdma_destroy_drained_qpair(rqpair); + } + continue; + } + + nvmf_rdma_qpair_process_pending(rtransport, rqpair, false); + + if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { + nvmf_rdma_destroy_drained_qpair(rqpair); + } + } + + if (error == true) { + return -1; + } + + /* submit outstanding work requests. 
*/ + _poller_submit_recvs(rtransport, rpoller); + _poller_submit_sends(rtransport, rpoller); + + return count; +} + +static int +nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *rpoller; + int count, rc; + + rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport); + rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + + count = 0; + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + rc = nvmf_rdma_poller_poll(rtransport, rpoller); + if (rc < 0) { + return rc; + } + count += rc; + } + + return count; +} + +static int +nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, + struct spdk_nvme_transport_id *trid, + bool peer) +{ + struct sockaddr *saddr; + uint16_t port; + + spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_RDMA); + + if (peer) { + saddr = rdma_get_peer_addr(id); + } else { + saddr = rdma_get_local_addr(id); + } + switch (saddr->sa_family) { + case AF_INET: { + struct sockaddr_in *saddr_in = (struct sockaddr_in *)saddr; + + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + inet_ntop(AF_INET, &saddr_in->sin_addr, + trid->traddr, sizeof(trid->traddr)); + if (peer) { + port = ntohs(rdma_get_dst_port(id)); + } else { + port = ntohs(rdma_get_src_port(id)); + } + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); + break; + } + case AF_INET6: { + struct sockaddr_in6 *saddr_in = (struct sockaddr_in6 *)saddr; + trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; + inet_ntop(AF_INET6, &saddr_in->sin6_addr, + trid->traddr, sizeof(trid->traddr)); + if (peer) { + port = ntohs(rdma_get_dst_port(id)); + } else { + port = ntohs(rdma_get_src_port(id)); + } + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); + break; + } + default: + return -1; + + } + + return 0; +} + +static int +nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct 
spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true); +} + +static int +nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false); +} + +static int +nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false); +} + +void +spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks) +{ + g_nvmf_hooks = *hooks; +} + +static void +nvmf_rdma_request_set_abort_status(struct spdk_nvmf_request *req, + struct spdk_nvmf_rdma_request *rdma_req_to_abort) +{ + rdma_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + rdma_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + + rdma_req_to_abort->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; + + req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. 
*/ +} + +static int +_nvmf_rdma_qpair_abort_request(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_rdma_request *rdma_req_to_abort = SPDK_CONTAINEROF( + req->req_to_abort, struct spdk_nvmf_rdma_request, req); + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair, + struct spdk_nvmf_rdma_qpair, qpair); + int rc; + + spdk_poller_unregister(&req->poller); + + switch (rdma_req_to_abort->state) { + case RDMA_REQUEST_STATE_EXECUTING: + rc = nvmf_ctrlr_abort_request(req); + if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) { + return SPDK_POLLER_BUSY; + } + break; + + case RDMA_REQUEST_STATE_NEED_BUFFER: + STAILQ_REMOVE(&rqpair->poller->group->group.pending_buf_queue, + &rdma_req_to_abort->req, spdk_nvmf_request, buf_link); + + nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort); + break; + + case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING: + STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req_to_abort, + spdk_nvmf_rdma_request, state_link); + + nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort); + break; + + case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING: + STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req_to_abort, + spdk_nvmf_rdma_request, state_link); + + nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort); + break; + + case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + if (spdk_get_ticks() < req->timeout_tsc) { + req->poller = SPDK_POLLER_REGISTER(_nvmf_rdma_qpair_abort_request, req, 0); + return SPDK_POLLER_BUSY; + } + break; + + default: + break; + } + + spdk_nvmf_request_complete(req); + return SPDK_POLLER_BUSY; +} + +static void +nvmf_rdma_qpair_abort_request(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_transport *transport; + uint16_t cid; + uint32_t i; + struct spdk_nvmf_rdma_request *rdma_req_to_abort = NULL; + + 
rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport); + transport = &rtransport->transport; + + cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; + + for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_req_to_abort = &rqpair->resources->reqs[i]; + + if (rdma_req_to_abort->state != RDMA_REQUEST_STATE_FREE && + rdma_req_to_abort->req.cmd->nvme_cmd.cid == cid) { + break; + } + } + + if (rdma_req_to_abort == NULL) { + spdk_nvmf_request_complete(req); + return; + } + + req->req_to_abort = &rdma_req_to_abort->req; + req->timeout_tsc = spdk_get_ticks() + + transport->opts.abort_timeout_sec * spdk_get_ticks_hz(); + req->poller = NULL; + + _nvmf_rdma_qpair_abort_request(req); +} + +static int +nvmf_rdma_poll_group_get_stat(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport_poll_group_stat **stat) +{ + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group; + struct spdk_nvmf_transport_poll_group *tgroup; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *rpoller; + struct spdk_nvmf_rdma_device_stat *device_stat; + uint64_t num_devices = 0; + + if (tgt == NULL || stat == NULL) { + return -EINVAL; + } + + ch = spdk_get_io_channel(tgt); + group = spdk_io_channel_get_ctx(ch);; + spdk_put_io_channel(ch); + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + if (SPDK_NVME_TRANSPORT_RDMA == tgroup->transport->ops->type) { + *stat = calloc(1, sizeof(struct spdk_nvmf_transport_poll_group_stat)); + if (!*stat) { + SPDK_ERRLOG("Failed to allocate memory for NVMf RDMA statistics\n"); + return -ENOMEM; + } + (*stat)->trtype = SPDK_NVME_TRANSPORT_RDMA; + + rgroup = SPDK_CONTAINEROF(tgroup, struct spdk_nvmf_rdma_poll_group, group); + /* Count devices to allocate enough memory */ + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + ++num_devices; + } + (*stat)->rdma.devices = calloc(num_devices, sizeof(struct spdk_nvmf_rdma_device_stat)); + if 
(!(*stat)->rdma.devices) { + SPDK_ERRLOG("Failed to allocate NVMf RDMA devices statistics\n"); + free(*stat); + return -ENOMEM; + } + + (*stat)->rdma.pending_data_buffer = rgroup->stat.pending_data_buffer; + (*stat)->rdma.num_devices = num_devices; + num_devices = 0; + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + device_stat = &(*stat)->rdma.devices[num_devices++]; + device_stat->name = ibv_get_device_name(rpoller->device->context->device); + device_stat->polls = rpoller->stat.polls; + device_stat->completions = rpoller->stat.completions; + device_stat->requests = rpoller->stat.requests; + device_stat->request_latency = rpoller->stat.request_latency; + device_stat->pending_free_request = rpoller->stat.pending_free_request; + device_stat->pending_rdma_read = rpoller->stat.pending_rdma_read; + device_stat->pending_rdma_write = rpoller->stat.pending_rdma_write; + } + return 0; + } + } + return -ENOENT; +} + +static void +nvmf_rdma_poll_group_free_stat(struct spdk_nvmf_transport_poll_group_stat *stat) +{ + if (stat) { + free(stat->rdma.devices); + } + free(stat); +} + +const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = { + .name = "RDMA", + .type = SPDK_NVME_TRANSPORT_RDMA, + .opts_init = nvmf_rdma_opts_init, + .create = nvmf_rdma_create, + .destroy = nvmf_rdma_destroy, + + .listen = nvmf_rdma_listen, + .stop_listen = nvmf_rdma_stop_listen, + .accept = nvmf_rdma_accept, + .cdata_init = nvmf_rdma_cdata_init, + + .listener_discover = nvmf_rdma_discover, + + .poll_group_create = nvmf_rdma_poll_group_create, + .get_optimal_poll_group = nvmf_rdma_get_optimal_poll_group, + .poll_group_destroy = nvmf_rdma_poll_group_destroy, + .poll_group_add = nvmf_rdma_poll_group_add, + .poll_group_remove = nvmf_rdma_poll_group_remove, + .poll_group_poll = nvmf_rdma_poll_group_poll, + + .req_free = nvmf_rdma_request_free, + .req_complete = nvmf_rdma_request_complete, + + .qpair_fini = nvmf_rdma_close_qpair, + .qpair_get_peer_trid = nvmf_rdma_qpair_get_peer_trid, + 
.qpair_get_local_trid = nvmf_rdma_qpair_get_local_trid, + .qpair_get_listen_trid = nvmf_rdma_qpair_get_listen_trid, + .qpair_abort_request = nvmf_rdma_qpair_abort_request, + + .poll_group_get_stat = nvmf_rdma_poll_group_get_stat, + .poll_group_free_stat = nvmf_rdma_poll_group_free_stat, +}; + +SPDK_NVMF_TRANSPORT_REGISTER(rdma, &spdk_nvmf_transport_rdma); +SPDK_LOG_REGISTER_COMPONENT("rdma", SPDK_LOG_RDMA) diff --git a/src/spdk/lib/nvmf/spdk_nvmf.map b/src/spdk/lib/nvmf/spdk_nvmf.map new file mode 100644 index 000000000..994e7437b --- /dev/null +++ b/src/spdk/lib/nvmf/spdk_nvmf.map @@ -0,0 +1,118 @@ +{ + global: + + # public functions in nvmf.h + spdk_nvmf_tgt_create; + spdk_nvmf_tgt_destroy; + spdk_nvmf_tgt_get_name; + spdk_nvmf_get_tgt; + spdk_nvmf_get_first_tgt; + spdk_nvmf_get_next_tgt; + spdk_nvmf_tgt_write_config_json; + spdk_nvmf_tgt_listen; + spdk_nvmf_tgt_stop_listen; + spdk_nvmf_tgt_accept; + spdk_nvmf_poll_group_create; + spdk_nvmf_get_optimal_poll_group; + spdk_nvmf_poll_group_destroy; + spdk_nvmf_poll_group_add; + spdk_nvmf_poll_group_get_stat; + spdk_nvmf_qpair_disconnect; + spdk_nvmf_qpair_get_peer_trid; + spdk_nvmf_qpair_get_local_trid; + spdk_nvmf_qpair_get_listen_trid; + spdk_nvmf_subsystem_create; + spdk_nvmf_subsystem_destroy; + spdk_nvmf_subsystem_start; + spdk_nvmf_subsystem_stop; + spdk_nvmf_subsystem_pause; + spdk_nvmf_subsystem_resume; + spdk_nvmf_tgt_find_subsystem; + spdk_nvmf_subsystem_get_first; + spdk_nvmf_subsystem_get_next; + spdk_nvmf_subsystem_add_host; + spdk_nvmf_subsystem_remove_host; + spdk_nvmf_subsystem_set_allow_any_host; + spdk_nvmf_subsystem_get_allow_any_host; + spdk_nvmf_subsystem_host_allowed; + spdk_nvmf_subsystem_get_first_host; + spdk_nvmf_subsystem_get_next_host; + spdk_nvmf_host_get_nqn; + spdk_nvmf_subsystem_add_listener; + spdk_nvmf_subsystem_remove_listener; + spdk_nvmf_subsystem_listener_allowed; + spdk_nvmf_subsystem_get_first_listener; + spdk_nvmf_subsystem_get_next_listener; + 
spdk_nvmf_subsystem_listener_get_trid; + spdk_nvmf_subsystem_allow_any_listener; + spdk_nvmf_subsytem_any_listener_allowed; + spdk_nvmf_ns_opts_get_defaults; + spdk_nvmf_subsystem_add_ns; + spdk_nvmf_subsystem_remove_ns; + spdk_nvmf_subsystem_get_first_ns; + spdk_nvmf_subsystem_get_next_ns; + spdk_nvmf_subsystem_get_ns; + spdk_nvmf_subsystem_get_max_namespaces; + spdk_nvmf_ns_get_id; + spdk_nvmf_ns_get_bdev; + spdk_nvmf_ns_get_opts; + spdk_nvmf_subsystem_get_sn; + spdk_nvmf_subsystem_set_sn; + spdk_nvmf_subsystem_get_mn; + spdk_nvmf_subsystem_set_mn; + spdk_nvmf_subsystem_get_nqn; + spdk_nvmf_subsystem_get_type; + spdk_nvmf_subsystem_get_max_nsid; + spdk_nvmf_transport_opts_init; + spdk_nvmf_transport_create; + spdk_nvmf_transport_destroy; + spdk_nvmf_tgt_get_transport; + spdk_nvmf_transport_get_first; + spdk_nvmf_transport_get_next; + spdk_nvmf_get_transport_opts; + spdk_nvmf_get_transport_type; + spdk_nvmf_get_transport_name; + spdk_nvmf_tgt_add_transport; + spdk_nvmf_transport_listen; + spdk_nvmf_transport_stop_listen; + spdk_nvmf_transport_poll_group_get_stat; + spdk_nvmf_transport_poll_group_free_stat; + spdk_nvmf_rdma_init_hooks; + + # public functions in nvmf_cmd.h + spdk_nvmf_ctrlr_identify_ctrlr; + spdk_nvmf_ctrlr_identify_ns; + spdk_nvmf_set_custom_admin_cmd_hdlr; + spdk_nvmf_set_passthru_admin_cmd; + spdk_nvmf_bdev_ctrlr_nvme_passthru_admin; + spdk_nvmf_request_get_bdev; + spdk_nvmf_request_get_ctrlr; + spdk_nvmf_request_get_subsystem; + spdk_nvmf_request_get_data; + spdk_nvmf_request_get_cmd; + spdk_nvmf_request_get_response; + spdk_nvmf_request_get_req_to_abort; + spdk_nvmf_bdev_ctrlr_abort_cmd; + + # public functions in nvmf_transport.h + spdk_nvmf_transport_register; + spdk_nvmf_tgt_new_qpair; + spdk_nvmf_ctrlr_connect; + spdk_nvmf_ctrlr_data_init; + spdk_nvmf_ctrlr_get_regs; + spdk_nvmf_request_free_buffers; + spdk_nvmf_request_get_buffers; + spdk_nvmf_request_get_buffers_multi; + spdk_nvmf_request_get_dif_ctx; + spdk_nvmf_request_exec; + 
spdk_nvmf_request_exec_fabrics; + spdk_nvmf_request_free; + spdk_nvmf_request_complete; + spdk_nvmf_ctrlr_get_subsystem; + spdk_nvmf_ctrlr_get_id; + spdk_nvmf_req_get_xfer; + spdk_nvmf_poll_group_remove; + + + local: *; +}; diff --git a/src/spdk/lib/nvmf/subsystem.c b/src/spdk/lib/nvmf/subsystem.c new file mode 100644 index 000000000..ebe8d9a8e --- /dev/null +++ b/src/spdk/lib/nvmf/subsystem.c @@ -0,0 +1,2515 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/likely.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/nvmf_spec.h" +#include "spdk/uuid.h" +#include "spdk/json.h" +#include "spdk/file.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk_internal/utf.h" + +#define MODEL_NUMBER_DEFAULT "SPDK bdev Controller" + +/* + * States for parsing valid domains in NQNs according to RFC 1034 + */ +enum spdk_nvmf_nqn_domain_states { + /* First character of a domain must be a letter */ + SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0, + + /* Subsequent characters can be any of letter, digit, or hyphen */ + SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1, + + /* A domain label must end with either a letter or digit */ + SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2 +}; + +/* Returns true if is a valid ASCII string as defined by the NVMe spec */ +static bool +nvmf_valid_ascii_string(const void *buf, size_t size) +{ + const uint8_t *str = buf; + size_t i; + + for (i = 0; i < size; i++) { + if (str[i] < 0x20 || str[i] > 0x7E) { + return false; + } + } + + return true; +} + +static bool +nvmf_valid_nqn(const char *nqn) +{ + size_t len; + struct spdk_uuid uuid_value; + uint32_t i; + int bytes_consumed; + uint32_t domain_label_length; + char *reverse_domain_end; + uint32_t reverse_domain_end_index; + enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER; + + /* 
Check for length requirements */ + len = strlen(nqn); + if (len > SPDK_NVMF_NQN_MAX_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN); + return false; + } + + /* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */ + if (len < SPDK_NVMF_NQN_MIN_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN); + return false; + } + + /* Check for discovery controller nqn */ + if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) { + return true; + } + + /* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */ + if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) { + if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn); + return false; + } + + if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) { + SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn); + return false; + } + return true; + } + + /* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */ + + if (strncmp(nqn, "nqn.", 4) != 0) { + SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn); + return false; + } + + /* Check for yyyy-mm. */ + if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) && + nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) { + SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn); + return false; + } + + reverse_domain_end = strchr(nqn, ':'); + if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) { + } else { + SPDK_ERRLOG("Invalid NQN \"%s\". 
NQN must contain user specified name with a ':' as a prefix.\n", + nqn); + return false; + } + + /* Check for valid reverse domain */ + domain_label_length = 0; + for (i = 12; i < reverse_domain_end_index; i++) { + if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn); + return false; + } + + switch (domain_state) { + + case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: { + if (isalpha(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn); + return false; + } + } + + case SPDK_NVMF_DOMAIN_ACCEPT_LDH: { + if (isalpha(nqn[i]) || isdigit(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else if (nqn[i] == '-') { + if (i == reverse_domain_end_index - 1) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH; + domain_label_length++; + break; + } else if (nqn[i] == '.') { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n", + nqn); + return false; + } + } + + case SPDK_NVMF_DOMAIN_ACCEPT_ANY: { + if (isalpha(nqn[i]) || isdigit(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else if (nqn[i] == '-') { + if (i == reverse_domain_end_index - 1) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". 
Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH; + domain_label_length++; + break; + } else if (nqn[i] == '.') { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER; + domain_label_length = 0; + break; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n", + nqn); + return false; + } + } + } + } + + i = reverse_domain_end_index + 1; + while (i < len) { + bytes_consumed = utf8_valid(&nqn[i], &nqn[len]); + if (bytes_consumed <= 0) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn); + return false; + } + + i += bytes_consumed; + } + return true; +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt, + const char *nqn, + enum spdk_nvmf_subtype type, + uint32_t num_ns) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) { + SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn); + return NULL; + } + + if (!nvmf_valid_nqn(nqn)) { + return NULL; + } + + if (type == SPDK_NVMF_SUBTYPE_DISCOVERY && num_ns != 0) { + SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n"); + return NULL; + } + + /* Find a free subsystem id (sid) */ + for (sid = 0; sid < tgt->max_subsystems; sid++) { + if (tgt->subsystems[sid] == NULL) { + break; + } + } + if (sid >= tgt->max_subsystems) { + return NULL; + } + + subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem)); + if (subsystem == NULL) { + return NULL; + } + + subsystem->thread = spdk_get_thread(); + subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + subsystem->tgt = tgt; + subsystem->id = sid; + subsystem->subtype = type; + subsystem->max_nsid = num_ns; + subsystem->max_allowed_nsid = num_ns; + subsystem->next_cntlid = 0; + snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn); + TAILQ_INIT(&subsystem->listeners); + 
TAILQ_INIT(&subsystem->hosts); + TAILQ_INIT(&subsystem->ctrlrs); + + if (num_ns != 0) { + subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *)); + if (subsystem->ns == NULL) { + SPDK_ERRLOG("Namespace memory allocation failed\n"); + free(subsystem); + return NULL; + } + } + + memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1); + subsystem->sn[sizeof(subsystem->sn) - 1] = '\0'; + + snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", + MODEL_NUMBER_DEFAULT); + + tgt->subsystems[sid] = subsystem; + tgt->discovery_genctr++; + + return subsystem; +} + +static void +nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host) +{ + TAILQ_REMOVE(&subsystem->hosts, host, link); + free(host); +} + +static void +_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_subsystem_listener *listener, + bool stop) +{ + struct spdk_nvmf_transport *transport; + + if (stop) { + transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring); + if (transport != NULL) { + spdk_nvmf_transport_stop_listen(transport, listener->trid); + } + } + + TAILQ_REMOVE(&subsystem->listeners, listener, link); + free(listener); +} + +void +spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_host *host, *host_tmp; + struct spdk_nvmf_ctrlr *ctrlr, *ctrlr_tmp; + struct spdk_nvmf_ns *ns; + + if (!subsystem) { + return; + } + + assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "subsystem is %p\n", subsystem); + + nvmf_subsystem_remove_all_listeners(subsystem, false); + + TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) { + nvmf_subsystem_remove_host(subsystem, host); + } + + TAILQ_FOREACH_SAFE(ctrlr, &subsystem->ctrlrs, link, ctrlr_tmp) { + nvmf_ctrlr_destruct(ctrlr); + } + + ns = spdk_nvmf_subsystem_get_first_ns(subsystem); + while (ns != NULL) { + struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, 
ns); + + spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid); + ns = next_ns; + } + + free(subsystem->ns); + + subsystem->tgt->subsystems[subsystem->id] = NULL; + subsystem->tgt->discovery_genctr++; + + free(subsystem); +} + +static int +nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem, + enum spdk_nvmf_subsystem_state state) +{ + enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state; + bool exchanged; + + switch (state) { + case SPDK_NVMF_SUBSYSTEM_INACTIVE: + expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVATING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_PAUSING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING; + break; + case SPDK_NVMF_SUBSYSTEM_RESUMING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED; + break; + case SPDK_NVMF_SUBSYSTEM_DEACTIVATING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + break; + default: + assert(false); + return -1; + } + + actual_old_state = expected_old_state; + exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + if (spdk_unlikely(exchanged == false)) { + if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING && + state == SPDK_NVMF_SUBSYSTEM_ACTIVE) { + expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING; + } + /* This is for the case when activating the subsystem fails. 
*/ + if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING && + state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) { + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + } + actual_old_state = expected_old_state; + __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + assert(actual_old_state == expected_old_state); + return actual_old_state - expected_old_state; +} + +struct subsystem_state_change_ctx { + struct spdk_nvmf_subsystem *subsystem; + + enum spdk_nvmf_subsystem_state requested_state; + + spdk_nvmf_subsystem_state_change_done cb_fn; + void *cb_arg; +}; + +static void +subsystem_state_change_done(struct spdk_io_channel_iter *i, int status) +{ + struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + if (status == 0) { + status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state); + if (status) { + status = -1; + } + } + + if (ctx->cb_fn) { + ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status); + } + free(ctx); +} + +static void +subsystem_state_change_continue(void *ctx, int status) +{ + struct spdk_io_channel_iter *i = ctx; + spdk_for_each_channel_continue(i, status); +} + +static void +subsystem_state_change_on_pg(struct spdk_io_channel_iter *i) +{ + struct subsystem_state_change_ctx *ctx; + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group; + + ctx = spdk_io_channel_iter_get_ctx(i); + ch = spdk_io_channel_iter_get_channel(i); + group = spdk_io_channel_get_ctx(ch); + + switch (ctx->requested_state) { + case SPDK_NVMF_SUBSYSTEM_INACTIVE: + nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) { + nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + } else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) { + nvmf_poll_group_resume_subsystem(group, 
ctx->subsystem, subsystem_state_change_continue, i); + } + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + nvmf_poll_group_pause_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + break; + default: + assert(false); + break; + } +} + +static int +nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem, + enum spdk_nvmf_subsystem_state requested_state, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + struct subsystem_state_change_ctx *ctx; + enum spdk_nvmf_subsystem_state intermediate_state; + int rc; + + switch (requested_state) { + case SPDK_NVMF_SUBSYSTEM_INACTIVE: + intermediate_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + if (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) { + intermediate_state = SPDK_NVMF_SUBSYSTEM_RESUMING; + } else { + intermediate_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + } + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + intermediate_state = SPDK_NVMF_SUBSYSTEM_PAUSING; + break; + default: + assert(false); + return -EINVAL; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + return -ENOMEM; + } + + rc = nvmf_subsystem_set_state(subsystem, intermediate_state); + if (rc) { + free(ctx); + return rc; + } + + ctx->subsystem = subsystem; + ctx->requested_state = requested_state; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_for_each_channel(subsystem->tgt, + subsystem_state_change_on_pg, + ctx, + subsystem_state_change_done); + + return 0; +} + +int +spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_pause(struct 
spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg); +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + for (sid = 0; sid < tgt->max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem) { + return subsystem; + } + } + + return NULL; +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem) +{ + uint32_t sid; + struct spdk_nvmf_tgt *tgt; + + if (!subsystem) { + return NULL; + } + + tgt = subsystem->tgt; + + for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem) { + return subsystem; + } + } + + return NULL; +} + +static struct spdk_nvmf_host * +nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host = NULL; + + TAILQ_FOREACH(host, &subsystem->hosts, link) { + if (strcmp(hostnqn, host->nqn) == 0) { + return host; + } + } + + return NULL; +} + +int +spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host; + + if (!nvmf_valid_nqn(hostnqn)) { + return -EINVAL; + } + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + if (nvmf_subsystem_find_host(subsystem, hostnqn)) { + /* This subsystem already allows the specified host. 
*/ + return 0; + } + + host = calloc(1, sizeof(*host)); + if (!host) { + return -ENOMEM; + } + + snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn); + + TAILQ_INSERT_HEAD(&subsystem->hosts, host, link); + subsystem->tgt->discovery_genctr++; + + return 0; +} + +int +spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + host = nvmf_subsystem_find_host(subsystem, hostnqn); + if (host == NULL) { + return -ENOENT; + } + + nvmf_subsystem_remove_host(subsystem, host); + return 0; +} + +int +spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host) +{ + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + subsystem->allow_any_host = allow_any_host; + + return 0; +} + +bool +spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->allow_any_host; +} + +bool +spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + if (!hostnqn) { + return false; + } + + if (subsystem->allow_any_host) { + return true; + } + + return nvmf_subsystem_find_host(subsystem, hostnqn) != NULL; +} + +struct spdk_nvmf_host * +spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem) +{ + return TAILQ_FIRST(&subsystem->hosts); +} + + +struct spdk_nvmf_host * +spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_host *prev_host) +{ + return TAILQ_NEXT(prev_host, link); +} + +const char * +spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host) +{ + return host->nqn; +} + +struct spdk_nvmf_subsystem_listener * +nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid) +{ 
+ struct spdk_nvmf_subsystem_listener *listener; + + TAILQ_FOREACH(listener, &subsystem->listeners, link) { + if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) { + return listener; + } + } + + return NULL; +} + +/** + * Function to be called once the target is listening. + * + * \param ctx Context argument passed to this function. + * \param status 0 if it completed successfully, or negative errno if it failed. + */ +static void +_nvmf_subsystem_add_listener_done(void *ctx, int status) +{ + struct spdk_nvmf_subsystem_listener *listener = ctx; + + if (status) { + listener->cb_fn(listener->cb_arg, status); + free(listener); + return; + } + + TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link); + listener->subsystem->tgt->discovery_genctr++; + listener->cb_fn(listener->cb_arg, status); +} + +void +spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_transport_id *trid, + spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, + void *cb_arg) +{ + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_subsystem_listener *listener; + struct spdk_nvmf_listener *tr_listener; + + assert(cb_fn != NULL); + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + cb_fn(cb_arg, -EAGAIN); + return; + } + + if (nvmf_subsystem_find_listener(subsystem, trid)) { + /* Listener already exists in this subsystem */ + cb_fn(cb_arg, 0); + return; + } + + transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring); + if (transport == NULL) { + SPDK_ERRLOG("Unknown transport type %d\n", trid->trtype); + cb_fn(cb_arg, -EINVAL); + return; + } + + tr_listener = nvmf_transport_find_listener(transport, trid); + if (!tr_listener) { + SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr); + cb_fn(cb_arg, -EINVAL); + return; + } + + listener = calloc(1, sizeof(*listener)); + if (!listener) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + listener->trid = 
&tr_listener->trid; + listener->transport = transport; + listener->cb_fn = cb_fn; + listener->cb_arg = cb_arg; + listener->subsystem = subsystem; + + if (transport->ops->listen_associate != NULL) { + transport->ops->listen_associate(transport, subsystem, trid, + _nvmf_subsystem_add_listener_done, + listener); + } else { + _nvmf_subsystem_add_listener_done(listener, 0); + } +} + +int +spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_subsystem_listener *listener; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + listener = nvmf_subsystem_find_listener(subsystem, trid); + if (listener == NULL) { + return -ENOENT; + } + + _nvmf_subsystem_remove_listener(subsystem, listener, false); + + return 0; +} + +void +nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem, + bool stop) +{ + struct spdk_nvmf_subsystem_listener *listener, *listener_tmp; + + TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) { + _nvmf_subsystem_remove_listener(subsystem, listener, stop); + } +} + +bool +spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_subsystem_listener *listener; + + if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) { + return true; + } + + TAILQ_FOREACH(listener, &subsystem->listeners, link) { + if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) { + return true; + } + } + + return false; +} + +struct spdk_nvmf_subsystem_listener * +spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem) +{ + return TAILQ_FIRST(&subsystem->listeners); +} + +struct spdk_nvmf_subsystem_listener * +spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_subsystem_listener *prev_listener) +{ + return 
TAILQ_NEXT(prev_listener, link); +} + +const struct spdk_nvme_transport_id * +spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener) +{ + return listener->trid; +} + +void +spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem, + bool allow_any_listener) +{ + subsystem->allow_any_listener = allow_any_listener; +} + +bool +spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->allow_any_listener; +} + + +struct subsystem_update_ns_ctx { + struct spdk_nvmf_subsystem *subsystem; + + spdk_nvmf_subsystem_state_change_done cb_fn; + void *cb_arg; +}; + +static void +subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status) +{ + struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + if (ctx->cb_fn) { + ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status); + } + free(ctx); +} + +static void +subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i) +{ + int rc; + struct subsystem_update_ns_ctx *ctx; + struct spdk_nvmf_poll_group *group; + struct spdk_nvmf_subsystem *subsystem; + + ctx = spdk_io_channel_iter_get_ctx(i); + group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i)); + subsystem = ctx->subsystem; + + rc = nvmf_poll_group_update_subsystem(group, subsystem); + spdk_for_each_channel_continue(i, rc); +} + +static int +nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl, + void *ctx) +{ + spdk_for_each_channel(subsystem->tgt, + subsystem_update_ns_on_pg, + ctx, + cpl); + + return 0; +} + +static void +nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + struct spdk_nvmf_ctrlr *ctrlr; + + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + nvmf_ctrlr_ns_changed(ctrlr, nsid); + } +} + +int +spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_registrant *reg, *reg_tmp; + + if 
(!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + assert(false); + return -1; + } + + if (nsid == 0 || nsid > subsystem->max_nsid) { + return -1; + } + + ns = subsystem->ns[nsid - 1]; + if (!ns) { + return -1; + } + + subsystem->ns[nsid - 1] = NULL; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) { + TAILQ_REMOVE(&ns->registrants, reg, link); + free(reg); + } + spdk_bdev_module_release_bdev(ns->bdev); + spdk_bdev_close(ns->desc); + if (ns->ptpl_file) { + free(ns->ptpl_file); + } + free(ns); + + nvmf_subsystem_ns_changed(subsystem, nsid); + + return 0; +} + +static void +_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_nvmf_ns *ns = cb_arg; + int rc; + + rc = spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid); + if (rc != 0) { + SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id); + } + + spdk_nvmf_subsystem_resume(subsystem, NULL, NULL); +} + +static void +nvmf_ns_hot_remove(void *remove_ctx) +{ + struct spdk_nvmf_ns *ns = remove_ctx; + int rc; + + rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_hot_remove, ns); + if (rc) { + SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n"); + } +} + +static void +_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) +{ + struct spdk_nvmf_ns *ns = cb_arg; + + nvmf_subsystem_ns_changed(subsystem, ns->opts.nsid); + spdk_nvmf_subsystem_resume(subsystem, NULL, NULL); +} + +static void +nvmf_ns_resize(void *event_ctx) +{ + struct spdk_nvmf_ns *ns = event_ctx; + int rc; + + rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_resize, ns); + if (rc) { + SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n"); + } +} + +static void +nvmf_ns_event(enum spdk_bdev_event_type type, + struct spdk_bdev *bdev, + void *event_ctx) +{ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Bdev event: type %d, name %s, subsystem_id 
%d, ns_id %d\n", + type, + bdev->name, + ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id, + ((struct spdk_nvmf_ns *)event_ctx)->nsid); + + switch (type) { + case SPDK_BDEV_EVENT_REMOVE: + nvmf_ns_hot_remove(event_ctx); + break; + case SPDK_BDEV_EVENT_RESIZE: + nvmf_ns_resize(event_ctx); + break; + default: + SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); + break; + } +} + +void +spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size) +{ + /* All current fields are set to 0 by default. */ + memset(opts, 0, opts_size); +} + +/* Dummy bdev module used to to claim bdevs. */ +static struct spdk_bdev_module ns_bdev_module = { + .name = "NVMe-oF Target", +}; + +static int +nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info); +static int +nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info); + +uint32_t +spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev, + const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size, + const char *ptpl_file) +{ + struct spdk_nvmf_ns_opts opts; + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_reservation_info info = {0}; + int rc; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return 0; + } + + if (spdk_bdev_get_md_size(bdev) != 0 && !spdk_bdev_is_md_interleaved(bdev)) { + SPDK_ERRLOG("Can't attach bdev with separate metadata.\n"); + return 0; + } + + spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts)); + if (user_opts) { + memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); + } + + if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) { + opts.uuid = *spdk_bdev_get_uuid(bdev); + } + + if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) { + SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid); + return 0; + } + + if (opts.nsid == 0) { + /* + * NSID not specified - find a free index. 
+ * + * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will + * expand max_nsid if possible. + */ + for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) { + if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) { + break; + } + } + } + + if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) { + SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid); + return 0; + } + + if (opts.nsid > subsystem->max_nsid) { + struct spdk_nvmf_ns **new_ns_array; + + /* If MaxNamespaces was specified, we can't extend max_nsid beyond it. */ + if (subsystem->max_allowed_nsid > 0 && opts.nsid > subsystem->max_allowed_nsid) { + SPDK_ERRLOG("Can't extend NSID range above MaxNamespaces\n"); + return 0; + } + + /* If a controller is connected, we can't change NN. */ + if (!TAILQ_EMPTY(&subsystem->ctrlrs)) { + SPDK_ERRLOG("Can't extend NSID range while controllers are connected\n"); + return 0; + } + + new_ns_array = realloc(subsystem->ns, sizeof(struct spdk_nvmf_ns *) * opts.nsid); + if (new_ns_array == NULL) { + SPDK_ERRLOG("Memory allocation error while resizing namespace array.\n"); + return 0; + } + + memset(new_ns_array + subsystem->max_nsid, 0, + sizeof(struct spdk_nvmf_ns *) * (opts.nsid - subsystem->max_nsid)); + subsystem->ns = new_ns_array; + subsystem->max_nsid = opts.nsid; + } + + ns = calloc(1, sizeof(*ns)); + if (ns == NULL) { + SPDK_ERRLOG("Namespace allocation failed\n"); + return 0; + } + + ns->bdev = bdev; + ns->opts = opts; + ns->subsystem = subsystem; + rc = spdk_bdev_open_ext(bdev->name, true, nvmf_ns_event, ns, &ns->desc); + if (rc != 0) { + SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n", + subsystem->subnqn, spdk_bdev_get_name(bdev), rc); + free(ns); + return 0; + } + rc = spdk_bdev_module_claim_bdev(bdev, ns->desc, &ns_bdev_module); + if (rc != 0) { + spdk_bdev_close(ns->desc); + free(ns); + return 0; + } + subsystem->ns[opts.nsid - 1] = ns; + ns->nsid = opts.nsid; + 
TAILQ_INIT(&ns->registrants); + + if (ptpl_file) { + rc = nvmf_ns_load_reservation(ptpl_file, &info); + if (!rc) { + rc = nvmf_ns_reservation_restore(ns, &info); + if (rc) { + SPDK_ERRLOG("Subsystem restore reservation failed\n"); + subsystem->ns[opts.nsid - 1] = NULL; + spdk_bdev_close(ns->desc); + free(ns); + return 0; + } + } + ns->ptpl_file = strdup(ptpl_file); + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n", + spdk_nvmf_subsystem_get_nqn(subsystem), + spdk_bdev_get_name(bdev), + opts.nsid); + + nvmf_subsystem_ns_changed(subsystem, opts.nsid); + + return opts.nsid; +} + +static uint32_t +nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem, + uint32_t prev_nsid) +{ + uint32_t nsid; + + if (prev_nsid >= subsystem->max_nsid) { + return 0; + } + + for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) { + if (subsystem->ns[nsid - 1]) { + return nsid; + } + } + + return 0; +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem) +{ + uint32_t first_nsid; + + first_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, 0); + return _nvmf_subsystem_get_ns(subsystem, first_nsid); +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ns *prev_ns) +{ + uint32_t next_nsid; + + next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid); + return _nvmf_subsystem_get_ns(subsystem, next_nsid); +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + return _nvmf_subsystem_get_ns(subsystem, nsid); +} + +uint32_t +spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns) +{ + return ns->opts.nsid; +} + +struct spdk_bdev * +spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns) +{ + return ns->bdev; +} + +void +spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts, + size_t opts_size) +{ + memset(opts, 
0, opts_size); + memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size)); +} + +const char * +spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->sn; +} + +int +spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn) +{ + size_t len, max_len; + + max_len = sizeof(subsystem->sn) - 1; + len = strlen(sn); + if (len > max_len) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid sn \"%s\": length %zu > max %zu\n", + sn, len, max_len); + return -1; + } + + if (!nvmf_valid_ascii_string(sn, len)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII sn\n"); + SPDK_LOGDUMP(SPDK_LOG_NVMF, "sn", sn, len); + return -1; + } + + snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn); + + return 0; +} + +const char * +spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->mn; +} + +int +spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn) +{ + size_t len, max_len; + + if (mn == NULL) { + mn = MODEL_NUMBER_DEFAULT; + } + max_len = sizeof(subsystem->mn) - 1; + len = strlen(mn); + if (len > max_len) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid mn \"%s\": length %zu > max %zu\n", + mn, len, max_len); + return -1; + } + + if (!nvmf_valid_ascii_string(mn, len)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII mn\n"); + SPDK_LOGDUMP(SPDK_LOG_NVMF, "mn", mn, len); + return -1; + } + + snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn); + + return 0; +} + +const char * +spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->subnqn; +} + +enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->subtype; +} + +uint32_t +spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->max_nsid; +} + +static uint16_t +nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem) +{ + int count; + + /* + * In the worst case, we might have to 
try all CNTLID values between 1 and 0xFFF0 - 1 + * before we find one that is unused (or find that all values are in use). + */ + for (count = 0; count < 0xFFF0 - 1; count++) { + subsystem->next_cntlid++; + if (subsystem->next_cntlid >= 0xFFF0) { + /* The spec reserves cntlid values in the range FFF0h to FFFFh. */ + subsystem->next_cntlid = 1; + } + + /* Check if a controller with this cntlid currently exists. */ + if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) { + /* Found unused cntlid */ + return subsystem->next_cntlid; + } + } + + /* All valid cntlid values are in use. */ + return 0xFFFF; +} + +int +nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr) +{ + ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem); + if (ctrlr->cntlid == 0xFFFF) { + /* Unable to get a cntlid */ + SPDK_ERRLOG("Reached max simultaneous ctrlrs\n"); + return -EBUSY; + } + + TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link); + + return 0; +} + +void +nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr) +{ + assert(subsystem == ctrlr->subsys); + TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link); +} + +struct spdk_nvmf_ctrlr * +nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid) +{ + struct spdk_nvmf_ctrlr *ctrlr; + + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + if (ctrlr->cntlid == cntlid) { + return ctrlr; + } + } + + return NULL; +} + +uint32_t +spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->max_allowed_nsid; +} + +struct _nvmf_ns_registrant { + uint64_t rkey; + char *host_uuid; +}; + +struct _nvmf_ns_registrants { + size_t num_regs; + struct _nvmf_ns_registrant reg[SPDK_NVMF_MAX_NUM_REGISTRANTS]; +}; + +struct _nvmf_ns_reservation { + bool ptpl_activated; + enum spdk_nvme_reservation_type rtype; + uint64_t crkey; + char *bdev_uuid; + char *holder_uuid; + struct _nvmf_ns_registrants 
regs; +}; + +static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = { + {"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64}, + {"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string}, +}; + +static int +nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out) +{ + struct _nvmf_ns_registrant *reg = out; + + return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders, + SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg); +} + +static int +nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out) +{ + struct _nvmf_ns_registrants *regs = out; + + return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg, + SPDK_NVMF_MAX_NUM_REGISTRANTS, ®s->num_regs, + sizeof(struct _nvmf_ns_registrant)); +} + +static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = { + {"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true}, + {"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true}, + {"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true}, + {"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string}, + {"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true}, + {"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs}, +}; + +static int +nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info) +{ + FILE *fd; + size_t json_size; + ssize_t values_cnt, rc; + void *json = NULL, *end; + struct spdk_json_val *values = NULL; + struct _nvmf_ns_reservation res = {}; + uint32_t i; + + fd = fopen(file, "r"); + /* It's not an error if the file does not exist */ + if (!fd) { + SPDK_NOTICELOG("File %s does not exist\n", file); + return -ENOENT; + } + + /* Load all persist file contents into a local buffer */ + json = spdk_posix_file_load(fd, &json_size); + fclose(fd); 
+ if (!json) { + SPDK_ERRLOG("Load persit file %s failed\n", file); + return -ENOMEM; + } + + rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0); + if (rc < 0) { + SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc); + goto exit; + } + + values_cnt = rc; + values = calloc(values_cnt, sizeof(struct spdk_json_val)); + if (values == NULL) { + goto exit; + } + + rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0); + if (rc != values_cnt) { + SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc); + goto exit; + } + + /* Decode json */ + if (spdk_json_decode_object(values, nvmf_ns_pr_decoders, + SPDK_COUNTOF(nvmf_ns_pr_decoders), + &res)) { + SPDK_ERRLOG("Invalid objects in the persist file %s\n", file); + rc = -EINVAL; + goto exit; + } + + if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) { + SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS); + rc = -ERANGE; + goto exit; + } + + rc = 0; + info->ptpl_activated = res.ptpl_activated; + info->rtype = res.rtype; + info->crkey = res.crkey; + snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid); + snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid); + info->num_regs = res.regs.num_regs; + for (i = 0; i < res.regs.num_regs; i++) { + info->registrants[i].rkey = res.regs.reg[i].rkey; + snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s", + res.regs.reg[i].host_uuid); + } + +exit: + free(json); + free(values); + free(res.bdev_uuid); + free(res.holder_uuid); + for (i = 0; i < res.regs.num_regs; i++) { + free(res.regs.reg[i].host_uuid); + } + + return rc; +} + +static bool +nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns); + +static int +nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info) +{ + uint32_t i; + struct spdk_nvmf_registrant *reg, *holder = NULL; + struct spdk_uuid bdev_uuid, holder_uuid; + + 
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "NSID %u, PTPL %u, Number of registrants %u\n", + ns->nsid, info->ptpl_activated, info->num_regs); + + /* it's not an error */ + if (!info->ptpl_activated || !info->num_regs) { + return 0; + } + + spdk_uuid_parse(&bdev_uuid, info->bdev_uuid); + if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) { + SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n"); + return -EINVAL; + } + + ns->crkey = info->crkey; + ns->rtype = info->rtype; + ns->ptpl_activated = info->ptpl_activated; + spdk_uuid_parse(&holder_uuid, info->holder_uuid); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Bdev UUID %s\n", info->bdev_uuid); + if (info->rtype) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n", + info->holder_uuid, info->rtype, info->crkey); + } + + for (i = 0; i < info->num_regs; i++) { + reg = calloc(1, sizeof(*reg)); + if (!reg) { + return -ENOMEM; + } + spdk_uuid_parse(®->hostid, info->registrants[i].host_uuid); + reg->rkey = info->registrants[i].rkey; + TAILQ_INSERT_TAIL(&ns->registrants, reg, link); + if (!spdk_uuid_compare(&holder_uuid, ®->hostid)) { + holder = reg; + } + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n", + info->registrants[i].rkey, info->registrants[i].host_uuid); + } + + if (nvmf_ns_reservation_all_registrants_type(ns)) { + ns->holder = TAILQ_FIRST(&ns->registrants); + } else { + ns->holder = holder; + } + + return 0; +} + +static int +nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size) +{ + char *file = cb_ctx; + size_t rc; + FILE *fd; + + fd = fopen(file, "w"); + if (!fd) { + SPDK_ERRLOG("Can't open file %s for write\n", file); + return -ENOENT; + } + rc = fwrite(data, 1, size, fd); + fclose(fd); + + return rc == size ? 
0 : -1; +} + +static int +nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info) +{ + struct spdk_json_write_ctx *w; + uint32_t i; + int rc = 0; + + w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0); + if (w == NULL) { + return -ENOMEM; + } + /* clear the configuration file */ + if (!info->ptpl_activated) { + goto exit; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated); + spdk_json_write_named_uint32(w, "rtype", info->rtype); + spdk_json_write_named_uint64(w, "crkey", info->crkey); + spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid); + spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid); + + spdk_json_write_named_array_begin(w, "registrants"); + for (i = 0; i < info->num_regs; i++) { + spdk_json_write_object_begin(w); + spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey); + spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + +exit: + rc = spdk_json_write_end(w); + return rc; +} + +static int +nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns) +{ + struct spdk_nvmf_reservation_info info; + struct spdk_nvmf_registrant *reg, *tmp; + uint32_t i = 0; + + assert(ns != NULL); + + if (!ns->bdev || !ns->ptpl_file) { + return 0; + } + + memset(&info, 0, sizeof(info)); + spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev)); + + if (ns->rtype) { + info.rtype = ns->rtype; + info.crkey = ns->crkey; + if (!nvmf_ns_reservation_all_registrants_type(ns)) { + assert(ns->holder != NULL); + spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid); + } + } + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid), + ®->hostid); + info.registrants[i++].rkey = 
reg->rkey; + } + + info.num_regs = i; + info.ptpl_activated = ns->ptpl_activated; + + return nvmf_ns_reservation_update(ns->ptpl_file, &info); +} + +static struct spdk_nvmf_registrant * +nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns, + struct spdk_uuid *uuid) +{ + struct spdk_nvmf_registrant *reg, *tmp; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + if (!spdk_uuid_compare(®->hostid, uuid)) { + return reg; + } + } + + return NULL; +} + +/* Generate reservation notice log to registered HostID controllers */ +static void +nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ns *ns, + struct spdk_uuid *hostid_list, + uint32_t num_hostid, + enum spdk_nvme_reservation_notification_log_page_type type) +{ + struct spdk_nvmf_ctrlr *ctrlr; + uint32_t i; + + for (i = 0; i < num_hostid; i++) { + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) { + nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type); + } + } + } +} + +/* Get all registrants' hostid other than the controller who issued the command */ +static uint32_t +nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns, + struct spdk_uuid *hostid_list, + uint32_t max_num_hostid, + struct spdk_uuid *current_hostid) +{ + struct spdk_nvmf_registrant *reg, *tmp; + uint32_t num_hostid = 0; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + if (spdk_uuid_compare(®->hostid, current_hostid)) { + if (num_hostid == max_num_hostid) { + assert(false); + return max_num_hostid; + } + hostid_list[num_hostid++] = reg->hostid; + } + } + + return num_hostid; +} + +/* Calculate the unregistered HostID list according to list + * prior to execute preempt command and list after executing + * preempt command. 
+ */ +static uint32_t +nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list, + uint32_t old_num_hostid, + struct spdk_uuid *remaining_hostid_list, + uint32_t remaining_num_hostid) +{ + struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint32_t i, j, num_hostid = 0; + bool found; + + if (!remaining_num_hostid) { + return old_num_hostid; + } + + for (i = 0; i < old_num_hostid; i++) { + found = false; + for (j = 0; j < remaining_num_hostid; j++) { + if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) { + found = true; + break; + } + } + if (!found) { + spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]); + } + } + + if (num_hostid) { + memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid); + } + + return num_hostid; +} + +/* current reservation type is all registrants or not */ +static bool +nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns) +{ + return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS || + ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS); +} + +/* current registrant is reservation holder or not */ +static bool +nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_registrant *reg) +{ + if (!reg) { + return false; + } + + if (nvmf_ns_reservation_all_registrants_type(ns)) { + return true; + } + + return (ns->holder == reg); +} + +static int +nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_ctrlr *ctrlr, + uint64_t nrkey) +{ + struct spdk_nvmf_registrant *reg; + + reg = calloc(1, sizeof(*reg)); + if (!reg) { + return -ENOMEM; + } + + reg->rkey = nrkey; + /* set hostid for the registrant */ + spdk_uuid_copy(®->hostid, &ctrlr->hostid); + TAILQ_INSERT_TAIL(&ns->registrants, reg, link); + ns->gen++; + + return 0; +} + +static void +nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns) +{ + ns->rtype = 0; + ns->crkey = 0; + ns->holder = NULL; +} + +/* 
release the reservation if the last registrant was removed */ +static void +nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_registrant *reg) +{ + struct spdk_nvmf_registrant *next_reg; + + /* no reservation holder */ + if (!ns->holder) { + assert(ns->rtype == 0); + return; + } + + next_reg = TAILQ_FIRST(&ns->registrants); + if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) { + /* the next valid registrant is the new holder now */ + ns->holder = next_reg; + } else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) { + /* release the reservation */ + nvmf_ns_reservation_release_reservation(ns); + } +} + +static void +nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_registrant *reg) +{ + TAILQ_REMOVE(&ns->registrants, reg, link); + nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg); + free(reg); + ns->gen++; + return; +} + +static uint32_t +nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns, + uint64_t rkey) +{ + struct spdk_nvmf_registrant *reg, *tmp; + uint32_t count = 0; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + if (reg->rkey == rkey) { + nvmf_ns_reservation_remove_registrant(ns, reg); + count++; + } + } + return count; +} + +static uint32_t +nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_registrant *reg) +{ + struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2; + uint32_t count = 0; + + TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) { + if (reg_tmp != reg) { + nvmf_ns_reservation_remove_registrant(ns, reg_tmp); + count++; + } + } + return count; +} + +static uint32_t +nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns) +{ + struct spdk_nvmf_registrant *reg, *reg_tmp; + uint32_t count = 0; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) { + nvmf_ns_reservation_remove_registrant(ns, reg); + count++; + } + return count; +} + +static 
void +nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey, + enum spdk_nvme_reservation_type rtype, + struct spdk_nvmf_registrant *holder) +{ + ns->rtype = rtype; + ns->crkey = rkey; + assert(ns->holder == NULL); + ns->holder = holder; +} + +static bool +nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + uint8_t rrega, iekey, cptpl, rtype; + struct spdk_nvme_reservation_register_data key; + struct spdk_nvmf_registrant *reg; + uint8_t status = SPDK_NVME_SC_SUCCESS; + bool update_sgroup = false; + struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint32_t num_hostid = 0; + int rc; + + rrega = cmd->cdw10_bits.resv_register.rrega; + iekey = cmd->cdw10_bits.resv_register.iekey; + cptpl = cmd->cdw10_bits.resv_register.cptpl; + + if (req->data && req->length >= sizeof(key)) { + memcpy(&key, req->data, sizeof(key)); + } else { + SPDK_ERRLOG("No key provided. 
Failing request.\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, " + "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n", + rrega, iekey, cptpl, key.crkey, key.nrkey); + + if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) { + /* Ture to OFF state, and need to be updated in the configuration file */ + if (ns->ptpl_activated) { + ns->ptpl_activated = 0; + update_sgroup = true; + } + } else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) { + if (ns->ptpl_file == NULL) { + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } else if (ns->ptpl_activated == 0) { + ns->ptpl_activated = 1; + update_sgroup = true; + } + } + + /* current Host Identifier has registrant or not */ + reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid); + + switch (rrega) { + case SPDK_NVME_RESERVE_REGISTER_KEY: + if (!reg) { + /* register new controller */ + if (key.nrkey == 0) { + SPDK_ERRLOG("Can't register zeroed new key\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey); + if (rc < 0) { + status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + goto exit; + } + update_sgroup = true; + } else { + /* register with same key is not an error */ + if (reg->rkey != key.nrkey) { + SPDK_ERRLOG("The same host already register a " + "key with 0x%"PRIx64"\n", + reg->rkey); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + } + break; + case SPDK_NVME_RESERVE_UNREGISTER_KEY: + if (!reg || (!iekey && reg->rkey != key.crkey)) { + SPDK_ERRLOG("No registrant or current key doesn't match " + "with existing registrant key\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + + rtype = ns->rtype; + num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list, + SPDK_NVMF_MAX_NUM_REGISTRANTS, + &ctrlr->hostid); + + nvmf_ns_reservation_remove_registrant(ns, reg); + + if (!ns->rtype && num_hostid && (rtype == 
SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY || + rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) { + nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns, + hostid_list, + num_hostid, + SPDK_NVME_RESERVATION_RELEASED); + } + update_sgroup = true; + break; + case SPDK_NVME_RESERVE_REPLACE_KEY: + if (!reg || (!iekey && reg->rkey != key.crkey)) { + SPDK_ERRLOG("No registrant or current key doesn't match " + "with existing registrant key\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + goto exit; + } + if (key.nrkey == 0) { + SPDK_ERRLOG("Can't register zeroed new key\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + reg->rkey = key.nrkey; + update_sgroup = true; + break; + default: + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + +exit: + if (update_sgroup) { + rc = nvmf_ns_update_reservation_info(ns); + if (rc != 0) { + status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + } + } + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = status; + return update_sgroup; +} + +static bool +nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + uint8_t racqa, iekey, rtype; + struct spdk_nvme_reservation_acquire_data key; + struct spdk_nvmf_registrant *reg; + bool all_regs = false; + uint32_t count = 0; + bool update_sgroup = true; + struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint32_t num_hostid = 0; + struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint32_t new_num_hostid = 0; + bool reservation_released = false; + uint8_t status = SPDK_NVME_SC_SUCCESS; + + racqa = cmd->cdw10_bits.resv_acquire.racqa; + iekey = cmd->cdw10_bits.resv_acquire.iekey; + rtype = cmd->cdw10_bits.resv_acquire.rtype; + + if (req->data && req->length >= sizeof(key)) { + memcpy(&key, req->data, sizeof(key)); + } else { + SPDK_ERRLOG("No key provided. 
Failing request.\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, " + "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n", + racqa, iekey, rtype, key.crkey, key.prkey); + + if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) { + SPDK_ERRLOG("Ignore existing key field set to 1\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + update_sgroup = false; + goto exit; + } + + reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid); + /* must be registrant and CRKEY must match */ + if (!reg || reg->rkey != key.crkey) { + SPDK_ERRLOG("No registrant or current key doesn't match " + "with existing registrant key\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + update_sgroup = false; + goto exit; + } + + all_regs = nvmf_ns_reservation_all_registrants_type(ns); + + switch (racqa) { + case SPDK_NVME_RESERVE_ACQUIRE: + /* it's not an error for the holder to acquire same reservation type again */ + if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) { + /* do nothing */ + update_sgroup = false; + } else if (ns->holder == NULL) { + /* fisrt time to acquire the reservation */ + nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg); + } else { + SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + update_sgroup = false; + goto exit; + } + break; + case SPDK_NVME_RESERVE_PREEMPT: + /* no reservation holder */ + if (!ns->holder) { + /* unregister with PRKEY */ + nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey); + break; + } + num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list, + SPDK_NVMF_MAX_NUM_REGISTRANTS, + &ctrlr->hostid); + + /* only 1 reservation holder and reservation key is valid */ + if (!all_regs) { + /* preempt itself */ + if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && + ns->crkey == key.prkey) { + ns->rtype = rtype; + reservation_released = 
true; + break; + } + + if (ns->crkey == key.prkey) { + nvmf_ns_reservation_remove_registrant(ns, ns->holder); + nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg); + reservation_released = true; + } else if (key.prkey != 0) { + nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey); + } else { + /* PRKEY is zero */ + SPDK_ERRLOG("Current PRKEY is zero\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + update_sgroup = false; + goto exit; + } + } else { + /* release all other registrants except for the current one */ + if (key.prkey == 0) { + nvmf_ns_reservation_remove_all_other_registrants(ns, reg); + assert(ns->holder == reg); + } else { + count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey); + if (count == 0) { + SPDK_ERRLOG("PRKEY doesn't match any registrant\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + update_sgroup = false; + goto exit; + } + } + } + break; + default: + status = SPDK_NVME_SC_INVALID_FIELD; + update_sgroup = false; + break; + } + +exit: + if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) { + new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list, + SPDK_NVMF_MAX_NUM_REGISTRANTS, + &ctrlr->hostid); + /* Preempt notification occurs on the unregistered controllers + * other than the controller who issued the command. + */ + num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list, + num_hostid, + new_hostid_list, + new_num_hostid); + if (num_hostid) { + nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns, + hostid_list, + num_hostid, + SPDK_NVME_REGISTRATION_PREEMPTED); + + } + /* Reservation released notification occurs on the + * controllers which are the remaining registrants other than + * the controller who issued the command. 
+ */ + if (reservation_released && new_num_hostid) { + nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns, + new_hostid_list, + new_num_hostid, + SPDK_NVME_RESERVATION_RELEASED); + + } + } + if (update_sgroup && ns->ptpl_activated) { + if (nvmf_ns_update_reservation_info(ns)) { + status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + } + } + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = status; + return update_sgroup; +} + +static bool +nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + uint8_t rrela, iekey, rtype; + struct spdk_nvmf_registrant *reg; + uint64_t crkey; + uint8_t status = SPDK_NVME_SC_SUCCESS; + bool update_sgroup = true; + struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS]; + uint32_t num_hostid = 0; + + rrela = cmd->cdw10_bits.resv_release.rrela; + iekey = cmd->cdw10_bits.resv_release.iekey; + rtype = cmd->cdw10_bits.resv_release.rtype; + + if (req->data && req->length >= sizeof(crkey)) { + memcpy(&crkey, req->data, sizeof(crkey)); + } else { + SPDK_ERRLOG("No key provided. 
Failing request.\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, " + "CRKEY 0x%"PRIx64"\n", rrela, iekey, rtype, crkey); + + if (iekey) { + SPDK_ERRLOG("Ignore existing key field set to 1\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + update_sgroup = false; + goto exit; + } + + reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid); + if (!reg || reg->rkey != crkey) { + SPDK_ERRLOG("No registrant or current key doesn't match " + "with existing registrant key\n"); + status = SPDK_NVME_SC_RESERVATION_CONFLICT; + update_sgroup = false; + goto exit; + } + + num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list, + SPDK_NVMF_MAX_NUM_REGISTRANTS, + &ctrlr->hostid); + + switch (rrela) { + case SPDK_NVME_RESERVE_RELEASE: + if (!ns->holder) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "RELEASE: no holder\n"); + update_sgroup = false; + goto exit; + } + if (ns->rtype != rtype) { + SPDK_ERRLOG("Type doesn't match\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + update_sgroup = false; + goto exit; + } + if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) { + /* not the reservation holder, this isn't an error */ + update_sgroup = false; + goto exit; + } + + rtype = ns->rtype; + nvmf_ns_reservation_release_reservation(ns); + + if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE && + rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) { + nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns, + hostid_list, + num_hostid, + SPDK_NVME_RESERVATION_RELEASED); + } + break; + case SPDK_NVME_RESERVE_CLEAR: + nvmf_ns_reservation_clear_all_registrants(ns); + if (num_hostid) { + nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns, + hostid_list, + num_hostid, + SPDK_NVME_RESERVATION_PREEMPTED); + } + break; + default: + status = SPDK_NVME_SC_INVALID_FIELD; + update_sgroup = false; + goto exit; + } + +exit: + if (update_sgroup && ns->ptpl_activated) { + if 
(nvmf_ns_update_reservation_info(ns)) { + status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + } + } + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = status; + return update_sgroup; +} + +static void +nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvmf_ctrlr *ctrlr_tmp; + struct spdk_nvmf_registrant *reg, *tmp; + struct spdk_nvme_reservation_status_extended_data *status_data; + struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data; + uint8_t *payload; + uint32_t len, count = 0; + uint32_t regctl = 0; + uint8_t status = SPDK_NVME_SC_SUCCESS; + + if (req->data == NULL) { + SPDK_ERRLOG("No data transfer specified for request. " + " Unable to transfer back response.\n"); + status = SPDK_NVME_SC_INVALID_FIELD; + goto exit; + } + + if (!cmd->cdw11_bits.resv_report.eds) { + SPDK_ERRLOG("NVMeoF uses extended controller data structure, " + "please set EDS bit in cdw11 and try again\n"); + status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT; + goto exit; + } + + /* Get number of registerd controllers, one Host may have more than + * one controller based on different ports. 
+ */ + TAILQ_FOREACH(ctrlr_tmp, &subsystem->ctrlrs, link) { + reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr_tmp->hostid); + if (reg) { + regctl++; + } + } + + len = sizeof(*status_data) + sizeof(*ctrlr_data) * regctl; + payload = calloc(1, len); + if (!payload) { + status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + goto exit; + } + + status_data = (struct spdk_nvme_reservation_status_extended_data *)payload; + status_data->data.gen = ns->gen; + status_data->data.rtype = ns->rtype; + status_data->data.regctl = regctl; + status_data->data.ptpls = ns->ptpl_activated; + + TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) { + assert(count <= regctl); + ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *) + (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * count); + /* Set to 0xffffh for dynamic controller */ + ctrlr_data->cntlid = 0xffff; + ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false; + ctrlr_data->rkey = reg->rkey; + spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, ®->hostid); + count++; + } + + memcpy(req->data, payload, spdk_min(len, (cmd->cdw10 + 1) * sizeof(uint32_t))); + free(payload); + +exit: + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = status; + return; +} + +static void +nvmf_ns_reservation_complete(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + + spdk_nvmf_request_complete(req); +} + +static void +_nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg; + struct spdk_nvmf_poll_group *group = req->qpair->group; + + spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req); +} + +void +nvmf_ns_reservation_request(void *ctx) +{ + struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct subsystem_update_ns_ctx 
*update_ctx; + uint32_t nsid; + struct spdk_nvmf_ns *ns; + bool update_sgroup = false; + + nsid = cmd->nsid; + ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid); + assert(ns != NULL); + + switch (cmd->opc) { + case SPDK_NVME_OPC_RESERVATION_REGISTER: + update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req); + break; + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req); + break; + case SPDK_NVME_OPC_RESERVATION_RELEASE: + update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req); + break; + case SPDK_NVME_OPC_RESERVATION_REPORT: + nvmf_ns_reservation_report(ns, ctrlr, req); + break; + default: + break; + } + + /* update reservation information to subsystem's poll group */ + if (update_sgroup) { + update_ctx = calloc(1, sizeof(*update_ctx)); + if (update_ctx == NULL) { + SPDK_ERRLOG("Can't alloc subsystem poll group update context\n"); + goto update_done; + } + update_ctx->subsystem = ctrlr->subsys; + update_ctx->cb_fn = _nvmf_ns_reservation_update_done; + update_ctx->cb_arg = req; + + nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx); + return; + } + +update_done: + _nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0); +} diff --git a/src/spdk/lib/nvmf/tcp.c b/src/spdk/lib/nvmf/tcp.c new file mode 100644 index 000000000..391d4bcf1 --- /dev/null +++ b/src/spdk/lib/nvmf/tcp.c @@ -0,0 +1,2631 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/crc32.h" +#include "spdk/endian.h" +#include "spdk/assert.h" +#include "spdk/thread.h" +#include "spdk/nvmf_transport.h" +#include "spdk/sock.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/util.h" + +#include "spdk_internal/assert.h" +#include "spdk_internal/log.h" +#include "spdk_internal/nvme_tcp.h" + +#include "nvmf_internal.h" + +#define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16 +#define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6 + +const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp; + +/* spdk nvmf related structure */ +enum spdk_nvmf_tcp_req_state { + + /* The request is not currently in use */ + TCP_REQUEST_STATE_FREE = 0, + + /* Initial state when request first received */ + TCP_REQUEST_STATE_NEW, + + /* The request is queued until a data buffer is available. */ + TCP_REQUEST_STATE_NEED_BUFFER, + + /* The request is currently transferring data from the host to the controller. */ + TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + + /* The request is waiting for the R2T send acknowledgement. */ + TCP_REQUEST_STATE_AWAITING_R2T_ACK, + + /* The request is ready to execute at the block device */ + TCP_REQUEST_STATE_READY_TO_EXECUTE, + + /* The request is currently executing at the block device */ + TCP_REQUEST_STATE_EXECUTING, + + /* The request finished executing at the block device */ + TCP_REQUEST_STATE_EXECUTED, + + /* The request is ready to send a completion */ + TCP_REQUEST_STATE_READY_TO_COMPLETE, + + /* The request is currently transferring final pdus from the controller to the host. */ + TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + + /* The request completed and can be marked free. 
*/ + TCP_REQUEST_STATE_COMPLETED, + + /* Terminator */ + TCP_REQUEST_NUM_STATES, +}; + +static const char *spdk_nvmf_tcp_term_req_fes_str[] = { + "Invalid PDU Header Field", + "PDU Sequence Error", + "Header Digiest Error", + "Data Transfer Out of Range", + "R2T Limit Exceeded", + "Unsupported parameter", +}; + +#define OBJECT_NVMF_TCP_IO 0x80 + +#define TRACE_GROUP_NVMF_TCP 0x5 +#define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0) +#define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1) +#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2) +#define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3) +#define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4) +#define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5) +#define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6) +#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7) +#define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8) +#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9) +#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA) +#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB) +#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC) + +SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) +{ + spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r'); + spdk_trace_register_description("TCP_REQ_NEW", + TRACE_TCP_REQUEST_STATE_NEW, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, ""); + spdk_trace_register_description("TCP_REQ_NEED_BUFFER", + TRACE_TCP_REQUEST_STATE_NEED_BUFFER, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_TX_H_TO_C", + 
TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE", + TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_EXECUTING", + TRACE_TCP_REQUEST_STATE_EXECUTING, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_EXECUTED", + TRACE_TCP_REQUEST_STATE_EXECUTED, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE", + TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_TRANSFER_C2H", + TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_REQ_COMPLETED", + TRACE_TCP_REQUEST_STATE_COMPLETED, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); + spdk_trace_register_description("TCP_WRITE_START", + TRACE_TCP_FLUSH_WRITEBUF_START, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("TCP_WRITE_DONE", + TRACE_TCP_FLUSH_WRITEBUF_DONE, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("TCP_READ_DONE", + TRACE_TCP_READ_FROM_SOCKET_DONE, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK", + TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, + OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); +} + +struct spdk_nvmf_tcp_req { + struct spdk_nvmf_request req; + struct spdk_nvme_cpl rsp; + struct spdk_nvme_cmd cmd; + + /* A PDU that can be used for sending responses. This is + * not the incoming PDU! */ + struct nvme_tcp_pdu *pdu; + + /* + * The PDU for a request may be used multiple times in serial over + * the request's lifetime. For example, first to send an R2T, then + * to send a completion. 
To catch mistakes where the PDU is used + * twice at the same time, add a debug flag here for init/fini. + */ + bool pdu_in_use; + + /* In-capsule data buffer */ + uint8_t *buf; + + bool has_incapsule_data; + + /* transfer_tag */ + uint16_t ttag; + + enum spdk_nvmf_tcp_req_state state; + + /* + * h2c_offset is used when we receive the h2c_data PDU. + */ + uint32_t h2c_offset; + + STAILQ_ENTRY(spdk_nvmf_tcp_req) link; + TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; +}; + +struct spdk_nvmf_tcp_qpair { + struct spdk_nvmf_qpair qpair; + struct spdk_nvmf_tcp_poll_group *group; + struct spdk_nvmf_tcp_port *port; + struct spdk_sock *sock; + + enum nvme_tcp_pdu_recv_state recv_state; + enum nvme_tcp_qpair_state state; + + /* PDU being actively received */ + struct nvme_tcp_pdu pdu_in_progress; + uint32_t recv_buf_size; + + /* This is a spare PDU used for sending special management + * operations. Primarily, this is used for the initial + * connection response and c2h termination request. */ + struct nvme_tcp_pdu mgmt_pdu; + + TAILQ_HEAD(, nvme_tcp_pdu) send_queue; + + /* Arrays of in-capsule buffers, requests, and pdus. + * Each array is 'resource_count' number of elements */ + void *bufs; + struct spdk_nvmf_tcp_req *reqs; + struct nvme_tcp_pdu *pdus; + uint32_t resource_count; + + /* Queues to track the requests in all states */ + TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES]; + /* Number of requests in each state */ + uint32_t state_cntr[TCP_REQUEST_NUM_STATES]; + + uint8_t cpda; + + bool host_hdgst_enable; + bool host_ddgst_enable; + + /* IP address */ + char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; + char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; + + /* IP port */ + uint16_t initiator_port; + uint16_t target_port; + + /* Timer used to destroy qpair after detecting transport error issue if initiator does + * not close the connection. 
+ */ + struct spdk_poller *timeout_poller; + + TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; +}; + +struct spdk_nvmf_tcp_poll_group { + struct spdk_nvmf_transport_poll_group group; + struct spdk_sock_group *sock_group; + + TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; + TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req; +}; + +struct spdk_nvmf_tcp_port { + const struct spdk_nvme_transport_id *trid; + struct spdk_sock *listen_sock; + TAILQ_ENTRY(spdk_nvmf_tcp_port) link; +}; + +struct spdk_nvmf_tcp_transport { + struct spdk_nvmf_transport transport; + + pthread_mutex_t lock; + + TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; +}; + +static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_req *tcp_req); + +static void +nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, + enum spdk_nvmf_tcp_req_state state) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_tcp_qpair *tqpair; + + qpair = tcp_req->req.qpair; + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + + TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); + assert(tqpair->state_cntr[tcp_req->state] > 0); + tqpair->state_cntr[tcp_req->state]--; + + TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link); + tqpair->state_cntr[state]++; + + tcp_req->state = state; +} + +static inline struct nvme_tcp_pdu * +nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req) +{ + assert(tcp_req->pdu_in_use == false); + tcp_req->pdu_in_use = true; + + memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu)); + tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); + + return tcp_req->pdu; +} + +static inline void +nvmf_tcp_req_pdu_fini(struct spdk_nvmf_tcp_req *tcp_req) +{ + tcp_req->pdu_in_use = false; +} + +static struct spdk_nvmf_tcp_req * +nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) +{ + struct spdk_nvmf_tcp_req *tcp_req; + + tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]); + if 
(!tcp_req) { + return NULL; + } + + memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); + tcp_req->h2c_offset = 0; + tcp_req->has_incapsule_data = false; + tcp_req->req.dif.dif_insert_or_strip = false; + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); + return tcp_req; +} + +static void +nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req) +{ + struct spdk_nvmf_tcp_transport *ttransport; + + assert(tcp_req != NULL); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req); + ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, + struct spdk_nvmf_tcp_transport, transport); + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); + nvmf_tcp_req_process(ttransport, tcp_req); +} + +static int +nvmf_tcp_req_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); + + nvmf_tcp_request_free(tcp_req); + + return 0; +} + +static void +nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, + enum spdk_nvmf_tcp_req_state state) +{ + struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; + + TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) { + nvmf_tcp_request_free(tcp_req); + } +} + +static void +nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) +{ + struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; + + assert(TAILQ_EMPTY(&tqpair->send_queue)); + + nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); + nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); + + /* Wipe the requests waiting for buffer from the global list */ + TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link, + req_tmp) { + STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req, + spdk_nvmf_request, buf_link); + } + + nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); + nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); + 
nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK); +} + +static void +nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) +{ + int i; + struct spdk_nvmf_tcp_req *tcp_req; + + SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); + for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { + SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]); + TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) { + SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool); + SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); + } + } +} + +static void +nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) +{ + int err = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); + + err = spdk_sock_close(&tqpair->sock); + assert(err == 0); + nvmf_tcp_cleanup_all_states(tqpair); + + if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) { + SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, + tqpair->state_cntr[TCP_REQUEST_STATE_FREE], + tqpair->resource_count); + err++; + } + + if (err > 0) { + nvmf_tcp_dump_qpair_req_contents(tqpair); + } + + spdk_dma_free(tqpair->pdus); + free(tqpair->reqs); + spdk_free(tqpair->bufs); + free(tqpair); + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n"); +} + +static int +nvmf_tcp_destroy(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_tcp_transport *ttransport; + + assert(transport != NULL); + ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); + + pthread_mutex_destroy(&ttransport->lock); + free(ttransport); + return 0; +} + +static struct spdk_nvmf_transport * +nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) +{ + struct spdk_nvmf_tcp_transport *ttransport; + uint32_t sge_count; + uint32_t min_shared_buffers; + + ttransport = calloc(1, 
sizeof(*ttransport)); + if (!ttransport) { + return NULL; + } + + TAILQ_INIT(&ttransport->ports); + + ttransport->transport.ops = &spdk_nvmf_transport_tcp; + + SPDK_NOTICELOG("*** TCP Transport Init ***\n"); + + SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n" + " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" + " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" + " in_capsule_data_size=%d, max_aq_depth=%d\n" + " num_shared_buffers=%d, c2h_success=%d,\n" + " dif_insert_or_strip=%d, sock_priority=%d\n" + " abort_timeout_sec=%d\n", + opts->max_queue_depth, + opts->max_io_size, + opts->max_qpairs_per_ctrlr - 1, + opts->io_unit_size, + opts->in_capsule_data_size, + opts->max_aq_depth, + opts->num_shared_buffers, + opts->c2h_success, + opts->dif_insert_or_strip, + opts->sock_priority, + opts->abort_timeout_sec); + + if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { + SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" + "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", + opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); + free(ttransport); + return NULL; + } + + /* I/O unit size cannot be larger than max I/O size */ + if (opts->io_unit_size > opts->max_io_size) { + opts->io_unit_size = opts->max_io_size; + } + + sge_count = opts->max_io_size / opts->io_unit_size; + if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) { + SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); + free(ttransport); + return NULL; + } + + min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size; + if (min_shared_buffers > opts->num_shared_buffers) { + SPDK_ERRLOG("There are not enough buffers to satisfy" + "per-poll group caches for each thread. (%" PRIu32 ")" + "supplied. 
(%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); + SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); + nvmf_tcp_destroy(&ttransport->transport); + return NULL; + } + + pthread_mutex_init(&ttransport->lock, NULL); + + return &ttransport->transport; +} + +static int +nvmf_tcp_trsvcid_to_int(const char *trsvcid) +{ + unsigned long long ull; + char *end = NULL; + + ull = strtoull(trsvcid, &end, 10); + if (end == NULL || end == trsvcid || *end != '\0') { + return -1; + } + + /* Valid TCP/IP port numbers are in [0, 65535] */ + if (ull > 65535) { + return -1; + } + + return (int)ull; +} + +/** + * Canonicalize a listen address trid. + */ +static int +nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, + const struct spdk_nvme_transport_id *trid) +{ + int trsvcid_int; + + trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); + if (trsvcid_int < 0) { + return -EINVAL; + } + + memset(canon_trid, 0, sizeof(*canon_trid)); + spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP); + canon_trid->adrfam = trid->adrfam; + snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); + snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); + + return 0; +} + +/** + * Find an existing listening port. + * + * Caller must hold ttransport->lock. 
+ */ +static struct spdk_nvmf_tcp_port * +nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvme_transport_id canon_trid; + struct spdk_nvmf_tcp_port *port; + + if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { + return NULL; + } + + TAILQ_FOREACH(port, &ttransport->ports, link) { + if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) { + return port; + } + } + + return NULL; +} + +static int +nvmf_tcp_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_tcp_transport *ttransport; + struct spdk_nvmf_tcp_port *port; + int trsvcid_int; + uint8_t adrfam; + struct spdk_sock_opts opts; + + ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); + + trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); + if (trsvcid_int < 0) { + SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); + return -EINVAL; + } + + pthread_mutex_lock(&ttransport->lock); + port = calloc(1, sizeof(*port)); + if (!port) { + SPDK_ERRLOG("Port allocation failed\n"); + pthread_mutex_unlock(&ttransport->lock); + return -ENOMEM; + } + + port->trid = trid; + opts.opts_size = sizeof(opts); + spdk_sock_get_default_opts(&opts); + opts.priority = transport->opts.sock_priority; + port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int, + NULL, &opts); + if (port->listen_sock == NULL) { + SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", + trid->traddr, trsvcid_int, + spdk_strerror(errno), errno); + free(port); + pthread_mutex_unlock(&ttransport->lock); + return -errno; + } + + if (spdk_sock_is_ipv4(port->listen_sock)) { + adrfam = SPDK_NVMF_ADRFAM_IPV4; + } else if (spdk_sock_is_ipv6(port->listen_sock)) { + adrfam = SPDK_NVMF_ADRFAM_IPV6; + } else { + SPDK_ERRLOG("Unhandled socket type\n"); + adrfam = 0; + } + + if (adrfam != trid->adrfam) { + SPDK_ERRLOG("Socket address family mismatch\n"); + 
spdk_sock_close(&port->listen_sock); + free(port); + pthread_mutex_unlock(&ttransport->lock); + return -EINVAL; + } + + SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n", + trid->traddr, trid->trsvcid); + + TAILQ_INSERT_TAIL(&ttransport->ports, port, link); + pthread_mutex_unlock(&ttransport->lock); + return 0; +} + +static void +nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_tcp_transport *ttransport; + struct spdk_nvmf_tcp_port *port; + + ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n", + trid->traddr, trid->trsvcid); + + pthread_mutex_lock(&ttransport->lock); + port = nvmf_tcp_find_port(ttransport, trid); + if (port) { + TAILQ_REMOVE(&ttransport->ports, port, link); + spdk_sock_close(&port->listen_sock); + free(port); + } + + pthread_mutex_unlock(&ttransport->lock); +} + +static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, + enum nvme_tcp_pdu_recv_state state); + +static void +nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair) +{ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Disconnecting qpair %p\n", tqpair); + + if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) { + tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); + spdk_poller_unregister(&tqpair->timeout_poller); + + /* This will end up calling nvmf_tcp_close_qpair */ + spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); + } +} + +static void +_pdu_write_done(void *_pdu, int err) +{ + struct nvme_tcp_pdu *pdu = _pdu; + struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; + + TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); + + if (err != 0) { + nvmf_tcp_qpair_disconnect(tqpair); + return; + } + + assert(pdu->cb_fn != NULL); + pdu->cb_fn(pdu->cb_arg); +} + +static void +nvmf_tcp_qpair_write_pdu(struct 
spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu, + nvme_tcp_qpair_xfer_complete_cb cb_fn, + void *cb_arg) +{ + int hlen; + uint32_t crc32c; + uint32_t mapped_length = 0; + ssize_t rc; + + assert(&tqpair->pdu_in_progress != pdu); + + hlen = pdu->hdr.common.hlen; + + /* Header Digest */ + if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) { + crc32c = nvme_tcp_pdu_calc_header_digest(pdu); + MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); + } + + /* Data Digest */ + if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) { + crc32c = nvme_tcp_pdu_calc_data_digest(pdu); + MAKE_DIGEST_WORD(pdu->data_digest, crc32c); + } + + pdu->cb_fn = cb_fn; + pdu->cb_arg = cb_arg; + + pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, + tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, + &mapped_length); + pdu->sock_req.cb_fn = _pdu_write_done; + pdu->sock_req.cb_arg = pdu; + TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); + if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || + pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { + rc = spdk_sock_writev(tqpair->sock, pdu->iov, pdu->sock_req.iovcnt); + if (rc == mapped_length) { + _pdu_write_done(pdu, 0); + } else { + SPDK_ERRLOG("IC_RESP or TERM_REQ could not write to socket.\n"); + _pdu_write_done(pdu, -1); + } + } else { + spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); + } +} + +static int +nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair) +{ + uint32_t i; + struct spdk_nvmf_transport_opts *opts; + uint32_t in_capsule_data_size; + + opts = &tqpair->qpair.transport->opts; + + in_capsule_data_size = opts->in_capsule_data_size; + if (opts->dif_insert_or_strip) { + in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size); + } + + tqpair->resource_count = opts->max_queue_depth; + + tqpair->mgmt_pdu.qpair = tqpair; + + tqpair->reqs = 
calloc(tqpair->resource_count, sizeof(*tqpair->reqs)); + if (!tqpair->reqs) { + SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair); + return -1; + } + + if (in_capsule_data_size) { + tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000, + NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (!tqpair->bufs) { + SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair); + return -1; + } + } + + tqpair->pdus = spdk_dma_malloc(tqpair->resource_count * sizeof(*tqpair->pdus), 0x1000, NULL); + if (!tqpair->pdus) { + SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair); + return -1; + } + + for (i = 0; i < tqpair->resource_count; i++) { + struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i]; + + tcp_req->ttag = i + 1; + tcp_req->req.qpair = &tqpair->qpair; + + tcp_req->pdu = &tqpair->pdus[i]; + tcp_req->pdu->qpair = tqpair; + + /* Set up memory to receive commands */ + if (tqpair->bufs) { + tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size)); + } + + /* Set the cmdn and rsp */ + tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; + tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; + + /* Initialize request state to FREE */ + tcp_req->state = TCP_REQUEST_STATE_FREE; + TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); + tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++; + } + + tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 * + SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; + + return 0; +} + +static int +nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + int i; + + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair); + + TAILQ_INIT(&tqpair->send_queue); + + /* Initialise request state queues of the qpair */ + for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; 
i++) { + TAILQ_INIT(&tqpair->state_queue[i]); + } + + tqpair->host_hdgst_enable = true; + tqpair->host_ddgst_enable = true; + + return 0; +} + +static int +nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair) +{ + int rc; + + /* set low water mark */ + rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_common_pdu_hdr)); + if (rc != 0) { + SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); + return rc; + } + + return 0; +} + +static void +nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport, + struct spdk_nvmf_tcp_port *port, + struct spdk_sock *sock) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n", + port->trid->traddr, port->trid->trsvcid); + + tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair)); + if (tqpair == NULL) { + SPDK_ERRLOG("Could not allocate new connection.\n"); + spdk_sock_close(&sock); + return; + } + + tqpair->sock = sock; + tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0; + tqpair->port = port; + tqpair->qpair.transport = transport; + + rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr, + sizeof(tqpair->target_addr), &tqpair->target_port, + tqpair->initiator_addr, sizeof(tqpair->initiator_addr), + &tqpair->initiator_port); + if (rc < 0) { + SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair); + nvmf_tcp_qpair_destroy(tqpair); + return; + } + + spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair); +} + +static uint32_t +nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port) +{ + struct spdk_sock *sock; + uint32_t count = 0; + int i; + + for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) { + sock = spdk_sock_accept(port->listen_sock); + if (sock == NULL) { + break; + } + count++; + nvmf_tcp_handle_connect(transport, port, sock); + } + + return count; +} + +static uint32_t +nvmf_tcp_accept(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_tcp_transport 
*ttransport; + struct spdk_nvmf_tcp_port *port; + uint32_t count = 0; + + ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); + + TAILQ_FOREACH(port, &ttransport->ports, link) { + count += nvmf_tcp_port_accept(transport, port); + } + + return count; +} + +static void +nvmf_tcp_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) +{ + entry->trtype = SPDK_NVMF_TRTYPE_TCP; + entry->adrfam = trid->adrfam; + entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED; + + spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); + spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); + + entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; +} + +static struct spdk_nvmf_transport_poll_group * +nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_tcp_poll_group *tgroup; + + tgroup = calloc(1, sizeof(*tgroup)); + if (!tgroup) { + return NULL; + } + + tgroup->sock_group = spdk_sock_group_create(&tgroup->group); + if (!tgroup->sock_group) { + goto cleanup; + } + + TAILQ_INIT(&tgroup->qpairs); + TAILQ_INIT(&tgroup->await_req); + + return &tgroup->group; + +cleanup: + free(tgroup); + return NULL; +} + +static struct spdk_nvmf_transport_poll_group * +nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + struct spdk_sock_group *group = NULL; + int rc; + + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group); + if (!rc && group != NULL) { + return spdk_sock_group_get_ctx(group); + } + + return NULL; +} + +static void +nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_tcp_poll_group *tgroup; + + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + 
spdk_sock_group_close(&tgroup->sock_group); + + free(tgroup); +} + +static void +nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, + enum nvme_tcp_pdu_recv_state state) +{ + if (tqpair->recv_state == state) { + SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", + tqpair, state); + return; + } + + if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { + /* When leaving the await req state, move the qpair to the main list */ + TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link); + TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link); + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state); + tqpair->recv_state = state; + + switch (state) { + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: + break; + case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: + TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link); + TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link); + break; + case NVME_TCP_PDU_RECV_STATE_ERROR: + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: + memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress)); + break; + default: + SPDK_ERRLOG("The state(%d) is invalid\n", state); + abort(); + break; + } +} + +static int +nvmf_tcp_qpair_handle_timeout(void *ctx) +{ + struct spdk_nvmf_tcp_qpair *tqpair = ctx; + + assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); + + SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, + SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); + + nvmf_tcp_qpair_disconnect(tqpair); + return SPDK_POLLER_BUSY; +} + +static void +nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) +{ + struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; + + if (!tqpair->timeout_poller) { + tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair, + SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); + } +} + +static void 
+nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, + enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) +{ + struct nvme_tcp_pdu *rsp_pdu; + struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; + uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); + uint32_t copy_len; + + rsp_pdu = &tqpair->mgmt_pdu; + + c2h_term_req = &rsp_pdu->hdr.term_req; + c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; + c2h_term_req->common.hlen = c2h_term_req_hdr_len; + + if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || + (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { + DSET32(&c2h_term_req->fei, error_offset); + } + + copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE); + + /* Copy the error info into the buffer */ + memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); + nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); + + /* Contain the header of the wrong received pdu */ + c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); + nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_c2h_term_req_complete, tqpair); +} + +static void +nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvmf_tcp_req *tcp_req; + + assert(pdu->psh_valid_bytes == pdu->psh_len); + assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD); + + tcp_req = nvmf_tcp_req_get(tqpair); + if (!tcp_req) { + /* Directly return and make the allocation retry again */ + if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0) { + return; + } + + /* The host sent more commands than the maximum queue depth. 
*/ + SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair); + nvmf_tcp_qpair_disconnect(tqpair); + return; + } + + pdu->req = tcp_req; + assert(tcp_req->state == TCP_REQUEST_STATE_NEW); + nvmf_tcp_req_process(ttransport, tcp_req); +} + +static void +nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvmf_tcp_req *tcp_req; + struct spdk_nvme_tcp_cmd *capsule_cmd; + uint32_t error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + + capsule_cmd = &pdu->hdr.capsule_cmd; + tcp_req = pdu->req; + assert(tcp_req != NULL); + if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) { + SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n", + SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); + goto err; + } + + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + nvmf_tcp_req_process(ttransport, tcp_req); + + return; +err: + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); +} + +static int +nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair, + enum spdk_nvmf_tcp_req_state state, + uint16_t cid, uint16_t tag, + struct spdk_nvmf_tcp_req **req) +{ + struct spdk_nvmf_tcp_req *tcp_req = NULL; + + TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) { + if (tcp_req->req.cmd->nvme_cmd.cid != cid) { + continue; + } + + if (tcp_req->ttag == tag) { + *req = tcp_req; + return 0; + } + + *req = NULL; + return -1; + } + + /* Didn't find it, but not an error */ + *req = NULL; + return 0; +} + +static void +nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvmf_tcp_req *tcp_req; + 
uint32_t error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes = 0; + struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; + int rc; + + h2c_data = &pdu->hdr.h2c_data; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", + tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); + + rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + h2c_data->cccid, h2c_data->ttag, &tcp_req); + if (rc == 0 && tcp_req == NULL) { + rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid, + h2c_data->ttag, &tcp_req); + } + + if (!tcp_req) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; + if (rc == 0) { + error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); + } else { + error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); + } + goto err; + } + + if (tcp_req->h2c_offset != h2c_data->datao) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, + "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n", + tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao); + fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; + goto err; + } + + if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, + "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) execeeds requested length=%u\n", + tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); + fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; + goto err; + } + + pdu->req = tcp_req; + + if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { + pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; + } + + nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, + h2c_data->datao, h2c_data->datal); + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); + return; + +err: + 
nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); +} + +static void +nvmf_tcp_pdu_cmd_complete(void *cb_arg) +{ + struct spdk_nvmf_tcp_req *tcp_req = cb_arg; + nvmf_tcp_request_free(tcp_req); +} + +static void +nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, + struct spdk_nvmf_tcp_qpair *tqpair) +{ + struct nvme_tcp_pdu *rsp_pdu; + struct spdk_nvme_tcp_rsp *capsule_resp; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair); + + rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); + assert(rsp_pdu != NULL); + + capsule_resp = &rsp_pdu->hdr.capsule_resp; + capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; + capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); + capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; + if (tqpair->host_hdgst_enable) { + capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; + capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; + } + + nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_cmd_complete, tcp_req); +} + +static void +nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) +{ + struct spdk_nvmf_tcp_req *tcp_req = cb_arg; + struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, + struct spdk_nvmf_tcp_qpair, qpair); + + assert(tqpair != NULL); + if (tqpair->qpair.transport->opts.c2h_success) { + nvmf_tcp_request_free(tcp_req); + } else { + nvmf_tcp_req_pdu_fini(tcp_req); + nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); + } +} + +static void +nvmf_tcp_r2t_complete(void *cb_arg) +{ + struct spdk_nvmf_tcp_req *tcp_req = cb_arg; + struct spdk_nvmf_tcp_transport *ttransport; + + nvmf_tcp_req_pdu_fini(tcp_req); + + ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, + struct spdk_nvmf_tcp_transport, transport); + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + + if (tcp_req->h2c_offset == tcp_req->req.length) { + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + 
nvmf_tcp_req_process(ttransport, tcp_req); + } +} + +static void +nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, + struct spdk_nvmf_tcp_req *tcp_req) +{ + struct nvme_tcp_pdu *rsp_pdu; + struct spdk_nvme_tcp_r2t_hdr *r2t; + + rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); + assert(rsp_pdu != NULL); + + r2t = &rsp_pdu->hdr.r2t; + r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; + r2t->common.plen = r2t->common.hlen = sizeof(*r2t); + + if (tqpair->host_hdgst_enable) { + r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; + r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; + } + + r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; + r2t->ttag = tcp_req->ttag; + r2t->r2to = tcp_req->h2c_offset; + r2t->r2tl = tcp_req->req.length; + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, + "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", + tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); + nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_r2t_complete, tcp_req); +} + +static void +nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvmf_tcp_req *tcp_req; + + tcp_req = pdu->req; + assert(tcp_req != NULL); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); + + tcp_req->h2c_offset += pdu->data_len; + + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); + + /* Wait for all of the data to arrive AND for the initial R2T PDU send to be + * acknowledged before moving on. 
*/ + if (tcp_req->h2c_offset == tcp_req->req.length && + tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + nvmf_tcp_req_process(ttransport, tcp_req); + } +} + +static void +nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) +{ + SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, + spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); + if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || + (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n", + DGET32(h2c_term_req->fei)); + } +} + +static void +nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; + uint32_t error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + + + if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { + SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for h2c_term_req pdu=%p\n", pdu); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); + goto end; + } + + /* set the data buffer */ + nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, + h2c_term_req->common.plen - h2c_term_req->common.hlen); + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); + return; +end: + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); +} + +static void +nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; + + nvmf_tcp_h2c_term_req_dump(h2c_term_req); + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); +} + +static void 
+nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, + struct spdk_nvmf_tcp_transport *ttransport) +{ + int rc = 0; + struct nvme_tcp_pdu *pdu; + uint32_t crc32c, error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + + assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); + pdu = &tqpair->pdu_in_progress; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); + /* check data digest if need */ + if (pdu->ddgst_enable) { + crc32c = nvme_tcp_pdu_calc_data_digest(pdu); + rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); + if (rc == 0) { + SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); + fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); + return; + + } + } + + switch (pdu->hdr.common.pdu_type) { + case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: + nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); + break; + case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: + nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); + break; + + case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: + nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); + break; + + default: + /* The code should not go to here */ + SPDK_ERRLOG("The code should not go to here\n"); + break; + } +} + +static void +nvmf_tcp_send_icresp_complete(void *cb_arg) +{ + struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; + + tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; +} + +static void +nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, + struct spdk_nvmf_tcp_qpair *tqpair, + struct nvme_tcp_pdu *pdu) +{ + struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; + struct nvme_tcp_pdu *rsp_pdu; + struct spdk_nvme_tcp_ic_resp *ic_resp; + uint32_t error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + + /* Only PFV 0 is defined currently */ + if (ic_req->pfv != 0) { + SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = 
offsetof(struct spdk_nvme_tcp_ic_req, pfv); + goto end; + } + + /* MAXR2T is 0's based */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); + + tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; + if (!tqpair->host_hdgst_enable) { + tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; + } + + tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; + if (!tqpair->host_ddgst_enable) { + tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; + } + + /* Now that we know whether digests are enabled, properly size the receive buffer */ + if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) { + SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", + tqpair, + tqpair->recv_buf_size); + /* Not fatal. */ + } + + tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); + + rsp_pdu = &tqpair->mgmt_pdu; + + ic_resp = &rsp_pdu->hdr.ic_resp; + ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; + ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); + ic_resp->pfv = 0; + ic_resp->cpda = tqpair->cpda; + ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size; + ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; + ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 
1 : 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); + + tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; + nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_icresp_complete, tqpair); + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); + return; +end: + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); +} + +static void +nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair, + struct spdk_nvmf_tcp_transport *ttransport) +{ + struct nvme_tcp_pdu *pdu; + int rc; + uint32_t crc32c, error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + + assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); + pdu = &tqpair->pdu_in_progress; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair, + pdu->hdr.common.pdu_type); + /* check header digest if needed */ + if (pdu->has_hdgst) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); + crc32c = nvme_tcp_pdu_calc_header_digest(pdu); + rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); + if (rc == 0) { + SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); + fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); + return; + + } + } + + switch (pdu->hdr.common.pdu_type) { + case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: + nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); + break; + case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ); + break; + case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: + nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); + break; + + case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: + nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); + break; + + default: + SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", 
tqpair->pdu_in_progress.hdr.common.pdu_type); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = 1; + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); + break; + } +} + +static void +nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) +{ + struct nvme_tcp_pdu *pdu; + uint32_t error_offset = 0; + enum spdk_nvme_tcp_term_req_fes fes; + uint8_t expected_hlen, pdo; + bool plen_error = false, pdo_error = false; + + assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); + pdu = &tqpair->pdu_in_progress; + + if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { + if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { + SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); + fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; + goto err; + } + expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); + if (pdu->hdr.common.plen != expected_hlen) { + plen_error = true; + } + } else { + if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { + SPDK_ERRLOG("The TCP/IP connection is not negotitated\n"); + fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; + goto err; + } + + switch (pdu->hdr.common.pdu_type) { + case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: + expected_hlen = sizeof(struct spdk_nvme_tcp_cmd); + pdo = pdu->hdr.common.pdo; + if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { + pdo_error = true; + break; + } + + if (pdu->hdr.common.plen < expected_hlen) { + plen_error = true; + } + break; + case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: + expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); + pdo = pdu->hdr.common.pdo; + if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { + pdo_error = true; + break; + } + if (pdu->hdr.common.plen < expected_hlen) { + plen_error = true; + } + break; + + case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: + expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); + if ((pdu->hdr.common.plen <= expected_hlen) || + (pdu->hdr.common.plen > 
SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { + plen_error = true; + } + break; + + default: + SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); + goto err; + } + } + + if (pdu->hdr.common.hlen != expected_hlen) { + SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", + pdu->hdr.common.pdu_type, + expected_hlen, pdu->hdr.common.hlen, tqpair); + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); + goto err; + } else if (pdo_error) { + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); + } else if (plen_error) { + fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; + error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); + goto err; + } else { + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); + nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable); + return; + } +err: + nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); +} + +static int +nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset, + int read_len) +{ + int rc; + + rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, + read_offset, read_len, pdu->dif_ctx); + if (rc != 0) { + SPDK_ERRLOG("DIF generate failed\n"); + } + + return rc; +} + +static int +nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) +{ + int rc = 0; + struct nvme_tcp_pdu *pdu; + enum nvme_tcp_pdu_recv_state prev_state; + uint32_t data_len; + struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, + struct spdk_nvmf_tcp_transport, transport); + + /* The loop here is to allow for several back-to-back state changes. 
*/ + do { + prev_state = tqpair->recv_state; + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); + + pdu = &tqpair->pdu_in_progress; + switch (tqpair->recv_state) { + /* Wait for the common header */ + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: + if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { + return rc; + } + + rc = nvme_tcp_read_data(tqpair->sock, + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, + (void *)&pdu->hdr.common + pdu->ch_valid_bytes); + if (rc < 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair); + return NVME_TCP_PDU_FATAL; + } else if (rc > 0) { + pdu->ch_valid_bytes += rc; + spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0); + if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) { + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); + } + } + + if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { + return NVME_TCP_PDU_IN_PROGRESS; + } + + /* The command header of this PDU has now been read from the socket. */ + nvmf_tcp_pdu_ch_handle(tqpair); + break; + /* Wait for the pdu specific header */ + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: + rc = nvme_tcp_read_data(tqpair->sock, + pdu->psh_len - pdu->psh_valid_bytes, + (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); + if (rc < 0) { + return NVME_TCP_PDU_FATAL; + } else if (rc > 0) { + spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, + 0, rc, 0, 0); + pdu->psh_valid_bytes += rc; + } + + if (pdu->psh_valid_bytes < pdu->psh_len) { + return NVME_TCP_PDU_IN_PROGRESS; + } + + /* All header(ch, psh, head digist) of this PDU has now been read from the socket. 
*/ + nvmf_tcp_pdu_psh_handle(tqpair, ttransport); + break; + /* Wait for the req slot */ + case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: + nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); + break; + case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: + /* check whether the data is valid, if not we just return */ + if (!pdu->data_len) { + return NVME_TCP_PDU_IN_PROGRESS; + } + + data_len = pdu->data_len; + /* data digest */ + if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && + tqpair->host_ddgst_enable)) { + data_len += SPDK_NVME_TCP_DIGEST_LEN; + pdu->ddgst_enable = true; + } + + rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); + if (rc < 0) { + return NVME_TCP_PDU_FATAL; + } + pdu->readv_offset += rc; + + if (spdk_unlikely(pdu->dif_ctx != NULL)) { + rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc); + if (rc != 0) { + return NVME_TCP_PDU_FATAL; + } + } + + if (pdu->readv_offset < data_len) { + return NVME_TCP_PDU_IN_PROGRESS; + } + + /* All of this PDU has now been read from the socket. 
*/ + nvmf_tcp_pdu_payload_handle(tqpair, ttransport); + break; + case NVME_TCP_PDU_RECV_STATE_ERROR: + if (!spdk_sock_is_connected(tqpair->sock)) { + return NVME_TCP_PDU_FATAL; + } + break; + default: + assert(0); + SPDK_ERRLOG("code should not come to here"); + break; + } + } while (tqpair->recv_state != prev_state); + + return rc; +} + +static int +nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, + struct spdk_nvmf_transport *transport, + struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_request *req = &tcp_req->req; + struct spdk_nvme_cmd *cmd; + struct spdk_nvme_cpl *rsp; + struct spdk_nvme_sgl_descriptor *sgl; + uint32_t length; + + cmd = &req->cmd->nvme_cmd; + rsp = &req->rsp->nvme_cpl; + sgl = &cmd->dptr.sgl1; + + length = sgl->unkeyed.length; + + if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { + if (length > transport->opts.max_io_size) { + SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", + length, transport->opts.max_io_size); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + + /* fill request length and populate iovs */ + req->length = length; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length); + + if (spdk_unlikely(req->dif.dif_insert_or_strip)) { + req->dif.orig_length = length; + length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); + req->dif.elba_length = length; + } + + if (spdk_nvmf_request_get_buffers(req, group, transport, length)) { + /* No available buffers. Queue this request up. */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. 
Queueing request %p\n", + tcp_req); + return 0; + } + + /* backward compatible */ + req->data = req->iov[0].iov_base; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n", + tcp_req, req->iovcnt, req->data); + + return 0; + } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + uint64_t offset = sgl->address; + uint32_t max_len = transport->opts.in_capsule_data_size; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", + offset, length); + + if (offset > max_len) { + SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", + offset, max_len); + rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; + return -1; + } + max_len -= (uint32_t)offset; + + if (length > max_len) { + SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", + length, max_len); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + + req->data = tcp_req->buf + offset; + req->data_from_pool = false; + req->length = length; + + if (spdk_unlikely(req->dif.dif_insert_or_strip)) { + length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); + req->dif.elba_length = length; + } + + req->iov[0].iov_base = req->data; + req->iov[0].iov_len = length; + req->iovcnt = 1; + + return 0; + } + + SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", + sgl->generic.type, sgl->generic.subtype); + rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; + return -1; +} + +static inline enum spdk_nvme_media_error_status_code +nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) { + enum spdk_nvme_media_error_status_code result; + + switch (err_type) + { + case SPDK_DIF_REFTAG_ERROR: + result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; + break; + case SPDK_DIF_APPTAG_ERROR: + result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; + break; + case SPDK_DIF_GUARD_ERROR: + result = 
SPDK_NVME_SC_GUARD_CHECK_ERROR; + break; + default: + SPDK_UNREACHABLE(); + break; + } + + return result; +} + +static void +nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, + struct spdk_nvmf_tcp_req *tcp_req) +{ + struct nvme_tcp_pdu *rsp_pdu; + struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; + uint32_t plen, pdo, alignment; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); + + rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); + assert(rsp_pdu != NULL); + + c2h_data = &rsp_pdu->hdr.c2h_data; + c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; + plen = c2h_data->common.hlen = sizeof(*c2h_data); + + if (tqpair->host_hdgst_enable) { + plen += SPDK_NVME_TCP_DIGEST_LEN; + c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; + } + + /* set the psh */ + c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; + c2h_data->datal = tcp_req->req.length; + c2h_data->datao = 0; + + /* set the padding */ + rsp_pdu->padding_len = 0; + pdo = plen; + if (tqpair->cpda) { + alignment = (tqpair->cpda + 1) << 2; + if (alignment > plen) { + rsp_pdu->padding_len = alignment - plen; + pdo = plen = alignment; + } + } + + c2h_data->common.pdo = pdo; + plen += c2h_data->datal; + if (tqpair->host_ddgst_enable) { + c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; + plen += SPDK_NVME_TCP_DIGEST_LEN; + } + + c2h_data->common.plen = plen; + + if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { + rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; + } + + nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, + c2h_data->datao, c2h_data->datal); + + if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { + struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; + struct spdk_dif_error err_blk = {}; + + rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt, + 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk); + if (rc != 0) { + SPDK_ERRLOG("DIF error detected. 
type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; + rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type); + nvmf_tcp_req_pdu_fini(tcp_req); + nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); + return; + } + } + + c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; + if (tqpair->qpair.transport->opts.c2h_success) { + c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; + } + + nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_c2h_data_complete, tcp_req); +} + +static int +request_transfer_out(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_tcp_req *tcp_req; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_tcp_qpair *tqpair; + struct spdk_nvme_cpl *rsp; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); + + qpair = req->qpair; + rsp = &req->rsp->nvme_cpl; + tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); + + /* Advance our sq_head pointer */ + if (qpair->sq_head == qpair->sq_head_max) { + qpair->sq_head = 0; + } else { + qpair->sq_head++; + } + rsp->sqhd = qpair->sq_head; + + tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); + if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + nvmf_tcp_send_c2h_data(tqpair, tcp_req); + } else { + nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); + } + + return 0; +} + +static void +nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair, + struct spdk_nvmf_tcp_req *tcp_req) +{ + struct nvme_tcp_pdu *pdu; + uint32_t plen = 0; + + pdu = &tqpair->pdu_in_progress; + plen = pdu->hdr.common.hlen; + + if (tqpair->host_hdgst_enable) { + plen += SPDK_NVME_TCP_DIGEST_LEN; + } + + if (pdu->hdr.common.plen != plen) { + tcp_req->has_incapsule_data = true; + } +} + +static bool +nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, + 
struct spdk_nvmf_tcp_req *tcp_req) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + int rc; + enum spdk_nvmf_tcp_req_state prev_state; + bool progress = false; + struct spdk_nvmf_transport *transport = &ttransport->transport; + struct spdk_nvmf_transport_poll_group *group; + + tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); + group = &tqpair->group->group; + assert(tcp_req->state != TCP_REQUEST_STATE_FREE); + + /* If the qpair is not active, we need to abort the outstanding requests. */ + if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { + if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { + STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); + } + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); + } + + /* The loop here is to allow for several back-to-back state changes. */ + do { + prev_state = tcp_req->state; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, + tqpair); + + switch (tcp_req->state) { + case TCP_REQUEST_STATE_FREE: + /* Some external code must kick a request into TCP_REQUEST_STATE_NEW + * to escape this state. */ + break; + case TCP_REQUEST_STATE_NEW: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0); + + /* copy the cmd from the receive pdu */ + tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe; + + if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) { + tcp_req->req.dif.dif_insert_or_strip = true; + tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx; + } + + /* The next state transition depends on the data transfer needs of this request. */ + tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req); + + /* If no data to transfer, ready to execute. 
*/ + if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { + /* Reset the tqpair receving pdu state */ + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + break; + } + + nvmf_tcp_set_incapsule_data(tqpair, tcp_req); + + if (!tcp_req->has_incapsule_data) { + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); + } + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); + STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link); + break; + case TCP_REQUEST_STATE_NEED_BUFFER: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0); + + assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); + + if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, + "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", + tcp_req, tqpair); + /* This request needs to wait in line to obtain a buffer */ + break; + } + + /* Try to get a data buffer */ + rc = nvmf_tcp_req_parse_sgl(tcp_req, transport, group); + if (rc < 0) { + STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link); + /* Reset the tqpair receving pdu state */ + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); + break; + } + + if (!tcp_req->req.data) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", + tcp_req, tqpair); + /* No buffers available. */ + break; + } + + STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); + + /* If data is transferring from host to controller, we need to do a transfer from the host. 
*/ + if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + if (tcp_req->req.data_from_pool) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); + nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); + } else { + struct nvme_tcp_pdu *pdu; + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + + pdu = &tqpair->pdu_in_progress; + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, + tqpair); + /* No need to send r2t, contained in the capsuled data */ + nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, + 0, tcp_req->req.length); + nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); + } + break; + } + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); + break; + case TCP_REQUEST_STATE_AWAITING_R2T_ACK: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0); + /* The R2T completion or the h2c data incoming will kick it out of this state. */ + break; + case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + + spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, + (uintptr_t)tcp_req, 0); + /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE + * to escape this state. 
*/ + break; + case TCP_REQUEST_STATE_READY_TO_EXECUTE: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0); + + if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { + assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); + tcp_req->req.length = tcp_req->req.dif.elba_length; + } + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); + spdk_nvmf_request_exec(&tcp_req->req); + break; + case TCP_REQUEST_STATE_EXECUTING: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0); + /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED + * to escape this state. */ + break; + case TCP_REQUEST_STATE_EXECUTED: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0); + + if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { + tcp_req->req.length = tcp_req->req.dif.orig_length; + } + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); + break; + case TCP_REQUEST_STATE_READY_TO_COMPLETE: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0); + rc = request_transfer_out(&tcp_req->req); + assert(rc == 0); /* No good way to handle this currently */ + break; + case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, + (uintptr_t)tcp_req, + 0); + /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED + * to escape this state. 
*/ + break; + case TCP_REQUEST_STATE_COMPLETED: + spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0); + if (tcp_req->req.data_from_pool) { + spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport); + } + tcp_req->req.length = 0; + tcp_req->req.iovcnt = 0; + tcp_req->req.data = NULL; + + nvmf_tcp_req_pdu_fini(tcp_req); + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); + break; + case TCP_REQUEST_NUM_STATES: + default: + assert(0); + break; + } + + if (tcp_req->state != prev_state) { + progress = true; + } + } while (tcp_req->state != prev_state); + + return progress; +} + +static void +nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) +{ + struct spdk_nvmf_tcp_qpair *tqpair = arg; + int rc; + + assert(tqpair != NULL); + rc = nvmf_tcp_sock_process(tqpair); + + /* If there was a new socket error, disconnect */ + if (rc < 0) { + nvmf_tcp_qpair_disconnect(tqpair); + } +} + +static int +nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_tcp_poll_group *tgroup; + struct spdk_nvmf_tcp_qpair *tqpair; + int rc; + + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + + rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, + nvmf_tcp_sock_cb, tqpair); + if (rc != 0) { + SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", + spdk_strerror(errno), errno); + return -1; + } + + rc = nvmf_tcp_qpair_sock_init(tqpair); + if (rc != 0) { + SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); + return -1; + } + + rc = nvmf_tcp_qpair_init(&tqpair->qpair); + if (rc < 0) { + SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); + return -1; + } + + rc = nvmf_tcp_qpair_init_mem_resource(tqpair); + if (rc < 0) { + SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); + return -1; + } + + tqpair->group = 
tgroup; + tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; + TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); + + return 0; +} + +static int +nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_tcp_poll_group *tgroup; + struct spdk_nvmf_tcp_qpair *tqpair; + int rc; + + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + + assert(tqpair->group == tgroup); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); + if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { + TAILQ_REMOVE(&tgroup->await_req, tqpair, link); + } else { + TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); + } + + rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); + if (rc != 0) { + SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", + spdk_strerror(errno), errno); + } + + return rc; +} + +static int +nvmf_tcp_req_complete(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_tcp_transport *ttransport; + struct spdk_nvmf_tcp_req *tcp_req; + + ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport); + tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); + + nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); + nvmf_tcp_req_process(ttransport, tcp_req); + + return 0; +} + +static void +nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Qpair: %p\n", qpair); + + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; + nvmf_tcp_qpair_destroy(tqpair); +} + +static int +nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_tcp_poll_group *tgroup; + int rc; + struct spdk_nvmf_request *req, *req_tmp; + struct spdk_nvmf_tcp_req 
*tcp_req; + struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp; + struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport, + struct spdk_nvmf_tcp_transport, transport); + + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + + if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) { + return 0; + } + + STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) { + tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); + if (nvmf_tcp_req_process(ttransport, tcp_req) == false) { + break; + } + } + + rc = spdk_sock_group_poll(tgroup->sock_group); + if (rc < 0) { + SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group); + } + + TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) { + nvmf_tcp_sock_process(tqpair); + } + + return rc; +} + +static int +nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid, bool peer) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + uint16_t port; + + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP); + + if (peer) { + snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr); + port = tqpair->initiator_port; + } else { + snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr); + port = tqpair->target_port; + } + + if (spdk_sock_is_ipv4(tqpair->sock)) { + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + } else if (spdk_sock_is_ipv6(tqpair->sock)) { + trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; + } else { + return -1; + } + + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port); + return 0; +} + +static int +nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return nvmf_tcp_qpair_get_trid(qpair, trid, 0); +} + +static int +nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return 
nvmf_tcp_qpair_get_trid(qpair, trid, 1); +} + +static int +nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return nvmf_tcp_qpair_get_trid(qpair, trid, 0); +} + +static void +nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req, + struct spdk_nvmf_tcp_req *tcp_req_to_abort) +{ + tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + + nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE); + + req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */ +} + +static int +_nvmf_tcp_qpair_abort_request(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort, + struct spdk_nvmf_tcp_req, req); + struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair, + struct spdk_nvmf_tcp_qpair, qpair); + int rc; + + spdk_poller_unregister(&req->poller); + + switch (tcp_req_to_abort->state) { + case TCP_REQUEST_STATE_EXECUTING: + rc = nvmf_ctrlr_abort_request(req); + if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) { + return SPDK_POLLER_BUSY; + } + break; + + case TCP_REQUEST_STATE_NEED_BUFFER: + STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, + &tcp_req_to_abort->req, spdk_nvmf_request, buf_link); + + nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); + break; + + case TCP_REQUEST_STATE_AWAITING_R2T_ACK: + nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); + break; + + case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + if (spdk_get_ticks() < req->timeout_tsc) { + req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0); + return SPDK_POLLER_BUSY; + } + break; + + default: + break; + } + + spdk_nvmf_request_complete(req); + return SPDK_POLLER_BUSY; +} + +/* + * Handle an Abort command for this transport. + * + * Searches the requests owned by qpair for a non-free one whose command ID + * equals the CID carried in the Abort command (req->cmd cdw10). On a match, + * req->req_to_abort, req->timeout_tsc and req->poller are initialised and the + * work is handed to _nvmf_tcp_qpair_abort_request(). If nothing matches, req + * is completed immediately. + * + * qpair - queue pair whose requests are searched. + * req - the Abort request itself. + */ +static void +nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair, + struct 
spdk_nvmf_request *req) +{ + struct spdk_nvmf_tcp_qpair *tqpair; + struct spdk_nvmf_tcp_transport *ttransport; + struct spdk_nvmf_transport *transport; + uint16_t cid; + uint32_t i; + struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL; + + tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); + ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport); + transport = &ttransport->transport; + + cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; + + for (i = 0; i < tqpair->resource_count; i++) { + struct spdk_nvmf_tcp_req *candidate = &tqpair->reqs[i]; + + /* Latch the request only once it really matches. Assigning before the + * test would leave the last slot selected after an unsuccessful search, + * so the NULL check below would never fire and an unrelated request + * would be aborted. + */ + if (candidate->state != TCP_REQUEST_STATE_FREE && + candidate->req.cmd->nvme_cmd.cid == cid) { + tcp_req_to_abort = candidate; + break; + } + } + + if (tcp_req_to_abort == NULL) { + /* No in-flight request carries this CID; there is nothing to abort. */ + spdk_nvmf_request_complete(req); + return; + } + + req->req_to_abort = &tcp_req_to_abort->req; + req->timeout_tsc = spdk_get_ticks() + + transport->opts.abort_timeout_sec * spdk_get_ticks_hz(); + req->poller = NULL; + + _nvmf_tcp_qpair_abort_request(req); +} + +#define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128 +#define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128 +#define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 +#define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 +#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072 +#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072 +#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511 +#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32 +#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true +#define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false +#define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0 +#define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1 + +static void +nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts) +{ + opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH; + opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR; + opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE; + opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE; + 
opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE; + opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH; + opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS; + opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE; + opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; + opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP; + opts->sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; + opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC; +} + +const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = { + .name = "TCP", + .type = SPDK_NVME_TRANSPORT_TCP, + .opts_init = nvmf_tcp_opts_init, + .create = nvmf_tcp_create, + .destroy = nvmf_tcp_destroy, + + .listen = nvmf_tcp_listen, + .stop_listen = nvmf_tcp_stop_listen, + .accept = nvmf_tcp_accept, + + .listener_discover = nvmf_tcp_discover, + + .poll_group_create = nvmf_tcp_poll_group_create, + .get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group, + .poll_group_destroy = nvmf_tcp_poll_group_destroy, + .poll_group_add = nvmf_tcp_poll_group_add, + .poll_group_remove = nvmf_tcp_poll_group_remove, + .poll_group_poll = nvmf_tcp_poll_group_poll, + + .req_free = nvmf_tcp_req_free, + .req_complete = nvmf_tcp_req_complete, + + .qpair_fini = nvmf_tcp_close_qpair, + .qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid, + .qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid, + .qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid, + .qpair_abort_request = nvmf_tcp_qpair_abort_request, +}; + +SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp); +SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP) diff --git a/src/spdk/lib/nvmf/transport.c b/src/spdk/lib/nvmf/transport.c new file mode 100644 index 000000000..11bb152df --- /dev/null +++ b/src/spdk/lib/nvmf/transport.c @@ -0,0 +1,572 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2018-2019 Mellanox Technologies LTD. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/config.h" +#include "spdk/log.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_transport.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +#define MAX_MEMPOOL_NAME_LENGTH 40 + +struct nvmf_transport_ops_list_element { + struct spdk_nvmf_transport_ops ops; + TAILQ_ENTRY(nvmf_transport_ops_list_element) link; +}; + +TAILQ_HEAD(nvmf_transport_ops_list, nvmf_transport_ops_list_element) +g_spdk_nvmf_transport_ops = TAILQ_HEAD_INITIALIZER(g_spdk_nvmf_transport_ops); + +static inline const struct spdk_nvmf_transport_ops * +nvmf_get_transport_ops(const char *transport_name) +{ + struct nvmf_transport_ops_list_element *ops; + TAILQ_FOREACH(ops, &g_spdk_nvmf_transport_ops, link) { + if (strcasecmp(transport_name, ops->ops.name) == 0) { + return &ops->ops; + } + } + return NULL; +} + +void +spdk_nvmf_transport_register(const struct spdk_nvmf_transport_ops *ops) +{ + struct nvmf_transport_ops_list_element *new_ops; + + if (nvmf_get_transport_ops(ops->name) != NULL) { + SPDK_ERRLOG("Double registering nvmf transport type %s.\n", ops->name); + assert(false); + return; + } + + new_ops = calloc(1, sizeof(*new_ops)); + if (new_ops == NULL) { + SPDK_ERRLOG("Unable to allocate memory to register new transport type %s.\n", ops->name); + assert(false); + return; + } + + new_ops->ops = *ops; + + TAILQ_INSERT_TAIL(&g_spdk_nvmf_transport_ops, new_ops, link); +} + +const struct spdk_nvmf_transport_opts * +spdk_nvmf_get_transport_opts(struct spdk_nvmf_transport *transport) +{ + return &transport->opts; +} + +spdk_nvme_transport_type_t +spdk_nvmf_get_transport_type(struct spdk_nvmf_transport *transport) +{ + return transport->ops->type; +} + +const char * +spdk_nvmf_get_transport_name(struct spdk_nvmf_transport *transport) +{ + return transport->ops->name; +} + +struct spdk_nvmf_transport * +spdk_nvmf_transport_create(const char *transport_name, struct spdk_nvmf_transport_opts *opts) 
+{ + const struct spdk_nvmf_transport_ops *ops = NULL; + struct spdk_nvmf_transport *transport; + char spdk_mempool_name[MAX_MEMPOOL_NAME_LENGTH]; + int chars_written; + + ops = nvmf_get_transport_ops(transport_name); + if (!ops) { + SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name); + return NULL; + } + + if (opts->max_aq_depth < SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE) { + SPDK_ERRLOG("max_aq_depth %u is less than minimum defined by NVMf spec, use min value\n", + opts->max_aq_depth); + opts->max_aq_depth = SPDK_NVMF_MIN_ADMIN_MAX_SQ_SIZE; + } + + transport = ops->create(opts); + if (!transport) { + SPDK_ERRLOG("Unable to create new transport of type %s\n", transport_name); + return NULL; + } + + TAILQ_INIT(&transport->listeners); + + transport->ops = ops; + transport->opts = *opts; + chars_written = snprintf(spdk_mempool_name, MAX_MEMPOOL_NAME_LENGTH, "%s_%s_%s", "spdk_nvmf", + transport_name, "data"); + if (chars_written < 0) { + SPDK_ERRLOG("Unable to generate transport data buffer pool name.\n"); + ops->destroy(transport); + return NULL; + } + + transport->data_buf_pool = spdk_mempool_create(spdk_mempool_name, + opts->num_shared_buffers, + opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + + if (!transport->data_buf_pool) { + SPDK_ERRLOG("Unable to allocate buffer pool for poll group\n"); + ops->destroy(transport); + return NULL; + } + + return transport; +} + +struct spdk_nvmf_transport * +spdk_nvmf_transport_get_first(struct spdk_nvmf_tgt *tgt) +{ + return TAILQ_FIRST(&tgt->transports); +} + +struct spdk_nvmf_transport * +spdk_nvmf_transport_get_next(struct spdk_nvmf_transport *transport) +{ + return TAILQ_NEXT(transport, link); +} + +int +spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport) +{ + if (transport->data_buf_pool != NULL) { + if (spdk_mempool_count(transport->data_buf_pool) != + transport->opts.num_shared_buffers) { + SPDK_ERRLOG("transport buffer pool count is 
%zu but should be %u\n", + spdk_mempool_count(transport->data_buf_pool), + transport->opts.num_shared_buffers); + } + } + + spdk_mempool_free(transport->data_buf_pool); + + return transport->ops->destroy(transport); +} + +struct spdk_nvmf_listener * +nvmf_transport_find_listener(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + + TAILQ_FOREACH(listener, &transport->listeners, link) { + if (spdk_nvme_transport_id_compare(&listener->trid, trid) == 0) { + return listener; + } + } + + return NULL; +} + +int +spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + int rc; + + listener = nvmf_transport_find_listener(transport, trid); + if (!listener) { + listener = calloc(1, sizeof(*listener)); + if (!listener) { + return -ENOMEM; + } + + listener->ref = 1; + listener->trid = *trid; + TAILQ_INSERT_TAIL(&transport->listeners, listener, link); + + rc = transport->ops->listen(transport, &listener->trid); + if (rc != 0) { + TAILQ_REMOVE(&transport->listeners, listener, link); + free(listener); + } + return rc; + } + + ++listener->ref; + + return 0; +} + +int +spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + + listener = nvmf_transport_find_listener(transport, trid); + if (!listener) { + return -ENOENT; + } + + if (--listener->ref == 0) { + TAILQ_REMOVE(&transport->listeners, listener, link); + transport->ops->stop_listen(transport, trid); + free(listener); + } + + return 0; +} + +uint32_t +nvmf_transport_accept(struct spdk_nvmf_transport *transport) +{ + return transport->ops->accept(transport); +} + +void +nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) +{ + 
transport->ops->listener_discover(transport, trid, entry); +} + +struct spdk_nvmf_transport_poll_group * +nvmf_transport_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_transport_poll_group *group; + struct spdk_nvmf_transport_pg_cache_buf *buf; + + group = transport->ops->poll_group_create(transport); + if (!group) { + return NULL; + } + group->transport = transport; + + STAILQ_INIT(&group->pending_buf_queue); + STAILQ_INIT(&group->buf_cache); + + if (transport->opts.buf_cache_size) { + group->buf_cache_count = 0; + group->buf_cache_size = transport->opts.buf_cache_size; + while (group->buf_cache_count < group->buf_cache_size) { + buf = (struct spdk_nvmf_transport_pg_cache_buf *)spdk_mempool_get(transport->data_buf_pool); + if (!buf) { + SPDK_NOTICELOG("Unable to reserve the full number of buffers for the pg buffer cache.\n"); + break; + } + STAILQ_INSERT_HEAD(&group->buf_cache, buf, link); + group->buf_cache_count++; + } + } + return group; +} + +struct spdk_nvmf_transport_poll_group * +nvmf_transport_get_optimal_poll_group(struct spdk_nvmf_transport *transport, + struct spdk_nvmf_qpair *qpair) +{ + if (transport->ops->get_optimal_poll_group) { + return transport->ops->get_optimal_poll_group(qpair); + } else { + return NULL; + } +} + +void +nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_transport_pg_cache_buf *buf, *tmp; + + if (!STAILQ_EMPTY(&group->pending_buf_queue)) { + SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n"); + } + + STAILQ_FOREACH_SAFE(buf, &group->buf_cache, link, tmp) { + STAILQ_REMOVE(&group->buf_cache, buf, spdk_nvmf_transport_pg_cache_buf, link); + spdk_mempool_put(group->transport->data_buf_pool, buf); + } + group->transport->ops->poll_group_destroy(group); +} + +int +nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + if (qpair->transport) { + assert(qpair->transport == 
group->transport); + if (qpair->transport != group->transport) { + return -1; + } + } else { + qpair->transport = group->transport; + } + + return group->transport->ops->poll_group_add(group, qpair); +} + +int +nvmf_transport_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + int rc = ENOTSUP; + + assert(qpair->transport == group->transport); + if (group->transport->ops->poll_group_remove) { + rc = group->transport->ops->poll_group_remove(group, qpair); + } + + return rc; +} + +int +nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + return group->transport->ops->poll_group_poll(group); +} + +int +nvmf_transport_req_free(struct spdk_nvmf_request *req) +{ + return req->qpair->transport->ops->req_free(req); +} + +int +nvmf_transport_req_complete(struct spdk_nvmf_request *req) +{ + return req->qpair->transport->ops->req_complete(req); +} + +void +nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair) +{ + qpair->transport->ops->qpair_fini(qpair); +} + +int +nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_peer_trid(qpair, trid); +} + +int +nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_local_trid(qpair, trid); +} + +int +nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_listen_trid(qpair, trid); +} + +void +nvmf_transport_qpair_abort_request(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_request *req) +{ + qpair->transport->ops->qpair_abort_request(qpair, req); +} + +bool +spdk_nvmf_transport_opts_init(const char *transport_name, + struct spdk_nvmf_transport_opts *opts) +{ + const struct spdk_nvmf_transport_ops *ops; + + ops = nvmf_get_transport_ops(transport_name); + if (!ops) { + 
SPDK_ERRLOG("Transport type %s unavailable.\n", transport_name); + return false; + } + + ops->opts_init(opts); + return true; +} + +int +spdk_nvmf_transport_poll_group_get_stat(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport *transport, + struct spdk_nvmf_transport_poll_group_stat **stat) +{ + if (transport->ops->poll_group_get_stat) { + return transport->ops->poll_group_get_stat(tgt, stat); + } else { + return -ENOTSUP; + } +} + +void +spdk_nvmf_transport_poll_group_free_stat(struct spdk_nvmf_transport *transport, + struct spdk_nvmf_transport_poll_group_stat *stat) +{ + if (transport->ops->poll_group_free_stat) { + transport->ops->poll_group_free_stat(stat); + } +} + +void +spdk_nvmf_request_free_buffers(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport) +{ + uint32_t i; + + for (i = 0; i < req->iovcnt; i++) { + if (group->buf_cache_count < group->buf_cache_size) { + STAILQ_INSERT_HEAD(&group->buf_cache, + (struct spdk_nvmf_transport_pg_cache_buf *)req->buffers[i], + link); + group->buf_cache_count++; + } else { + spdk_mempool_put(transport->data_buf_pool, req->buffers[i]); + } + req->iov[i].iov_base = NULL; + req->buffers[i] = NULL; + req->iov[i].iov_len = 0; + } + req->data_from_pool = false; +} + +/* + * Attach buf to req as its next data buffer. + * + * The raw pointer is stored in req->buffers[] and the matching iovec gets + * the pointer rounded up with NVMF_DATA_BUFFER_MASK as its base and at most + * io_unit_size bytes as its length; req->iovcnt is then advanced. + * + * Returns the part of length that this buffer did not cover. + */ +static inline int +nvmf_request_set_buffer(struct spdk_nvmf_request *req, void *buf, uint32_t length, + uint32_t io_unit_size) +{ + req->buffers[req->iovcnt] = buf; + /* Convert to an integer before adding: arithmetic on void * is a GNU + * extension, not ISO C. + */ + req->iov[req->iovcnt].iov_base = (void *)(((uintptr_t)buf + NVMF_DATA_BUFFER_MASK) & + ~NVMF_DATA_BUFFER_MASK); + req->iov[req->iovcnt].iov_len = spdk_min(length, io_unit_size); + length -= req->iov[req->iovcnt].iov_len; + req->iovcnt++; + + return length; +} + +static int +nvmf_request_get_buffers(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport, + uint32_t length) +{ + uint32_t io_unit_size = transport->opts.io_unit_size; + uint32_t num_buffers; + uint32_t i 
= 0, j; + void *buffer, *buffers[NVMF_REQ_MAX_BUFFERS]; + + /* If the number of buffers is too large, then we know the I/O is larger than allowed. + * Fail it. + */ + num_buffers = SPDK_CEIL_DIV(length, io_unit_size); + if (num_buffers + req->iovcnt > NVMF_REQ_MAX_BUFFERS) { + return -EINVAL; + } + + while (i < num_buffers) { + if (!(STAILQ_EMPTY(&group->buf_cache))) { + group->buf_cache_count--; + buffer = STAILQ_FIRST(&group->buf_cache); + STAILQ_REMOVE_HEAD(&group->buf_cache, link); + assert(buffer != NULL); + + length = nvmf_request_set_buffer(req, buffer, length, io_unit_size); + i++; + } else { + if (spdk_mempool_get_bulk(transport->data_buf_pool, buffers, + num_buffers - i)) { + return -ENOMEM; + } + for (j = 0; j < num_buffers - i; j++) { + length = nvmf_request_set_buffer(req, buffers[j], length, io_unit_size); + } + i += num_buffers - i; + } + } + + assert(length == 0); + + req->data_from_pool = true; + return 0; +} + +int +spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport, + uint32_t length) +{ + int rc; + + req->iovcnt = 0; + + rc = nvmf_request_get_buffers(req, group, transport, length); + if (rc == -ENOMEM) { + spdk_nvmf_request_free_buffers(req, group, transport); + } + + return rc; +} + +int +spdk_nvmf_request_get_buffers_multi(struct spdk_nvmf_request *req, + struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_transport *transport, + uint32_t *lengths, uint32_t num_lengths) +{ + int rc = 0; + uint32_t i; + + req->iovcnt = 0; + + for (i = 0; i < num_lengths; i++) { + rc = nvmf_request_get_buffers(req, group, transport, lengths[i]); + if (rc != 0) { + goto err_exit; + } + } + + return 0; + +err_exit: + spdk_nvmf_request_free_buffers(req, group, transport); + return rc; +} diff --git a/src/spdk/lib/nvmf/transport.h b/src/spdk/lib/nvmf/transport.h new file mode 100644 index 000000000..38b5d8db3 --- /dev/null +++ 
b/src/spdk/lib/nvmf/transport.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_NVMF_TRANSPORT_H +#define SPDK_NVMF_TRANSPORT_H + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_transport.h" + +uint32_t nvmf_transport_accept(struct spdk_nvmf_transport *transport); + +void nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry); + +struct spdk_nvmf_transport_poll_group *nvmf_transport_poll_group_create( + struct spdk_nvmf_transport *transport); +struct spdk_nvmf_transport_poll_group *nvmf_transport_get_optimal_poll_group( + struct spdk_nvmf_transport *transport, struct spdk_nvmf_qpair *qpair); + +void nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); + +int nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + +int nvmf_transport_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + +int nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group); + +int nvmf_transport_req_free(struct spdk_nvmf_request *req); + +int nvmf_transport_req_complete(struct spdk_nvmf_request *req); + +void nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair); + +int nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +int nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +int nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +void nvmf_transport_qpair_abort_request(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_request *req); + +#endif /* SPDK_NVMF_TRANSPORT_H */ |