Diffstat (limited to 'src/spdk/lib/nvme/nvme.c')
-rw-r--r-- | src/spdk/lib/nvme/nvme.c | 1423
1 file changed, 1423 insertions, 0 deletions
diff --git a/src/spdk/lib/nvme/nvme.c b/src/spdk/lib/nvme/nvme.c new file mode 100644 index 000000000..9393810a6 --- /dev/null +++ b/src/spdk/lib/nvme/nvme.c @@ -0,0 +1,1423 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/nvmf_spec.h" +#include "spdk/string.h" +#include "nvme_internal.h" +#include "nvme_io_msg.h" +#include "nvme_uevent.h" + +#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver" + +struct nvme_driver *g_spdk_nvme_driver; +pid_t g_spdk_nvme_pid; + +/* gross timeout of 180 seconds in milliseconds */ +static int g_nvme_driver_timeout_ms = 3 * 60 * 1000; + +/* Per-process attached controller list */ +static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs = + TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs); + +/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */ +static bool +nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE; +} + +void +nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx, + struct spdk_nvme_ctrlr *ctrlr) +{ + TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq); +} + +int +spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr) +{ + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + nvme_ctrlr_proc_put_ref(ctrlr); + + if (nvme_ctrlr_get_ref_count(ctrlr) == 0) { + nvme_io_msg_ctrlr_detach(ctrlr); + if (nvme_ctrlr_shared(ctrlr)) { + TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq); + } else { + TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq); + } + nvme_ctrlr_destruct(ctrlr); + } + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + return 0; +} + +void +nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_completion_poll_status *status = arg; + + if (status->timed_out) { + /* There is no routine waiting for the completion of this request, free allocated memory */ + free(status); + return; + } + + /* + * Copy status into the argument passed by the caller, so that + * the caller can check the status to determine if the + * the request passed or failed. + */ + memcpy(&status->cpl, cpl, sizeof(*cpl)); + status->done = true; +} + +/** + * Poll qpair for completions until a command completes. + * + * \param qpair queue to poll + * \param status completion status. The user must fill this structure with zeroes before calling + * this function + * \param robust_mutex optional robust mutex to lock while polling qpair + * + * \return 0 if command completed without error, + * -EIO if command completed with error, + * -ECANCELED if command is not completed due to transport/device error + * + * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback + * and status as the callback argument. + */ +int +nvme_wait_for_completion_robust_lock( + struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status, + pthread_mutex_t *robust_mutex) +{ + int rc; + + while (status->done == false) { + if (robust_mutex) { + nvme_robust_mutex_lock(robust_mutex); + } + + rc = spdk_nvme_qpair_process_completions(qpair, 0); + + if (robust_mutex) { + nvme_robust_mutex_unlock(robust_mutex); + } + + if (rc < 0) { + status->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; + if (status->done == false) { + status->timed_out = true; + } + return -ECANCELED; + } + } + + return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0; +} + +int +nvme_wait_for_completion(struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status) +{ + return nvme_wait_for_completion_robust_lock(qpair, status, NULL); +} + +/** + * Poll qpair for completions until a command completes. 
+ * + * \param qpair queue to poll + * \param status completion status. The user must fill this structure with zeroes before calling + * this function + * \param timeout_in_secs optional timeout + * + * \return 0 if command completed without error, + * -EIO if command completed with error, + * -ECANCELED if command is not completed due to transport/device error or time expired + * + * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback + * and status as the callback argument. + */ +int +nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status, + uint64_t timeout_in_secs) +{ + uint64_t timeout_tsc = 0; + int rc = 0; + + if (timeout_in_secs) { + timeout_tsc = spdk_get_ticks() + timeout_in_secs * spdk_get_ticks_hz(); + } + + while (status->done == false) { + rc = spdk_nvme_qpair_process_completions(qpair, 0); + + if (rc < 0) { + status->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; + break; + } + if (timeout_tsc && spdk_get_ticks() > timeout_tsc) { + break; + } + } + + if (status->done == false || rc < 0) { + if (status->done == false) { + status->timed_out = true; + } + return -ECANCELED; + } + + return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0; +} + +static void +nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_request *req = arg; + enum spdk_nvme_data_transfer xfer; + + if (req->user_buffer && req->payload_size) { + /* Copy back to the user buffer and free the contig buffer */ + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); + xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); + if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || + xfer == SPDK_NVME_DATA_BIDIRECTIONAL) { + assert(req->pid == getpid()); + memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size); + } + + spdk_free(req->payload.contig_or_cb_arg); + } + + /* Call the user's original callback now that the buffer has been copied */ + req->user_cb_fn(req->user_cb_arg, cpl); +} + +/** + * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer. + * + * This is intended for use in non-fast-path functions (admin commands, reservations, etc.) + * where the overhead of a copy is not a problem. + */ +struct nvme_request * +nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair, + void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, bool host_to_controller) +{ + struct nvme_request *req; + void *dma_buffer = NULL; + + if (buffer && payload_size) { + dma_buffer = spdk_zmalloc(payload_size, 4096, NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (!dma_buffer) { + return NULL; + } + + if (host_to_controller) { + memcpy(dma_buffer, buffer, payload_size); + } + } + + req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete, + NULL); + if (!req) { + spdk_free(dma_buffer); + return NULL; + } + + req->user_cb_fn = cb_fn; + req->user_cb_arg = cb_arg; + req->user_buffer = buffer; + req->cb_arg = req; + + return req; +} + +/** + * Check if a request has exceeded the controller timeout. + * + * \param req request to check for timeout. 
+ * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn) + * \param active_proc per-process data for the controller associated with req + * \param now_tick current time from spdk_get_ticks() + * \return 0 if requests submitted more recently than req should still be checked for timeouts, or + * 1 if requests newer than req need not be checked. + * + * The request's timeout callback will be called if needed; the caller is only responsible for + * calling this function on each outstanding request. + */ +int +nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + struct spdk_nvme_ctrlr_process *active_proc, + uint64_t now_tick) +{ + struct spdk_nvme_qpair *qpair = req->qpair; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + assert(active_proc->timeout_cb_fn != NULL); + + if (req->timed_out || req->submit_tick == 0) { + return 0; + } + + if (req->pid != g_spdk_nvme_pid) { + return 0; + } + + if (nvme_qpair_is_admin_queue(qpair) && + req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { + return 0; + } + + if (req->submit_tick + active_proc->timeout_ticks > now_tick) { + return 1; + } + + req->timed_out = true; + + /* + * We don't want to expose the admin queue to the user, + * so when we're timing out admin commands set the + * qpair to NULL. + */ + active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, + nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, + cid); + return 0; +} + +int +nvme_robust_mutex_init_shared(pthread_mutex_t *mtx) +{ + int rc = 0; + +#ifdef __FreeBSD__ + pthread_mutex_init(mtx, NULL); +#else + pthread_mutexattr_t attr; + + if (pthread_mutexattr_init(&attr)) { + return -1; + } + if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || + pthread_mutex_init(mtx, &attr)) { + rc = -1; + } + pthread_mutexattr_destroy(&attr); +#endif + + return rc; +} + +int +nvme_driver_init(void) +{ + static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER; + int ret = 0; + /* Any socket ID */ + int socket_id = -1; + + /* Use a special process-private mutex to ensure the global + * nvme driver object (g_spdk_nvme_driver) gets initialized by + * only one thread. Once that object is established and its + * mutex is initialized, we can unlock this mutex and use that + * one instead. + */ + pthread_mutex_lock(&g_init_mutex); + + /* Each process needs its own pid. */ + g_spdk_nvme_pid = getpid(); + + /* + * Only one thread from one process will do this driver init work. + * The primary process will reserve the shared memory and do the + * initialization. + * The secondary process will lookup the existing reserved memory. + */ + if (spdk_process_is_primary()) { + /* The unique named memzone already reserved. */ + if (g_spdk_nvme_driver != NULL) { + pthread_mutex_unlock(&g_init_mutex); + return 0; + } else { + g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME, + sizeof(struct nvme_driver), socket_id, + SPDK_MEMZONE_NO_IOVA_CONTIG); + } + + if (g_spdk_nvme_driver == NULL) { + SPDK_ERRLOG("primary process failed to reserve memory\n"); + pthread_mutex_unlock(&g_init_mutex); + return -1; + } + } else { + g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME); + + /* The unique named memzone already reserved by the primary process. */ + if (g_spdk_nvme_driver != NULL) { + int ms_waited = 0; + + /* Wait the nvme driver to get initialized. 
*/ + while ((g_spdk_nvme_driver->initialized == false) && + (ms_waited < g_nvme_driver_timeout_ms)) { + ms_waited++; + nvme_delay(1000); /* delay 1ms */ + } + if (g_spdk_nvme_driver->initialized == false) { + SPDK_ERRLOG("timeout waiting for primary process to init\n"); + pthread_mutex_unlock(&g_init_mutex); + return -1; + } + } else { + SPDK_ERRLOG("primary process is not started yet\n"); + pthread_mutex_unlock(&g_init_mutex); + return -1; + } + + pthread_mutex_unlock(&g_init_mutex); + return 0; + } + + /* + * At this moment, only one thread from the primary process will do + * the g_spdk_nvme_driver initialization + */ + assert(spdk_process_is_primary()); + + ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock); + if (ret != 0) { + SPDK_ERRLOG("failed to initialize mutex\n"); + spdk_memzone_free(SPDK_NVME_DRIVER_NAME); + pthread_mutex_unlock(&g_init_mutex); + return ret; + } + + /* The lock in the shared g_spdk_nvme_driver object is now ready to + * be used - so we can unlock the g_init_mutex here. + */ + pthread_mutex_unlock(&g_init_mutex); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + g_spdk_nvme_driver->initialized = false; + g_spdk_nvme_driver->hotplug_fd = nvme_uevent_connect(); + if (g_spdk_nvme_driver->hotplug_fd < 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Failed to open uevent netlink socket\n"); + } + + TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs); + + spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id); + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + return ret; +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +int +nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle) +{ + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ctrlr_opts opts; + + assert(trid != NULL); + + spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); + + if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) { + ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid); + if (ctrlr) { + /* This ctrlr already exists. + * Increase the ref count before calling attach_cb() as the user may + * call nvme_detach() immediately. */ + nvme_ctrlr_proc_get_ref(ctrlr); + + if (probe_ctx->attach_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + return 0; + } + + ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle); + if (ctrlr == NULL) { + SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr); + return -1; + } + ctrlr->remove_cb = probe_ctx->remove_cb; + ctrlr->cb_ctx = probe_ctx->cb_ctx; + + if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && + ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { + /* If the user specifically set an IO queue size different than the + * default, use that value. Otherwise overwrite with the quirked value. + * This allows this quirk to be overridden when necessary. + * However, cap.mqes still needs to be respected. 
+ */ + ctrlr->opts.io_queue_size = spdk_min(DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK, ctrlr->cap.bits.mqes + 1u); + } + + nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); + TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq); + return 0; + } + + return 1; +} + +static int +nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_probe_ctx *probe_ctx) +{ + int rc = 0; + + rc = nvme_ctrlr_process_init(ctrlr); + + if (rc) { + /* Controller failed to initialize. */ + TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq); + SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr); + nvme_ctrlr_fail(ctrlr, false); + nvme_ctrlr_destruct(ctrlr); + return rc; + } + + if (ctrlr->state != NVME_CTRLR_STATE_READY) { + return 0; + } + + STAILQ_INIT(&ctrlr->io_producers); + + /* + * Controller has been initialized. + * Move it to the attached_ctrlrs list. + */ + TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq); + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + if (nvme_ctrlr_shared(ctrlr)) { + TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq); + } else { + TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq); + } + + /* + * Increase the ref count before calling attach_cb() as the user may + * call nvme_detach() immediately. + */ + nvme_ctrlr_proc_get_ref(ctrlr); + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + if (probe_ctx->attach_cb) { + probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); + return 0; + } + + return 0; +} + +static int +nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx) +{ + int rc = 0; + + while (true) { + rc = spdk_nvme_probe_poll_async(probe_ctx); + if (rc != -EAGAIN) { + return rc; + } + } + + return rc; +} + +/* This function must not be called while holding g_spdk_nvme_driver->lock */ +static struct spdk_nvme_ctrlr * +nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvme_ctrlr *ctrlr; + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid); + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + return ctrlr; +} + +/* This function must be called while holding g_spdk_nvme_driver->lock */ +struct spdk_nvme_ctrlr * +nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvme_ctrlr *ctrlr; + + /* Search per-process list */ + TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) { + return ctrlr; + } + } + + /* Search multi-process shared list */ + TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) { + return ctrlr; + } + } + + return NULL; +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +static int +nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx, + bool direct_connect) +{ + int rc; + struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp; + + spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype); + if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) { + SPDK_ERRLOG("NVMe trtype %u not available\n", probe_ctx->trid.trtype); + return -1; + } + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect); + if (rc != 0) { + SPDK_ERRLOG("NVMe ctrlr scan failed\n"); + TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) { + TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, 
tailq); + nvme_transport_ctrlr_destruct(ctrlr); + } + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + return -1; + } + + /* + * Probe controllers on the shared_attached_ctrlrs list + */ + if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) { + TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) { + /* Do not attach other ctrlrs if user specify a valid trid */ + if ((strlen(probe_ctx->trid.traddr) != 0) && + (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) { + continue; + } + + /* Do not attach if we failed to initialize it in this process */ + if (nvme_ctrlr_get_current_process(ctrlr) == NULL) { + continue; + } + + nvme_ctrlr_proc_get_ref(ctrlr); + + /* + * Unlock while calling attach_cb() so the user can call other functions + * that may take the driver lock, like nvme_detach(). + */ + if (probe_ctx->attach_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + } + } + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + return 0; +} + +static void +nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx, + const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + probe_ctx->trid = *trid; + probe_ctx->cb_ctx = cb_ctx; + probe_ctx->probe_cb = probe_cb; + probe_ctx->attach_cb = attach_cb; + probe_ctx->remove_cb = remove_cb; + TAILQ_INIT(&probe_ctx->init_ctrlrs); +} + +int +spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx, + spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + struct spdk_nvme_transport_id trid_pcie; + struct spdk_nvme_probe_ctx *probe_ctx; + + if (trid == NULL) { + memset(&trid_pcie, 0, sizeof(trid_pcie)); + spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE); + trid = &trid_pcie; + } + + probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb, + attach_cb, remove_cb); + if (!probe_ctx) { + SPDK_ERRLOG("Create probe context failed\n"); + return -1; + } + + /* + * Keep going even if one or more nvme_attach() calls failed, + * but maintain the value of rc to signal errors when we return. 
+ */ + return nvme_init_controllers(probe_ctx); +} + +static bool +nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx; + + assert(requested_opts); + memcpy(opts, requested_opts, sizeof(*opts)); + + return true; +} + +static void +nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts, + const struct spdk_nvme_ctrlr_opts *opts_user, + size_t opts_size_user) +{ + assert(opts); + assert(opts_user); + + spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user); + +#define FIELD_OK(field) \ + offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size) + + if (FIELD_OK(num_io_queues)) { + opts->num_io_queues = opts_user->num_io_queues; + } + + if (FIELD_OK(use_cmb_sqs)) { + opts->use_cmb_sqs = opts_user->use_cmb_sqs; + } + + if (FIELD_OK(no_shn_notification)) { + opts->no_shn_notification = opts_user->no_shn_notification; + } + + if (FIELD_OK(arb_mechanism)) { + opts->arb_mechanism = opts_user->arb_mechanism; + } + + if (FIELD_OK(arbitration_burst)) { + opts->arbitration_burst = opts_user->arbitration_burst; + } + + if (FIELD_OK(low_priority_weight)) { + opts->low_priority_weight = opts_user->low_priority_weight; + } + + if (FIELD_OK(medium_priority_weight)) { + opts->medium_priority_weight = opts_user->medium_priority_weight; + } + + if (FIELD_OK(high_priority_weight)) { + opts->high_priority_weight = opts_user->high_priority_weight; + } + + if (FIELD_OK(keep_alive_timeout_ms)) { + opts->keep_alive_timeout_ms = opts_user->keep_alive_timeout_ms; + } + + if (FIELD_OK(transport_retry_count)) { + opts->transport_retry_count = opts_user->transport_retry_count; + } + + if (FIELD_OK(io_queue_size)) { + opts->io_queue_size = opts_user->io_queue_size; + } + + if (FIELD_OK(hostnqn)) { + memcpy(opts->hostnqn, opts_user->hostnqn, sizeof(opts_user->hostnqn)); + } + + if (FIELD_OK(io_queue_requests)) { + opts->io_queue_requests = opts_user->io_queue_requests; + } + + if (FIELD_OK(src_addr)) { + memcpy(opts->src_addr, opts_user->src_addr, sizeof(opts_user->src_addr)); + } + + if (FIELD_OK(src_svcid)) { + memcpy(opts->src_svcid, opts_user->src_svcid, sizeof(opts_user->src_svcid)); + } + + if (FIELD_OK(host_id)) { + memcpy(opts->host_id, opts_user->host_id, sizeof(opts_user->host_id)); + } + if (FIELD_OK(extended_host_id)) { + memcpy(opts->extended_host_id, opts_user->extended_host_id, + sizeof(opts_user->extended_host_id)); + } + + if (FIELD_OK(command_set)) { + opts->command_set = opts_user->command_set; + } + + if (FIELD_OK(admin_timeout_ms)) { + opts->admin_timeout_ms = opts_user->admin_timeout_ms; + } + + if (FIELD_OK(header_digest)) { + opts->header_digest = opts_user->header_digest; + } + + if (FIELD_OK(data_digest)) { + opts->data_digest = opts_user->data_digest; + } + + if (FIELD_OK(disable_error_logging)) { + opts->disable_error_logging = opts_user->disable_error_logging; + } + + if (FIELD_OK(transport_ack_timeout)) { + opts->transport_ack_timeout = opts_user->transport_ack_timeout; + } + + if (FIELD_OK(admin_queue_size)) { + opts->admin_queue_size = opts_user->admin_queue_size; + } +#undef FIELD_OK +} + +struct spdk_nvme_ctrlr * +spdk_nvme_connect(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) +{ + int rc; + struct spdk_nvme_ctrlr *ctrlr = NULL; + struct spdk_nvme_probe_ctx *probe_ctx; + struct spdk_nvme_ctrlr_opts *opts_local_p = NULL; + struct spdk_nvme_ctrlr_opts opts_local; + + if (trid == 
NULL) { + SPDK_ERRLOG("No transport ID specified\n"); + return NULL; + } + + if (opts) { + opts_local_p = &opts_local; + nvme_ctrlr_opts_init(opts_local_p, opts, opts_size); + } + + probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL); + if (!probe_ctx) { + SPDK_ERRLOG("Create probe context failed\n"); + return NULL; + } + + rc = nvme_init_controllers(probe_ctx); + if (rc != 0) { + return NULL; + } + + ctrlr = nvme_get_ctrlr_by_trid(trid); + + return ctrlr; +} + +void +spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid, + enum spdk_nvme_transport_type trtype) +{ + const char *trstring = ""; + + trid->trtype = trtype; + switch (trtype) { + case SPDK_NVME_TRANSPORT_FC: + trstring = SPDK_NVME_TRANSPORT_NAME_FC; + break; + case SPDK_NVME_TRANSPORT_PCIE: + trstring = SPDK_NVME_TRANSPORT_NAME_PCIE; + break; + case SPDK_NVME_TRANSPORT_RDMA: + trstring = SPDK_NVME_TRANSPORT_NAME_RDMA; + break; + case SPDK_NVME_TRANSPORT_TCP: + trstring = SPDK_NVME_TRANSPORT_NAME_TCP; + break; + case SPDK_NVME_TRANSPORT_CUSTOM: + default: + SPDK_ERRLOG("don't use this for custom transports\n"); + assert(0); + return; + } + snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring); +} + +int +spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring) +{ + int len, i, rc; + + if (trstring == NULL) { + return -EINVAL; + } + + len = strnlen(trstring, SPDK_NVMF_TRSTRING_MAX_LEN); + if (len == SPDK_NVMF_TRSTRING_MAX_LEN) { + return -EINVAL; + } + + rc = snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring); + if (rc < 0) { + return rc; + } + + /* cast official trstring to uppercase version of input. */ + for (i = 0; i < len; i++) { + trid->trstring[i] = toupper(trid->trstring[i]); + } + return 0; +} + +int +spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str) +{ + if (trtype == NULL || str == NULL) { + return -EINVAL; + } + + if (strcasecmp(str, "PCIe") == 0) { + *trtype = SPDK_NVME_TRANSPORT_PCIE; + } else if (strcasecmp(str, "RDMA") == 0) { + *trtype = SPDK_NVME_TRANSPORT_RDMA; + } else if (strcasecmp(str, "FC") == 0) { + *trtype = SPDK_NVME_TRANSPORT_FC; + } else if (strcasecmp(str, "TCP") == 0) { + *trtype = SPDK_NVME_TRANSPORT_TCP; + } else { + *trtype = SPDK_NVME_TRANSPORT_CUSTOM; + } + return 0; +} + +const char * +spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype) +{ + switch (trtype) { + case SPDK_NVME_TRANSPORT_PCIE: + return "PCIe"; + case SPDK_NVME_TRANSPORT_RDMA: + return "RDMA"; + case SPDK_NVME_TRANSPORT_FC: + return "FC"; + case SPDK_NVME_TRANSPORT_TCP: + return "TCP"; + case SPDK_NVME_TRANSPORT_CUSTOM: + return "CUSTOM"; + default: + return NULL; + } +} + +int +spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str) +{ + if (adrfam == NULL || str == NULL) { + return -EINVAL; + } + + if (strcasecmp(str, "IPv4") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IPV4; + } else if (strcasecmp(str, "IPv6") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IPV6; + } else if (strcasecmp(str, "IB") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IB; + } else if (strcasecmp(str, "FC") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_FC; + } else { + return -ENOENT; + } + return 0; +} + +const char * +spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam) +{ + switch (adrfam) { + case SPDK_NVMF_ADRFAM_IPV4: + return "IPv4"; + case SPDK_NVMF_ADRFAM_IPV6: + return "IPv6"; + case SPDK_NVMF_ADRFAM_IB: + return "IB"; + case SPDK_NVMF_ADRFAM_FC: + return "FC"; + default: + return 
NULL; + } +} + +static size_t +parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size) +{ + + const char *sep, *sep1; + const char *whitespace = " \t\n"; + size_t key_len, val_len; + + *str += strspn(*str, whitespace); + + sep = strchr(*str, ':'); + if (!sep) { + sep = strchr(*str, '='); + if (!sep) { + SPDK_ERRLOG("Key without ':' or '=' separator\n"); + return 0; + } + } else { + sep1 = strchr(*str, '='); + if ((sep1 != NULL) && (sep1 < sep)) { + sep = sep1; + } + } + + key_len = sep - *str; + if (key_len >= key_buf_size) { + SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n", + key_len, key_buf_size - 1); + return 0; + } + + memcpy(key, *str, key_len); + key[key_len] = '\0'; + + *str += key_len + 1; /* Skip key: */ + val_len = strcspn(*str, whitespace); + if (val_len == 0) { + SPDK_ERRLOG("Key without value\n"); + return 0; + } + + if (val_len >= val_buf_size) { + SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n", + val_len, val_buf_size - 1); + return 0; + } + + memcpy(val, *str, val_len); + val[val_len] = '\0'; + + *str += val_len; + + return val_len; +} + +int +spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str) +{ + size_t val_len; + char key[32]; + char val[1024]; + + if (trid == NULL || str == NULL) { + return -EINVAL; + } + + while (*str != '\0') { + + val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val)); + + if (val_len == 0) { + SPDK_ERRLOG("Failed to parse transport ID\n"); + return -EINVAL; + } + + if (strcasecmp(key, "trtype") == 0) { + if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) { + SPDK_ERRLOG("invalid transport '%s'\n", val); + return -EINVAL; + } + if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) { + SPDK_ERRLOG("Unknown trtype '%s'\n", val); + return -EINVAL; + } + } else if (strcasecmp(key, "adrfam") == 0) { + if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) { + SPDK_ERRLOG("Unknown adrfam '%s'\n", val); + return -EINVAL; + } + } else if (strcasecmp(key, "traddr") == 0) { + if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) { + SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRADDR_MAX_LEN); + return -EINVAL; + } + memcpy(trid->traddr, val, val_len + 1); + } else if (strcasecmp(key, "trsvcid") == 0) { + if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) { + SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRSVCID_MAX_LEN); + return -EINVAL; + } + memcpy(trid->trsvcid, val, val_len + 1); + } else if (strcasecmp(key, "priority") == 0) { + if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) { + SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_PRIORITY_MAX_LEN); + return -EINVAL; + } + trid->priority = spdk_strtol(val, 10); + } else if (strcasecmp(key, "subnqn") == 0) { + if (val_len > SPDK_NVMF_NQN_MAX_LEN) { + SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_NQN_MAX_LEN); + return -EINVAL; + } + memcpy(trid->subnqn, val, val_len + 1); + } else if (strcasecmp(key, "hostaddr") == 0) { + continue; + } else if (strcasecmp(key, "hostsvcid") == 0) { + continue; + } else if (strcasecmp(key, "ns") == 0) { + /* + * Special case. The namespace id parameter may + * optionally be passed in the transport id string + * for an SPDK application (e.g. nvme/perf) + * and additionally parsed therein to limit + * targeting a specific namespace. 
For this + * scenario, just silently ignore this key + * rather than letting it default to logging + * it as an invalid key. + */ + continue; + } else if (strcasecmp(key, "alt_traddr") == 0) { + /* + * Used by applications for enabling transport ID failover. + * Please see the case above for more information on custom parameters. + */ + continue; + } else { + SPDK_ERRLOG("Unknown transport ID key '%s'\n", key); + } + } + + return 0; +} + +int +spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str) +{ + + size_t key_size = 32; + size_t val_size = 1024; + size_t val_len; + char key[key_size]; + char val[val_size]; + + if (hostid == NULL || str == NULL) { + return -EINVAL; + } + + while (*str != '\0') { + + val_len = parse_next_key(&str, key, val, key_size, val_size); + + if (val_len == 0) { + SPDK_ERRLOG("Failed to parse host ID\n"); + return val_len; + } + + /* Ignore the rest of the options from the transport ID. */ + if (strcasecmp(key, "trtype") == 0) { + continue; + } else if (strcasecmp(key, "adrfam") == 0) { + continue; + } else if (strcasecmp(key, "traddr") == 0) { + continue; + } else if (strcasecmp(key, "trsvcid") == 0) { + continue; + } else if (strcasecmp(key, "subnqn") == 0) { + continue; + } else if (strcasecmp(key, "priority") == 0) { + continue; + } else if (strcasecmp(key, "ns") == 0) { + continue; + } else if (strcasecmp(key, "hostaddr") == 0) { + if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) { + SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRADDR_MAX_LEN); + return -EINVAL; + } + memcpy(hostid->hostaddr, val, val_len + 1); + + } else if (strcasecmp(key, "hostsvcid") == 0) { + if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) { + SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRSVCID_MAX_LEN); + return -EINVAL; + } + memcpy(hostid->hostsvcid, val, val_len + 1); + } else { + SPDK_ERRLOG("Unknown transport ID key '%s'\n", key); + } + } + + return 0; +} + +static int +cmp_int(int a, int b) +{ + return a - b; +} + +int +spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1, + const struct spdk_nvme_transport_id *trid2) +{ + int cmp; + + if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) { + cmp = strcasecmp(trid1->trstring, trid2->trstring); + } else { + cmp = cmp_int(trid1->trtype, trid2->trtype); + } + + if (cmp) { + return cmp; + } + + if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_addr pci_addr1 = {}; + struct spdk_pci_addr pci_addr2 = {}; + + /* Normalize PCI addresses before comparing */ + if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 || + spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) { + return -1; + } + + /* PCIe transport ID only uses trtype and traddr */ + return spdk_pci_addr_compare(&pci_addr1, &pci_addr2); + } + + cmp = strcasecmp(trid1->traddr, trid2->traddr); + if (cmp) { + return cmp; + } + + cmp = cmp_int(trid1->adrfam, trid2->adrfam); + if (cmp) { + return cmp; + } + + cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid); + if (cmp) { + return cmp; + } + + cmp = strcmp(trid1->subnqn, trid2->subnqn); + if (cmp) { + return cmp; + } + + return 0; +} + +int +spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str) +{ + size_t val_len; + char key[32]; + char val[1024]; + + if (prchk_flags == NULL || str == NULL) { + return -EINVAL; + } + + while (*str != '\0') { + val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val)); + + if (val_len == 0) { + SPDK_ERRLOG("Failed to parse prchk\n"); + return -EINVAL; + 
} + + if (strcasecmp(key, "prchk") == 0) { + if (strcasestr(val, "reftag") != NULL) { + *prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strcasestr(val, "guard") != NULL) { + *prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + } else { + SPDK_ERRLOG("Unknown key '%s'\n", key); + return -EINVAL; + } + } + + return 0; +} + +const char * +spdk_nvme_prchk_flags_str(uint32_t prchk_flags) +{ + if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + return "prchk:reftag|guard"; + } else { + return "prchk:reftag"; + } + } else { + if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + return "prchk:guard"; + } else { + return NULL; + } + } +} + +struct spdk_nvme_probe_ctx * +spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + int rc; + struct spdk_nvme_probe_ctx *probe_ctx; + + rc = nvme_driver_init(); + if (rc != 0) { + return NULL; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (!probe_ctx) { + return NULL; + } + + nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb); + rc = nvme_probe_internal(probe_ctx, false); + if (rc != 0) { + free(probe_ctx); + return NULL; + } + + return probe_ctx; +} + +int +spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx) +{ + int rc = 0; + struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp; + + if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + free(probe_ctx); + return 0; + } + + TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) { + rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx); + if (rc != 0) { + rc = -EIO; + break; + } + } + + if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) { + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + g_spdk_nvme_driver->initialized = true; + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + free(probe_ctx); + return rc; + } + + return -EAGAIN; +} + +struct spdk_nvme_probe_ctx * +spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + spdk_nvme_attach_cb attach_cb) +{ + int rc; + spdk_nvme_probe_cb probe_cb = NULL; + struct spdk_nvme_probe_ctx *probe_ctx; + + rc = nvme_driver_init(); + if (rc != 0) { + return NULL; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (!probe_ctx) { + return NULL; + } + + if (opts) { + probe_cb = nvme_connect_probe_cb; + } + + nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL); + rc = nvme_probe_internal(probe_ctx, true); + if (rc != 0) { + free(probe_ctx); + return NULL; + } + + return probe_ctx; +} + +SPDK_LOG_REGISTER_COMPONENT("nvme", SPDK_LOG_NVME) |
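The file added by this commit implements the host-side controller lifecycle (spdk_nvme_probe(), spdk_nvme_connect(), spdk_nvme_detach()) plus the transport-ID parsing helpers. The sketch below is a minimal, hypothetical caller of that public API, not part of this commit; it assumes the standard SPDK headers spdk/env.h and spdk/nvme.h from the same tree and only illustrates the probe/attach/detach flow described above.

/* Hypothetical caller sketch (illustration only, not part of this commit):
 * scan the local PCIe bus with spdk_nvme_probe(), then release the
 * controller reference taken on attach with spdk_nvme_detach().
 */
#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/nvme.h"

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	/* Returning true asks the library to attach to this controller. */
	printf("probing %s\n", trid->traddr);
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	/* A per-process reference was taken on ctrlr before this callback runs,
	 * so the controller stays valid until spdk_nvme_detach() drops it.
	 * For brevity this sketch only remembers the last controller attached.
	 */
	struct spdk_nvme_ctrlr **out = cb_ctx;

	*out = ctrlr;
}

int
main(void)
{
	struct spdk_env_opts env_opts;
	struct spdk_nvme_ctrlr *ctrlr = NULL;

	spdk_env_opts_init(&env_opts);
	env_opts.name = "nvme_probe_example";
	if (spdk_env_init(&env_opts) < 0) {
		return 1;
	}

	/* A NULL trid defaults to scanning the local PCIe bus,
	 * as spdk_nvme_probe() does above via trid_pcie. */
	if (spdk_nvme_probe(NULL, &ctrlr, probe_cb, attach_cb, NULL) != 0) {
		return 1;
	}

	if (ctrlr != NULL) {
		spdk_nvme_detach(ctrlr);
	}

	return 0;
}

The same flow would apply to a fabrics target: fill a struct spdk_nvme_transport_id with spdk_nvme_transport_id_parse() (the "key:value" string format handled by parse_next_key() above) and pass it as the first argument, or use spdk_nvme_connect() when a single known target should be attached directly.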