diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/infiniband/ulp/srp/Kbuild | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/Kconfig | 13 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 4225 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 350 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/Kconfig | 13 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/Makefile | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_dm_mad.h | 139 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 3958 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.h | 469 |
9 files changed, 9171 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/srp/Kbuild b/drivers/infiniband/ulp/srp/Kbuild new file mode 100644 index 000000000..d1f4e513b --- /dev/null +++ b/drivers/infiniband/ulp/srp/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INFINIBAND_SRP) += ib_srp.o diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig new file mode 100644 index 000000000..67cd63d13 --- /dev/null +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INFINIBAND_SRP + tristate "InfiniBand SCSI RDMA Protocol" + depends on SCSI && INFINIBAND_ADDR_TRANS + select SCSI_SRP_ATTRS + help + Support for the SCSI RDMA Protocol over InfiniBand. This + allows you to access storage devices that speak SRP over + InfiniBand. + + The SRP protocol is defined by the INCITS T10 technical + committee. See <http://www.t10.org/>. + diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c new file mode 100644 index 000000000..c4dcef76e --- /dev/null +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -0,0 +1,4225 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/parser.h> +#include <linux/random.h> +#include <linux/jiffies.h> +#include <linux/lockdep.h> +#include <linux/inet.h> +#include <rdma/ib_cache.h> + +#include <linux/atomic.h> + +#include <scsi/scsi.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_dbg.h> +#include <scsi/scsi_tcq.h> +#include <scsi/srp.h> +#include <scsi/scsi_transport_srp.h> + +#include "ib_srp.h" + +#define DRV_NAME "ib_srp" +#define PFX DRV_NAME ": " + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); +MODULE_LICENSE("Dual BSD/GPL"); + +#if !defined(CONFIG_DYNAMIC_DEBUG) +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) +#define DYNAMIC_DEBUG_BRANCH(descriptor) false +#endif + +static unsigned int srp_sg_tablesize; +static unsigned int cmd_sg_entries; +static unsigned int indirect_sg_entries; +static bool allow_ext_sg; +static bool register_always = true; +static bool never_register; +static int topspin_workarounds = 1; + +module_param(srp_sg_tablesize, uint, 0444); +MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); + +module_param(cmd_sg_entries, uint, 0444); +MODULE_PARM_DESC(cmd_sg_entries, + "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); + +module_param(indirect_sg_entries, uint, 0444); +MODULE_PARM_DESC(indirect_sg_entries, + "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")"); + +module_param(allow_ext_sg, bool, 0444); +MODULE_PARM_DESC(allow_ext_sg, + "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)"); + +module_param(topspin_workarounds, int, 0444); +MODULE_PARM_DESC(topspin_workarounds, + "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, + "Use memory registration even for contiguous memory regions"); + +module_param(never_register, bool, 0444); +MODULE_PARM_DESC(never_register, "Never register memory"); + +static const struct kernel_param_ops srp_tmo_ops; + +static int srp_reconnect_delay = 10; +module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); + +static int srp_fast_io_fail_tmo = 15; +module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_io_fail_tmo, + "Number of seconds between the observation of a transport" + " layer error and failing all I/O. \"off\" means that this" + " functionality is disabled."); + +static int srp_dev_loss_tmo = 600; +module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dev_loss_tmo, + "Maximum number of seconds that the SRP transport should" + " insulate transport layer errors. After this time has been" + " exceeded the SCSI host is removed. Should be" + " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) + " if fast_io_fail_tmo has not been set. \"off\" means that" + " this functionality is disabled."); + +static bool srp_use_imm_data = true; +module_param_named(use_imm_data, srp_use_imm_data, bool, 0644); +MODULE_PARM_DESC(use_imm_data, + "Whether or not to request permission to use immediate data during SRP login."); + +static unsigned int srp_max_imm_data = 8 * 1024; +module_param_named(max_imm_data, srp_max_imm_data, uint, 0644); +MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size."); + +static unsigned ch_count; +module_param(ch_count, uint, 0444); +MODULE_PARM_DESC(ch_count, + "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); + +static int srp_add_one(struct ib_device *device); +static void srp_remove_one(struct ib_device *device, void *client_data); +static void srp_rename_dev(struct ib_device *device, void *client_data); +static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, + const char *opname); +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event); +static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); + +static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; + +static struct ib_client srp_client = { + .name = "srp", + .add = srp_add_one, + .remove = srp_remove_one, + .rename = srp_rename_dev +}; + +static struct ib_sa_client srp_sa_client; + +static int srp_tmo_get(char *buffer, const struct kernel_param *kp) +{ + int tmo = *(int *)kp->arg; + + if (tmo >= 0) + return sysfs_emit(buffer, "%d\n", tmo); + else + return sysfs_emit(buffer, "off\n"); +} + +static int srp_tmo_set(const char *val, const struct kernel_param *kp) +{ + int tmo, res; + + res = srp_parse_tmo(&tmo, val); + if (res) + goto out; + + if (kp->arg == &srp_reconnect_delay) + res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, + srp_dev_loss_tmo); + else if (kp->arg == &srp_fast_io_fail_tmo) + res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); + else + res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, + tmo); + if (res) + goto out; + *(int *)kp->arg = tmo; + +out: + return res; +} + +static const struct kernel_param_ops srp_tmo_ops = { + .get = srp_tmo_get, + .set = srp_tmo_set, +}; + +static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) +{ + return (struct srp_target_port *) host->hostdata; +} + +static const char *srp_target_info(struct Scsi_Host *host) +{ + return host_to_target(host)->target_name; +} + +static int srp_target_is_topspin(struct srp_target_port *target) +{ + static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; + static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; + + return topspin_workarounds && + (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || + !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); +} + +static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, + gfp_t gfp_mask, + enum dma_data_direction direction) +{ + struct srp_iu *iu; + + iu = kmalloc(sizeof *iu, gfp_mask); + if (!iu) + goto out; + + iu->buf = kzalloc(size, gfp_mask); + if (!iu->buf) + goto out_free_iu; + + iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, + direction); + if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) + goto out_free_buf; + + iu->size = size; + iu->direction = direction; + + return iu; + +out_free_buf: + kfree(iu->buf); +out_free_iu: + kfree(iu); +out: + return NULL; +} + +static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) +{ + if (!iu) + return; + + ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, + iu->direction); + kfree(iu->buf); + kfree(iu); +} + +static void srp_qp_event(struct ib_event *event, void *context) +{ + pr_debug("QP event %s (%d)\n", + ib_event_msg(event->event), event->event); +} + +static int srp_init_ib_qp(struct srp_target_port *target, + struct ib_qp *qp) +{ + struct ib_qp_attr *attr; + int ret; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->ib_cm.pkey), + &attr->pkey_index); + if (ret) + goto out; + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + attr->port_num = target->srp_host->port; + + ret = ib_modify_qp(qp, attr, + IB_QP_STATE | + IB_QP_PKEY_INDEX | + IB_QP_ACCESS_FLAGS | + IB_QP_PORT); + +out: + kfree(attr); + return ret; +} + +static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + struct ib_cm_id *new_cm_id; + + new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, + srp_ib_cm_handler, ch); + if (IS_ERR(new_cm_id)) + return PTR_ERR(new_cm_id); + + if (ch->ib_cm.cm_id) + ib_destroy_cm_id(ch->ib_cm.cm_id); + ch->ib_cm.cm_id = new_cm_id; + if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, + target->srp_host->port)) + ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA; + else + ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB; + ch->ib_cm.path.sgid = target->sgid; + ch->ib_cm.path.dgid = target->ib_cm.orig_dgid; + ch->ib_cm.path.pkey = target->ib_cm.pkey; + ch->ib_cm.path.service_id = target->ib_cm.service_id; + + return 0; +} + +static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + struct rdma_cm_id *new_cm_id; + int ret; + + new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(new_cm_id)) { + ret = PTR_ERR(new_cm_id); + new_cm_id = NULL; + goto out; + } + + init_completion(&ch->done); + ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? + &target->rdma_cm.src.sa : NULL, + &target->rdma_cm.dst.sa, + SRP_PATH_REC_TIMEOUT_MS); + if (ret) { + pr_err("No route available from %pISpsc to %pISpsc (%d)\n", + &target->rdma_cm.src, &target->rdma_cm.dst, ret); + goto out; + } + ret = wait_for_completion_interruptible(&ch->done); + if (ret < 0) + goto out; + + ret = ch->status; + if (ret) { + pr_err("Resolving address %pISpsc failed (%d)\n", + &target->rdma_cm.dst, ret); + goto out; + } + + swap(ch->rdma_cm.cm_id, new_cm_id); + +out: + if (new_cm_id) + rdma_destroy_id(new_cm_id); + + return ret; +} + +static int srp_new_cm_id(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + + return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) : + srp_new_ib_cm_id(ch); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. + */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ + int i; + struct srp_fr_desc *d; + + if (!pool) + return; + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + if (d->mr) + ib_dereg_mr(d->mr); + } + kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device: IB device to allocate fast registration descriptors for. + * @pd: Protection domain associated with the FR descriptors. + * @pool_size: Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. + */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, + struct ib_pd *pd, int pool_size, + int max_page_list_len) +{ + struct srp_fr_pool *pool; + struct srp_fr_desc *d; + struct ib_mr *mr; + int i, ret = -EINVAL; + enum ib_mr_type mr_type; + + if (pool_size <= 0) + goto err; + ret = -ENOMEM; + pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL); + if (!pool) + goto err; + pool->size = pool_size; + pool->max_page_list_len = max_page_list_len; + spin_lock_init(&pool->lock); + INIT_LIST_HEAD(&pool->free_list); + + if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + mr = ib_alloc_mr(pd, mr_type, max_page_list_len); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + if (ret == -ENOMEM) + pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", + dev_name(&device->dev)); + goto destroy_pool; + } + d->mr = mr; + list_add_tail(&d->entry, &pool->free_list); + } + +out: + return pool; + +destroy_pool: + srp_destroy_fr_pool(pool); + +err: + pool = ERR_PTR(ret); + goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ + struct srp_fr_desc *d = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + if (!list_empty(&pool->free_list)) { + d = list_first_entry(&pool->free_list, typeof(*d), entry); + list_del(&d->entry); + } + spin_unlock_irqrestore(&pool->lock, flags); + + return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n: Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. + */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, + int n) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&pool->lock, flags); + for (i = 0; i < n; i++) + list_add(&desc[i]->entry, &pool->free_list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + + return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, + dev->max_pages_per_mr); +} + +/** + * srp_destroy_qp() - destroy an RDMA queue pair + * @ch: SRP RDMA channel. + * + * Drain the qp before destroying it. This avoids that the receive + * completion handler can access the queue pair while it is + * being destroyed. + */ +static void srp_destroy_qp(struct srp_rdma_ch *ch) +{ + spin_lock_irq(&ch->lock); + ib_process_cq_direct(ch->send_cq, -1); + spin_unlock_irq(&ch->lock); + + ib_drain_qp(ch->qp); + ib_destroy_qp(ch->qp); +} + +static int srp_create_ch_ib(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + const struct ib_device_attr *attr = &dev->dev->attrs; + struct ib_qp_init_attr *init_attr; + struct ib_cq *recv_cq, *send_cq; + struct ib_qp *qp; + struct srp_fr_pool *fr_pool = NULL; + const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; + int ret; + + init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); + if (!init_attr) + return -ENOMEM; + + /* queue_size + 1 for ib_drain_rq() */ + recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, + ch->comp_vector, IB_POLL_SOFTIRQ); + if (IS_ERR(recv_cq)) { + ret = PTR_ERR(recv_cq); + goto err; + } + + send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, + ch->comp_vector, IB_POLL_DIRECT); + if (IS_ERR(send_cq)) { + ret = PTR_ERR(send_cq); + goto err_recv_cq; + } + + init_attr->event_handler = srp_qp_event; + init_attr->cap.max_send_wr = m * target->queue_size; + init_attr->cap.max_recv_wr = target->queue_size + 1; + init_attr->cap.max_recv_sge = 1; + init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge); + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + init_attr->qp_type = IB_QPT_RC; + init_attr->send_cq = send_cq; + init_attr->recv_cq = recv_cq; + + ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U); + + if (target->using_rdma_cm) { + ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); + qp = ch->rdma_cm.cm_id->qp; + } else { + qp = ib_create_qp(dev->pd, init_attr); + if (!IS_ERR(qp)) { + ret = srp_init_ib_qp(target, qp); + if (ret) + ib_destroy_qp(qp); + } else { + ret = PTR_ERR(qp); + } + } + if (ret) { + pr_err("QP creation failed for dev %s: %d\n", + dev_name(&dev->dev->dev), ret); + goto err_send_cq; + } + + if (dev->use_fast_reg) { + fr_pool = srp_alloc_fr_pool(target); + if (IS_ERR(fr_pool)) { + ret = PTR_ERR(fr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FR pool allocation failed (%d)\n", ret); + goto err_qp; + } + } + + if (ch->qp) + srp_destroy_qp(ch); + if (ch->recv_cq) + ib_free_cq(ch->recv_cq); + if (ch->send_cq) + ib_free_cq(ch->send_cq); + + ch->qp = qp; + ch->recv_cq = recv_cq; + ch->send_cq = send_cq; + + if (dev->use_fast_reg) { + if (ch->fr_pool) + srp_destroy_fr_pool(ch->fr_pool); + ch->fr_pool = fr_pool; + } + + kfree(init_attr); + return 0; + +err_qp: + if (target->using_rdma_cm) + rdma_destroy_qp(ch->rdma_cm.cm_id); + else + ib_destroy_qp(qp); + +err_send_cq: + ib_free_cq(send_cq); + +err_recv_cq: + ib_free_cq(recv_cq); + +err: + kfree(init_attr); + return ret; +} + +/* + * Note: this function may be called without srp_alloc_iu_bufs() having been + * invoked. Hence the ch->[rt]x_ring checks. + */ +static void srp_free_ch_ib(struct srp_target_port *target, + struct srp_rdma_ch *ch) +{ + struct srp_device *dev = target->srp_host->srp_dev; + int i; + + if (!ch->target) + return; + + if (target->using_rdma_cm) { + if (ch->rdma_cm.cm_id) { + rdma_destroy_id(ch->rdma_cm.cm_id); + ch->rdma_cm.cm_id = NULL; + } + } else { + if (ch->ib_cm.cm_id) { + ib_destroy_cm_id(ch->ib_cm.cm_id); + ch->ib_cm.cm_id = NULL; + } + } + + /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */ + if (!ch->qp) + return; + + if (dev->use_fast_reg) { + if (ch->fr_pool) + srp_destroy_fr_pool(ch->fr_pool); + } + + srp_destroy_qp(ch); + ib_free_cq(ch->send_cq); + ib_free_cq(ch->recv_cq); + + /* + * Avoid that the SCSI error handler tries to use this channel after + * it has been freed. The SCSI error handler can namely continue + * trying to perform recovery actions after scsi_remove_host() + * returned. + */ + ch->target = NULL; + + ch->qp = NULL; + ch->send_cq = ch->recv_cq = NULL; + + if (ch->rx_ring) { + for (i = 0; i < target->queue_size; ++i) + srp_free_iu(target->srp_host, ch->rx_ring[i]); + kfree(ch->rx_ring); + ch->rx_ring = NULL; + } + if (ch->tx_ring) { + for (i = 0; i < target->queue_size; ++i) + srp_free_iu(target->srp_host, ch->tx_ring[i]); + kfree(ch->tx_ring); + ch->tx_ring = NULL; + } +} + +static void srp_path_rec_completion(int status, + struct sa_path_rec *pathrec, + int num_paths, void *ch_ptr) +{ + struct srp_rdma_ch *ch = ch_ptr; + struct srp_target_port *target = ch->target; + + ch->status = status; + if (status) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Got failed path rec status %d\n", status); + else + ch->ib_cm.path = *pathrec; + complete(&ch->done); +} + +static int srp_ib_lookup_path(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + int ret; + + ch->ib_cm.path.numb_path = 1; + + init_completion(&ch->done); + + ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, + target->srp_host->srp_dev->dev, + target->srp_host->port, + &ch->ib_cm.path, + IB_SA_PATH_REC_SERVICE_ID | + IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_PKEY, + SRP_PATH_REC_TIMEOUT_MS, + GFP_KERNEL, + srp_path_rec_completion, + ch, &ch->ib_cm.path_query); + if (ch->ib_cm.path_query_id < 0) + return ch->ib_cm.path_query_id; + + ret = wait_for_completion_interruptible(&ch->done); + if (ret < 0) + return ret; + + if (ch->status < 0) + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", + ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, + be16_to_cpu(target->ib_cm.pkey), + be64_to_cpu(target->ib_cm.service_id)); + + return ch->status; +} + +static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + int ret; + + init_completion(&ch->done); + + ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS); + if (ret) + return ret; + + wait_for_completion_interruptible(&ch->done); + + if (ch->status != 0) + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Path resolution failed\n"); + + return ch->status; +} + +static int srp_lookup_path(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + + return target->using_rdma_cm ? srp_rdma_lookup_path(ch) : + srp_ib_lookup_path(ch); +} + +static u8 srp_get_subnet_timeout(struct srp_host *host) +{ + struct ib_port_attr attr; + int ret; + u8 subnet_timeout = 18; + + ret = ib_query_port(host->srp_dev->dev, host->port, &attr); + if (ret == 0) + subnet_timeout = attr.subnet_timeout; + + if (unlikely(subnet_timeout < 15)) + pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", + dev_name(&host->srp_dev->dev->dev), subnet_timeout); + + return subnet_timeout; +} + +static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) +{ + struct srp_target_port *target = ch->target; + struct { + struct rdma_conn_param rdma_param; + struct srp_login_req_rdma rdma_req; + struct ib_cm_req_param ib_param; + struct srp_login_req ib_req; + } *req = NULL; + char *ipi, *tpi; + int status; + + req = kzalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->ib_param.flow_control = 1; + req->ib_param.retry_count = target->tl_retry_count; + + /* + * Pick some arbitrary defaults here; we could make these + * module parameters if anyone cared about setting them. + */ + req->ib_param.responder_resources = 4; + req->ib_param.rnr_retry_count = 7; + req->ib_param.max_cm_retries = 15; + + req->ib_req.opcode = SRP_LOGIN_REQ; + req->ib_req.tag = 0; + req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len); + req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | + SRP_BUF_FORMAT_INDIRECT); + req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : + SRP_MULTICHAN_SINGLE); + if (srp_use_imm_data) { + req->ib_req.req_flags |= SRP_IMMED_REQUESTED; + req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET); + } + + if (target->using_rdma_cm) { + req->rdma_param.flow_control = req->ib_param.flow_control; + req->rdma_param.responder_resources = + req->ib_param.responder_resources; + req->rdma_param.initiator_depth = req->ib_param.initiator_depth; + req->rdma_param.retry_count = req->ib_param.retry_count; + req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count; + req->rdma_param.private_data = &req->rdma_req; + req->rdma_param.private_data_len = sizeof(req->rdma_req); + + req->rdma_req.opcode = req->ib_req.opcode; + req->rdma_req.tag = req->ib_req.tag; + req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; + req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; + req->rdma_req.req_flags = req->ib_req.req_flags; + req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset; + + ipi = req->rdma_req.initiator_port_id; + tpi = req->rdma_req.target_port_id; + } else { + u8 subnet_timeout; + + subnet_timeout = srp_get_subnet_timeout(target->srp_host); + + req->ib_param.primary_path = &ch->ib_cm.path; + req->ib_param.alternate_path = NULL; + req->ib_param.service_id = target->ib_cm.service_id; + get_random_bytes(&req->ib_param.starting_psn, 4); + req->ib_param.starting_psn &= 0xffffff; + req->ib_param.qp_num = ch->qp->qp_num; + req->ib_param.qp_type = ch->qp->qp_type; + req->ib_param.local_cm_response_timeout = subnet_timeout + 2; + req->ib_param.remote_cm_response_timeout = subnet_timeout + 2; + req->ib_param.private_data = &req->ib_req; + req->ib_param.private_data_len = sizeof(req->ib_req); + + ipi = req->ib_req.initiator_port_id; + tpi = req->ib_req.target_port_id; + } + + /* + * In the published SRP specification (draft rev. 16a), the + * port identifier format is 8 bytes of ID extension followed + * by 8 bytes of GUID. Older drafts put the two halves in the + * opposite order, so that the GUID comes first. + * + * Targets conforming to these obsolete drafts can be + * recognized by the I/O Class they report. + */ + if (target->io_class == SRP_REV10_IB_IO_CLASS) { + memcpy(ipi, &target->sgid.global.interface_id, 8); + memcpy(ipi + 8, &target->initiator_ext, 8); + memcpy(tpi, &target->ioc_guid, 8); + memcpy(tpi + 8, &target->id_ext, 8); + } else { + memcpy(ipi, &target->initiator_ext, 8); + memcpy(ipi + 8, &target->sgid.global.interface_id, 8); + memcpy(tpi, &target->id_ext, 8); + memcpy(tpi + 8, &target->ioc_guid, 8); + } + + /* + * Topspin/Cisco SRP targets will reject our login unless we + * zero out the first 8 bytes of our initiator port ID and set + * the second 8 bytes to the local node GUID. + */ + if (srp_target_is_topspin(target)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Topspin/Cisco initiator port ID workaround " + "activated for target GUID %016llx\n", + be64_to_cpu(target->ioc_guid)); + memset(ipi, 0, 8); + memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8); + } + + if (target->using_rdma_cm) + status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param); + else + status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param); + + kfree(req); + + return status; +} + +static bool srp_queue_remove_work(struct srp_target_port *target) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->state != SRP_TARGET_REMOVED) { + target->state = SRP_TARGET_REMOVED; + changed = true; + } + spin_unlock_irq(&target->lock); + + if (changed) + queue_work(srp_remove_wq, &target->remove_work); + + return changed; +} + +static void srp_disconnect_target(struct srp_target_port *target) +{ + struct srp_rdma_ch *ch; + int i, ret; + + /* XXX should send SRP_I_LOGOUT request */ + + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + ret = 0; + if (target->using_rdma_cm) { + if (ch->rdma_cm.cm_id) + rdma_disconnect(ch->rdma_cm.cm_id); + } else { + if (ch->ib_cm.cm_id) + ret = ib_send_cm_dreq(ch->ib_cm.cm_id, + NULL, 0); + } + if (ret < 0) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); + } + } +} + +static int srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct srp_target_port *target = host_to_target(shost); + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + struct srp_request *req = scsi_cmd_priv(cmd); + + kfree(req->fr_list); + if (req->indirect_dma_addr) { + ib_dma_unmap_single(ibdev, req->indirect_dma_addr, + target->indirect_size, + DMA_TO_DEVICE); + } + kfree(req->indirect_desc); + + return 0; +} + +static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct srp_target_port *target = host_to_target(shost); + struct srp_device *srp_dev = target->srp_host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; + struct srp_request *req = scsi_cmd_priv(cmd); + dma_addr_t dma_addr; + int ret = -ENOMEM; + + if (srp_dev->use_fast_reg) { + req->fr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), + GFP_KERNEL); + if (!req->fr_list) + goto out; + } + req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); + if (!req->indirect_desc) + goto out; + + dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, + target->indirect_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, dma_addr)) { + srp_exit_cmd_priv(shost, cmd); + goto out; + } + + req->indirect_dma_addr = dma_addr; + ret = 0; + +out: + return ret; +} + +/** + * srp_del_scsi_host_attr() - Remove attributes defined in the host template. + * @shost: SCSI host whose attributes to remove from sysfs. + * + * Note: Any attributes defined in the host template and that did not exist + * before invocation of this function will be ignored. + */ +static void srp_del_scsi_host_attr(struct Scsi_Host *shost) +{ + const struct attribute_group **g; + struct attribute **attr; + + for (g = shost->hostt->shost_groups; *g; ++g) { + for (attr = (*g)->attrs; *attr; ++attr) { + struct device_attribute *dev_attr = + container_of(*attr, typeof(*dev_attr), attr); + + device_remove_file(&shost->shost_dev, dev_attr); + } + } +} + +static void srp_remove_target(struct srp_target_port *target) +{ + struct srp_rdma_ch *ch; + int i; + + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + + srp_del_scsi_host_attr(target->scsi_host); + srp_rport_get(target->rport); + srp_remove_host(target->scsi_host); + scsi_remove_host(target->scsi_host); + srp_stop_rport_timers(target->rport); + srp_disconnect_target(target); + kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + srp_free_ch_ib(target, ch); + } + cancel_work_sync(&target->tl_err_work); + srp_rport_put(target->rport); + kfree(target->ch); + target->ch = NULL; + + spin_lock(&target->srp_host->target_lock); + list_del(&target->list); + spin_unlock(&target->srp_host->target_lock); + + scsi_host_put(target->scsi_host); +} + +static void srp_remove_work(struct work_struct *work) +{ + struct srp_target_port *target = + container_of(work, struct srp_target_port, remove_work); + + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + + srp_remove_target(target); +} + +static void srp_rport_delete(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + + srp_queue_remove_work(target); +} + +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. + */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + +static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) +{ + struct srp_target_port *target = ch->target; + int ret; + + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); + + ret = srp_lookup_path(ch); + if (ret) + goto out; + + while (1) { + init_completion(&ch->done); + ret = srp_send_req(ch, max_iu_len, multich); + if (ret) + goto out; + ret = wait_for_completion_interruptible(&ch->done); + if (ret < 0) + goto out; + + /* + * The CM event handling code will set status to + * SRP_PORT_REDIRECT if we get a port redirect REJ + * back, or SRP_DLID_REDIRECT if we get a lid/qp + * redirect REJ back. + */ + ret = ch->status; + switch (ret) { + case 0: + ch->connected = true; + goto out; + + case SRP_PORT_REDIRECT: + ret = srp_lookup_path(ch); + if (ret) + goto out; + break; + + case SRP_DLID_REDIRECT: + break; + + case SRP_STALE_CONN: + shost_printk(KERN_ERR, target->scsi_host, PFX + "giving up on stale connection\n"); + ret = -ECONNRESET; + goto out; + + default: + goto out; + } + } + +out: + return ret <= 0 ? ret : -ENODEV; +} + +static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) +{ + srp_handle_qp_err(cq, wc, "INV RKEY"); +} + +static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, + u32 rkey) +{ + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .next = NULL, + .num_sge = 0, + .send_flags = 0, + .ex.invalidate_rkey = rkey, + }; + + wr.wr_cqe = &req->reg_cqe; + req->reg_cqe.done = srp_inv_rkey_err_done; + return ib_post_send(ch->qp, &wr, NULL); +} + +static void srp_unmap_data(struct scsi_cmnd *scmnd, + struct srp_rdma_ch *ch, + struct srp_request *req) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + int i, res; + + if (!scsi_sglist(scmnd) || + (scmnd->sc_data_direction != DMA_TO_DEVICE && + scmnd->sc_data_direction != DMA_FROM_DEVICE)) + return; + + if (dev->use_fast_reg) { + struct srp_fr_desc **pfr; + + for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { + res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); + if (res < 0) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "Queueing INV WR for rkey %#x failed (%d)\n", + (*pfr)->mr->rkey, res); + queue_work(system_long_wq, + &target->tl_err_work); + } + } + if (req->nmdesc) + srp_fr_pool_put(ch->fr_pool, req->fr_list, + req->nmdesc); + } + + ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), + scmnd->sc_data_direction); +} + +/** + * srp_claim_req - Take ownership of the scmnd associated with a request. + * @ch: SRP RDMA channel. + * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device. + * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take + * ownership of @req->scmnd if it equals @scmnd. + * + * Return value: + * Either NULL or a pointer to the SCSI command the caller became owner of. + */ +static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, + struct srp_request *req, + struct scsi_device *sdev, + struct scsi_cmnd *scmnd) +{ + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + if (req->scmnd && + (!sdev || req->scmnd->device == sdev) && + (!scmnd || req->scmnd == scmnd)) { + scmnd = req->scmnd; + req->scmnd = NULL; + } else { + scmnd = NULL; + } + spin_unlock_irqrestore(&ch->lock, flags); + + return scmnd; +} + +/** + * srp_free_req() - Unmap data and adjust ch->req_lim. + * @ch: SRP RDMA channel. + * @req: Request to be freed. + * @scmnd: SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim. + */ +static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, + struct scsi_cmnd *scmnd, s32 req_lim_delta) +{ + unsigned long flags; + + srp_unmap_data(scmnd, ch, req); + + spin_lock_irqsave(&ch->lock, flags); + ch->req_lim += req_lim_delta; + spin_unlock_irqrestore(&ch->lock, flags); +} + +static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, + struct scsi_device *sdev, int result) +{ + struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); + + if (scmnd) { + srp_free_req(ch, req, scmnd, 0); + scmnd->result = result; + scsi_done(scmnd); + } +} + +struct srp_terminate_context { + struct srp_target_port *srp_target; + int scsi_result; +}; + +static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr) +{ + struct srp_terminate_context *context = context_ptr; + struct srp_target_port *target = context->srp_target; + u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd)); + struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; + struct srp_request *req = scsi_cmd_priv(scmnd); + + srp_finish_req(ch, req, NULL, context->scsi_result); + + return true; +} + +static void srp_terminate_io(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + struct srp_terminate_context context = { .srp_target = target, + .scsi_result = DID_TRANSPORT_FAILFAST << 16 }; + + scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, &context); +} + +/* Calculate maximum initiator to target information unit length. */ +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data, + uint32_t max_it_iu_size) +{ + uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf) + + cmd_sg_cnt * sizeof(struct srp_direct_buf); + + if (use_imm_data) + max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + + srp_max_imm_data); + + if (max_it_iu_size) + max_iu_len = min(max_iu_len, max_it_iu_size); + + pr_debug("max_iu_len = %d\n", max_iu_len); + + return max_iu_len; +} + +/* + * It is up to the caller to ensure that srp_rport_reconnect() calls are + * serialized and that no concurrent srp_queuecommand(), srp_abort(), + * srp_reset_device() or srp_reset_host() calls will occur while this function + * is in progress. One way to realize that is not to call this function + * directly but to call srp_reconnect_rport() instead since that last function + * serializes calls of this function via rport->mutex and also blocks + * srp_queuecommand() calls before invoking this function. + */ +static int srp_rport_reconnect(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + struct srp_rdma_ch *ch; + uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data, + target->max_it_iu_size); + int i, j, ret = 0; + bool multich = false; + + srp_disconnect_target(target); + + if (target->state == SRP_TARGET_SCANNING) + return -ENODEV; + + /* + * Now get a new local CM ID so that we avoid confusing the target in + * case things are really fouled up. Doing so also ensures that all CM + * callbacks will have finished before a new QP is allocated. + */ + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ret += srp_new_cm_id(ch); + } + { + struct srp_terminate_context context = { + .srp_target = target, .scsi_result = DID_RESET << 16}; + + scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, + &context); + } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all completion callback function + * invocations have finished before request resetting starts. + */ + ret += srp_create_ch_ib(ch); + + INIT_LIST_HEAD(&ch->free_tx); + for (j = 0; j < target->queue_size; ++j) + list_add(&ch->tx_ring[j]->list, &ch->free_tx); + } + + target->qp_in_error = false; + + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + if (ret) + break; + ret = srp_connect_ch(ch, max_iu_len, multich); + multich = true; + } + + if (ret == 0) + shost_printk(KERN_INFO, target->scsi_host, + PFX "reconnect succeeded\n"); + + return ret; +} + +static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, + unsigned int dma_len, u32 rkey) +{ + struct srp_direct_buf *desc = state->desc; + + WARN_ON_ONCE(!dma_len); + + desc->va = cpu_to_be64(dma_addr); + desc->key = cpu_to_be32(rkey); + desc->len = cpu_to_be32(dma_len); + + state->total_len += dma_len; + state->desc++; + state->ndesc++; +} + +static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) +{ + srp_handle_qp_err(cq, wc, "FAST REG"); +} + +/* + * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset + * where to start in the first element. If sg_offset_p != NULL then + * *sg_offset_p is updated to the offset in state->sg[retval] of the first + * byte that has not yet been mapped. + */ +static int srp_map_finish_fr(struct srp_map_state *state, + struct srp_request *req, + struct srp_rdma_ch *ch, int sg_nents, + unsigned int *sg_offset_p) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_reg_wr wr; + struct srp_fr_desc *desc; + u32 rkey; + int n, err; + + if (state->fr.next >= state->fr.end) { + shost_printk(KERN_ERR, ch->target->scsi_host, + PFX "Out of MRs (mr_per_cmd = %d)\n", + ch->target->mr_per_cmd); + return -ENOMEM; + } + + WARN_ON_ONCE(!dev->use_fast_reg); + + if (sg_nents == 1 && target->global_rkey) { + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; + + srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, + sg_dma_len(state->sg) - sg_offset, + target->global_rkey); + if (sg_offset_p) + *sg_offset_p = 0; + return 1; + } + + desc = srp_fr_pool_get(ch->fr_pool); + if (!desc) + return -ENOMEM; + + rkey = ib_inc_rkey(desc->mr->rkey); + ib_update_fast_reg_key(desc->mr, rkey); + + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, + dev->mr_page_size); + if (unlikely(n < 0)) { + srp_fr_pool_put(ch->fr_pool, &desc, 1); + pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", + dev_name(&req->scmnd->device->sdev_gendev), sg_nents, + sg_offset_p ? *sg_offset_p : -1, n); + return n; + } + + WARN_ON_ONCE(desc->mr->length == 0); + + req->reg_cqe.done = srp_reg_mr_err_done; + + wr.wr.next = NULL; + wr.wr.opcode = IB_WR_REG_MR; + wr.wr.wr_cqe = &req->reg_cqe; + wr.wr.num_sge = 0; + wr.wr.send_flags = 0; + wr.mr = desc->mr; + wr.key = desc->mr->rkey; + wr.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + + *state->fr.next++ = desc; + state->nmdesc++; + + srp_map_desc(state, desc->mr->iova, + desc->mr->length, desc->mr->rkey); + + err = ib_post_send(ch->qp, &wr.wr, NULL); + if (unlikely(err)) { + WARN_ON_ONCE(err == -ENOMEM); + return err; + } + + return n; +} + +static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + unsigned int sg_offset = 0; + + state->fr.next = req->fr_list; + state->fr.end = req->fr_list + ch->target->mr_per_cmd; + state->sg = scat; + + if (count == 0) + return 0; + + while (count) { + int i, n; + + n = srp_map_finish_fr(state, req, ch, count, &sg_offset); + if (unlikely(n < 0)) + return n; + + count -= n; + for (i = 0; i < n; i++) + state->sg = sg_next(state->sg); + } + + return 0; +} + +static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + struct srp_target_port *target = ch->target; + struct scatterlist *sg; + int i; + + for_each_sg(scat, sg, count, i) { + srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg), + target->global_rkey); + } + + return 0; +} + +/* + * Register the indirect data buffer descriptor with the HCA. + * + * Note: since the indirect data buffer descriptor has been allocated with + * kmalloc() it is guaranteed that this buffer is a physically contiguous + * memory buffer. + */ +static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, + void **next_mr, void **end_mr, u32 idb_len, + __be32 *idb_rkey) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + struct srp_map_state state; + struct srp_direct_buf idb_desc; + struct scatterlist idb_sg[1]; + int ret; + + memset(&state, 0, sizeof(state)); + memset(&idb_desc, 0, sizeof(idb_desc)); + state.gen.next = next_mr; + state.gen.end = end_mr; + state.desc = &idb_desc; + state.base_dma_addr = req->indirect_dma_addr; + state.dma_len = idb_len; + + if (dev->use_fast_reg) { + state.sg = idb_sg; + sg_init_one(idb_sg, req->indirect_desc, idb_len); + idb_sg->dma_address = req->indirect_dma_addr; /* hack! */ +#ifdef CONFIG_NEED_SG_DMA_LENGTH + idb_sg->dma_length = idb_sg->length; /* hack^2 */ +#endif + ret = srp_map_finish_fr(&state, req, ch, 1, NULL); + if (ret < 0) + return ret; + WARN_ON_ONCE(ret < 1); + } else { + return -EINVAL; + } + + *idb_rkey = idb_desc.key; + + return 0; +} + +static void srp_check_mapping(struct srp_map_state *state, + struct srp_rdma_ch *ch, struct srp_request *req, + struct scatterlist *scat, int count) +{ + struct srp_device *dev = ch->target->srp_host->srp_dev; + struct srp_fr_desc **pfr; + u64 desc_len = 0, mr_len = 0; + int i; + + for (i = 0; i < state->ndesc; i++) + desc_len += be32_to_cpu(req->indirect_desc[i].len); + if (dev->use_fast_reg) + for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) + mr_len += (*pfr)->mr->length; + if (desc_len != scsi_bufflen(req->scmnd) || + mr_len > scsi_bufflen(req->scmnd)) + pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", + scsi_bufflen(req->scmnd), desc_len, mr_len, + state->ndesc, state->nmdesc); +} + +/** + * srp_map_data() - map SCSI data buffer onto an SRP request + * @scmnd: SCSI command to map + * @ch: SRP RDMA channel + * @req: SRP request + * + * Returns the length in bytes of the SRP_CMD IU or a negative value if + * mapping failed. The size of any immediate data is not included in the + * return value. + */ +static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, + struct srp_request *req) +{ + struct srp_target_port *target = ch->target; + struct scatterlist *scat, *sg; + struct srp_cmd *cmd = req->cmd->buf; + int i, len, nents, count, ret; + struct srp_device *dev; + struct ib_device *ibdev; + struct srp_map_state state; + struct srp_indirect_buf *indirect_hdr; + u64 data_len; + u32 idb_len, table_len; + __be32 idb_rkey; + u8 fmt; + + req->cmd->num_sge = 1; + + if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) + return sizeof(struct srp_cmd) + cmd->add_cdb_len; + + if (scmnd->sc_data_direction != DMA_FROM_DEVICE && + scmnd->sc_data_direction != DMA_TO_DEVICE) { + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled data direction %d\n", + scmnd->sc_data_direction); + return -EINVAL; + } + + nents = scsi_sg_count(scmnd); + scat = scsi_sglist(scmnd); + data_len = scsi_bufflen(scmnd); + + dev = target->srp_host->srp_dev; + ibdev = dev->dev; + + count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); + if (unlikely(count == 0)) + return -EIO; + + if (ch->use_imm_data && + count <= ch->max_imm_sge && + SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && + scmnd->sc_data_direction == DMA_TO_DEVICE) { + struct srp_imm_buf *buf; + struct ib_sge *sge = &req->cmd->sge[1]; + + fmt = SRP_DATA_DESC_IMM; + len = SRP_IMM_DATA_OFFSET; + req->nmdesc = 0; + buf = (void *)cmd->add_data + cmd->add_cdb_len; + buf->len = cpu_to_be32(data_len); + WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); + for_each_sg(scat, sg, count, i) { + sge[i].addr = sg_dma_address(sg); + sge[i].length = sg_dma_len(sg); + sge[i].lkey = target->lkey; + } + req->cmd->num_sge += count; + goto map_complete; + } + + fmt = SRP_DATA_DESC_DIRECT; + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_direct_buf); + + if (count == 1 && target->global_rkey) { + /* + * The midlayer only generated a single gather/scatter + * entry, or DMA mapping coalesced everything to a + * single entry. So a direct descriptor along with + * the DMA MR suffices. + */ + struct srp_direct_buf *buf; + + buf = (void *)cmd->add_data + cmd->add_cdb_len; + buf->va = cpu_to_be64(sg_dma_address(scat)); + buf->key = cpu_to_be32(target->global_rkey); + buf->len = cpu_to_be32(sg_dma_len(scat)); + + req->nmdesc = 0; + goto map_complete; + } + + /* + * We have more than one scatter/gather entry, so build our indirect + * descriptor table, trying to merge as many entries as we can. + */ + indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len; + + ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, + target->indirect_size, DMA_TO_DEVICE); + + memset(&state, 0, sizeof(state)); + state.desc = req->indirect_desc; + if (dev->use_fast_reg) + ret = srp_map_sg_fr(&state, ch, req, scat, count); + else + ret = srp_map_sg_dma(&state, ch, req, scat, count); + req->nmdesc = state.nmdesc; + if (ret < 0) + goto unmap; + + { + DEFINE_DYNAMIC_DEBUG_METADATA(ddm, + "Memory mapping consistency check"); + if (DYNAMIC_DEBUG_BRANCH(ddm)) + srp_check_mapping(&state, ch, req, scat, count); + } + + /* We've mapped the request, now pull as much of the indirect + * descriptor table as we can into the command buffer. If this + * target is not using an external indirect table, we are + * guaranteed to fit into the command, as the SCSI layer won't + * give us more S/G entries than we allow. + */ + if (state.ndesc == 1) { + /* + * Memory registration collapsed the sg-list into one entry, + * so use a direct descriptor. + */ + struct srp_direct_buf *buf; + + buf = (void *)cmd->add_data + cmd->add_cdb_len; + *buf = req->indirect_desc[0]; + goto map_complete; + } + + if (unlikely(target->cmd_sg_cnt < state.ndesc && + !target->allow_ext_sg)) { + shost_printk(KERN_ERR, target->scsi_host, + "Could not fit S/G list into SRP_CMD\n"); + ret = -EIO; + goto unmap; + } + + count = min(state.ndesc, target->cmd_sg_cnt); + table_len = state.ndesc * sizeof (struct srp_direct_buf); + idb_len = sizeof(struct srp_indirect_buf) + table_len; + + fmt = SRP_DATA_DESC_INDIRECT; + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_indirect_buf); + len += count * sizeof (struct srp_direct_buf); + + memcpy(indirect_hdr->desc_list, req->indirect_desc, + count * sizeof (struct srp_direct_buf)); + + if (!target->global_rkey) { + ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, + idb_len, &idb_rkey); + if (ret < 0) + goto unmap; + req->nmdesc++; + } else { + idb_rkey = cpu_to_be32(target->global_rkey); + } + + indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); + indirect_hdr->table_desc.key = idb_rkey; + indirect_hdr->table_desc.len = cpu_to_be32(table_len); + indirect_hdr->len = cpu_to_be32(state.total_len); + + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->data_out_desc_cnt = count; + else + cmd->data_in_desc_cnt = count; + + ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, + DMA_TO_DEVICE); + +map_complete: + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->buf_fmt = fmt << 4; + else + cmd->buf_fmt = fmt; + + return len; + +unmap: + srp_unmap_data(scmnd, ch, req); + if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) + ret = -E2BIG; + return ret; +} + +/* + * Return an IU and possible credit to the free pool + */ +static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, + enum srp_iu_type iu_type) +{ + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + list_add(&iu->list, &ch->free_tx); + if (iu_type != SRP_IU_RSP) + ++ch->req_lim; + spin_unlock_irqrestore(&ch->lock, flags); +} + +/* + * Must be called with ch->lock held to protect req_lim and free_tx. + * If IU is not sent, it must be returned using srp_put_tx_iu(). + * + * Note: + * An upper limit for the number of allocated information units for each + * request type is: + * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues + * more than Scsi_Host.can_queue requests. + * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. + * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than + * one unanswered SRP request to an initiator. + */ +static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, + enum srp_iu_type iu_type) +{ + struct srp_target_port *target = ch->target; + s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; + struct srp_iu *iu; + + lockdep_assert_held(&ch->lock); + + ib_process_cq_direct(ch->send_cq, -1); + + if (list_empty(&ch->free_tx)) + return NULL; + + /* Initiator responses to target requests do not consume credits */ + if (iu_type != SRP_IU_RSP) { + if (ch->req_lim <= rsv) { + ++target->zero_req_lim; + return NULL; + } + + --ch->req_lim; + } + + iu = list_first_entry(&ch->free_tx, struct srp_iu, list); + list_del(&iu->list); + return iu; +} + +/* + * Note: if this function is called from inside ib_drain_sq() then it will + * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE + * with status IB_WC_SUCCESS then that's a bug. + */ +static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); + struct srp_rdma_ch *ch = cq->cq_context; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + srp_handle_qp_err(cq, wc, "SEND"); + return; + } + + lockdep_assert_held(&ch->lock); + + list_add(&iu->list, &ch->free_tx); +} + +/** + * srp_post_send() - send an SRP information unit + * @ch: RDMA channel over which to send the information unit. + * @iu: Information unit to send. + * @len: Length of the information unit excluding immediate data. + */ +static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) +{ + struct srp_target_port *target = ch->target; + struct ib_send_wr wr; + + if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE)) + return -EINVAL; + + iu->sge[0].addr = iu->dma; + iu->sge[0].length = len; + iu->sge[0].lkey = target->lkey; + + iu->cqe.done = srp_send_done; + + wr.next = NULL; + wr.wr_cqe = &iu->cqe; + wr.sg_list = &iu->sge[0]; + wr.num_sge = iu->num_sge; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + return ib_post_send(ch->qp, &wr, NULL); +} + +static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) +{ + struct srp_target_port *target = ch->target; + struct ib_recv_wr wr; + struct ib_sge list; + + list.addr = iu->dma; + list.length = iu->size; + list.lkey = target->lkey; + + iu->cqe.done = srp_recv_done; + + wr.next = NULL; + wr.wr_cqe = &iu->cqe; + wr.sg_list = &list; + wr.num_sge = 1; + + return ib_post_recv(ch->qp, &wr, NULL); +} + +static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) +{ + struct srp_target_port *target = ch->target; + struct srp_request *req; + struct scsi_cmnd *scmnd; + unsigned long flags; + + if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { + spin_lock_irqsave(&ch->lock, flags); + ch->req_lim += be32_to_cpu(rsp->req_lim_delta); + if (rsp->tag == ch->tsk_mgmt_tag) { + ch->tsk_mgmt_status = -1; + if (be32_to_cpu(rsp->resp_data_len) >= 4) + ch->tsk_mgmt_status = rsp->data[3]; + complete(&ch->tsk_mgmt_done); + } else { + shost_printk(KERN_ERR, target->scsi_host, + "Received tsk mgmt response too late for tag %#llx\n", + rsp->tag); + } + spin_unlock_irqrestore(&ch->lock, flags); + } else { + scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); + if (scmnd) { + req = scsi_cmd_priv(scmnd); + scmnd = srp_claim_req(ch, req, NULL, scmnd); + } + if (!scmnd) { + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", + rsp->tag, ch - target->ch, ch->qp->qp_num); + + spin_lock_irqsave(&ch->lock, flags); + ch->req_lim += be32_to_cpu(rsp->req_lim_delta); + spin_unlock_irqrestore(&ch->lock, flags); + + return; + } + scmnd->result = rsp->status; + + if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { + memcpy(scmnd->sense_buffer, rsp->data + + be32_to_cpu(rsp->resp_data_len), + min_t(int, be32_to_cpu(rsp->sense_data_len), + SCSI_SENSE_BUFFERSIZE)); + } + + if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); + + srp_free_req(ch, req, scmnd, + be32_to_cpu(rsp->req_lim_delta)); + + scsi_done(scmnd); + } +} + +static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, + void *rsp, int len) +{ + struct srp_target_port *target = ch->target; + struct ib_device *dev = target->srp_host->srp_dev->dev; + unsigned long flags; + struct srp_iu *iu; + int err; + + spin_lock_irqsave(&ch->lock, flags); + ch->req_lim += req_delta; + iu = __srp_get_tx_iu(ch, SRP_IU_RSP); + spin_unlock_irqrestore(&ch->lock, flags); + + if (!iu) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "no IU available to send response\n"); + return 1; + } + + iu->num_sge = 1; + ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); + memcpy(iu->buf, rsp, len); + ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); + + err = srp_post_send(ch, iu, len); + if (err) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "unable to post response: %d\n", err); + srp_put_tx_iu(ch, iu, SRP_IU_RSP); + } + + return err; +} + +static void srp_process_cred_req(struct srp_rdma_ch *ch, + struct srp_cred_req *req) +{ + struct srp_cred_rsp rsp = { + .opcode = SRP_CRED_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) + shost_printk(KERN_ERR, ch->target->scsi_host, PFX + "problems processing SRP_CRED_REQ\n"); +} + +static void srp_process_aer_req(struct srp_rdma_ch *ch, + struct srp_aer_req *req) +{ + struct srp_target_port *target = ch->target; + struct srp_aer_rsp rsp = { + .opcode = SRP_AER_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + shost_printk(KERN_ERR, target->scsi_host, PFX + "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); + + if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) + shost_printk(KERN_ERR, target->scsi_host, PFX + "problems processing SRP_AER_REQ\n"); +} + +static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); + struct srp_rdma_ch *ch = cq->cq_context; + struct srp_target_port *target = ch->target; + struct ib_device *dev = target->srp_host->srp_dev->dev; + int res; + u8 opcode; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + srp_handle_qp_err(cq, wc, "RECV"); + return; + } + + ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, + DMA_FROM_DEVICE); + + opcode = *(u8 *) iu->buf; + + if (0) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "recv completion, opcode 0x%02x\n", opcode); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, + iu->buf, wc->byte_len, true); + } + + switch (opcode) { + case SRP_RSP: + srp_process_rsp(ch, iu->buf); + break; + + case SRP_CRED_REQ: + srp_process_cred_req(ch, iu->buf); + break; + + case SRP_AER_REQ: + srp_process_aer_req(ch, iu->buf); + break; + + case SRP_T_LOGOUT: + /* XXX Handle target logout */ + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Got target logout request\n"); + break; + + default: + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled SRP opcode 0x%02x\n", opcode); + break; + } + + ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, + DMA_FROM_DEVICE); + + res = srp_post_recv(ch, iu); + if (res != 0) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Recv failed with error code %d\n", res); +} + +/** + * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. + * + * Note: This function may get invoked before the rport has been created, + * hence the target->rport test. + */ +static void srp_tl_err_work(struct work_struct *work) +{ + struct srp_target_port *target; + + target = container_of(work, struct srp_target_port, tl_err_work); + if (target->rport) + srp_start_tl_fail_timers(target->rport); +} + +static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, + const char *opname) +{ + struct srp_rdma_ch *ch = cq->cq_context; + struct srp_target_port *target = ch->target; + + if (ch->connected && !target->qp_in_error) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %s (%d) for CQE %p\n", + opname, ib_wc_status_msg(wc->status), wc->status, + wc->wr_cqe); + queue_work(system_long_wq, &target->tl_err_work); + } + target->qp_in_error = true; +} + +static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) +{ + struct request *rq = scsi_cmd_to_rq(scmnd); + struct srp_target_port *target = host_to_target(shost); + struct srp_rdma_ch *ch; + struct srp_request *req = scsi_cmd_priv(scmnd); + struct srp_iu *iu; + struct srp_cmd *cmd; + struct ib_device *dev; + unsigned long flags; + u32 tag; + int len, ret; + + scmnd->result = srp_chkready(target->rport); + if (unlikely(scmnd->result)) + goto err; + + WARN_ON_ONCE(rq->tag < 0); + tag = blk_mq_unique_tag(rq); + ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; + + spin_lock_irqsave(&ch->lock, flags); + iu = __srp_get_tx_iu(ch, SRP_IU_CMD); + spin_unlock_irqrestore(&ch->lock, flags); + + if (!iu) + goto err; + + dev = target->srp_host->srp_dev->dev; + ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, + DMA_TO_DEVICE); + + cmd = iu->buf; + memset(cmd, 0, sizeof *cmd); + + cmd->opcode = SRP_CMD; + int_to_scsilun(scmnd->device->lun, &cmd->lun); + cmd->tag = tag; + memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); + if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) { + cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb), + 4); + if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN)) + goto err_iu; + } + + req->scmnd = scmnd; + req->cmd = iu; + + len = srp_map_data(scmnd, ch, req); + if (len < 0) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "Failed to map data (%d)\n", len); + /* + * If we ran out of memory descriptors (-ENOMEM) because an + * application is queuing many requests with more than + * max_pages_per_mr sg-list elements, tell the SCSI mid-layer + * to reduce queue depth temporarily. + */ + scmnd->result = len == -ENOMEM ? + DID_OK << 16 | SAM_STAT_TASK_SET_FULL : DID_ERROR << 16; + goto err_iu; + } + + ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len, + DMA_TO_DEVICE); + + if (srp_post_send(ch, iu, len)) { + shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); + scmnd->result = DID_ERROR << 16; + goto err_unmap; + } + + return 0; + +err_unmap: + srp_unmap_data(scmnd, ch, req); + +err_iu: + srp_put_tx_iu(ch, iu, SRP_IU_CMD); + + /* + * Avoid that the loops that iterate over the request ring can + * encounter a dangling SCSI command pointer. + */ + req->scmnd = NULL; + +err: + if (scmnd->result) { + scsi_done(scmnd); + ret = 0; + } else { + ret = SCSI_MLQUEUE_HOST_BUSY; + } + + return ret; +} + +/* + * Note: the resources allocated in this function are freed in + * srp_free_ch_ib(). + */ +static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + int i; + + ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), + GFP_KERNEL); + if (!ch->rx_ring) + goto err_no_ring; + ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), + GFP_KERNEL); + if (!ch->tx_ring) + goto err_no_ring; + + for (i = 0; i < target->queue_size; ++i) { + ch->rx_ring[i] = srp_alloc_iu(target->srp_host, + ch->max_ti_iu_len, + GFP_KERNEL, DMA_FROM_DEVICE); + if (!ch->rx_ring[i]) + goto err; + } + + for (i = 0; i < target->queue_size; ++i) { + ch->tx_ring[i] = srp_alloc_iu(target->srp_host, + ch->max_it_iu_len, + GFP_KERNEL, DMA_TO_DEVICE); + if (!ch->tx_ring[i]) + goto err; + + list_add(&ch->tx_ring[i]->list, &ch->free_tx); + } + + return 0; + +err: + for (i = 0; i < target->queue_size; ++i) { + srp_free_iu(target->srp_host, ch->rx_ring[i]); + srp_free_iu(target->srp_host, ch->tx_ring[i]); + } + + +err_no_ring: + kfree(ch->tx_ring); + ch->tx_ring = NULL; + kfree(ch->rx_ring); + ch->rx_ring = NULL; + + return -ENOMEM; +} + +static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) +{ + uint64_t T_tr_ns, max_compl_time_ms; + uint32_t rq_tmo_jiffies; + + /* + * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, + * table 91), both the QP timeout and the retry count have to be set + * for RC QP's during the RTR to RTS transition. + */ + WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != + (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); + + /* + * Set target->rq_tmo_jiffies to one second more than the largest time + * it can take before an error completion is generated. See also + * C9-140..142 in the IBTA spec for more information about how to + * convert the QP Local ACK Timeout value to nanoseconds. + */ + T_tr_ns = 4096 * (1ULL << qp_attr->timeout); + max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; + do_div(max_compl_time_ms, NSEC_PER_MSEC); + rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); + + return rq_tmo_jiffies; +} + +static void srp_cm_rep_handler(struct ib_cm_id *cm_id, + const struct srp_login_rsp *lrsp, + struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + struct ib_qp_attr *qp_attr = NULL; + int attr_mask = 0; + int ret = 0; + int i; + + if (lrsp->opcode == SRP_LOGIN_RSP) { + ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); + ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); + ch->use_imm_data = srp_use_imm_data && + (lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP); + ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + ch->use_imm_data, + target->max_it_iu_size); + WARN_ON_ONCE(ch->max_it_iu_len > + be32_to_cpu(lrsp->max_it_iu_len)); + + if (ch->use_imm_data) + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "using immediate data\n"); + + /* + * Reserve credits for task management so we don't + * bounce requests back to the SCSI mid-layer. + */ + target->scsi_host->can_queue + = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, + target->scsi_host->can_queue); + target->scsi_host->cmd_per_lun + = min_t(int, target->scsi_host->can_queue, + target->scsi_host->cmd_per_lun); + } else { + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); + ret = -ECONNRESET; + goto error; + } + + if (!ch->rx_ring) { + ret = srp_alloc_iu_bufs(ch); + if (ret) + goto error; + } + + for (i = 0; i < target->queue_size; i++) { + struct srp_iu *iu = ch->rx_ring[i]; + + ret = srp_post_recv(ch, iu); + if (ret) + goto error; + } + + if (!target->using_rdma_cm) { + ret = -ENOMEM; + qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); + if (!qp_attr) + goto error; + + qp_attr->qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + qp_attr->qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); + + ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + } + +error_free: + kfree(qp_attr); + +error: + ch->status = ret; +} + +static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event, + struct srp_rdma_ch *ch) +{ + struct srp_target_port *target = ch->target; + struct Scsi_Host *shost = target->scsi_host; + struct ib_class_port_info *cpi; + int opcode; + u16 dlid; + + switch (event->param.rej_rcvd.reason) { + case IB_CM_REJ_PORT_CM_REDIRECT: + cpi = event->param.rej_rcvd.ari; + dlid = be16_to_cpu(cpi->redirect_lid); + sa_path_set_dlid(&ch->ib_cm.path, dlid); + ch->ib_cm.path.pkey = cpi->redirect_pkey; + cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; + memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16); + + ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; + break; + + case IB_CM_REJ_PORT_REDIRECT: + if (srp_target_is_topspin(target)) { + union ib_gid *dgid = &ch->ib_cm.path.dgid; + + /* + * Topspin/Cisco SRP gateways incorrectly send + * reject reason code 25 when they mean 24 + * (port redirect). + */ + memcpy(dgid->raw, event->param.rej_rcvd.ari, 16); + + shost_printk(KERN_DEBUG, shost, + PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", + be64_to_cpu(dgid->global.subnet_prefix), + be64_to_cpu(dgid->global.interface_id)); + + ch->status = SRP_PORT_REDIRECT; + } else { + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); + ch->status = -ECONNRESET; + } + break; + + case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + ch->status = -ECONNRESET; + break; + + case IB_CM_REJ_CONSUMER_DEFINED: + opcode = *(u8 *) event->private_data; + if (opcode == SRP_LOGIN_REJ) { + struct srp_login_rej *rej = event->private_data; + u32 reason = be32_to_cpu(rej->reason); + + if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) + shost_printk(KERN_WARNING, shost, + PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + else + shost_printk(KERN_WARNING, shost, PFX + "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", + target->sgid.raw, + target->ib_cm.orig_dgid.raw, + reason); + } else + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," + " opcode 0x%02x\n", opcode); + ch->status = -ECONNRESET; + break; + + case IB_CM_REJ_STALE_CONN: + shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); + ch->status = SRP_STALE_CONN; + break; + + default: + shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", + event->param.rej_rcvd.reason); + ch->status = -ECONNRESET; + } +} + +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) +{ + struct srp_rdma_ch *ch = cm_id->context; + struct srp_target_port *target = ch->target; + int comp = 0; + + switch (event->event) { + case IB_CM_REQ_ERROR: + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM REQ failed\n"); + comp = 1; + ch->status = -ECONNRESET; + break; + + case IB_CM_REP_RECEIVED: + comp = 1; + srp_cm_rep_handler(cm_id, event->private_data, ch); + break; + + case IB_CM_REJ_RECEIVED: + shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); + comp = 1; + + srp_ib_cm_rej_handler(cm_id, event, ch); + break; + + case IB_CM_DREQ_RECEIVED: + shost_printk(KERN_WARNING, target->scsi_host, + PFX "DREQ received - connection closed\n"); + ch->connected = false; + if (ib_send_cm_drep(cm_id, NULL, 0)) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Sending CM DREP failed\n"); + queue_work(system_long_wq, &target->tl_err_work); + break; + + case IB_CM_TIMEWAIT_EXIT: + shost_printk(KERN_ERR, target->scsi_host, + PFX "connection closed\n"); + comp = 1; + + ch->status = 0; + break; + + case IB_CM_MRA_RECEIVED: + case IB_CM_DREQ_ERROR: + case IB_CM_DREP_RECEIVED: + break; + + default: + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled CM event %d\n", event->event); + break; + } + + if (comp) + complete(&ch->done); + + return 0; +} + +static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch, + struct rdma_cm_event *event) +{ + struct srp_target_port *target = ch->target; + struct Scsi_Host *shost = target->scsi_host; + int opcode; + + switch (event->status) { + case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + ch->status = -ECONNRESET; + break; + + case IB_CM_REJ_CONSUMER_DEFINED: + opcode = *(u8 *) event->param.conn.private_data; + if (opcode == SRP_LOGIN_REJ) { + struct srp_login_rej *rej = + (struct srp_login_rej *) + event->param.conn.private_data; + u32 reason = be32_to_cpu(rej->reason); + + if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) + shost_printk(KERN_WARNING, shost, + PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + else + shost_printk(KERN_WARNING, shost, + PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + } else { + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n", + opcode); + } + ch->status = -ECONNRESET; + break; + + case IB_CM_REJ_STALE_CONN: + shost_printk(KERN_WARNING, shost, + " REJ reason: stale connection\n"); + ch->status = SRP_STALE_CONN; + break; + + default: + shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", + event->status); + ch->status = -ECONNRESET; + break; + } +} + +static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct srp_rdma_ch *ch = cm_id->context; + struct srp_target_port *target = ch->target; + int comp = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + ch->status = 0; + comp = 1; + break; + + case RDMA_CM_EVENT_ADDR_ERROR: + ch->status = -ENXIO; + comp = 1; + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + ch->status = 0; + comp = 1; + break; + + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + ch->status = -EHOSTUNREACH; + comp = 1; + break; + + case RDMA_CM_EVENT_CONNECT_ERROR: + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM REQ failed\n"); + comp = 1; + ch->status = -ECONNRESET; + break; + + case RDMA_CM_EVENT_ESTABLISHED: + comp = 1; + srp_cm_rep_handler(NULL, event->param.conn.private_data, ch); + break; + + case RDMA_CM_EVENT_REJECTED: + shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); + comp = 1; + + srp_rdma_cm_rej_handler(ch, event); + break; + + case RDMA_CM_EVENT_DISCONNECTED: + if (ch->connected) { + shost_printk(KERN_WARNING, target->scsi_host, + PFX "received DREQ\n"); + rdma_disconnect(ch->rdma_cm.cm_id); + comp = 1; + ch->status = 0; + queue_work(system_long_wq, &target->tl_err_work); + } + break; + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + shost_printk(KERN_ERR, target->scsi_host, + PFX "connection closed\n"); + + comp = 1; + ch->status = 0; + break; + + default: + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled CM event %d\n", event->event); + break; + } + + if (comp) + complete(&ch->done); + + return 0; +} + +/** + * srp_change_queue_depth - setting device queue depth + * @sdev: scsi device struct + * @qdepth: requested queue depth + * + * Returns queue depth. + */ +static int +srp_change_queue_depth(struct scsi_device *sdev, int qdepth) +{ + if (!sdev->tagged_supported) + qdepth = 1; + return scsi_change_queue_depth(sdev, qdepth); +} + +static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, + u8 func, u8 *status) +{ + struct srp_target_port *target = ch->target; + struct srp_rport *rport = target->rport; + struct ib_device *dev = target->srp_host->srp_dev->dev; + struct srp_iu *iu; + struct srp_tsk_mgmt *tsk_mgmt; + int res; + + if (!ch->connected || target->qp_in_error) + return -1; + + /* + * Lock the rport mutex to avoid that srp_create_ch_ib() is + * invoked while a task management function is being sent. + */ + mutex_lock(&rport->mutex); + spin_lock_irq(&ch->lock); + iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); + spin_unlock_irq(&ch->lock); + + if (!iu) { + mutex_unlock(&rport->mutex); + + return -1; + } + + iu->num_sge = 1; + + ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); + tsk_mgmt = iu->buf; + memset(tsk_mgmt, 0, sizeof *tsk_mgmt); + + tsk_mgmt->opcode = SRP_TSK_MGMT; + int_to_scsilun(lun, &tsk_mgmt->lun); + tsk_mgmt->tsk_mgmt_func = func; + tsk_mgmt->task_tag = req_tag; + + spin_lock_irq(&ch->lock); + ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; + tsk_mgmt->tag = ch->tsk_mgmt_tag; + spin_unlock_irq(&ch->lock); + + init_completion(&ch->tsk_mgmt_done); + + ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); + if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { + srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); + mutex_unlock(&rport->mutex); + + return -1; + } + res = wait_for_completion_timeout(&ch->tsk_mgmt_done, + msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); + if (res > 0 && status) + *status = ch->tsk_mgmt_status; + mutex_unlock(&rport->mutex); + + WARN_ON_ONCE(res < 0); + + return res > 0 ? 0 : -1; +} + +static int srp_abort(struct scsi_cmnd *scmnd) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req = scsi_cmd_priv(scmnd); + u32 tag; + u16 ch_idx; + struct srp_rdma_ch *ch; + + shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); + + tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd)); + ch_idx = blk_mq_unique_tag_to_hwq(tag); + if (WARN_ON_ONCE(ch_idx >= target->ch_count)) + return SUCCESS; + ch = &target->ch[ch_idx]; + if (!srp_claim_req(ch, req, NULL, scmnd)) + return SUCCESS; + shost_printk(KERN_ERR, target->scsi_host, + "Sending SRP abort for tag %#x\n", tag); + if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, + SRP_TSK_ABORT_TASK, NULL) == 0) { + srp_free_req(ch, req, scmnd, 0); + return SUCCESS; + } + if (target->rport->state == SRP_RPORT_LOST) + return FAST_IO_FAIL; + + return FAILED; +} + +static int srp_reset_device(struct scsi_cmnd *scmnd) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_rdma_ch *ch; + u8 status; + + shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); + + ch = &target->ch[0]; + if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, + SRP_TSK_LUN_RESET, &status)) + return FAILED; + if (status) + return FAILED; + + return SUCCESS; +} + +static int srp_reset_host(struct scsi_cmnd *scmnd) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + + shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); + + return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; +} + +static int srp_target_alloc(struct scsi_target *starget) +{ + struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + struct srp_target_port *target = host_to_target(shost); + + if (target->target_can_queue) + starget->can_queue = target->target_can_queue; + return 0; +} + +static int srp_slave_configure(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct srp_target_port *target = host_to_target(shost); + struct request_queue *q = sdev->request_queue; + unsigned long timeout; + + if (sdev->type == TYPE_DISK) { + timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); + blk_queue_rq_timeout(q, timeout); + } + + return 0; +} + +static ssize_t id_ext_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); +} + +static DEVICE_ATTR_RO(id_ext); + +static ssize_t ioc_guid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); +} + +static DEVICE_ATTR_RO(ioc_guid); + +static ssize_t service_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + if (target->using_rdma_cm) + return -ENOENT; + return sysfs_emit(buf, "0x%016llx\n", + be64_to_cpu(target->ib_cm.service_id)); +} + +static DEVICE_ATTR_RO(service_id); + +static ssize_t pkey_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + if (target->using_rdma_cm) + return -ENOENT; + + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); +} + +static DEVICE_ATTR_RO(pkey); + +static ssize_t sgid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); +} + +static DEVICE_ATTR_RO(sgid); + +static ssize_t dgid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + struct srp_rdma_ch *ch = &target->ch[0]; + + if (target->using_rdma_cm) + return -ENOENT; + + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); +} + +static DEVICE_ATTR_RO(dgid); + +static ssize_t orig_dgid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + if (target->using_rdma_cm) + return -ENOENT; + + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); +} + +static DEVICE_ATTR_RO(orig_dgid); + +static ssize_t req_lim_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + struct srp_rdma_ch *ch; + int i, req_lim = INT_MAX; + + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + req_lim = min(req_lim, ch->req_lim); + } + + return sysfs_emit(buf, "%d\n", req_lim); +} + +static DEVICE_ATTR_RO(req_lim); + +static ssize_t zero_req_lim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%d\n", target->zero_req_lim); +} + +static DEVICE_ATTR_RO(zero_req_lim); + +static ssize_t local_ib_port_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%u\n", target->srp_host->port); +} + +static DEVICE_ATTR_RO(local_ib_port); + +static ssize_t local_ib_device_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%s\n", + dev_name(&target->srp_host->srp_dev->dev->dev)); +} + +static DEVICE_ATTR_RO(local_ib_device); + +static ssize_t ch_count_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%d\n", target->ch_count); +} + +static DEVICE_ATTR_RO(ch_count); + +static ssize_t comp_vector_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%d\n", target->comp_vector); +} + +static DEVICE_ATTR_RO(comp_vector); + +static ssize_t tl_retry_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%d\n", target->tl_retry_count); +} + +static DEVICE_ATTR_RO(tl_retry_count); + +static ssize_t cmd_sg_entries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); +} + +static DEVICE_ATTR_RO(cmd_sg_entries); + +static ssize_t allow_ext_sg_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); +} + +static DEVICE_ATTR_RO(allow_ext_sg); + +static struct attribute *srp_host_attrs[] = { + &dev_attr_id_ext.attr, + &dev_attr_ioc_guid.attr, + &dev_attr_service_id.attr, + &dev_attr_pkey.attr, + &dev_attr_sgid.attr, + &dev_attr_dgid.attr, + &dev_attr_orig_dgid.attr, + &dev_attr_req_lim.attr, + &dev_attr_zero_req_lim.attr, + &dev_attr_local_ib_port.attr, + &dev_attr_local_ib_device.attr, + &dev_attr_ch_count.attr, + &dev_attr_comp_vector.attr, + &dev_attr_tl_retry_count.attr, + &dev_attr_cmd_sg_entries.attr, + &dev_attr_allow_ext_sg.attr, + NULL +}; + +ATTRIBUTE_GROUPS(srp_host); + +static struct scsi_host_template srp_template = { + .module = THIS_MODULE, + .name = "InfiniBand SRP initiator", + .proc_name = DRV_NAME, + .target_alloc = srp_target_alloc, + .slave_configure = srp_slave_configure, + .info = srp_target_info, + .init_cmd_priv = srp_init_cmd_priv, + .exit_cmd_priv = srp_exit_cmd_priv, + .queuecommand = srp_queuecommand, + .change_queue_depth = srp_change_queue_depth, + .eh_timed_out = srp_timed_out, + .eh_abort_handler = srp_abort, + .eh_device_reset_handler = srp_reset_device, + .eh_host_reset_handler = srp_reset_host, + .skip_settle_delay = true, + .sg_tablesize = SRP_DEF_SG_TABLESIZE, + .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, + .this_id = -1, + .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, + .shost_groups = srp_host_groups, + .track_queue_depth = 1, + .cmd_size = sizeof(struct srp_request), +}; + +static int srp_sdev_count(struct Scsi_Host *host) +{ + struct scsi_device *sdev; + int c = 0; + + shost_for_each_device(sdev, host) + c++; + + return c; +} + +/* + * Return values: + * < 0 upon failure. Caller is responsible for SRP target port cleanup. + * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port + * removal has been scheduled. + * 0 and target->state != SRP_TARGET_REMOVED upon success. + */ +static int srp_add_target(struct srp_host *host, struct srp_target_port *target) +{ + struct srp_rport_identifiers ids; + struct srp_rport *rport; + + target->state = SRP_TARGET_SCANNING; + sprintf(target->target_name, "SRP.T10:%016llX", + be64_to_cpu(target->id_ext)); + + if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) + return -ENODEV; + + memcpy(ids.port_id, &target->id_ext, 8); + memcpy(ids.port_id + 8, &target->ioc_guid, 8); + ids.roles = SRP_RPORT_ROLE_TARGET; + rport = srp_rport_add(target->scsi_host, &ids); + if (IS_ERR(rport)) { + scsi_remove_host(target->scsi_host); + return PTR_ERR(rport); + } + + rport->lld_data = target; + target->rport = rport; + + spin_lock(&host->target_lock); + list_add_tail(&target->list, &host->target_list); + spin_unlock(&host->target_lock); + + scsi_scan_target(&target->scsi_host->shost_gendev, + 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); + + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "SCSI scan failed - removing SCSI host\n"); + srp_queue_remove_work(target); + goto out; + } + + pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", + dev_name(&target->scsi_host->shost_gendev), + srp_sdev_count(target->scsi_host)); + + spin_lock_irq(&target->lock); + if (target->state == SRP_TARGET_SCANNING) + target->state = SRP_TARGET_LIVE; + spin_unlock_irq(&target->lock); + +out: + return 0; +} + +static void srp_release_dev(struct device *dev) +{ + struct srp_host *host = + container_of(dev, struct srp_host, dev); + + kfree(host); +} + +static struct attribute *srp_class_attrs[]; + +ATTRIBUTE_GROUPS(srp_class); + +static struct class srp_class = { + .name = "infiniband_srp", + .dev_groups = srp_class_groups, + .dev_release = srp_release_dev +}; + +/** + * srp_conn_unique() - check whether the connection to a target is unique + * @host: SRP host. + * @target: SRP target port. + */ +static bool srp_conn_unique(struct srp_host *host, + struct srp_target_port *target) +{ + struct srp_target_port *t; + bool ret = false; + + if (target->state == SRP_TARGET_REMOVED) + goto out; + + ret = true; + + spin_lock(&host->target_lock); + list_for_each_entry(t, &host->target_list, list) { + if (t != target && + target->id_ext == t->id_ext && + target->ioc_guid == t->ioc_guid && + target->initiator_ext == t->initiator_ext) { + ret = false; + break; + } + } + spin_unlock(&host->target_lock); + +out: + return ret; +} + +/* + * Target ports are added by writing + * + * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, + * pkey=<P_Key>,service_id=<service ID> + * or + * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>, + * [src=<IPv4 address>,]dest=<IPv4 address>:<port number> + * + * to the add_target sysfs attribute. + */ +enum { + SRP_OPT_ERR = 0, + SRP_OPT_ID_EXT = 1 << 0, + SRP_OPT_IOC_GUID = 1 << 1, + SRP_OPT_DGID = 1 << 2, + SRP_OPT_PKEY = 1 << 3, + SRP_OPT_SERVICE_ID = 1 << 4, + SRP_OPT_MAX_SECT = 1 << 5, + SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, + SRP_OPT_IO_CLASS = 1 << 7, + SRP_OPT_INITIATOR_EXT = 1 << 8, + SRP_OPT_CMD_SG_ENTRIES = 1 << 9, + SRP_OPT_ALLOW_EXT_SG = 1 << 10, + SRP_OPT_SG_TABLESIZE = 1 << 11, + SRP_OPT_COMP_VECTOR = 1 << 12, + SRP_OPT_TL_RETRY_COUNT = 1 << 13, + SRP_OPT_QUEUE_SIZE = 1 << 14, + SRP_OPT_IP_SRC = 1 << 15, + SRP_OPT_IP_DEST = 1 << 16, + SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, + SRP_OPT_MAX_IT_IU_SIZE = 1 << 18, + SRP_OPT_CH_COUNT = 1 << 19, +}; + +static unsigned int srp_opt_mandatory[] = { + SRP_OPT_ID_EXT | + SRP_OPT_IOC_GUID | + SRP_OPT_DGID | + SRP_OPT_PKEY | + SRP_OPT_SERVICE_ID, + SRP_OPT_ID_EXT | + SRP_OPT_IOC_GUID | + SRP_OPT_IP_DEST, +}; + +static const match_table_t srp_opt_tokens = { + { SRP_OPT_ID_EXT, "id_ext=%s" }, + { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, + { SRP_OPT_DGID, "dgid=%s" }, + { SRP_OPT_PKEY, "pkey=%x" }, + { SRP_OPT_SERVICE_ID, "service_id=%s" }, + { SRP_OPT_MAX_SECT, "max_sect=%d" }, + { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, + { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" }, + { SRP_OPT_IO_CLASS, "io_class=%x" }, + { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, + { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, + { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, + { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, + { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, + { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, + { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, + { SRP_OPT_IP_SRC, "src=%s" }, + { SRP_OPT_IP_DEST, "dest=%s" }, + { SRP_OPT_MAX_IT_IU_SIZE, "max_it_iu_size=%d" }, + { SRP_OPT_CH_COUNT, "ch_count=%u", }, + { SRP_OPT_ERR, NULL } +}; + +/** + * srp_parse_in - parse an IP address and port number combination + * @net: [in] Network namespace. + * @sa: [out] Address family, IP address and port number. + * @addr_port_str: [in] IP address and port number. + * @has_port: [out] Whether or not @addr_port_str includes a port number. + * + * Parse the following address formats: + * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. + * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. + */ +static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, + const char *addr_port_str, bool *has_port) +{ + char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); + char *port_str; + int ret; + + if (!addr) + return -ENOMEM; + port_str = strrchr(addr, ':'); + if (port_str && strchr(port_str, ']')) + port_str = NULL; + if (port_str) + *port_str++ = '\0'; + if (has_port) + *has_port = port_str != NULL; + ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); + if (ret && addr[0]) { + addr_end = addr + strlen(addr) - 1; + if (addr[0] == '[' && *addr_end == ']') { + *addr_end = '\0'; + ret = inet_pton_with_scope(net, AF_INET6, addr + 1, + port_str, sa); + } + } + kfree(addr); + pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); + return ret; +} + +static int srp_parse_options(struct net *net, const char *buf, + struct srp_target_port *target) +{ + char *options, *sep_opt; + char *p; + substring_t args[MAX_OPT_ARGS]; + unsigned long long ull; + bool has_port; + int opt_mask = 0; + int token; + int ret = -EINVAL; + int i; + + options = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + sep_opt = options; + while ((p = strsep(&sep_opt, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, srp_opt_tokens, args); + opt_mask |= token; + + switch (token) { + case SRP_OPT_ID_EXT: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = kstrtoull(p, 16, &ull); + if (ret) { + pr_warn("invalid id_ext parameter '%s'\n", p); + kfree(p); + goto out; + } + target->id_ext = cpu_to_be64(ull); + kfree(p); + break; + + case SRP_OPT_IOC_GUID: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = kstrtoull(p, 16, &ull); + if (ret) { + pr_warn("invalid ioc_guid parameter '%s'\n", p); + kfree(p); + goto out; + } + target->ioc_guid = cpu_to_be64(ull); + kfree(p); + break; + + case SRP_OPT_DGID: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + if (strlen(p) != 32) { + pr_warn("bad dest GID parameter '%s'\n", p); + kfree(p); + goto out; + } + + ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16); + kfree(p); + if (ret < 0) + goto out; + break; + + case SRP_OPT_PKEY: + ret = match_hex(args, &token); + if (ret) { + pr_warn("bad P_Key parameter '%s'\n", p); + goto out; + } + target->ib_cm.pkey = cpu_to_be16(token); + break; + + case SRP_OPT_SERVICE_ID: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = kstrtoull(p, 16, &ull); + if (ret) { + pr_warn("bad service_id parameter '%s'\n", p); + kfree(p); + goto out; + } + target->ib_cm.service_id = cpu_to_be64(ull); + kfree(p); + break; + + case SRP_OPT_IP_SRC: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = srp_parse_in(net, &target->rdma_cm.src.ss, p, + NULL); + if (ret < 0) { + pr_warn("bad source parameter '%s'\n", p); + kfree(p); + goto out; + } + target->rdma_cm.src_specified = true; + kfree(p); + break; + + case SRP_OPT_IP_DEST: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p, + &has_port); + if (!has_port) + ret = -EINVAL; + if (ret < 0) { + pr_warn("bad dest parameter '%s'\n", p); + kfree(p); + goto out; + } + target->using_rdma_cm = true; + kfree(p); + break; + + case SRP_OPT_MAX_SECT: + ret = match_int(args, &token); + if (ret) { + pr_warn("bad max sect parameter '%s'\n", p); + goto out; + } + target->scsi_host->max_sectors = token; + break; + + case SRP_OPT_QUEUE_SIZE: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { + pr_warn("bad queue_size parameter '%s'\n", p); + ret = -EINVAL; + goto out; + } + target->scsi_host->can_queue = token; + target->queue_size = token + SRP_RSP_SQ_SIZE + + SRP_TSK_MGMT_SQ_SIZE; + if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) + target->scsi_host->cmd_per_lun = token; + break; + + case SRP_OPT_MAX_CMD_PER_LUN: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { + pr_warn("bad max cmd_per_lun parameter '%s'\n", + p); + ret = -EINVAL; + goto out; + } + target->scsi_host->cmd_per_lun = token; + break; + + case SRP_OPT_TARGET_CAN_QUEUE: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { + pr_warn("bad max target_can_queue parameter '%s'\n", + p); + ret = -EINVAL; + goto out; + } + target->target_can_queue = token; + break; + + case SRP_OPT_IO_CLASS: + ret = match_hex(args, &token); + if (ret) { + pr_warn("bad IO class parameter '%s'\n", p); + goto out; + } + if (token != SRP_REV10_IB_IO_CLASS && + token != SRP_REV16A_IB_IO_CLASS) { + pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", + token, SRP_REV10_IB_IO_CLASS, + SRP_REV16A_IB_IO_CLASS); + ret = -EINVAL; + goto out; + } + target->io_class = token; + break; + + case SRP_OPT_INITIATOR_EXT: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + ret = kstrtoull(p, 16, &ull); + if (ret) { + pr_warn("bad initiator_ext value '%s'\n", p); + kfree(p); + goto out; + } + target->initiator_ext = cpu_to_be64(ull); + kfree(p); + break; + + case SRP_OPT_CMD_SG_ENTRIES: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > 255) { + pr_warn("bad max cmd_sg_entries parameter '%s'\n", + p); + ret = -EINVAL; + goto out; + } + target->cmd_sg_cnt = token; + break; + + case SRP_OPT_ALLOW_EXT_SG: + ret = match_int(args, &token); + if (ret) { + pr_warn("bad allow_ext_sg parameter '%s'\n", p); + goto out; + } + target->allow_ext_sg = !!token; + break; + + case SRP_OPT_SG_TABLESIZE: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > SG_MAX_SEGMENTS) { + pr_warn("bad max sg_tablesize parameter '%s'\n", + p); + ret = -EINVAL; + goto out; + } + target->sg_tablesize = token; + break; + + case SRP_OPT_COMP_VECTOR: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); + ret = -EINVAL; + goto out; + } + target->comp_vector = token; + break; + + case SRP_OPT_TL_RETRY_COUNT: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 2 || token > 7) { + pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", + p); + ret = -EINVAL; + goto out; + } + target->tl_retry_count = token; + break; + + case SRP_OPT_MAX_IT_IU_SIZE: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { + pr_warn("bad maximum initiator to target IU size '%s'\n", p); + ret = -EINVAL; + goto out; + } + target->max_it_iu_size = token; + break; + + case SRP_OPT_CH_COUNT: + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for channel count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { + pr_warn("bad channel count %s\n", p); + ret = -EINVAL; + goto out; + } + target->ch_count = token; + break; + + default: + pr_warn("unknown parameter or missing value '%s' in target creation request\n", + p); + ret = -EINVAL; + goto out; + } + } + + for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) { + if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) { + ret = 0; + break; + } + } + if (ret) + pr_warn("target creation request is missing one or more parameters\n"); + + if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue + && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) + pr_warn("cmd_per_lun = %d > queue_size = %d\n", + target->scsi_host->cmd_per_lun, + target->scsi_host->can_queue); + +out: + kfree(options); + return ret; +} + +static ssize_t add_target_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct srp_host *host = + container_of(dev, struct srp_host, dev); + struct Scsi_Host *target_host; + struct srp_target_port *target; + struct srp_rdma_ch *ch; + struct srp_device *srp_dev = host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; + int ret, i, ch_idx; + unsigned int max_sectors_per_mr, mr_per_cmd = 0; + bool multich = false; + uint32_t max_iu_len; + + target_host = scsi_host_alloc(&srp_template, + sizeof (struct srp_target_port)); + if (!target_host) + return -ENOMEM; + + target_host->transportt = ib_srp_transport_template; + target_host->max_channel = 0; + target_host->max_id = 1; + target_host->max_lun = -1LL; + target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + + if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) + target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; + + target = host_to_target(target_host); + + target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + target->io_class = SRP_REV16A_IB_IO_CLASS; + target->scsi_host = target_host; + target->srp_host = host; + target->lkey = host->srp_dev->pd->local_dma_lkey; + target->global_rkey = host->srp_dev->global_rkey; + target->cmd_sg_cnt = cmd_sg_entries; + target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; + target->allow_ext_sg = allow_ext_sg; + target->tl_retry_count = 7; + target->queue_size = SRP_DEFAULT_QUEUE_SIZE; + + /* + * Avoid that the SCSI host can be removed by srp_remove_target() + * before this function returns. + */ + scsi_host_get(target->scsi_host); + + ret = mutex_lock_interruptible(&host->add_target_mutex); + if (ret < 0) + goto put; + + ret = srp_parse_options(target->net, buf, target); + if (ret) + goto out; + + if (!srp_conn_unique(target->srp_host, target)) { + if (target->using_rdma_cm) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + &target->rdma_cm.dst); + } else { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be64_to_cpu(target->initiator_ext)); + } + ret = -EEXIST; + goto out; + } + + if (!srp_dev->has_fr && !target->allow_ext_sg && + target->cmd_sg_cnt < target->sg_tablesize) { + pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + target->sg_tablesize = target->cmd_sg_cnt; + } + + if (srp_dev->use_fast_reg) { + bool gaps_reg = ibdev->attrs.kernel_cap_flags & + IBK_SG_GAPS_REG; + + max_sectors_per_mr = srp_dev->max_pages_per_mr << + (ilog2(srp_dev->mr_page_size) - 9); + if (!gaps_reg) { + /* + * FR can only map one HCA page per entry. If the start + * address is not aligned on a HCA page boundary two + * entries will be used for the head and the tail + * although these two entries combined contain at most + * one HCA page of data. Hence the "+ 1" in the + * calculation below. + * + * The indirect data buffer descriptor is contiguous + * so the memory for that buffer will only be + * registered if register_always is true. Hence add + * one to mr_per_cmd if register_always has been set. + */ + mr_per_cmd = register_always + + (target->scsi_host->max_sectors + 1 + + max_sectors_per_mr - 1) / max_sectors_per_mr; + } else { + mr_per_cmd = register_always + + (target->sg_tablesize + + srp_dev->max_pages_per_mr - 1) / + srp_dev->max_pages_per_mr; + } + pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", + target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, + max_sectors_per_mr, mr_per_cmd); + } + + target_host->sg_tablesize = target->sg_tablesize; + target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; + target->mr_per_cmd = mr_per_cmd; + target->indirect_size = target->sg_tablesize * + sizeof (struct srp_direct_buf); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data, + target->max_it_iu_size); + + INIT_WORK(&target->tl_err_work, srp_tl_err_work); + INIT_WORK(&target->remove_work, srp_remove_work); + spin_lock_init(&target->lock); + ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); + if (ret) + goto out; + + ret = -ENOMEM; + if (target->ch_count == 0) { + target->ch_count = + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), + GFP_KERNEL); + if (!target->ch) + goto out; + + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; + + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; + + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; + + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + target->ch_count = ch - target->ch; + goto connected; + } + } + multich = true; + } + +connected: + target->scsi_host->nr_hw_queues = target->ch_count; + + ret = srp_add_target(host, target); + if (ret) + goto err_disconnect; + + if (target->state != SRP_TARGET_REMOVED) { + if (target->using_rdma_cm) { + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + target->sgid.raw, &target->rdma_cm.dst); + } else { + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be16_to_cpu(target->ib_cm.pkey), + be64_to_cpu(target->ib_cm.service_id), + target->sgid.raw, + target->ib_cm.orig_dgid.raw); + } + } + + ret = count; + +out: + mutex_unlock(&host->add_target_mutex); + +put: + scsi_host_put(target->scsi_host); + if (ret < 0) { + /* + * If a call to srp_remove_target() has not been scheduled, + * drop the network namespace reference now that was obtained + * earlier in this function. + */ + if (target->state != SRP_TARGET_REMOVED) + kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + scsi_host_put(target->scsi_host); + } + + return ret; + +err_disconnect: + srp_disconnect_target(target); + +free_ch: + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + srp_free_ch_ib(target, ch); + } + + kfree(target->ch); + goto out; +} + +static DEVICE_ATTR_WO(add_target); + +static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_host *host = container_of(dev, struct srp_host, dev); + + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); +} + +static DEVICE_ATTR_RO(ibdev); + +static ssize_t port_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_host *host = container_of(dev, struct srp_host, dev); + + return sysfs_emit(buf, "%u\n", host->port); +} + +static DEVICE_ATTR_RO(port); + +static struct attribute *srp_class_attrs[] = { + &dev_attr_add_target.attr, + &dev_attr_ibdev.attr, + &dev_attr_port.attr, + NULL +}; + +static struct srp_host *srp_add_port(struct srp_device *device, u32 port) +{ + struct srp_host *host; + + host = kzalloc(sizeof *host, GFP_KERNEL); + if (!host) + return NULL; + + INIT_LIST_HEAD(&host->target_list); + spin_lock_init(&host->target_lock); + mutex_init(&host->add_target_mutex); + host->srp_dev = device; + host->port = port; + + device_initialize(&host->dev); + host->dev.class = &srp_class; + host->dev.parent = device->dev->dev.parent; + if (dev_set_name(&host->dev, "srp-%s-%u", dev_name(&device->dev->dev), + port)) + goto put_host; + if (device_add(&host->dev)) + goto put_host; + + return host; + +put_host: + device_del(&host->dev); + put_device(&host->dev); + return NULL; +} + +static void srp_rename_dev(struct ib_device *device, void *client_data) +{ + struct srp_device *srp_dev = client_data; + struct srp_host *host, *tmp_host; + + list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { + char name[IB_DEVICE_NAME_MAX + 8]; + + snprintf(name, sizeof(name), "srp-%s-%u", + dev_name(&device->dev), host->port); + device_rename(&host->dev, name); + } +} + +static int srp_add_one(struct ib_device *device) +{ + struct srp_device *srp_dev; + struct ib_device_attr *attr = &device->attrs; + struct srp_host *host; + int mr_page_shift; + u32 p; + u64 max_pages_per_mr; + unsigned int flags = 0; + + srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); + if (!srp_dev) + return -ENOMEM; + + /* + * Use the smallest page size supported by the HCA, down to a + * minimum of 4096 bytes. We're unlikely to build large sglists + * out of smaller entries. + */ + mr_page_shift = max(12, ffs(attr->page_size_cap) - 1); + srp_dev->mr_page_size = 1 << mr_page_shift; + srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); + max_pages_per_mr = attr->max_mr_size; + do_div(max_pages_per_mr, srp_dev->mr_page_size); + pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, + attr->max_mr_size, srp_dev->mr_page_size, + max_pages_per_mr, SRP_MAX_PAGES_PER_MR); + srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, + max_pages_per_mr); + + srp_dev->has_fr = (attr->device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + if (!never_register && !srp_dev->has_fr) + dev_warn(&device->dev, "FR is not supported\n"); + else if (!never_register && + attr->max_mr_size >= 2 * srp_dev->mr_page_size) + srp_dev->use_fast_reg = srp_dev->has_fr; + + if (never_register || !register_always || !srp_dev->has_fr) + flags |= IB_PD_UNSAFE_GLOBAL_RKEY; + + if (srp_dev->use_fast_reg) { + srp_dev->max_pages_per_mr = + min_t(u32, srp_dev->max_pages_per_mr, + attr->max_fast_reg_page_list_len); + } + srp_dev->mr_max_size = srp_dev->mr_page_size * + srp_dev->max_pages_per_mr; + pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", + dev_name(&device->dev), mr_page_shift, attr->max_mr_size, + attr->max_fast_reg_page_list_len, + srp_dev->max_pages_per_mr, srp_dev->mr_max_size); + + INIT_LIST_HEAD(&srp_dev->dev_list); + + srp_dev->dev = device; + srp_dev->pd = ib_alloc_pd(device, flags); + if (IS_ERR(srp_dev->pd)) { + int ret = PTR_ERR(srp_dev->pd); + + kfree(srp_dev); + return ret; + } + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey; + WARN_ON_ONCE(srp_dev->global_rkey == 0); + } + + rdma_for_each_port (device, p) { + host = srp_add_port(srp_dev, p); + if (host) + list_add_tail(&host->list, &srp_dev->dev_list); + } + + ib_set_client_data(device, &srp_client, srp_dev); + return 0; +} + +static void srp_remove_one(struct ib_device *device, void *client_data) +{ + struct srp_device *srp_dev; + struct srp_host *host, *tmp_host; + struct srp_target_port *target; + + srp_dev = client_data; + + list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { + /* + * Remove the add_target sysfs entry so that no new target ports + * can be created. + */ + device_del(&host->dev); + + /* + * Remove all target ports. + */ + spin_lock(&host->target_lock); + list_for_each_entry(target, &host->target_list, list) + srp_queue_remove_work(target); + spin_unlock(&host->target_lock); + + /* + * srp_queue_remove_work() queues a call to + * srp_remove_target(). The latter function cancels + * target->tl_err_work so waiting for the remove works to + * finish is sufficient. + */ + flush_workqueue(srp_remove_wq); + + put_device(&host->dev); + } + + ib_dealloc_pd(srp_dev->pd); + + kfree(srp_dev); +} + +static struct srp_function_template ib_srp_transport_functions = { + .has_rport_state = true, + .reset_timer_if_blocked = true, + .reconnect_delay = &srp_reconnect_delay, + .fast_io_fail_tmo = &srp_fast_io_fail_tmo, + .dev_loss_tmo = &srp_dev_loss_tmo, + .reconnect = srp_rport_reconnect, + .rport_delete = srp_rport_delete, + .terminate_rport_io = srp_terminate_io, +}; + +static int __init srp_init_module(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct srp_aer_req) != 36); + BUILD_BUG_ON(sizeof(struct srp_cmd) != 48); + BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4); + BUILD_BUG_ON(sizeof(struct srp_indirect_buf) != 20); + BUILD_BUG_ON(sizeof(struct srp_login_req) != 64); + BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56); + BUILD_BUG_ON(sizeof(struct srp_rsp) != 36); + + if (srp_sg_tablesize) { + pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); + if (!cmd_sg_entries) + cmd_sg_entries = srp_sg_tablesize; + } + + if (!cmd_sg_entries) + cmd_sg_entries = SRP_DEF_SG_TABLESIZE; + + if (cmd_sg_entries > 255) { + pr_warn("Clamping cmd_sg_entries to 255\n"); + cmd_sg_entries = 255; + } + + if (!indirect_sg_entries) + indirect_sg_entries = cmd_sg_entries; + else if (indirect_sg_entries < cmd_sg_entries) { + pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", + cmd_sg_entries); + indirect_sg_entries = cmd_sg_entries; + } + + if (indirect_sg_entries > SG_MAX_SEGMENTS) { + pr_warn("Clamping indirect_sg_entries to %u\n", + SG_MAX_SEGMENTS); + indirect_sg_entries = SG_MAX_SEGMENTS; + } + + srp_remove_wq = create_workqueue("srp_remove"); + if (!srp_remove_wq) { + ret = -ENOMEM; + goto out; + } + + ret = -ENOMEM; + ib_srp_transport_template = + srp_attach_transport(&ib_srp_transport_functions); + if (!ib_srp_transport_template) + goto destroy_wq; + + ret = class_register(&srp_class); + if (ret) { + pr_err("couldn't register class infiniband_srp\n"); + goto release_tr; + } + + ib_sa_register_client(&srp_sa_client); + + ret = ib_register_client(&srp_client); + if (ret) { + pr_err("couldn't register IB client\n"); + goto unreg_sa; + } + +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; +} + +static void __exit srp_cleanup_module(void) +{ + ib_unregister_client(&srp_client); + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); +} + +module_init(srp_init_module); +module_exit(srp_cleanup_module); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h new file mode 100644 index 000000000..5d94db453 --- /dev/null +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_SRP_H +#define IB_SRP_H + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/scatterlist.h> + +#include <scsi/scsi_host.h> +#include <scsi/scsi_cmnd.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cm.h> +#include <rdma/rdma_cm.h> + +enum { + SRP_PATH_REC_TIMEOUT_MS = 1000, + SRP_ABORT_TIMEOUT_MS = 5000, + + SRP_PORT_REDIRECT = 1, + SRP_DLID_REDIRECT = 2, + SRP_STALE_CONN = 3, + + SRP_DEF_SG_TABLESIZE = 12, + + SRP_DEFAULT_QUEUE_SIZE = 1 << 6, + SRP_RSP_SQ_SIZE = 1, + SRP_TSK_MGMT_SQ_SIZE = 1, + SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - + SRP_TSK_MGMT_SQ_SIZE, + + SRP_MAX_PAGES_PER_MR = 512, + + SRP_MAX_ADD_CDB_LEN = 16, + + SRP_MAX_IMM_SGE = 2, + SRP_MAX_SGE = SRP_MAX_IMM_SGE + 1, + /* + * Choose the immediate data offset such that a 32 byte CDB still fits. + */ + SRP_IMM_DATA_OFFSET = sizeof(struct srp_cmd) + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_imm_buf), +}; + +enum { + SRP_TAG_NO_REQ = ~0U, + SRP_TAG_TSK_MGMT = BIT(31), +}; + +enum srp_target_state { + SRP_TARGET_SCANNING, + SRP_TARGET_LIVE, + SRP_TARGET_REMOVED, +}; + +enum srp_iu_type { + SRP_IU_CMD, + SRP_IU_TSK_MGMT, + SRP_IU_RSP, +}; + +/* + * RDMA adapter in the initiator system. + * + * @dev_list: List of RDMA ports associated with this RDMA adapter (srp_host). + * @mr_page_mask: HCA memory registration page mask. + * @mr_page_size: HCA memory registration page size. + * @mr_max_size: Maximum size in bytes of a single FR registration request. + */ +struct srp_device { + struct list_head dev_list; + struct ib_device *dev; + struct ib_pd *pd; + u32 global_rkey; + u64 mr_page_mask; + int mr_page_size; + int mr_max_size; + int max_pages_per_mr; + bool has_fr; + bool use_fast_reg; +}; + +/* + * One port of an RDMA adapter in the initiator system. + * + * @target_list: List of connected target ports (struct srp_target_port). + * @target_lock: Protects @target_list. + */ +struct srp_host { + struct srp_device *srp_dev; + u32 port; + struct device dev; + struct list_head target_list; + spinlock_t target_lock; + struct list_head list; + struct mutex add_target_mutex; +}; + +struct srp_request { + struct scsi_cmnd *scmnd; + struct srp_iu *cmd; + struct srp_fr_desc **fr_list; + struct srp_direct_buf *indirect_desc; + dma_addr_t indirect_dma_addr; + short nmdesc; + struct ib_cqe reg_cqe; +}; + +/** + * struct srp_rdma_ch + * @comp_vector: Completion vector used by this RDMA channel. + * @max_it_iu_len: Maximum initiator-to-target information unit length. + * @max_ti_iu_len: Maximum target-to-initiator information unit length. + */ +struct srp_rdma_ch { + /* These are RW in the hot path, and commonly used together */ + struct list_head free_tx; + spinlock_t lock; + s32 req_lim; + + /* These are read-only in the hot path */ + struct srp_target_port *target ____cacheline_aligned_in_smp; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_qp *qp; + struct srp_fr_pool *fr_pool; + uint32_t max_it_iu_len; + uint32_t max_ti_iu_len; + u8 max_imm_sge; + bool use_imm_data; + + /* Everything above this point is used in the hot path of + * command processing. Try to keep them packed into cachelines. + */ + + struct completion done; + int status; + + union { + struct ib_cm { + struct sa_path_rec path; + struct ib_sa_query *path_query; + int path_query_id; + struct ib_cm_id *cm_id; + } ib_cm; + struct rdma_cm { + struct rdma_cm_id *cm_id; + } rdma_cm; + }; + + struct srp_iu **tx_ring; + struct srp_iu **rx_ring; + int comp_vector; + + u64 tsk_mgmt_tag; + struct completion tsk_mgmt_done; + u8 tsk_mgmt_status; + bool connected; +}; + +/** + * struct srp_target_port - RDMA port in the SRP target system + * @comp_vector: Completion vector used by the first RDMA channel created for + * this target port. + */ +struct srp_target_port { + /* read and written in the hot path */ + spinlock_t lock; + + /* read only in the hot path */ + u32 global_rkey; + struct srp_rdma_ch *ch; + struct net *net; + u32 ch_count; + u32 lkey; + enum srp_target_state state; + uint32_t max_it_iu_size; + unsigned int cmd_sg_cnt; + unsigned int indirect_size; + bool allow_ext_sg; + + /* other member variables */ + union ib_gid sgid; + __be64 id_ext; + __be64 ioc_guid; + __be64 initiator_ext; + u16 io_class; + struct srp_host *srp_host; + struct Scsi_Host *scsi_host; + struct srp_rport *rport; + char target_name[32]; + unsigned int scsi_id; + unsigned int sg_tablesize; + unsigned int target_can_queue; + int mr_pool_size; + int mr_per_cmd; + int queue_size; + int comp_vector; + int tl_retry_count; + + bool using_rdma_cm; + + union { + struct { + __be64 service_id; + union ib_gid orig_dgid; + __be16 pkey; + } ib_cm; + struct { + union { + struct sockaddr_in ip4; + struct sockaddr_in6 ip6; + struct sockaddr sa; + struct sockaddr_storage ss; + } src; + union { + struct sockaddr_in ip4; + struct sockaddr_in6 ip6; + struct sockaddr sa; + struct sockaddr_storage ss; + } dst; + bool src_specified; + } rdma_cm; + }; + + u32 rq_tmo_jiffies; + + int zero_req_lim; + + struct work_struct tl_err_work; + struct work_struct remove_work; + + struct list_head list; + bool qp_in_error; +}; + +struct srp_iu { + struct list_head list; + u64 dma; + void *buf; + size_t size; + enum dma_data_direction direction; + u32 num_sge; + struct ib_sge sge[SRP_MAX_SGE]; + struct ib_cqe cqe; +}; + +/** + * struct srp_fr_desc - fast registration work request arguments + * @entry: Entry in srp_fr_pool.free_list. + * @mr: Memory region. + * @frpl: Fast registration page list. + */ +struct srp_fr_desc { + struct list_head entry; + struct ib_mr *mr; +}; + +/** + * struct srp_fr_pool - pool of fast registration descriptors + * + * An entry is available for allocation if and only if it occurs in @free_list. + * + * @size: Number of descriptors in this pool. + * @max_page_list_len: Maximum fast registration work request page list length. + * @lock: Protects free_list. + * @free_list: List of free descriptors. + * @desc: Fast registration descriptor pool. + */ +struct srp_fr_pool { + int size; + int max_page_list_len; + spinlock_t lock; + struct list_head free_list; + struct srp_fr_desc desc[]; +}; + +/** + * struct srp_map_state - per-request DMA memory mapping state + * @desc: Pointer to the element of the SRP buffer descriptor array + * that is being filled in. + * @pages: Array with DMA addresses of pages being considered for + * memory registration. + * @base_dma_addr: DMA address of the first page that has not yet been mapped. + * @dma_len: Number of bytes that will be registered with the next FR + * memory registration call. + * @total_len: Total number of bytes in the sg-list being mapped. + * @npages: Number of page addresses in the pages[] array. + * @nmdesc: Number of FR memory descriptors used for mapping. + * @ndesc: Number of SRP buffer descriptors that have been filled in. + */ +struct srp_map_state { + union { + struct { + struct srp_fr_desc **next; + struct srp_fr_desc **end; + } fr; + struct { + void **next; + void **end; + } gen; + }; + struct srp_direct_buf *desc; + union { + u64 *pages; + struct scatterlist *sg; + }; + dma_addr_t base_dma_addr; + u32 dma_len; + u32 total_len; + unsigned int npages; + unsigned int nmdesc; + unsigned int ndesc; +}; + +#endif /* IB_SRP_H */ diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig new file mode 100644 index 000000000..4b5d9b792 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INFINIBAND_SRPT + tristate "InfiniBand SCSI RDMA Protocol target support" + depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE + help + + Support for the SCSI RDMA Protocol (SRP) Target driver. The + SRP protocol is a protocol that allows an initiator to access + a block storage device on another host (target) over a network + that supports the RDMA protocol. Currently the RDMA protocol is + supported by InfiniBand and by iWarp network hardware. More + information about the SRP protocol can be found on the website + of the INCITS T10 technical committee (http://www.t10.org/). diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile new file mode 100644 index 000000000..2d137928a --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o diff --git a/drivers/infiniband/ulp/srpt/ib_dm_mad.h b/drivers/infiniband/ulp/srpt/ib_dm_mad.h new file mode 100644 index 000000000..fb1de1f6f --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_dm_mad.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef IB_DM_MAD_H +#define IB_DM_MAD_H + +#include <linux/types.h> + +#include <rdma/ib_mad.h> + +enum { + /* + * See also section 13.4.7 Status Field, table 115 MAD Common Status + * Field Bit Values and also section 16.3.1.1 Status Field in the + * InfiniBand Architecture Specification. + */ + DM_MAD_STATUS_UNSUP_METHOD = 0x0008, + DM_MAD_STATUS_UNSUP_METHOD_ATTR = 0x000c, + DM_MAD_STATUS_INVALID_FIELD = 0x001c, + DM_MAD_STATUS_NO_IOC = 0x0100, + + /* + * See also the Device Management chapter, section 16.3.3 Attributes, + * table 279 Device Management Attributes in the InfiniBand + * Architecture Specification. + */ + DM_ATTR_CLASS_PORT_INFO = 0x01, + DM_ATTR_IOU_INFO = 0x10, + DM_ATTR_IOC_PROFILE = 0x11, + DM_ATTR_SVC_ENTRIES = 0x12 +}; + +struct ib_dm_hdr { + u8 reserved[28]; +}; + +/* + * Structure of management datagram sent by the SRP target implementation. + * Contains a management datagram header, reliable multi-packet transaction + * protocol (RMPP) header and ib_dm_hdr. Notes: + * - The SRP target implementation does not use RMPP or ib_dm_hdr when sending + * management datagrams. + * - The header size must be exactly 64 bytes (IB_MGMT_DEVICE_HDR), since this + * is the header size that is passed to ib_create_send_mad() in ib_srpt.c. + * - The maximum supported size for a management datagram when not using RMPP + * is 256 bytes -- 64 bytes header and 192 (IB_MGMT_DEVICE_DATA) bytes data. + */ +struct ib_dm_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_dm_hdr dm_hdr; + u8 data[IB_MGMT_DEVICE_DATA]; +}; + +/* + * IOUnitInfo as defined in section 16.3.3.3 IOUnitInfo of the InfiniBand + * Architecture Specification. + */ +struct ib_dm_iou_info { + __be16 change_id; + u8 max_controllers; + u8 op_rom; + u8 controller_list[128]; +}; + +/* + * IOControllerprofile as defined in section 16.3.3.4 IOControllerProfile of + * the InfiniBand Architecture Specification. + */ +struct ib_dm_ioc_profile { + __be64 guid; + __be32 vendor_id; + __be32 device_id; + __be16 device_version; + __be16 reserved1; + __be32 subsys_vendor_id; + __be32 subsys_device_id; + __be16 io_class; + __be16 io_subclass; + __be16 protocol; + __be16 protocol_version; + __be16 service_conn; + __be16 initiators_supported; + __be16 send_queue_depth; + u8 reserved2; + u8 rdma_read_depth; + __be32 send_size; + __be32 rdma_size; + u8 op_cap_mask; + u8 svc_cap_mask; + u8 num_svc_entries; + u8 reserved3[9]; + u8 id_string[64]; +}; + +struct ib_dm_svc_entry { + u8 name[40]; + __be64 id; +}; + +/* + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the T10 SRP r16a document. + */ +struct ib_dm_svc_entries { + struct ib_dm_svc_entry service_entries[4]; +}; + +#endif diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c new file mode 100644 index 000000000..25e799dba --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -0,0 +1,3958 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/inet.h> +#include <rdma/ib_cache.h> +#include <scsi/scsi_proto.h> +#include <scsi/scsi_tcq.h> +#include <target/target_core_base.h> +#include <target/target_core_fabric.h> +#include "ib_srpt.h" + +/* Name of this kernel module. */ +#define DRV_NAME "ib_srpt" + +#define SRPT_ID_STRING "Linux SRP target" + +#undef pr_fmt +#define pr_fmt(fmt) DRV_NAME " " fmt + +MODULE_AUTHOR("Vu Pham and Bart Van Assche"); +MODULE_DESCRIPTION("SCSI RDMA Protocol target driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * Global Variables + */ + +static u64 srpt_service_guid; +static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ +static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */ + +static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; +module_param(srp_max_req_size, int, 0444); +MODULE_PARM_DESC(srp_max_req_size, + "Maximum size of SRP request messages in bytes."); + +static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE; +module_param(srpt_srq_size, int, 0444); +MODULE_PARM_DESC(srpt_srq_size, + "Shared receive queue (SRQ) size."); + +static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); +} +module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, + 0444); +MODULE_PARM_DESC(srpt_service_guid, + "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); + +static struct ib_client srpt_client; +/* Protects both rdma_cm_port and rdma_cm_id. */ +static DEFINE_MUTEX(rdma_cm_mutex); +/* Port number RDMA/CM will bind to. */ +static u16 rdma_cm_port; +static struct rdma_cm_id *rdma_cm_id; +static void srpt_release_cmd(struct se_cmd *se_cmd); +static void srpt_free_ch(struct kref *kref); +static int srpt_queue_status(struct se_cmd *cmd); +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_process_wait_list(struct srpt_rdma_ch *ch); + +/* + * The only allowed channel state changes are those that change the channel + * state into a state with a higher numerical value. Hence the new > prev test. + */ +static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) +{ + unsigned long flags; + enum rdma_ch_state prev; + bool changed = false; + + spin_lock_irqsave(&ch->spinlock, flags); + prev = ch->state; + if (new > prev) { + ch->state = new; + changed = true; + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + return changed; +} + +/** + * srpt_event_handler - asynchronous IB event callback function + * @handler: IB event handler registered by ib_register_event_handler(). + * @event: Description of the event that occurred. + * + * Callback function called by the InfiniBand core when an asynchronous IB + * event occurs. This callback may occur in interrupt context. See also + * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand + * Architecture Specification. + */ +static void srpt_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct srpt_device *sdev = + container_of(handler, struct srpt_device, event_handler); + struct srpt_port *sport; + u8 port_num; + + pr_debug("ASYNC event= %d on device= %s\n", event->event, + dev_name(&sdev->device->dev)); + + switch (event->event) { + case IB_EVENT_PORT_ERR: + port_num = event->element.port_num - 1; + if (port_num < sdev->device->phys_port_cnt) { + sport = &sdev->port[port_num]; + sport->lid = 0; + sport->sm_lid = 0; + } else { + WARN(true, "event %d: port_num %d out of range 1..%d\n", + event->event, port_num + 1, + sdev->device->phys_port_cnt); + } + break; + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_PKEY_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + case IB_EVENT_GID_CHANGE: + /* Refresh port data asynchronously. */ + port_num = event->element.port_num - 1; + if (port_num < sdev->device->phys_port_cnt) { + sport = &sdev->port[port_num]; + if (!sport->lid && !sport->sm_lid) + schedule_work(&sport->work); + } else { + WARN(true, "event %d: port_num %d out of range 1..%d\n", + event->event, port_num + 1, + sdev->device->phys_port_cnt); + } + break; + default: + pr_err("received unrecognized IB event %d\n", event->event); + break; + } +} + +/** + * srpt_srq_event - SRQ event callback function + * @event: Description of the event that occurred. + * @ctx: Context pointer specified at SRQ creation time. + */ +static void srpt_srq_event(struct ib_event *event, void *ctx) +{ + pr_debug("SRQ event %d\n", event->event); +} + +static const char *get_ch_state_name(enum rdma_ch_state s) +{ + switch (s) { + case CH_CONNECTING: + return "connecting"; + case CH_LIVE: + return "live"; + case CH_DISCONNECTING: + return "disconnecting"; + case CH_DRAINING: + return "draining"; + case CH_DISCONNECTED: + return "disconnected"; + } + return "???"; +} + +/** + * srpt_qp_event - QP event callback function + * @event: Description of the event that occurred. + * @ch: SRPT RDMA channel. + */ +static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +{ + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", + event->event, ch, ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); + + switch (event->event) { + case IB_EVENT_COMM_EST: + if (ch->using_rdma_cm) + rdma_notify(ch->rdma_cm.cm_id, event->event); + else + ib_cm_notify(ch->ib_cm.cm_id, event->event); + break; + case IB_EVENT_QP_LAST_WQE_REACHED: + pr_debug("%s-%d, state %s: received Last WQE event.\n", + ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); + break; + default: + pr_err("received unrecognized IB QP event %d\n", event->event); + break; + } +} + +/** + * srpt_set_ioc - initialize a IOUnitInfo structure + * @c_list: controller list. + * @slot: one-based slot number. + * @value: four-bit value. + * + * Copies the lowest four bits of value in element slot of the array of four + * bit elements called c_list (controller list). The index slot is one-based. + */ +static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value) +{ + u16 id; + u8 tmp; + + id = (slot - 1) / 2; + if (slot & 0x1) { + tmp = c_list[id] & 0xf; + c_list[id] = (value << 4) | tmp; + } else { + tmp = c_list[id] & 0xf0; + c_list[id] = (value & 0xf) | tmp; + } +} + +/** + * srpt_get_class_port_info - copy ClassPortInfo to a management datagram + * @mad: Datagram that will be sent as response to DM_ATTR_CLASS_PORT_INFO. + * + * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture + * Specification. + */ +static void srpt_get_class_port_info(struct ib_dm_mad *mad) +{ + struct ib_class_port_info *cif; + + cif = (struct ib_class_port_info *)mad->data; + memset(cif, 0, sizeof(*cif)); + cif->base_version = 1; + cif->class_version = 1; + + ib_set_cpi_resp_time(cif, 20); + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_iou - write IOUnitInfo to a management datagram + * @mad: Datagram that will be sent as response to DM_ATTR_IOU_INFO. + * + * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture + * Specification. See also section B.7, table B.6 in the SRP r16a document. + */ +static void srpt_get_iou(struct ib_dm_mad *mad) +{ + struct ib_dm_iou_info *ioui; + u8 slot; + int i; + + ioui = (struct ib_dm_iou_info *)mad->data; + ioui->change_id = cpu_to_be16(1); + ioui->max_controllers = 16; + + /* set present for slot 1 and empty for the rest */ + srpt_set_ioc(ioui->controller_list, 1, 1); + for (i = 1, slot = 2; i < 16; i++, slot++) + srpt_set_ioc(ioui->controller_list, slot, 0); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_ioc - write IOControllerprofile to a management datagram + * @sport: HCA port through which the MAD has been received. + * @slot: Slot number specified in DM_ATTR_IOC_PROFILE query. + * @mad: Datagram that will be sent as response to DM_ATTR_IOC_PROFILE. + * + * See also section 16.3.3.4 IOControllerProfile in the InfiniBand + * Architecture Specification. See also section B.7, table B.7 in the SRP + * r16a document. + */ +static void srpt_get_ioc(struct srpt_port *sport, u32 slot, + struct ib_dm_mad *mad) +{ + struct srpt_device *sdev = sport->sdev; + struct ib_dm_ioc_profile *iocp; + int send_queue_depth; + + iocp = (struct ib_dm_ioc_profile *)mad->data; + + if (!slot || slot > 16) { + mad->mad_hdr.status + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2) { + mad->mad_hdr.status + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + if (sdev->use_srq) + send_queue_depth = sdev->srq_size; + else + send_queue_depth = min(MAX_SRPT_RQ_SIZE, + sdev->device->attrs.max_qp_wr); + + memset(iocp, 0, sizeof(*iocp)); + strcpy(iocp->id_string, SRPT_ID_STRING); + iocp->guid = cpu_to_be64(srpt_service_guid); + iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); + iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id); + iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver); + iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); + iocp->subsys_device_id = 0x0; + iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->send_queue_depth = cpu_to_be16(send_queue_depth); + iocp->rdma_read_depth = 4; + iocp->send_size = cpu_to_be32(srp_max_req_size); + iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, + 1U << 24)); + iocp->num_svc_entries = 1; + iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC | + SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC; + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_svc_entries - write ServiceEntries to a management datagram + * @ioc_guid: I/O controller GUID to use in reply. + * @slot: I/O controller number. + * @hi: End of the range of service entries to be specified in the reply. + * @lo: Start of the range of service entries to be specified in the reply.. + * @mad: Datagram that will be sent as response to DM_ATTR_SVC_ENTRIES. + * + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the SRP r16a document. + */ +static void srpt_get_svc_entries(u64 ioc_guid, + u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad) +{ + struct ib_dm_svc_entries *svc_entries; + + WARN_ON(!ioc_guid); + + if (!slot || slot > 16) { + mad->mad_hdr.status + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2 || lo > hi || hi > 1) { + mad->mad_hdr.status + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + svc_entries = (struct ib_dm_svc_entries *)mad->data; + memset(svc_entries, 0, sizeof(*svc_entries)); + svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); + snprintf(svc_entries->service_entries[0].name, + sizeof(svc_entries->service_entries[0].name), + "%s%016llx", + SRP_SERVICE_NAME_PREFIX, + ioc_guid); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_mgmt_method_get - process a received management datagram + * @sp: HCA port through which the MAD has been received. + * @rq_mad: received MAD. + * @rsp_mad: response MAD. + */ +static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, + struct ib_dm_mad *rsp_mad) +{ + u16 attr_id; + u32 slot; + u8 hi, lo; + + attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id); + switch (attr_id) { + case DM_ATTR_CLASS_PORT_INFO: + srpt_get_class_port_info(rsp_mad); + break; + case DM_ATTR_IOU_INFO: + srpt_get_iou(rsp_mad); + break; + case DM_ATTR_IOC_PROFILE: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + srpt_get_ioc(sp, slot, rsp_mad); + break; + case DM_ATTR_SVC_ENTRIES: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + hi = (u8) ((slot >> 8) & 0xff); + lo = (u8) (slot & 0xff); + slot = (u16) ((slot >> 16) & 0xffff); + srpt_get_svc_entries(srpt_service_guid, + slot, hi, lo, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } +} + +/** + * srpt_mad_send_handler - MAD send completion callback + * @mad_agent: Return value of ib_register_mad_agent(). + * @mad_wc: Work completion reporting that the MAD has been sent. + */ +static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_wc) +{ + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); + ib_free_send_mad(mad_wc->send_buf); +} + +/** + * srpt_mad_recv_handler - MAD reception callback function + * @mad_agent: Return value of ib_register_mad_agent(). + * @send_buf: Not used. + * @mad_wc: Work completion reporting that a MAD has been received. + */ +static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_wc) +{ + struct srpt_port *sport = (struct srpt_port *)mad_agent->context; + struct ib_ah *ah; + struct ib_mad_send_buf *rsp; + struct ib_dm_mad *dm_mad; + + if (!mad_wc || !mad_wc->recv_buf.mad) + return; + + ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc, + mad_wc->recv_buf.grh, mad_agent->port_num); + if (IS_ERR(ah)) + goto err; + + BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR); + + rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, + mad_wc->wc->pkey_index, 0, + IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, + GFP_KERNEL, + IB_MGMT_BASE_VERSION); + if (IS_ERR(rsp)) + goto err_rsp; + + rsp->ah = ah; + + dm_mad = rsp->mad; + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad)); + dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + dm_mad->mad_hdr.status = 0; + + switch (mad_wc->recv_buf.mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad); + break; + case IB_MGMT_METHOD_SET: + dm_mad->mad_hdr.status = + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + default: + dm_mad->mad_hdr.status = + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + break; + } + + if (!ib_post_send_mad(rsp, NULL)) { + ib_free_recv_mad(mad_wc); + /* will destroy_ah & free_send_mad in send completion */ + return; + } + + ib_free_send_mad(rsp); + +err_rsp: + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); +err: + ib_free_recv_mad(mad_wc); +} + +static int srpt_format_guid(char *buf, unsigned int size, const __be64 *guid) +{ + const __be16 *g = (const __be16 *)guid; + + return snprintf(buf, size, "%04x:%04x:%04x:%04x", + be16_to_cpu(g[0]), be16_to_cpu(g[1]), + be16_to_cpu(g[2]), be16_to_cpu(g[3])); +} + +/** + * srpt_refresh_port - configure a HCA port + * @sport: SRPT HCA port. + * + * Enable InfiniBand management datagram processing, update the cached sm_lid, + * lid and gid values, and register a callback function for processing MADs + * on the specified port. + * + * Note: It is safe to call this function more than once for the same port. + */ +static int srpt_refresh_port(struct srpt_port *sport) +{ + struct ib_mad_agent *mad_agent; + struct ib_mad_reg_req reg_req; + struct ib_port_modify port_modify; + struct ib_port_attr port_attr; + int ret; + + ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); + if (ret) + return ret; + + sport->sm_lid = port_attr.sm_lid; + sport->lid = port_attr.lid; + + ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); + if (ret) + return ret; + + srpt_format_guid(sport->guid_name, ARRAY_SIZE(sport->guid_name), + &sport->gid.global.interface_id); + snprintf(sport->gid_name, ARRAY_SIZE(sport->gid_name), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + + if (rdma_protocol_iwarp(sport->sdev->device, sport->port)) + return 0; + + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) { + pr_warn("%s-%d: enabling device management failed (%d). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, ret); + return 0; + } + + if (!sport->mad_agent) { + memset(®_req, 0, sizeof(reg_req)); + reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; + reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; + set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); + set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask); + + mad_agent = ib_register_mad_agent(sport->sdev->device, + sport->port, + IB_QPT_GSI, + ®_req, 0, + srpt_mad_send_handler, + srpt_mad_recv_handler, + sport, 0); + if (IS_ERR(mad_agent)) { + pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, + PTR_ERR(mad_agent)); + sport->mad_agent = NULL; + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + ib_modify_port(sport->sdev->device, sport->port, 0, + &port_modify); + return 0; + } + + sport->mad_agent = mad_agent; + } + + return 0; +} + +/** + * srpt_unregister_mad_agent - unregister MAD callback functions + * @sdev: SRPT HCA pointer. + * @port_cnt: number of ports with registered MAD + * + * Note: It is safe to call this function more than once for the same device. + */ +static void srpt_unregister_mad_agent(struct srpt_device *sdev, int port_cnt) +{ + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP, + }; + struct srpt_port *sport; + int i; + + for (i = 1; i <= port_cnt; i++) { + sport = &sdev->port[i - 1]; + WARN_ON(sport->port != i); + if (sport->mad_agent) { + ib_modify_port(sdev->device, i, 0, &port_modify); + ib_unregister_mad_agent(sport->mad_agent); + sport->mad_agent = NULL; + } + } +} + +/** + * srpt_alloc_ioctx - allocate a SRPT I/O context structure + * @sdev: SRPT HCA pointer. + * @ioctx_size: I/O context size. + * @buf_cache: I/O buffer cache. + * @dir: DMA data direction. + */ +static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, + int ioctx_size, + struct kmem_cache *buf_cache, + enum dma_data_direction dir) +{ + struct srpt_ioctx *ioctx; + + ioctx = kzalloc(ioctx_size, GFP_KERNEL); + if (!ioctx) + goto err; + + ioctx->buf = kmem_cache_alloc(buf_cache, GFP_KERNEL); + if (!ioctx->buf) + goto err_free_ioctx; + + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, + kmem_cache_size(buf_cache), dir); + if (ib_dma_mapping_error(sdev->device, ioctx->dma)) + goto err_free_buf; + + return ioctx; + +err_free_buf: + kmem_cache_free(buf_cache, ioctx->buf); +err_free_ioctx: + kfree(ioctx); +err: + return NULL; +} + +/** + * srpt_free_ioctx - free a SRPT I/O context structure + * @sdev: SRPT HCA pointer. + * @ioctx: I/O context pointer. + * @buf_cache: I/O buffer cache. + * @dir: DMA data direction. + */ +static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, + struct kmem_cache *buf_cache, + enum dma_data_direction dir) +{ + if (!ioctx) + return; + + ib_dma_unmap_single(sdev->device, ioctx->dma, + kmem_cache_size(buf_cache), dir); + kmem_cache_free(buf_cache, ioctx->buf); + kfree(ioctx); +} + +/** + * srpt_alloc_ioctx_ring - allocate a ring of SRPT I/O context structures + * @sdev: Device to allocate the I/O context ring for. + * @ring_size: Number of elements in the I/O context ring. + * @ioctx_size: I/O context size. + * @buf_cache: I/O buffer cache. + * @alignment_offset: Offset in each ring buffer at which the SRP information + * unit starts. + * @dir: DMA data direction. + */ +static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, + int ring_size, int ioctx_size, + struct kmem_cache *buf_cache, + int alignment_offset, + enum dma_data_direction dir) +{ + struct srpt_ioctx **ring; + int i; + + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && + ioctx_size != sizeof(struct srpt_send_ioctx)); + + ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); + if (!ring) + goto out; + for (i = 0; i < ring_size; ++i) { + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir); + if (!ring[i]) + goto err; + ring[i]->index = i; + ring[i]->offset = alignment_offset; + } + goto out; + +err: + while (--i >= 0) + srpt_free_ioctx(sdev, ring[i], buf_cache, dir); + kvfree(ring); + ring = NULL; +out: + return ring; +} + +/** + * srpt_free_ioctx_ring - free the ring of SRPT I/O context structures + * @ioctx_ring: I/O context ring to be freed. + * @sdev: SRPT HCA pointer. + * @ring_size: Number of ring elements. + * @buf_cache: I/O buffer cache. + * @dir: DMA data direction. + */ +static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, + struct srpt_device *sdev, int ring_size, + struct kmem_cache *buf_cache, + enum dma_data_direction dir) +{ + int i; + + if (!ioctx_ring) + return; + + for (i = 0; i < ring_size; ++i) + srpt_free_ioctx(sdev, ioctx_ring[i], buf_cache, dir); + kvfree(ioctx_ring); +} + +/** + * srpt_set_cmd_state - set the state of a SCSI command + * @ioctx: Send I/O context. + * @new: New I/O context state. + * + * Does not modify the state of aborted commands. Returns the previous command + * state. + */ +static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + + previous = ioctx->state; + if (previous != SRPT_STATE_DONE) + ioctx->state = new; + + return previous; +} + +/** + * srpt_test_and_set_cmd_state - test and set the state of a command + * @ioctx: Send I/O context. + * @old: Current I/O context state. + * @new: New I/O context state. + * + * Returns true if and only if the previous command state was equal to 'old'. + */ +static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state old, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + + WARN_ON(!ioctx); + WARN_ON(old == SRPT_STATE_DONE); + WARN_ON(new == SRPT_STATE_NEW); + + previous = ioctx->state; + if (previous == old) + ioctx->state = new; + + return previous == old; +} + +/** + * srpt_post_recv - post an IB receive request + * @sdev: SRPT HCA pointer. + * @ch: SRPT RDMA channel. + * @ioctx: Receive I/O context pointer. + */ +static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *ioctx) +{ + struct ib_sge list; + struct ib_recv_wr wr; + + BUG_ON(!sdev); + list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset; + list.length = srp_max_req_size; + list.lkey = sdev->lkey; + + ioctx->ioctx.cqe.done = srpt_recv_done; + wr.wr_cqe = &ioctx->ioctx.cqe; + wr.next = NULL; + wr.sg_list = &list; + wr.num_sge = 1; + + if (sdev->use_srq) + return ib_post_srq_recv(sdev->srq, &wr, NULL); + else + return ib_post_recv(ch->qp, &wr, NULL); +} + +/** + * srpt_zerolength_write - perform a zero-length RDMA write + * @ch: SRPT RDMA channel. + * + * A quote from the InfiniBand specification: C9-88: For an HCA responder + * using Reliable Connection service, for each zero-length RDMA READ or WRITE + * request, the R_Key shall not be validated, even if the request includes + * Immediate data. + */ +static int srpt_zerolength_write(struct srpt_rdma_ch *ch) +{ + struct ib_rdma_wr wr = { + .wr = { + .next = NULL, + { .wr_cqe = &ch->zw_cqe, }, + .opcode = IB_WR_RDMA_WRITE, + .send_flags = IB_SEND_SIGNALED, + } + }; + + pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, + ch->qp->qp_num); + + return ib_post_send(ch->qp, &wr.wr, NULL); +} + +static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = wc->qp->qp_context; + + pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num, + wc->status); + + if (wc->status == IB_WC_SUCCESS) { + srpt_process_wait_list(ch); + } else { + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + pr_debug("%s-%d: already disconnected.\n", + ch->sess_name, ch->qp->qp_num); + } +} + +static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx, + struct srp_direct_buf *db, int nbufs, struct scatterlist **sg, + unsigned *sg_cnt) +{ + enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + struct scatterlist *prev = NULL; + unsigned prev_nents; + int ret, i; + + if (nbufs == 1) { + ioctx->rw_ctxs = &ioctx->s_rw_ctx; + } else { + ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs), + GFP_KERNEL); + if (!ioctx->rw_ctxs) + return -ENOMEM; + } + + for (i = ioctx->n_rw_ctx; i < nbufs; i++, db++) { + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; + u64 remote_addr = be64_to_cpu(db->va); + u32 size = be32_to_cpu(db->len); + u32 rkey = be32_to_cpu(db->key); + + ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false, + i < nbufs - 1); + if (ret) + goto unwind; + + ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port, + ctx->sg, ctx->nents, 0, remote_addr, rkey, dir); + if (ret < 0) { + target_free_sgl(ctx->sg, ctx->nents); + goto unwind; + } + + ioctx->n_rdma += ret; + ioctx->n_rw_ctx++; + + if (prev) { + sg_unmark_end(&prev[prev_nents - 1]); + sg_chain(prev, prev_nents + 1, ctx->sg); + } else { + *sg = ctx->sg; + } + + prev = ctx->sg; + prev_nents = ctx->nents; + + *sg_cnt += ctx->nents; + } + + return 0; + +unwind: + while (--i >= 0) { + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; + + rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port, + ctx->sg, ctx->nents, dir); + target_free_sgl(ctx->sg, ctx->nents); + } + if (ioctx->rw_ctxs != &ioctx->s_rw_ctx) + kfree(ioctx->rw_ctxs); + return ret; +} + +static void srpt_free_rw_ctxs(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd); + int i; + + for (i = 0; i < ioctx->n_rw_ctx; i++) { + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; + + rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port, + ctx->sg, ctx->nents, dir); + target_free_sgl(ctx->sg, ctx->nents); + } + + if (ioctx->rw_ctxs != &ioctx->s_rw_ctx) + kfree(ioctx->rw_ctxs); +} + +static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd) +{ + /* + * The pointer computations below will only be compiled correctly + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check + * whether srp_cmd::add_data has been declared as a byte pointer. + */ + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) && + !__same_type(srp_cmd->add_data[0], (u8)0)); + + /* + * According to the SRP spec, the lower two bits of the 'ADDITIONAL + * CDB LENGTH' field are reserved and the size in bytes of this field + * is four times the value specified in bits 3..7. Hence the "& ~3". + */ + return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3); +} + +/** + * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request + * @recv_ioctx: I/O context associated with the received command @srp_cmd. + * @ioctx: I/O context that will be used for responding to the initiator. + * @srp_cmd: Pointer to the SRP_CMD request data. + * @dir: Pointer to the variable to which the transfer direction will be + * written. + * @sg: [out] scatterlist for the parsed SRP_CMD. + * @sg_cnt: [out] length of @sg. + * @data_len: Pointer to the variable to which the total data length of all + * descriptors in the SRP_CMD request will be written. + * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data + * starts. + * + * This function initializes ioctx->nrbuf and ioctx->r_bufs. + * + * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; + * -ENOMEM when memory allocation fails and zero upon success. + */ +static int srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *ioctx, + struct srp_cmd *srp_cmd, enum dma_data_direction *dir, + struct scatterlist **sg, unsigned int *sg_cnt, u64 *data_len, + u16 imm_data_offset) +{ + BUG_ON(!dir); + BUG_ON(!data_len); + + /* + * The lower four bits of the buffer format field contain the DATA-IN + * buffer descriptor format, and the highest four bits contain the + * DATA-OUT buffer descriptor format. + */ + if (srp_cmd->buf_fmt & 0xf) + /* DATA-IN: transfer data from target to initiator (read). */ + *dir = DMA_FROM_DEVICE; + else if (srp_cmd->buf_fmt >> 4) + /* DATA-OUT: transfer data from initiator to target (write). */ + *dir = DMA_TO_DEVICE; + else + *dir = DMA_NONE; + + /* initialize data_direction early as srpt_alloc_rw_ctxs needs it */ + ioctx->cmd.data_direction = *dir; + + if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { + struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); + + *data_len = be32_to_cpu(db->len); + return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt); + } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { + struct srp_indirect_buf *idb = srpt_get_desc_buf(srp_cmd); + int nbufs = be32_to_cpu(idb->table_desc.len) / + sizeof(struct srp_direct_buf); + + if (nbufs > + (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { + pr_err("received unsupported SRP_CMD request type (%u out + %u in != %u / %zu)\n", + srp_cmd->data_out_desc_cnt, + srp_cmd->data_in_desc_cnt, + be32_to_cpu(idb->table_desc.len), + sizeof(struct srp_direct_buf)); + return -EINVAL; + } + + *data_len = be32_to_cpu(idb->len); + return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs, + sg, sg_cnt); + } else if ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_IMM) { + struct srp_imm_buf *imm_buf = srpt_get_desc_buf(srp_cmd); + void *data = (void *)srp_cmd + imm_data_offset; + uint32_t len = be32_to_cpu(imm_buf->len); + uint32_t req_size = imm_data_offset + len; + + if (req_size > srp_max_req_size) { + pr_err("Immediate data (length %d + %d) exceeds request size %d\n", + imm_data_offset, len, srp_max_req_size); + return -EINVAL; + } + if (recv_ioctx->byte_len < req_size) { + pr_err("Received too few data - %d < %d\n", + recv_ioctx->byte_len, req_size); + return -EIO; + } + /* + * The immediate data buffer descriptor must occur before the + * immediate data itself. + */ + if ((void *)(imm_buf + 1) > (void *)data) { + pr_err("Received invalid write request\n"); + return -EINVAL; + } + *data_len = len; + ioctx->recv_ioctx = recv_ioctx; + if ((uintptr_t)data & 511) { + pr_warn_once("Internal error - the receive buffers are not aligned properly.\n"); + return -EINVAL; + } + sg_init_one(&ioctx->imm_sg, data, len); + *sg = &ioctx->imm_sg; + *sg_cnt = 1; + return 0; + } else { + *data_len = 0; + return 0; + } +} + +/** + * srpt_init_ch_qp - initialize queue pair attributes + * @ch: SRPT RDMA channel. + * @qp: Queue pair pointer. + * + * Initialized the attributes of queue pair 'qp' by allowing local write, + * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT. + */ +static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr *attr; + int ret; + + WARN_ON_ONCE(ch->using_rdma_cm); + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + attr->port_num = ch->sport->port; + + ret = ib_find_cached_pkey(ch->sport->sdev->device, ch->sport->port, + ch->pkey, &attr->pkey_index); + if (ret < 0) + pr_err("Translating pkey %#x failed (%d) - using index 0\n", + ch->pkey, ret); + + ret = ib_modify_qp(qp, attr, + IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | + IB_QP_PKEY_INDEX); + + kfree(attr); + return ret; +} + +/** + * srpt_ch_qp_rtr - change the state of a channel to 'ready to receive' (RTR) + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. + * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + WARN_ON_ONCE(ch->using_rdma_cm); + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_dest_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_rts - change the state of a channel to 'ready to send' (RTS) + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. + * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_err - set the channel queue pair state to 'error' + * @ch: SRPT RDMA channel. + */ +static int srpt_ch_qp_err(struct srpt_rdma_ch *ch) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE); +} + +/** + * srpt_get_send_ioctx - obtain an I/O context for sending to the initiator + * @ch: SRPT RDMA channel. + */ +static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + int tag, cpu; + + BUG_ON(!ch); + + tag = sbitmap_queue_get(&ch->sess->sess_tag_pool, &cpu); + if (tag < 0) + return NULL; + + ioctx = ch->ioctx_ring[tag]; + BUG_ON(ioctx->ch != ch); + ioctx->state = SRPT_STATE_NEW; + WARN_ON_ONCE(ioctx->recv_ioctx); + ioctx->n_rdma = 0; + ioctx->n_rw_ctx = 0; + ioctx->queue_status_only = false; + /* + * transport_init_se_cmd() does not initialize all fields, so do it + * here. + */ + memset(&ioctx->cmd, 0, sizeof(ioctx->cmd)); + memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data)); + ioctx->cmd.map_tag = tag; + ioctx->cmd.map_cpu = cpu; + + return ioctx; +} + +/** + * srpt_abort_cmd - abort a SCSI command + * @ioctx: I/O context associated with the SCSI command. + */ +static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + + BUG_ON(!ioctx); + + /* + * If the command is in a state where the target core is waiting for + * the ib_srpt driver, change the state to the next state. + */ + + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEED_DATA: + ioctx->state = SRPT_STATE_DATA_IN; + break; + case SRPT_STATE_CMD_RSP_SENT: + case SRPT_STATE_MGMT_RSP_SENT: + ioctx->state = SRPT_STATE_DONE; + break; + default: + WARN_ONCE(true, "%s: unexpected I/O context state %d\n", + __func__, state); + break; + } + + pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state, + ioctx->state, ioctx->cmd.tag); + + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + case SRPT_STATE_MGMT: + case SRPT_STATE_DONE: + /* + * Do nothing - defer abort processing until + * srpt_queue_response() is invoked. + */ + break; + case SRPT_STATE_NEED_DATA: + pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag); + transport_generic_request_failure(&ioctx->cmd, + TCM_CHECK_CONDITION_ABORT_CMD); + break; + case SRPT_STATE_CMD_RSP_SENT: + /* + * SRP_RSP sending failed or the SRP_RSP send completion has + * not been received in time. + */ + transport_generic_free_cmd(&ioctx->cmd, 0); + break; + case SRPT_STATE_MGMT_RSP_SENT: + transport_generic_free_cmd(&ioctx->cmd, 0); + break; + default: + WARN(1, "Unexpected command state (%d)", state); + break; + } + + return state; +} + +/** + * srpt_rdma_read_done - RDMA read completion callback + * @cq: Completion queue. + * @wc: Work completion. + * + * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping + * the data that has been transferred via IB RDMA had to be postponed until the + * check_stop_free() callback. None of this is necessary anymore and needs to + * be cleaned up. + */ +static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = wc->qp->qp_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); + + WARN_ON(ioctx->n_rdma <= 0); + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + ioctx->n_rdma = 0; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); + srpt_abort_cmd(ioctx); + return; + } + + if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, + SRPT_STATE_DATA_IN)) + target_execute_cmd(&ioctx->cmd); + else + pr_err("%s[%d]: wrong state = %d\n", __func__, + __LINE__, ioctx->state); +} + +/** + * srpt_build_cmd_rsp - build a SRP_RSP response + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context associated with the SRP_CMD request. The response will + * be built in the buffer ioctx->buf points at and hence this function will + * overwrite the request data. + * @tag: tag of the request for which this response is being generated. + * @status: value for the STATUS field of the SRP_RSP information unit. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. See also SPC-2 for more information about sense data. + */ +static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, u64 tag, + int status) +{ + struct se_cmd *cmd = &ioctx->cmd; + struct srp_rsp *srp_rsp; + const u8 *sense_data; + int sense_data_len, max_sense_len; + u32 resid = cmd->residual_count; + + /* + * The lowest bit of all SAM-3 status codes is zero (see also + * paragraph 5.3 in SAM-3). + */ + WARN_ON(status & 1); + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + + sense_data = ioctx->sense_data; + sense_data_len = ioctx->cmd.scsi_sense_length; + WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); + + memset(srp_rsp, 0, sizeof(*srp_rsp)); + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + srp_rsp->status = status; + + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOUNDER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIUNDER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOOVER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIOVER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } + + if (sense_data_len) { + BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); + max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); + if (sense_data_len > max_sense_len) { + pr_warn("truncated sense data from %d to %d bytes\n", + sense_data_len, max_sense_len); + sense_data_len = max_sense_len; + } + + srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID; + srp_rsp->sense_data_len = cpu_to_be32(sense_data_len); + memcpy(srp_rsp->data, sense_data, sense_data_len); + } + + return sizeof(*srp_rsp) + sense_data_len; +} + +/** + * srpt_build_tskmgmt_rsp - build a task management response + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context in which the SRP_RSP response will be built. + * @rsp_code: RSP_CODE that will be stored in the response. + * @tag: Tag of the request for which this response is being generated. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. + */ +static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + u8 rsp_code, u64 tag) +{ + struct srp_rsp *srp_rsp; + int resp_data_len; + int resp_len; + + resp_data_len = 4; + resp_len = sizeof(*srp_rsp) + resp_data_len; + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); + + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + + srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; + srp_rsp->resp_data_len = cpu_to_be32(resp_data_len); + srp_rsp->data[3] = rsp_code; + + return resp_len; +} + +static int srpt_check_stop_free(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx = container_of(cmd, + struct srpt_send_ioctx, cmd); + + return target_put_sess_cmd(&ioctx->cmd); +} + +/** + * srpt_handle_cmd - process a SRP_CMD information unit + * @ch: SRPT RDMA channel. + * @recv_ioctx: Receive I/O context. + * @send_ioctx: Send I/O context. + */ +static void srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct se_cmd *cmd; + struct srp_cmd *srp_cmd; + struct scatterlist *sg = NULL; + unsigned sg_cnt = 0; + u64 data_len; + enum dma_data_direction dir; + int rc; + + BUG_ON(!send_ioctx); + + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; + cmd = &send_ioctx->cmd; + cmd->tag = srp_cmd->tag; + + switch (srp_cmd->task_attr) { + case SRP_CMD_SIMPLE_Q: + cmd->sam_task_attr = TCM_SIMPLE_TAG; + break; + case SRP_CMD_ORDERED_Q: + default: + cmd->sam_task_attr = TCM_ORDERED_TAG; + break; + case SRP_CMD_HEAD_OF_Q: + cmd->sam_task_attr = TCM_HEAD_TAG; + break; + case SRP_CMD_ACA: + cmd->sam_task_attr = TCM_ACA_TAG; + break; + } + + rc = srpt_get_desc_tbl(recv_ioctx, send_ioctx, srp_cmd, &dir, + &sg, &sg_cnt, &data_len, ch->imm_data_offset); + if (rc) { + if (rc != -EAGAIN) { + pr_err("0x%llx: parsing SRP descriptor table failed.\n", + srp_cmd->tag); + } + goto busy; + } + + rc = target_init_cmd(cmd, ch->sess, &send_ioctx->sense_data[0], + scsilun_to_int(&srp_cmd->lun), data_len, + TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); + if (rc != 0) { + pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, + srp_cmd->tag); + goto busy; + } + + if (target_submit_prep(cmd, srp_cmd->cdb, sg, sg_cnt, NULL, 0, NULL, 0, + GFP_KERNEL)) + return; + + target_submit(cmd); + return; + +busy: + target_send_busy(cmd); +} + +static int srp_tmr_to_tcm(int fn) +{ + switch (fn) { + case SRP_TSK_ABORT_TASK: + return TMR_ABORT_TASK; + case SRP_TSK_ABORT_TASK_SET: + return TMR_ABORT_TASK_SET; + case SRP_TSK_CLEAR_TASK_SET: + return TMR_CLEAR_TASK_SET; + case SRP_TSK_LUN_RESET: + return TMR_LUN_RESET; + case SRP_TSK_CLEAR_ACA: + return TMR_CLEAR_ACA; + default: + return -1; + } +} + +/** + * srpt_handle_tsk_mgmt - process a SRP_TSK_MGMT information unit + * @ch: SRPT RDMA channel. + * @recv_ioctx: Receive I/O context. + * @send_ioctx: Send I/O context. + * + * Returns 0 if and only if the request will be processed by the target core. + * + * For more information about SRP_TSK_MGMT information units, see also section + * 6.7 in the SRP r16a document. + */ +static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct srp_tsk_mgmt *srp_tsk; + struct se_cmd *cmd; + struct se_session *sess = ch->sess; + int tcm_tmr; + int rc; + + BUG_ON(!send_ioctx); + + srp_tsk = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; + cmd = &send_ioctx->cmd; + + pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n", + srp_tsk->tsk_mgmt_func, srp_tsk->task_tag, srp_tsk->tag, ch, + ch->sess); + + srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); + send_ioctx->cmd.tag = srp_tsk->tag; + tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); + rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr, + GFP_KERNEL, srp_tsk->task_tag, + TARGET_SCF_ACK_KREF); + if (rc != 0) { + send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; + cmd->se_tfo->queue_tm_rsp(cmd); + } + return; +} + +/** + * srpt_handle_new_iu - process a newly received information unit + * @ch: RDMA channel through which the information unit has been received. + * @recv_ioctx: Receive I/O context associated with the information unit. + */ +static bool +srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx) +{ + struct srpt_send_ioctx *send_ioctx = NULL; + struct srp_cmd *srp_cmd; + bool res = false; + u8 opcode; + + BUG_ON(!ch); + BUG_ON(!recv_ioctx); + + if (unlikely(ch->state == CH_CONNECTING)) + goto push; + + ib_dma_sync_single_for_cpu(ch->sport->sdev->device, + recv_ioctx->ioctx.dma, + recv_ioctx->ioctx.offset + srp_max_req_size, + DMA_FROM_DEVICE); + + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; + opcode = srp_cmd->opcode; + if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) { + send_ioctx = srpt_get_send_ioctx(ch); + if (unlikely(!send_ioctx)) + goto push; + } + + if (!list_empty(&recv_ioctx->wait_list)) { + WARN_ON_ONCE(!ch->processing_wait_list); + list_del_init(&recv_ioctx->wait_list); + } + + switch (opcode) { + case SRP_CMD: + srpt_handle_cmd(ch, recv_ioctx, send_ioctx); + break; + case SRP_TSK_MGMT: + srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); + break; + case SRP_I_LOGOUT: + pr_err("Not yet implemented: SRP_I_LOGOUT\n"); + break; + case SRP_CRED_RSP: + pr_debug("received SRP_CRED_RSP\n"); + break; + case SRP_AER_RSP: + pr_debug("received SRP_AER_RSP\n"); + break; + case SRP_RSP: + pr_err("Received SRP_RSP\n"); + break; + default: + pr_err("received IU with unknown opcode 0x%x\n", opcode); + break; + } + + if (!send_ioctx || !send_ioctx->recv_ioctx) + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + res = true; + +out: + return res; + +push: + if (list_empty(&recv_ioctx->wait_list)) { + WARN_ON_ONCE(ch->processing_wait_list); + list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); + } + goto out; +} + +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = wc->qp->qp_context; + struct srpt_recv_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); + + if (wc->status == IB_WC_SUCCESS) { + int req_lim; + + req_lim = atomic_dec_return(&ch->req_lim); + if (unlikely(req_lim < 0)) + pr_err("req_lim = %d < 0\n", req_lim); + ioctx->byte_len = wc->byte_len; + srpt_handle_new_iu(ch, ioctx); + } else { + pr_info_ratelimited("receiving failed for ioctx %p with status %d\n", + ioctx, wc->status); + } +} + +/* + * This function must be called from the context in which RDMA completions are + * processed because it accesses the wait list without protection against + * access from other threads. + */ +static void srpt_process_wait_list(struct srpt_rdma_ch *ch) +{ + struct srpt_recv_ioctx *recv_ioctx, *tmp; + + WARN_ON_ONCE(ch->state == CH_CONNECTING); + + if (list_empty(&ch->cmd_wait_list)) + return; + + WARN_ON_ONCE(ch->processing_wait_list); + ch->processing_wait_list = true; + list_for_each_entry_safe(recv_ioctx, tmp, &ch->cmd_wait_list, + wait_list) { + if (!srpt_handle_new_iu(ch, recv_ioctx)) + break; + } + ch->processing_wait_list = false; +} + +/** + * srpt_send_done - send completion callback + * @cq: Completion queue. + * @wc: Work completion. + * + * Note: Although this has not yet been observed during tests, at least in + * theory it is possible that the srpt_get_send_ioctx() call invoked by + * srpt_handle_new_iu() fails. This is possible because the req_lim_delta + * value in each response is set to one, and it is possible that this response + * makes the initiator send a new request before the send completion for that + * response has been processed. This could e.g. happen if the call to + * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or + * if IB retransmission causes generation of the send completion to be + * delayed. Incoming information units for which srpt_get_send_ioctx() fails + * are queued on cmd_wait_list. The code below processes these delayed + * requests one at a time. + */ +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = wc->qp->qp_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + enum srpt_command_state state; + + state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + + WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && + state != SRPT_STATE_MGMT_RSP_SENT); + + atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); + + if (wc->status != IB_WC_SUCCESS) + pr_info("sending response for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); + + if (state != SRPT_STATE_DONE) { + transport_generic_free_cmd(&ioctx->cmd, 0); + } else { + pr_err("IB completion has been received too late for wr_id = %u.\n", + ioctx->ioctx.index); + } + + srpt_process_wait_list(ch); +} + +/** + * srpt_create_ch_ib - create receive and send completion queues + * @ch: SRPT RDMA channel. + */ +static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) +{ + struct ib_qp_init_attr *qp_init; + struct srpt_port *sport = ch->sport; + struct srpt_device *sdev = sport->sdev; + const struct ib_device_attr *attrs = &sdev->device->attrs; + int sq_size = sport->port_attrib.srp_sq_size; + int i, ret; + + WARN_ON(ch->rq_size < 1); + + ret = -ENOMEM; + qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL); + if (!qp_init) + goto out; + +retry: + ch->cq = ib_cq_pool_get(sdev->device, ch->rq_size + sq_size, -1, + IB_POLL_WORKQUEUE); + if (IS_ERR(ch->cq)) { + ret = PTR_ERR(ch->cq); + pr_err("failed to create CQ cqe= %d ret= %d\n", + ch->rq_size + sq_size, ret); + goto out; + } + ch->cq_size = ch->rq_size + sq_size; + + qp_init->qp_context = (void *)ch; + qp_init->event_handler + = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->send_cq = ch->cq; + qp_init->recv_cq = ch->cq; + qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; + qp_init->qp_type = IB_QPT_RC; + /* + * We divide up our send queue size into half SEND WRs to send the + * completions, and half R/W contexts to actually do the RDMA + * READ/WRITE transfers. Note that we need to allocate CQ slots for + * both both, as RDMA contexts will also post completions for the + * RDMA READ case. + */ + qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); + qp_init->cap.max_rdma_ctxs = sq_size / 2; + qp_init->cap.max_send_sge = attrs->max_send_sge; + qp_init->cap.max_recv_sge = 1; + qp_init->port_num = ch->sport->port; + if (sdev->use_srq) + qp_init->srq = sdev->srq; + else + qp_init->cap.max_recv_wr = ch->rq_size; + + if (ch->using_rdma_cm) { + ret = rdma_create_qp(ch->rdma_cm.cm_id, sdev->pd, qp_init); + ch->qp = ch->rdma_cm.cm_id->qp; + } else { + ch->qp = ib_create_qp(sdev->pd, qp_init); + if (!IS_ERR(ch->qp)) { + ret = srpt_init_ch_qp(ch, ch->qp); + if (ret) + ib_destroy_qp(ch->qp); + } else { + ret = PTR_ERR(ch->qp); + } + } + if (ret) { + bool retry = sq_size > MIN_SRPT_SQ_SIZE; + + if (retry) { + pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n", + sq_size, ret); + ib_cq_pool_put(ch->cq, ch->cq_size); + sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); + goto retry; + } else { + pr_err("failed to create queue pair with sq_size = %d (%d)\n", + sq_size, ret); + goto err_destroy_cq; + } + } + + atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); + + pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d ch= %p\n", + __func__, ch->cq->cqe, qp_init->cap.max_send_sge, + qp_init->cap.max_send_wr, ch); + + if (!sdev->use_srq) + for (i = 0; i < ch->rq_size; i++) + srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); + +out: + kfree(qp_init); + return ret; + +err_destroy_cq: + ch->qp = NULL; + ib_cq_pool_put(ch->cq, ch->cq_size); + goto out; +} + +static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) +{ + ib_destroy_qp(ch->qp); + ib_cq_pool_put(ch->cq, ch->cq_size); +} + +/** + * srpt_close_ch - close a RDMA channel + * @ch: SRPT RDMA channel. + * + * Make sure all resources associated with the channel will be deallocated at + * an appropriate time. + * + * Returns true if and only if the channel state has been modified into + * CH_DRAINING. + */ +static bool srpt_close_ch(struct srpt_rdma_ch *ch) +{ + int ret; + + if (!srpt_set_ch_state(ch, CH_DRAINING)) { + pr_debug("%s: already closed\n", ch->sess_name); + return false; + } + + kref_get(&ch->kref); + + ret = srpt_ch_qp_err(ch); + if (ret < 0) + pr_err("%s-%d: changing queue pair into error state failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); + + ret = srpt_zerolength_write(ch); + if (ret < 0) { + pr_err("%s-%d: queuing zero-length write failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ON_ONCE(true); + } + + kref_put(&ch->kref, srpt_free_ch); + + return true; +} + +/* + * Change the channel state into CH_DISCONNECTING. If a channel has not yet + * reached the connected state, close it. If a channel is in the connected + * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is + * the responsibility of the caller to ensure that this function is not + * invoked concurrently with the code that accepts a connection. This means + * that this function must either be invoked from inside a CM callback + * function or that it must be invoked with the srpt_port.mutex held. + */ +static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) +{ + int ret; + + if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) + return -ENOTCONN; + + if (ch->using_rdma_cm) { + ret = rdma_disconnect(ch->rdma_cm.cm_id); + } else { + ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0); + if (ret < 0) + ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0); + } + + if (ret < 0 && srpt_close_ch(ch)) + ret = 0; + + return ret; +} + +/* Send DREQ and wait for DREP. */ +static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) +{ + DECLARE_COMPLETION_ONSTACK(closed); + struct srpt_port *sport = ch->sport; + + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); + + ch->closed = &closed; + + mutex_lock(&sport->mutex); + srpt_disconnect_ch(ch); + mutex_unlock(&sport->mutex); + + while (wait_for_completion_timeout(&closed, 5 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); + +} + +static void __srpt_close_all_ch(struct srpt_port *sport) +{ + struct srpt_nexus *nexus; + struct srpt_rdma_ch *ch; + + lockdep_assert_held(&sport->mutex); + + list_for_each_entry(nexus, &sport->nexus_list, entry) { + list_for_each_entry(ch, &nexus->ch_list, list) { + if (srpt_disconnect_ch(ch) >= 0) + pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", + ch->sess_name, ch->qp->qp_num, + dev_name(&sport->sdev->device->dev), + sport->port); + srpt_close_ch(ch); + } + } +} + +/* + * Look up (i_port_id, t_port_id) in sport->nexus_list. Create an entry if + * it does not yet exist. + */ +static struct srpt_nexus *srpt_get_nexus(struct srpt_port *sport, + const u8 i_port_id[16], + const u8 t_port_id[16]) +{ + struct srpt_nexus *nexus = NULL, *tmp_nexus = NULL, *n; + + for (;;) { + mutex_lock(&sport->mutex); + list_for_each_entry(n, &sport->nexus_list, entry) { + if (memcmp(n->i_port_id, i_port_id, 16) == 0 && + memcmp(n->t_port_id, t_port_id, 16) == 0) { + nexus = n; + break; + } + } + if (!nexus && tmp_nexus) { + list_add_tail_rcu(&tmp_nexus->entry, + &sport->nexus_list); + swap(nexus, tmp_nexus); + } + mutex_unlock(&sport->mutex); + + if (nexus) + break; + tmp_nexus = kzalloc(sizeof(*nexus), GFP_KERNEL); + if (!tmp_nexus) { + nexus = ERR_PTR(-ENOMEM); + break; + } + INIT_LIST_HEAD(&tmp_nexus->ch_list); + memcpy(tmp_nexus->i_port_id, i_port_id, 16); + memcpy(tmp_nexus->t_port_id, t_port_id, 16); + } + + kfree(tmp_nexus); + + return nexus; +} + +static void srpt_set_enabled(struct srpt_port *sport, bool enabled) + __must_hold(&sport->mutex) +{ + lockdep_assert_held(&sport->mutex); + + if (sport->enabled == enabled) + return; + sport->enabled = enabled; + if (!enabled) + __srpt_close_all_ch(sport); +} + +static void srpt_drop_sport_ref(struct srpt_port *sport) +{ + if (atomic_dec_return(&sport->refcount) == 0 && sport->freed_channels) + complete(sport->freed_channels); +} + +static void srpt_free_ch(struct kref *kref) +{ + struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + + srpt_drop_sport_ref(ch->sport); + kfree_rcu(ch, rcu); +} + +/* + * Shut down the SCSI target session, tell the connection manager to + * disconnect the associated RDMA channel, transition the QP to the error + * state and remove the channel from the channel list. This function is + * typically called from inside srpt_zerolength_write_done(). Concurrent + * srpt_zerolength_write() calls from inside srpt_close_ch() are possible + * as long as the channel is on sport->nexus_list. + */ +static void srpt_release_channel_work(struct work_struct *w) +{ + struct srpt_rdma_ch *ch; + struct srpt_device *sdev; + struct srpt_port *sport; + struct se_session *se_sess; + + ch = container_of(w, struct srpt_rdma_ch, release_work); + pr_debug("%s-%d\n", ch->sess_name, ch->qp->qp_num); + + sdev = ch->sport->sdev; + BUG_ON(!sdev); + + se_sess = ch->sess; + BUG_ON(!se_sess); + + target_stop_session(se_sess); + target_wait_for_sess_cmds(se_sess); + + target_remove_session(se_sess); + ch->sess = NULL; + + if (ch->using_rdma_cm) + rdma_destroy_id(ch->rdma_cm.cm_id); + else + ib_destroy_cm_id(ch->ib_cm.cm_id); + + sport = ch->sport; + mutex_lock(&sport->mutex); + list_del_rcu(&ch->list); + mutex_unlock(&sport->mutex); + + if (ch->closed) + complete(ch->closed); + + srpt_destroy_ch_ib(ch); + + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_buf_cache, DMA_TO_DEVICE); + + kmem_cache_destroy(ch->rsp_buf_cache); + + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + sdev, ch->rq_size, + ch->req_buf_cache, DMA_FROM_DEVICE); + + kmem_cache_destroy(ch->req_buf_cache); + + kref_put(&ch->kref, srpt_free_ch); +} + +/** + * srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED + * @sdev: HCA through which the login request was received. + * @ib_cm_id: IB/CM connection identifier in case of IB/CM. + * @rdma_cm_id: RDMA/CM connection identifier in case of RDMA/CM. + * @port_num: Port through which the REQ message was received. + * @pkey: P_Key of the incoming connection. + * @req: SRP login request. + * @src_addr: GID (IB/CM) or IP address (RDMA/CM) of the port that submitted + * the login request. + * + * Ownership of the cm_id is transferred to the target session if this + * function returns zero. Otherwise the caller remains the owner of cm_id. + */ +static int srpt_cm_req_recv(struct srpt_device *const sdev, + struct ib_cm_id *ib_cm_id, + struct rdma_cm_id *rdma_cm_id, + u8 port_num, __be16 pkey, + const struct srp_login_req *req, + const char *src_addr) +{ + struct srpt_port *sport = &sdev->port[port_num - 1]; + struct srpt_nexus *nexus; + struct srp_login_rsp *rsp = NULL; + struct srp_login_rej *rej = NULL; + union { + struct rdma_conn_param rdma_cm; + struct ib_cm_rep_param ib_cm; + } *rep_param = NULL; + struct srpt_rdma_ch *ch = NULL; + char i_port_id[36]; + u32 it_iu_len; + int i, tag_num, tag_size, ret; + struct srpt_tpg *stpg; + + WARN_ON_ONCE(irqs_disabled()); + + it_iu_len = be32_to_cpu(req->req_it_iu_len); + + pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n", + req->initiator_port_id, req->target_port_id, it_iu_len, + port_num, &sport->gid, be16_to_cpu(pkey)); + + nexus = srpt_get_nexus(sport, req->initiator_port_id, + req->target_port_id); + if (IS_ERR(nexus)) { + ret = PTR_ERR(nexus); + goto out; + } + + ret = -ENOMEM; + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + rej = kzalloc(sizeof(*rej), GFP_KERNEL); + rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL); + if (!rsp || !rej || !rep_param) + goto out; + + ret = -EINVAL; + if (it_iu_len > srp_max_req_size || it_iu_len < 64) { + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + pr_err("rejected SRP_LOGIN_REQ because its length (%d bytes) is out of range (%d .. %d)\n", + it_iu_len, 64, srp_max_req_size); + goto reject; + } + + if (!sport->enabled) { + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n", + dev_name(&sport->sdev->device->dev), port_num); + goto reject; + } + + if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) + || *(__be64 *)(req->target_port_id + 8) != + cpu_to_be64(srpt_service_guid)) { + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + pr_err("rejected SRP_LOGIN_REQ because it has an invalid target port identifier.\n"); + goto reject; + } + + ret = -ENOMEM; + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_err("rejected SRP_LOGIN_REQ because out of memory.\n"); + goto reject; + } + + kref_init(&ch->kref); + ch->pkey = be16_to_cpu(pkey); + ch->nexus = nexus; + ch->zw_cqe.done = srpt_zerolength_write_done; + INIT_WORK(&ch->release_work, srpt_release_channel_work); + ch->sport = sport; + if (rdma_cm_id) { + ch->using_rdma_cm = true; + ch->rdma_cm.cm_id = rdma_cm_id; + rdma_cm_id->context = ch; + } else { + ch->ib_cm.cm_id = ib_cm_id; + ib_cm_id->context = ch; + } + /* + * ch->rq_size should be at least as large as the initiator queue + * depth to avoid that the initiator driver has to report QUEUE_FULL + * to the SCSI mid-layer. + */ + ch->rq_size = min(MAX_SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr); + spin_lock_init(&ch->spinlock); + ch->state = CH_CONNECTING; + INIT_LIST_HEAD(&ch->cmd_wait_list); + ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; + + ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, + 512, 0, NULL); + if (!ch->rsp_buf_cache) + goto free_ch; + + ch->ioctx_ring = (struct srpt_send_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_ring[0]), + ch->rsp_buf_cache, 0, DMA_TO_DEVICE); + if (!ch->ioctx_ring) { + pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n"); + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + goto free_rsp_cache; + } + + for (i = 0; i < ch->rq_size; i++) + ch->ioctx_ring[i]->ch = ch; + if (!sdev->use_srq) { + u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ? + be16_to_cpu(req->imm_data_offset) : 0; + u16 alignment_offset; + u32 req_sz; + + if (req->req_flags & SRP_IMMED_REQUESTED) + pr_debug("imm_data_offset = %d\n", + be16_to_cpu(req->imm_data_offset)); + if (imm_data_offset >= sizeof(struct srp_cmd)) { + ch->imm_data_offset = imm_data_offset; + rsp->rsp_flags |= SRP_LOGIN_RSP_IMMED_SUPP; + } else { + ch->imm_data_offset = 0; + } + alignment_offset = round_up(imm_data_offset, 512) - + imm_data_offset; + req_sz = alignment_offset + imm_data_offset + srp_max_req_size; + ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, + 512, 0, NULL); + if (!ch->req_buf_cache) + goto free_rsp_ring; + + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_recv_ring[0]), + ch->req_buf_cache, + alignment_offset, + DMA_FROM_DEVICE); + if (!ch->ioctx_recv_ring) { + pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); + rej->reason = + cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + goto free_recv_cache; + } + for (i = 0; i < ch->rq_size; i++) + INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list); + } + + ret = srpt_create_ch_ib(ch); + if (ret) { + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_err("rejected SRP_LOGIN_REQ because creating a new RDMA channel failed.\n"); + goto free_recv_ring; + } + + strscpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); + snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx", + be64_to_cpu(*(__be64 *)nexus->i_port_id), + be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); + + pr_debug("registering src addr %s or i_port_id %s\n", ch->sess_name, + i_port_id); + + tag_num = ch->rq_size; + tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ + + if (sport->guid_id) { + mutex_lock(&sport->guid_id->mutex); + list_for_each_entry(stpg, &sport->guid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, + tag_size, TARGET_PROT_NORMAL, + ch->sess_name, ch, NULL); + } + mutex_unlock(&sport->guid_id->mutex); + } + + if (sport->gid_id) { + mutex_lock(&sport->gid_id->mutex); + list_for_each_entry(stpg, &sport->gid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, + tag_size, TARGET_PROT_NORMAL, i_port_id, + ch, NULL); + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, + tag_size, TARGET_PROT_NORMAL, + i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->gid_id->mutex); + } + + if (IS_ERR_OR_NULL(ch->sess)) { + WARN_ON_ONCE(ch->sess == NULL); + ret = PTR_ERR(ch->sess); + ch->sess = NULL; + pr_info("Rejected login for initiator %s: ret = %d.\n", + ch->sess_name, ret); + rej->reason = cpu_to_be32(ret == -ENOMEM ? + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + goto destroy_ib; + } + + /* + * Once a session has been created destruction of srpt_rdma_ch objects + * will decrement sport->refcount. Hence increment sport->refcount now. + */ + atomic_inc(&sport->refcount); + + mutex_lock(&sport->mutex); + + if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { + struct srpt_rdma_ch *ch2; + + list_for_each_entry(ch2, &nexus->ch_list, list) { + if (srpt_disconnect_ch(ch2) < 0) + continue; + pr_info("Relogin - closed existing channel %s\n", + ch2->sess_name); + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_TERMINATED; + } + } else { + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; + } + + list_add_tail_rcu(&ch->list, &nexus->ch_list); + + if (!sport->enabled) { + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n", + dev_name(&sdev->device->dev), port_num); + mutex_unlock(&sport->mutex); + ret = -EINVAL; + goto reject; + } + + mutex_unlock(&sport->mutex); + + ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rtr(ch, ch->qp); + if (ret) { + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", + ret); + goto reject; + } + + pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, + ch->sess_name, ch); + + /* create srp_login_response */ + rsp->opcode = SRP_LOGIN_RSP; + rsp->tag = req->tag; + rsp->max_it_iu_len = cpu_to_be32(srp_max_req_size); + rsp->max_ti_iu_len = req->req_it_iu_len; + ch->max_ti_iu_len = it_iu_len; + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | + SRP_BUF_FORMAT_INDIRECT); + rsp->req_lim_delta = cpu_to_be32(ch->rq_size); + atomic_set(&ch->req_lim, ch->rq_size); + atomic_set(&ch->req_lim_delta, 0); + + /* create cm reply */ + if (ch->using_rdma_cm) { + rep_param->rdma_cm.private_data = (void *)rsp; + rep_param->rdma_cm.private_data_len = sizeof(*rsp); + rep_param->rdma_cm.rnr_retry_count = 7; + rep_param->rdma_cm.flow_control = 1; + rep_param->rdma_cm.responder_resources = 4; + rep_param->rdma_cm.initiator_depth = 4; + } else { + rep_param->ib_cm.qp_num = ch->qp->qp_num; + rep_param->ib_cm.private_data = (void *)rsp; + rep_param->ib_cm.private_data_len = sizeof(*rsp); + rep_param->ib_cm.rnr_retry_count = 7; + rep_param->ib_cm.flow_control = 1; + rep_param->ib_cm.failover_accepted = 0; + rep_param->ib_cm.srq = 1; + rep_param->ib_cm.responder_resources = 4; + rep_param->ib_cm.initiator_depth = 4; + } + + /* + * Hold the sport mutex while accepting a connection to avoid that + * srpt_disconnect_ch() is invoked concurrently with this code. + */ + mutex_lock(&sport->mutex); + if (sport->enabled && ch->state == CH_CONNECTING) { + if (ch->using_rdma_cm) + ret = rdma_accept(rdma_cm_id, &rep_param->rdma_cm); + else + ret = ib_send_cm_rep(ib_cm_id, &rep_param->ib_cm); + } else { + ret = -EINVAL; + } + mutex_unlock(&sport->mutex); + + switch (ret) { + case 0: + break; + case -EINVAL: + goto reject; + default: + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_err("sending SRP_LOGIN_REQ response failed (error code = %d)\n", + ret); + goto reject; + } + + goto out; + +destroy_ib: + srpt_destroy_ch_ib(ch); + +free_recv_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + ch->sport->sdev, ch->rq_size, + ch->req_buf_cache, DMA_FROM_DEVICE); + +free_recv_cache: + kmem_cache_destroy(ch->req_buf_cache); + +free_rsp_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_buf_cache, DMA_TO_DEVICE); + +free_rsp_cache: + kmem_cache_destroy(ch->rsp_buf_cache); + +free_ch: + if (rdma_cm_id) + rdma_cm_id->context = NULL; + else + ib_cm_id->context = NULL; + kfree(ch); + ch = NULL; + + WARN_ON_ONCE(ret == 0); + +reject: + pr_info("Rejecting login with reason %#x\n", be32_to_cpu(rej->reason)); + rej->opcode = SRP_LOGIN_REJ; + rej->tag = req->tag; + rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | + SRP_BUF_FORMAT_INDIRECT); + + if (rdma_cm_id) + rdma_reject(rdma_cm_id, rej, sizeof(*rej), + IB_CM_REJ_CONSUMER_DEFINED); + else + ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + rej, sizeof(*rej)); + + if (ch && ch->sess) { + srpt_close_ch(ch); + /* + * Tell the caller not to free cm_id since + * srpt_release_channel_work() will do that. + */ + ret = 0; + } + +out: + kfree(rep_param); + kfree(rsp); + kfree(rej); + + return ret; +} + +static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, + const struct ib_cm_req_event_param *param, + void *private_data) +{ + char sguid[40]; + + srpt_format_guid(sguid, sizeof(sguid), + ¶m->primary_path->dgid.global.interface_id); + + return srpt_cm_req_recv(cm_id->context, cm_id, NULL, param->port, + param->primary_path->pkey, + private_data, sguid); +} + +static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct srpt_device *sdev; + struct srp_login_req req; + const struct srp_login_req_rdma *req_rdma; + struct sa_path_rec *path_rec = cm_id->route.path_rec; + char src_addr[40]; + + sdev = ib_get_client_data(cm_id->device, &srpt_client); + if (!sdev) + return -ECONNREFUSED; + + if (event->param.conn.private_data_len < sizeof(*req_rdma)) + return -EINVAL; + + /* Transform srp_login_req_rdma into srp_login_req. */ + req_rdma = event->param.conn.private_data; + memset(&req, 0, sizeof(req)); + req.opcode = req_rdma->opcode; + req.tag = req_rdma->tag; + req.req_it_iu_len = req_rdma->req_it_iu_len; + req.req_buf_fmt = req_rdma->req_buf_fmt; + req.req_flags = req_rdma->req_flags; + memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16); + memcpy(req.target_port_id, req_rdma->target_port_id, 16); + req.imm_data_offset = req_rdma->imm_data_offset; + + snprintf(src_addr, sizeof(src_addr), "%pIS", + &cm_id->route.addr.src_addr); + + return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num, + path_rec ? path_rec->pkey : 0, &req, src_addr); +} + +static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, + enum ib_cm_rej_reason reason, + const u8 *private_data, + u8 private_data_len) +{ + char *priv = NULL; + int i; + + if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1, + GFP_KERNEL))) { + for (i = 0; i < private_data_len; i++) + sprintf(priv + 3 * i, " %02x", private_data[i]); + } + pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n", + ch->sess_name, ch->qp->qp_num, reason, private_data_len ? + "; private data" : "", priv ? priv : " (?)"); + kfree(priv); +} + +/** + * srpt_cm_rtu_recv - process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event + * @ch: SRPT RDMA channel. + * + * An RTU (ready to use) message indicates that the connection has been + * established and that the recipient may begin transmitting. + */ +static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) +{ + int ret; + + ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rts(ch, ch->qp); + if (ret < 0) { + pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name, + ch->qp->qp_num); + srpt_close_ch(ch); + return; + } + + /* + * Note: calling srpt_close_ch() if the transition to the LIVE state + * fails is not necessary since that means that that function has + * already been invoked from another thread. + */ + if (!srpt_set_ch_state(ch, CH_LIVE)) { + pr_err("%s-%d: channel transition to LIVE state failed\n", + ch->sess_name, ch->qp->qp_num); + return; + } + + /* Trigger wait list processing. */ + ret = srpt_zerolength_write(ch); + WARN_ONCE(ret < 0, "%d\n", ret); +} + +/** + * srpt_cm_handler - IB connection manager callback function + * @cm_id: IB/CM connection identifier. + * @event: IB/CM event. + * + * A non-zero return value will cause the caller destroy the CM ID. + * + * Note: srpt_cm_handler() must only return a non-zero value when transferring + * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning + * a non-zero value in any other case will trigger a race with the + * ib_destroy_cm_id() call in srpt_release_channel(). + */ +static int srpt_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) +{ + struct srpt_rdma_ch *ch = cm_id->context; + int ret; + + ret = 0; + switch (event->event) { + case IB_CM_REQ_RECEIVED: + ret = srpt_ib_cm_req_recv(cm_id, &event->param.req_rcvd, + event->private_data); + break; + case IB_CM_REJ_RECEIVED: + srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason, + event->private_data, + IB_CM_REJ_PRIVATE_DATA_SIZE); + break; + case IB_CM_RTU_RECEIVED: + case IB_CM_USER_ESTABLISHED: + srpt_cm_rtu_recv(ch); + break; + case IB_CM_DREQ_RECEIVED: + srpt_disconnect_ch(ch); + break; + case IB_CM_DREP_RECEIVED: + pr_info("Received CM DREP message for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); + break; + case IB_CM_TIMEWAIT_EXIT: + pr_info("Received CM TimeWait exit for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); + break; + case IB_CM_REP_ERROR: + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); + break; + case IB_CM_DREQ_ERROR: + pr_info("Received CM DREQ ERROR event.\n"); + break; + case IB_CM_MRA_RECEIVED: + pr_info("Received CM MRA event\n"); + break; + default: + pr_err("received unrecognized CM event %d\n", event->event); + break; + } + + return ret; +} + +static int srpt_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct srpt_rdma_ch *ch = cm_id->context; + int ret = 0; + + switch (event->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + ret = srpt_rdma_cm_req_recv(cm_id, event); + break; + case RDMA_CM_EVENT_REJECTED: + srpt_cm_rej_recv(ch, event->status, + event->param.conn.private_data, + event->param.conn.private_data_len); + break; + case RDMA_CM_EVENT_ESTABLISHED: + srpt_cm_rtu_recv(ch); + break; + case RDMA_CM_EVENT_DISCONNECTED: + if (ch->state < CH_DISCONNECTING) + srpt_disconnect_ch(ch); + else + srpt_close_ch(ch); + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + srpt_close_ch(ch); + break; + case RDMA_CM_EVENT_UNREACHABLE: + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_ADDR_CHANGE: + break; + default: + pr_err("received unrecognized RDMA CM event %d\n", + event->event); + break; + } + + return ret; +} + +/* + * srpt_write_pending - Start data transfer from initiator to target (write). + */ +static int srpt_write_pending(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx = + container_of(se_cmd, struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + struct ib_send_wr *first_wr = NULL; + struct ib_cqe *cqe = &ioctx->rdma_cqe; + enum srpt_command_state new_state; + int ret, i; + + if (ioctx->recv_ioctx) { + srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); + target_execute_cmd(&ioctx->cmd); + return 0; + } + + new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); + WARN_ON(new_state == SRPT_STATE_DONE); + + if (atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail) < 0) { + pr_warn("%s: IB send queue full (needed %d)\n", + __func__, ioctx->n_rdma); + ret = -ENOMEM; + goto out_undo; + } + + cqe->done = srpt_rdma_read_done; + for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) { + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; + + first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port, + cqe, first_wr); + cqe = NULL; + } + + ret = ib_post_send(ch->qp, first_wr, NULL); + if (ret) { + pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", + __func__, ret, ioctx->n_rdma, + atomic_read(&ch->sq_wr_avail)); + goto out_undo; + } + + return 0; +out_undo: + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + return ret; +} + +static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) +{ + switch (tcm_mgmt_status) { + case TMR_FUNCTION_COMPLETE: + return SRP_TSK_MGMT_SUCCESS; + case TMR_FUNCTION_REJECTED: + return SRP_TSK_MGMT_FUNC_NOT_SUPP; + } + return SRP_TSK_MGMT_FAILED; +} + +/** + * srpt_queue_response - transmit the response to a SCSI command + * @cmd: SCSI target command. + * + * Callback function called by the TCM core. Must not block since it can be + * invoked on the context of the IB completion handler. + */ +static void srpt_queue_response(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx = + container_of(cmd, struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + struct srpt_device *sdev = ch->sport->sdev; + struct ib_send_wr send_wr, *first_wr = &send_wr; + struct ib_sge sge; + enum srpt_command_state state; + int resp_len, ret, i; + u8 srp_tm_status; + + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + ioctx->state = SRPT_STATE_CMD_RSP_SENT; + break; + case SRPT_STATE_MGMT: + ioctx->state = SRPT_STATE_MGMT_RSP_SENT; + break; + default: + WARN(true, "ch %p; cmd %d: unexpected command state %d\n", + ch, ioctx->ioctx.index, ioctx->state); + break; + } + + if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)) + return; + + /* For read commands, transfer the data to the initiator. */ + if (ioctx->cmd.data_direction == DMA_FROM_DEVICE && + ioctx->cmd.data_length && + !ioctx->queue_status_only) { + for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) { + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; + + first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, + ch->sport->port, NULL, first_wr); + } + } + + if (state != SRPT_STATE_MGMT) + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->cmd.tag, + cmd->scsi_status); + else { + srp_tm_status + = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); + resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, + ioctx->cmd.tag); + } + + atomic_inc(&ch->req_lim); + + if (unlikely(atomic_sub_return(1 + ioctx->n_rdma, + &ch->sq_wr_avail) < 0)) { + pr_warn("%s: IB send queue full (needed %d)\n", + __func__, ioctx->n_rdma); + goto out; + } + + ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, resp_len, + DMA_TO_DEVICE); + + sge.addr = ioctx->ioctx.dma; + sge.length = resp_len; + sge.lkey = sdev->lkey; + + ioctx->ioctx.cqe.done = srpt_send_done; + send_wr.next = NULL; + send_wr.wr_cqe = &ioctx->ioctx.cqe; + send_wr.sg_list = &sge; + send_wr.num_sge = 1; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(ch->qp, first_wr, NULL); + if (ret < 0) { + pr_err("%s: sending cmd response failed for tag %llu (%d)\n", + __func__, ioctx->cmd.tag, ret); + goto out; + } + + return; + +out: + atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); + atomic_dec(&ch->req_lim); + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + target_put_sess_cmd(&ioctx->cmd); +} + +static int srpt_queue_data_in(struct se_cmd *cmd) +{ + srpt_queue_response(cmd); + return 0; +} + +static void srpt_queue_tm_rsp(struct se_cmd *cmd) +{ + srpt_queue_response(cmd); +} + +/* + * This function is called for aborted commands if no response is sent to the + * initiator. Make sure that the credits freed by aborting a command are + * returned to the initiator the next time a response is sent by incrementing + * ch->req_lim_delta. + */ +static void srpt_aborted_task(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx = container_of(cmd, + struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + + atomic_inc(&ch->req_lim_delta); +} + +static int srpt_queue_status(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + BUG_ON(ioctx->sense_data != cmd->sense_buffer); + if (cmd->se_cmd_flags & + (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE)) + WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION); + ioctx->queue_status_only = true; + srpt_queue_response(cmd); + return 0; +} + +static void srpt_refresh_port_work(struct work_struct *work) +{ + struct srpt_port *sport = container_of(work, struct srpt_port, work); + + srpt_refresh_port(sport); +} + +/** + * srpt_release_sport - disable login and wait for associated channels + * @sport: SRPT HCA port. + */ +static int srpt_release_sport(struct srpt_port *sport) +{ + DECLARE_COMPLETION_ONSTACK(c); + struct srpt_nexus *nexus, *next_n; + struct srpt_rdma_ch *ch; + + WARN_ON_ONCE(irqs_disabled()); + + sport->freed_channels = &c; + + mutex_lock(&sport->mutex); + srpt_set_enabled(sport, false); + mutex_unlock(&sport->mutex); + + while (atomic_read(&sport->refcount) > 0 && + wait_for_completion_timeout(&c, 5 * HZ) <= 0) { + pr_info("%s_%d: waiting for unregistration of %d sessions ...\n", + dev_name(&sport->sdev->device->dev), sport->port, + atomic_read(&sport->refcount)); + rcu_read_lock(); + list_for_each_entry(nexus, &sport->nexus_list, entry) { + list_for_each_entry(ch, &nexus->ch_list, list) { + pr_info("%s-%d: state %s\n", + ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); + } + } + rcu_read_unlock(); + } + + mutex_lock(&sport->mutex); + list_for_each_entry_safe(nexus, next_n, &sport->nexus_list, entry) { + list_del(&nexus->entry); + kfree_rcu(nexus, rcu); + } + mutex_unlock(&sport->mutex); + + return 0; +} + +struct port_and_port_id { + struct srpt_port *sport; + struct srpt_port_id **port_id; +}; + +static struct port_and_port_id __srpt_lookup_port(const char *name) +{ + struct ib_device *dev; + struct srpt_device *sdev; + struct srpt_port *sport; + int i; + + list_for_each_entry(sdev, &srpt_dev_list, list) { + dev = sdev->device; + if (!dev) + continue; + + for (i = 0; i < dev->phys_port_cnt; i++) { + sport = &sdev->port[i]; + + if (strcmp(sport->guid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->guid_id}; + } + if (strcmp(sport->gid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->gid_id}; + } + } + } + + return (struct port_and_port_id){}; +} + +/** + * srpt_lookup_port() - Look up an RDMA port by name + * @name: ASCII port name + * + * Increments the RDMA port reference count if an RDMA port pointer is returned. + * The caller must drop that reference count by calling srpt_port_put_ref(). + */ +static struct port_and_port_id srpt_lookup_port(const char *name) +{ + struct port_and_port_id papi; + + spin_lock(&srpt_dev_lock); + papi = __srpt_lookup_port(name); + spin_unlock(&srpt_dev_lock); + + return papi; +} + +static void srpt_free_srq(struct srpt_device *sdev) +{ + if (!sdev->srq) + return; + + ib_destroy_srq(sdev->srq); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, sdev->req_buf_cache, + DMA_FROM_DEVICE); + kmem_cache_destroy(sdev->req_buf_cache); + sdev->srq = NULL; +} + +static int srpt_alloc_srq(struct srpt_device *sdev) +{ + struct ib_srq_init_attr srq_attr = { + .event_handler = srpt_srq_event, + .srq_context = (void *)sdev, + .attr.max_wr = sdev->srq_size, + .attr.max_sge = 1, + .srq_type = IB_SRQT_BASIC, + }; + struct ib_device *device = sdev->device; + struct ib_srq *srq; + int i; + + WARN_ON_ONCE(sdev->srq); + srq = ib_create_srq(sdev->pd, &srq_attr); + if (IS_ERR(srq)) { + pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(srq)); + return PTR_ERR(srq); + } + + pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, + sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); + + sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", + srp_max_req_size, 0, 0, NULL); + if (!sdev->req_buf_cache) + goto free_srq; + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + sdev->req_buf_cache, 0, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto free_cache; + + sdev->use_srq = true; + sdev->srq = srq; + + for (i = 0; i < sdev->srq_size; ++i) { + INIT_LIST_HEAD(&sdev->ioctx_ring[i]->wait_list); + srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); + } + + return 0; + +free_cache: + kmem_cache_destroy(sdev->req_buf_cache); + +free_srq: + ib_destroy_srq(srq); + return -ENOMEM; +} + +static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) +{ + struct ib_device *device = sdev->device; + int ret = 0; + + if (!use_srq) { + srpt_free_srq(sdev); + sdev->use_srq = false; + } else if (use_srq && !sdev->srq) { + ret = srpt_alloc_srq(sdev); + } + pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, + dev_name(&device->dev), sdev->use_srq, ret); + return ret; +} + +static void srpt_free_sdev(struct kref *refcnt) +{ + struct srpt_device *sdev = container_of(refcnt, typeof(*sdev), refcnt); + + kfree(sdev); +} + +static void srpt_sdev_put(struct srpt_device *sdev) +{ + kref_put(&sdev->refcnt, srpt_free_sdev); +} + +/** + * srpt_add_one - InfiniBand device addition callback function + * @device: Describes a HCA. + */ +static int srpt_add_one(struct ib_device *device) +{ + struct srpt_device *sdev; + struct srpt_port *sport; + int ret; + u32 i; + + pr_debug("device = %p\n", device); + + sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), + GFP_KERNEL); + if (!sdev) + return -ENOMEM; + + kref_init(&sdev->refcnt); + sdev->device = device; + mutex_init(&sdev->sdev_mutex); + + sdev->pd = ib_alloc_pd(device, 0); + if (IS_ERR(sdev->pd)) { + ret = PTR_ERR(sdev->pd); + goto free_dev; + } + + sdev->lkey = sdev->pd->local_dma_lkey; + + sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); + + srpt_use_srq(sdev, sdev->port[0].port_attrib.use_srq); + + if (!srpt_service_guid) + srpt_service_guid = be64_to_cpu(device->node_guid); + + if (rdma_port_get_link_layer(device, 1) == IB_LINK_LAYER_INFINIBAND) + sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); + if (IS_ERR(sdev->cm_id)) { + pr_info("ib_create_cm_id() failed: %ld\n", + PTR_ERR(sdev->cm_id)); + ret = PTR_ERR(sdev->cm_id); + sdev->cm_id = NULL; + if (!rdma_cm_id) + goto err_ring; + } + + /* print out target login information */ + pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n", + srpt_service_guid, srpt_service_guid, srpt_service_guid); + + /* + * We do not have a consistent service_id (ie. also id_ext of target_id) + * to identify this target. We currently use the guid of the first HCA + * in the system as service_id; therefore, the target_id will change + * if this HCA is gone bad and replaced by different HCA + */ + ret = sdev->cm_id ? + ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)) : + 0; + if (ret < 0) { + pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret, + sdev->cm_id->state); + goto err_cm; + } + + INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, + srpt_event_handler); + ib_register_event_handler(&sdev->event_handler); + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; + INIT_LIST_HEAD(&sport->nexus_list); + mutex_init(&sport->mutex); + sport->sdev = sdev; + sport->port = i; + sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; + sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; + sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + sport->port_attrib.use_srq = false; + INIT_WORK(&sport->work, srpt_refresh_port_work); + + ret = srpt_refresh_port(sport); + if (ret) { + pr_err("MAD registration failed for %s-%d.\n", + dev_name(&sdev->device->dev), i); + i--; + goto err_port; + } + } + + spin_lock(&srpt_dev_lock); + list_add_tail(&sdev->list, &srpt_dev_list); + spin_unlock(&srpt_dev_lock); + + ib_set_client_data(device, &srpt_client, sdev); + pr_debug("added %s.\n", dev_name(&device->dev)); + return 0; + +err_port: + srpt_unregister_mad_agent(sdev, i); + ib_unregister_event_handler(&sdev->event_handler); +err_cm: + if (sdev->cm_id) + ib_destroy_cm_id(sdev->cm_id); +err_ring: + srpt_free_srq(sdev); + ib_dealloc_pd(sdev->pd); +free_dev: + srpt_sdev_put(sdev); + pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); + return ret; +} + +/** + * srpt_remove_one - InfiniBand device removal callback function + * @device: Describes a HCA. + * @client_data: The value passed as the third argument to ib_set_client_data(). + */ +static void srpt_remove_one(struct ib_device *device, void *client_data) +{ + struct srpt_device *sdev = client_data; + int i; + + srpt_unregister_mad_agent(sdev, sdev->device->phys_port_cnt); + + ib_unregister_event_handler(&sdev->event_handler); + + /* Cancel any work queued by the just unregistered IB event handler. */ + for (i = 0; i < sdev->device->phys_port_cnt; i++) + cancel_work_sync(&sdev->port[i].work); + + if (sdev->cm_id) + ib_destroy_cm_id(sdev->cm_id); + + ib_set_client_data(device, &srpt_client, NULL); + + /* + * Unregistering a target must happen after destroying sdev->cm_id + * such that no new SRP_LOGIN_REQ information units can arrive while + * destroying the target. + */ + spin_lock(&srpt_dev_lock); + list_del(&sdev->list); + spin_unlock(&srpt_dev_lock); + + for (i = 0; i < sdev->device->phys_port_cnt; i++) + srpt_release_sport(&sdev->port[i]); + + srpt_free_srq(sdev); + + ib_dealloc_pd(sdev->pd); + + srpt_sdev_put(sdev); +} + +static struct ib_client srpt_client = { + .name = DRV_NAME, + .add = srpt_add_one, + .remove = srpt_remove_one +}; + +static int srpt_check_true(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int srpt_check_false(struct se_portal_group *se_tpg) +{ + return 0; +} + +static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) +{ + return tpg->se_tpg_wwn->priv; +} + +static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) +{ + struct srpt_port *sport = wwn->priv; + + if (sport->guid_id && &sport->guid_id->wwn == wwn) + return sport->guid_id; + if (sport->gid_id && &sport->gid_id->wwn == wwn) + return sport->gid_id; + WARN_ON_ONCE(true); + return NULL; +} + +static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) +{ + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + + return stpg->sport_id->name; +} + +static u16 srpt_get_tag(struct se_portal_group *tpg) +{ + return 1; +} + +static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + return 1; +} + +static void srpt_release_cmd(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx = container_of(se_cmd, + struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + struct srpt_recv_ioctx *recv_ioctx = ioctx->recv_ioctx; + + WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE && + !(ioctx->cmd.transport_state & CMD_T_ABORTED)); + + if (recv_ioctx) { + WARN_ON_ONCE(!list_empty(&recv_ioctx->wait_list)); + ioctx->recv_ioctx = NULL; + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + } + + if (ioctx->n_rw_ctx) { + srpt_free_rw_ctxs(ch, ioctx); + ioctx->n_rw_ctx = 0; + } + + target_free_tag(se_cmd->se_sess, se_cmd); +} + +/** + * srpt_close_session - forcibly close a session + * @se_sess: SCSI target session. + * + * Callback function invoked by the TCM core to clean up sessions associated + * with a node ACL when the user invokes + * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static void srpt_close_session(struct se_session *se_sess) +{ + struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; + + srpt_disconnect_ch_sync(ch); +} + +/** + * srpt_sess_get_index - return the value of scsiAttIntrPortIndex (SCSI-MIB) + * @se_sess: SCSI target session. + * + * A quote from RFC 4455 (SCSI-MIB) about this MIB object: + * This object represents an arbitrary integer used to uniquely identify a + * particular attached remote initiator port to a particular SCSI target port + * within a particular SCSI target device within a particular SCSI instance. + */ +static u32 srpt_sess_get_index(struct se_session *se_sess) +{ + return 0; +} + +static void srpt_set_default_node_attrs(struct se_node_acl *nacl) +{ +} + +/* Note: only used from inside debug printk's by the TCM core. */ +static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return ioctx->state; +} + +static int srpt_parse_guid(u64 *guid, const char *name) +{ + u16 w[4]; + int ret = -EINVAL; + + if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4) + goto out; + *guid = get_unaligned_be64(w); + ret = 0; +out: + return ret; +} + +/** + * srpt_parse_i_port_id - parse an initiator port ID + * @name: ASCII representation of a 128-bit initiator port ID. + * @i_port_id: Binary 128-bit port ID. + */ +static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) +{ + const char *p; + unsigned len, count, leading_zero_bytes; + int ret; + + p = name; + if (strncasecmp(p, "0x", 2) == 0) + p += 2; + ret = -EINVAL; + len = strlen(p); + if (len % 2) + goto out; + count = min(len / 2, 16U); + leading_zero_bytes = 16 - count; + memset(i_port_id, 0, leading_zero_bytes); + ret = hex2bin(i_port_id + leading_zero_bytes, p, count); + +out: + return ret; +} + +/* + * configfs callback function invoked for mkdir + * /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + * + * i_port_id must be an initiator port GUID, GID or IP address. See also the + * target_alloc_session() calls in this driver. Examples of valid initiator + * port IDs: + * 0x0000000000000000505400fffe4a0b7b + * 0000000000000000505400fffe4a0b7b + * 5054:00ff:fe4a:0b7b + * 192.168.122.76 + */ +static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) +{ + struct sockaddr_storage sa; + u64 guid; + u8 i_port_id[16]; + int ret; + + ret = srpt_parse_guid(&guid, name); + if (ret < 0) + ret = srpt_parse_i_port_id(i_port_id, name); + if (ret < 0) + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, name, NULL, + &sa); + if (ret < 0) + pr_err("invalid initiator port ID %s\n", name); + return ret; +} + +static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); +} + +static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + unsigned long val; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) { + pr_err("kstrtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RDMA_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RDMA_SIZE: %d\n", val, + MAX_SRPT_RDMA_SIZE); + return -EINVAL; + } + if (val < DEFAULT_MAX_RDMA_SIZE) { + pr_err("val: %lu smaller than DEFAULT_MAX_RDMA_SIZE: %d\n", + val, DEFAULT_MAX_RDMA_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rdma_size = val; + + return count; +} + +static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); +} + +static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + unsigned long val; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) { + pr_err("kstrtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RSP_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RSP_SIZE: %d\n", val, + MAX_SRPT_RSP_SIZE); + return -EINVAL; + } + if (val < MIN_MAX_RSP_SIZE) { + pr_err("val: %lu smaller than MIN_MAX_RSP_SIZE: %d\n", val, + MIN_MAX_RSP_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rsp_size = val; + + return count; +} + +static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); +} + +static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + unsigned long val; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) { + pr_err("kstrtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_SRQ_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_SRQ_SIZE: %d\n", val, + MAX_SRPT_SRQ_SIZE); + return -EINVAL; + } + if (val < MIN_SRPT_SRQ_SIZE) { + pr_err("val: %lu smaller than MIN_SRPT_SRQ_SIZE: %d\n", val, + MIN_SRPT_SRQ_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_sq_size = val; + + return count; +} + +static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); +} + +static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + struct srpt_device *sdev = sport->sdev; + unsigned long val; + bool enabled; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) + return ret; + if (val != !!val) + return -EINVAL; + + ret = mutex_lock_interruptible(&sdev->sdev_mutex); + if (ret < 0) + return ret; + ret = mutex_lock_interruptible(&sport->mutex); + if (ret < 0) + goto unlock_sdev; + enabled = sport->enabled; + /* Log out all initiator systems before changing 'use_srq'. */ + srpt_set_enabled(sport, false); + sport->port_attrib.use_srq = val; + srpt_use_srq(sdev, sport->port_attrib.use_srq); + srpt_set_enabled(sport, enabled); + ret = count; + mutex_unlock(&sport->mutex); +unlock_sdev: + mutex_unlock(&sdev->sdev_mutex); + + return ret; +} + +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, use_srq); + +static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { + &srpt_tpg_attrib_attr_srp_max_rdma_size, + &srpt_tpg_attrib_attr_srp_max_rsp_size, + &srpt_tpg_attrib_attr_srp_sq_size, + &srpt_tpg_attrib_attr_use_srq, + NULL, +}; + +static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr) +{ + struct rdma_cm_id *rdma_cm_id; + int ret; + + rdma_cm_id = rdma_create_id(&init_net, srpt_rdma_cm_handler, + NULL, RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(rdma_cm_id)) { + pr_err("RDMA/CM ID creation failed: %ld\n", + PTR_ERR(rdma_cm_id)); + goto out; + } + + ret = rdma_bind_addr(rdma_cm_id, listen_addr); + if (ret) { + char addr_str[64]; + + snprintf(addr_str, sizeof(addr_str), "%pISp", listen_addr); + pr_err("Binding RDMA/CM ID to address %s failed: %d\n", + addr_str, ret); + rdma_destroy_id(rdma_cm_id); + rdma_cm_id = ERR_PTR(ret); + goto out; + } + + ret = rdma_listen(rdma_cm_id, 128); + if (ret) { + pr_err("rdma_listen() failed: %d\n", ret); + rdma_destroy_id(rdma_cm_id); + rdma_cm_id = ERR_PTR(ret); + } + +out: + return rdma_cm_id; +} + +static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) +{ + return sysfs_emit(page, "%d\n", rdma_cm_port); +} + +static ssize_t srpt_rdma_cm_port_store(struct config_item *item, + const char *page, size_t count) +{ + struct sockaddr_in addr4 = { .sin_family = AF_INET }; + struct sockaddr_in6 addr6 = { .sin6_family = AF_INET6 }; + struct rdma_cm_id *new_id = NULL; + u16 val; + int ret; + + ret = kstrtou16(page, 0, &val); + if (ret < 0) + return ret; + ret = count; + if (rdma_cm_port == val) + goto out; + + if (val) { + addr6.sin6_port = cpu_to_be16(val); + new_id = srpt_create_rdma_id((struct sockaddr *)&addr6); + if (IS_ERR(new_id)) { + addr4.sin_port = cpu_to_be16(val); + new_id = srpt_create_rdma_id((struct sockaddr *)&addr4); + if (IS_ERR(new_id)) { + ret = PTR_ERR(new_id); + goto out; + } + } + } + + mutex_lock(&rdma_cm_mutex); + rdma_cm_port = val; + swap(rdma_cm_id, new_id); + mutex_unlock(&rdma_cm_mutex); + + if (new_id) + rdma_destroy_id(new_id); + ret = count; +out: + return ret; +} + +CONFIGFS_ATTR(srpt_, rdma_cm_port); + +static struct configfs_attribute *srpt_da_attrs[] = { + &srpt_attr_rdma_cm_port, + NULL, +}; + +static int srpt_enable_tpg(struct se_portal_group *se_tpg, bool enable) +{ + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + mutex_lock(&sport->mutex); + srpt_set_enabled(sport, enable); + mutex_unlock(&sport->mutex); + + return 0; +} + +/** + * srpt_make_tpg - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port/$tpg + * @wwn: Corresponds to $driver/$port. + * @name: $tpg. + */ +static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, + const char *name) +{ + struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn); + struct srpt_tpg *stpg; + int res = -ENOMEM; + + stpg = kzalloc(sizeof(*stpg), GFP_KERNEL); + if (!stpg) + return ERR_PTR(res); + stpg->sport_id = sport_id; + res = core_tpg_register(wwn, &stpg->tpg, SCSI_PROTOCOL_SRP); + if (res) { + kfree(stpg); + return ERR_PTR(res); + } + + mutex_lock(&sport_id->mutex); + list_add_tail(&stpg->entry, &sport_id->tpg_list); + mutex_unlock(&sport_id->mutex); + + return &stpg->tpg; +} + +/** + * srpt_drop_tpg - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port/$tpg + * @tpg: Target portal group to deregister. + */ +static void srpt_drop_tpg(struct se_portal_group *tpg) +{ + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + struct srpt_port_id *sport_id = stpg->sport_id; + struct srpt_port *sport = srpt_tpg_to_sport(tpg); + + mutex_lock(&sport_id->mutex); + list_del(&stpg->entry); + mutex_unlock(&sport_id->mutex); + + sport->enabled = false; + core_tpg_deregister(tpg); + kfree(stpg); +} + +/** + * srpt_make_tport - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port + * @tf: Not used. + * @group: Not used. + * @name: $port. + */ +static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct port_and_port_id papi = srpt_lookup_port(name); + struct srpt_port *sport = papi.sport; + struct srpt_port_id *port_id; + + if (!papi.port_id) + return ERR_PTR(-EINVAL); + if (*papi.port_id) { + /* Attempt to create a directory that already exists. */ + WARN_ON_ONCE(true); + return &(*papi.port_id)->wwn; + } + port_id = kzalloc(sizeof(*port_id), GFP_KERNEL); + if (!port_id) { + srpt_sdev_put(sport->sdev); + return ERR_PTR(-ENOMEM); + } + mutex_init(&port_id->mutex); + INIT_LIST_HEAD(&port_id->tpg_list); + port_id->wwn.priv = sport; + memcpy(port_id->name, port_id == sport->guid_id ? sport->guid_name : + sport->gid_name, ARRAY_SIZE(port_id->name)); + + *papi.port_id = port_id; + + return &port_id->wwn; +} + +/** + * srpt_drop_tport - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port + * @wwn: $port. + */ +static void srpt_drop_tport(struct se_wwn *wwn) +{ + struct srpt_port_id *port_id = container_of(wwn, typeof(*port_id), wwn); + struct srpt_port *sport = wwn->priv; + + if (sport->guid_id == port_id) + sport->guid_id = NULL; + else if (sport->gid_id == port_id) + sport->gid_id = NULL; + else + WARN_ON_ONCE(true); + + srpt_sdev_put(sport->sdev); + kfree(port_id); +} + +static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) +{ + return sysfs_emit(buf, "\n"); +} + +CONFIGFS_ATTR_RO(srpt_wwn_, version); + +static struct configfs_attribute *srpt_wwn_attrs[] = { + &srpt_wwn_attr_version, + NULL, +}; + +static const struct target_core_fabric_ops srpt_template = { + .module = THIS_MODULE, + .fabric_name = "srpt", + .tpg_get_wwn = srpt_get_fabric_wwn, + .tpg_get_tag = srpt_get_tag, + .tpg_check_demo_mode = srpt_check_false, + .tpg_check_demo_mode_cache = srpt_check_true, + .tpg_check_demo_mode_write_protect = srpt_check_true, + .tpg_check_prod_mode_write_protect = srpt_check_false, + .tpg_get_inst_index = srpt_tpg_get_inst_index, + .release_cmd = srpt_release_cmd, + .check_stop_free = srpt_check_stop_free, + .close_session = srpt_close_session, + .sess_get_index = srpt_sess_get_index, + .sess_get_initiator_sid = NULL, + .write_pending = srpt_write_pending, + .set_default_node_attributes = srpt_set_default_node_attrs, + .get_cmd_state = srpt_get_tcm_cmd_state, + .queue_data_in = srpt_queue_data_in, + .queue_status = srpt_queue_status, + .queue_tm_rsp = srpt_queue_tm_rsp, + .aborted_task = srpt_aborted_task, + /* + * Setup function pointers for generic logic in + * target_core_fabric_configfs.c + */ + .fabric_make_wwn = srpt_make_tport, + .fabric_drop_wwn = srpt_drop_tport, + .fabric_make_tpg = srpt_make_tpg, + .fabric_enable_tpg = srpt_enable_tpg, + .fabric_drop_tpg = srpt_drop_tpg, + .fabric_init_nodeacl = srpt_init_nodeacl, + + .tfc_discovery_attrs = srpt_da_attrs, + .tfc_wwn_attrs = srpt_wwn_attrs, + .tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs, +}; + +/** + * srpt_init_module - kernel module initialization + * + * Note: Since ib_register_client() registers callback functions, and since at + * least one of these callback functions (srpt_add_one()) calls target core + * functions, this driver must be registered with the target core before + * ib_register_client() is called. + */ +static int __init srpt_init_module(void) +{ + int ret; + + ret = -EINVAL; + if (srp_max_req_size < MIN_MAX_REQ_SIZE) { + pr_err("invalid value %d for kernel module parameter srp_max_req_size -- must be at least %d.\n", + srp_max_req_size, MIN_MAX_REQ_SIZE); + goto out; + } + + if (srpt_srq_size < MIN_SRPT_SRQ_SIZE + || srpt_srq_size > MAX_SRPT_SRQ_SIZE) { + pr_err("invalid value %d for kernel module parameter srpt_srq_size -- must be in the range [%d..%d].\n", + srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE); + goto out; + } + + ret = target_register_template(&srpt_template); + if (ret) + goto out; + + ret = ib_register_client(&srpt_client); + if (ret) { + pr_err("couldn't register IB client\n"); + goto out_unregister_target; + } + + return 0; + +out_unregister_target: + target_unregister_template(&srpt_template); +out: + return ret; +} + +static void __exit srpt_cleanup_module(void) +{ + if (rdma_cm_id) + rdma_destroy_id(rdma_cm_id); + ib_unregister_client(&srpt_client); + target_unregister_template(&srpt_template); +} + +module_init(srpt_init_module); +module_exit(srpt_cleanup_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h new file mode 100644 index 000000000..4c46b301e --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * Copyright (C) 2009 - 2010 Bart Van Assche <bvanassche@acm.org>. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef IB_SRPT_H +#define IB_SRPT_H + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/wait.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cm.h> +#include <rdma/rdma_cm.h> +#include <rdma/rw.h> + +#include <scsi/srp.h> + +#include "ib_dm_mad.h" + +/* + * The prefix the ServiceName field must start with in the device management + * ServiceEntries attribute pair. See also the SRP specification. + */ +#define SRP_SERVICE_NAME_PREFIX "SRP.T10:" + +struct srpt_nexus; + +enum { + /* + * SRP IOControllerProfile attributes for SRP target ports that have + * not been defined in <scsi/srp.h>. Source: section B.7, table B.7 + * in the SRP specification. + */ + SRP_PROTOCOL = 0x0108, + SRP_PROTOCOL_VERSION = 0x0001, + SRP_IO_SUBCLASS = 0x609e, + SRP_SEND_TO_IOC = 0x01, + SRP_SEND_FROM_IOC = 0x02, + SRP_RDMA_READ_FROM_IOC = 0x08, + SRP_RDMA_WRITE_FROM_IOC = 0x20, + + /* + * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP + * specification. + */ + SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */ + SRP_LOSOLNT = 0x10, /* logout solicited notification */ + SRP_CRSOLNT = 0x20, /* credit request solicited notification */ + SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */ + + /* + * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables + * 18 and 20 in the SRP specification. + */ + SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */ + SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */ + + /* + * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables + * 16 and 22 in the SRP specification. + */ + SRP_SOLNT = 0x01, /* SOLNT = solicited notification */ + + /* See also table 24 in the SRP specification. */ + SRP_TSK_MGMT_SUCCESS = 0x00, + SRP_TSK_MGMT_FUNC_NOT_SUPP = 0x04, + SRP_TSK_MGMT_FAILED = 0x05, + + /* See also table 21 in the SRP specification. */ + SRP_CMD_SIMPLE_Q = 0x0, + SRP_CMD_HEAD_OF_Q = 0x1, + SRP_CMD_ORDERED_Q = 0x2, + SRP_CMD_ACA = 0x4, + + SRPT_DEF_SG_TABLESIZE = 128, + + MIN_SRPT_SQ_SIZE = 16, + DEF_SRPT_SQ_SIZE = 4096, + MAX_SRPT_RQ_SIZE = 128, + MIN_SRPT_SRQ_SIZE = 4, + DEFAULT_SRPT_SRQ_SIZE = 4095, + MAX_SRPT_SRQ_SIZE = 65535, + MAX_SRPT_RDMA_SIZE = 1U << 24, + MAX_SRPT_RSP_SIZE = 1024, + + SRP_MAX_ADD_CDB_LEN = 16, + SRP_MAX_IMM_DATA_OFFSET = 80, + SRP_MAX_IMM_DATA = 8 * 1024, + MIN_MAX_REQ_SIZE = 996, + DEFAULT_MAX_REQ_SIZE_1 = sizeof(struct srp_cmd)/*48*/ + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf)/*20*/ + + 128 * sizeof(struct srp_direct_buf)/*16*/, + DEFAULT_MAX_REQ_SIZE_2 = SRP_MAX_IMM_DATA_OFFSET + + sizeof(struct srp_imm_buf) + SRP_MAX_IMM_DATA, + DEFAULT_MAX_REQ_SIZE = DEFAULT_MAX_REQ_SIZE_1 > DEFAULT_MAX_REQ_SIZE_2 ? + DEFAULT_MAX_REQ_SIZE_1 : DEFAULT_MAX_REQ_SIZE_2, + + MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4, + DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */ + + DEFAULT_MAX_RDMA_SIZE = 65536, +}; + +/** + * enum srpt_command_state - SCSI command state managed by SRPT + * @SRPT_STATE_NEW: New command arrived and is being processed. + * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting + * for data arrival. + * @SRPT_STATE_DATA_IN: Data for the write or bidir command arrived and is + * being processed. + * @SRPT_STATE_CMD_RSP_SENT: SRP_RSP for SRP_CMD has been sent. + * @SRPT_STATE_MGMT: Processing a SCSI task management command. + * @SRPT_STATE_MGMT_RSP_SENT: SRP_RSP for SRP_TSK_MGMT has been sent. + * @SRPT_STATE_DONE: Command processing finished successfully, command + * processing has been aborted or command processing + * failed. + */ +enum srpt_command_state { + SRPT_STATE_NEW = 0, + SRPT_STATE_NEED_DATA = 1, + SRPT_STATE_DATA_IN = 2, + SRPT_STATE_CMD_RSP_SENT = 3, + SRPT_STATE_MGMT = 4, + SRPT_STATE_MGMT_RSP_SENT = 5, + SRPT_STATE_DONE = 6, +}; + +/** + * struct srpt_ioctx - shared SRPT I/O context information + * @cqe: Completion queue element. + * @buf: Pointer to the buffer. + * @dma: DMA address of the buffer. + * @offset: Offset of the first byte in @buf and @dma that is actually used. + * @index: Index of the I/O context in its ioctx_ring array. + */ +struct srpt_ioctx { + struct ib_cqe cqe; + void *buf; + dma_addr_t dma; + uint32_t offset; + uint32_t index; +}; + +/** + * struct srpt_recv_ioctx - SRPT receive I/O context + * @ioctx: See above. + * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. + * @byte_len: Number of bytes in @ioctx.buf. + */ +struct srpt_recv_ioctx { + struct srpt_ioctx ioctx; + struct list_head wait_list; + int byte_len; +}; + +struct srpt_rw_ctx { + struct rdma_rw_ctx rw; + struct scatterlist *sg; + unsigned int nents; +}; + +/** + * struct srpt_send_ioctx - SRPT send I/O context + * @ioctx: See above. + * @ch: Channel pointer. + * @recv_ioctx: Receive I/O context associated with this send I/O context. + * Only used for processing immediate data. + * @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed. + * @rw_ctxs: RDMA read/write contexts. + * @imm_sg: Scatterlist for immediate data. + * @rdma_cqe: RDMA completion queue element. + * @state: I/O context state. + * @cmd: Target core command data structure. + * @sense_data: SCSI sense data. + * @n_rdma: Number of work requests needed to transfer this ioctx. + * @n_rw_ctx: Size of rw_ctxs array. + * @queue_status_only: Send a SCSI status back to the initiator but no data. + * @sense_data: Sense data to be sent to the initiator. + */ +struct srpt_send_ioctx { + struct srpt_ioctx ioctx; + struct srpt_rdma_ch *ch; + struct srpt_recv_ioctx *recv_ioctx; + + struct srpt_rw_ctx s_rw_ctx; + struct srpt_rw_ctx *rw_ctxs; + + struct scatterlist imm_sg; + + struct ib_cqe rdma_cqe; + enum srpt_command_state state; + struct se_cmd cmd; + u8 n_rdma; + u8 n_rw_ctx; + bool queue_status_only; + u8 sense_data[TRANSPORT_SENSE_BUFFER]; +}; + +/** + * enum rdma_ch_state - SRP channel state + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. + * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has + * been received. + * @CH_DRAINING: DREP has been received or waiting for DREP timed out + * and last work request has been queued. + * @CH_DISCONNECTED: Last completion has been received. + */ +enum rdma_ch_state { + CH_CONNECTING, + CH_LIVE, + CH_DISCONNECTING, + CH_DRAINING, + CH_DISCONNECTED, +}; + +/** + * struct srpt_rdma_ch - RDMA channel + * @nexus: I_T nexus this channel is associated with. + * @qp: IB queue pair used for communicating over this channel. + * @ib_cm: See below. + * @ib_cm.cm_id: IB CM ID associated with the channel. + * @rdma_cm: See below. + * @rdma_cm.cm_id: RDMA CM ID associated with the channel. + * @cq: IB completion queue for this channel. + * @cq_size: Number of CQEs in @cq. + * @zw_cqe: Zero-length write CQE. + * @rcu: RCU head. + * @kref: kref for this channel. + * @closed: Completion object that will be signaled as soon as a new + * channel object with the same identity can be created. + * @rq_size: IB receive queue size. + * @max_rsp_size: Maximum size of an RSP response message in bytes. + * @sq_wr_avail: number of work requests available in the send queue. + * @sport: pointer to the information of the HCA port used by this + * channel. + * @max_ti_iu_len: maximum target-to-initiator information unit length. + * @req_lim: request limit: maximum number of requests that may be sent + * by the initiator without having received a response. + * @req_lim_delta: Number of credits not yet sent back to the initiator. + * @imm_data_offset: Offset from start of SRP_CMD for immediate data. + * @spinlock: Protects free_list and state. + * @state: channel state. See also enum rdma_ch_state. + * @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel. + * @processing_wait_list: Whether or not cmd_wait_list is being processed. + * @rsp_buf_cache: kmem_cache for @ioctx_ring. + * @ioctx_ring: Send ring. + * @req_buf_cache: kmem_cache for @ioctx_recv_ring. + * @ioctx_recv_ring: Receive I/O context ring. + * @list: Node in srpt_nexus.ch_list. + * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This + * list contains struct srpt_ioctx elements and is protected + * against concurrent modification by the cm_id spinlock. + * @pkey: P_Key of the IB partition for this SRP channel. + * @sess: Session information associated with this SRP channel. + * @sess_name: Session name. + * @release_work: Allows scheduling of srpt_release_channel(). + */ +struct srpt_rdma_ch { + struct srpt_nexus *nexus; + struct ib_qp *qp; + union { + struct { + struct ib_cm_id *cm_id; + } ib_cm; + struct { + struct rdma_cm_id *cm_id; + } rdma_cm; + }; + struct ib_cq *cq; + u32 cq_size; + struct ib_cqe zw_cqe; + struct rcu_head rcu; + struct kref kref; + struct completion *closed; + int rq_size; + u32 max_rsp_size; + atomic_t sq_wr_avail; + struct srpt_port *sport; + int max_ti_iu_len; + atomic_t req_lim; + atomic_t req_lim_delta; + u16 imm_data_offset; + spinlock_t spinlock; + enum rdma_ch_state state; + struct kmem_cache *rsp_buf_cache; + struct srpt_send_ioctx **ioctx_ring; + struct kmem_cache *req_buf_cache; + struct srpt_recv_ioctx **ioctx_recv_ring; + struct list_head list; + struct list_head cmd_wait_list; + uint16_t pkey; + bool using_rdma_cm; + bool processing_wait_list; + struct se_session *sess; + u8 sess_name[40]; + struct work_struct release_work; +}; + +/** + * struct srpt_nexus - I_T nexus + * @rcu: RCU head for this data structure. + * @entry: srpt_port.nexus_list list node. + * @ch_list: struct srpt_rdma_ch list. Protected by srpt_port.mutex. + * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ. + * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ. + */ +struct srpt_nexus { + struct rcu_head rcu; + struct list_head entry; + struct list_head ch_list; + u8 i_port_id[16]; + u8 t_port_id[16]; +}; + +/** + * struct srpt_port_attrib - attributes for SRPT port + * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. + * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. + * @srp_sq_size: Shared receive queue (SRQ) size. + * @use_srq: Whether or not to use SRQ. + */ +struct srpt_port_attrib { + u32 srp_max_rdma_size; + u32 srp_max_rsp_size; + u32 srp_sq_size; + bool use_srq; +}; + +/** + * struct srpt_tpg - information about a single "target portal group" + * @entry: Entry in @sport_id->tpg_list. + * @sport_id: Port name this TPG is associated with. + * @tpg: LIO TPG data structure. + * + * Zero or more target portal groups are associated with each port name + * (srpt_port_id). With each TPG an ACL list is associated. + */ +struct srpt_tpg { + struct list_head entry; + struct srpt_port_id *sport_id; + struct se_portal_group tpg; +}; + +/** + * struct srpt_port_id - LIO RDMA port information + * @mutex: Protects @tpg_list changes. + * @tpg_list: TPGs associated with the RDMA port name. + * @wwn: WWN associated with the RDMA port name. + * @name: ASCII representation of the port name. + * + * Multiple sysfs directories can be associated with a single RDMA port. This + * data structure represents a single (port, name) pair. + */ +struct srpt_port_id { + struct mutex mutex; + struct list_head tpg_list; + struct se_wwn wwn; + char name[64]; +}; + +/** + * struct srpt_port - SRPT RDMA port information + * @sdev: backpointer to the HCA information. + * @mad_agent: per-port management datagram processing information. + * @enabled: Whether or not this target port is enabled. + * @port: one-based port number. + * @sm_lid: cached value of the port's sm_lid. + * @lid: cached value of the port's lid. + * @gid: cached value of the port's gid. + * @work: work structure for refreshing the aforementioned cached values. + * @guid_name: port name in GUID format. + * @guid_id: LIO target port information for the port name in GUID format. + * @gid_name: port name in GID format. + * @gid_id: LIO target port information for the port name in GID format. + * @port_attrib: Port attributes that can be accessed through configfs. + * @refcount: Number of objects associated with this port. + * @freed_channels: Completion that will be signaled once @refcount becomes 0. + * @mutex: Protects nexus_list. + * @nexus_list: Nexus list. See also srpt_nexus.entry. + */ +struct srpt_port { + struct srpt_device *sdev; + struct ib_mad_agent *mad_agent; + bool enabled; + u8 port; + u32 sm_lid; + u32 lid; + union ib_gid gid; + struct work_struct work; + char guid_name[64]; + struct srpt_port_id *guid_id; + char gid_name[64]; + struct srpt_port_id *gid_id; + struct srpt_port_attrib port_attrib; + atomic_t refcount; + struct completion *freed_channels; + struct mutex mutex; + struct list_head nexus_list; +}; + +/** + * struct srpt_device - information associated by SRPT with a single HCA + * @refcnt: Reference count for this device. + * @device: Backpointer to the struct ib_device managed by the IB core. + * @pd: IB protection domain. + * @lkey: L_Key (local key) with write access to all local memory. + * @srq: Per-HCA SRQ (shared receive queue). + * @cm_id: Connection identifier. + * @srq_size: SRQ size. + * @sdev_mutex: Serializes use_srq changes. + * @use_srq: Whether or not to use SRQ. + * @req_buf_cache: kmem_cache for @ioctx_ring buffers. + * @ioctx_ring: Per-HCA SRQ. + * @event_handler: Per-HCA asynchronous IB event handler. + * @list: Node in srpt_dev_list. + * @port: Information about the ports owned by this HCA. + */ +struct srpt_device { + struct kref refcnt; + struct ib_device *device; + struct ib_pd *pd; + u32 lkey; + struct ib_srq *srq; + struct ib_cm_id *cm_id; + int srq_size; + struct mutex sdev_mutex; + bool use_srq; + struct kmem_cache *req_buf_cache; + struct srpt_recv_ioctx **ioctx_ring; + struct ib_event_handler event_handler; + struct list_head list; + struct srpt_port port[]; +}; + +#endif /* IB_SRPT_H */ |