From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:49:45 +0200 Subject: Adding upstream version 6.1.76. Signed-off-by: Daniel Baumann --- drivers/infiniband/core/mad.c | 3156 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3156 insertions(+) create mode 100644 drivers/infiniband/core/mad.c (limited to 'drivers/infiniband/core/mad.c') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c new file mode 100644 index 000000000..674344eb8 --- /dev/null +++ b/drivers/infiniband/core/mad.c @@ -0,0 +1,3156 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "mad_priv.h" +#include "core_priv.h" +#include "mad_rmpp.h" +#include "smi.h" +#include "opa_smi.h" +#include "agent.h" + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_TRACEPOINTS +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) +{ + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + + rdma_query_ah(wr->ah, &attr); + + /* These are common */ + entry->sl = attr.sl; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +} +#endif + +static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; +static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; + +module_param_named(send_queue_size, mad_sendq_size, int, 0444); +MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); +module_param_named(recv_queue_size, mad_recvq_size, int, 0444); +MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); + +static DEFINE_XARRAY_ALLOC1(ib_mad_clients); +static u32 ib_mad_client_next; +static struct list_head ib_mad_port_list; + +/* Port list lock */ +static DEFINE_SPINLOCK(ib_mad_port_list_lock); + +/* Forward declarations */ +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req); +static void remove_mad_reg_req(struct ib_mad_agent_private *priv); +static struct ib_mad_agent_private *find_mad_agent( + struct ib_mad_port_private *port_priv, + const struct ib_mad_hdr *mad); +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad); +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); +static void timeout_sends(struct work_struct *work); +static void local_completions(struct work_struct *work); +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class); +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv); +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc); +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); + +/* + * Returns a ib_mad_port_private structure or NULL for a device/port + * Assumes ib_mad_port_list_lock is being held + */ +static inline struct ib_mad_port_private * +__ib_get_mad_port(struct ib_device *device, u32 port_num) +{ + struct ib_mad_port_private *entry; + + list_for_each_entry(entry, &ib_mad_port_list, port_list) { + if (entry->device == device && entry->port_num == port_num) + return entry; + } + return NULL; +} + +/* + * Wrapper function to return a ib_mad_port_private structure or NULL + * for a device/port + */ +static inline struct ib_mad_port_private * +ib_get_mad_port(struct ib_device *device, u32 port_num) +{ + struct ib_mad_port_private *entry; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + entry = __ib_get_mad_port(device, port_num); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + return entry; +} + +static inline u8 convert_mgmt_class(u8 mgmt_class) +{ + /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ + return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? + 0 : mgmt_class; +} + +static int get_spl_qp_index(enum ib_qp_type qp_type) +{ + switch (qp_type) { + case IB_QPT_SMI: + return 0; + case IB_QPT_GSI: + return 1; + default: + return -1; + } +} + +static int vendor_class_index(u8 mgmt_class) +{ + return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; +} + +static int is_vendor_class(u8 mgmt_class) +{ + if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || + (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return 0; + return 1; +} + +static int is_vendor_oui(char *oui) +{ + if (oui[0] || oui[1] || oui[2]) + return 1; + return 0; +} + +static int is_vendor_method_in_use( + struct ib_mad_mgmt_vendor_class *vendor_class, + struct ib_mad_reg_req *mad_reg_req) +{ + struct ib_mad_mgmt_method_table *method; + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) { + if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { + method = vendor_class->method_table[i]; + if (method) { + if (method_in_use(&method, mad_reg_req)) + return 1; + else + break; + } + } + } + return 0; +} + +int ib_response_mad(const struct ib_mad_hdr *hdr) +{ + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); +} +EXPORT_SYMBOL(ib_response_mad); + +/* + * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u32 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context, + u32 registration_flags) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_agent *ret = ERR_PTR(-EINVAL); + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_reg_req *reg_req = NULL; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + struct ib_mad_mgmt_method_table *method; + int ret2, qpn; + u8 mgmt_class, vclass; + + if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) || + (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num))) + return ERR_PTR(-EPROTONOSUPPORT); + + /* Validate parameters */ + qpn = get_spl_qp_index(qp_type); + if (qpn == -1) { + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", + __func__, qp_type); + goto error1; + } + + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { + dev_dbg_ratelimited(&device->dev, + "%s: invalid RMPP Version %u\n", + __func__, rmpp_version); + goto error1; + } + + /* Validate MAD registration request if supplied */ + if (mad_reg_req) { + if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { + dev_dbg_ratelimited(&device->dev, + "%s: invalid Class Version %u\n", + __func__, + mad_reg_req->mgmt_class_version); + goto error1; + } + if (!recv_handler) { + dev_dbg_ratelimited(&device->dev, + "%s: no recv_handler\n", __func__); + goto error1; + } + if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { + /* + * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only + * one in this range currently allowed + */ + if (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0x%x\n", + __func__, mad_reg_req->mgmt_class); + goto error1; + } + } else if (mad_reg_req->mgmt_class == 0) { + /* + * Class 0 is reserved in IBA and is used for + * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + */ + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0\n", + __func__); + goto error1; + } else if (is_vendor_class(mad_reg_req->mgmt_class)) { + /* + * If class is in "new" vendor range, + * ensure supplied OUI is not zero + */ + if (!is_vendor_oui(mad_reg_req->oui)) { + dev_dbg_ratelimited(&device->dev, + "%s: No OUI specified for class 0x%x\n", + __func__, + mad_reg_req->mgmt_class); + goto error1; + } + } + /* Make sure class supplied is consistent with RMPP */ + if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { + if (rmpp_version) { + dev_dbg_ratelimited(&device->dev, + "%s: RMPP version for non-RMPP class 0x%x\n", + __func__, mad_reg_req->mgmt_class); + goto error1; + } + } + + /* Make sure class supplied is consistent with QP type */ + if (qp_type == IB_QPT_SMI) { + if ((mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_LID_ROUTED) && + (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid SM QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); + goto error1; + } + } else { + if ((mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid GS QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); + goto error1; + } + } + } else { + /* No registration request supplied */ + if (!send_handler) + goto error1; + if (registration_flags & IB_MAD_USER_RMPP) + goto error1; + } + + /* Validate device and port */ + port_priv = ib_get_mad_port(device, port_num); + if (!port_priv) { + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n", + __func__, port_num); + ret = ERR_PTR(-ENODEV); + goto error1; + } + + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0. + */ + if (!port_priv->qp_info[qpn].qp) { + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", + __func__, qpn); + ret = ERR_PTR(-EPROTONOSUPPORT); + goto error1; + } + + /* Allocate structures */ + mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); + if (!mad_agent_priv) { + ret = ERR_PTR(-ENOMEM); + goto error1; + } + + if (mad_reg_req) { + reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); + if (!reg_req) { + ret = ERR_PTR(-ENOMEM); + goto error3; + } + } + + /* Now, fill in the various structures */ + mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; + mad_agent_priv->reg_req = reg_req; + mad_agent_priv->agent.rmpp_version = rmpp_version; + mad_agent_priv->agent.device = device; + mad_agent_priv->agent.recv_handler = recv_handler; + mad_agent_priv->agent.send_handler = send_handler; + mad_agent_priv->agent.context = context; + mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; + mad_agent_priv->agent.port_num = port_num; + mad_agent_priv->agent.flags = registration_flags; + spin_lock_init(&mad_agent_priv->lock); + INIT_LIST_HEAD(&mad_agent_priv->send_list); + INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); + INIT_LIST_HEAD(&mad_agent_priv->local_list); + INIT_WORK(&mad_agent_priv->local_work, local_completions); + refcount_set(&mad_agent_priv->refcount, 1); + init_completion(&mad_agent_priv->comp); + + ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); + if (ret2) { + ret = ERR_PTR(ret2); + goto error4; + } + + /* + * The mlx4 driver uses the top byte to distinguish which virtual + * function generated the MAD, so we must avoid using it. + */ + ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid, + mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1), + &ib_mad_client_next, GFP_KERNEL); + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } + + /* + * Make sure MAD registration (if supplied) + * is non overlapping with any existing ones + */ + spin_lock_irq(&port_priv->reg_lock); + if (mad_reg_req) { + mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); + if (!is_vendor_class(mgmt_class)) { + class = port_priv->version[mad_reg_req-> + mgmt_class_version].class; + if (class) { + method = class->method_table[mgmt_class]; + if (method) { + if (method_in_use(&method, + mad_reg_req)) + goto error6; + } + } + ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, + mgmt_class); + } else { + /* "New" vendor class range */ + vendor = port_priv->version[mad_reg_req-> + mgmt_class_version].vendor; + if (vendor) { + vclass = vendor_class_index(mgmt_class); + vendor_class = vendor->vendor_class[vclass]; + if (vendor_class) { + if (is_vendor_method_in_use( + vendor_class, + mad_reg_req)) + goto error6; + } + } + ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); + } + if (ret2) { + ret = ERR_PTR(ret2); + goto error6; + } + } + spin_unlock_irq(&port_priv->reg_lock); + + trace_ib_mad_create_agent(mad_agent_priv); + return &mad_agent_priv->agent; +error6: + spin_unlock_irq(&port_priv->reg_lock); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); +error5: + ib_mad_agent_security_cleanup(&mad_agent_priv->agent); +error4: + kfree(reg_req); +error3: + kfree(mad_agent_priv); +error1: + return ret; +} +EXPORT_SYMBOL(ib_register_mad_agent); + +static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + if (refcount_dec_and_test(&mad_agent_priv->refcount)) + complete(&mad_agent_priv->comp); +} + +static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_port_private *port_priv; + + /* Note that we could still be handling received MADs */ + trace_ib_mad_unregister_agent(mad_agent_priv); + + /* + * Canceling all sends results in dropping received response + * MADs, preventing us from queuing additional work + */ + cancel_mads(mad_agent_priv); + port_priv = mad_agent_priv->qp_info->port_priv; + cancel_delayed_work(&mad_agent_priv->timed_work); + + spin_lock_irq(&port_priv->reg_lock); + remove_mad_reg_req(mad_agent_priv); + spin_unlock_irq(&port_priv->reg_lock); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + + flush_workqueue(port_priv->wq); + + deref_mad_agent(mad_agent_priv); + wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); + + ib_mad_agent_security_cleanup(&mad_agent_priv->agent); + + kfree(mad_agent_priv->reg_req); + kfree_rcu(mad_agent_priv, rcu); +} + +/* + * ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. + */ +void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) +{ + struct ib_mad_agent_private *mad_agent_priv; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); +} +EXPORT_SYMBOL(ib_unregister_mad_agent); + +static void dequeue_mad(struct ib_mad_list_head *mad_list) +{ + struct ib_mad_queue *mad_queue; + unsigned long flags; + + mad_queue = mad_list->mad_queue; + spin_lock_irqsave(&mad_queue->lock, flags); + list_del(&mad_list->list); + mad_queue->count--; + spin_unlock_irqrestore(&mad_queue->lock, flags); +} + +static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, + u16 pkey_index, u32 port_num, struct ib_wc *wc) +{ + memset(wc, 0, sizeof *wc); + wc->wr_cqe = cqe; + wc->status = IB_WC_SUCCESS; + wc->opcode = IB_WC_RECV; + wc->pkey_index = pkey_index; + wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); + wc->src_qp = IB_QP0; + wc->qp = qp; + wc->slid = slid; + wc->sl = 0; + wc->dlid_path_bits = 0; + wc->port_num = port_num; +} + +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + +/* + * Return 0 if SMP is to be sent + * Return 1 if SMP was consumed locally (whether or not solicited) + * Return < 0 if error + */ +static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret = 0; + struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; + unsigned long flags; + struct ib_mad_local_private *local; + struct ib_mad_private *mad_priv; + struct ib_mad_port_private *port_priv; + struct ib_mad_agent_private *recv_mad_agent = NULL; + struct ib_device *device = mad_agent_priv->agent.device; + u32 port_num; + struct ib_wc mad_wc; + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + if (rdma_cap_ib_switch(device) && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + port_num = send_wr->port_num; + else + port_num = mad_agent_priv->agent.port_num; + + /* + * Directed route handling starts if the initial LID routed part of + * a request or the ending LID routed part of a response is empty. + * If we are at the start of the LID routed part, don't update the + * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. + */ + if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { + u32 opa_drslid; + + trace_ib_mad_handle_out_opa_smi(opa_smp); + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); + + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + trace_ib_mad_handle_out_ib_smi(smp); + + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } + + local = kmalloc(sizeof *local, GFP_ATOMIC); + if (!local) { + ret = -ENOMEM; + goto out; + } + local->mad_priv = NULL; + local->recv_mad_agent = NULL; + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); + if (!mad_priv) { + ret = -ENOMEM; + kfree(local); + goto out; + } + + build_smp_wc(mad_agent_priv->agent.qp, + send_wr->wr.wr_cqe, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); + + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } + + /* No GRH for DR SMP */ + ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); + switch (ret) { + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && + mad_agent_priv->agent.recv_handler) { + local->mad_priv = mad_priv; + local->recv_mad_agent = mad_agent_priv; + /* + * Reference MAD agent until receive + * side of local completion handled + */ + refcount_inc(&mad_agent_priv->refcount); + } else + kfree(mad_priv); + break; + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: + kfree(mad_priv); + break; + case IB_MAD_RESULT_SUCCESS: + /* Treat like an incoming receive MAD */ + port_priv = ib_get_mad_port(mad_agent_priv->agent.device, + mad_agent_priv->agent.port_num); + if (port_priv) { + memcpy(mad_priv->mad, smp, mad_priv->mad_size); + recv_mad_agent = find_mad_agent(port_priv, + (const struct ib_mad_hdr *)mad_priv->mad); + } + if (!port_priv || !recv_mad_agent) { + /* + * No receiving agent so drop packet and + * generate send completion. + */ + kfree(mad_priv); + break; + } + local->mad_priv = mad_priv; + local->recv_mad_agent = recv_mad_agent; + break; + default: + kfree(mad_priv); + kfree(local); + ret = -EINVAL; + goto out; + } + + local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } + /* Reference MAD agent until send side of local completion handled */ + refcount_inc(&mad_agent_priv->refcount); + /* Queue local completion to local list */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&local->completion_list, &mad_agent_priv->local_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + queue_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->local_work); + ret = 1; +out: + return ret; +} + +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) +{ + int seg_size, pad; + + seg_size = mad_size - hdr_len; + if (data_len && seg_size) { + pad = seg_size - data_len % seg_size; + return pad == seg_size ? 0 : pad; + } else + return seg_size; +} + +static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_segment *s, *t; + + list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { + list_del(&s->list); + kfree(s); + } +} + +static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, + size_t mad_size, gfp_t gfp_mask) +{ + struct ib_mad_send_buf *send_buf = &send_wr->send_buf; + struct ib_rmpp_mad *rmpp_mad = send_buf->mad; + struct ib_rmpp_segment *seg = NULL; + int left, seg_size, pad; + + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; + seg_size = send_buf->seg_size; + pad = send_wr->pad; + + /* Allocate data segments. */ + for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { + seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask); + if (!seg) { + free_send_rmpp_list(send_wr); + return -ENOMEM; + } + seg->num = ++send_buf->seg_count; + list_add_tail(&seg->list, &send_wr->rmpp_list); + } + + /* Zero any padding */ + if (pad) + memset(seg->data + seg_size - pad, 0, pad); + + rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> + agent.rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + send_wr->cur_seg = container_of(send_wr->rmpp_list.next, + struct ib_rmpp_segment, list); + send_wr->last_ack_seg = send_wr->cur_seg; + return 0; +} + +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) +{ + return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); +} +EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); + +struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, int hdr_len, + int data_len, gfp_t gfp_mask, + u8 base_version) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + int pad, message_size, ret, size; + void *buf; + size_t mad_size; + bool opa; + + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); + message_size = hdr_len + data_len + pad; + + if (ib_mad_kernel_rmpp_agent(mad_agent)) { + if (!rmpp_active && message_size > mad_size) + return ERR_PTR(-EINVAL); + } else + if (rmpp_active || message_size > mad_size) + return ERR_PTR(-EINVAL); + + size = rmpp_active ? hdr_len : mad_size; + buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); + if (!buf) + return ERR_PTR(-ENOMEM); + + mad_send_wr = buf + size; + INIT_LIST_HEAD(&mad_send_wr->rmpp_list); + mad_send_wr->send_buf.mad = buf; + mad_send_wr->send_buf.hdr_len = hdr_len; + mad_send_wr->send_buf.data_len = data_len; + mad_send_wr->pad = pad; + + mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->sg_list[0].length = hdr_len; + mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + + mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; + + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; + + if (rmpp_active) { + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); + if (ret) { + kfree(buf); + return ERR_PTR(ret); + } + } + + mad_send_wr->send_buf.mad_agent = mad_agent; + refcount_inc(&mad_agent_priv->refcount); + return &mad_send_wr->send_buf; +} +EXPORT_SYMBOL(ib_create_send_mad); + +int ib_get_mad_data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS)) + return IB_MGMT_DEVICE_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_MAD_HDR; +} +EXPORT_SYMBOL(ib_get_mad_data_offset); + +int ib_is_mad_class_rmpp(u8 mgmt_class) +{ + if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS) || + ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) + return 1; + return 0; +} +EXPORT_SYMBOL(ib_is_mad_class_rmpp); + +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct list_head *list; + + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + list = &mad_send_wr->cur_seg->list; + + if (mad_send_wr->cur_seg->num < seg_num) { + list_for_each_entry(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } else if (mad_send_wr->cur_seg->num > seg_num) { + list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } + return mad_send_wr->cur_seg->data; +} +EXPORT_SYMBOL(ib_get_rmpp_segment); + +static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) +{ + if (mad_send_wr->send_buf.seg_count) + return ib_get_rmpp_segment(&mad_send_wr->send_buf, + mad_send_wr->seg_num); + else + return mad_send_wr->send_buf.mad + + mad_send_wr->send_buf.hdr_len; +} + +void ib_free_send_mad(struct ib_mad_send_buf *send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + + free_send_rmpp_list(mad_send_wr); + kfree(send_buf->mad); + deref_mad_agent(mad_agent_priv); +} +EXPORT_SYMBOL(ib_free_send_mad); + +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_qp_info *qp_info; + struct list_head *list; + struct ib_mad_agent *mad_agent; + struct ib_sge *sge; + unsigned long flags; + int ret; + + /* Set WR ID to find mad_send_wr upon completion */ + qp_info = mad_send_wr->mad_agent_priv->qp_info; + mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; + + mad_agent = mad_send_wr->send_buf.mad_agent; + sge = mad_send_wr->sg_list; + sge[0].addr = ib_dma_map_single(mad_agent->device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + + mad_send_wr->header_mapping = sge[0].addr; + + sge[1].addr = ib_dma_map_single(mad_agent->device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } + mad_send_wr->payload_mapping = sge[1].addr; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + trace_ib_mad_ib_send_mad(mad_send_wr, qp_info); + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, + NULL); + list = &qp_info->send_queue.list; + } else { + ret = 0; + list = &qp_info->overflow_list; + } + + if (!ret) { + qp_info->send_queue.count++; + list_add_tail(&mad_send_wr->mad_list.list, list); + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (ret) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->payload_mapping, + sge[1].length, DMA_TO_DEVICE); + } + return ret; +} + +/* + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client + */ +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf *next_send_buf; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int ret = -EINVAL; + + /* Walk list of send WRs and post each on send list */ + for (; send_buf; send_buf = next_send_buf) { + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_agent_priv = mad_send_wr->mad_agent_priv; + + ret = ib_mad_enforce_security(mad_agent_priv, + mad_send_wr->send_wr.pkey_index); + if (ret) + goto error; + + if (!send_buf->mad_agent->send_handler || + (send_buf->timeout_ms && + !send_buf->mad_agent->recv_handler)) { + ret = -EINVAL; + goto error; + } + + if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) { + if (mad_agent_priv->agent.rmpp_version) { + ret = -EINVAL; + goto error; + } + } + + /* + * Save pointer to next work request to post in case the + * current one completes, and the user modifies the work + * request associated with the completion + */ + next_send_buf = send_buf->next; + mad_send_wr->send_wr.ah = send_buf->ah; + + if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, + mad_send_wr); + if (ret < 0) /* error */ + goto error; + else if (ret == 1) /* locally consumed */ + continue; + } + + mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; + /* Timeout will be updated after send completes */ + mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; + /* Reference for work request to QP + response */ + mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); + mad_send_wr->status = IB_WC_SUCCESS; + + /* Reference MAD agent until send completes */ + refcount_inc(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + ret = ib_send_rmpp_mad(mad_send_wr); + if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) + ret = ib_send_mad(mad_send_wr); + } else + ret = ib_send_mad(mad_send_wr); + if (ret < 0) { + /* Fail send request */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_del(&mad_send_wr->agent_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + deref_mad_agent(mad_agent_priv); + goto error; + } + } + return 0; +error: + if (bad_send_buf) + *bad_send_buf = send_buf; + return ret; +} +EXPORT_SYMBOL(ib_post_send_mad); + +/* + * ib_free_recv_mad - Returns data buffers used to receive + * a MAD to the access layer + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *priv; + struct list_head free_list; + + INIT_LIST_HEAD(&free_list); + list_splice_init(&mad_recv_wc->rmpp_list, &free_list); + + list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, + &free_list, list) { + mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, + recv_buf); + mad_priv_hdr = container_of(mad_recv_wc, + struct ib_mad_private_header, + recv_wc); + priv = container_of(mad_priv_hdr, struct ib_mad_private, + header); + kfree(priv); + } +} +EXPORT_SYMBOL(ib_free_recv_mad); + +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req) +{ + int i; + + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { + if ((*method)->agent[i]) { + pr_err("Method %d already in use\n", i); + return -EINVAL; + } + } + return 0; +} + +static int allocate_method_table(struct ib_mad_mgmt_method_table **method) +{ + /* Allocate management method table */ + *method = kzalloc(sizeof **method, GFP_ATOMIC); + return (*method) ? 0 : (-ENOMEM); +} + +/* + * Check to see if there are any methods still in use + */ +static int check_method_table(struct ib_mad_mgmt_method_table *method) +{ + int i; + + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i]) + return 1; + return 0; +} + +/* + * Check to see if there are any method tables for this class still in use + */ +static int check_class_table(struct ib_mad_mgmt_class_table *class) +{ + int i; + + for (i = 0; i < MAX_MGMT_CLASS; i++) + if (class->method_table[i]) + return 1; + return 0; +} + +static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + if (vendor_class->method_table[i]) + return 1; + return 0; +} + +static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, + const char *oui) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) + return i; + + return -1; +} + +static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) +{ + int i; + + for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) + if (vendor->vendor_class[i]) + return 1; + + return 0; +} + +static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, + struct ib_mad_agent_private *agent) +{ + int i; + + /* Remove any methods for this mad agent */ + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i] == agent) + method->agent[i] = NULL; +} + +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table **class; + struct ib_mad_mgmt_method_table **method; + int i, ret; + + port_priv = agent_priv->qp_info->port_priv; + class = &port_priv->version[mad_reg_req->mgmt_class_version].class; + if (!*class) { + /* Allocate management class table for "new" class version */ + *class = kzalloc(sizeof **class, GFP_ATOMIC); + if (!*class) { + ret = -ENOMEM; + goto error1; + } + + /* Allocate method table for this management class */ + method = &(*class)->method_table[mgmt_class]; + if ((ret = allocate_method_table(method))) + goto error2; + } else { + method = &(*class)->method_table[mgmt_class]; + if (!*method) { + /* Allocate method table for this management class */ + if ((ret = allocate_method_table(method))) + goto error1; + } + } + + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error3; + + /* Finally, add in methods being registered */ + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) + (*method)->agent[i] = agent_priv; + + return 0; + +error3: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; + goto error1; +error2: + kfree(*class); + *class = NULL; +error1: + return ret; +} + +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_vendor_class_table **vendor_table; + struct ib_mad_mgmt_vendor_class_table *vendor = NULL; + struct ib_mad_mgmt_vendor_class *vendor_class = NULL; + struct ib_mad_mgmt_method_table **method; + int i, ret = -ENOMEM; + u8 vclass; + + /* "New" vendor (with OUI) class */ + vclass = vendor_class_index(mad_reg_req->mgmt_class); + port_priv = agent_priv->qp_info->port_priv; + vendor_table = &port_priv->version[ + mad_reg_req->mgmt_class_version].vendor; + if (!*vendor_table) { + /* Allocate mgmt vendor class table for "new" class version */ + vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); + if (!vendor) + goto error1; + + *vendor_table = vendor; + } + if (!(*vendor_table)->vendor_class[vclass]) { + /* Allocate table for this management vendor class */ + vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); + if (!vendor_class) + goto error2; + + (*vendor_table)->vendor_class[vclass] = vendor_class; + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* Is there matching OUI for this vendor class ? */ + if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3)) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + if (!*method) + goto error3; + goto check_in_use; + } + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* OUI slot available ? */ + if (!is_vendor_oui((*vendor_table)->vendor_class[ + vclass]->oui[i])) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + /* Allocate method table for this OUI */ + if (!*method) { + ret = allocate_method_table(method); + if (ret) + goto error3; + } + memcpy((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3); + goto check_in_use; + } + } + dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); + goto error3; + +check_in_use: + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error4; + + /* Finally, add in methods being registered */ + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) + (*method)->agent[i] = agent_priv; + + return 0; + +error4: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; +error3: + if (vendor_class) { + (*vendor_table)->vendor_class[vclass] = NULL; + kfree(vendor_class); + } +error2: + if (vendor) { + *vendor_table = NULL; + kfree(vendor); + } +error1: + return ret; +} + +static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + int index; + u8 mgmt_class; + + /* + * Was MAD registration request supplied + * with original registration ? + */ + if (!agent_priv->reg_req) + goto out; + + port_priv = agent_priv->qp_info->port_priv; + mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); + class = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].class; + if (!class) + goto vendor_check; + + method = class->method_table[mgmt_class]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* Now, check to see if there are any methods still in use */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ + if (!check_class_table(class)) { + /* If not, release management class table */ + kfree(class); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version].class = NULL; + } + } + } + +vendor_check: + if (!is_vendor_class(mgmt_class)) + goto out; + + /* normalize mgmt_class to vendor range 2 */ + mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); + vendor = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].vendor; + + if (!vendor) + goto out; + + vendor_class = vendor->vendor_class[mgmt_class]; + if (vendor_class) { + index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); + if (index < 0) + goto out; + method = vendor_class->method_table[index]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* + * Now, check to see if there are + * any methods still in use + */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + vendor_class->method_table[index] = NULL; + memset(vendor_class->oui[index], 0, 3); + /* Any OUIs left ? */ + if (!check_vendor_class(vendor_class)) { + /* If not, release vendor class table */ + kfree(vendor_class); + vendor->vendor_class[mgmt_class] = NULL; + /* Any other vendor classes left ? */ + if (!check_vendor_table(vendor)) { + kfree(vendor); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version]. + vendor = NULL; + } + } + } + } + } + +out: + return; +} + +static struct ib_mad_agent_private * +find_mad_agent(struct ib_mad_port_private *port_priv, + const struct ib_mad_hdr *mad_hdr) +{ + struct ib_mad_agent_private *mad_agent = NULL; + unsigned long flags; + + if (ib_response_mad(mad_hdr)) { + u32 hi_tid; + + /* + * Routing is based on high 32 bits of transaction ID + * of MAD. + */ + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; + rcu_read_lock(); + mad_agent = xa_load(&ib_mad_clients, hi_tid); + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); + } else { + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + const struct ib_vendor_mad *vendor_mad; + int index; + + spin_lock_irqsave(&port_priv->reg_lock, flags); + /* + * Routing is based on version, class, and method + * For "newer" vendor MADs, also based on OUI + */ + if (mad_hdr->class_version >= MAX_MGMT_VERSION) + goto out; + if (!is_vendor_class(mad_hdr->mgmt_class)) { + class = port_priv->version[ + mad_hdr->class_version].class; + if (!class) + goto out; + if (convert_mgmt_class(mad_hdr->mgmt_class) >= + ARRAY_SIZE(class->method_table)) + goto out; + method = class->method_table[convert_mgmt_class( + mad_hdr->mgmt_class)]; + if (method) + mad_agent = method->agent[mad_hdr->method & + ~IB_MGMT_METHOD_RESP]; + } else { + vendor = port_priv->version[ + mad_hdr->class_version].vendor; + if (!vendor) + goto out; + vendor_class = vendor->vendor_class[vendor_class_index( + mad_hdr->mgmt_class)]; + if (!vendor_class) + goto out; + /* Find matching OUI */ + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; + index = find_vendor_oui(vendor_class, vendor_mad->oui); + if (index == -1) + goto out; + method = vendor_class->method_table[index]; + if (method) { + mad_agent = method->agent[mad_hdr->method & + ~IB_MGMT_METHOD_RESP]; + } + } + if (mad_agent) + refcount_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + } + + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %u\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; + } + + return mad_agent; +} + +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) +{ + int valid = 0; + u32 qp_num = qp_info->qp->qp_num; + + /* Make sure MAD base version is understood */ + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %u %s\n", + mad_hdr->base_version, opa ? "(opa)" : ""); + goto out; + } + + /* Filter SMI packets sent to other than QP0 */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if (qp_num == 0) + valid = 1; + } else { + /* CM attributes other than ClassPortInfo only use Send method */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && + (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && + (mad_hdr->method != IB_MGMT_METHOD_SEND)) + goto out; + /* Filter GSI packets sent to QP0 */ + if (qp_num != 0) + valid = 1; + } + +out: + return valid; +} + +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; + return !mad_agent_priv->agent.rmpp_version || + !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || + !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE) || + (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); +} + +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) +{ + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == + rwc->recv_buf.mad->mad_hdr.mgmt_class; +} + +static inline int +rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) +{ + struct rdma_ah_attr attr; + u8 send_resp, rcv_resp; + union ib_gid sgid; + struct ib_device *device = mad_agent_priv->agent.device; + u32 port_num = mad_agent_priv->agent.port_num; + u8 lmc; + bool has_grh; + + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); + + if (send_resp == rcv_resp) + /* both requests, or both responses. GIDs different */ + return 0; + + if (rdma_query_ah(wr->send_buf.ah, &attr)) + /* Assume not equal, to avoid false positives. */ + return 0; + + has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH); + if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH)) + /* one has GID, other does not. Assume different */ + return 0; + + if (!send_resp && rcv_resp) { + /* is request/response. */ + if (!has_grh) { + if (ib_get_cached_lmc(device, port_num, &lmc)) + return 0; + return (!lmc || !((rdma_ah_get_path_bits(&attr) ^ + rwc->wc->dlid_path_bits) & + ((1 << lmc) - 1))); + } else { + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr); + + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) + return 0; + return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, + 16); + } + } + + if (!has_grh) + return rdma_ah_get_dlid(&attr) == rwc->wc->slid; + else + return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw, + rwc->recv_buf.grh->sgid.raw, + 16); +} + +static inline int is_direct(u8 class) +{ + return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); +} + +struct ib_mad_send_wr_private* +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) +{ + struct ib_mad_send_wr_private *wr; + const struct ib_mad_hdr *mad_hdr; + + mad_hdr = &wc->recv_buf.mad->mad_hdr; + + list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { + if ((wr->tid == mad_hdr->tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + } + + /* + * It's possible to receive the response before we've + * been notified that the send has completed + */ + list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && + wr->timeout && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + /* Verify request has not been canceled */ + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + } + return NULL; +} + +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) +{ + mad_send_wr->timeout = 0; + if (mad_send_wr->refcount == 1) + list_move_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->done_list); +} + +static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + int ret; + + INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); + ret = ib_mad_enforce_security(mad_agent_priv, + mad_recv_wc->wc->pkey_index); + if (ret) { + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + + list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, + mad_recv_wc); + if (!mad_recv_wc) { + deref_mad_agent(mad_agent_priv); + return; + } + } + + /* Complete corresponding request */ + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); + if (!mad_send_wr) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) + && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) + & IB_MGMT_RMPP_FLAG_ACTIVE)) { + /* user rmpp is in effect + * and this is an active RMPP MAD + */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, NULL, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); + } else { + /* not user rmpp, revert to normal behavior and + * drop the mad + */ + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + } else { + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Defined behavior is to complete response before request */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &mad_send_wr->send_buf, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); + + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + } + } else { + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); + } +} + +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + trace_ib_mad_handle_ib_smi(smp); + + if (smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) +{ + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } + + return true; + } else { + return false; + } +} + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + trace_ib_mad_handle_opa_smi(smp); + + if (opa_smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum smi_action +handle_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SM_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + +static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_qp_info *qp_info; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv, *response = NULL; + struct ib_mad_agent_private *mad_agent; + u32 port_num; + int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; + + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + /* + * Receive errors indicate that the QP has entered the error + * state - error handling/shutdown code will cleanup + */ + return; + } + + qp_info = mad_list->mad_queue->qp_info; + dequeue_mad(mad_list); + + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, header); + ib_dma_unmap_single(port_priv->device, + recv->header.mapping, + mad_priv_dma_size(recv), + DMA_FROM_DEVICE); + + /* Setup MAD receive work completion from "normal" work completion */ + recv->header.wc = *wc; + recv->header.recv_wc.wc = &recv->header.wc; + + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; + recv->header.recv_wc.recv_buf.grh = &recv->grh; + + /* Validate MAD */ + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) + goto out; + + trace_ib_mad_recv_done_handler(qp_info, wc, + (struct ib_mad_hdr *)recv->mad); + + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); + if (!response) + goto out; + + if (rdma_cap_ib_switch(port_priv->device)) + port_num = wc->port_num; + else + port_num = port_priv->port_num; + + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) + goto out; + } + + /* Give driver "right of first refusal" on incoming MAD */ + if (port_priv->device->ops.process_mad) { + ret = port_priv->device->ops.process_mad( + port_priv->device, 0, port_priv->port_num, wc, + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + + if (ret & IB_MAD_RESULT_SUCCESS) { + if (ret & IB_MAD_RESULT_CONSUMED) + goto out; + if (ret & IB_MAD_RESULT_REPLY) { + agent_send_response((const struct ib_mad_hdr *)response->mad, + &recv->grh, wc, + port_priv->device, + port_num, + qp_info->qp->qp_num, + mad_size, opa); + goto out; + } + } + } + + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); + if (mad_agent) { + trace_ib_mad_recv_done_agent(mad_agent); + ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); + /* + * recv is freed up in error cases in ib_mad_complete_recv + * or via recv_handler in ib_mad_complete_recv() + */ + recv = NULL; + } else if ((ret & IB_MAD_RESULT_SUCCESS) && + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); + } + +out: + /* Post another receive request for this QP */ + if (response) { + ib_mad_post_receive_mads(qp_info, response); + kfree(recv); + } else + ib_mad_post_receive_mads(qp_info, recv); +} + +static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long delay; + + if (list_empty(&mad_agent_priv->wait_list)) { + cancel_delayed_work(&mad_agent_priv->timed_work); + } else { + mad_send_wr = list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_agent_priv->timeout, + mad_send_wr->timeout)) { + mad_agent_priv->timeout = mad_send_wr->timeout; + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); + } + } +} + +static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *temp_mad_send_wr; + struct list_head *list_item; + unsigned long delay; + + mad_agent_priv = mad_send_wr->mad_agent_priv; + list_del(&mad_send_wr->agent_list); + + delay = mad_send_wr->timeout; + mad_send_wr->timeout += jiffies; + + if (delay) { + list_for_each_prev(list_item, &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry(list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } + } else { + list_item = &mad_agent_priv->wait_list; + } + + list_add(&mad_send_wr->agent_list, list_item); + + /* Reschedule a work item if we have a shorter timeout */ + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); +} + +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + unsigned long timeout_ms) +{ + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + wait_for_response(mad_send_wr); +} + +/* + * Process a send work completion + */ +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_agent_private *mad_agent_priv; + unsigned long flags; + int ret; + + mad_agent_priv = mad_send_wr->mad_agent_priv; + spin_lock_irqsave(&mad_agent_priv->lock, flags); + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); + if (ret == IB_RMPP_RESULT_CONSUMED) + goto done; + } else + ret = IB_RMPP_RESULT_UNHANDLED; + + if (mad_send_wc->status != IB_WC_SUCCESS && + mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = mad_send_wc->status; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + + if (--mad_send_wr->refcount > 0) { + if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && + mad_send_wr->status == IB_WC_SUCCESS) { + wait_for_response(mad_send_wr); + } + goto done; + } + + /* Remove send from MAD agent and notify client of completion */ + list_del(&mad_send_wr->agent_list); + adjust_timeout(mad_agent_priv); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (mad_send_wr->status != IB_WC_SUCCESS) + mad_send_wc->status = mad_send_wr->status; + if (ret == IB_RMPP_RESULT_INTERNAL) + ib_rmpp_send_handler(mad_send_wc); + else + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + mad_send_wc); + + /* Release reference on agent taken when sending */ + deref_mad_agent(mad_agent_priv); + return; +done: + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; + struct ib_mad_qp_info *qp_info; + struct ib_mad_queue *send_queue; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + int ret; + + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + if (!ib_mad_send_error(port_priv, wc)) + return; + } + + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + send_queue = mad_list->mad_queue; + qp_info = send_queue->qp_info; + + trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); + trace_ib_mad_send_done_handler(mad_send_wr, wc); + +retry: + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->header_mapping, + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->payload_mapping, + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); + queued_send_wr = NULL; + spin_lock_irqsave(&send_queue->lock, flags); + list_del(&mad_list->list); + + /* Move queued send to the send queue */ + if (send_queue->count-- > send_queue->max_active) { + mad_list = container_of(qp_info->overflow_list.next, + struct ib_mad_list_head, list); + queued_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + list_move_tail(&mad_list->list, &send_queue->list); + } + spin_unlock_irqrestore(&send_queue->lock, flags); + + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = wc->status; + mad_send_wc.vendor_err = wc->vendor_err; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + + if (queued_send_wr) { + trace_ib_mad_send_done_resend(queued_send_wr, qp_info); + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, + NULL); + if (ret) { + dev_err(&port_priv->device->dev, + "ib_post_send failed: %d\n", ret); + mad_send_wr = queued_send_wr; + wc->status = IB_WC_LOC_QP_OP_ERR; + goto retry; + } + } +} + +static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_list_head *mad_list; + unsigned long flags; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { + mad_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + mad_send_wr->retry = 1; + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); +} + +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) +{ + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; + struct ib_mad_send_wr_private *mad_send_wr; + int ret; + + /* + * Send errors will transition the QP to SQE - move + * QP to RTS and repost flushed work requests + */ + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + if (wc->status == IB_WC_WR_FLUSH_ERR) { + if (mad_send_wr->retry) { + /* Repost send */ + mad_send_wr->retry = 0; + trace_ib_mad_error_handler(mad_send_wr, qp_info); + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, + NULL); + if (!ret) + return false; + } + } else { + struct ib_qp_attr *attr; + + /* Transition QP to RTS and fail offending send */ + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (attr) { + attr->qp_state = IB_QPS_RTS; + attr->cur_qp_state = IB_QPS_SQE; + ret = ib_modify_qp(qp_info->qp, attr, + IB_QP_STATE | IB_QP_CUR_STATE); + kfree(attr); + if (ret) + dev_err(&port_priv->device->dev, + "%s - ib_modify_qp to RTS: %d\n", + __func__, ret); + else + mark_sends_for_retry(qp_info); + } + } + + return true; +} + +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + unsigned long flags; + struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + struct list_head cancel_list; + + INIT_LIST_HEAD(&cancel_list); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &mad_agent_priv->send_list, agent_list) { + if (mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + } + + /* Empty wait list to prevent receives from finding a request */ + list_splice_init(&mad_agent_priv->wait_list, &cancel_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Report all cancelled requests */ + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + mad_send_wc.vendor_err = 0; + + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &cancel_list, agent_list) { + mad_send_wc.send_buf = &mad_send_wr->send_buf; + list_del(&mad_send_wr->agent_list); + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + deref_mad_agent(mad_agent_priv); + } +} + +static struct ib_mad_send_wr_private* +find_send_wr(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf) +{ + struct ib_mad_send_wr_private *mad_send_wr; + + list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, + agent_list) { + if (&mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + + list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, + agent_list) { + if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && + &mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + return NULL; +} + +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int active; + + if (!send_buf) + return -EINVAL; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = find_send_wr(mad_agent_priv, send_buf); + if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return -EINVAL; + } + + active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); + if (!timeout_ms) { + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + + mad_send_wr->send_buf.timeout_ms = timeout_ms; + if (active) + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + else + ib_reset_mad_timeout(mad_send_wr, timeout_ms); + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; +} +EXPORT_SYMBOL(ib_modify_mad); + +static void local_completions(struct work_struct *work) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_local_private *local; + struct ib_mad_agent_private *recv_mad_agent; + unsigned long flags; + int free_mad; + struct ib_wc wc; + struct ib_mad_send_wc mad_send_wc; + bool opa; + + mad_agent_priv = + container_of(work, struct ib_mad_agent_private, local_work); + + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->local_list)) { + local = list_entry(mad_agent_priv->local_list.next, + struct ib_mad_local_private, + completion_list); + list_del(&local->completion_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + free_mad = 0; + if (local->mad_priv) { + u8 base_version; + recv_mad_agent = local->recv_mad_agent; + if (!recv_mad_agent) { + dev_err(&mad_agent_priv->agent.device->dev, + "No receive MAD agent for local completion\n"); + free_mad = 1; + goto local_send_completion; + } + + /* + * Defined behavior is to complete response + * before request + */ + build_smp_wc(recv_mad_agent->agent.qp, + local->mad_send_wr->send_wr.wr.wr_cqe, + be16_to_cpu(IB_LID_PERMISSIVE), + local->mad_send_wr->send_wr.pkey_index, + recv_mad_agent->agent.port_num, &wc); + + local->mad_priv->header.recv_wc.wc = &wc; + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); + list_add(&local->mad_priv->header.recv_wc.recv_buf.list, + &local->mad_priv->header.recv_wc.rmpp_list); + local->mad_priv->header.recv_wc.recv_buf.grh = NULL; + local->mad_priv->header.recv_wc.recv_buf.mad = + (struct ib_mad *)local->mad_priv->mad; + recv_mad_agent->agent.recv_handler( + &recv_mad_agent->agent, + &local->mad_send_wr->send_buf, + &local->mad_priv->header.recv_wc); + spin_lock_irqsave(&recv_mad_agent->lock, flags); + deref_mad_agent(recv_mad_agent); + spin_unlock_irqrestore(&recv_mad_agent->lock, flags); + } + +local_send_completion: + /* Complete send */ + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &local->mad_send_wr->send_buf; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + deref_mad_agent(mad_agent_priv); + if (free_mad) + kfree(local->mad_priv); + kfree(local); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret; + + if (!mad_send_wr->retries_left) + return -ETIMEDOUT; + + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + + if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { + ret = ib_retry_rmpp(mad_send_wr); + switch (ret) { + case IB_RMPP_RESULT_UNHANDLED: + ret = ib_send_mad(mad_send_wr); + break; + case IB_RMPP_RESULT_CONSUMED: + ret = 0; + break; + default: + ret = -ECOMM; + break; + } + } else + ret = ib_send_mad(mad_send_wr); + + if (!ret) { + mad_send_wr->refcount++; + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } + return ret; +} + +static void timeout_sends(struct work_struct *work) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags, delay; + + mad_agent_priv = container_of(work, struct ib_mad_agent_private, + timed_work.work); + mad_send_wc.vendor_err = 0; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->wait_list)) { + mad_send_wr = list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_send_wr->timeout, jiffies)) { + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(mad_agent_priv->qp_info-> + port_priv->wq, + &mad_agent_priv->timed_work, delay); + break; + } + + list_del(&mad_send_wr->agent_list); + if (mad_send_wr->status == IB_WC_SUCCESS && + !retry_send(mad_send_wr)) + continue; + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (mad_send_wr->status == IB_WC_SUCCESS) + mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + else + mad_send_wc.status = mad_send_wr->status; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + deref_mad_agent(mad_agent_priv); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +/* + * Allocate receive MADs and post receive WRs for them + */ +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad) +{ + unsigned long flags; + int post, ret; + struct ib_mad_private *mad_priv; + struct ib_sge sg_list; + struct ib_recv_wr recv_wr; + struct ib_mad_queue *recv_queue = &qp_info->recv_queue; + + /* Initialize common scatter list fields */ + sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; + + /* Initialize common receive WR fields */ + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + + do { + /* Allocate and map receive buffer */ + if (mad) { + mad_priv = mad; + mad = NULL; + } else { + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); + if (!mad_priv) { + ret = -ENOMEM; + break; + } + } + sg_list.length = mad_priv_dma_size(mad_priv); + sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, + &mad_priv->grh, + mad_priv_dma_size(mad_priv), + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + kfree(mad_priv); + ret = -ENOMEM; + break; + } + mad_priv->header.mapping = sg_list.addr; + mad_priv->header.mad_list.mad_queue = recv_queue; + mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; + recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; + + /* Post receive WR */ + spin_lock_irqsave(&recv_queue->lock, flags); + post = (++recv_queue->count < recv_queue->max_active); + list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); + spin_unlock_irqrestore(&recv_queue->lock, flags); + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); + if (ret) { + spin_lock_irqsave(&recv_queue->lock, flags); + list_del(&mad_priv->header.mad_list.list); + recv_queue->count--; + spin_unlock_irqrestore(&recv_queue->lock, flags); + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + mad_priv_dma_size(mad_priv), + DMA_FROM_DEVICE); + kfree(mad_priv); + dev_err(&qp_info->port_priv->device->dev, + "ib_post_recv failed: %d\n", ret); + break; + } + } while (post); + + return ret; +} + +/* + * Return all the posted receive MADs + */ +static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv; + struct ib_mad_list_head *mad_list; + + if (!qp_info->qp) + return; + + while (!list_empty(&qp_info->recv_queue.list)) { + + mad_list = list_entry(qp_info->recv_queue.list.next, + struct ib_mad_list_head, list); + mad_priv_hdr = container_of(mad_list, + struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, + header); + + /* Remove from posted receive MAD list */ + list_del(&mad_list->list); + + ib_dma_unmap_single(qp_info->port_priv->device, + recv->header.mapping, + mad_priv_dma_size(recv), + DMA_FROM_DEVICE); + kfree(recv); + } + + qp_info->recv_queue.count = 0; +} + +/* + * Start the port + */ +static int ib_mad_port_start(struct ib_mad_port_private *port_priv) +{ + int ret, i; + struct ib_qp_attr *attr; + struct ib_qp *qp; + u16 pkey_index; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + IB_DEFAULT_PKEY_FULL, &pkey_index); + if (ret) + pkey_index = 0; + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + qp = port_priv->qp_info[i].qp; + if (!qp) + continue; + + /* + * PKey index for QP1 is irrelevant but + * one is needed for the Reset to Init transition + */ + attr->qp_state = IB_QPS_INIT; + attr->pkey_index = pkey_index; + attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | + IB_QP_PKEY_INDEX | IB_QP_QKEY); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to INIT: %d\n", + i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, attr, IB_QP_STATE); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTR: %d\n", + i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTS; + attr->sq_psn = IB_MAD_SEND_Q_PSN; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTS: %d\n", + i, ret); + goto out; + } + } + + ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); + if (ret) { + dev_err(&port_priv->device->dev, + "Failed to request completion notification: %d\n", + ret); + goto out; + } + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + if (!port_priv->qp_info[i].qp) + continue; + + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't post receive WRs\n"); + goto out; + } + } +out: + kfree(attr); + return ret; +} + +static void qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct ib_mad_qp_info *qp_info = qp_context; + + /* It's worse than that! He's dead, Jim! */ + dev_err(&qp_info->port_priv->device->dev, + "Fatal error (%d) on MAD QP (%u)\n", + event->event, qp_info->qp->qp_num); +} + +static void init_mad_queue(struct ib_mad_qp_info *qp_info, + struct ib_mad_queue *mad_queue) +{ + mad_queue->qp_info = qp_info; + mad_queue->count = 0; + spin_lock_init(&mad_queue->lock); + INIT_LIST_HEAD(&mad_queue->list); +} + +static void init_mad_qp(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info) +{ + qp_info->port_priv = port_priv; + init_mad_queue(qp_info, &qp_info->send_queue); + init_mad_queue(qp_info, &qp_info->recv_queue); + INIT_LIST_HEAD(&qp_info->overflow_list); +} + +static int create_mad_qp(struct ib_mad_qp_info *qp_info, + enum ib_qp_type qp_type) +{ + struct ib_qp_init_attr qp_init_attr; + int ret; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.send_cq = qp_info->port_priv->cq; + qp_init_attr.recv_cq = qp_info->port_priv->cq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = mad_sendq_size; + qp_init_attr.cap.max_recv_wr = mad_recvq_size; + qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; + qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; + qp_init_attr.qp_type = qp_type; + qp_init_attr.port_num = qp_info->port_priv->port_num; + qp_init_attr.qp_context = qp_info; + qp_init_attr.event_handler = qp_event_handler; + qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); + if (IS_ERR(qp_info->qp)) { + dev_err(&qp_info->port_priv->device->dev, + "Couldn't create ib_mad QP%d\n", + get_spl_qp_index(qp_type)); + ret = PTR_ERR(qp_info->qp); + goto error; + } + /* Use minimum queue sizes unless the CQ is resized */ + qp_info->send_queue.max_active = mad_sendq_size; + qp_info->recv_queue.max_active = mad_recvq_size; + return 0; + +error: + return ret; +} + +static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) +{ + if (!qp_info->qp) + return; + + ib_destroy_qp(qp_info->qp); +} + +/* + * Open the port + * Create the QP, PD, MR, and CQ if needed + */ +static int ib_mad_port_open(struct ib_device *device, + u32 port_num) +{ + int ret, cq_size; + struct ib_mad_port_private *port_priv; + unsigned long flags; + char name[sizeof "ib_mad123"]; + int has_smi; + + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; + + /* Create new device info */ + port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); + if (!port_priv) + return -ENOMEM; + + port_priv->device = device; + port_priv->port_num = port_num; + spin_lock_init(&port_priv->reg_lock); + init_mad_qp(port_priv, &port_priv->qp_info[0]); + init_mad_qp(port_priv, &port_priv->qp_info[1]); + + cq_size = mad_sendq_size + mad_recvq_size; + has_smi = rdma_cap_ib_smi(device, port_num); + if (has_smi) + cq_size *= 2; + + port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error3; + } + + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, + IB_POLL_UNBOUND_WORKQUEUE); + if (IS_ERR(port_priv->cq)) { + dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); + ret = PTR_ERR(port_priv->cq); + goto error4; + } + + if (has_smi) { + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + } + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + + snprintf(name, sizeof(name), "ib_mad%u", port_num); + port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!port_priv->wq) { + ret = -ENOMEM; + goto error8; + } + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_mad_port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + ret = ib_mad_port_start(port_priv); + if (ret) { + dev_err(&device->dev, "Couldn't start port\n"); + goto error9; + } + + return 0; + +error9: + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + destroy_workqueue(port_priv->wq); +error8: + destroy_mad_qp(&port_priv->qp_info[1]); +error7: + destroy_mad_qp(&port_priv->qp_info[0]); +error6: + ib_free_cq(port_priv->cq); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); +error3: + kfree(port_priv); + + return ret; +} + +/* + * Close the port + * If there are no classes using the port, free the port + * resources (CQ, MR, PD, QP) and remove the port's info structure + */ +static int ib_mad_port_close(struct ib_device *device, u32 port_num) +{ + struct ib_mad_port_private *port_priv; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + port_priv = __ib_get_mad_port(device, port_num); + if (port_priv == NULL) { + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + dev_err(&device->dev, "Port %u not found\n", port_num); + return -ENODEV; + } + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + destroy_workqueue(port_priv->wq); + destroy_mad_qp(&port_priv->qp_info[1]); + destroy_mad_qp(&port_priv->qp_info[0]); + ib_free_cq(port_priv->cq); + ib_dealloc_pd(port_priv->pd); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); + /* XXX: Handle deallocation of MAD registration tables */ + + kfree(port_priv); + + return 0; +} + +static int ib_mad_init_device(struct ib_device *device) +{ + int start, i; + unsigned int count = 0; + int ret; + + start = rdma_start_port(device); + + for (i = start; i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + ret = ib_mad_port_open(device, i); + if (ret) { + dev_err(&device->dev, "Couldn't open port %d\n", i); + goto error; + } + ret = ib_agent_port_open(device, i); + if (ret) { + dev_err(&device->dev, + "Couldn't open port %d for agents\n", i); + goto error_agent; + } + count++; + } + if (!count) + return -EOPNOTSUPP; + + return 0; + +error_agent: + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); + +error: + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); + } + return ret; +} + +static void ib_mad_remove_device(struct ib_device *device, void *client_data) +{ + unsigned int i; + + rdma_for_each_port (device, i) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) + dev_err(&device->dev, + "Couldn't close port %u for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %u\n", i); + } +} + +static struct ib_client mad_client = { + .name = "mad", + .add = ib_mad_init_device, + .remove = ib_mad_remove_device +}; + +int ib_mad_init(void) +{ + mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); + mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); + + mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); + mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); + + INIT_LIST_HEAD(&ib_mad_port_list); + + if (ib_register_client(&mad_client)) { + pr_err("Couldn't register ib_mad client\n"); + return -EINVAL; + } + + return 0; +} + +void ib_mad_cleanup(void) +{ + ib_unregister_client(&mad_client); +} -- cgit v1.2.3