diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/Kconfig | 21 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/Makefile | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 4472 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cq.c | 1190 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 1572 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/ev.c | 242 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/id_table.c | 106 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1046 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/mem.c | 739 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 569 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 2813 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/resource.c | 516 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/restrack.c | 487 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 833 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 830 |
15 files changed, 15444 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig new file mode 100644 index 000000000..9e2b2c348 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INFINIBAND_CXGB4 + tristate "Chelsio T4/T5 RDMA Driver" + depends on CHELSIO_T4 && INET + depends on INFINIBAND_ADDR_TRANS + select CHELSIO_LIB + select GENERIC_ALLOCATOR + help + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. + + For general information about Chelsio and our products, visit + our website at <http://www.chelsio.com>. + + For customer support, please visit our customer support page at + <http://www.chelsio.com/support.html>. + + Please send feedback to <linux-bugs@chelsio.com>. + + To compile this driver as a module, choose M here: the module + will be called iw_cxgb4. diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile new file mode 100644 index 000000000..291d259d2 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb + +obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o + +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o \ + restrack.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c new file mode 100644 index 000000000..040ba2224 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -0,0 +1,4472 @@ +/* + * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> + +#include <net/neighbour.h> +#include <net/netevent.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <rdma/ib_addr.h> + +#include <libcxgb_cm.h> +#include "iw_cxgb4.h" +#include "clip_tbl.h" + +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; + +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + +static int dack_mode; +module_param(dack_mode, int, 0644); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); + +uint c4iw_max_read_depth = 32; +module_param(c4iw_max_read_depth, int, 0644); +MODULE_PARM_DESC(c4iw_max_read_depth, + "Per-connection max ORD/IRD (default=32)"); + +static int enable_tcp_timestamps; +module_param(enable_tcp_timestamps, int, 0644); +MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)"); + +static int enable_tcp_sack; +module_param(enable_tcp_sack, int, 0644); +MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)"); + +static int enable_tcp_window_scaling = 1; +module_param(enable_tcp_window_scaling, int, 0644); +MODULE_PARM_DESC(enable_tcp_window_scaling, + "Enable tcp window scaling (default=1)"); + +static int peer2peer = 1; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); + +static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; +module_param(p2p_type, int, 0644); +MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: " + "1=RDMA_READ 0=RDMA_WRITE (default 1)"); + +static int ep_timeout_secs = 60; +module_param(ep_timeout_secs, int, 0644); +MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " + "in seconds (default=60)"); + +static int mpa_rev = 2; +module_param(mpa_rev, int, 0644); +MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " + "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft" + " compliant (default=2)"); + +static int markers_enabled; +module_param(markers_enabled, int, 0644); +MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +module_param(crc_enabled, int, 0644); +MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +module_param(rcv_win, int, 0644); +MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); + +static int snd_win = 128 * 1024; +module_param(snd_win, int, 0644); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); + +static struct workqueue_struct *workq; + +static struct sk_buff_head rxq; + +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); +static void ep_timeout(struct timer_list *t); +static void connect_reply_upcall(struct c4iw_ep *ep, int status); +static int sched(struct c4iw_dev *dev, struct sk_buff *skb); + +static LIST_HEAD(timeout_list); +static DEFINE_SPINLOCK(timeout_lock); + +static void deref_cm_id(struct c4iw_ep_common *epc) +{ + epc->cm_id->rem_ref(epc->cm_id); + epc->cm_id = NULL; + set_bit(CM_ID_DEREFED, &epc->history); +} + +static void ref_cm_id(struct c4iw_ep_common *epc) +{ + set_bit(CM_ID_REFED, &epc->history); + epc->cm_id->add_ref(epc->cm_id); +} + +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_DEREFED, &ep->com.history); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_REFED, &ep->com.history); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + +static void start_ep_timer(struct c4iw_ep *ep) +{ + pr_debug("ep %p\n", ep); + if (timer_pending(&ep->timer)) { + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); + ep->timer.expires = jiffies + ep_timeout_secs * HZ; + add_timer(&ep->timer); +} + +static int stop_ep_timer(struct c4iw_ep *ep) +{ + pr_debug("ep %p stopping\n", ep); + del_timer_sync(&ep->timer); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + c4iw_put_ep(&ep->com); + return 0; + } + return 1; +} + +static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, + struct l2t_entry *l2e) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + pr_err("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); + if (error < 0) + kfree_skb(skb); + else if (error == NET_XMIT_DROP) + return -ENOMEM; + return error < 0 ? error : 0; +} + +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + pr_err("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); + if (error < 0) + kfree_skb(skb); + return error < 0 ? error : 0; +} + +static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) +{ + u32 len = roundup(sizeof(struct cpl_tid_release), 16); + + skb = get_skb(skb, len, GFP_KERNEL); + if (!skb) + return; + + cxgb_mk_tid_release(skb, len, hwtid, 0); + c4iw_ofld_send(rdev, skb); + return; +} + +static void set_emss(struct c4iw_ep *ep, u16 opt) +{ + ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] - + ((AF_INET == ep->com.remote_addr.ss_family) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - + sizeof(struct tcphdr); + ep->mss = ep->emss; + if (TCPOPT_TSTAMP_G(opt)) + ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4); + if (ep->emss < 128) + ep->emss = 128; + if (ep->emss & 7) + pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n", + TCPOPT_MSS_G(opt), ep->mss, ep->emss); + pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, + ep->emss); +} + +static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) +{ + enum c4iw_ep_state state; + + mutex_lock(&epc->mutex); + state = epc->state; + mutex_unlock(&epc->mutex); + return state; +} + +static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + epc->state = new; +} + +static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + mutex_lock(&epc->mutex); + pr_debug("%s -> %s\n", states[epc->state], states[new]); + __state_set(epc, new); + mutex_unlock(&epc->mutex); + return; +} + +static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) +{ + struct sk_buff *skb; + unsigned int i; + size_t len; + + len = roundup(sizeof(union cpl_wr_size), 16); + for (i = 0; i < size; i++) { + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + goto fail; + skb_queue_tail(ep_skb_list, skb); + } + return 0; +fail: + skb_queue_purge(ep_skb_list); + return -ENOMEM; +} + +static void *alloc_ep(int size, gfp_t gfp) +{ + struct c4iw_ep_common *epc; + + epc = kzalloc(size, gfp); + if (epc) { + epc->wr_waitp = c4iw_alloc_wr_wait(gfp); + if (!epc->wr_waitp) { + kfree(epc); + epc = NULL; + goto out; + } + kref_init(&epc->kref); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(epc->wr_waitp); + } + pr_debug("alloc ep %p\n", epc); +out: + return epc; +} + +static void remove_ep_tid(struct c4iw_ep *ep) +{ + unsigned long flags; + + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + __xa_erase(&ep->com.dev->hwtids, ep->hwtid); + if (xa_empty(&ep->com.dev->hwtids)) + wake_up(&ep->com.dev->wait); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); +} + +static int insert_ep_tid(struct c4iw_ep *ep) +{ + unsigned long flags; + int err; + + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); + + return err; +} + +/* + * Atomically lookup the ep ptr given the tid and grab a reference on the ep. + */ +static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) +{ + struct c4iw_ep *ep; + unsigned long flags; + + xa_lock_irqsave(&dev->hwtids, flags); + ep = xa_load(&dev->hwtids, tid); + if (ep) + c4iw_get_ep(&ep->com); + xa_unlock_irqrestore(&dev->hwtids, flags); + return ep; +} + +/* + * Atomically lookup the ep ptr given the stid and grab a reference on the ep. + */ +static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, + unsigned int stid) +{ + struct c4iw_listen_ep *ep; + unsigned long flags; + + xa_lock_irqsave(&dev->stids, flags); + ep = xa_load(&dev->stids, stid); + if (ep) + c4iw_get_ep(&ep->com); + xa_unlock_irqrestore(&dev->stids, flags); + return ep; +} + +void _c4iw_free_ep(struct kref *kref) +{ + struct c4iw_ep *ep; + + ep = container_of(kref, struct c4iw_ep, com.kref); + pr_debug("ep %p state %s\n", ep, states[ep->com.state]); + if (test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.local_addr; + + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + } + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, + ep->com.local_addr.ss_family); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + kfree_skb(ep->mpa_skb); + } + if (!skb_queue_empty(&ep->com.ep_skb_list)) + skb_queue_purge(&ep->com.ep_skb_list); + c4iw_put_wr_wait(ep->com.wr_waitp); + kfree(ep); +} + +static void release_ep_resources(struct c4iw_ep *ep) +{ + set_bit(RELEASE_RESOURCES, &ep->com.flags); + + /* + * If we have a hwtid, then remove it from the idr table + * so lookups will no longer find this endpoint. Otherwise + * we have a race where one thread finds the ep ptr just + * before the other thread is freeing the ep memory. + */ + if (ep->hwtid != -1) + remove_ep_tid(ep); + c4iw_put_ep(&ep->com); +} + +static int status2errno(int status) +{ + switch (status) { + case CPL_ERR_NONE: + return 0; + case CPL_ERR_CONN_RESET: + return -ECONNRESET; + case CPL_ERR_ARP_MISS: + return -EHOSTUNREACH; + case CPL_ERR_CONN_TIMEDOUT: + return -ETIMEDOUT; + case CPL_ERR_TCAM_FULL: + return -ENOMEM; + case CPL_ERR_CONN_EXIST: + return -EADDRINUSE; + default: + return -EIO; + } +} + +/* + * Try and reuse skbs already allocated... + */ +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) +{ + if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { + skb_trim(skb, 0); + skb_get(skb); + skb_reset_transport_header(skb); + } else { + skb = alloc_skb(len, gfp); + if (!skb) + return NULL; + } + t4_set_arp_err_handler(skb, NULL, NULL); + return skb; +} + +static struct net_device *get_real_dev(struct net_device *egress_dev) +{ + return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; +} + +static void arp_failure_discard(void *handle, struct sk_buff *skb) +{ + pr_err("ARP failure\n"); + kfree_skb(skb); +} + +static void mpa_start_arp_failure(void *handle, struct sk_buff *skb) +{ + pr_err("ARP failure during MPA Negotiation - Closing Connection\n"); +} + +enum { + NUM_FAKE_CPLS = 2, + FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, + FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1, +}; + +static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); + release_ep_resources(ep); + return 0; +} + +static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); + c4iw_put_ep(&ep->parent_ep->com); + release_ep_resources(ep); + return 0; +} + +/* + * Fake up a special CPL opcode and call sched() so process_work() will call + * _put_ep_safe() in a safe context to free the ep resources. This is needed + * because ARP error handlers are called in an ATOMIC context, and + * _c4iw_free_ep() needs to block. + */ +static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb, + int cpl) +{ + struct cpl_act_establish *rpl = cplhdr(skb); + + /* Set our special ARP_FAILURE opcode */ + rpl->ot.opcode = cpl; + + /* + * Save ep in the skb->cb area, after where sched() will save the dev + * ptr. + */ + *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep; + sched(ep->com.dev, skb); +} + +/* Handle an ARP failure for an accept */ +static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_ep *ep = handle; + + pr_err("ARP failure during accept - tid %u - dropping connection\n", + ep->hwtid); + + __state_set(&ep->com, DEAD); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE); +} + +/* + * Handle an ARP failure for an active open. + */ +static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_ep *ep = handle; + + pr_err("ARP failure during connect\n"); + connect_reply_upcall(ep, -EHOSTUNREACH); + __state_set(&ep->com, DEAD); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } + xa_erase_irq(&ep->com.dev->atids, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); +} + +/* + * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant + * and send it along. + */ +static void abort_arp_failure(void *handle, struct sk_buff *skb) +{ + int ret; + struct c4iw_ep *ep = handle; + struct c4iw_rdev *rdev = &ep->com.dev->rdev; + struct cpl_abort_req *req = cplhdr(skb); + + pr_debug("rdev %p\n", rdev); + req->cmd = CPL_ABORT_NO_RST; + skb_get(skb); + ret = c4iw_ofld_send(rdev, skb); + if (ret) { + __state_set(&ep->com, DEAD); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); + } else + kfree_skb(skb); +} + +static int send_flowc(struct c4iw_ep *ep) +{ + struct fw_flowc_wr *flowc; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); + u16 vlan = ep->l2t->vlan; + int nparams; + int flowclen, flowclen16; + + if (WARN_ON(!skb)) + return -ENOMEM; + + if (vlan == CPL_L2T_VLAN_NONE) + nparams = 9; + else + nparams = 10; + + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + flowc = __skb_put(skb, flowclen); + memset(flowc, 0, flowclen); + + flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | + FW_FLOWC_WR_NPARAMS_V(nparams)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(ep->hwtid)); + + flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; + flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V + (ep->com.dev->rdev.lldi.pf)); + flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; + flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; + flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; + flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid); + flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; + flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq); + flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; + flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); + flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; + flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); + flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; + flowc->mnemval[7].val = cpu_to_be32(ep->emss); + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); + if (nparams == 10) { + u16 pri; + pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[9].val = cpu_to_be32(pri); + } + + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + return c4iw_ofld_send(&ep->com.dev->rdev, skb); +} + +static int send_halfclose(struct c4iw_ep *ep) +{ + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + if (WARN_ON(!skb)) + return -ENOMEM; + + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static void read_tcb(struct c4iw_ep *ep) +{ + struct sk_buff *skb; + struct cpl_get_tcb *req; + int wrlen = roundup(sizeof(*req), 16); + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (WARN_ON(!skb)) + return; + + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + req = (struct cpl_get_tcb *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid)); + req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid)); + + /* + * keep a ref on the ep so the tcb is not unlocked before this + * cpl completes. The ref is released in read_tcb_rpl(). + */ + c4iw_get_ep(&ep->com); + if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb))) + c4iw_put_ep(&ep->com); +} + +static int send_abort_req(struct c4iw_ep *ep) +{ + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); + struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + if (WARN_ON(!req_skb)) + return -ENOMEM; + + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); + + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); +} + +static int send_abort(struct c4iw_ep *ep) +{ + if (!ep->com.qp || !ep->com.qp->srq) { + send_abort_req(ep); + return 0; + } + set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags); + read_tcb(ep); + return 0; +} + +static int send_connect(struct c4iw_ep *ep) +{ + struct cpl_act_open_req *req = NULL; + struct cpl_t5_act_open_req *t5req = NULL; + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req6 *req6 = NULL; + struct cpl_t5_act_open_req6 *t5req6 = NULL; + struct cpl_t6_act_open_req6 *t6req6 = NULL; + struct sk_buff *skb; + u64 opt0; + u32 opt2; + unsigned int mtu_idx; + u32 wscale; + int win, sizev4, sizev6, wrlen; + struct sockaddr_in *la = (struct sockaddr_in *) + &ep->com.local_addr; + struct sockaddr_in *ra = (struct sockaddr_in *) + &ep->com.remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) + &ep->com.local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) + &ep->com.remote_addr; + int ret; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; + u32 isn = (get_random_u32() & ~7UL) - 1; + struct net_device *netdev; + u64 params; + + netdev = ep->com.dev->rdev.lldi.ports[0]; + + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + sizev4 = sizeof(struct cpl_act_open_req); + sizev6 = sizeof(struct cpl_act_open_req6); + break; + case CHELSIO_T5: + sizev4 = sizeof(struct cpl_t5_act_open_req); + sizev6 = sizeof(struct cpl_t5_act_open_req6); + break; + case CHELSIO_T6: + sizev4 = sizeof(struct cpl_t6_act_open_req); + sizev6 = sizeof(struct cpl_t6_act_open_req6); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + return -EINVAL; + } + + wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? + roundup(sizev4, 16) : + roundup(sizev6, 16); + + pr_debug("ep %p atid %u\n", ep, ep->atid); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + pr_err("%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; + + opt0 = (nocong ? NO_CONG_F : 0) | + KEEP_ALIVE_F | + DELACK_F | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | + DSCP_V(ep->tos >> 2) | + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win); + opt2 = RX_CHANNEL_V(0) | + CCTRL_ECN_V(enable_ecn) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); + if (enable_tcp_timestamps) + opt2 |= TSTAMPS_EN_F; + if (enable_tcp_sack) + opt2 |= SACK_EN_F; + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN_F; + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { + if (peer2peer) + isn += 4; + + opt2 |= T5_OPT_2_VALID_F; + opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); + opt2 |= T5_ISS_F; + } + + params = cxgb4_select_ntuple(netdev, ep->l2t); + + if (ep->com.remote_addr.ss_family == AF_INET6) + cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&la6->sin6_addr.s6_addr, 1); + + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); + + if (ep->com.remote_addr.ss_family == AF_INET) { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + req = skb_put(skb, wrlen); + INIT_TP_WR(req, 0); + break; + case CHELSIO_T5: + t5req = skb_put(skb, wrlen); + INIT_TP_WR(t5req, 0); + req = (struct cpl_act_open_req *)t5req; + break; + case CHELSIO_T6: + t6req = skb_put(skb, wrlen); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + t5req = (struct cpl_t5_act_open_req *)t6req; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid<<14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { + req->params = cpu_to_be32(params); + req->opt2 = cpu_to_be32(opt2); + } else { + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + t5req->params = + cpu_to_be64(FILTER_TUPLE_V(params)); + t5req->rsvd = cpu_to_be32(isn); + pr_debug("snd_isn %u\n", t5req->rsvd); + t5req->opt2 = cpu_to_be32(opt2); + } else { + t6req->params = + cpu_to_be64(FILTER_TUPLE_V(params)); + t6req->rsvd = cpu_to_be32(isn); + pr_debug("snd_isn %u\n", t6req->rsvd); + t6req->opt2 = cpu_to_be32(opt2); + } + } + } else { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + req6 = skb_put(skb, wrlen); + INIT_TP_WR(req6, 0); + break; + case CHELSIO_T5: + t5req6 = skb_put(skb, wrlen); + INIT_TP_WR(t5req6, 0); + req6 = (struct cpl_act_open_req6 *)t5req6; + break; + case CHELSIO_T6: + t6req6 = skb_put(skb, wrlen); + INIT_TP_WR(t6req6, 0); + req6 = (struct cpl_act_open_req6 *)t6req6; + t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { + req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev, + ep->l2t)); + req6->opt2 = cpu_to_be32(opt2); + } else { + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + t5req6->params = + cpu_to_be64(FILTER_TUPLE_V(params)); + t5req6->rsvd = cpu_to_be32(isn); + pr_debug("snd_isn %u\n", t5req6->rsvd); + t5req6->opt2 = cpu_to_be32(opt2); + } else { + t6req6->params = + cpu_to_be64(FILTER_TUPLE_V(params)); + t6req6->rsvd = cpu_to_be32(isn); + pr_debug("snd_isn %u\n", t6req6->rsvd); + t6req6->opt2 = cpu_to_be32(opt2); + } + + } + } + + set_bit(ACT_OPEN_REQ, &ep->com.history); + ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +clip_release: + if (ret && ep->com.remote_addr.ss_family == AF_INET6) + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&la6->sin6_addr.s6_addr, 1); + return ret; +} + +static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, + u8 mpa_rev_to_use) +{ + int mpalen, wrlen, ret; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct mpa_v2_conn_params mpa_v2_params; + + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + ep->plen; + if (mpa_rev_to_use == 2) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof(*req), 16); + skb = get_skb(skb, wrlen, GFP_KERNEL); + if (!skb) { + connect_reply_upcall(ep, -ENOMEM); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = skb_put_zero(skb, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); + + mpa = (struct mpa_message *)(req + 1); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + + mpa->flags = 0; + if (crc_enabled) + mpa->flags |= MPA_CRC; + if (markers_enabled) { + mpa->flags |= MPA_MARKERS; + ep->mpa_attr.recv_marker_enabled = 1; + } else { + ep->mpa_attr.recv_marker_enabled = 0; + } + if (mpa_rev_to_use == 2) + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev_to_use; + if (mpa_rev_to_use == 1) { + ep->tried_with_mpa_v1 = 1; + ep->retry_with_mpa_v1 = 0; + } + + if (mpa_rev_to_use == 2) { + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); + pr_debug("initiator ird %u ord %u\n", ep->ird, + ep->ord); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + + if (peer2peer) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), + ep->mpa_pkt + sizeof(*mpa), ep->plen); + } else + if (ep->plen) + memcpy(mpa->private_data, + ep->mpa_pkt + sizeof(*mpa), ep->plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + ep->mpa_skb = skb; + ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + if (ret) + return ret; + start_ep_timer(ep); + __state_set(&ep->com, MPA_REQ_SENT); + ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; + return ret; +} + +static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof(*req), 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + pr_err("%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = skb_put_zero(skb, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = ep->mpa_attr.version; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons(((u16)ep->ird) | + (peer2peer ? MPA_V2_PEER2PEER_MODEL : + 0)); + mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? + (p2p_type == + FW_RI_INIT_P2PTYPE_RDMA_WRITE ? + MPA_V2_RDMA_WRITE_RTR : p2p_type == + FW_RI_INIT_P2PTYPE_READ_REQ ? + MPA_V2_RDMA_READ_RTR : 0) : 0)); + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb again. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); + ep->mpa_skb = skb; + ep->snd_seq += mpalen; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof(*req), 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + pr_err("%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = skb_put_zero(skb, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL_F | + FW_WR_IMMDLEN_V(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH_F | + FW_OFLD_TX_DATA_WR_SHOVE_F); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = 0; + if (ep->mpa_attr.crc_enabled) + mpa->flags |= MPA_CRC; + if (ep->mpa_attr.recv_marker_enabled) + mpa->flags |= MPA_MARKERS; + mpa->revision = ep->mpa_attr.version; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + if (peer2peer && (ep->mpa_attr.p2p_type != + FW_RI_INIT_P2PTYPE_DISABLED)) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); + ep->mpa_skb = skb; + __state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_establish *req = cplhdr(skb); + unsigned short tcp_opt = ntohs(req->tcp_opt); + unsigned int tid = GET_TID(req); + unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); + struct tid_info *t = dev->rdev.lldi.tids; + int ret; + + ep = lookup_atid(t, atid); + + pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid, + be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + + mutex_lock(&ep->com.mutex); + dst_confirm(ep->dst); + + /* setup the hwtid for this connection */ + ep->hwtid = tid; + cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family); + insert_ep_tid(ep); + + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); + + set_emss(ep, tcp_opt); + + /* dealloc the atid */ + xa_erase_irq(&ep->com.dev->atids, atid); + cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); + + /* start MPA negotiation */ + ret = send_flowc(ep); + if (ret) + goto err; + if (ep->retry_with_mpa_v1) + ret = send_mpa_req(ep, skb, 1); + else + ret = send_mpa_req(ep, skb, mpa_rev); + if (ret) + goto err; + mutex_unlock(&ep->com.mutex); + return 0; +err: + mutex_unlock(&ep->com.mutex); + connect_reply_upcall(ep, -ENOMEM); + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + return 0; +} + +static void close_complete_upcall(struct c4iw_ep *ep, int status) +{ + struct iw_cm_event event; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = status; + if (ep->com.cm_id) { + pr_debug("close complete delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + deref_cm_id(&ep->com); + set_bit(CLOSE_UPCALL, &ep->com.history); + } +} + +static void peer_close_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + pr_debug("peer close delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); + } +} + +static void peer_abort_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = -ECONNRESET; + if (ep->com.cm_id) { + pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + deref_cm_id(&ep->com); + set_bit(ABORT_UPCALL, &ep->com.history); + } +} + +static void connect_reply_upcall(struct c4iw_ep *ep, int status) +{ + struct iw_cm_event event; + + pr_debug("ep %p tid %u status %d\n", + ep, ep->hwtid, status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); + + if ((status == 0) || (status == -ECONNREFUSED)) { + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used */ + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message); + } + } + + pr_debug("ep %p tid %u status %d\n", ep, + ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + + if (status < 0) + deref_cm_id(&ep->com); +} + +static int connect_request_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + int ret; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); + event.provider_data = ep; + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.ord = ep->ord; + event.ird = ep->ird; + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used. Send max supported */ + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); + set_bit(CONNREQ_UPCALL, &ep->com.history); + c4iw_put_ep(&ep->parent_ep->com); + return ret; +} + +static void established_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + event.ird = ep->ord; + event.ord = ep->ird; + if (ep->com.cm_id) { + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); + } +} + +static int update_rx_credits(struct c4iw_ep *ep, u32 credits) +{ + struct sk_buff *skb; + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; + + pr_debug("ep %p tid %u credits %u\n", + ep, ep->hwtid, credits); + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + pr_err("update_rx_credits - cannot alloc skb!\n"); + return 0; + } + + /* + * If we couldn't specify the entire rcv window at connection setup + * due to the limit in the number of bits in the RCV_BUFSIZ field, + * then add the overage in to the credits returned. + */ + if (ep->rcv_win > RCV_BUFSIZ_M * 1024) + credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; + + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + + c4iw_ofld_send(&ep->com.dev->rdev, skb); + return credits; +} + +#define RELAXED_IRD_NEGOTIATION 1 + +/* + * process_mpa_reply - process streaming mode MPA reply + * + * Returns: + * + * 0 upon success indicating a connect request was delivered to the ULP + * or the mpa request is incomplete but valid so far. + * + * 1 if a failure requires the caller to close the connection. + * + * 2 if a failure requires the caller to abort the connection. + */ +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + u16 resp_ird, resp_ord; + u8 rtr_mismatch = 0, insuff_ird = 0; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + int err; + int disconnect = 0; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + err = -EINVAL; + goto err_stop_timer; + } + + /* + * copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return 0; + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision > mpa_rev) { + pr_err("%s MPA version mismatch. Local = %d, Received = %d\n", + __func__, mpa_rev, mpa->revision); + err = -EPROTO; + goto err_stop_timer; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + err = -EPROTO; + goto err_stop_timer; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + err = -EPROTO; + goto err_stop_timer; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + err = -EPROTO; + goto err_stop_timer; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return 0; + + if (mpa->flags & MPA_REJECT) { + err = -ECONNREFUSED; + goto err_stop_timer; + } + + /* + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. + */ + if (stop_ep_timer(ep)) + return 0; + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + __state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + resp_ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + resp_ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + pr_debug("responder ird %u ord %u ep ird %u ord %u\n", + resp_ird, resp_ord, ep->ird, ep->ord); + + /* + * This is a double-check. Ideally, below checks are + * not required since ird/ord stuff has been taken + * care of in c4iw_accept_cr + */ + if (ep->ird < resp_ord) { + if (RELAXED_IRD_NEGOTIATION && resp_ord <= + ep->com.dev->rdev.lldi.max_ordird_qp) + ep->ird = resp_ord; + else + insuff_ird = 1; + } else if (ep->ird > resp_ord) { + ep->ird = resp_ord; + } + if (ep->ord > resp_ird) { + if (RELAXED_IRD_NEGOTIATION) + ep->ord = resp_ird; + else + insuff_ird = 1; + } + if (insuff_ird) { + err = -ENOMEM; + ep->ird = resp_ord; + ep->ord = resp_ird; + } + + if (ntohs(mpa_v2_params->ird) & + MPA_V2_PEER2PEER_MODEL) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n", + ep->mpa_attr.crc_enabled, + ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type, p2p_type); + + /* + * If responder's RTR does not match with that of initiator, assign + * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not + * generated when moving QP to RTS state. + * A TERM message will be sent after QP has moved to RTS state + */ + if ((ep->mpa_attr.version == 2) && peer2peer && + (ep->mpa_attr.p2p_type != p2p_type)) { + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + rtr_mismatch = 1; + } + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err; + + /* + * If responder's RTR requirement did not match with what initiator + * supports, generate TERM message + */ + if (rtr_mismatch) { + pr_err("%s: RTR mismatch, sending TERM\n", __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_NOMATCH_RTR; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + err = -ENOMEM; + disconnect = 1; + goto out; + } + + /* + * Generate TERM if initiator IRD is not sufficient for responder + * provided ORD. Currently, we do the same behaviour even when + * responder provided IRD is also not sufficient as regards to + * initiator ORD. + */ + if (insuff_ird) { + pr_err("%s: Insufficient IRD, sending TERM\n", __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_INSUFF_IRD; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + err = -ENOMEM; + disconnect = 1; + goto out; + } + goto out; +err_stop_timer: + stop_ep_timer(ep); +err: + disconnect = 2; +out: + connect_reply_upcall(ep, err); + return disconnect; +} + +/* + * process_mpa_request - process streaming mode MPA request + * + * Returns: + * + * 0 upon success indicating a connect request was delivered to the ULP + * or the mpa request is incomplete but valid so far. + * + * 1 if a failure requires the caller to close the connection. + * + * 2 if a failure requires the caller to abort the connection. + */ +static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) + goto err_stop_timer; + + pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); + + /* + * Copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * If we don't even have the mpa message, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return 0; + + pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision > mpa_rev) { + pr_err("%s MPA version mismatch. Local = %d, Received = %d\n", + __func__, mpa_rev, mpa->revision); + goto err_stop_timer; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) + goto err_stop_timer; + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) + goto err_stop_timer; + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) + goto err_stop_timer; + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return 0; + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. + */ + ep->mpa_attr.initiator = 0; + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + if (mpa->revision == 1) + ep->tried_with_mpa_v1 = 1; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + ep->ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); + ep->ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); + pr_debug("initiator ird %u ord %u\n", + ep->ird, ep->ord); + if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) + if (peer2peer) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n", + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type); + + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + goto err_unlock_parent; + } else { + goto err_unlock_parent; + } + mutex_unlock(&ep->parent_ep->com.mutex); + return 0; + +err_unlock_parent: + mutex_unlock(&ep->parent_ep->com.mutex); + goto err_out; +err_stop_timer: + (void)stop_ep_timer(ep); +err_out: + return 2; +} + +static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_rx_data *hdr = cplhdr(skb); + unsigned int dlen = ntohs(hdr->len); + unsigned int tid = GET_TID(hdr); + __u8 status = hdr->status; + int disconnect = 0; + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen); + skb_pull(skb, sizeof(*hdr)); + skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); + + switch (ep->com.state) { + case MPA_REQ_SENT: + update_rx_credits(ep, dlen); + ep->rcv_seq += dlen; + disconnect = process_mpa_reply(ep, skb); + break; + case MPA_REQ_WAIT: + update_rx_credits(ep, dlen); + ep->rcv_seq += dlen; + disconnect = process_mpa_request(ep, skb); + break; + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + + update_rx_credits(ep, dlen); + if (status) + pr_err("%s Unexpected streaming data." \ + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + ep->com.state, ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; + break; + } + default: + break; + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); + c4iw_put_ep(&ep->com); + return 0; +} + +static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx) +{ + enum chip_type adapter_type; + + adapter_type = ep->com.dev->rdev.lldi.adapter_type; + + /* + * If this TCB had a srq buffer cached, then we must complete + * it. For user mode, that means saving the srqidx in the + * user/kernel status page for this qp. For kernel mode, just + * synthesize the CQE now. + */ + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { + if (ep->com.qp->ibqp.uobject) + t4_set_wq_in_error(&ep->com.qp->wq, srqidx); + else + c4iw_flush_srqidx(ep->com.qp, srqidx); + } +} + +static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + u32 srqidx; + struct c4iw_ep *ep; + struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); + int release = 0; + unsigned int tid = GET_TID(rpl); + + ep = get_ep_from_tid(dev, tid); + if (!ep) { + pr_warn("Abort rpl to freed endpoint\n"); + return 0; + } + + if (ep->com.qp && ep->com.qp->srq) { + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status)); + complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx); + } + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case ABORTING: + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); + break; + } + mutex_unlock(&ep->com.mutex); + + if (release) { + close_complete_upcall(ep, -ECONNRESET); + release_ep_resources(ep); + } + c4iw_put_ep(&ep->com); + return 0; +} + +static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + u32 wscale; + struct sockaddr_in *sin; + int win; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = __skb_put_zero(skb, sizeof(*req)); + req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + sin = (struct sockaddr_in *)&ep->com.local_addr; + req->le.lport = sin->sin_port; + req->le.u.ipv4.lip = sin->sin_addr.s_addr; + sin = (struct sockaddr_in *)&ep->com.remote_addr; + req->le.pport = sin->sin_port; + req->le.u.ipv4.pip = sin->sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) | + FW_OFLD_CONNECTION_WR_ASTID_V(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); + req->tcb.tx_max = (__force __be32) jiffies; + req->tcb.rcv_adv = htons(1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; + + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F | + (nocong ? NO_CONG_F : 0) | + KEEP_ALIVE_F | + DELACK_F | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | + DSCP_V(ep->tos >> 2) | + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win)); + req->tcb.opt2 = (__force __be32) (PACE_V(1) | + TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL_V(0) | + CCTRL_ECN_V(enable_ecn) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid)); + if (enable_tcp_timestamps) + req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F; + if (enable_tcp_sack) + req->tcb.opt2 |= (__force __be32)SACK_EN_F; + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F; + req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +/* + * Some of the error codes above implicitly indicate that there is no TID + * allocated with the result of an ACT_OPEN. We use this predicate to make + * that explicit. + */ +static inline int act_open_has_tid(int status) +{ + return (status != CPL_ERR_TCAM_PARITY && + status != CPL_ERR_TCAM_MISS && + status != CPL_ERR_TCAM_FULL && + status != CPL_ERR_CONN_EXIST_SYNRECV && + status != CPL_ERR_CONN_EXIST); +} + +static char *neg_adv_str(unsigned int status) +{ + switch (status) { + case CPL_ERR_RTX_NEG_ADVICE: + return "Retransmit timeout"; + case CPL_ERR_PERSIST_NEG_ADVICE: + return "Persist timeout"; + case CPL_ERR_KEEPALV_NEG_ADVICE: + return "Keepalive timeout"; + default: + return "Unknown"; + } +} + +static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) +{ + ep->snd_win = snd_win; + ep->rcv_win = rcv_win; + pr_debug("snd_win %d rcv_win %d\n", + ep->snd_win, ep->rcv_win); +} + +#define ACT_OPEN_RETRY_COUNT 2 + +static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, + struct dst_entry *dst, struct c4iw_dev *cdev, + bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) +{ + struct neighbour *n; + int err, step; + struct net_device *pdev; + + n = dst_neigh_lookup(dst, peer_ip); + if (!n) + return -ENODEV; + + rcu_read_lock(); + err = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + if (iptype == 4) + pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + else if (IS_ENABLED(CONFIG_IPV6)) + for_each_netdev(&init_net, pdev) { + if (ipv6_chk_addr(&init_net, + (struct in6_addr *)peer_ip, + pdev, 1)) + break; + } + else + pdev = NULL; + + if (!pdev) { + err = -ENODEV; + goto out; + } + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, rt_tos2priority(tos)); + if (!ep->l2t) { + dev_put(pdev); + goto out; + } + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + dev_put(pdev); + } else { + pdev = get_real_dev(n->dev); + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, rt_tos2priority(tos)); + if (!ep->l2t) + goto out; + ep->mtu = dst_mtu(dst); + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + + if (clear_mpa_v1) { + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; + } + } + err = 0; +out: + rcu_read_unlock(); + + neigh_release(n); + + return err; +} + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + int size = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *) + &ep->com.cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *) + &ep->com.cm_id->m_remote_addr; + struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->m_local_addr; + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->m_remote_addr; + int iptype; + __u8 *ra; + + pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id); + c4iw_init_wr_wait(ep->com.wr_waitp); + + /* When MPA revision is different on nodes, the node with MPA_rev=2 + * tries to reconnect with MPA_rev 1 for the same EP through + * c4iw_reconnect(), where the same EP is assigned with new tid for + * further connection establishment. As we are using the same EP pointer + * for reconnect, few skbs are used during the previous c4iw_connect(), + * which leaves the EP with inadequate skbs for further + * c4iw_reconnect(), Further causing a crash due to an empty + * skb_list() during peer_abort(). Allocate skbs which is already used. + */ + size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { + err = -ENOMEM; + goto fail1; + } + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid\n", __func__); + err = -ENOMEM; + goto fail2; + } + err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail2a; + + /* find a route */ + if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + } else { + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, + ep->com.cm_id->tos, + raddr6->sin6_scope_id); + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + } + if (!ep->dst) { + pr_err("%s - cannot find route\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, + ep->com.dev->rdev.lldi.adapter_type, + ep->com.cm_id->tos); + if (err) { + pr_err("%s - cannot alloc l2e\n", __func__); + goto fail4; + } + + pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = ep->com.cm_id->tos; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail2a: + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); +fail1: + c4iw_put_ep(&ep->com); +out: + return err; +} + +static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_open_rpl *rpl = cplhdr(skb); + unsigned int atid = TID_TID_G(AOPEN_ATID_G( + ntohl(rpl->atid_status))); + struct tid_info *t = dev->rdev.lldi.tids; + int status = AOPEN_STATUS_G(ntohl(rpl->atid_status)); + struct sockaddr_in *la; + struct sockaddr_in *ra; + struct sockaddr_in6 *la6; + struct sockaddr_in6 *ra6; + int ret = 0; + + ep = lookup_atid(t, atid); + la = (struct sockaddr_in *)&ep->com.local_addr; + ra = (struct sockaddr_in *)&ep->com.remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + + pr_debug("ep %p atid %u status %u errno %d\n", ep, atid, + status, status2errno(status)); + + if (cxgb_is_neg_adv(status)) { + pr_debug("Connection problems for atid %u status %u (%s)\n", + atid, status, neg_adv_str(status)); + ep->stats.connect_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); + return 0; + } + + set_bit(ACT_OPEN_RPL, &ep->com.history); + + /* + * Log interesting failures. + */ + switch (status) { + case CPL_ERR_CONN_RESET: + case CPL_ERR_CONN_TIMEDOUT: + break; + case CPL_ERR_TCAM_FULL: + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + if (ep->com.local_addr.ss_family == AF_INET && + dev->rdev.lldi.enable_fw_ofld_conn) { + ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G( + ntohl(rpl->atid_status)))); + if (ret) + goto fail; + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.local_addr; + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *) + &sin6->sin6_addr.s6_addr, 1); + } + xa_erase_irq(&ep->com.dev->atids, atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; + default: + if (ep->com.local_addr.ss_family == AF_INET) { + pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n", + atid, status, status2errno(status), + &la->sin_addr.s_addr, ntohs(la->sin_port), + &ra->sin_addr.s_addr, ntohs(ra->sin_port)); + } else { + pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n", + atid, status, status2errno(status), + la6->sin6_addr.s6_addr, ntohs(la6->sin6_port), + ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port)); + } + break; + } + +fail: + connect_reply_upcall(ep, status2errno(status)); + state_set(&ep->com, DEAD); + + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } + if (status && act_open_has_tid(status)) + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl), + ep->com.local_addr.ss_family); + + xa_erase_irq(&ep->com.dev->atids, atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); + + return 0; +} + +static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb); + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); + + if (!ep) { + pr_warn("%s stid %d lookup failure!\n", __func__, stid); + goto out; + } + pr_debug("ep %p status %d error %d\n", ep, + rpl->status, status2errno(rpl->status)); + c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); + c4iw_put_ep(&ep->com); +out: + return 0; +} + +static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); + + if (!ep) { + pr_warn("%s stid %d lookup failure!\n", __func__, stid); + goto out; + } + pr_debug("ep %p\n", ep); + c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); + c4iw_put_ep(&ep->com); +out: + return 0; +} + +static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, + struct cpl_pass_accept_req *req) +{ + struct cpl_pass_accept_rpl *rpl; + unsigned int mtu_idx; + u64 opt0; + u32 opt2; + u32 wscale; + struct cpl_t5_pass_accept_rpl *rpl5 = NULL; + int win; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; + opt0 = (nocong ? NO_CONG_F : 0) | + KEEP_ALIVE_F | + DELACK_F | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(ep->l2t->idx) | + TX_CHAN_V(ep->tx_chan) | + SMAC_SEL_V(ep->smac_idx) | + DSCP_V(ep->tos >> 2) | + ULP_MODE_V(ULP_MODE_TCPDDP) | + RCV_BUFSIZ_V(win); + opt2 = RX_CHANNEL_V(0) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); + + if (enable_tcp_timestamps && req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN_F; + if (enable_tcp_sack && req->tcpopt.sack) + opt2 |= SACK_EN_F; + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN_F; + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) + tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + + IP_HDR_LEN_G(hlen); + else + tcph = (const void *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN_V(1); + } + + if (!is_t4(adapter_type)) { + u32 isn = (get_random_u32() & ~7UL) - 1; + + skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL); + rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16)); + rpl = (void *)rpl5; + INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid); + opt2 |= T5_OPT_2_VALID_F; + opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); + opt2 |= T5_ISS_F; + if (peer2peer) + isn += 4; + rpl5->iss = cpu_to_be32(isn); + pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); + } else { + skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); + rpl = __skb_put_zero(skb, sizeof(*rpl)); + INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid); + } + + rpl->opt0 = cpu_to_be64(opt0); + rpl->opt2 = cpu_to_be32(opt2); + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure); + + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) +{ + pr_debug("c4iw_dev %p tid %u\n", dev, hwtid); + skb_trim(skb, sizeof(struct cpl_tid_release)); + release_tid(&dev->rdev, hwtid, skb); + return; +} + +static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *child_ep = NULL, *parent_ep; + struct cpl_pass_accept_req *req = cplhdr(skb); + unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int hwtid = GET_TID(req); + struct dst_entry *dst; + __u8 local_ip[16], peer_ip[16]; + __be16 local_port, peer_port; + struct sockaddr_in6 *sin6; + int err; + u16 peer_mss = ntohs(req->tcpopt.mss); + int iptype; + unsigned short hdrs; + u8 tos; + + parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); + if (!parent_ep) { + pr_err("%s connect request on invalid stid %d\n", + __func__, stid); + goto reject; + } + + if (state_read(&parent_ep->com) != LISTEN) { + pr_err("%s - listening ep not in LISTEN\n", __func__); + goto reject; + } + + if (parent_ep->com.cm_id->tos_set) + tos = parent_ep->com.cm_id->tos; + else + tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); + + /* Find output route */ + if (iptype == 4) { + pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" + , parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); + } else { + pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" + , parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + tos, + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); + } + if (!dst) { + pr_err("%s - failed to find dst entry!\n", __func__); + goto reject; + } + + child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (!child_ep) { + pr_err("%s - failed to allocate ep entry!\n", __func__); + dst_release(dst); + goto reject; + } + + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, + parent_ep->com.dev->rdev.lldi.adapter_type, tos); + if (err) { + pr_err("%s - failed to allocate l2t entry!\n", __func__); + dst_release(dst); + kfree(child_ep); + goto reject; + } + + hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + + sizeof(struct tcphdr) + + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); + if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) + child_ep->mtu = peer_mss + hdrs; + + skb_queue_head_init(&child_ep->com.ep_skb_list); + if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) + goto fail; + + state_set(&child_ep->com, CONNECTING); + child_ep->com.dev = dev; + child_ep->com.cm_id = NULL; + + if (iptype == 4) { + struct sockaddr_in *sin = (struct sockaddr_in *) + &child_ep->com.local_addr; + + sin->sin_family = AF_INET; + sin->sin_port = local_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&child_ep->com.local_addr; + sin->sin_family = AF_INET; + sin->sin_port = ((struct sockaddr_in *) + &parent_ep->com.local_addr)->sin_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + sin->sin_family = AF_INET; + sin->sin_port = peer_port; + sin->sin_addr.s_addr = *(__be32 *)peer_ip; + } else { + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = local_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + + sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = peer_port; + memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); + } + + c4iw_get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + child_ep->tos = tos; + child_ep->dst = dst; + child_ep->hwtid = hwtid; + + pr_debug("tx_chan %u smac_idx %u rss_qid %u\n", + child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); + + timer_setup(&child_ep->timer, ep_timeout, 0); + cxgb4_insert_tid(t, child_ep, hwtid, + child_ep->com.local_addr.ss_family); + insert_ep_tid(child_ep); + if (accept_cr(child_ep, skb, req)) { + c4iw_put_ep(&parent_ep->com); + release_ep_resources(child_ep); + } else { + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); + } + if (iptype == 6) { + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } + goto out; +fail: + c4iw_put_ep(&child_ep->com); +reject: + reject_cr(dev, hwtid, skb); +out: + if (parent_ep) + c4iw_put_ep(&parent_ep->com); + return 0; +} + +static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_pass_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + int ret; + u16 tcp_opt = ntohs(req->tcp_opt); + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); + + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); + + set_emss(ep, tcp_opt); + + dst_confirm(ep->dst); + mutex_lock(&ep->com.mutex); + ep->com.state = MPA_REQ_WAIT; + start_ep_timer(ep); + set_bit(PASS_ESTAB, &ep->com.history); + ret = send_flowc(ep); + mutex_unlock(&ep->com.mutex); + if (ret) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + c4iw_put_ep(&ep->com); + + return 0; +} + +static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_peer_close *hdr = cplhdr(skb); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + int disconnect = 1; + int release = 0; + unsigned int tid = GET_TID(hdr); + int ret; + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + dst_confirm(ep->dst); + + set_bit(PEER_CLOSE, &ep->com.history); + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see c4iw_accept_cr()). + */ + __state_set(&ep->com, CLOSING); + pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = C4IW_QP_STATE_CLOSING; + ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ret != -ECONNRESET) { + peer_close_upcall(ep); + disconnect = 1; + } + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + (void)stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_complete_upcall(ep, 0); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + if (release) + release_ep_resources(ep); + c4iw_put_ep(&ep->com); + return 0; +} + +static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep) +{ + complete_cached_srq_buffers(ep, ep->srqe_idx); + if (ep->com.cm_id && ep->com.qp) { + struct c4iw_qp_attributes attrs; + + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + peer_abort_upcall(ep); + release_ep_resources(ep); + c4iw_put_ep(&ep->com); +} + +static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss6 *req = cplhdr(skb); + struct c4iw_ep *ep; + struct sk_buff *rpl_skb; + struct c4iw_qp_attributes attrs; + int ret; + int release = 0; + unsigned int tid = GET_TID(req); + u8 status; + u32 srqidx; + + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + + status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); + + if (cxgb_is_neg_adv(status)) { + pr_debug("Negative advice on abort- tid %u status %d (%s)\n", + ep->hwtid, status, neg_adv_str(status)); + ep->stats.abort_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); + goto deref_ep; + } + + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, + ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + * However, this is not needed if com state is just + * MPA_REQ_SENT + */ + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); + + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case CONNECTING: + c4iw_put_ep(&ep->parent_ep->com); + break; + case MPA_REQ_WAIT: + (void)stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + (void)stop_ep_timer(ep); + if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || + (mpa_rev == 2 && ep->tried_with_mpa_v1)) + connect_reply_upcall(ep, -ECONNRESET); + else { + /* + * we just don't send notification upwards because we + * want to retry with mpa_v1 without upper layers even + * knowing it. + * + * do some housekeeping so as to re-initiate the + * connection + */ + pr_info("%s: mpa_rev=%d. Retrying with mpav1\n", + __func__, mpa_rev); + ep->retry_with_mpa_v1 = 1; + } + break; + case MPA_REP_SENT: + break; + case MPA_REQ_RCVD: + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + fallthrough; + case FPDU_MODE: + if (ep->com.qp && ep->com.qp->srq) { + srqidx = ABORT_RSS_SRQIDX_G( + be32_to_cpu(req->srqidx_status)); + if (srqidx) { + complete_cached_srq_buffers(ep, srqidx); + } else { + /* Hold ep ref until finish_peer_abort() */ + c4iw_get_ep(&ep->com); + __state_set(&ep->com, ABORTING); + set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags); + read_tcb(ep); + break; + + } + } + + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + pr_err("%s - qp <- error failed!\n", __func__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + mutex_unlock(&ep->com.mutex); + goto deref_ep; + default: + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); + break; + } + dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + /* we don't release if we want to retry with mpa_v1 */ + if (!ep->retry_with_mpa_v1) + release = 1; + } + mutex_unlock(&ep->com.mutex); + + rpl_skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!rpl_skb)) { + release = 1; + goto out; + } + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); +out: + if (release) + release_ep_resources(ep); + else if (ep->retry_with_mpa_v1) { + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.local_addr; + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + } + xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid); + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, + ep->com.local_addr.ss_family); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + } + +deref_ep: + c4iw_put_ep(&ep->com); + /* Dereferencing ep, referenced in peer_abort_intr() */ + c4iw_put_ep(&ep->com); + return 0; +} + +static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + struct cpl_close_con_rpl *rpl = cplhdr(skb); + int release = 0; + unsigned int tid = GET_TID(rpl); + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + + /* The cm_id may be null if we failed to connect */ + mutex_lock(&ep->com.mutex); + set_bit(CLOSE_CON_RPL, &ep->com.history); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + (void)stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_complete_upcall(ep, 0); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + case DEAD: + break; + default: + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); + break; + } + mutex_unlock(&ep->com.mutex); + if (release) + release_ep_resources(ep); + c4iw_put_ep(&ep->com); + return 0; +} + +static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_rdma_terminate *rpl = cplhdr(skb); + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + + ep = get_ep_from_tid(dev, tid); + + if (ep) { + if (ep->com.qp) { + pr_warn("TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + + /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3, + * when entering the TERM state the RNIC MUST initiate a CLOSE. + */ + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + c4iw_put_ep(&ep->com); + } else + pr_warn("TERM received tid %u no ep/qp\n", tid); + + return 0; +} + +/* + * Upcall from the adapter indicating data has been transmitted. + * For us its just the single MPA request or reply. We can now free + * the skb holding the mpa message. + */ +static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_fw4_ack *hdr = cplhdr(skb); + u8 credits = hdr->credits; + unsigned int tid = GET_TID(hdr); + + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + pr_debug("ep %p tid %u credits %u\n", + ep, ep->hwtid, credits); + if (credits == 0) { + pr_debug("0 credit ack ep %p tid %u state %u\n", + ep, ep->hwtid, state_read(&ep->com)); + goto out; + } + + dst_confirm(ep->dst); + if (ep->mpa_skb) { + pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n", + ep, ep->hwtid, state_read(&ep->com), + ep->mpa_attr.initiator ? 1 : 0); + mutex_lock(&ep->com.mutex); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; + if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) + stop_ep_timer(ep); + mutex_unlock(&ep->com.mutex); + } +out: + c4iw_put_ep(&ep->com); + return 0; +} + +int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int abort; + struct c4iw_ep *ep = to_ep(cm_id); + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + + mutex_lock(&ep->com.mutex); + if (ep->com.state != MPA_REQ_RCVD) { + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + return -ECONNRESET; + } + set_bit(ULP_REJECT, &ep->com.history); + if (mpa_rev == 0) + abort = 1; + else + abort = send_mpa_reject(ep, pdata, pdata_len); + mutex_unlock(&ep->com.mutex); + + stop_ep_timer(ep); + c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); + c4iw_put_ep(&ep->com); + return 0; +} + +int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + struct c4iw_ep *ep = to_ep(cm_id); + struct c4iw_dev *h = to_c4iw_dev(cm_id->device); + struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); + int abort = 0; + + pr_debug("ep %p tid %u\n", ep, ep->hwtid); + + mutex_lock(&ep->com.mutex); + if (ep->com.state != MPA_REQ_RCVD) { + err = -ECONNRESET; + goto err_out; + } + + if (!qp) { + err = -EINVAL; + goto err_out; + } + + set_bit(ULP_ACCEPT, &ep->com.history); + if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || + (conn_param->ird > cur_max_read_depth(ep->com.dev))) { + err = -EINVAL; + goto err_abort; + } + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + if (conn_param->ord > ep->ird) { + if (RELAXED_IRD_NEGOTIATION) { + conn_param->ord = ep->ird; + } else { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + err = -ENOMEM; + goto err_abort; + } + } + if (conn_param->ird < ep->ord) { + if (RELAXED_IRD_NEGOTIATION && + ep->ord <= h->rdev.lldi.max_ordird_qp) { + conn_param->ird = ep->ord; + } else { + err = -ENOMEM; + goto err_abort; + } + } + } + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (ep->mpa_attr.version == 1) { + if (peer2peer && ep->ird == 0) + ep->ird = 1; + } else { + if (peer2peer && + (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) + ep->ird = 1; + } + + pr_debug("ird %d ord %d\n", ep->ird, ep->ord); + + ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); + ep->com.qp = qp; + ref_qp(ep); + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_MAX_ORD; + + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err_deref_cm_id; + + set_bit(STOP_MPA_TIMER, &ep->com.flags); + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err_deref_cm_id; + + __state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + return 0; +err_deref_cm_id: + deref_cm_id(&ep->com); +err_abort: + abort = 1; +err_out: + mutex_unlock(&ep->com.mutex); + if (abort) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + c4iw_put_ep(&ep->com); + return err; +} + +static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in_device *ind; + int found = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; + const struct in_ifaddr *ifa; + + ind = in_dev_get(dev->rdev.lldi.ports[0]); + if (!ind) + return -EADDRNOTAVAIL; + rcu_read_lock(); + in_dev_for_each_ifa_rcu(ifa, ind) { + if (ifa->ifa_flags & IFA_F_SECONDARY) + continue; + laddr->sin_addr.s_addr = ifa->ifa_address; + raddr->sin_addr.s_addr = ifa->ifa_address; + found = 1; + break; + } + rcu_read_unlock(); + + in_dev_put(ind); + return found ? 0 : -EADDRNOTAVAIL; +} + +static int get_lladdr(struct net_device *dev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_dev *idev; + int err = -EADDRNOTAVAIL; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev != NULL) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + memcpy(addr, &ifp->addr, 16); + err = 0; + break; + } + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + return err; +} + +static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in6_addr addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; + + if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { + memcpy(la6->sin6_addr.s6_addr, &addr, 16); + memcpy(ra6->sin6_addr.s6_addr, &addr, 16); + return 0; + } + return -EADDRNOTAVAIL; +} + +int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_ep *ep; + int err = 0; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct sockaddr_in6 *laddr6; + struct sockaddr_in6 *raddr6; + __u8 *ra; + int iptype; + + if ((conn_param->ord > cur_max_read_depth(dev)) || + (conn_param->ird > cur_max_read_depth(dev))) { + err = -EINVAL; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + pr_err("%s - cannot alloc ep\n", __func__); + err = -ENOMEM; + goto out; + } + + skb_queue_head_init(&ep->com.ep_skb_list); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { + err = -ENOMEM; + goto fail1; + } + + timer_setup(&ep->timer, ep_timeout, 0); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + + ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); + cm_id->provider_data = ep; + ep->com.dev = dev; + ep->com.qp = get_qhp(dev, conn_param->qpn); + if (!ep->com.qp) { + pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); + err = -EINVAL; + goto fail2; + } + ref_qp(ep); + pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn, + ep->com.qp, cm_id); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid\n", __func__); + err = -ENOMEM; + goto fail2; + } + err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail5; + + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, + sizeof(ep->com.remote_addr)); + + laddr = (struct sockaddr_in *)&ep->com.local_addr; + raddr = (struct sockaddr_in *)&ep->com.remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr; + + if (cm_id->m_remote_addr.ss_family == AF_INET) { + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + + /* + * Handle loopback requests to INADDR_ANY. + */ + if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { + err = pick_local_ipaddrs(dev, cm_id); + if (err) + goto fail3; + } + + /* find a route */ + pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", + &laddr->sin_addr, ntohs(laddr->sin_port), + ra, ntohs(raddr->sin_port)); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); + } else { + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + + /* + * Handle loopback requests to INADDR_ANY. + */ + if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { + err = pick_local_ip6addrs(dev, cm_id); + if (err) + goto fail3; + } + + /* find a route */ + pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", + laddr6->sin6_addr.s6_addr, + ntohs(laddr6->sin6_port), + raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, cm_id->tos, + raddr6->sin6_scope_id); + } + if (!ep->dst) { + pr_err("%s - cannot find route\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, + ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); + if (err) { + pr_err("%s - cannot alloc l2e\n", __func__); + goto fail4; + } + + pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = cm_id->tos; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail5: + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + skb_queue_purge(&ep->com.ep_skb_list); + deref_cm_id(&ep->com); +fail1: + c4iw_put_ep(&ep->com); +out: + return err; +} + +static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &ep->com.local_addr; + + if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { + err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + if (err) + return err; + } + c4iw_init_wr_wait(ep->com.wr_waitp); + err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], + ep->stid, &sin6->sin6_addr, + sin6->sin6_port, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + ep->com.wr_waitp, + 0, 0, __func__); + else if (err > 0) + err = net_xmit_errno(err); + if (err) { + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", + err, ep->stid, + sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); + } + return err; +} + +static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in *sin = (struct sockaddr_in *) + &ep->com.local_addr; + + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + sin->sin_addr.s_addr, sin->sin_port, 0, + ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); + if (err == -EBUSY) { + if (c4iw_fatal_error(&ep->com.dev->rdev)) { + err = -EIO; + break; + } + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(ep->com.wr_waitp); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, sin->sin_addr.s_addr, sin->sin_port, + 0, ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + ep->com.wr_waitp, + 0, 0, __func__); + else if (err > 0) + err = net_xmit_errno(err); + } + if (err) + pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" + , err, ep->stid, + &sin->sin_addr, ntohs(sin->sin_port)); + return err; +} + +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_listen_ep *ep; + + might_sleep(); + + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + pr_err("%s - cannot alloc ep\n", __func__); + err = -ENOMEM; + goto fail1; + } + skb_queue_head_init(&ep->com.ep_skb_list); + pr_debug("ep %p\n", ep); + ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); + ep->com.dev = dev; + ep->backlog = backlog; + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, + sizeof(ep->com.local_addr)); + + /* + * Allocate a server TID. + */ + if (dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, + cm_id->m_local_addr.ss_family, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, + cm_id->m_local_addr.ss_family, ep); + + if (ep->stid == -1) { + pr_err("%s - cannot alloc stid\n", __func__); + err = -ENOMEM; + goto fail2; + } + err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL); + if (err) + goto fail3; + + state_set(&ep->com, LISTEN); + if (ep->com.local_addr.ss_family == AF_INET) + err = create_server4(dev, ep); + else + err = create_server6(dev, ep); + if (!err) { + cm_id->provider_data = ep; + goto out; + } + xa_erase_irq(&ep->com.dev->stids, ep->stid); +fail3: + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); +fail2: + deref_cm_id(&ep->com); + c4iw_put_ep(&ep->com); +fail1: +out: + return err; +} + +int c4iw_destroy_listen(struct iw_cm_id *cm_id) +{ + int err; + struct c4iw_listen_ep *ep = to_listen_ep(cm_id); + + pr_debug("ep %p\n", ep); + + might_sleep(); + state_set(&ep->com, DEAD); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], false); + } else { + struct sockaddr_in6 *sin6; + c4iw_init_wr_wait(ep->com.wr_waitp); + err = cxgb4_remove_server( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], + ep->com.local_addr.ss_family == AF_INET6); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp, + 0, 0, __func__); + sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } + xa_erase_irq(&ep->com.dev->stids, ep->stid); + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); +done: + deref_cm_id(&ep->com); + c4iw_put_ep(&ep->com); + return err; +} + +int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) +{ + int ret = 0; + int close = 0; + int fatal = 0; + struct c4iw_rdev *rdev; + + mutex_lock(&ep->com.mutex); + + pr_debug("ep %p state %s, abrupt %d\n", ep, + states[ep->com.state], abrupt); + + /* + * Ref the ep here in case we have fatal errors causing the + * ep to be released and freed. + */ + c4iw_get_ep(&ep->com); + + rdev = &ep->com.dev->rdev; + if (c4iw_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep, -EIO); + ep->com.state = DEAD; + } + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + case CONNECTING: + close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + + /* + * if we close before we see the fw4_ack() then we fix + * up the timer state since we're reusing it. + */ + if (ep->mpa_skb && + test_bit(STOP_MPA_TIMER, &ep->com.flags)) { + clear_bit(STOP_MPA_TIMER, &ep->com.flags); + stop_ep_timer(ep); + } + start_ep_timer(ep); + } + set_bit(CLOSE_SENT, &ep->com.flags); + break; + case CLOSING: + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + (void)stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } + break; + case MORIBUND: + case ABORTING: + case DEAD: + pr_debug("ignoring disconnect ep %p state %u\n", + ep, ep->com.state); + break; + default: + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); + break; + } + + if (close) { + if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); + ret = send_abort(ep); + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); + ret = send_halfclose(ep); + } + if (ret) { + set_bit(EP_DISC_FAIL, &ep->com.history); + if (!abrupt) { + stop_ep_timer(ep); + close_complete_upcall(ep, -EIO); + } + if (ep->com.qp) { + struct c4iw_qp_attributes attrs; + + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + pr_err("%s - qp <- error failed!\n", + __func__); + } + fatal = 1; + } + } + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + if (fatal) + release_ep_resources(ep); + return ret; +} + +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + fallthrough; + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } + xa_erase_irq(&dev->atids, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; + if (req->retval) { + pr_err("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + (__force u32) htonl( + (__force u32) req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word) +{ + u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]); + u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]); + u64 t; + u32 shift = 32; + + t = (thi << shift) | (tlo >> shift); + + return t; +} + +static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift) +{ + u32 v; + u64 t = be64_to_cpu(tcb[(31 - word) / 2]); + + if (word & 0x1) + shift += 32; + v = (t >> shift) & mask; + return v; +} + +static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_get_tcb_rpl *rpl = cplhdr(skb); + __be64 *tcb = (__be64 *)(rpl + 1); + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + u64 t_flags_64; + u32 rx_pdu_out; + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to + * determine if there's a rx PDU feedback event pending. + * + * If that bit is set, it means we'll need to re-read the TCB's + * rq_start value. The final value is the one present in a TCB + * with the TF_RX_PDU_OUT bit cleared. + */ + + t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W); + rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S; + + c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */ + c4iw_put_ep(&ep->com); /* from read_tcb() */ + + /* If TF_RX_PDU_OUT bit is set, re-read the TCB */ + if (rx_pdu_out) { + if (++ep->rx_pdu_out_cnt >= 2) { + WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n"); + goto cleanup; + } + read_tcb(ep); + return 0; + } + + ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M, + TCB_RQ_START_S); +cleanup: + pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); + + if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) + finish_peer_abort(dev, ep); + else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) + send_abort_req(ep); + else + WARN_ONCE(1, "unexpected state!"); + + return 0; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + __be32 l2info; + __be16 hdr_len, vlantag, len; + u16 eth_hdr_len; + int tcp_hdr_len, ip_hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + struct c4iw_dev *dev; + enum chip_type type; + + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); + /* Store values from cpl_rx_pkt in temporary location. */ + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from SYN packet. + * to generate cpl_pass_accept_req. + */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); + + req = __skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(SYN_INTF_V(intf) | + SYN_MAC_IDX_V(RX_MACIDX_G( + be32_to_cpu(l2info))) | + SYN_XACT_MATCH_F); + type = dev->rdev.lldi.adapter_type; + tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); + ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); + req->hdr_len = + cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); + if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { + eth_hdr_len = is_t4(type) ? + RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : + RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | + IP_HDR_LEN_V(ip_hdr_len) | + ETH_HDR_LEN_V(eth_hdr_len)); + } else { /* T6 and later */ + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | + T6_IP_HDR_LEN_V(ip_hdr_len) | + T6_ETH_HDR_LEN_V(eth_hdr_len)); + } + req->vlan = vlantag; + req->len = len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | + PASS_OPEN_TOS_V(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + if (!req_skb) + return; + req = __skb_put_zero(req_skb, sizeof(*req)); + req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F); + req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F); + req->le.filter = (__force __be32) filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) | + FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) | + FW_OFLD_CONNECTION_WR_ASTID_V( + PASS_OPEN_TID_G(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid)); + + /* + * We initialize the MSS index in TCB to 0xF. + * So that when driver sends cpl_pass_accept_rpl + * TCB picks up the correct value. If this was 0 + * TP will ignore any value > 0 for MSS index. + */ + req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); + req->cookie = (uintptr_t)skb; + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } +} + +/* + * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt + * messages when a filter is being used instead of server to + * redirect a syn packet. When packets hit filter they are redirected + * to the offload queue and driver tries to establish the connection + * using firmware work request. + */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct c4iw_ep *lep = NULL; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid, eth_hdr_len; + int step; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server tid from filter hit index from cpl_rx_pkt. + */ + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); + + lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); + if (!lep) { + pr_warn("%s connect request on invalid stid %d\n", + __func__, stid); + goto reject; + } + + switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { + case CHELSIO_T4: + eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T5: + eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T6: + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); + goto reject; + } + + if (eth_hdr_len == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n", + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); + if (!dst) { + pr_err("%s - failed to find dst entry!\n", __func__); + goto reject; + } + neigh = dst_neigh_lookup_skb(dst, skb); + + if (!neigh) { + pr_err("%s - failed to allocate neigh!\n", __func__); + goto free_dst; + } + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + if (!pdev) { + pr_err("%s - failed to find device!\n", __func__); + goto free_dst; + } + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + dev_put(pdev); + } else { + pdev = get_real_dev(neigh->dev); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + } + neigh_release(neigh); + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = (__force u16) htons((__force u16)tcph->window); + + /* Calcuate filter portion for LE region. */ + filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( + dev->rdev.lldi.ports[0], + e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once firmware sends a reply with TID we update the TID field + * in cpl and pass it through the regular cpl_pass_accept_req path. + */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: + if (lep) + c4iw_put_ep(&lep->com); + return 0; +} + +/* + * These are the real handlers that are called from a + * work queue. + */ +static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_FW4_ACK] = fw4_ack, + [CPL_GET_TCB_RPL] = read_tcb_rpl, + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt, + [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, + [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe +}; + +static void process_timeout(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int abort = 1; + + mutex_lock(&ep->com.mutex); + pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); + switch (ep->com.state) { + case MPA_REQ_SENT: + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_complete_upcall(ep, -ETIMEDOUT); + break; + case ABORTING: + case DEAD: + + /* + * These states are expected if the ep timed out at the same + * time as another thread was calling stop_ep_timer(). + * So we silently do nothing for these states. + */ + abort = 0; + break; + default: + WARN(1, "%s unexpected state ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, ep->com.state); + abort = 0; + } + mutex_unlock(&ep->com.mutex); + if (abort) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + c4iw_put_ep(&ep->com); +} + +static void process_timedout_eps(void) +{ + struct c4iw_ep *ep; + + spin_lock_irq(&timeout_lock); + while (!list_empty(&timeout_list)) { + struct list_head *tmp; + + tmp = timeout_list.next; + list_del(tmp); + tmp->next = NULL; + tmp->prev = NULL; + spin_unlock_irq(&timeout_lock); + ep = list_entry(tmp, struct c4iw_ep, entry); + process_timeout(ep); + spin_lock_irq(&timeout_lock); + } + spin_unlock_irq(&timeout_lock); +} + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + struct c4iw_dev *dev; + struct cpl_act_establish *rpl; + unsigned int opcode; + int ret; + + process_timedout_eps(); + while ((skb = skb_dequeue(&rxq))) { + rpl = cplhdr(skb); + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); + opcode = rpl->ot.opcode; + + if (opcode >= ARRAY_SIZE(work_handlers) || + !work_handlers[opcode]) { + pr_err("No handler for opcode 0x%x.\n", opcode); + kfree_skb(skb); + } else { + ret = work_handlers[opcode](dev, skb); + if (!ret) + kfree_skb(skb); + } + process_timedout_eps(); + } +} + +static DECLARE_WORK(skb_work, process_work); + +static void ep_timeout(struct timer_list *t) +{ + struct c4iw_ep *ep = from_timer(ep, t, timer); + int kickit = 0; + + spin_lock(&timeout_lock); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + /* + * Only insert if it is not already on the list. + */ + if (!ep->entry.next) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } + } + spin_unlock(&timeout_lock); + if (kickit) + queue_work(workq, &skb_work); +} + +/* + * All the CM events are handled on a work queue to have a safe context. + */ +static int sched(struct c4iw_dev *dev, struct sk_buff *skb) +{ + + /* + * Save dev in the skb->cb area. + */ + *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev; + + /* + * Queue the skb and schedule the worker thread. + */ + skb_queue_tail(&rxq, skb); + queue_work(workq, &skb_work); + return 0; +} + +static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", + rpl->status, GET_TID(rpl)); + } + kfree_skb(skb); + return 0; +} + +static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + struct c4iw_wr_wait *wr_waitp; + int ret; + + pr_debug("type %u\n", rpl->type); + + switch (rpl->type) { + case FW6_TYPE_WR_RPL: + ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); + wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; + pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret); + if (wr_waitp) + c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0); + kfree_skb(skb); + break; + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + sched(dev, skb); + break; + default: + pr_err("%s unexpected fw6 msg type %u\n", + __func__, rpl->type); + kfree_skb(skb); + break; + } + return 0; +} + +static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + unsigned int tid = GET_TID(req); + + ep = get_ep_from_tid(dev, tid); + /* This EP will be dereferenced in peer_abort() */ + if (!ep) { + pr_warn("Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } + if (cxgb_is_neg_adv(req->status)) { + pr_debug("Negative advice on abort- tid %u status %d (%s)\n", + ep->hwtid, req->status, + neg_adv_str(req->status)); + goto out; + } + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); + + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); +out: + sched(dev, skb); + return 0; +} + +/* + * Most upcalls from the T4 Core go to sched() to + * schedule the processing on a work queue. + */ +c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = peer_abort_intr, + [CPL_RDMA_TERMINATE] = sched, + [CPL_FW4_ACK] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, + [CPL_GET_TCB_RPL] = sched, + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched +}; + +int __init c4iw_cm_init(void) +{ + skb_queue_head_init(&rxq); + + workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); + if (!workq) + return -ENOMEM; + + return 0; +} + +void c4iw_cm_term(void) +{ + WARN_ON(!list_empty(&timeout_list)); + destroy_workqueue(workq); +} diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c new file mode 100644 index 000000000..c7e8d7b3b --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -0,0 +1,1190 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/uverbs_ioctl.h> + +#include "iw_cxgb4.h" + +static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = __skb_put_zero(skb, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_RESET; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + kfree(cq->sw_queue); + dma_free_coherent(&(rdev->lldi.pdev->dev), + cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); + c4iw_put_cqid(rdev, cq->cqid, uctx); +} + +static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + int user = (uctx != &rdev->uctx); + int ret; + struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); + + cq->cqid = c4iw_get_cqid(rdev, uctx); + if (!cq->cqid) { + ret = -ENOMEM; + goto err1; + } + + if (!user) { + cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL); + if (!cq->sw_queue) { + ret = -ENOMEM; + goto err2; + } + } + cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize, + &cq->dma_addr, GFP_KERNEL); + if (!cq->queue) { + ret = -ENOMEM; + goto err3; + } + dma_unmap_addr_set(cq, mapping, cq->dma_addr); + + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto err4; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = __skb_put_zero(skb, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_WRITE; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + res->u.cq.iqandst_to_iqandstindex = cpu_to_be32( + FW_RI_RES_WR_IQANUS_V(0) | + FW_RI_RES_WR_IQANUD_V(1) | + FW_RI_RES_WR_IQANDST_F | + FW_RI_RES_WR_IQANDSTINDEX_V( + rdev->lldi.ciq_ids[cq->vector])); + res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( + FW_RI_RES_WR_IQDROPRSS_F | + FW_RI_RES_WR_IQPCIECH_V(2) | + FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | + FW_RI_RES_WR_IQO_F | + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); + res->u.cq.iqsize = cpu_to_be16(cq->size); + res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); + + c4iw_init_wr_wait(wr_waitp); + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + if (ret) + goto err4; + + cq->gen = 1; + cq->gts = rdev->lldi.gts_reg; + cq->rdev = rdev; + + cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS, + &cq->bar2_qid, + user ? &cq->bar2_pa : NULL); + if (user && !cq->bar2_pa) { + pr_warn("%s: cqid %u not in BAR2 range\n", + pci_name(rdev->lldi.pdev), cq->cqid); + ret = -EINVAL; + goto err4; + } + return 0; +err4: + dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); +err3: + kfree(cq->sw_queue); +err2: + c4iw_put_cqid(rdev, cq->cqid, uctx); +err1: + return ret; +} + +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) +{ + struct t4_cqe cqe; + + pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n", + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(FW_RI_SEND) | + CQE_TYPE_V(0) | + CQE_SWCQE_V(1) | + CQE_QPID_V(wq->sq.qid)); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) +{ + int flushed = 0; + int in_use = wq->rq.in_use - count; + + pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", + wq, cq, wq->rq.in_use, count); + while (in_use--) { + insert_recv_cqe(wq, cq, 0); + flushed++; + } + return flushed; +} + +static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, + struct t4_swsqe *swcqe) +{ + struct t4_cqe cqe; + + pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n", + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(swcqe->opcode) | + CQE_TYPE_V(1) | + CQE_SWCQE_V(1) | + CQE_QPID_V(wq->sq.qid)); + CQE_WRID_SQ_IDX(&cqe) = swcqe->idx; + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +static void advance_oldest_read(struct t4_wq *wq); + +int c4iw_flush_sq(struct c4iw_qp *qhp) +{ + int flushed = 0; + struct t4_wq *wq = &qhp->wq; + struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + while (idx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[idx]; + swsqe->flushed = 1; + insert_sq_cqe(wq, cq, swsqe); + if (wq->sq.oldest_read == swsqe) { + advance_oldest_read(wq); + } + flushed++; + if (++idx == wq->sq.size) + idx = 0; + } + wq->sq.flush_cidx += flushed; + if (wq->sq.flush_cidx >= wq->sq.size) + wq->sq.flush_cidx -= wq->sq.size; + return flushed; +} + +static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_swsqe *swsqe; + int cidx; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + cidx = wq->sq.flush_cidx; + + while (cidx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[cidx]; + if (!swsqe->signaled) { + if (++cidx == wq->sq.size) + cidx = 0; + } else if (swsqe->complete) { + + /* + * Insert this completed cqe into the swcq. + */ + pr_debug("moving cqe into swcq sq idx %u cq idx %u\n", + cidx, cq->sw_pidx); + swsqe->cqe.header |= htonl(CQE_SWCQE_V(1)); + cq->sw_queue[cq->sw_pidx] = swsqe->cqe; + t4_swcq_produce(cq); + swsqe->flushed = 1; + if (++cidx == wq->sq.size) + cidx = 0; + wq->sq.flush_cidx = cidx; + } else + break; + } +} + +static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, + struct t4_cqe *read_cqe) +{ + read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; + read_cqe->len = htonl(wq->sq.oldest_read->read_len); + read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) | + CQE_SWCQE_V(SW_CQE(hw_cqe)) | + CQE_OPCODE_V(FW_RI_READ_REQ) | + CQE_TYPE_V(1)); + read_cqe->bits_type_ts = hw_cqe->bits_type_ts; +} + +static void advance_oldest_read(struct t4_wq *wq) +{ + + u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; + + if (rptr == wq->sq.size) + rptr = 0; + while (rptr != wq->sq.pidx) { + wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; + + if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) + return; + if (++rptr == wq->sq.size) + rptr = 0; + } + wq->sq.oldest_read = NULL; +} + +/* + * Move all CQEs from the HWCQ into the SWCQ. + * Deal with out-of-order and/or completions that complete + * prior unsignalled WRs. + */ +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp) +{ + struct t4_cqe *hw_cqe, *swcqe, read_cqe; + struct c4iw_qp *qhp; + struct t4_swsqe *swsqe; + int ret; + + pr_debug("cqid 0x%x\n", chp->cq.cqid); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + + /* + * This logic is similar to poll_cq(), but not quite the same + * unfortunately. Need to move pertinent HW CQEs to the SW CQ but + * also do any translation magic that poll_cq() normally does. + */ + while (!ret) { + qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); + + /* + * drop CQEs with no associated QP + */ + if (qhp == NULL) + goto next_cqe; + + if (flush_qhp != qhp) { + spin_lock(&qhp->lock); + + if (qhp->wq.flushed == 1) + goto next_cqe; + } + + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { + + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. + */ + if (CQE_WRID_STAG(hw_cqe) == 1) + goto next_cqe; + + /* + * Eat completions for unsignaled read WRs. + */ + if (!qhp->wq.sq.oldest_read->signaled) { + advance_oldest_read(&qhp->wq); + goto next_cqe; + } + + /* + * Don't write to the HWCQ, create a new read req CQE + * in local memory and move it into the swcq. + */ + create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(&qhp->wq); + } + + /* if its a SQ completion, then do the magic to move all the + * unsignaled and now in-order completions into the swcq. + */ + if (SQ_TYPE(hw_cqe)) { + swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + flush_completed_wrs(&qhp->wq, &chp->cq); + } else { + swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; + *swcqe = *hw_cqe; + swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1)); + t4_swcq_produce(&chp->cq); + } +next_cqe: + t4_hwcq_consume(&chp->cq); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + if (qhp && flush_qhp != qhp) + spin_unlock(&qhp->lock); + } +} + +static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) +{ + if (DRAIN_CQE(cqe)) { + WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); + return 0; + } + + if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe)) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe)) + return 0; + + if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq)) + return 0; + return 1; +} + +void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) +{ + struct t4_cqe *cqe; + u32 ptr; + + *count = 0; + pr_debug("count zero %d\n", *count); + ptr = cq->sw_cidx; + while (ptr != cq->sw_pidx) { + cqe = &cq->sw_queue[ptr]; + if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && + (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) + (*count)++; + if (++ptr == cq->size) + ptr = 0; + } + pr_debug("cq %p count %d\n", cq, *count); +} + +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, + srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, + srq->size, srq->ooo_count, + (unsigned long long) + srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, + srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + +/* + * poll_cq + * + * Caller must: + * check the validity of the first CQE, + * supply the wq assicated with the qpid. + * + * credit: cq credit to return to sge. + * cqe_flushed: 1 iff the CQE is flushed. + * cqe: copy of the polled CQE. + * + * return value: + * 0 CQE returned ok. + * -EAGAIN CQE skipped, try again. + * -EOVERFLOW CQ overflow detected. + */ +static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit, + struct t4_srq *srq) +{ + int ret = 0; + struct t4_cqe *hw_cqe, read_cqe; + + *cqe_flushed = 0; + *credit = 0; + ret = t4_next_cqe(cq, &hw_cqe); + if (ret) + return ret; + + pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", + CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), + CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe), + CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe), + CQE_WRID_LOW(hw_cqe)); + + /* + * skip cqe's not affiliated with a QP. + */ + if (wq == NULL) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * skip hw cqe's if the wq is flushed. + */ + if (wq->flushed && !SW_CQE(hw_cqe)) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * skip TERMINATE cqes... + */ + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * Special cqe for drain WR completions... + */ + if (DRAIN_CQE(hw_cqe)) { + *cookie = CQE_DRAIN_COOKIE(hw_cqe); + *cqe = *hw_cqe; + goto skip_cqe; + } + + /* + * Gotta tweak READ completions: + * 1) the cqe doesn't contain the sq_wptr from the wr. + * 2) opcode not reflected from the wr. + * 3) read_len not reflected from the wr. + * 4) cq_type is RQ_TYPE not SQ_TYPE. + */ + if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { + + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq, 0); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (CQE_WRID_STAG(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq, 0); + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * Eat completions for unsignaled read WRs. + */ + if (!wq->sq.oldest_read->signaled) { + advance_oldest_read(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * Don't write to the HWCQ, so create a new read req CQE + * in local memory. + */ + create_read_req_cqe(wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(wq); + } + + if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { + *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); + t4_set_wq_in_error(wq, 0); + } + + /* + * RECV completion. + */ + if (RQ_TYPE(hw_cqe)) { + + /* + * HW only validates 4 bits of MSN. So we must validate that + * the MSN in the SEND is the next expected MSN. If its not, + * then we complete this with T4_ERR_MSN and mark the wq in + * error. + */ + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { + t4_set_wq_in_error(wq, 0); + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); + } + goto proc_cqe; + } + + /* + * If we get here its a send completion. + * + * Handle out of order completion. These get stuffed + * in the SW SQ. Then the SW SQ is walked to move any + * now in-order completions into the SW CQ. This handles + * 2 cases: + * 1) reaping unsignaled WRs when the first subsequent + * signaled WR is completed. + * 2) out of order read completions. + */ + if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) { + struct t4_swsqe *swsqe; + + pr_debug("out of order completion going in sw_sq at idx %u\n", + CQE_WRID_SQ_IDX(hw_cqe)); + swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + ret = -EAGAIN; + goto flush_wq; + } + +proc_cqe: + *cqe = *hw_cqe; + + /* + * Reap the associated WR(s) that are freed up with this + * completion. + */ + if (SQ_TYPE(hw_cqe)) { + int idx = CQE_WRID_SQ_IDX(hw_cqe); + + /* + * Account for any unsignaled completions completed by + * this signaled completion. In this case, cidx points + * to the first unsignaled one, and idx points to the + * signaled one. So adjust in_use based on this delta. + * if this is not completing any unsigned wrs, then the + * delta will be 0. Handle wrapping also! + */ + if (idx < wq->sq.cidx) + wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; + else + wq->sq.in_use -= idx - wq->sq.cidx; + + wq->sq.cidx = (uint16_t)idx; + pr_debug("completing sq idx %u\n", wq->sq.cidx); + *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_sq_consume(wq); + } else { + if (!srq) { + pr_debug("completing rq idx %u\n", wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_rq_consume(wq); + } else { + *cookie = reap_srq_cqe(hw_cqe, srq); + } + wq->rq.msn++; + goto skip_cqe; + } + +flush_wq: + /* + * Flush any completed cqes that are now in-order. + */ + flush_completed_wrs(wq, cq); + +skip_cqe: + if (SW_CQE(hw_cqe)) { + pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n", + cq, cq->cqid, cq->sw_cidx); + t4_swcq_consume(cq); + } else { + pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n", + cq, cq->cqid, cq->cidx); + t4_hwcq_consume(cq); + } + return ret; +} + +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc, struct c4iw_srq *srq) +{ + struct t4_cqe cqe; + struct t4_wq *wq = qhp ? &qhp->wq : NULL; + u32 credit = 0; + u8 cqe_flushed; + u64 cookie = 0; + int ret; + + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); + if (ret) + goto out; + + wc->wr_id = cookie; + wc->qp = qhp ? &qhp->ibqp : NULL; + wc->vendor_err = CQE_STATUS(&cqe); + wc->wc_flags = 0; + + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. + */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + c4iw_dispatch_srq_limit_reached_event(srq); + + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", + CQE_QPID(&cqe), + CQE_TYPE(&cqe), CQE_OPCODE(&cqe), + CQE_STATUS(&cqe), CQE_LEN(&cqe), + CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), + (unsigned long long)cookie); + + if (CQE_TYPE(&cqe) == 0) { + if (!CQE_STATUS(&cqe)) + wc->byte_len = CQE_LEN(&cqe); + else + wc->byte_len = 0; + + switch (CQE_OPCODE(&cqe)) { + case FW_RI_SEND: + wc->opcode = IB_WC_RECV; + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_RECV; + wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + break; + case FW_RI_WRITE_IMMEDIATE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->ex.imm_data = CQE_IMM_DATA(&cqe); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; + } + } else { + switch (CQE_OPCODE(&cqe)) { + case FW_RI_WRITE_IMMEDIATE: + case FW_RI_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case FW_RI_READ_REQ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = CQE_LEN(&cqe); + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_SEND; + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + break; + case FW_RI_SEND: + case FW_RI_SEND_WITH_SE: + wc->opcode = IB_WC_SEND; + break; + + case FW_RI_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case FW_RI_FAST_REGISTER: + wc->opcode = IB_WC_REG_MR; + + /* Invalidate the MR if the fastreg failed */ + if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) + c4iw_invalidate_mr(qhp->rhp, + CQE_WRID_FR_STAG(&cqe)); + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; + } + } + + if (cqe_flushed) + wc->status = IB_WC_WR_FLUSH_ERR; + else { + + switch (CQE_STATUS(&cqe)) { + case T4_ERR_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case T4_ERR_STAG: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_PDID: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case T4_ERR_QPID: + case T4_ERR_ACCESS: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_WRAP: + wc->status = IB_WC_GENERAL_ERR; + break; + case T4_ERR_BOUND: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + wc->status = IB_WC_MW_BIND_ERR; + break; + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_OUT_OF_RQE: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + case T4_ERR_MO: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + case T4_ERR_OPCODE: + case T4_ERR_INTERNAL_ERR: + wc->status = IB_WC_FATAL_ERR; + break; + case T4_ERR_SWFLUSH: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + default: + pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", + CQE_STATUS(&cqe), CQE_QPID(&cqe)); + wc->status = IB_WC_FATAL_ERR; + } + } +out: + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_srq *srq = NULL; + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + srq = qhp->srq; + if (srq) + spin_lock(&srq->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc, srq); + spin_unlock(&qhp->lock); + if (srq) + spin_unlock(&srq->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL); + } + return ret; +} + +int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct c4iw_cq *chp; + unsigned long flags; + int npolled; + int err = 0; + + chp = to_c4iw_cq(ibcq); + + spin_lock_irqsave(&chp->lock, flags); + for (npolled = 0; npolled < num_entries; ++npolled) { + do { + err = c4iw_poll_cq_one(chp, wc + npolled); + } while (err == -EAGAIN); + if (err) + break; + } + spin_unlock_irqrestore(&chp->lock, flags); + return !err || err == -ENODATA ? npolled : err; +} + +void c4iw_cq_rem_ref(struct c4iw_cq *chp) +{ + if (refcount_dec_and_test(&chp->refcnt)) + complete(&chp->cq_rel_comp); +} + +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +{ + struct c4iw_cq *chp; + struct c4iw_ucontext *ucontext; + + pr_debug("ib_cq %p\n", ib_cq); + chp = to_c4iw_cq(ib_cq); + + xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); + c4iw_cq_rem_ref(chp); + wait_for_completion(&chp->cq_rel_comp); + + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); + destroy_cq(&chp->rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, + chp->destroy_skb, chp->wr_waitp); + c4iw_put_wr_wait(chp->wr_waitp); + return 0; +} + +int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct ib_device *ibdev = ibcq->device; + int entries = attr->cqe; + int vector = attr->comp_vector; + struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device); + struct c4iw_cq *chp = to_c4iw_cq(ibcq); + struct c4iw_create_cq ucmd; + struct c4iw_create_cq_resp uresp; + int ret, wr_len; + size_t memsize, hwentries; + struct c4iw_mm_entry *mm, *mm2; + struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct c4iw_ucontext, ibucontext); + + pr_debug("ib_dev %p entries %d\n", ibdev, entries); + if (attr->flags) + return -EOPNOTSUPP; + + if (entries < 1 || entries > ibdev->attrs.max_cqe) + return -EINVAL; + + if (vector >= rhp->rdev.lldi.nciq) + return -EINVAL; + + if (udata) { + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!chp->wr_waitp) { + ret = -ENOMEM; + goto err_free_chp; + } + c4iw_init_wr_wait(chp->wr_waitp); + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!chp->destroy_skb) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + /* account for the status page. */ + entries++; + + /* IQ needs one extra entry to differentiate full vs empty. */ + entries++; + + /* + * entries must be multiple of 16 for HW. + */ + entries = roundup(entries, 16); + + /* + * Make actual HW queue 2x to avoid cdix_inc overflows. + */ + hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size); + + /* + * Make HW queue at least 64 entries so GTS updates aren't too + * frequent. + */ + if (hwentries < 64) + hwentries = 64; + + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); + + /* + * memsize must be a multiple of the page size if its a user cq. + */ + if (udata) + memsize = roundup(memsize, PAGE_SIZE); + + chp->cq.size = hwentries; + chp->cq.memsize = memsize; + chp->cq.vector = vector; + + ret = create_cq(&rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->wr_waitp); + if (ret) + goto err_free_skb; + + chp->rhp = rhp; + chp->cq.size--; /* status page */ + chp->ibcq.cqe = entries - 2; + spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); + refcount_set(&chp->refcnt, 1); + init_completion(&chp->cq_rel_comp); + ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); + if (ret) + goto err_destroy_cq; + + if (ucontext) { + ret = -ENOMEM; + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + goto err_remove_handle; + mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); + if (!mm2) + goto err_free_mm; + + memset(&uresp, 0, sizeof(uresp)); + uresp.qid_mask = rhp->rdev.cqmask; + uresp.cqid = chp->cq.cqid; + uresp.size = chp->cq.size; + uresp.memsize = chp->cq.memsize; + spin_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, + ucontext->is_32b_cqe ? + sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); + if (ret) + goto err_free_mm2; + + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + mm->len = chp->cq.memsize; + insert_mmap(ucontext, mm); + + mm2->key = uresp.gts_key; + mm2->addr = chp->cq.bar2_pa; + mm2->len = PAGE_SIZE; + insert_mmap(ucontext, mm2); + } + + pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n", + chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, + &chp->cq.dma_addr); + return 0; +err_free_mm2: + kfree(mm2); +err_free_mm: + kfree(mm); +err_remove_handle: + xa_erase_irq(&rhp->cqs, chp->cq.cqid); +err_destroy_cq: + destroy_cq(&chp->rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->destroy_skb, chp->wr_waitp); +err_free_skb: + kfree_skb(chp->destroy_skb); +err_free_wr_wait: + c4iw_put_wr_wait(chp->wr_waitp); +err_free_chp: + return ret; +} + +int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct c4iw_cq *chp; + int ret = 0; + unsigned long flag; + + chp = to_c4iw_cq(ibcq); + spin_lock_irqsave(&chp->lock, flag); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); + spin_unlock_irqrestore(&chp->lock, flag); + return ret; +} + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + unsigned long flag; + + /* locking heirarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); + + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c new file mode 100644 index 000000000..80970a173 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -0,0 +1,1572 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/debugfs.h> +#include <linux/vmalloc.h> +#include <linux/math64.h> + +#include <rdma/ib_verbs.h> + +#include "iw_cxgb4.h" + +#define DRV_VERSION "0.1" + +MODULE_AUTHOR("Steve Wise"); +MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +static int allow_db_fc_on_t5; +module_param(allow_db_fc_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_fc_on_t5, + "Allow DB Flow Control on T5 (default = 0)"); + +static int allow_db_coalescing_on_t5; +module_param(allow_db_coalescing_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_coalescing_on_t5, + "Allow DB Coalescing on T5 (default = 0)"); + +int c4iw_wr_log = 0; +module_param(c4iw_wr_log, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data."); + +static int c4iw_wr_log_size_order = 12; +module_param(c4iw_wr_log_size_order, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log_size_order, + "Number of entries (log2) in the work request timing log."); + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); +static struct workqueue_struct *reg_workq; + +#define DB_FC_RESUME_SIZE 64 +#define DB_FC_RESUME_DELAY 1 +#define DB_FC_DRAIN_THRESH 0 + +static struct dentry *c4iw_debugfs_root; + +struct c4iw_debugfs_data { + struct c4iw_dev *devp; + char *buf; + int bufsize; + int pos; +}; + +static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct c4iw_debugfs_data *d = file->private_data; + + return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); +} + +void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe) +{ + struct wr_log_entry le; + int idx; + + if (!wq->rdev->wr_log) + return; + + idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) & + (wq->rdev->wr_log_size - 1); + le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]); + le.poll_host_time = ktime_get(); + le.valid = 1; + le.cqe_sge_ts = CQE_TS(cqe); + if (SQ_TYPE(cqe)) { + le.qid = wq->sq.qid; + le.opcode = CQE_OPCODE(cqe); + le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time; + le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts; + le.wr_id = CQE_WRID_SQ_IDX(cqe); + } else { + le.qid = wq->rq.qid; + le.opcode = FW_RI_RECEIVE; + le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time; + le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts; + le.wr_id = CQE_WRID_MSN(cqe); + } + wq->rdev->wr_log[idx] = le; +} + +static int wr_log_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + ktime_t prev_time; + struct wr_log_entry *lep; + int prev_time_set = 0; + int idx, end; + +#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000) + + idx = atomic_read(&dev->rdev.wr_log_idx) & + (dev->rdev.wr_log_size - 1); + end = idx - 1; + if (end < 0) + end = dev->rdev.wr_log_size - 1; + lep = &dev->rdev.wr_log[idx]; + while (idx != end) { + if (lep->valid) { + if (!prev_time_set) { + prev_time_set = 1; + prev_time = lep->poll_host_time; + } + seq_printf(seq, "%04u: nsec %llu qid %u opcode " + "%u %s 0x%x host_wr_delta nsec %llu " + "post_sge_ts 0x%llx cqe_sge_ts 0x%llx " + "poll_sge_ts 0x%llx post_poll_delta_ns %llu " + "cqe_poll_delta_ns %llu\n", + idx, + ktime_to_ns(ktime_sub(lep->poll_host_time, + prev_time)), + lep->qid, lep->opcode, + lep->opcode == FW_RI_RECEIVE ? + "msn" : "wrid", + lep->wr_id, + ktime_to_ns(ktime_sub(lep->poll_host_time, + lep->post_host_time)), + lep->post_sge_ts, lep->cqe_sge_ts, + lep->poll_sge_ts, + ts2ns(lep->poll_sge_ts - lep->post_sge_ts), + ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts)); + prev_time = lep->poll_host_time; + } + idx++; + if (idx > (dev->rdev.wr_log_size - 1)) + idx = 0; + lep = &dev->rdev.wr_log[idx]; + } +#undef ts2ns + return 0; +} + +static int wr_log_open(struct inode *inode, struct file *file) +{ + return single_open(file, wr_log_show, inode->i_private); +} + +static ssize_t wr_log_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + int i; + + if (dev->rdev.wr_log) + for (i = 0; i < dev->rdev.wr_log_size; i++) + dev->rdev.wr_log[i].valid = 0; + return count; +} + +static const struct file_operations wr_log_debugfs_fops = { + .owner = THIS_MODULE, + .open = wr_log_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = wr_log_clear, +}; + +static struct sockaddr_in zero_sin = { + .sin_family = AF_INET, +}; + +static struct sockaddr_in6 zero_sin6 = { + .sin6_family = AF_INET6, +}; + +static void set_ep_sin_addrs(struct c4iw_ep *ep, + struct sockaddr_in **lsin, + struct sockaddr_in **rsin, + struct sockaddr_in **m_lsin, + struct sockaddr_in **m_rsin) +{ + struct iw_cm_id *id = ep->com.cm_id; + + *m_lsin = (struct sockaddr_in *)&ep->com.local_addr; + *m_rsin = (struct sockaddr_in *)&ep->com.remote_addr; + if (id) { + *lsin = (struct sockaddr_in *)&id->local_addr; + *rsin = (struct sockaddr_in *)&id->remote_addr; + } else { + *lsin = &zero_sin; + *rsin = &zero_sin; + } +} + +static void set_ep_sin6_addrs(struct c4iw_ep *ep, + struct sockaddr_in6 **lsin6, + struct sockaddr_in6 **rsin6, + struct sockaddr_in6 **m_lsin6, + struct sockaddr_in6 **m_rsin6) +{ + struct iw_cm_id *id = ep->com.cm_id; + + *m_lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr; + *m_rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + if (id) { + *lsin6 = (struct sockaddr_in6 *)&id->local_addr; + *rsin6 = (struct sockaddr_in6 *)&id->remote_addr; + } else { + *lsin6 = &zero_sin6; + *rsin6 = &zero_sin6; + } +} + +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) +{ + int space; + int cc; + if (id != qp->wq.sq.qid) + return 0; + + space = qpd->bufsize - qpd->pos - 1; + if (space == 0) + return 1; + + if (qp->ep) { + struct c4iw_ep *ep = qp->ep; + + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin; + struct sockaddr_in *rsin; + struct sockaddr_in *m_lsin; + struct sockaddr_in *m_rsin; + + set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u %s id %u state %u " + "onchip %u ep tid %u state %u " + "%pI4:%u/%u->%pI4:%u/%u\n", + qp->wq.sq.qid, qp->srq ? "srq" : "rq", + qp->srq ? qp->srq->idx : qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + ep->hwtid, (int)ep->com.state, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(m_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(m_rsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6; + struct sockaddr_in6 *rsin6; + struct sockaddr_in6 *m_lsin6; + struct sockaddr_in6 *m_rsin6; + + set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, + &m_rsin6); + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u rq id %u state %u " + "onchip %u ep tid %u state %u " + "%pI6:%u/%u->%pI6:%u/%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + ep->hwtid, (int)ep->com.state, + &lsin6->sin6_addr, + ntohs(lsin6->sin6_port), + ntohs(m_lsin6->sin6_port), + &rsin6->sin6_addr, + ntohs(rsin6->sin6_port), + ntohs(m_rsin6->sin6_port)); + } + } else + cc = snprintf(qpd->buf + qpd->pos, space, + "qp sq id %u rq id %u state %u onchip %u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP); + if (cc < space) + qpd->pos += cc; + return 0; +} + +static int qp_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *qpd = file->private_data; + if (!qpd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(qpd->buf); + kfree(qpd); + return 0; +} + +static int qp_open(struct inode *inode, struct file *file) +{ + struct c4iw_qp *qp; + struct c4iw_debugfs_data *qpd; + unsigned long index; + int count = 1; + + qpd = kmalloc(sizeof(*qpd), GFP_KERNEL); + if (!qpd) + return -ENOMEM; + + qpd->devp = inode->i_private; + qpd->pos = 0; + + /* + * No need to lock; we drop the lock to call vmalloc so it's racy + * anyway. Someone who cares should switch this over to seq_file + */ + xa_for_each(&qpd->devp->qps, index, qp) + count++; + + qpd->bufsize = count * 180; + qpd->buf = vmalloc(qpd->bufsize); + if (!qpd->buf) { + kfree(qpd); + return -ENOMEM; + } + + xa_lock_irq(&qpd->devp->qps); + xa_for_each(&qpd->devp->qps, index, qp) + dump_qp(index, qp, qpd); + xa_unlock_irq(&qpd->devp->qps); + + qpd->buf[qpd->pos++] = 0; + file->private_data = qpd; + return 0; +} + +static const struct file_operations qp_debugfs_fops = { + .owner = THIS_MODULE, + .open = qp_open, + .release = qp_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd) +{ + int space; + int cc; + struct fw_ri_tpte tpte; + int ret; + + space = stagd->bufsize - stagd->pos - 1; + if (space == 0) + return 1; + + ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8, + (__be32 *)&tpte); + if (ret) { + dev_err(&stagd->devp->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return ret; + } + cc = snprintf(stagd->buf + stagd->pos, space, + "stag: idx 0x%x valid %d key 0x%x state %d pdid %d " + "perm 0x%x ps %d len 0x%llx va 0x%llx\n", + (u32)id<<8, + FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)), + FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)), + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo), + ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo)); + if (cc < space) + stagd->pos += cc; + return 0; +} + +static int stag_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd = file->private_data; + if (!stagd) { + pr_info("%s null stagd?\n", __func__); + return 0; + } + vfree(stagd->buf); + kfree(stagd); + return 0; +} + +static int stag_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd; + void *p; + unsigned long index; + int ret = 0; + int count = 1; + + stagd = kmalloc(sizeof(*stagd), GFP_KERNEL); + if (!stagd) { + ret = -ENOMEM; + goto out; + } + stagd->devp = inode->i_private; + stagd->pos = 0; + + xa_for_each(&stagd->devp->mrs, index, p) + count++; + + stagd->bufsize = count * 256; + stagd->buf = vmalloc(stagd->bufsize); + if (!stagd->buf) { + ret = -ENOMEM; + goto err1; + } + + xa_lock_irq(&stagd->devp->mrs); + xa_for_each(&stagd->devp->mrs, index, p) + dump_stag(index, stagd); + xa_unlock_irq(&stagd->devp->mrs); + + stagd->buf[stagd->pos++] = 0; + file->private_data = stagd; + goto out; +err1: + kfree(stagd); +out: + return ret; +} + +static const struct file_operations stag_debugfs_fops = { + .owner = THIS_MODULE, + .open = stag_open, + .release = stag_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"}; + +static int stats_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + + seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current", + "Max", "Fail"); + seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur, + dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail); + seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, + dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur, + dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail); + seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, + dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); + seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur, + dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail); + seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur, + dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail); + seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur, + dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail); + seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); + seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); + seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); + seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n", + db_state_str[dev->db_state], + dev->rdev.stats.db_state_transitions, + dev->rdev.stats.db_fc_interruptions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv); + seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); + return 0; +} + +static int stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, stats_show, inode->i_private); +} + +static ssize_t stats_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pd.max = 0; + dev->rdev.stats.pd.fail = 0; + dev->rdev.stats.qid.max = 0; + dev->rdev.stats.qid.fail = 0; + dev->rdev.stats.stag.max = 0; + dev->rdev.stats.stag.fail = 0; + dev->rdev.stats.pbl.max = 0; + dev->rdev.stats.pbl.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.ocqp.max = 0; + dev->rdev.stats.ocqp.fail = 0; + dev->rdev.stats.db_full = 0; + dev->rdev.stats.db_empty = 0; + dev->rdev.stats.db_drop = 0; + dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; + mutex_unlock(&dev->rdev.stats.lock); + return count; +} + +static const struct file_operations stats_debugfs_fops = { + .owner = THIS_MODULE, + .open = stats_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = stats_clear, +}; + +static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd) +{ + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin; + struct sockaddr_in *rsin; + struct sockaddr_in *m_lsin; + struct sockaddr_in *m_rsin; + + set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx " + "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " + "%pI4:%d/%d <-> %pI4:%d/%d\n", + ep, ep->com.cm_id, ep->com.qp, + (int)ep->com.state, ep->com.flags, + ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(m_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(m_rsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6; + struct sockaddr_in6 *rsin6; + struct sockaddr_in6 *m_lsin6; + struct sockaddr_in6 *m_rsin6; + + set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6); + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx " + "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " + "%pI6:%d/%d <-> %pI6:%d/%d\n", + ep, ep->com.cm_id, ep->com.qp, + (int)ep->com.state, ep->com.flags, + ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, + &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(m_lsin6->sin6_port), + &rsin6->sin6_addr, ntohs(rsin6->sin6_port), + ntohs(m_rsin6->sin6_port)); + } + if (cc < space) + epd->pos += cc; + return 0; +} + +static +int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd) +{ + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in *) + &ep->com.cm_id->local_addr; + struct sockaddr_in *m_lsin = (struct sockaddr_in *) + &ep->com.cm_id->m_local_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d " + "backlog %d %pI4:%d/%d\n", + ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(m_lsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) + &ep->com.cm_id->local_addr; + struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *) + &ep->com.cm_id->m_local_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d " + "backlog %d %pI6:%d/%d\n", + ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(m_lsin6->sin6_port)); + } + if (cc < space) + epd->pos += cc; + return 0; +} + +static int ep_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd = file->private_data; + if (!epd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(epd->buf); + kfree(epd); + return 0; +} + +static int ep_open(struct inode *inode, struct file *file) +{ + struct c4iw_ep *ep; + struct c4iw_listen_ep *lep; + unsigned long index; + struct c4iw_debugfs_data *epd; + int ret = 0; + int count = 1; + + epd = kmalloc(sizeof(*epd), GFP_KERNEL); + if (!epd) { + ret = -ENOMEM; + goto out; + } + epd->devp = inode->i_private; + epd->pos = 0; + + xa_for_each(&epd->devp->hwtids, index, ep) + count++; + xa_for_each(&epd->devp->atids, index, ep) + count++; + xa_for_each(&epd->devp->stids, index, lep) + count++; + + epd->bufsize = count * 240; + epd->buf = vmalloc(epd->bufsize); + if (!epd->buf) { + ret = -ENOMEM; + goto err1; + } + + xa_lock_irq(&epd->devp->hwtids); + xa_for_each(&epd->devp->hwtids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->hwtids); + xa_lock_irq(&epd->devp->atids); + xa_for_each(&epd->devp->atids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->atids); + xa_lock_irq(&epd->devp->stids); + xa_for_each(&epd->devp->stids, index, lep) + dump_listen_ep(lep, epd); + xa_unlock_irq(&epd->devp->stids); + + file->private_data = epd; + goto out; +err1: + kfree(epd); +out: + return ret; +} + +static const struct file_operations ep_debugfs_fops = { + .owner = THIS_MODULE, + .open = ep_open, + .release = ep_release, + .read = debugfs_read, +}; + +static void setup_debugfs(struct c4iw_dev *devp) +{ + debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root, + (void *)devp, &qp_debugfs_fops, 4096); + + debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root, + (void *)devp, &stag_debugfs_fops, 4096); + + debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root, + (void *)devp, &stats_debugfs_fops, 4096); + + debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root, + (void *)devp, &ep_debugfs_fops, 4096); + + if (c4iw_wr_log) + debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root, + (void *)devp, &wr_log_debugfs_fops, 4096); +} + +void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx) +{ + struct list_head *pos, *nxt; + struct c4iw_qid_list *entry; + + mutex_lock(&uctx->lock); + list_for_each_safe(pos, nxt, &uctx->qpids) { + entry = list_entry(pos, struct c4iw_qid_list, entry); + list_del_init(&entry->entry); + if (!(entry->qid & rdev->qpmask)) { + c4iw_put_resource(&rdev->resource.qid_table, + entry->qid); + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur -= rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + } + kfree(entry); + } + + list_for_each_safe(pos, nxt, &uctx->cqids) { + entry = list_entry(pos, struct c4iw_qid_list, entry); + list_del_init(&entry->entry); + kfree(entry); + } + mutex_unlock(&uctx->lock); +} + +void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx) +{ + INIT_LIST_HEAD(&uctx->qpids); + INIT_LIST_HEAD(&uctx->cqids); + mutex_init(&uctx->lock); +} + +/* Caller takes care of locking if needed */ +static int c4iw_rdev_open(struct c4iw_rdev *rdev) +{ + int err; + unsigned int factor; + + c4iw_init_dev_ucontext(rdev, &rdev->uctx); + + /* + * This implementation assumes udb_density == ucq_density! Eventually + * we might need to support this but for now fail the open. Also the + * cqid and qpid range must match for now. + */ + if (rdev->lldi.udb_density != rdev->lldi.ucq_density) { + pr_err("%s: unsupported udb/ucq densities %u/%u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, + rdev->lldi.ucq_density); + return -EINVAL; + } + if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || + rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { + pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, + rdev->lldi.vr->cq.size); + return -EINVAL; + } + + /* This implementation requires a sge_host_page_size <= PAGE_SIZE. */ + if (rdev->lldi.sge_host_page_size > PAGE_SIZE) { + pr_err("%s: unsupported sge host page size %u\n", + pci_name(rdev->lldi.pdev), + rdev->lldi.sge_host_page_size); + return -EINVAL; + } + + factor = PAGE_SIZE / rdev->lldi.sge_host_page_size; + rdev->qpmask = (rdev->lldi.udb_density * factor) - 1; + rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1; + + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, + rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), + rdev->lldi.vr->pbl.start, + rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start, + rdev->lldi.vr->rq.size, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->cq.start, + rdev->lldi.vr->cq.size, + rdev->lldi.vr->srq.size); + pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", + &rdev->lldi.pdev->resource[2], + rdev->lldi.db_reg, rdev->lldi.gts_reg, + rdev->qpmask, rdev->cqmask); + + if (c4iw_num_stags(rdev) == 0) + return -EINVAL; + + rdev->stats.pd.total = T4_MAX_NUM_PD; + rdev->stats.stag.total = rdev->lldi.vr->stag.size; + rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; + rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.srqt.total = rdev->lldi.vr->srq.size; + rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; + rdev->stats.qid.total = rdev->lldi.vr->qp.size; + + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), + T4_MAX_NUM_PD, rdev->lldi.vr->srq.size); + if (err) { + pr_err("error %d initializing resources\n", err); + return err; + } + err = c4iw_pblpool_create(rdev); + if (err) { + pr_err("error %d initializing pbl pool\n", err); + goto destroy_resource; + } + err = c4iw_rqtpool_create(rdev); + if (err) { + pr_err("error %d initializing rqt pool\n", err); + goto destroy_pblpool; + } + err = c4iw_ocqp_pool_create(rdev); + if (err) { + pr_err("error %d initializing ocqp pool\n", err); + goto destroy_rqtpool; + } + rdev->status_page = (struct t4_dev_status_page *) + __get_free_page(GFP_KERNEL); + if (!rdev->status_page) { + err = -ENOMEM; + goto destroy_ocqp_pool; + } + rdev->status_page->qp_start = rdev->lldi.vr->qp.start; + rdev->status_page->qp_size = rdev->lldi.vr->qp.size; + rdev->status_page->cq_start = rdev->lldi.vr->cq.start; + rdev->status_page->cq_size = rdev->lldi.vr->cq.size; + rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support; + + if (c4iw_wr_log) { + rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, + sizeof(*rdev->wr_log), + GFP_KERNEL); + if (rdev->wr_log) { + rdev->wr_log_size = 1 << c4iw_wr_log_size_order; + atomic_set(&rdev->wr_log_idx, 0); + } + } + + rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free"); + if (!rdev->free_workq) { + err = -ENOMEM; + goto err_free_status_page_and_wr_log; + } + + rdev->status_page->db_off = 0; + + init_completion(&rdev->rqt_compl); + init_completion(&rdev->pbl_compl); + kref_init(&rdev->rqt_kref); + kref_init(&rdev->pbl_kref); + + return 0; +err_free_status_page_and_wr_log: + if (c4iw_wr_log && rdev->wr_log) + kfree(rdev->wr_log); + free_page((unsigned long)rdev->status_page); +destroy_ocqp_pool: + c4iw_ocqp_pool_destroy(rdev); +destroy_rqtpool: + c4iw_rqtpool_destroy(rdev); +destroy_pblpool: + c4iw_pblpool_destroy(rdev); +destroy_resource: + c4iw_destroy_resource(&rdev->resource); + return err; +} + +static void c4iw_rdev_close(struct c4iw_rdev *rdev) +{ + kfree(rdev->wr_log); + c4iw_release_dev_ucontext(rdev, &rdev->uctx); + free_page((unsigned long)rdev->status_page); + c4iw_pblpool_destroy(rdev); + c4iw_rqtpool_destroy(rdev); + wait_for_completion(&rdev->pbl_compl); + wait_for_completion(&rdev->rqt_compl); + c4iw_ocqp_pool_destroy(rdev); + destroy_workqueue(rdev->free_workq); + c4iw_destroy_resource(&rdev->resource); +} + +void c4iw_dealloc(struct uld_ctx *ctx) +{ + c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON(!xa_empty(&ctx->dev->cqs)); + WARN_ON(!xa_empty(&ctx->dev->qps)); + WARN_ON(!xa_empty(&ctx->dev->mrs)); + wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids)); + WARN_ON(!xa_empty(&ctx->dev->stids)); + WARN_ON(!xa_empty(&ctx->dev->atids)); + if (ctx->dev->rdev.bar2_kva) + iounmap(ctx->dev->rdev.bar2_kva); + if (ctx->dev->rdev.oc_mw_kva) + iounmap(ctx->dev->rdev.oc_mw_kva); + ib_dealloc_device(&ctx->dev->ibdev); + ctx->dev = NULL; +} + +static void c4iw_remove(struct uld_ctx *ctx) +{ + pr_debug("c4iw_dev %p\n", ctx->dev); + debugfs_remove_recursive(ctx->dev->debugfs_root); + c4iw_unregister_device(ctx->dev); + c4iw_dealloc(ctx); +} + +static int rdma_supported(const struct cxgb4_lld_info *infop) +{ + return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && + infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && + infop->vr->cq.size > 0; +} + +static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) +{ + struct c4iw_dev *devp; + int ret; + + if (!rdma_supported(infop)) { + pr_info("%s: RDMA not supported on this device\n", + pci_name(infop->pdev)); + return ERR_PTR(-ENOSYS); + } + if (!ocqp_supported(infop)) + pr_info("%s: On-Chip Queues not supported on this device\n", + pci_name(infop->pdev)); + + devp = ib_alloc_device(c4iw_dev, ibdev); + if (!devp) { + pr_err("Cannot allocate ib device\n"); + return ERR_PTR(-ENOMEM); + } + devp->rdev.lldi = *infop; + + /* init various hw-queue params based on lld info */ + pr_debug("Ing. padding boundary is %d, egrsstatuspagesize = %d\n", + devp->rdev.lldi.sge_ingpadboundary, + devp->rdev.lldi.sge_egrstatuspagesize); + + devp->rdev.hw_queue.t4_eq_status_entries = + devp->rdev.lldi.sge_egrstatuspagesize / 64; + devp->rdev.hw_queue.t4_max_eq_size = 65520; + devp->rdev.hw_queue.t4_max_iq_size = 65520; + devp->rdev.hw_queue.t4_max_rq_size = 8192 - + devp->rdev.hw_queue.t4_eq_status_entries - 1; + devp->rdev.hw_queue.t4_max_sq_size = + devp->rdev.hw_queue.t4_max_eq_size - + devp->rdev.hw_queue.t4_eq_status_entries - 1; + devp->rdev.hw_queue.t4_max_qp_depth = + devp->rdev.hw_queue.t4_max_rq_size; + devp->rdev.hw_queue.t4_max_cq_depth = + devp->rdev.hw_queue.t4_max_iq_size - 2; + devp->rdev.hw_queue.t4_stat_len = + devp->rdev.lldi.sge_egrstatuspagesize; + + /* + * For T5/T6 devices, we map all of BAR2 with WC. + * For T4 devices with onchip qp mem, we map only that part + * of BAR2 with WC. + */ + devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); + if (!is_t4(devp->rdev.lldi.adapter_type)) { + devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, + pci_resource_len(devp->rdev.lldi.pdev, 2)); + if (!devp->rdev.bar2_kva) { + pr_err("Unable to ioremap BAR2\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } else if (ocqp_supported(infop)) { + devp->rdev.oc_mw_pa = + pci_resource_start(devp->rdev.lldi.pdev, 2) + + pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + if (!devp->rdev.oc_mw_kva) { + pr_err("Unable to ioremap onchip mem\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } + + pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", + devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, + devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); + + ret = c4iw_rdev_open(&devp->rdev); + if (ret) { + pr_err("Unable to open CXIO rdev err %d\n", ret); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(ret); + } + + xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ); + mutex_init(&devp->rdev.stats.lock); + mutex_init(&devp->db_mutex); + INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); + devp->avail_ird = devp->rdev.lldi.max_ird_adapter; + + if (c4iw_debugfs_root) { + devp->debugfs_root = debugfs_create_dir( + pci_name(devp->rdev.lldi.pdev), + c4iw_debugfs_root); + setup_debugfs(devp); + } + + + return devp; +} + +static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) +{ + struct uld_ctx *ctx; + static int vers_printed; + int i; + + if (!vers_printed++) + pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", + DRV_VERSION); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + ctx = ERR_PTR(-ENOMEM); + goto out; + } + ctx->lldi = *infop; + + pr_debug("found device %s nchan %u nrxq %u ntxq %u nports %u\n", + pci_name(ctx->lldi.pdev), + ctx->lldi.nchan, ctx->lldi.nrxq, + ctx->lldi.ntxq, ctx->lldi.nports); + + mutex_lock(&dev_mutex); + list_add_tail(&ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + + for (i = 0; i < ctx->lldi.nrxq; i++) + pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]); +out: + return ctx; +} + +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. + */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); + + /* + * This skb will contain: + * rss_header from the rspq descriptor (1 flit) + * cpl_rx_pkt struct from the rspq descriptor (2 flits) + * space for the difference between the size of an + * rx_pkt and pass_accept_req cpl (1 flit) + * the packet data from the gl + */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)); + skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) + + sizeof(struct cpl_pass_accept_req), + gl->va + pktshift, + gl->tot_len - pktshift); + return skb; +} + +static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl, + const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + + if (opcode != CPL_RX_PKT) + goto out; + + skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift); + if (skb == NULL) + goto out; + + if (c4iw_handlers[opcode] == NULL) { + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + goto out; + } + c4iw_handlers[opcode](dev, skb); + return 1; +out: + return 0; +} + +static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct uld_ctx *ctx = handle; + struct c4iw_dev *dev = ctx->dev; + struct sk_buff *skb; + u8 opcode; + + if (gl == NULL) { + /* omit RSS and rsp_ctrl at end of descriptor */ + unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8; + + skb = alloc_skb(256, GFP_ATOMIC); + if (!skb) + goto nomem; + __skb_put(skb, len); + skb_copy_to_linear_data(skb, &rsp[1], len); + } else if (gl == CXGB4_MSG_AN) { + const struct rsp_ctrl *rc = (void *)rsp; + + u32 qid = be32_to_cpu(rc->pldbuflen_qid); + c4iw_ev_handler(dev, qid); + return 0; + } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) { + if (recv_rx_pkt(dev, gl, rsp)) + return 0; + + pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n", + pci_name(ctx->lldi.pdev), gl->va, + be64_to_cpu(*rsp), + be64_to_cpu(*(__force __be64 *)gl->va), + gl->tot_len); + + return 0; + } else { + skb = cxgb4_pktgl_to_skb(gl, 128, 128); + if (unlikely(!skb)) + goto nomem; + } + + opcode = *(u8 *)rsp; + if (c4iw_handlers[opcode]) { + c4iw_handlers[opcode](dev, skb); + } else { + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } + + return 0; +nomem: + return -1; +} + +static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct uld_ctx *ctx = handle; + + pr_debug("new_state %u\n", new_state); + switch (new_state) { + case CXGB4_STATE_UP: + pr_info("%s: Up\n", pci_name(ctx->lldi.pdev)); + if (!ctx->dev) { + ctx->dev = c4iw_alloc(&ctx->lldi); + if (IS_ERR(ctx->dev)) { + pr_err("%s: initialization failed: %ld\n", + pci_name(ctx->lldi.pdev), + PTR_ERR(ctx->dev)); + ctx->dev = NULL; + break; + } + + INIT_WORK(&ctx->reg_work, c4iw_register_device); + queue_work(reg_workq, &ctx->reg_work); + } + break; + case CXGB4_STATE_DOWN: + pr_info("%s: Down\n", pci_name(ctx->lldi.pdev)); + if (ctx->dev) + c4iw_remove(ctx); + break; + case CXGB4_STATE_FATAL_ERROR: + case CXGB4_STATE_START_RECOVERY: + pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev)); + if (ctx->dev) { + struct ib_event event = {}; + + ctx->dev->rdev.flags |= T4_FATAL_ERROR; + event.event = IB_EVENT_DEVICE_FATAL; + event.device = &ctx->dev->ibdev; + ib_dispatch_event(&event); + c4iw_remove(ctx); + } + break; + case CXGB4_STATE_DETACH: + pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev)); + if (ctx->dev) + c4iw_remove(ctx); + break; + } + return 0; +} + +static void stop_queues(struct uld_ctx *ctx) +{ + struct c4iw_qp *qp; + unsigned long index, flags; + + xa_lock_irqsave(&ctx->dev->qps, flags); + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = STOPPED; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + xa_for_each(&ctx->dev->qps, index, qp) + t4_disable_wq_db(&qp->wq); + } else { + ctx->dev->rdev.status_page->db_off = 1; + } + xa_unlock_irqrestore(&ctx->dev->qps, flags); +} + +static void resume_rc_qp(struct c4iw_qp *qp) +{ + spin_lock(&qp->lock); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL); + qp->wq.sq.wq_pidx_inc = 0; + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); +} + +static void resume_a_chunk(struct uld_ctx *ctx) +{ + int i; + struct c4iw_qp *qp; + + for (i = 0; i < DB_FC_RESUME_SIZE; i++) { + qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp, + db_fc_entry); + list_del_init(&qp->db_fc_entry); + resume_rc_qp(qp); + if (list_empty(&ctx->dev->db_fc_list)) + break; + } +} + +static void resume_queues(struct uld_ctx *ctx) +{ + xa_lock_irq(&ctx->dev->qps); + if (ctx->dev->db_state != STOPPED) + goto out; + ctx->dev->db_state = FLOW_CONTROL; + while (1) { + if (list_empty(&ctx->dev->db_fc_list)) { + struct c4iw_qp *qp; + unsigned long index; + + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + xa_for_each(&ctx->dev->qps, index, qp) + t4_enable_wq_db(&qp->wq); + } else { + ctx->dev->rdev.status_page->db_off = 0; + } + break; + } else { + if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) + < (ctx->dev->rdev.lldi.dbfifo_int_thresh << + DB_FC_DRAIN_THRESH)) { + resume_a_chunk(ctx); + } + if (!list_empty(&ctx->dev->db_fc_list)) { + xa_unlock_irq(&ctx->dev->qps); + if (DB_FC_RESUME_DELAY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(DB_FC_RESUME_DELAY); + } + xa_lock_irq(&ctx->dev->qps); + if (ctx->dev->db_state != FLOW_CONTROL) + break; + } + } + } +out: + if (ctx->dev->db_state != NORMAL) + ctx->dev->rdev.stats.db_fc_interruptions++; + xa_unlock_irq(&ctx->dev->qps); +} + +struct qp_list { + unsigned idx; + struct c4iw_qp **qps; +}; + +static void deref_qps(struct qp_list *qp_list) +{ + int idx; + + for (idx = 0; idx < qp_list->idx; idx++) + c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp); +} + +static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) +{ + int idx; + int ret; + + for (idx = 0; idx < qp_list->idx; idx++) { + struct c4iw_qp *qp = qp_list->qps[idx]; + + xa_lock_irq(&qp->rhp->qps); + spin_lock(&qp->lock); + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.sq.qid, + t4_sq_host_wq_pidx(&qp->wq), + t4_sq_wq_size(&qp->wq)); + if (ret) { + pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + spin_unlock(&qp->lock); + xa_unlock_irq(&qp->rhp->qps); + return; + } + qp->wq.sq.wq_pidx_inc = 0; + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.rq.qid, + t4_rq_host_wq_pidx(&qp->wq), + t4_rq_wq_size(&qp->wq)); + + if (ret) { + pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + spin_unlock(&qp->lock); + xa_unlock_irq(&qp->rhp->qps); + return; + } + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + xa_unlock_irq(&qp->rhp->qps); + + /* Wait for the dbfifo to drain */ + while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + } +} + +static void recover_queues(struct uld_ctx *ctx) +{ + struct c4iw_qp *qp; + unsigned long index; + int count = 0; + struct qp_list qp_list; + int ret; + + /* slow everybody down */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(1000)); + + /* flush the SGE contexts */ + ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); + if (ret) { + pr_err("%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + return; + } + + /* Count active queues so we can build a list of queues to recover */ + xa_lock_irq(&ctx->dev->qps); + WARN_ON(ctx->dev->db_state != STOPPED); + ctx->dev->db_state = RECOVERY; + xa_for_each(&ctx->dev->qps, index, qp) + count++; + + qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC); + if (!qp_list.qps) { + xa_unlock_irq(&ctx->dev->qps); + return; + } + qp_list.idx = 0; + + /* add and ref each qp so it doesn't get freed */ + xa_for_each(&ctx->dev->qps, index, qp) { + c4iw_qp_add_ref(&qp->ibqp); + qp_list.qps[qp_list.idx++] = qp; + } + + xa_unlock_irq(&ctx->dev->qps); + + /* now traverse the list in a safe context to recover the db state*/ + recover_lost_dbs(ctx, &qp_list); + + /* we're almost done! deref the qps and clean up */ + deref_qps(&qp_list); + kfree(qp_list.qps); + + xa_lock_irq(&ctx->dev->qps); + WARN_ON(ctx->dev->db_state != RECOVERY); + ctx->dev->db_state = STOPPED; + xa_unlock_irq(&ctx->dev->qps); +} + +static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) +{ + struct uld_ctx *ctx = handle; + + switch (control) { + case CXGB4_CONTROL_DB_FULL: + stop_queues(ctx); + ctx->dev->rdev.stats.db_full++; + break; + case CXGB4_CONTROL_DB_EMPTY: + resume_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_empty++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_DROP: + recover_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_drop++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + default: + pr_warn("%s: unknown control cmd %u\n", + pci_name(ctx->lldi.pdev), control); + break; + } + return 0; +} + +static struct cxgb4_uld_info c4iw_uld_info = { + .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .ntxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, + .add = c4iw_uld_add, + .rx_handler = c4iw_uld_rx_handler, + .state_change = c4iw_uld_state_change, + .control = c4iw_uld_control, +}; + +void _c4iw_free_wr_wait(struct kref *kref) +{ + struct c4iw_wr_wait *wr_waitp; + + wr_waitp = container_of(kref, struct c4iw_wr_wait, kref); + pr_debug("Free wr_wait %p\n", wr_waitp); + kfree(wr_waitp); +} + +struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp) +{ + struct c4iw_wr_wait *wr_waitp; + + wr_waitp = kzalloc(sizeof(*wr_waitp), gfp); + if (wr_waitp) { + kref_init(&wr_waitp->kref); + pr_debug("wr_wait %p\n", wr_waitp); + } + return wr_waitp; +} + +static int __init c4iw_init_module(void) +{ + int err; + + err = c4iw_cm_init(); + if (err) + return err; + + c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL); + + reg_workq = create_singlethread_workqueue("Register_iWARP_device"); + if (!reg_workq) { + pr_err("Failed creating workqueue to register iwarp device\n"); + return -ENOMEM; + } + + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); + + return 0; +} + +static void __exit c4iw_exit_module(void) +{ + struct uld_ctx *ctx, *tmp; + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) { + if (ctx->dev) + c4iw_remove(ctx); + kfree(ctx); + } + mutex_unlock(&dev_mutex); + destroy_workqueue(reg_workq); + cxgb4_unregister_uld(CXGB4_ULD_RDMA); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); +} + +module_init(c4iw_init_module); +module_exit(c4iw_exit_module); diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c new file mode 100644 index 000000000..34211a533 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/slab.h> +#include <linux/mman.h> +#include <net/sock.h> + +#include "iw_cxgb4.h" + +static void print_tpte(struct c4iw_dev *dev, u32 stag) +{ + int ret; + struct fw_ri_tpte tpte; + + ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, + (__be32 *)&tpte); + if (ret) { + dev_err(&dev->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return; + } + pr_debug("stag idx 0x%x valid %d key 0x%x state %d pdid %d perm 0x%x ps %d len 0x%llx va 0x%llx\n", + stag & 0xffffff00, + FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)), + FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)), + FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)), + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo), + ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo)); +} + +static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) +{ + __be64 *p = (void *)err_cqe; + + dev_err(&dev->rdev.lldi.pdev->dev, + "AE qpid %d opcode %d status 0x%x " + "type %d len 0x%x wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), + CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), + CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); + + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", + be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); + + /* + * Ingress WRITE and READ_RESP errors provide + * the offending stag, so parse and log it. + */ + if (RQ_TYPE(err_cqe) && (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE || + CQE_OPCODE(err_cqe) == FW_RI_READ_RESP)) + print_tpte(dev, CQE_WRID_STAG(err_cqe)); +} + +static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, + struct c4iw_qp *qhp, + struct t4_cqe *err_cqe, + enum ib_event_type ib_event) +{ + struct ib_event event; + struct c4iw_qp_attributes attrs; + unsigned long flag; + + dump_err_cqe(dev, err_cqe); + + if (qhp->attr.state == C4IW_QP_STATE_RTS) { + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 0); + } + + event.event = ib_event; + event.device = chp->ibcq.device; + if (ib_event == IB_EVENT_CQ_ERR) + event.element.cq = &chp->ibcq; + else + event.element.qp = &qhp->ibqp; + if (qhp->ibqp.event_handler) + (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + + if (t4_clear_cq_armed(&chp->cq)) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } +} + +void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) +{ + struct c4iw_cq *chp; + struct c4iw_qp *qhp; + u32 cqid; + + xa_lock_irq(&dev->qps); + qhp = xa_load(&dev->qps, CQE_QPID(err_cqe)); + if (!qhp) { + pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + xa_unlock_irq(&dev->qps); + goto out; + } + + if (SQ_TYPE(err_cqe)) + cqid = qhp->attr.scq; + else + cqid = qhp->attr.rcq; + chp = get_chp(dev, cqid); + if (!chp) { + pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + cqid, CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + xa_unlock_irq(&dev->qps); + goto out; + } + + c4iw_qp_add_ref(&qhp->ibqp); + refcount_inc(&chp->refcnt); + xa_unlock_irq(&dev->qps); + + /* Bad incoming write */ + if (RQ_TYPE(err_cqe) && + (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE)) { + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_REQ_ERR); + goto done; + } + + switch (CQE_STATUS(err_cqe)) { + + /* Completion Events */ + case T4_ERR_SUCCESS: + pr_err("AE with status 0!\n"); + break; + + case T4_ERR_STAG: + case T4_ERR_PDID: + case T4_ERR_QPID: + case T4_ERR_ACCESS: + case T4_ERR_WRAP: + case T4_ERR_BOUND: + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_ACCESS_ERR); + break; + + /* Device Fatal Errors */ + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_DEVICE_FATAL); + break; + + /* QP Fatal Errors */ + case T4_ERR_OUT_OF_RQE: + case T4_ERR_PBL_ADDR_BOUND: + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_OPCODE: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + case T4_ERR_MO: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_RQE_ADDR_BOUND: + case T4_ERR_IRD_OVERFLOW: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + + default: + pr_err("Unknown T4 status 0x%x QPID 0x%x\n", + CQE_STATUS(err_cqe), qhp->wq.sq.qid); + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + } +done: + c4iw_cq_rem_ref(chp); + c4iw_qp_rem_ref(&qhp->ibqp); +out: + return; +} + +int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) +{ + struct c4iw_cq *chp; + unsigned long flag; + + xa_lock_irqsave(&dev->cqs, flag); + chp = xa_load(&dev->cqs, qid); + if (chp) { + refcount_inc(&chp->refcnt); + xa_unlock_irqrestore(&dev->cqs, flag); + t4_clear_cq_armed(&chp->cq); + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + c4iw_cq_rem_ref(chp); + } else { + pr_debug("unknown cqid 0x%x\n", qid); + xa_unlock_irqrestore(&dev->cqs, flag); + } + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c new file mode 100644 index 000000000..280d61466 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2011 Chelsio Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/random.h> +#include "iw_cxgb4.h" + +#define RANDOM_SKIP 16 + +/* + * Trivial bitmap-based allocator. If the random flag is set, the + * allocator is designed to: + * - pseudo-randomize the id returned such that it is not trivially predictable. + * - avoid reuse of recently used id (at the expense of predictability) + */ +u32 c4iw_id_alloc(struct c4iw_id_table *alloc) +{ + unsigned long flags; + u32 obj; + + spin_lock_irqsave(&alloc->lock, flags); + + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) + obj = find_first_zero_bit(alloc->table, alloc->max); + + if (obj < alloc->max) { + if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last += prandom_u32_max(RANDOM_SKIP); + else + alloc->last = obj + 1; + if (alloc->last >= alloc->max) + alloc->last = 0; + __set_bit(obj, alloc->table); + obj += alloc->start; + } else + obj = -1; + + spin_unlock_irqrestore(&alloc->lock, flags); + return obj; +} + +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) +{ + unsigned long flags; + + obj -= alloc->start; + + spin_lock_irqsave(&alloc->lock, flags); + __clear_bit(obj, alloc->table); + spin_unlock_irqrestore(&alloc->lock, flags); +} + +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags) +{ + alloc->start = start; + alloc->flags = flags; + if (flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last = prandom_u32_max(RANDOM_SKIP); + else + alloc->last = 0; + alloc->max = num; + spin_lock_init(&alloc->lock); + alloc->table = bitmap_zalloc(num, GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY)) + bitmap_set(alloc->table, 0, reserved); + + return 0; +} + +void c4iw_id_table_free(struct c4iw_id_table *alloc) +{ + bitmap_free(alloc->table); +} diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h new file mode 100644 index 000000000..50cb2259b --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -0,0 +1,1046 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IW_CXGB4_H__ +#define __IW_CXGB4_H__ + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/xarray.h> +#include <linux/completion.h> +#include <linux/netdevice.h> +#include <linux/sched/mm.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/inet.h> +#include <linux/wait.h> +#include <linux/kref.h> +#include <linux/timer.h> +#include <linux/io.h> +#include <linux/workqueue.h> + +#include <asm/byteorder.h> + +#include <net/net_namespace.h> + +#include <rdma/ib_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/rdma_netlink.h> +#include <rdma/iw_portmap.h> +#include <rdma/restrack.h> + +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include <rdma/cxgb4-abi.h> + +#define DRV_NAME "iw_cxgb4" +#define MOD DRV_NAME ":" + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "t4.h" + +#define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->pbl.start) +#define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->rq.start) + +static inline void *cplhdr(struct sk_buff *skb) +{ + return skb->data; +} + +#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ +#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ + +struct c4iw_id_table { + u32 flags; + u32 start; /* logical minimal id */ + u32 last; /* hint for find */ + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c4iw_resource { + struct c4iw_id_table tpt_table; + struct c4iw_id_table qid_table; + struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; +}; + +struct c4iw_qid_list { + struct list_head entry; + u32 qid; +}; + +struct c4iw_dev_ucontext { + struct list_head qpids; + struct list_head cqids; + struct mutex lock; + struct kref kref; +}; + +enum c4iw_rdev_flags { + T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), +}; + +struct c4iw_stat { + u64 total; + u64 cur; + u64 max; + u64 fail; +}; + +struct c4iw_stats { + struct mutex lock; + struct c4iw_stat qid; + struct c4iw_stat pd; + struct c4iw_stat stag; + struct c4iw_stat pbl; + struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; + struct c4iw_stat ocqp; + u64 db_full; + u64 db_empty; + u64 db_drop; + u64 db_state_transitions; + u64 db_fc_interruptions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; + u64 neg_adv; +}; + +struct c4iw_hw_queue { + int t4_eq_status_entries; + int t4_max_eq_size; + int t4_max_iq_size; + int t4_max_rq_size; + int t4_max_sq_size; + int t4_max_qp_depth; + int t4_max_cq_depth; + int t4_stat_len; +}; + +struct wr_log_entry { + ktime_t post_host_time; + ktime_t poll_host_time; + u64 post_sge_ts; + u64 cqe_sge_ts; + u64 poll_sge_ts; + u16 qid; + u16 wr_id; + u8 opcode; + u8 valid; +}; + +struct c4iw_rdev { + struct c4iw_resource resource; + u32 qpmask; + u32 cqmask; + struct c4iw_dev_ucontext uctx; + struct gen_pool *pbl_pool; + struct gen_pool *rqt_pool; + struct gen_pool *ocqp_pool; + u32 flags; + struct cxgb4_lld_info lldi; + unsigned long bar2_pa; + void __iomem *bar2_kva; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; + struct c4iw_stats stats; + struct c4iw_hw_queue hw_queue; + struct t4_dev_status_page *status_page; + atomic_t wr_log_idx; + struct wr_log_entry *wr_log; + int wr_log_size; + struct workqueue_struct *free_workq; + struct completion rqt_compl; + struct completion pbl_compl; + struct kref rqt_kref; + struct kref pbl_kref; +}; + +static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) +{ + return rdev->flags & T4_FATAL_ERROR; +} + +static inline int c4iw_num_stags(struct c4iw_rdev *rdev) +{ + return (int)(rdev->lldi.vr->stag.size >> 5); +} + +#define C4IW_WR_TO (60*HZ) + +struct c4iw_wr_wait { + struct completion completion; + int ret; + struct kref kref; +}; + +void _c4iw_free_wr_wait(struct kref *kref); + +static inline void c4iw_put_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + pr_debug("wr_wait %p ref before put %u\n", wr_waitp, + kref_read(&wr_waitp->kref)); + WARN_ON(kref_read(&wr_waitp->kref) == 0); + kref_put(&wr_waitp->kref, _c4iw_free_wr_wait); +} + +static inline void c4iw_get_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + pr_debug("wr_wait %p ref before get %u\n", wr_waitp, + kref_read(&wr_waitp->kref)); + WARN_ON(kref_read(&wr_waitp->kref) == 0); + kref_get(&wr_waitp->kref); +} + +static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + init_completion(&wr_waitp->completion); +} + +static inline void _c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret, + bool deref) +{ + wr_waitp->ret = ret; + complete(&wr_waitp->completion); + if (deref) + c4iw_put_wr_wait(wr_waitp); +} + +static inline void c4iw_wake_up_noref(struct c4iw_wr_wait *wr_waitp, int ret) +{ + _c4iw_wake_up(wr_waitp, ret, false); +} + +static inline void c4iw_wake_up_deref(struct c4iw_wr_wait *wr_waitp, int ret) +{ + _c4iw_wake_up(wr_waitp, ret, true); +} + +static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + int ret; + + if (c4iw_fatal_error(rdev)) { + wr_waitp->ret = -EIO; + goto out; + } + + ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO); + if (!ret) { + pr_err("%s - Device %s not responding (disabling device) - tid %u qpid %u\n", + func, pci_name(rdev->lldi.pdev), hwtid, qpid); + rdev->flags |= T4_FATAL_ERROR; + wr_waitp->ret = -EIO; + goto out; + } + if (wr_waitp->ret) + pr_debug("%s: FW reply %d tid %u qpid %u\n", + pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); +out: + return wr_waitp->ret; +} + +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); + +static inline int c4iw_ref_send_wait(struct c4iw_rdev *rdev, + struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + int ret; + + pr_debug("%s wr_wait %p hwtid %u qpid %u\n", func, wr_waitp, hwtid, + qpid); + c4iw_get_wr_wait(wr_waitp); + ret = c4iw_ofld_send(rdev, skb); + if (ret) { + c4iw_put_wr_wait(wr_waitp); + return ret; + } + return c4iw_wait_for_reply(rdev, wr_waitp, hwtid, qpid, func); +} + +enum db_state { + NORMAL = 0, + FLOW_CONTROL = 1, + RECOVERY = 2, + STOPPED = 3 +}; + +struct c4iw_dev { + struct ib_device ibdev; + struct c4iw_rdev rdev; + struct xarray cqs; + struct xarray qps; + struct xarray mrs; + struct mutex db_mutex; + struct dentry *debugfs_root; + enum db_state db_state; + struct xarray hwtids; + struct xarray atids; + struct xarray stids; + struct list_head db_fc_list; + u32 avail_ird; + wait_queue_head_t wait; +}; + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; + struct work_struct reg_work; +}; + +static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c4iw_dev, ibdev); +} + +static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) +{ + return xa_load(&rhp->cqs, cqid); +} + +static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) +{ + return xa_load(&rhp->qps, qpid); +} + +extern uint c4iw_max_read_depth; + +static inline int cur_max_read_depth(struct c4iw_dev *dev) +{ + return min(dev->rdev.lldi.max_ordird_qp, c4iw_max_read_depth); +} + +struct c4iw_pd { + struct ib_pd ibpd; + u32 pdid; + struct c4iw_dev *rhp; +}; + +static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c4iw_pd, ibpd); +} + +struct tpt_attributes { + u64 len; + u64 va_fbo; + enum fw_ri_mem_perms perms; + u32 stag; + u32 pdid; + u32 qpid; + u32 pbl_addr; + u32 pbl_size; + u32 state:1; + u32 type:2; + u32 rsvd:1; + u32 remote_invaliate_disable:1; + u32 zbva:1; + u32 mw_bind_enable:1; + u32 page_size:5; +}; + +struct c4iw_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; + u64 kva; + struct tpt_attributes attr; + u64 *mpl; + dma_addr_t mpl_addr; + u32 max_mpl_len; + u32 mpl_len; + struct c4iw_wr_wait *wr_waitp; +}; + +static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c4iw_mr, ibmr); +} + +struct c4iw_mw { + struct ib_mw ibmw; + struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; + u64 kva; + struct tpt_attributes attr; + struct c4iw_wr_wait *wr_waitp; +}; + +static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct c4iw_mw, ibmw); +} + +struct c4iw_cq { + struct ib_cq ibcq; + struct c4iw_dev *rhp; + struct sk_buff *destroy_skb; + struct t4_cq cq; + spinlock_t lock; + spinlock_t comp_handler_lock; + refcount_t refcnt; + struct completion cq_rel_comp; + struct c4iw_wr_wait *wr_waitp; +}; + +static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c4iw_cq, ibcq); +} + +struct c4iw_mpa_attributes { + u8 initiator; + u8 recv_marker_enabled; + u8 xmit_marker_enabled; + u8 crc_enabled; + u8 enhanced_rdma_conn; + u8 version; + u8 p2p_type; +}; + +struct c4iw_qp_attributes { + u32 scq; + u32 rcq; + u32 sq_num_entries; + u32 rq_num_entries; + u32 sq_max_sges; + u32 sq_max_sges_rdma_write; + u32 rq_max_sges; + u32 state; + u8 enable_rdma_read; + u8 enable_rdma_write; + u8 enable_bind; + u8 enable_mmid0_fastreg; + u32 max_ord; + u32 max_ird; + u32 pd; + u32 next_state; + char terminate_buffer[52]; + u32 terminate_msg_len; + u8 is_terminate_local; + struct c4iw_mpa_attributes mpa_attr; + struct c4iw_ep *llp_stream_handle; + u8 layer_etype; + u8 ecode; + u16 sq_db_inc; + u16 rq_db_inc; + u8 send_term; +}; + +struct c4iw_qp { + struct ib_qp ibqp; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct c4iw_ep *ep; + struct c4iw_qp_attributes attr; + struct t4_wq wq; + spinlock_t lock; + struct mutex mutex; + wait_queue_head_t wait; + int sq_sig_all; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + struct c4iw_wr_wait *wr_waitp; + struct completion qp_rel_comp; + refcount_t qp_refcnt; +}; + +static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct c4iw_qp, ibqp); +} + +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + +struct c4iw_ucontext { + struct ib_ucontext ibucontext; + struct c4iw_dev_ucontext uctx; + u32 key; + spinlock_t mmap_lock; + struct list_head mmaps; + bool is_32b_cqe; +}; + +static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) +{ + return container_of(c, struct c4iw_ucontext, ibucontext); +} + +struct c4iw_mm_entry { + struct list_head entry; + u64 addr; + u32 key; + unsigned len; +}; + +static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, + u32 key, unsigned len) +{ + struct list_head *pos, *nxt; + struct c4iw_mm_entry *mm; + + spin_lock(&ucontext->mmap_lock); + list_for_each_safe(pos, nxt, &ucontext->mmaps) { + + mm = list_entry(pos, struct c4iw_mm_entry, entry); + if (mm->key == key && mm->len == len) { + list_del_init(&mm->entry); + spin_unlock(&ucontext->mmap_lock); + pr_debug("key 0x%x addr 0x%llx len %d\n", key, + (unsigned long long)mm->addr, mm->len); + return mm; + } + } + spin_unlock(&ucontext->mmap_lock); + return NULL; +} + +static inline void insert_mmap(struct c4iw_ucontext *ucontext, + struct c4iw_mm_entry *mm) +{ + spin_lock(&ucontext->mmap_lock); + pr_debug("key 0x%x addr 0x%llx len %d\n", + mm->key, (unsigned long long)mm->addr, mm->len); + list_add_tail(&mm->entry, &ucontext->mmaps); + spin_unlock(&ucontext->mmap_lock); +} + +enum c4iw_qp_attr_mask { + C4IW_QP_ATTR_NEXT_STATE = 1 << 0, + C4IW_QP_ATTR_SQ_DB = 1<<1, + C4IW_QP_ATTR_RQ_DB = 1<<2, + C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, + C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, + C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, + C4IW_QP_ATTR_MAX_ORD = 1 << 11, + C4IW_QP_ATTR_MAX_IRD = 1 << 12, + C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, + C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, + C4IW_QP_ATTR_MPA_ATTR = 1 << 24, + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, + C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_MAX_ORD | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_STREAM_MSG_BUFFER | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE) +}; + +int c4iw_modify_qp(struct c4iw_dev *rhp, + struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal); + +enum c4iw_qp_state { + C4IW_QP_STATE_IDLE, + C4IW_QP_STATE_RTS, + C4IW_QP_STATE_ERROR, + C4IW_QP_STATE_TERMINATE, + C4IW_QP_STATE_CLOSING, + C4IW_QP_STATE_TOT +}; + +static inline int c4iw_convert_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + case IB_QPS_INIT: + return C4IW_QP_STATE_IDLE; + case IB_QPS_RTS: + return C4IW_QP_STATE_RTS; + case IB_QPS_SQD: + return C4IW_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C4IW_QP_STATE_TERMINATE; + case IB_QPS_ERR: + return C4IW_QP_STATE_ERROR; + default: + return -1; + } +} + +static inline int to_ib_qp_state(int c4iw_qp_state) +{ + switch (c4iw_qp_state) { + case C4IW_QP_STATE_IDLE: + return IB_QPS_INIT; + case C4IW_QP_STATE_RTS: + return IB_QPS_RTS; + case C4IW_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C4IW_QP_STATE_TERMINATE: + return IB_QPS_SQE; + case C4IW_QP_STATE_ERROR: + return IB_QPS_ERR; + } + return IB_QPS_ERR; +} + +static inline u32 c4iw_ib_to_tpt_access(int a) +{ + return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | + (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) | + (a & IB_ACCESS_LOCAL_WRITE ? FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) | + FW_RI_MEM_ACCESS_LOCAL_READ; +} + +enum c4iw_mmid_state { + C4IW_STAG_STATE_VALID, + C4IW_STAG_STATE_INVALID +}; + +#define C4IW_NODE_DESC "cxgb4 Chelsio Communications" + +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +#define MPA_MAX_PRIVATE_DATA 256 +#define MPA_ENHANCED_RDMA_CONN 0x10 +#define MPA_REJECT 0x20 +#define MPA_CRC 0x40 +#define MPA_MARKERS 0x80 +#define MPA_FLAGS_MASK 0xE0 + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 +#define MPA_V2_RDMA_WRITE_RTR 0x8000 +#define MPA_V2_RDMA_READ_RTR 0x4000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define c4iw_put_ep(ep) { \ + pr_debug("put_ep ep %p refcnt %d\n", \ + ep, kref_read(&((ep)->kref))); \ + WARN_ON(kref_read(&((ep)->kref)) < 1); \ + kref_put(&((ep)->kref), _c4iw_free_ep); \ +} + +#define c4iw_get_ep(ep) { \ + pr_debug("get_ep ep %p, refcnt %d\n", \ + ep, kref_read(&((ep)->kref))); \ + kref_get(&((ep)->kref)); \ +} +void _c4iw_free_ep(struct kref *kref); + +struct mpa_message { + u8 key[16]; + u8 flags; + u8 revision; + __be16 private_data_size; + u8 private_data[]; +}; + +struct mpa_v2_conn_params { + __be16 ird; + __be16 ord; +}; + +struct terminate_message { + u8 layer_etype; + u8 ecode; + __be16 hdrct_rsvd; + u8 len_hdrs[]; +}; + +#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) + +enum c4iw_layers_types { + LAYER_RDMAP = 0x00, + LAYER_DDP = 0x10, + LAYER_MPA = 0x20, + RDMAP_LOCAL_CATA = 0x00, + RDMAP_REMOTE_PROT = 0x01, + RDMAP_REMOTE_OP = 0x02, + DDP_LOCAL_CATA = 0x00, + DDP_TAGGED_ERR = 0x01, + DDP_UNTAGGED_ERR = 0x02, + DDP_LLP = 0x03 +}; + +enum c4iw_rdma_ecodes { + RDMAP_INV_STAG = 0x00, + RDMAP_BASE_BOUNDS = 0x01, + RDMAP_ACC_VIOL = 0x02, + RDMAP_STAG_NOT_ASSOC = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_VERS = 0x05, + RDMAP_INV_OPCODE = 0x06, + RDMAP_STREAM_CATA = 0x07, + RDMAP_GLOBAL_CATA = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum c4iw_ddp_ecodes { + DDPT_INV_STAG = 0x00, + DDPT_BASE_BOUNDS = 0x01, + DDPT_STAG_NOT_ASSOC = 0x02, + DDPT_TO_WRAP = 0x03, + DDPT_INV_VERS = 0x04, + DDPU_INV_QN = 0x01, + DDPU_INV_MSN_NOBUF = 0x02, + DDPU_INV_MSN_RANGE = 0x03, + DDPU_INV_MO = 0x04, + DDPU_MSG_TOOBIG = 0x05, + DDPU_INV_VERS = 0x06 +}; + +enum c4iw_mpa_ecodes { + MPA_CRC_ERR = 0x02, + MPA_MARKER_ERR = 0x03, + MPA_LOCAL_CATA = 0x05, + MPA_INSUFF_IRD = 0x06, + MPA_NOMATCH_RTR = 0x07, +}; + +enum c4iw_ep_state { + IDLE = 0, + LISTEN, + CONNECTING, + MPA_REQ_WAIT, + MPA_REQ_SENT, + MPA_REQ_RCVD, + MPA_REP_SENT, + FPDU_MODE, + ABORTING, + CLOSING, + MORIBUND, + DEAD, +}; + +enum c4iw_ep_flags { + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, + TIMEOUT = 4, + QP_REFERENCED = 5, + STOP_MPA_TIMER = 7, +}; + +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + ABORT_CONN = 15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21, + CLOSE_CON_RPL = 22, + EP_DISC_FAIL = 24, + QP_REFED = 25, + QP_DEREFED = 26, + CM_ID_REFED = 27, + CM_ID_DEREFED = 28, +}; + +enum conn_pre_alloc_buffers { + CN_ABORT_REQ_BUF, + CN_ABORT_RPL_BUF, + CN_CLOSE_CON_REQ_BUF, + CN_DESTROY_BUF, + CN_FLOWC_BUF, + CN_MAX_CON_BUF +}; + +enum { + FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX]) +}; + +union cpl_wr_size { + struct cpl_abort_req abrt_req; + struct cpl_abort_rpl abrt_rpl; + struct fw_ri_wr ri_req; + struct cpl_close_con_req close_req; + char flowc_buf[FLOWC_LEN]; +}; + +struct c4iw_ep_common { + struct iw_cm_id *cm_id; + struct c4iw_qp *qp; + struct c4iw_dev *dev; + struct sk_buff_head ep_skb_list; + enum c4iw_ep_state state; + struct kref kref; + struct mutex mutex; + struct sockaddr_storage local_addr; + struct sockaddr_storage remote_addr; + struct c4iw_wr_wait *wr_waitp; + unsigned long flags; + unsigned long history; +}; + +struct c4iw_listen_ep { + struct c4iw_ep_common com; + unsigned int stid; + int backlog; +}; + +struct c4iw_ep_stats { + unsigned connect_neg_adv; + unsigned abort_neg_adv; +}; + +struct c4iw_ep { + struct c4iw_ep_common com; + struct c4iw_ep *parent_ep; + struct timer_list timer; + struct list_head entry; + unsigned int atid; + u32 hwtid; + u32 snd_seq; + u32 rcv_seq; + struct l2t_entry *l2t; + struct dst_entry *dst; + struct sk_buff *mpa_skb; + struct c4iw_mpa_attributes mpa_attr; + u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; + unsigned int mpa_pkt_len; + u32 ird; + u32 ord; + u32 smac_idx; + u32 tx_chan; + u32 mtu; + u16 mss; + u16 emss; + u16 plen; + u16 rss_qid; + u16 txq_idx; + u16 ctrlq_idx; + u8 tos; + u8 retry_with_mpa_v1; + u8 tried_with_mpa_v1; + unsigned int retry_count; + int snd_win; + int rcv_win; + u32 snd_wscale; + struct c4iw_ep_stats stats; + u32 srqe_idx; + u32 rx_pdu_out_cnt; + struct sk_buff *peer_abort_skb; +}; + +static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline int ocqp_supported(const struct cxgb4_lld_info *infop) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return infop->vr->ocq.size > 0; +#else + return 0; +#endif +} + +u32 c4iw_id_alloc(struct c4iw_id_table *alloc); +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags); +void c4iw_id_table_free(struct c4iw_id_table *alloc); + +typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb); + +int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, + struct l2t_entry *l2t); +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, + struct c4iw_dev_ucontext *uctx); +u32 c4iw_get_resource(struct c4iw_id_table *id_table); +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt); +int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); +int c4iw_pblpool_create(struct c4iw_rdev *rdev); +int c4iw_rqtpool_create(struct c4iw_rdev *rdev); +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev); +void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); +void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); +void c4iw_destroy_resource(struct c4iw_resource *rscp); +int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); +void c4iw_register_device(struct work_struct *work); +void c4iw_unregister_device(struct c4iw_dev *dev); +int __init c4iw_cm_init(void); +void c4iw_cm_term(void); +void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx); +void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx); +int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); +int c4iw_destroy_listen(struct iw_cm_id *cm_id); +int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); +void c4iw_qp_add_ref(struct ib_qp *qp); +void c4iw_qp_rem_ref(struct ib_qp *qp); +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset); +void c4iw_dealloc(struct uld_ctx *ctx); +struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, + u64 length, u64 virt, int acc, + struct ib_udata *udata); +struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void c4iw_cq_rem_ref(struct c4iw_cq *chp); +int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, + struct ib_udata *udata); +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); +int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata); +int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); +struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn); +u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp); +void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); +int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); +int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); +int c4iw_flush_sq(struct c4iw_qp *qhp); +int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid); +u16 c4iw_rqes_posted(struct c4iw_qp *qhp); +int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe); +u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); +void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx); +u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx); +void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); + +extern struct cxgb4_client t4c_client; +extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); +extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); +extern int c4iw_wr_log; +extern int db_fc_threshold; +extern int db_coalescing_threshold; +extern int use_dsgl; +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); + +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id); + +#endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c new file mode 100644 index 000000000..a2c71a1d9 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <rdma/ib_umem.h> +#include <linux/atomic.h> +#include <rdma/ib_user_verbs.h> + +#include "iw_cxgb4.h" + +int use_dsgl = 1; +module_param(use_dsgl, int, 0644); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1) (DEPRECATED)"); + +#define T4_ULPTX_MIN_IO 32 +#define C4IW_MAX_INLINE_SIZE 96 +#define T4_ULPTX_MAX_DMA 1024 +#define C4IW_INLINE_THRESHOLD 128 + +static int inline_threshold = C4IW_INLINE_THRESHOLD; +module_param(inline_threshold, int, 0644); +MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); + +static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) +{ + return (is_t4(dev->rdev.lldi.adapter_type) || + is_t5(dev->rdev.lldi.adapter_type)) && + length >= 8*1024*1024*1024ULL; +} + +static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, + u32 len, dma_addr_t data, + struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + struct ulp_mem_io *req; + struct ulptx_sgl *sgl; + u8 wr_len; + int ret = 0; + + addr &= 0x7FFFFFF; + + if (wr_waitp) + c4iw_init_wr_wait(wr_waitp); + wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); + + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = __skb_put_zero(skb, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | + (wr_waitp ? FW_WR_COMPL_F : 0)); + req->wr.wr_lo = wr_waitp ? (__force __be64)(unsigned long)wr_waitp : 0L; + req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); + req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | + T5_ULP_MEMIO_ORDER_V(1) | + T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0])); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5)); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr)); + + sgl = (struct ulptx_sgl *)(req + 1); + sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_DSGL) | + ULPTX_NSGE_V(1)); + sgl->len0 = cpu_to_be32(len); + sgl->addr0 = cpu_to_be64(data); + + if (wr_waitp) + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + else + ret = c4iw_ofld_send(rdev, skb); + return ret; +} + +static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + struct ulp_mem_io *req; + struct ulptx_idata *sc; + u8 wr_len, *to_dp, *from_dp; + int copy_len, num_wqe, i, ret = 0; + __be32 cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); + + if (is_t4(rdev->lldi.adapter_type)) + cmd |= cpu_to_be32(ULP_MEMIO_ORDER_F); + else + cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F); + + addr &= 0x7FFFFFF; + pr_debug("addr 0x%x len %u\n", addr, len); + num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE); + c4iw_init_wr_wait(wr_waitp); + for (i = 0; i < num_wqe; i++) { + + copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE : + len; + wr_len = roundup(sizeof(*req) + sizeof(*sc) + + roundup(copy_len, T4_ULPTX_MIN_IO), + 16); + + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = __skb_put_zero(skb, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + + if (i == (num_wqe-1)) { + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | + FW_WR_COMPL_F); + req->wr.wr_lo = (__force __be64)(unsigned long)wr_waitp; + } else + req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); + req->wr.wr_mid = cpu_to_be32( + FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); + + req->cmd = cmd; + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V( + DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), + 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr + i * 3)); + + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO)); + + to_dp = (u8 *)(sc + 1); + from_dp = (u8 *)data + i * C4IW_MAX_INLINE_SIZE; + if (data) + memcpy(to_dp, from_dp, copy_len); + else + memset(to_dp, 0, copy_len); + if (copy_len % T4_ULPTX_MIN_IO) + memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - + (copy_len % T4_ULPTX_MIN_IO)); + if (i == (num_wqe-1)) + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, + __func__); + else + ret = c4iw_ofld_send(rdev, skb); + if (ret) + break; + skb = NULL; + len -= C4IW_MAX_INLINE_SIZE; + } + + return ret; +} + +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + u32 remain = len; + u32 dmalen; + int ret = 0; + dma_addr_t daddr; + dma_addr_t save; + + daddr = dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr)) + return -1; + save = daddr; + + while (remain > inline_threshold) { + if (remain < T4_ULPTX_MAX_DMA) { + if (remain & ~T4_ULPTX_MIN_IO) + dmalen = remain & ~(T4_ULPTX_MIN_IO-1); + else + dmalen = remain; + } else + dmalen = T4_ULPTX_MAX_DMA; + remain -= dmalen; + ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, + skb, remain ? NULL : wr_waitp); + if (ret) + goto out; + addr += dmalen >> 5; + data += dmalen; + daddr += dmalen; + } + if (remain) + ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb, + wr_waitp); +out: + dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); + return ret; +} + +/* + * write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + */ +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + int ret; + + if (!rdev->lldi.ulptx_memwrite_dsgl || !use_dsgl) { + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + goto out; + } + + if (len <= inline_threshold) { + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + goto out; + } + + ret = _c4iw_write_mem_dma(rdev, addr, len, data, skb, wr_waitp); + if (ret) { + pr_warn_ratelimited("%s: dma map failure (non fatal)\n", + pci_name(rdev->lldi.pdev)); + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + } +out: + return ret; + +} + +/* + * Build and write a TPT entry. + * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, + * pbl_size and pbl_addr + * OUT: stag index + */ +static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, + u32 *stag, u8 stag_state, u32 pdid, + enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, + int bind_enabled, u32 zbva, u64 to, + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr, + struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) +{ + int err; + struct fw_ri_tpte *tpt; + u32 stag_idx; + static atomic_t key; + + if (c4iw_fatal_error(rdev)) + return -EIO; + + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + + stag_state = stag_state > 0; + stag_idx = (*stag) >> 8; + + if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { + stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); + if (!stag_idx) { + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.fail++; + mutex_unlock(&rdev->stats.lock); + kfree(tpt); + return -ENOMEM; + } + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur += 32; + if (rdev->stats.stag.cur > rdev->stats.stag.max) + rdev->stats.stag.max = rdev->stats.stag.cur; + mutex_unlock(&rdev->stats.lock); + *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); + } + pr_debug("stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", + stag_state, type, pdid, stag_idx); + + /* write TPT entry */ + if (reset_tpt_entry) + memset(tpt, 0, sizeof(*tpt)); + else { + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | + FW_RI_TPTE_STAGSTATE_V(stag_state) | + FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | + FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : + FW_RI_VA_BASED_TO))| + FW_RI_TPTE_PS_V(page_size)); + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); + } + err = write_adapter_mem(rdev, stag_idx + + (rdev->lldi.vr->stag.start >> 5), + sizeof(*tpt), tpt, skb, wr_waitp); + + if (reset_tpt_entry) { + c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur -= 32; + mutex_unlock(&rdev->stats.lock); + } + kfree(tpt); + return err; +} + +static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, + u32 pbl_addr, u32 pbl_size, struct c4iw_wr_wait *wr_waitp) +{ + int err; + + pr_debug("*pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + pbl_addr, rdev->lldi.vr->pbl.start, + pbl_size); + + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL, + wr_waitp); + return err; +} + +static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, + u32 pbl_addr, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) +{ + return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, + pbl_size, pbl_addr, skb, wr_waitp); +} + +static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, + u32 pbl_size, u32 pbl_addr, + struct c4iw_wr_wait *wr_waitp) +{ + *stag = T4_STAG_UNSET; + return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, + 0UL, 0, 0, pbl_size, pbl_addr, NULL, wr_waitp); +} + +static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) +{ + u32 mmid; + + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + mhp->ibmr.length = mhp->attr.len; + mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); + pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); + return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); +} + +static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, + struct c4iw_mr *mhp, int shift) +{ + u32 stag = T4_STAG_UNSET; + int ret; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, + FW_RI_STAG_NSMR, mhp->attr.len ? + mhp->attr.perms : 0, + mhp->attr.mw_bind_enable, mhp->attr.zbva, + mhp->attr.va_fbo, mhp->attr.len ? + mhp->attr.len : -1, shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL, + mhp->wr_waitp); + if (ret) + return ret; + + ret = finish_mem_reg(mhp, stag); + if (ret) { + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); + mhp->dereg_skb = NULL; + } + return ret; +} + +static int alloc_pbl(struct c4iw_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + + return 0; +} + +struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + int ret; + u32 stag = T4_STAG_UNSET; + + pr_debug("ib_pd %p\n", pd); + php = to_c4iw_pd(pd); + rhp = php->rhp; + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!mhp->wr_waitp) { + ret = -ENOMEM; + goto err_free_mhp; + } + c4iw_init_wr_wait(mhp->wr_waitp); + + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.mw_bind_enable = (acc&IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND; + mhp->attr.zbva = 0; + mhp->attr.va_fbo = 0; + mhp->attr.page_size = 0; + mhp->attr.len = ~0ULL; + mhp->attr.pbl_size = 0; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0, + NULL, mhp->wr_waitp); + if (ret) + goto err_free_skb; + + ret = finish_mem_reg(mhp, stag); + if (ret) + goto err_dereg_mem; + return &mhp->ibmr; +err_dereg_mem: + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); +err_free_skb: + kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); +err_free_mhp: + kfree(mhp); + return ERR_PTR(ret); +} + +struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + __be64 *pages; + int shift, n, i; + int err = -ENOMEM; + struct ib_block_iter biter; + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + + pr_debug("ib_pd %p\n", pd); + + if (length == ~0ULL) + return ERR_PTR(-EINVAL); + + if ((length + start) < start) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (mr_exceeds_hw_limits(rhp, length)) + return ERR_PTR(-EINVAL); + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!mhp->wr_waitp) + goto err_free_mhp; + + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) + goto err_free_wr_wait; + + mhp->rhp = rhp; + + mhp->umem = ib_umem_get(pd->device, start, length, acc); + if (IS_ERR(mhp->umem)) + goto err_free_skb; + + shift = PAGE_SHIFT; + + n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift); + err = alloc_pbl(mhp, n); + if (err) + goto err_umem_release; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_pbl_free; + } + + i = n = 0; + + rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) { + pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter)); + if (i == PAGE_SIZE / sizeof(*pages)) { + err = write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (n << 3), i, + mhp->wr_waitp); + if (err) + goto pbl_done; + n += i; + i = 0; + } + } + + if (i) + err = write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (n << 3), i, + mhp->wr_waitp); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl_free; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.va_fbo = virt; + mhp->attr.page_size = shift - 12; + mhp->attr.len = length; + + err = register_mem(rhp, php, mhp, shift); + if (err) + goto err_pbl_free; + + return &mhp->ibmr; + +err_pbl_free: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +err_umem_release: + ib_umem_release(mhp->umem); +err_free_skb: + kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); +err_free_mhp: + kfree(mhp); + return ERR_PTR(err); +} + +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret = 0; + int length = roundup(max_num_sg * sizeof(u64), 32); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > t4_max_fr_depth(rhp->rdev.lldi.ulptx_memwrite_dsgl && + use_dsgl)) + return ERR_PTR(-EINVAL); + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) { + ret = -ENOMEM; + goto err; + } + + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!mhp->wr_waitp) { + ret = -ENOMEM; + goto err_free_mhp; + } + c4iw_init_wr_wait(mhp->wr_waitp); + + mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev, + length, &mhp->mpl_addr, GFP_KERNEL); + if (!mhp->mpl) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + mhp->max_mpl_len = length; + + mhp->rhp = rhp; + ret = alloc_pbl(mhp, max_num_sg); + if (ret) + goto err_free_dma; + mhp->attr.pbl_size = max_num_sg; + ret = allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr, + mhp->wr_waitp); + if (ret) + goto err_free_pbl; + mhp->attr.pdid = php->pdid; + mhp->attr.type = FW_RI_STAG_NSMR; + mhp->attr.stag = stag; + mhp->attr.state = 0; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_dereg; + } + + pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); + return &(mhp->ibmr); +err_dereg: + dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); +err_free_pbl: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +err_free_dma: + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); +err_free_mhp: + kfree(mhp); +err: + return ERR_PTR(ret); +} + +static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + + if (unlikely(mhp->mpl_len == mhp->attr.pbl_size)) + return -ENOMEM; + + mhp->mpl[mhp->mpl_len++] = addr; + + return 0; +} + +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + + mhp->mpl_len = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); +} + +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_mr *mhp; + u32 mmid; + + pr_debug("ib_mr %p\n", ib_mr); + + mhp = to_c4iw_mr(ib_mr); + rhp = mhp->rhp; + mmid = mhp->attr.stag >> 8; + xa_erase_irq(&rhp->mrs, mmid); + if (mhp->mpl) + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); + if (mhp->attr.pbl_size) + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + if (mhp->kva) + kfree((void *) (unsigned long) mhp->kva); + ib_umem_release(mhp->umem); + pr_debug("mmid 0x%x ptr %p\n", mmid, mhp); + c4iw_put_wr_wait(mhp->wr_waitp); + kfree(mhp); + return 0; +} + +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + xa_lock_irqsave(&rhp->mrs, flags); + mhp = xa_load(&rhp->mrs, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + xa_unlock_irqrestore(&rhp->mrs, flags); +} diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c new file mode 100644 index 000000000..246b739dd --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/ethtool.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <linux/io.h> + +#include <asm/irq.h> +#include <asm/byteorder.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> + +#include "iw_cxgb4.h" + +static int fastreg_support = 1; +module_param(fastreg_support, int, 0644); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); + +static void c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_dev *rhp; + struct c4iw_mm_entry *mm, *tmp; + + pr_debug("context %p\n", context); + rhp = to_c4iw_dev(ucontext->ibucontext.device); + + list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) + kfree(mm); + c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); +} + +static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext, + struct ib_udata *udata) +{ + struct ib_device *ibdev = ucontext->device; + struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext); + struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; + + pr_debug("ibdev %p\n", ibdev); + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); + INIT_LIST_HEAD(&context->mmaps); + spin_lock_init(&context->mmap_lock); + + if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { + pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n"); + rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) { + ret = -ENOMEM; + goto err; + } + + uresp.status_page_size = PAGE_SIZE; + + spin_lock(&context->mmap_lock); + uresp.status_page_key = context->key; + context->key += PAGE_SIZE; + spin_unlock(&context->mmap_lock); + + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); + if (ret) + goto err_mm; + + mm->key = uresp.status_page_key; + mm->addr = virt_to_phys(rhp->rdev.status_page); + mm->len = PAGE_SIZE; + insert_mmap(context, mm); + } + return 0; +err_mm: + kfree(mm); +err: + return ret; +} + +static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + int len = vma->vm_end - vma->vm_start; + u32 key = vma->vm_pgoff << PAGE_SHIFT; + struct c4iw_rdev *rdev; + int ret = 0; + struct c4iw_mm_entry *mm; + struct c4iw_ucontext *ucontext; + u64 addr; + + pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff, + key, len); + + if (vma->vm_start & (PAGE_SIZE-1)) + return -EINVAL; + + rdev = &(to_c4iw_dev(context->device)->rdev); + ucontext = to_c4iw_ucontext(context); + + mm = remove_mmap(ucontext, key, len); + if (!mm) + return -EINVAL; + addr = mm->addr; + kfree(mm); + + if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 0) + + pci_resource_len(rdev->lldi.pdev, 0)))) { + + /* + * MA_SYNC register... + */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 2) + + pci_resource_len(rdev->lldi.pdev, 2)))) { + + /* + * Map user DB or OCQP memory... + */ + if (addr >= rdev->oc_mw_pa) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); + else { + if (!is_t4(rdev->lldi.adapter_type)) + vma->vm_page_prot = + t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = + pgprot_noncached(vma->vm_page_prot); + } + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else { + + /* + * Map WQ or CQ contig dma memory... + */ + ret = remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + + return ret; +} + +static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + pr_debug("ibpd %p pdid 0x%x\n", pd, php->pdid); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur--; + mutex_unlock(&rhp->rdev.stats.lock); + return 0; +} + +static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) +{ + struct c4iw_pd *php = to_c4iw_pd(pd); + struct ib_device *ibdev = pd->device; + u32 pdid; + struct c4iw_dev *rhp; + + pr_debug("ibdev %p\n", ibdev); + rhp = (struct c4iw_dev *) ibdev; + pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); + if (!pdid) + return -EINVAL; + + php->pdid = pdid; + php->rhp = rhp; + if (udata) { + struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid}; + + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + c4iw_deallocate_pd(&php->ibpd, udata); + return -EFAULT; + } + } + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur++; + if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max) + rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; + mutex_unlock(&rhp->rdev.stats.lock); + pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php); + return 0; +} + +static int c4iw_query_gid(struct ib_device *ibdev, u32 port, int index, + union ib_gid *gid) +{ + struct c4iw_dev *dev; + + pr_debug("ibdev %p, port %u, index %d, gid %p\n", + ibdev, port, index, gid); + if (!port) + return -EINVAL; + dev = to_c4iw_dev(ibdev); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6); + return 0; +} + +static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) +{ + + struct c4iw_dev *dev; + + pr_debug("ibdev %p\n", ibdev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + dev = to_c4iw_dev(ibdev); + addrconf_addr_eui48((u8 *)&props->sys_image_guid, + dev->rdev.lldi.ports[0]->dev_addr); + props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); + props->fw_ver = dev->rdev.lldi.fw_vers; + props->device_cap_flags = IB_DEVICE_MEM_WINDOW; + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; + if (fastreg_support) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + props->page_size_cap = T4_PAGESIZE_MASK; + props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; + props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; + props->max_mr_size = T4_MAX_MR_SIZE; + props->max_qp = dev->rdev.lldi.vr->qp.size / 2; + props->max_srq = dev->rdev.lldi.vr->srq.size; + props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; + props->max_srq_sge = T4_MAX_RECV_SGE; + props->max_sge_rd = 1; + props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; + props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, + c4iw_max_read_depth); + props->max_qp_init_rd_atom = props->max_qp_rd_atom; + props->max_cq = dev->rdev.lldi.vr->qp.size; + props->max_cqe = dev->rdev.hw_queue.t4_max_cq_depth; + props->max_mr = c4iw_num_stags(&dev->rdev); + props->max_pd = T4_MAX_NUM_PD; + props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = + t4_max_fr_depth(dev->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl); + + return 0; +} + +static int c4iw_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *props) +{ + int ret = 0; + pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); + + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->max_msg_sz = -1; + + return ret; +} + +static ssize_t hw_rev_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + + pr_debug("dev 0x%p\n", dev); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); +} +static DEVICE_ATTR_RO(hw_rev); + +static ssize_t hca_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + struct ethtool_drvinfo info; + struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0]; + + pr_debug("dev 0x%p\n", dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sysfs_emit(buf, "%s\n", info.driver); +} +static DEVICE_ATTR_RO(hca_type); + +static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + + pr_debug("dev 0x%p\n", dev); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); +} +static DEVICE_ATTR_RO(board_id); + +enum counters { + IP4INSEGS, + IP4OUTSEGS, + IP4RETRANSSEGS, + IP4OUTRSTS, + IP6INSEGS, + IP6OUTSEGS, + IP6RETRANSSEGS, + IP6OUTRSTS, + NR_COUNTERS +}; + +static const struct rdma_stat_desc cxgb4_descs[] = { + [IP4INSEGS].name = "ip4InSegs", + [IP4OUTSEGS].name = "ip4OutSegs", + [IP4RETRANSSEGS].name = "ip4RetransSegs", + [IP4OUTRSTS].name = "ip4OutRsts", + [IP6INSEGS].name = "ip6InSegs", + [IP6OUTSEGS].name = "ip6OutSegs", + [IP6RETRANSSEGS].name = "ip6RetransSegs", + [IP6OUTRSTS].name = "ip6OutRsts" +}; + +static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev) +{ + BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS); + + /* FIXME: these look like port stats */ + return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int c4iw_get_mib(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port, int index) +{ + struct tp_tcp_stats v4, v6; + struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); + + cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); + stats->value[IP4INSEGS] = v4.tcp_in_segs; + stats->value[IP4OUTSEGS] = v4.tcp_out_segs; + stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs; + stats->value[IP4OUTRSTS] = v4.tcp_out_rsts; + stats->value[IP6INSEGS] = v6.tcp_in_segs; + stats->value[IP6OUTSEGS] = v6.tcp_out_segs; + stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs; + stats->value[IP6OUTRSTS] = v6.tcp_out_rsts; + + return stats->num_counters; +} + +static struct attribute *c4iw_class_attributes[] = { + &dev_attr_hw_rev.attr, + &dev_attr_hca_type.attr, + &dev_attr_board_id.attr, + NULL +}; + +static const struct attribute_group c4iw_attr_group = { + .attrs = c4iw_class_attributes, +}; + +static int c4iw_port_immutable(struct ib_device *ibdev, u32 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + err = ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + +static void get_dev_fw_str(struct ib_device *dev, char *str) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev); + pr_debug("dev 0x%p\n", dev); + + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u", + FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); +} + +static const struct ib_device_ops c4iw_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_CXGB4, + .uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION, + + .alloc_hw_device_stats = c4iw_alloc_device_stats, + .alloc_mr = c4iw_alloc_mr, + .alloc_pd = c4iw_allocate_pd, + .alloc_ucontext = c4iw_alloc_ucontext, + .create_cq = c4iw_create_cq, + .create_qp = c4iw_create_qp, + .create_srq = c4iw_create_srq, + .dealloc_pd = c4iw_deallocate_pd, + .dealloc_ucontext = c4iw_dealloc_ucontext, + .dereg_mr = c4iw_dereg_mr, + .destroy_cq = c4iw_destroy_cq, + .destroy_qp = c4iw_destroy_qp, + .destroy_srq = c4iw_destroy_srq, + .device_group = &c4iw_attr_group, + .fill_res_cq_entry = c4iw_fill_res_cq_entry, + .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry, + .fill_res_mr_entry = c4iw_fill_res_mr_entry, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = c4iw_get_dma_mr, + .get_hw_stats = c4iw_get_mib, + .get_port_immutable = c4iw_port_immutable, + .iw_accept = c4iw_accept_cr, + .iw_add_ref = c4iw_qp_add_ref, + .iw_connect = c4iw_connect, + .iw_create_listen = c4iw_create_listen, + .iw_destroy_listen = c4iw_destroy_listen, + .iw_get_qp = c4iw_get_qp, + .iw_reject = c4iw_reject_cr, + .iw_rem_ref = c4iw_qp_rem_ref, + .map_mr_sg = c4iw_map_mr_sg, + .mmap = c4iw_mmap, + .modify_qp = c4iw_ib_modify_qp, + .modify_srq = c4iw_modify_srq, + .poll_cq = c4iw_poll_cq, + .post_recv = c4iw_post_receive, + .post_send = c4iw_post_send, + .post_srq_recv = c4iw_post_srq_recv, + .query_device = c4iw_query_device, + .query_gid = c4iw_query_gid, + .query_port = c4iw_query_port, + .query_qp = c4iw_ib_query_qp, + .reg_user_mr = c4iw_reg_user_mr, + .req_notify_cq = c4iw_arm_cq, + + INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw), + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, c4iw_qp, ibqp), + INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), + INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), +}; + +static int set_netdevs(struct ib_device *ib_dev, struct c4iw_rdev *rdev) +{ + int ret; + int i; + + for (i = 0; i < rdev->lldi.nports; i++) { + ret = ib_device_set_netdev(ib_dev, rdev->lldi.ports[i], + i + 1); + if (ret) + return ret; + } + return 0; +} + +void c4iw_register_device(struct work_struct *work) +{ + int ret; + struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work); + struct c4iw_dev *dev = ctx->dev; + + pr_debug("c4iw_dev %p\n", dev); + addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid, + dev->rdev.lldi.ports[0]->dev_addr); + dev->ibdev.local_dma_lkey = 0; + dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); + memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); + dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; + dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; + dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; + + memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name, + sizeof(dev->ibdev.iw_ifname)); + + ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); + ret = set_netdevs(&dev->ibdev, &dev->rdev); + if (ret) + goto err_dealloc_ctx; + dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX); + ret = ib_register_device(&dev->ibdev, "cxgb4_%d", + &dev->rdev.lldi.pdev->dev); + if (ret) + goto err_dealloc_ctx; + return; + +err_dealloc_ctx: + pr_err("%s - Failed registering iwarp device: %d\n", + pci_name(ctx->lldi.pdev), ret); + c4iw_dealloc(ctx); + return; +} + +void c4iw_unregister_device(struct c4iw_dev *dev) +{ + pr_debug("c4iw_dev %p\n", dev); + ib_unregister_device(&dev->ibdev); + return; +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c new file mode 100644 index 000000000..ffbd9a899 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -0,0 +1,2813 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <rdma/uverbs_ioctl.h> + +#include "iw_cxgb4.h" + +static int db_delay_usecs = 1; +module_param(db_delay_usecs, int, 0644); +MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain"); + +static int ocqp_support = 1; +module_param(ocqp_support, int, 0644); +MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); + +int db_fc_threshold = 1000; +module_param(db_fc_threshold, int, 0644); +MODULE_PARM_DESC(db_fc_threshold, + "QP count/threshold that triggers" + " automatic db flow control mode (default = 1000)"); + +int db_coalescing_threshold; +module_param(db_coalescing_threshold, int, 0644); +MODULE_PARM_DESC(db_coalescing_threshold, + "QP count/threshold that triggers" + " disabling db coalescing (default = 0)"); + +static int max_fr_immd = T4_MAX_FR_IMMD; +module_param(max_fr_immd, int, 0644); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate"); + +static int alloc_ird(struct c4iw_dev *dev, u32 ird) +{ + int ret = 0; + + xa_lock_irq(&dev->qps); + if (ird <= dev->avail_ird) + dev->avail_ird -= ird; + else + ret = -ENOMEM; + xa_unlock_irq(&dev->qps); + + if (ret) + dev_warn(&dev->rdev.lldi.pdev->dev, + "device IRD resources exhausted\n"); + + return ret; +} + +static void free_ird(struct c4iw_dev *dev, int ird) +{ + xa_lock_irq(&dev->qps); + dev->avail_ird += ird; + xa_unlock_irq(&dev->qps); +} + +static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) +{ + unsigned long flag; + spin_lock_irqsave(&qhp->lock, flag); + qhp->attr.state = state; + spin_unlock_irqrestore(&qhp->lock, flag); +} + +static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize); +} + +static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue, + dma_unmap_addr(sq, mapping)); +} + +static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (t4_sq_onchip(sq)) + dealloc_oc_sq(rdev, sq); + else + dealloc_host_sq(rdev, sq); +} + +static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (!ocqp_support || !ocqp_supported(&rdev->lldi)) + return -ENOSYS; + sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); + if (!sq->dma_addr) + return -ENOMEM; + sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr - + rdev->lldi.vr->ocq.start; + sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr - + rdev->lldi.vr->ocq.start); + sq->flags |= T4_SQ_ONCHIP; + return 0; +} + +static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, + &(sq->dma_addr), GFP_KERNEL); + if (!sq->queue) + return -ENOMEM; + sq->phys_addr = virt_to_phys(sq->queue); + dma_unmap_addr_set(sq, mapping, sq->dma_addr); + return 0; +} + +static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) +{ + int ret = -ENOSYS; + if (user) + ret = alloc_oc_sq(rdev, sq); + if (ret) + ret = alloc_host_sq(rdev, sq); + return ret; +} + +static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + struct c4iw_dev_ucontext *uctx, int has_rq) +{ + /* + * uP clears EQ contexts when the connection exits rdma mode, + * so no need to post a RESET WR for these EQs. + */ + dealloc_sq(rdev, &wq->sq); + kfree(wq->sq.sw_sq); + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + + if (has_rq) { + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + } + return 0; +} + +/* + * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL, + * then this is a user mapping so compute the page-aligned physical address + * for mapping. + */ +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype, + pbar2_pa ? 1 : 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + if (pbar2_pa) + *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + + if (is_t4(rdev->lldi.adapter_type)) + return NULL; + + return rdev->bar2_kva + bar2_qoffset; +} + +static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + struct t4_cq *rcq, struct t4_cq *scq, + struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp, + int need_rq) +{ + int user = (uctx != &rdev->uctx); + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + struct sk_buff *skb; + int ret = 0; + int eqsize; + + wq->sq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->sq.qid) + return -ENOMEM; + + if (need_rq) { + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } + } + + if (!user) { + wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq), + GFP_KERNEL); + if (!wq->sq.sw_sq) { + ret = -ENOMEM; + goto free_rq_qid;//FIXME + } + + if (need_rq) { + wq->rq.sw_rq = kcalloc(wq->rq.size, + sizeof(*wq->rq.sw_rq), + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } + } + } + + if (need_rq) { + /* + * RQT must be a power of 2 and at least 16 deep. + */ + wq->rq.rqt_size = + roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } + } + + ret = alloc_sq(rdev, &wq->sq, user); + if (ret) + goto free_hwaddr; + memset(wq->sq.queue, 0, wq->sq.memsize); + dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); + + if (need_rq) { + wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, + &wq->rq.dma_addr, + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); + } + + wq->db = rdev->lldi.db_reg; + + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, + CXGB4_BAR2_QTYPE_EGRESS, + &wq->sq.bar2_qid, + user ? &wq->sq.bar2_pa : NULL); + if (need_rq) + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, + CXGB4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) { + pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", + pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); + ret = -EINVAL; + goto free_dma; + } + + wq->rdev = rdev; + wq->rq.msn = 1; + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + 2 * sizeof(*res); + if (need_rq) + wr_len += sizeof(*res); + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_dma; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = __skb_put_zero(skb, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + + res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( + FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) | + FW_RI_RES_WR_IQID_V(scq->cqid)); + res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( + FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) : + FW_RI_RES_WR_FBMAX_V(3)) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); + + if (need_rq) { + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.sqrq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + /* don't keep in chip cache */ + FW_RI_RES_WR_CPRIO_V(0) | + /* set by uP at ri_init time */ + FW_RI_RES_WR_PCIECHN_V(0) | + FW_RI_RES_WR_IQID_V(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + } + + c4iw_init_wr_wait(wr_waitp); + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); + if (ret) + goto free_dma; + + pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", + wq->sq.qid, wq->rq.qid, wq->db, + wq->sq.bar2_va, wq->rq.bar2_va); + + return 0; +free_dma: + if (need_rq) + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); +free_sq: + dealloc_sq(rdev, &wq->sq); +free_hwaddr: + if (need_rq) + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); +free_sw_rq: + if (need_rq) + kfree(wq->rq.sw_rq); +free_sw_sq: + kfree(wq->sq.sw_sq); +free_rq_qid: + if (need_rq) + c4iw_put_qpid(rdev, wq->rq.qid, uctx); +free_sq_qid: + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + return ret; +} + +static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, + const struct ib_send_wr *wr, int max, u32 *plenp) +{ + u8 *dstp, *srcp; + u32 plen = 0; + int i; + int rem, len; + + dstp = (u8 *)immdp->data; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) > max) + return -EMSGSIZE; + srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; + plen += wr->sg_list[i].length; + rem = wr->sg_list[i].length; + while (rem) { + if (dstp == (u8 *)&sq->queue[sq->size]) + dstp = (u8 *)sq->queue; + if (rem <= (u8 *)&sq->queue[sq->size] - dstp) + len = rem; + else + len = (u8 *)&sq->queue[sq->size] - dstp; + memcpy(dstp, srcp, len); + dstp += len; + srcp += len; + rem -= len; + } + } + len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp)); + if (len) + memset(dstp, 0, len); + immdp->op = FW_RI_DATA_IMMD; + immdp->r1 = 0; + immdp->r2 = 0; + immdp->immdlen = cpu_to_be32(plen); + *plenp = plen; + return 0; +} + +static int build_isgl(__be64 *queue_start, __be64 *queue_end, + struct fw_ri_isgl *isglp, struct ib_sge *sg_list, + int num_sge, u32 *plenp) + +{ + int i; + u32 plen = 0; + __be64 *flitp; + + if ((__be64 *)isglp == queue_end) + isglp = (struct fw_ri_isgl *)queue_start; + + flitp = (__be64 *)isglp->sge; + + for (i = 0; i < num_sge; i++) { + if ((plen + sg_list[i].length) < plen) + return -EMSGSIZE; + plen += sg_list[i].length; + *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | + sg_list[i].length); + if (++flitp == queue_end) + flitp = queue_start; + *flitp = cpu_to_be64(sg_list[i].addr); + if (++flitp == queue_end) + flitp = queue_start; + } + *flitp = (__force __be64)0; + isglp->op = FW_RI_DATA_ISGL; + isglp->r1 = 0; + isglp->nsge = cpu_to_be16(num_sge); + isglp->r2 = 0; + if (plenp) + *plenp = plen; + return 0; +} + +static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + switch (wr->opcode) { + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE)); + else + wqe->send.sendop_pkd = cpu_to_be32( + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND)); + wqe->send.stag_inv = 0; + break; + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV)); + else + wqe->send.sendop_pkd = cpu_to_be32( + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV)); + wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + break; + + default: + return -EINVAL; + } + wqe->send.r3 = 0; + wqe->send.r4 = 0; + + plen = 0; + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->send.u.immd_src, wr, + T4_MAX_SEND_INLINE, &plen); + if (ret) + return ret; + size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->send.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->send.u.immd_src[0].r1 = 0; + wqe->send.u.immd_src[0].r2 = 0; + wqe->send.u.immd_src[0].immdlen = 0; + size = sizeof(wqe->send) + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->send.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + + /* + * iWARP protocol supports 64 bit immediate data but rdma api + * limits it to 32bit. + */ + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; + else + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->write.u.immd_src, wr, + T4_MAX_WRITE_INLINE, &plen); + if (ret) + return ret; + size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->write.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->write.u.immd_src[0].r1 = 0; + wqe->write.u.immd_src[0].r2 = 0; + wqe->write.u.immd_src[0].immdlen = 0; + size = sizeof(wqe->write) + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->write.plen = cpu_to_be32(plen); + return 0; +} + +static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, + struct ib_send_wr *wr) +{ + memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); + memset(immdp->r1, 0, 6); + immdp->op = FW_RI_DATA_IMMD; + immdp->immdlen = 16; +} + +static void build_rdma_write_cmpl(struct t4_sq *sq, + struct fw_ri_rdma_write_cmpl_wr *wcwr, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + + /* + * This code assumes the struct fields preceding the write isgl + * fit in one 64B WR slot. This is because the WQE is built + * directly in the dma queue, and wrapping is only handled + * by the code buildling sgls. IE the "fixed part" of the wr + * structs must all fit in 64B. The WQE build code should probably be + * redesigned to avoid this restriction, but for now just add + * the BUILD_BUG_ON() to catch if this WQE struct gets too big. + */ + BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64); + + wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + if (wr->next->opcode == IB_WR_SEND) + wcwr->stag_inv = 0; + else + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + wcwr->r2 = 0; + wcwr->r3 = 0; + + /* SEND_INV SGL */ + if (wr->next->send_flags & IB_SEND_INLINE) + build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); + else + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); + + /* WRITE SGL */ + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); + + size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + wcwr->plen = cpu_to_be32(plen); + *len16 = DIV_ROUND_UP(size, 16); +} + +static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) +{ + if (wr->num_sge > 1) + return -EINVAL; + if (wr->num_sge && wr->sg_list[0].length) { + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr + >> 32)); + wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr); + wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); + wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); + wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr + >> 32)); + wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr)); + } else { + wqe->read.stag_src = cpu_to_be32(2); + wqe->read.to_src_hi = 0; + wqe->read.to_src_lo = 0; + wqe->read.stag_sink = cpu_to_be32(2); + wqe->read.plen = 0; + wqe->read.to_sink_hi = 0; + wqe->read.to_sink_lo = 0; + } + wqe->read.r2 = 0; + wqe->read.r5 = 0; + *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16); + return 0; +} + +static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) +{ + bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + struct t4_swsqe *swsqe; + union t4_wr *wqe; + u16 write_wrid; + u8 len16; + u16 idx; + + /* + * The sw_sq entries still look like a WRITE and a SEND and consume + * 2 slots. The FW WR, however, will be a single uber-WR. + */ + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); + + /* WRITE swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_RDMA_WRITE; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = write_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + write_wrid = qhp->wq.sq.pidx; + + /* just bump the sw_sq */ + qhp->wq.sq.in_use++; + if (++qhp->wq.sq.pidx == qhp->wq.sq.size) + qhp->wq.sq.pidx = 0; + + /* SEND_WITH_INV swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + if (wr->next->opcode == IB_WR_SEND) + swsqe->opcode = FW_RI_SEND; + else + swsqe->opcode = FW_RI_SEND_WITH_INV; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = send_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->next->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; + wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; + + init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, + write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); + t4_sq_produce(&qhp->wq, len16); + idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + + t4_ring_sq_db(&qhp->wq, idx, wqe); +} + +static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, + const struct ib_recv_wr *wr, u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)qhp->wq.rq.queue, + (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP( + sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + +static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + +static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16) +{ + __be64 *p = (__be64 *)fr->pbl; + + fr->r2 = cpu_to_be32(0); + fr->stag = cpu_to_be32(mhp->ibmr.rkey); + + fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) | + FW_RI_TPTE_STAGSTATE_V(1) | + FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) | + FW_RI_TPTE_PDID_V(mhp->attr.pdid)); + fr->tpte.locread_to_qpid = cpu_to_be32( + FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) | + FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) | + FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12)); + fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V( + PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3)); + fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0); + fr->tpte.len_hi = cpu_to_be32(0); + fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length); + fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + + p[0] = cpu_to_be64((u64)mhp->mpl[0]); + p[1] = cpu_to_be64((u64)mhp->mpl[1]); + + *len16 = DIV_ROUND_UP(sizeof(*fr), 16); +} + +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16, bool dsgl_supported) +{ + struct fw_ri_immd *imdp; + __be64 *p; + int i; + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); + int rem; + + if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl)) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; + wqe->fr.addr_type = FW_RI_VA_BASED_TO; + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); + wqe->fr.len_hi = 0; + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & + 0xffffffff); + + if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) { + struct fw_ri_dsgl *sglp; + + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]); + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); + } + return 0; +} + +static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) +{ + wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->inv.r2 = 0; + *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16); + return 0; +} + +void c4iw_qp_add_ref(struct ib_qp *qp) +{ + pr_debug("ib_qp %p\n", qp); + refcount_inc(&to_c4iw_qp(qp)->qp_refcnt); +} + +void c4iw_qp_rem_ref(struct ib_qp *qp) +{ + pr_debug("ib_qp %p\n", qp); + if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt)) + complete(&to_c4iw_qp(qp)->qp_rel_comp); +} + +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + xa_lock_irqsave(&qhp->rhp->qps, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_sq_db(&qhp->wq, inc, NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + xa_lock_irqsave(&qhp->rhp->qps, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_rq_db(&qhp->wq, inc, NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); + return 0; +} + +static int ib_to_fw_opcode(int ib_opcode) +{ + int opcode; + + switch (ib_opcode) { + case IB_WR_SEND_WITH_INV: + opcode = FW_RI_SEND_WITH_INV; + break; + case IB_WR_SEND: + opcode = FW_RI_SEND; + break; + case IB_WR_RDMA_WRITE: + opcode = FW_RI_RDMA_WRITE; + break; + case IB_WR_RDMA_WRITE_WITH_IMM: + opcode = FW_RI_WRITE_IMMEDIATE; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + opcode = FW_RI_READ_REQ; + break; + case IB_WR_REG_MR: + opcode = FW_RI_FAST_REGISTER; + break; + case IB_WR_LOCAL_INV: + opcode = FW_RI_LOCAL_INV; + break; + default: + opcode = -EINVAL; + } + return opcode; +} + +static int complete_sq_drain_wr(struct c4iw_qp *qhp, + const struct ib_send_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *schp; + unsigned long flag; + struct t4_cq *cq; + int opcode; + + schp = to_c4iw_cq(qhp->ibqp.send_cq); + cq = &schp->cq; + + opcode = ib_to_fw_opcode(wr->opcode); + if (opcode < 0) + return opcode; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(opcode) | + CQE_TYPE_V(1) | + CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&schp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&schp->lock, flag); + + if (t4_clear_cq_armed(&schp->cq)) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } + return 0; +} + +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + int ret = 0; + + while (wr) { + ret = complete_sq_drain_wr(qhp, wr); + if (ret) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + return ret; +} + +static void complete_rq_drain_wr(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *rchp; + unsigned long flag; + struct t4_cq *cq; + + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + cq = &rchp->cq; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(FW_RI_SEND) | + CQE_TYPE_V(0) | + CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&rchp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&rchp->lock, flag); + + if (t4_clear_cq_armed(&rchp->cq)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } +} + +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) +{ + while (wr) { + complete_rq_drain_wr(qhp, wr); + wr = wr->next; + } +} + +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + int err = 0; + u8 len16 = 0; + enum fw_wr_opcodes fw_opcode = 0; + enum fw_ri_wr_flags fw_flags; + struct c4iw_qp *qhp; + struct c4iw_dev *rhp; + union t4_wr *wqe = NULL; + u32 num_wrs; + struct t4_swsqe *swsqe; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; + spin_lock_irqsave(&qhp->lock, flag); + + /* + * If the qp has been flushed, then just insert a special + * drain cqe. + */ + if (qhp->wq.flushed) { + spin_unlock_irqrestore(&qhp->lock, flag); + err = complete_sq_drain_wrs(qhp, wr, bad_wr); + return err; + } + num_wrs = t4_sq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; + return -ENOMEM; + } + + /* + * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is + * the response for small NVMEe-oF READ requests. If the chain is + * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths + * and lengths meet the requirements of the fw_ri_write_cmpl_wr work + * request, then build and post the write_cmpl WR. If any of the tests + * below are not true, then we continue on with the tradtional WRITE + * and SEND WRs. + */ + if (qhp->rhp->rdev.lldi.write_cmpl_support && + CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >= + CHELSIO_T5 && + wr && wr->next && !wr->next->next && + wr->opcode == IB_WR_RDMA_WRITE && + wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && + (wr->next->opcode == IB_WR_SEND || + wr->next->opcode == IB_WR_SEND_WITH_INV) && + wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && + wr->next->num_sge == 1 && num_wrs >= 2) { + post_write_cmpl(qhp, wr); + spin_unlock_irqrestore(&qhp->lock, flag); + return 0; + } + + while (wr) { + if (num_wrs == 0) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + + fw_flags = 0; + if (wr->send_flags & IB_SEND_SOLICITED) + fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) + fw_flags |= FW_RI_COMPLETION_FLAG; + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + switch (wr->opcode) { + case IB_WR_SEND_WITH_INV: + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_READ_FENCE_FLAG; + fw_opcode = FW_RI_SEND_WR; + if (wr->opcode == IB_WR_SEND) + swsqe->opcode = FW_RI_SEND; + else + swsqe->opcode = FW_RI_SEND_WITH_INV; + err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { + err = -EINVAL; + break; + } + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; + fallthrough; + case IB_WR_RDMA_WRITE: + fw_opcode = FW_RI_RDMA_WRITE_WR; + swsqe->opcode = FW_RI_RDMA_WRITE; + err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + fw_opcode = FW_RI_RDMA_READ_WR; + swsqe->opcode = FW_RI_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); + fw_flags = FW_RI_RDMA_READ_INVALIDATE; + } else { + fw_flags = 0; + } + err = build_rdma_read(wqe, wr, &len16); + if (err) + break; + swsqe->read_len = wr->sg_list[0].length; + if (!qhp->wq.sq.oldest_read) + qhp->wq.sq.oldest_read = swsqe; + break; + case IB_WR_REG_MR: { + struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); + + swsqe->opcode = FW_RI_FAST_REGISTER; + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + !mhp->attr.state && mhp->mpl_len <= 2) { + fw_opcode = FW_RI_FR_NSMR_TPTE_WR; + build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), + mhp, &len16); + } else { + fw_opcode = FW_RI_FR_NSMR_WR; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), + mhp, &len16, + rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (err) + break; + } + mhp->attr.state = 1; + break; + } + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_LOCAL_FENCE_FLAG; + fw_opcode = FW_RI_INV_LSTAG_WR; + swsqe->opcode = FW_RI_LOCAL_INV; + err = build_inv_stag(wqe, wr, &len16); + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); + break; + default: + pr_warn("%s post of type=%d TBD!\n", __func__, + wr->opcode); + err = -EINVAL; + } + if (err) { + *bad_wr = wr; + break; + } + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = cxgb4_read_sge_timestamp( + rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); + + pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n", + (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, + swsqe->opcode, swsqe->read_len); + wr = wr->next; + num_wrs--; + t4_sq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + if (!rhp->rdev.status_page->db_off) { + t4_ring_sq_db(&qhp->wq, idx, wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } + return err; +} + +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct c4iw_qp *qhp; + union t4_recv_wr *wqe = NULL; + u32 num_wrs; + u8 len16 = 0; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + + /* + * If the qp has been flushed, then just insert a special + * drain cqe. + */ + if (qhp->wq.flushed) { + spin_unlock_irqrestore(&qhp->lock, flag); + complete_rq_drain_wrs(qhp, wr); + return err; + } + num_wrs = t4_rq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + + qhp->wq.rq.wq_pidx * + T4_EQ_ENTRY_SIZE); + if (num_wrs) + err = build_rdma_recv(qhp, wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; + if (c4iw_wr_log) { + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts = + cxgb4_read_sge_timestamp( + qhp->rhp->rdev.lldi.ports[0]); + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time = + ktime_get(); + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = qhp->wq.rq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + pr_debug("cookie 0x%llx pidx %u\n", + (unsigned long long)wr->wr_id, qhp->wq.rq.pidx); + t4_rq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + wr = wr->next; + num_wrs--; + } + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_rq_db(&qhp->wq, idx, wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } + return err; +} + +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, + u64 wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, + (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16 * 16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + union t4_recv_wr *wqe, lwqe; + struct c4iw_srq *srq; + unsigned long flag; + u8 len16 = 0; + u16 idx = 0; + int err = 0; + u32 num_wrs; + + srq = to_c4iw_srq(ibsrq); + spin_lock_irqsave(&srq->lock, flag); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&srq->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || + srq->wq.pending_in_use || + srq->wq.sw_rq[srq->wq.pidx].valid) { + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + } else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", + __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, + srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + if (idx) + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + spin_unlock_irqrestore(&srq->lock, flag); + return err; +} + +static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, + u8 *ecode) +{ + int status; + int tagged; + int opcode; + int rqtype; + int send_inv; + + if (!err_cqe) { + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + return; + } + + status = CQE_STATUS(err_cqe); + opcode = CQE_OPCODE(err_cqe); + rqtype = RQ_TYPE(err_cqe); + send_inv = (opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV); + tagged = (opcode == FW_RI_RDMA_WRITE) || + (rqtype && (opcode == FW_RI_READ_RESP)); + + switch (status) { + case T4_ERR_STAG: + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_INV_STAG; + } + break; + case T4_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_ACCESS: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; + break; + case T4_ERR_WRAP: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_TO_WRAP; + break; + case T4_ERR_BOUND: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_BASE_BOUNDS; + } + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + break; + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_OUT_OF_RQE: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_NOBUF; + break; + case T4_ERR_PBL_ADDR_BOUND: + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + break; + case T4_ERR_CRC: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_CRC_ERR; + break; + case T4_ERR_MARKER: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_MARKER_ERR; + break; + case T4_ERR_PDU_LEN_ERR: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_MSG_TOOBIG; + break; + case T4_ERR_DDP_VERSION: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_INV_VERS; + } else { + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_VERS; + } + break; + case T4_ERR_RDMA_VERSION: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_VERS; + break; + case T4_ERR_OPCODE: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_OPCODE; + break; + case T4_ERR_DDP_QUEUE_NUM: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_QN; + break; + case T4_ERR_MSN: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_RANGE; + break; + case T4_ERR_TBIT: + *layer_type = LAYER_DDP|DDP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_MO: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MO; + break; + default: + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + break; + } +} + +static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, + gfp_t gfp) +{ + struct fw_ri_wr *wqe; + struct sk_buff *skb; + struct terminate_message *term; + + pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, + qhp->ep->hwtid); + + skb = skb_dequeue(&qhp->ep->com.ep_skb_list); + if (WARN_ON(!skb)) + return; + + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = __skb_put_zero(skb, sizeof(*wqe)); + wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); + + wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; + wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term)); + term = (struct terminate_message *)wqe->u.terminate.termmsg; + if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { + term->layer_etype = qhp->attr.layer_etype; + term->ecode = qhp->attr.ecode; + } else + build_term_codes(err_cqe, &term->layer_etype, &term->ecode); + c4iw_ofld_send(&qhp->rhp->rdev, skb); +} + +/* + * Assumes qhp lock is held. + */ +static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, + struct c4iw_cq *schp) +{ + int count; + int rq_flushed = 0, sq_flushed; + unsigned long flag; + + pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); + + /* locking hierarchy: cqs lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + if (schp != rchp) + spin_lock(&schp->lock); + spin_lock(&qhp->lock); + + if (qhp->wq.flushed) { + spin_unlock(&qhp->lock); + if (schp != rchp) + spin_unlock(&schp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + return; + } + qhp->wq.flushed = 1; + t4_set_wq_in_error(&qhp->wq, 0); + + c4iw_flush_hw_cq(rchp, qhp); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } + + if (schp != rchp) + c4iw_flush_hw_cq(schp, qhp); + sq_flushed = c4iw_flush_sq(qhp); + + spin_unlock(&qhp->lock); + if (schp != rchp) + spin_unlock(&schp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + + if (schp == rchp) { + if ((rq_flushed || sq_flushed) && + t4_clear_cq_armed(&rchp->cq)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (sq_flushed && t4_clear_cq_armed(&schp->cq)) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } + } +} + +static void flush_qp(struct c4iw_qp *qhp) +{ + struct c4iw_cq *rchp, *schp; + unsigned long flag; + + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + schp = to_c4iw_cq(qhp->ibqp.send_cq); + + if (qhp->ibqp.uobject) { + + /* for user qps, qhp->wq.flushed is protected by qhp->mutex */ + if (qhp->wq.flushed) + return; + + qhp->wq.flushed = 1; + t4_set_wq_in_error(&qhp->wq, 0); + t4_set_cq_in_error(&rchp->cq); + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (schp != rchp) { + t4_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } + return; + } + __flush_qp(qhp, rchp, schp); +} + +static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + struct c4iw_ep *ep) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid); + + skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!skb)) + return -ENOMEM; + + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + wqe = __skb_put_zero(skb, sizeof(*wqe)); + wqe->op_compl = cpu_to_be32( + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); + wqe->cookie = (uintptr_t)ep->com.wr_waitp; + + wqe->u.fini.type = FW_RI_TYPE_FINI; + + ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + + pr_debug("ret %d\n", ret); + return ret; +} + +static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) +{ + pr_debug("p2p_type = %d\n", p2p_type); + memset(&init->u, 0, sizeof(init->u)); + switch (p2p_type) { + case FW_RI_INIT_P2PTYPE_RDMA_WRITE: + init->u.write.opcode = FW_RI_RDMA_WRITE_WR; + init->u.write.stag_sink = cpu_to_be32(1); + init->u.write.to_sink = cpu_to_be64(1); + init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; + init->u.write.len16 = DIV_ROUND_UP( + sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16); + break; + case FW_RI_INIT_P2PTYPE_READ_REQ: + init->u.write.opcode = FW_RI_RDMA_READ_WR; + init->u.read.stag_src = cpu_to_be32(1); + init->u.read.to_src_lo = cpu_to_be32(1); + init->u.read.stag_sink = cpu_to_be32(1); + init->u.read.to_sink_lo = cpu_to_be32(1); + init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16); + break; + } +} + +static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp, + qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord); + + skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto out; + } + ret = alloc_ird(rhp, qhp->attr.max_ird); + if (ret) { + qhp->attr.max_ird = 0; + kfree_skb(skb); + goto out; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = __skb_put_zero(skb, sizeof(*wqe)); + wqe->op_compl = cpu_to_be32( + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); + + wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp; + + wqe->u.init.type = FW_RI_TYPE_INIT; + wqe->u.init.mpareqbit_p2ptype = + FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) | + FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type); + wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; + if (qhp->attr.mpa_attr.recv_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.xmit_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.crc_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE; + + wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE | + FW_RI_QP_RDMA_WRITE_ENABLE | + FW_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE | + FW_RI_QP_STAG0_ENABLE; + wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq)); + wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); + wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); + wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); + if (qhp->srq) { + wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ | + qhp->srq->idx); + } else { + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + } + wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); + wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); + wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); + wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); + wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); + wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); + if (qhp->attr.mpa_attr.initiator) + build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); + + ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + if (!ret) + goto out; + + free_ird(rhp, qhp->attr.max_ird); +out: + pr_debug("ret %d\n", ret); + return ret; +} + +int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal) +{ + int ret = 0; + struct c4iw_qp_attributes newattr = qhp->attr; + int disconnect = 0; + int terminate = 0; + int abort = 0; + int free = 0; + struct c4iw_ep *ep = NULL; + + pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", + qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, + (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); + + mutex_lock(&qhp->mutex); + + /* Process attr changes if in IDLE */ + if (mask & C4IW_QP_ATTR_VALID_MODIFY) { + if (qhp->attr.state != C4IW_QP_STATE_IDLE) { + ret = -EIO; + goto out; + } + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ) + newattr.enable_rdma_read = attrs->enable_rdma_read; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE) + newattr.enable_rdma_write = attrs->enable_rdma_write; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND) + newattr.enable_bind = attrs->enable_bind; + if (mask & C4IW_QP_ATTR_MAX_ORD) { + if (attrs->max_ord > c4iw_max_read_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ord = attrs->max_ord; + } + if (mask & C4IW_QP_ATTR_MAX_IRD) { + if (attrs->max_ird > cur_max_read_depth(rhp)) { + ret = -EINVAL; + goto out; + } + newattr.max_ird = attrs->max_ird; + } + qhp->attr = newattr; + } + + if (mask & C4IW_QP_ATTR_SQ_DB) { + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); + goto out; + } + if (mask & C4IW_QP_ATTR_RQ_DB) { + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); + goto out; + } + + if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) + goto out; + if (qhp->attr.state == attrs->next_state) + goto out; + + switch (qhp->attr.state) { + case C4IW_QP_STATE_IDLE: + switch (attrs->next_state) { + case C4IW_QP_STATE_RTS: + if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) { + ret = -EINVAL; + goto out; + } + if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) { + ret = -EINVAL; + goto out; + } + qhp->attr.mpa_attr = attrs->mpa_attr; + qhp->attr.llp_stream_handle = attrs->llp_stream_handle; + qhp->ep = qhp->attr.llp_stream_handle; + set_state(qhp, C4IW_QP_STATE_RTS); + + /* + * Ref the endpoint here and deref when we + * disassociate the endpoint from the QP. This + * happens in CLOSING->IDLE transition or *->ERROR + * transition. + */ + c4iw_get_ep(&qhp->ep->com); + ret = rdma_init(rhp, qhp); + if (ret) + goto err; + break; + case C4IW_QP_STATE_ERROR: + set_state(qhp, C4IW_QP_STATE_ERROR); + flush_qp(qhp); + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_RTS: + switch (attrs->next_state) { + case C4IW_QP_STATE_CLOSING: + t4_set_wq_in_error(&qhp->wq, 0); + set_state(qhp, C4IW_QP_STATE_CLOSING); + ep = qhp->ep; + if (!internal) { + abort = 0; + disconnect = 1; + c4iw_get_ep(&qhp->ep->com); + } + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + break; + case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq, 0); + set_state(qhp, C4IW_QP_STATE_TERMINATE); + qhp->attr.layer_etype = attrs->layer_etype; + qhp->attr.ecode = attrs->ecode; + ep = qhp->ep; + if (!internal) { + c4iw_get_ep(&ep->com); + terminate = 1; + disconnect = 1; + } else { + terminate = qhp->attr.send_term; + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + } + break; + case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq, 0); + set_state(qhp, C4IW_QP_STATE_ERROR); + if (!internal) { + disconnect = 1; + ep = qhp->ep; + c4iw_get_ep(&qhp->ep->com); + } + goto err; + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_CLOSING: + + /* + * Allow kernel users to move to ERROR for qp draining. + */ + if (!internal && (qhp->ibqp.uobject || attrs->next_state != + C4IW_QP_STATE_ERROR)) { + ret = -EINVAL; + goto out; + } + switch (attrs->next_state) { + case C4IW_QP_STATE_IDLE: + flush_qp(qhp); + set_state(qhp, C4IW_QP_STATE_IDLE); + qhp->attr.llp_stream_handle = NULL; + c4iw_put_ep(&qhp->ep->com); + qhp->ep = NULL; + wake_up(&qhp->wait); + break; + case C4IW_QP_STATE_ERROR: + goto err; + default: + ret = -EINVAL; + goto err; + } + break; + case C4IW_QP_STATE_ERROR: + if (attrs->next_state != C4IW_QP_STATE_IDLE) { + ret = -EINVAL; + goto out; + } + if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) { + ret = -EINVAL; + goto out; + } + set_state(qhp, C4IW_QP_STATE_IDLE); + break; + case C4IW_QP_STATE_TERMINATE: + if (!internal) { + ret = -EINVAL; + goto out; + } + goto err; + break; + default: + pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); + ret = -EINVAL; + goto err; + break; + } + goto out; +err: + pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep, + qhp->wq.sq.qid); + + /* disassociate the LLP connection */ + qhp->attr.llp_stream_handle = NULL; + if (!ep) + ep = qhp->ep; + qhp->ep = NULL; + set_state(qhp, C4IW_QP_STATE_ERROR); + free = 1; + abort = 1; + flush_qp(qhp); + wake_up(&qhp->wait); +out: + mutex_unlock(&qhp->mutex); + + if (terminate) + post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); + + /* + * If disconnect is 1, then we need to initiate a disconnect + * on the EP. This can be a normal close (RTS->CLOSING) or + * an abnormal close (RTS/CLOSING->ERROR). + */ + if (disconnect) { + c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : + GFP_KERNEL); + c4iw_put_ep(&ep->com); + } + + /* + * If free is 1, then we've disassociated the EP from the QP + * and we need to dereference the EP. + */ + if (free) + c4iw_put_ep(&ep->com); + pr_debug("exit state %d\n", qhp->attr.state); + return ret; +} + +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + struct c4iw_ucontext *ucontext; + struct c4iw_qp_attributes attrs; + + qhp = to_c4iw_qp(ib_qp); + rhp = qhp->rhp; + ucontext = qhp->ucontext; + + attrs.next_state = C4IW_QP_STATE_ERROR; + if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + else + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + wait_event(qhp->wait, !qhp->ep); + + xa_lock_irq(&rhp->qps); + __xa_erase(&rhp->qps, qhp->wq.sq.qid); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + xa_unlock_irq(&rhp->qps); + free_ird(rhp, qhp->attr.max_ird); + + c4iw_qp_rem_ref(ib_qp); + + wait_for_completion(&qhp->qp_rel_comp); + + pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid); + pr_debug("qhp %p ucontext %p\n", qhp, ucontext); + + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); + + c4iw_put_wr_wait(qhp->wr_waitp); + return 0; +} + +int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct ib_pd *pd = qp->pd; + struct c4iw_dev *rhp; + struct c4iw_qp *qhp = to_c4iw_qp(qp); + struct c4iw_pd *php; + struct c4iw_cq *schp; + struct c4iw_cq *rchp; + struct c4iw_create_qp_resp uresp; + unsigned int sqsize, rqsize = 0; + struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct c4iw_ucontext, ibucontext); + int ret; + struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; + struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL; + + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) + return -EOPNOTSUPP; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid); + rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid); + if (!schp || !rchp) + return -EINVAL; + + if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) + return -EINVAL; + + if (!attrs->srq) { + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return -E2BIG; + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; + } + + if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) + return -E2BIG; + sqsize = attrs->cap.max_send_wr + 1; + if (sqsize < 8) + sqsize = 8; + + qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!qhp->wr_waitp) + return -ENOMEM; + + qhp->wq.sq.size = sqsize; + qhp->wq.sq.memsize = + (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); + qhp->wq.sq.flush_cidx = -1; + if (!attrs->srq) { + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); + } + + if (ucontext) { + qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); + if (!attrs->srq) + qhp->wq.rq.memsize = + roundup(qhp->wq.rq.memsize, PAGE_SIZE); + } + + ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + qhp->wr_waitp, !attrs->srq); + if (ret) + goto err_free_wr_wait; + + attrs->cap.max_recv_wr = rqsize - 1; + attrs->cap.max_send_wr = sqsize - 1; + attrs->cap.max_inline_data = T4_MAX_SEND_INLINE; + + qhp->rhp = rhp; + qhp->attr.pd = php->pdid; + qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; + qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; + qhp->attr.sq_num_entries = attrs->cap.max_send_wr; + qhp->attr.sq_max_sges = attrs->cap.max_send_sge; + qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; + if (!attrs->srq) { + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + } + qhp->attr.state = C4IW_QP_STATE_IDLE; + qhp->attr.next_state = C4IW_QP_STATE_IDLE; + qhp->attr.enable_rdma_read = 1; + qhp->attr.enable_rdma_write = 1; + qhp->attr.enable_bind = 1; + qhp->attr.max_ord = 0; + qhp->attr.max_ird = 0; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; + spin_lock_init(&qhp->lock); + mutex_init(&qhp->mutex); + init_waitqueue_head(&qhp->wait); + init_completion(&qhp->qp_rel_comp); + refcount_set(&qhp->qp_refcnt, 1); + + ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL); + if (ret) + goto err_destroy_qp; + + if (udata && ucontext) { + sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); + if (!sq_key_mm) { + ret = -ENOMEM; + goto err_remove_handle; + } + if (!attrs->srq) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { + ret = -ENOMEM; + goto err_free_sq_key; + } + } + sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); + if (!sq_db_key_mm) { + ret = -ENOMEM; + goto err_free_rq_key; + } + if (!attrs->srq) { + rq_db_key_mm = + kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { + ret = -ENOMEM; + goto err_free_sq_db_key; + } + } + memset(&uresp, 0, sizeof(uresp)); + if (t4_sq_onchip(&qhp->wq.sq)) { + ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), + GFP_KERNEL); + if (!ma_sync_key_mm) { + ret = -ENOMEM; + goto err_free_rq_db_key; + } + uresp.flags = C4IW_QPF_ONCHIP; + } + if (rhp->rdev.lldi.write_w_imm_support) + uresp.flags |= C4IW_QPF_WRITE_W_IMM; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.sqid = qhp->wq.sq.qid; + uresp.sq_size = qhp->wq.sq.size; + uresp.sq_memsize = qhp->wq.sq.memsize; + if (!attrs->srq) { + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + } + spin_lock(&ucontext->mmap_lock); + if (ma_sync_key_mm) { + uresp.ma_sync_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } + uresp.sq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } + uresp.sq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_ma_sync_key; + sq_key_mm->key = uresp.sq_key; + sq_key_mm->addr = qhp->wq.sq.phys_addr; + sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, sq_key_mm); + if (!attrs->srq) { + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + } + sq_db_key_mm->key = uresp.sq_db_gts_key; + sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; + sq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, sq_db_key_mm); + if (!attrs->srq) { + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = + (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + } + if (ma_sync_key_mm) { + ma_sync_key_mm->key = uresp.ma_sync_key; + ma_sync_key_mm->addr = + (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + PCIE_MA_SYNC_A) & PAGE_MASK; + ma_sync_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, ma_sync_key_mm); + } + + qhp->ucontext = ucontext; + } + if (!attrs->srq) { + qhp->wq.qp_errp = + &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + } else { + qhp->wq.qp_errp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + + qhp->ibqp.qp_num = qhp->wq.sq.qid; + if (attrs->srq) + qhp->srq = to_c4iw_srq(attrs->srq); + INIT_LIST_HEAD(&qhp->db_fc_entry); + pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", + qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, + attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size, + qhp->wq.rq.memsize, attrs->cap.max_recv_wr); + return 0; +err_free_ma_sync_key: + kfree(ma_sync_key_mm); +err_free_rq_db_key: + if (!attrs->srq) + kfree(rq_db_key_mm); +err_free_sq_db_key: + kfree(sq_db_key_mm); +err_free_rq_key: + if (!attrs->srq) + kfree(rq_key_mm); +err_free_sq_key: + kfree(sq_key_mm); +err_remove_handle: + xa_erase_irq(&rhp->qps, qhp->wq.sq.qid); +err_destroy_qp: + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); +err_free_wr_wait: + c4iw_put_wr_wait(qhp->wr_waitp); + return ret; +} + +int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + enum c4iw_qp_attr_mask mask = 0; + struct c4iw_qp_attributes attrs = {}; + + pr_debug("ib_qp %p\n", ibqp); + + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + + /* iwarp does not support the RTR state */ + if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) + attr_mask &= ~IB_QP_STATE; + + /* Make sure we still have something left to do */ + if (!attr_mask) + return 0; + + qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; + + attrs.next_state = c4iw_convert_state(attr->qp_state); + attrs.enable_rdma_read = (attr->qp_access_flags & + IB_ACCESS_REMOTE_READ) ? 1 : 0; + attrs.enable_rdma_write = (attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; + + + mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0; + mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? + (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; + + /* + * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for + * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. + */ + attrs.sq_db_inc = attr->sq_psn; + attrs.rq_db_inc = attr->rq_psn; + mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; + mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; + + return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); +} + +struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) +{ + pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn); + return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); +} + +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) +{ + struct ib_event event = {}; + + event.device = &srq->rhp->ibdev; + event.element.srq = &srq->ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ib_dispatch_event(&event); +} + +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + int ret = 0; + + /* + * XXX 0 mask == a SW interrupt for srq_limit reached... + */ + if (udata && !srq_attr_mask) { + c4iw_dispatch_srq_limit_reached_event(srq); + goto out; + } + + /* no support for this yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EINVAL; + goto out; + } + + if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) { + srq->armed = true; + srq->srq_limit = attr->srq_limit; + } +out: + return ret; +} + +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + + memset(attr, 0, sizeof(*attr)); + memset(init_attr, 0, sizeof(*init_attr)); + attr->qp_state = to_ib_qp_state(qhp->attr.state); + attr->cur_qp_state = to_ib_qp_state(qhp->attr.state); + init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; + init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; + init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; + init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; + init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; + return 0; +} + +static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + struct sk_buff *skb = srq->destroy_skb; + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_RESET; + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.eqid = cpu_to_be32(wq->qid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + dma_unmap_addr(wq, mapping)); + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); + kfree(wq->sw_rq); + c4iw_put_qpid(rdev, wq->qid, uctx); +} + +static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + int user = (uctx != &rdev->uctx); + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + struct sk_buff *skb; + int wr_len; + int eqsize; + int ret = -ENOMEM; + + wq->qid = c4iw_get_qpid(rdev, uctx); + if (!wq->qid) + goto err; + + if (!user) { + wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq), + GFP_KERNEL); + if (!wq->sw_rq) + goto err_put_qpid; + wq->pending_wrs = kcalloc(srq->wq.size, + sizeof(*srq->wq.pending_wrs), + GFP_KERNEL); + if (!wq->pending_wrs) + goto err_free_sw_rq; + } + + wq->rqt_size = wq->size; + wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size); + if (!wq->rqt_hwaddr) + goto err_free_pending_wrs; + wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> + T4_RQT_ENTRY_SHIFT; + + wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, wq->memsize, + &wq->dma_addr, GFP_KERNEL); + if (!wq->queue) + goto err_free_rqtpool; + + dma_unmap_addr_set(wq, mapping, wq->dma_addr); + + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, + &wq->bar2_qid, + user ? &wq->bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + + if (user && !wq->bar2_va) { + pr_warn(MOD "%s: srqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->qid); + ret = -EINVAL; + goto err_free_queue; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + goto err_free_queue; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.srq.eqid = cpu_to_be32(wq->qid); + res->u.srq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */ + res->u.srq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr); + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.pdid = cpu_to_be32(srq->pdid); + res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size); + res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr - + rdev->lldi.vr->rq.start); + + c4iw_init_wr_wait(wr_waitp); + + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__); + if (ret) + goto err_free_queue; + + pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n" + " bar2_addr %p rqt addr 0x%x size %d\n", + __func__, srq->idx, wq->qid, srq->pdid, wq->queue, + (u64)virt_to_phys(wq->queue), wq->bar2_va, + wq->rqt_hwaddr, wq->rqt_size); + + return 0; +err_free_queue: + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + dma_unmap_addr(wq, mapping)); +err_free_rqtpool: + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); +err_free_pending_wrs: + if (!user) + kfree(wq->pending_wrs); +err_free_sw_rq: + if (!user) + kfree(wq->sw_rq); +err_put_qpid: + c4iw_put_qpid(rdev, wq->qid, uctx); +err: + return ret; +} + +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + +int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct ib_pd *pd = ib_srq->pd; + struct c4iw_dev *rhp; + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + struct c4iw_pd *php; + struct c4iw_create_srq_resp uresp; + struct c4iw_ucontext *ucontext; + struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm; + int rqsize; + int ret; + int wr_len; + + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + + pr_debug("%s ib_pd %p\n", __func__, pd); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (!rhp->rdev.lldi.vr->srq.size) + return -EINVAL; + if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return -E2BIG; + if (attrs->attr.max_sge > T4_MAX_RECV_SGE) + return -E2BIG; + + /* + * SRQ RQT and RQ must be a power of 2 and at least 16 deep. + */ + rqsize = attrs->attr.max_wr + 1; + rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); + + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); + + srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!srq->wr_waitp) + return -ENOMEM; + + srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); + if (srq->idx < 0) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!srq->destroy_skb) { + ret = -ENOMEM; + goto err_free_srq_idx; + } + + srq->rhp = rhp; + srq->pdid = php->pdid; + + srq->wq.size = rqsize; + srq->wq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*srq->wq.queue); + if (ucontext) + srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE); + + ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx : + &rhp->rdev.uctx, srq->wr_waitp); + if (ret) + goto err_free_skb; + attrs->attr.max_wr = rqsize - 1; + + if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) + srq->flags = T4_SRQ_LIMIT_SUPPORT; + + if (udata) { + srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); + if (!srq_key_mm) { + ret = -ENOMEM; + goto err_free_queue; + } + srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); + if (!srq_db_key_mm) { + ret = -ENOMEM; + goto err_free_srq_key_mm; + } + memset(&uresp, 0, sizeof(uresp)); + uresp.flags = srq->flags; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.srqid = srq->wq.qid; + uresp.srq_size = srq->wq.size; + uresp.srq_memsize = srq->wq.memsize; + uresp.rqt_abs_idx = srq->wq.rqt_abs_idx; + spin_lock(&ucontext->mmap_lock); + uresp.srq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.srq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_srq_db_key_mm; + srq_key_mm->key = uresp.srq_key; + srq_key_mm->addr = virt_to_phys(srq->wq.queue); + srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize); + insert_mmap(ucontext, srq_key_mm); + srq_db_key_mm->key = uresp.srq_db_gts_key; + srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa; + srq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, srq_db_key_mm); + } + + pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n", + __func__, srq->wq.qid, srq->idx, srq->wq.size, + (unsigned long)srq->wq.memsize, attrs->attr.max_wr); + + spin_lock_init(&srq->lock); + return 0; + +err_free_srq_db_key_mm: + kfree(srq_db_key_mm); +err_free_srq_key_mm: + kfree(srq_key_mm); +err_free_queue: + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); +err_free_skb: + kfree_skb(srq->destroy_skb); +err_free_srq_idx: + c4iw_free_srq_idx(&rhp->rdev, srq->idx); +err_free_wr_wait: + c4iw_put_wr_wait(srq->wr_waitp); + return ret; +} + +int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + + srq = to_c4iw_srq(ibsrq); + rhp = srq->rhp; + + pr_debug("%s id %d\n", __func__, srq->wq.qid); + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); + c4iw_free_srq_idx(&rhp->rdev, srq->idx); + c4iw_put_wr_wait(srq->wr_waitp); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c new file mode 100644 index 000000000..e800e8e8b --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* Crude resource management */ +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/ratelimit.h> +#include "iw_cxgb4.h" + +static int c4iw_init_qid_table(struct c4iw_rdev *rdev) +{ + u32 i; + + if (c4iw_id_table_alloc(&rdev->resource.qid_table, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->qp.size, 0)) + return -ENOMEM; + + for (i = rdev->lldi.vr->qp.start; + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) + if (!(i & rdev->qpmask)) + c4iw_id_free(&rdev->resource.qid_table, i); + return 0; +} + +/* nr_* must be power of 2 */ +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt) +{ + int err = 0; + err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, + C4IW_ID_TABLE_F_RANDOM); + if (err) + goto tpt_err; + err = c4iw_init_qid_table(rdev); + if (err) + goto qid_err; + err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0, + nr_pdid, 1, 0); + if (err) + goto pdid_err; + if (!nr_srqt) + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + 1, 1, 0); + else + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + nr_srqt, 0, 0); + if (err) + goto srq_err; + return 0; + srq_err: + c4iw_id_table_free(&rdev->resource.pdid_table); + pdid_err: + c4iw_id_table_free(&rdev->resource.qid_table); + qid_err: + c4iw_id_table_free(&rdev->resource.tpt_table); + tpt_err: + return -ENOMEM; +} + +/* + * returns 0 if no resource available + */ +u32 c4iw_get_resource(struct c4iw_id_table *id_table) +{ + u32 entry; + entry = c4iw_id_alloc(id_table); + if (entry == (u32)(-1)) + return 0; + return entry; +} + +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) +{ + pr_debug("entry 0x%x\n", entry); + c4iw_id_free(id_table, entry); +} + +u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->cqids)) { + entry = list_entry(uctx->cqids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_table); + if (!qid) + goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + + /* + * now put the same ids on the qp list since they all + * map to the same db/gts page. + */ + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->qpids); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + } +out: + mutex_unlock(&uctx->lock); + pr_debug("qid 0x%x\n", qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); + return qid; +} + +void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return; + pr_debug("qid 0x%x\n", qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->cqids); + mutex_unlock(&uctx->lock); +} + +u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->qpids)) { + entry = list_entry(uctx->qpids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_table); + if (!qid) { + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.fail++; + mutex_unlock(&rdev->stats.lock); + goto out; + } + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + + /* + * now put the same ids on the cq list since they all + * map to the same db/gts page. + */ + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->cqids); + for (i = qid + 1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + } +out: + mutex_unlock(&uctx->lock); + pr_debug("qid 0x%x\n", qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); + return qid; +} + +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return; + pr_debug("qid 0x%x\n", qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->qpids); + mutex_unlock(&uctx->lock); +} + +void c4iw_destroy_resource(struct c4iw_resource *rscp) +{ + c4iw_id_table_free(&rscp->tpt_table); + c4iw_id_table_free(&rscp->qid_table); + c4iw_id_table_free(&rscp->pdid_table); +} + +/* + * PBL Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ + +u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); + pr_debug("addr 0x%x size %d\n", (u32)addr, size); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); + if (rdev->stats.pbl.cur > rdev->stats.pbl.max) + rdev->stats.pbl.max = rdev->stats.pbl.cur; + kref_get(&rdev->pbl_kref); + } else + rdev->stats.pbl.fail++; + mutex_unlock(&rdev->stats.lock); + return (u32)addr; +} + +static void destroy_pblpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, pbl_kref); + gen_pool_destroy(rdev->pbl_pool); + complete(&rdev->pbl_compl); +} + +void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + pr_debug("addr 0x%x size %d\n", addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); + kref_put(&rdev->pbl_kref, destroy_pblpool); +} + +int c4iw_pblpool_create(struct c4iw_rdev *rdev) +{ + unsigned pbl_start, pbl_chunk, pbl_top; + + rdev->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); + if (!rdev->pbl_pool) + return -ENOMEM; + + pbl_start = rdev->lldi.vr->pbl.start; + pbl_chunk = rdev->lldi.vr->pbl.size; + pbl_top = pbl_start + pbl_chunk; + + while (pbl_start < pbl_top) { + pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk); + if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) { + pr_debug("failed to add PBL chunk (%x/%x)\n", + pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + pr_warn("Failed to add all PBL chunks (%x/%x)\n", + pbl_start, pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + pr_debug("added PBL chunk (%x/%x)\n", + pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; +} + +void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) +{ + kref_put(&rdev->pbl_kref, destroy_pblpool); +} + +/* + * RQT Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */ + +u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); + pr_debug("addr 0x%x size %d\n", (u32)addr, size << 6); + if (!addr) + pr_warn_ratelimited("%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); + if (rdev->stats.rqt.cur > rdev->stats.rqt.max) + rdev->stats.rqt.max = rdev->stats.rqt.cur; + kref_get(&rdev->rqt_kref); + } else + rdev->stats.rqt.fail++; + mutex_unlock(&rdev->stats.lock); + return (u32)addr; +} + +static void destroy_rqtpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, rqt_kref); + gen_pool_destroy(rdev->rqt_pool); + complete(&rdev->rqt_compl); +} + +void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + pr_debug("addr 0x%x size %d\n", addr, size << 6); + mutex_lock(&rdev->stats.lock); + rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); + kref_put(&rdev->rqt_kref, destroy_rqtpool); +} + +int c4iw_rqtpool_create(struct c4iw_rdev *rdev) +{ + unsigned rqt_start, rqt_chunk, rqt_top; + int skip = 0; + + rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); + if (!rdev->rqt_pool) + return -ENOMEM; + + /* + * If SRQs are supported, then never use the first RQE from + * the RQT region. This is because HW uses RQT index 0 as NULL. + */ + if (rdev->lldi.vr->srq.size) + skip = T4_RQT_ENTRY_SIZE; + + rqt_start = rdev->lldi.vr->rq.start + skip; + rqt_chunk = rdev->lldi.vr->rq.size - skip; + rqt_top = rqt_start + rqt_chunk; + + while (rqt_start < rqt_top) { + rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk); + if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) { + pr_debug("failed to add RQT chunk (%x/%x)\n", + rqt_start, rqt_chunk); + if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) { + pr_warn("Failed to add all RQT chunks (%x/%x)\n", + rqt_start, rqt_top - rqt_start); + return 0; + } + rqt_chunk >>= 1; + } else { + pr_debug("added RQT chunk (%x/%x)\n", + rqt_start, rqt_chunk); + rqt_start += rqt_chunk; + } + } + return 0; +} + +void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) +{ + kref_put(&rdev->rqt_kref, destroy_rqtpool); +} + +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev) +{ + int idx; + + idx = c4iw_id_alloc(&rdev->resource.srq_table); + mutex_lock(&rdev->stats.lock); + if (idx == -1) { + rdev->stats.srqt.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + rdev->stats.srqt.cur++; + if (rdev->stats.srqt.cur > rdev->stats.srqt.max) + rdev->stats.srqt.max = rdev->stats.srqt.cur; + mutex_unlock(&rdev->stats.lock); + return idx; +} + +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx) +{ + c4iw_id_free(&rdev->resource.srq_table, idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.srqt.cur--; + mutex_unlock(&rdev->stats.lock); +} + +/* + * On-Chip QP Memory. + */ +#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ + +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); + pr_debug("addr 0x%x size %d\n", (u32)addr, size); + if (addr) { + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT); + if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max) + rdev->stats.ocqp.max = rdev->stats.ocqp.cur; + mutex_unlock(&rdev->stats.lock); + } + return (u32)addr; +} + +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + pr_debug("addr 0x%x size %d\n", addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); +} + +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) +{ + unsigned start, chunk, top; + + rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); + if (!rdev->ocqp_pool) + return -ENOMEM; + + start = rdev->lldi.vr->ocq.start; + chunk = rdev->lldi.vr->ocq.size; + top = start + chunk; + + while (start < top) { + chunk = min(top - start + 1, chunk); + if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { + pr_debug("failed to add OCQP chunk (%x/%x)\n", + start, chunk); + if (chunk <= 1024 << MIN_OCQP_SHIFT) { + pr_warn("Failed to add all OCQP chunks (%x/%x)\n", + start, top - start); + return 0; + } + chunk >>= 1; + } else { + pr_debug("added OCQP chunk (%x/%x)\n", + start, chunk); + start += chunk; + } + } + return 0; +} + +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->ocqp_pool); +} diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c new file mode 100644 index 000000000..fd22c85d3 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2018 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/rdma_cm.h> + +#include "iw_cxgb4.h" +#include <rdma/restrack.h> +#include <uapi/rdma/rdma_netlink.h> + +static int fill_sq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* WQ+SQ */ + if (rdma_nl_put_driver_u32(msg, "sqid", wq->sq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", wq->flushed)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->sq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->sq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->sq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->sq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flush_cidx", wq->sq.flush_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->sq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->sq.size)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "flags", wq->sq.flags)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_rq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* RQ */ + if (rdma_nl_put_driver_u32(msg, "rqid", wq->rq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->rq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->rq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->rq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->rq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "msn", wq->rq.msn)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "rqt_hwaddr", wq->rq.rqt_hwaddr)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rqt_size", wq->rq.rqt_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->rq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->rq.size)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swsqe(struct sk_buff *msg, struct t4_sq *sq, u16 idx, + struct t4_swsqe *sqe) +{ + if (rdma_nl_put_driver_u32(msg, "idx", idx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "opcode", sqe->opcode)) + goto err; + if (rdma_nl_put_driver_u32(msg, "complete", sqe->complete)) + goto err; + if (sqe->complete && + rdma_nl_put_driver_u32(msg, "cqe_status", CQE_STATUS(&sqe->cqe))) + goto err; + if (rdma_nl_put_driver_u32(msg, "signaled", sqe->signaled)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", sqe->flushed)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +/* + * Dump the first and last pending sqes. + */ +static int fill_swsqes(struct sk_buff *msg, struct t4_sq *sq, + u16 first_idx, struct t4_swsqe *first_sqe, + u16 last_idx, struct t4_swsqe *last_sqe) +{ + if (!first_sqe) + goto out; + if (fill_swsqe(msg, sq, first_idx, first_sqe)) + goto err; + if (!last_sqe) + goto out; + if (fill_swsqe(msg, sq, last_idx, last_sqe)) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) +{ + struct t4_swsqe *fsp = NULL, *lsp = NULL; + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + u16 first_sq_idx = 0, last_sq_idx = 0; + struct t4_swsqe first_sqe, last_sqe; + struct nlattr *table_attr; + struct t4_wq wq; + + /* User qp state is not available, so don't dump user qps */ + if (qhp->ucontext) + return 0; + + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&qhp->lock); + wq = qhp->wq; + + /* If there are any pending sqes, copy the first and last */ + if (wq.sq.cidx != wq.sq.pidx) { + first_sq_idx = wq.sq.cidx; + first_sqe = qhp->wq.sq.sw_sq[first_sq_idx]; + fsp = &first_sqe; + last_sq_idx = wq.sq.pidx; + if (last_sq_idx-- == 0) + last_sq_idx = wq.sq.size - 1; + if (last_sq_idx != first_sq_idx) { + last_sqe = qhp->wq.sq.sw_sq[last_sq_idx]; + lsp = &last_sqe; + } + } + spin_unlock_irq(&qhp->lock); + + if (fill_sq(msg, &wq)) + goto err_cancel_table; + + if (fill_swsqes(msg, &wq.sq, first_sq_idx, fsp, last_sq_idx, lsp)) + goto err_cancel_table; + + if (fill_rq(msg, &wq)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +union union_ep { + struct c4iw_listen_ep lep; + struct c4iw_ep ep; +}; + +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, + struct rdma_cm_id *cm_id) +{ + struct nlattr *table_attr; + struct c4iw_ep_common *epcp; + struct c4iw_listen_ep *listen_ep = NULL; + struct c4iw_ep *ep = NULL; + struct iw_cm_id *iw_cm_id; + union union_ep *uep; + + iw_cm_id = rdma_iw_cm_id(cm_id); + if (!iw_cm_id) + return 0; + epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; + if (!epcp) + return 0; + uep = kzalloc(sizeof(*uep), GFP_KERNEL); + if (!uep) + return 0; + + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err_free_uep; + + /* Get a consistent snapshot */ + mutex_lock(&epcp->mutex); + if (epcp->state == LISTEN) { + uep->lep = *(struct c4iw_listen_ep *)epcp; + mutex_unlock(&epcp->mutex); + listen_ep = &uep->lep; + epcp = &listen_ep->com; + } else { + uep->ep = *(struct c4iw_ep *)epcp; + mutex_unlock(&epcp->mutex); + ep = &uep->ep; + epcp = &ep->com; + } + + if (rdma_nl_put_driver_u32(msg, "state", epcp->state)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "flags", epcp->flags)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history)) + goto err_cancel_table; + + if (listen_ep) { + if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog)) + goto err_cancel_table; + } else { + if (rdma_nl_put_driver_u32(msg, "hwtid", ep->hwtid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ord", ep->ord)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ird", ep->ird)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "emss", ep->emss)) + goto err_cancel_table; + + if (!ep->parent_ep && rdma_nl_put_driver_u32(msg, "atid", + ep->atid)) + goto err_cancel_table; + } + nla_nest_end(msg, table_attr); + kfree(uep); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err_free_uep: + kfree(uep); + return -EMSGSIZE; +} + +static int fill_cq(struct sk_buff *msg, struct t4_cq *cq) +{ + if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", cq->memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", cq->size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", cq->cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx_inc", cq->cidx_inc)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_cidx", cq->sw_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_pidx", cq->sw_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_in_use", cq->sw_in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "vector", cq->vector)) + goto err; + if (rdma_nl_put_driver_u32(msg, "gen", cq->gen)) + goto err; + if (rdma_nl_put_driver_u32(msg, "error", cq->error)) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cq->bits_type_ts))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "flags", cq->flags)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_cqe(struct sk_buff *msg, struct t4_cqe *cqe, u16 idx, + const char *qstr) +{ + if (rdma_nl_put_driver_u32(msg, qstr, idx)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "header", + be32_to_cpu(cqe->header))) + goto err; + if (rdma_nl_put_driver_u32(msg, "len", be32_to_cpu(cqe->len))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_hi", + be32_to_cpu(cqe->u.gen.wrid_hi))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_low", + be32_to_cpu(cqe->u.gen.wrid_low))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cqe->bits_type_ts))) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_hwcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + idx = (cq->cidx > 0) ? cq->cidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes, idx, "hwcq_idx")) + goto err; + idx = cq->cidx; + if (fill_cqe(msg, cqes + 1, idx, "hwcq_idx")) + goto err; + + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + if (!cq->sw_in_use) + return 0; + + idx = cq->sw_cidx; + if (fill_cqe(msg, cqes, idx, "swcq_idx")) + goto err; + if (cq->sw_in_use == 1) + goto out; + idx = (cq->sw_pidx > 0) ? cq->sw_pidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes + 1, idx, "swcq_idx")) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq) +{ + struct c4iw_cq *chp = to_c4iw_cq(ibcq); + struct nlattr *table_attr; + struct t4_cqe hwcqes[2]; + struct t4_cqe swcqes[2]; + struct t4_cq cq; + u16 idx; + + /* User cq state is not available, so don't dump user cqs */ + if (ibcq->uobject) + return 0; + + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&chp->lock); + + /* t4_cq struct */ + cq = chp->cq; + + /* get 2 hw cqes: cidx-1, and cidx */ + idx = (cq.cidx > 0) ? cq.cidx - 1 : cq.size - 1; + hwcqes[0] = chp->cq.queue[idx]; + + idx = cq.cidx; + hwcqes[1] = chp->cq.queue[idx]; + + /* get first and last sw cqes */ + if (cq.sw_in_use) { + swcqes[0] = chp->cq.sw_queue[cq.sw_cidx]; + if (cq.sw_in_use > 1) { + idx = (cq.sw_pidx > 0) ? cq.sw_pidx - 1 : cq.size - 1; + swcqes[1] = chp->cq.sw_queue[idx]; + } + } + + spin_unlock_irq(&chp->lock); + + if (fill_cq(msg, &cq)) + goto err_cancel_table; + + if (fill_swcqes(msg, &cq, swcqes)) + goto err_cancel_table; + + if (fill_hwcqes(msg, &cq, hwcqes)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) +{ + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + struct c4iw_dev *dev = mhp->rhp; + u32 stag = mhp->attr.stag; + struct nlattr *table_attr; + struct fw_ri_tpte tpte; + int ret; + + if (!stag) + return 0; + + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, (__be32 *)&tpte); + if (ret) { + dev_err(&dev->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return 0; + } + + if (rdma_nl_put_driver_u32_hex(msg, "idx", stag >> 8)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "valid", + FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "key", stag & 0xff)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "state", + FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "pdid", + FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "perm", + FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ps", + FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u64(msg, "len", + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "pbl_addr", + FW_RI_TPTE_PBLADDR_G(ntohl(tpte.nosnoop_pbladdr)))) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h new file mode 100644 index 000000000..c3b0e2896 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -0,0 +1,833 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __T4_H__ +#define __T4_H__ + +#include "t4_hw.h" +#include "t4_regs.h" +#include "t4_values.h" +#include "t4_msg.h" +#include "t4_tcb.h" +#include "t4fw_ri_api.h" + +#define T4_MAX_NUM_PD 65536 +#define T4_MAX_MR_SIZE (~0ULL) +#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ +#define T4_STAG_UNSET 0xffffffff +#define T4_FW_MAJ 0 +#define PCIE_MA_SYNC_A 0x30b4 + +struct t4_status_page { + __be32 rsvd1; /* flit 0 - hw owns */ + __be16 rsvd2; + __be16 qid; + __be16 cidx; + __be16 pidx; + u8 qp_err; /* flit 1 - sw owns */ + u8 db_off; + u8 pad[2]; + u16 host_wq_pidx; + u16 host_cidx; + u16 host_pidx; + u16 pad2; + u32 srqidx; +}; + +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) +#define T4_EQ_ENTRY_SIZE 64 + +#define T4_SQ_NUM_SLOTS 5 +#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) +#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ + sizeof(struct fw_ri_immd)) & ~31UL) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} + +#define T4_RQ_NUM_SLOTS 2 +#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) +#define T4_MAX_RECV_SGE 4 + +#define T4_WRITE_CMPL_MAX_SGL 4 +#define T4_WRITE_CMPL_MAX_CQE 16 + +union t4_wr { + struct fw_ri_res_wr res; + struct fw_ri_wr ri; + struct fw_ri_rdma_write_wr write; + struct fw_ri_send_wr send; + struct fw_ri_rdma_read_wr read; + struct fw_ri_bind_mw_wr bind; + struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; + struct fw_ri_inv_lstag_wr inv; + struct fw_ri_rdma_write_cmpl_wr write_cmpl; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; +}; + +union t4_recv_wr { + struct fw_ri_recv_wr recv; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; +}; + +static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, + enum fw_wr_opcodes opcode, u8 flags, u8 len16) +{ + wqe->send.opcode = (u8)opcode; + wqe->send.flags = flags; + wqe->send.wrid = wrid; + wqe->send.r1[0] = 0; + wqe->send.r1[1] = 0; + wqe->send.r1[2] = 0; + wqe->send.len16 = len16; +} + +/* CQE/AE status codes */ +#define T4_ERR_SUCCESS 0x0 +#define T4_ERR_STAG 0x1 /* STAG invalid: either the */ + /* STAG is offlimt, being 0, */ + /* or STAG_key mismatch */ +#define T4_ERR_PDID 0x2 /* PDID mismatch */ +#define T4_ERR_QPID 0x3 /* QPID mismatch */ +#define T4_ERR_ACCESS 0x4 /* Invalid access right */ +#define T4_ERR_WRAP 0x5 /* Wrap error */ +#define T4_ERR_BOUND 0x6 /* base and bounds voilation */ +#define T4_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_ECC 0x9 /* ECC error detected */ +#define T4_ERR_ECC_PSTAG 0xA /* ECC error detected when */ + /* reading PSTAG for a MW */ + /* Invalidate */ +#define T4_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ + /* software error */ +#define T4_ERR_SWFLUSH 0xC /* SW FLUSHED */ +#define T4_ERR_CRC 0x10 /* CRC error */ +#define T4_ERR_MARKER 0x11 /* Marker error */ +#define T4_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ +#define T4_ERR_OUT_OF_RQE 0x13 /* out of RQE */ +#define T4_ERR_DDP_VERSION 0x14 /* wrong DDP version */ +#define T4_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ +#define T4_ERR_OPCODE 0x16 /* invalid rdma opcode */ +#define T4_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ +#define T4_ERR_MSN 0x18 /* MSN error */ +#define T4_ERR_TBIT 0x19 /* tag bit not set correctly */ +#define T4_ERR_MO 0x1A /* MO not 0 for TERMINATE */ + /* or READ_REQ */ +#define T4_ERR_MSN_GAP 0x1B +#define T4_ERR_MSN_RANGE 0x1C +#define T4_ERR_IRD_OVERFLOW 0x1D +#define T4_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ + /* software error */ +#define T4_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ + /* mismatch) */ +/* + * CQE defs + */ +struct t4_cqe { + __be32 header; + __be32 len; + union { + struct { + __be32 stag; + __be32 msn; + } rcqe; + struct { + __be32 stag; + u16 nada2; + u16 cidx; + } scqe; + struct { + __be32 wrid_hi; + __be32 wrid_low; + } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be32 mo; + __be32 msn; + /* + * Use union for immediate data to be consistent with + * stack's 32 bit data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; + } imm_data_rcqe; + + u64 drain_cookie; + __be64 flits[3]; + } u; + __be64 reserved[3]; + __be64 bits_type_ts; +}; + +/* macros for flit 0 of the cqe */ + +#define CQE_QPID_S 12 +#define CQE_QPID_M 0xFFFFF +#define CQE_QPID_G(x) ((((x) >> CQE_QPID_S)) & CQE_QPID_M) +#define CQE_QPID_V(x) ((x)<<CQE_QPID_S) + +#define CQE_SWCQE_S 11 +#define CQE_SWCQE_M 0x1 +#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) +#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S) + +#define CQE_DRAIN_S 10 +#define CQE_DRAIN_M 0x1 +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M) +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S) + +#define CQE_STATUS_S 5 +#define CQE_STATUS_M 0x1F +#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) +#define CQE_STATUS_V(x) ((x)<<CQE_STATUS_S) + +#define CQE_TYPE_S 4 +#define CQE_TYPE_M 0x1 +#define CQE_TYPE_G(x) ((((x) >> CQE_TYPE_S)) & CQE_TYPE_M) +#define CQE_TYPE_V(x) ((x)<<CQE_TYPE_S) + +#define CQE_OPCODE_S 0 +#define CQE_OPCODE_M 0xF +#define CQE_OPCODE_G(x) ((((x) >> CQE_OPCODE_S)) & CQE_OPCODE_M) +#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S) + +#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header))) +#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) +#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) +#define SQ_TYPE(x) (CQE_TYPE((x))) +#define RQ_TYPE(x) (!CQE_TYPE((x))) +#define CQE_STATUS(x) (CQE_STATUS_G(be32_to_cpu((x)->header))) +#define CQE_OPCODE(x) (CQE_OPCODE_G(be32_to_cpu((x)->header))) + +#define CQE_SEND_OPCODE(x)( \ + (CQE_OPCODE_G(be32_to_cpu((x)->header)) == FW_RI_SEND) || \ + (CQE_OPCODE_G(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE) || \ + (CQE_OPCODE_G(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_INV) || \ + (CQE_OPCODE_G(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE_INV)) + +#define CQE_LEN(x) (be32_to_cpu((x)->len)) + +/* used for RQ completion processing */ +#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) +#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) +#define CQE_IMM_DATA(x)( \ + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) + +/* used for SQ completion processing */ +#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) +#define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) + +/* generic accessor macros */ +#define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) +#define CQE_WRID_LOW(x) (be32_to_cpu((x)->u.gen.wrid_low)) +#define CQE_DRAIN_COOKIE(x) ((x)->u.drain_cookie) + +/* macros for flit 3 of the cqe */ +#define CQE_GENBIT_S 63 +#define CQE_GENBIT_M 0x1 +#define CQE_GENBIT_G(x) (((x) >> CQE_GENBIT_S) & CQE_GENBIT_M) +#define CQE_GENBIT_V(x) ((x)<<CQE_GENBIT_S) + +#define CQE_OVFBIT_S 62 +#define CQE_OVFBIT_M 0x1 +#define CQE_OVFBIT_G(x) ((((x) >> CQE_OVFBIT_S)) & CQE_OVFBIT_M) + +#define CQE_IQTYPE_S 60 +#define CQE_IQTYPE_M 0x3 +#define CQE_IQTYPE_G(x) ((((x) >> CQE_IQTYPE_S)) & CQE_IQTYPE_M) + +#define CQE_TS_M 0x0fffffffffffffffULL +#define CQE_TS_G(x) ((x) & CQE_TS_M) + +#define CQE_OVFBIT(x) ((unsigned)CQE_OVFBIT_G(be64_to_cpu((x)->bits_type_ts))) +#define CQE_GENBIT(x) ((unsigned)CQE_GENBIT_G(be64_to_cpu((x)->bits_type_ts))) +#define CQE_TS(x) (CQE_TS_G(be64_to_cpu((x)->bits_type_ts))) + +struct t4_swsqe { + u64 wr_id; + struct t4_cqe cqe; + int read_len; + int opcode; + int complete; + int signaled; + u16 idx; + int flushed; + ktime_t host_time; + u64 sge_ts; +}; + +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return pgprot_writecombine(prot); +#else + return pgprot_noncached(prot); +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), +}; + +struct t4_sq { + union t4_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; + struct t4_swsqe *sw_sq; + struct t4_swsqe *oldest_read; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 flags; + short flush_cidx; +}; + +struct t4_swrqe { + u64 wr_id; + ktime_t host_time; + u64 sge_ts; + int valid; +}; + +struct t4_rq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u16 rqt_size; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; +}; + +struct t4_wq { + struct t4_sq sq; + struct t4_rq rq; + void __iomem *db; + struct c4iw_rdev *rdev; + int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; +}; + +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + srq->in_use--; + srq->ooo_count++; +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + +static inline int t4_rqes_posted(struct t4_wq *wq) +{ + return wq->rq.in_use; +} + +static inline int t4_rq_empty(struct t4_wq *wq) +{ + return wq->rq.in_use == 0; +} + +static inline u32 t4_rq_avail(struct t4_wq *wq) +{ + return wq->rq.size - 1 - wq->rq.in_use; +} + +static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) +{ + wq->rq.in_use++; + if (++wq->rq.pidx == wq->rq.size) + wq->rq.pidx = 0; + wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS) + wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline void t4_rq_consume(struct t4_wq *wq) +{ + wq->rq.in_use--; + if (++wq->rq.cidx == wq->rq.size) + wq->rq.cidx = 0; +} + +static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.host_wq_pidx; +} + +static inline u16 t4_rq_wq_size(struct t4_wq *wq) +{ + return wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; +} + +static inline int t4_sq_empty(struct t4_wq *wq) +{ + return wq->sq.in_use == 0; +} + +static inline u32 t4_sq_avail(struct t4_wq *wq) +{ + return wq->sq.size - 1 - wq->sq.in_use; +} + +static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) +{ + wq->sq.in_use++; + if (++wq->sq.pidx == wq->sq.size) + wq->sq.pidx = 0; + wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS) + wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS; +} + +static inline void t4_sq_consume(struct t4_wq *wq) +{ + if (wq->sq.cidx == wq->sq.flush_cidx) + wq->sq.flush_cidx = -1; + wq->sq.in_use--; + if (++wq->sq.cidx == wq->sq.size) + wq->sq.cidx = 0; +} + +static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->sq.queue[wq->sq.size].status.host_wq_pidx; +} + +static inline u16 t4_sq_wq_size(struct t4_wq *wq) +{ + return wq->sq.size * T4_SQ_NUM_SLOTS; +} + +/* This function copies 64 byte coalesced work request to memory + * mapped BAR2 space. For coalesced WRs, the SGE fetches data + * from the FIFO instead of from Host. + */ +static inline void pio_copy(u64 __iomem *dst, u64 *src) +{ + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } +} + +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + /* Flush host queue memory writes. */ + wmb(); + if (inc == 1 && srq->bar2_qid == 0 && wqe) { + pr_debug("%s : WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe); + } else { + pr_debug("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid), + srq->bar2_va + SGE_UDB_KDOORBELL); + } + /* Flush user doorbell area writes. */ + wmb(); +} + +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) +{ + + /* Flush host queue memory writes. */ + wmb(); + if (wq->sq.bar2_va) { + if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) { + pr_debug("WC wq->sq.pidx = %d\n", wq->sq.pidx); + pio_copy((u64 __iomem *) + (wq->sq.bar2_va + SGE_UDB_WCDOORBELL), + (u64 *)wqe); + } else { + pr_debug("DB wq->sq.pidx = %d\n", wq->sq.pidx); + writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid), + wq->sq.bar2_va + SGE_UDB_KDOORBELL); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } + writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db); +} + +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, + union t4_recv_wr *wqe) +{ + + /* Flush host queue memory writes. */ + wmb(); + if (wq->rq.bar2_va) { + if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) { + pr_debug("WC wq->rq.pidx = %d\n", wq->rq.pidx); + pio_copy((u64 __iomem *) + (wq->rq.bar2_va + SGE_UDB_WCDOORBELL), + (void *)wqe); + } else { + pr_debug("DB wq->rq.pidx = %d\n", wq->rq.pidx); + writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid), + wq->rq.bar2_va + SGE_UDB_KDOORBELL); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } + writel(QID_V(wq->rq.qid) | PIDX_V(inc), wq->db); +} + +static inline int t4_wq_in_error(struct t4_wq *wq) +{ + return *wq->qp_errp; +} + +static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx) +{ + if (srqidx) + *wq->srqidxp = srqidx; + *wq->qp_errp = 1; +} + +static inline void t4_disable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 1; +} + +static inline void t4_enable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 0; +} + +enum t4_cq_flags { + CQ_ARMED = 1, +}; + +struct t4_cq { + struct t4_cqe *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_cqe *sw_queue; + void __iomem *gts; + void __iomem *bar2_va; + u64 bar2_pa; + u32 bar2_qid; + struct c4iw_rdev *rdev; + size_t memsize; + __be64 bits_type_ts; + u32 cqid; + u32 qid_mask; + int vector; + u16 size; /* including status page */ + u16 cidx; + u16 sw_pidx; + u16 sw_cidx; + u16 sw_in_use; + u16 cidx_inc; + u8 gen; + u8 error; + u8 *qp_errp; + unsigned long flags; +}; + +static inline void write_gts(struct t4_cq *cq, u32 val) +{ + if (cq->bar2_va) + writel(val | INGRESSQID_V(cq->bar2_qid), + cq->bar2_va + SGE_UDB_GTS); + else + writel(val | INGRESSQID_V(cq->cqid), cq->gts); +} + +static inline int t4_clear_cq_armed(struct t4_cq *cq) +{ + return test_and_clear_bit(CQ_ARMED, &cq->flags); +} + +static inline int t4_arm_cq(struct t4_cq *cq, int se) +{ + u32 val; + + set_bit(CQ_ARMED, &cq->flags); + while (cq->cidx_inc > CIDXINC_M) { + val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7); + write_gts(cq, val); + cq->cidx_inc -= CIDXINC_M; + } + val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6); + write_gts(cq, val); + cq->cidx_inc = 0; + return 0; +} + +static inline void t4_swcq_produce(struct t4_cq *cq) +{ + cq->sw_in_use++; + if (cq->sw_in_use == cq->size) { + pr_warn("%s cxgb4 sw cq overflow cqid %u\n", + __func__, cq->cqid); + cq->error = 1; + cq->sw_in_use--; + return; + } + if (++cq->sw_pidx == cq->size) + cq->sw_pidx = 0; +} + +static inline void t4_swcq_consume(struct t4_cq *cq) +{ + cq->sw_in_use--; + if (++cq->sw_cidx == cq->size) + cq->sw_cidx = 0; +} + +static inline void t4_hwcq_consume(struct t4_cq *cq) +{ + cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts; + if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_M) { + u32 val; + + val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7); + write_gts(cq, val); + cq->cidx_inc = 0; + } + if (++cq->cidx == cq->size) { + cq->cidx = 0; + cq->gen ^= 1; + } +} + +static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) +{ + return (CQE_GENBIT(cqe) == cq->gen); +} + +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + +static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret; + u16 prev_cidx; + + if (cq->cidx == 0) + prev_cidx = cq->size - 1; + else + prev_cidx = cq->cidx - 1; + + if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) { + ret = -EOVERFLOW; + cq->error = 1; + pr_err("cq overflow cqid %u\n", cq->cqid); + } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + + /* Ensure CQE is flushed to memory */ + rmb(); + *cqe = &cq->queue[cq->cidx]; + ret = 0; + } else + ret = -ENODATA; + return ret; +} + +static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret = 0; + + if (cq->error) + ret = -ENODATA; + else if (cq->sw_in_use) + *cqe = &cq->sw_queue[cq->sw_cidx]; + else + ret = t4_next_hw_cqe(cq, cqe); + return ret; +} + +static inline void t4_set_cq_in_error(struct t4_cq *cq) +{ + *cq->qp_errp = 1; +} +#endif + +struct t4_dev_status_page { + u8 db_off; + u8 write_cmpl_supported; + u16 pad2; + u32 pad3; + u64 qp_start; + u64 qp_size; + u64 cq_start; + u64 cq_size; +}; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h new file mode 100644 index 000000000..a2f5e29ef --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -0,0 +1,830 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _T4FW_RI_API_H_ +#define _T4FW_RI_API_H_ + +#include "t4fw_api.h" + +enum fw_ri_wr_opcode { + FW_RI_RDMA_WRITE = 0x0, /* IETF RDMAP v1.0 ... */ + FW_RI_READ_REQ = 0x1, + FW_RI_READ_RESP = 0x2, + FW_RI_SEND = 0x3, + FW_RI_SEND_WITH_INV = 0x4, + FW_RI_SEND_WITH_SE = 0x5, + FW_RI_SEND_WITH_SE_INV = 0x6, + FW_RI_TERMINATE = 0x7, + FW_RI_RDMA_INIT = 0x8, /* CHELSIO RI specific ... */ + FW_RI_BIND_MW = 0x9, + FW_RI_FAST_REGISTER = 0xa, + FW_RI_LOCAL_INV = 0xb, + FW_RI_QP_MODIFY = 0xc, + FW_RI_BYPASS = 0xd, + FW_RI_RECEIVE = 0xe, + + FW_RI_SGE_EC_CR_RETURN = 0xf, + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT +}; + +enum fw_ri_wr_flags { + FW_RI_COMPLETION_FLAG = 0x01, + FW_RI_NOTIFICATION_FLAG = 0x02, + FW_RI_SOLICITED_EVENT_FLAG = 0x04, + FW_RI_READ_FENCE_FLAG = 0x08, + FW_RI_LOCAL_FENCE_FLAG = 0x10, + FW_RI_RDMA_READ_INVALIDATE = 0x20, + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 +}; + +enum fw_ri_mpa_attrs { + FW_RI_MPA_RX_MARKER_ENABLE = 0x01, + FW_RI_MPA_TX_MARKER_ENABLE = 0x02, + FW_RI_MPA_CRC_ENABLE = 0x04, + FW_RI_MPA_IETF_ENABLE = 0x08 +}; + +enum fw_ri_qp_caps { + FW_RI_QP_RDMA_READ_ENABLE = 0x01, + FW_RI_QP_RDMA_WRITE_ENABLE = 0x02, + FW_RI_QP_BIND_ENABLE = 0x04, + FW_RI_QP_FAST_REGISTER_ENABLE = 0x08, + FW_RI_QP_STAG0_ENABLE = 0x10 +}; + +enum fw_ri_addr_type { + FW_RI_ZERO_BASED_TO = 0x00, + FW_RI_VA_BASED_TO = 0x01 +}; + +enum fw_ri_mem_perms { + FW_RI_MEM_ACCESS_REM_WRITE = 0x01, + FW_RI_MEM_ACCESS_REM_READ = 0x02, + FW_RI_MEM_ACCESS_REM = 0x03, + FW_RI_MEM_ACCESS_LOCAL_WRITE = 0x04, + FW_RI_MEM_ACCESS_LOCAL_READ = 0x08, + FW_RI_MEM_ACCESS_LOCAL = 0x0C +}; + +enum fw_ri_stag_type { + FW_RI_STAG_NSMR = 0x00, + FW_RI_STAG_SMR = 0x01, + FW_RI_STAG_MW = 0x02, + FW_RI_STAG_MW_RELAXED = 0x03 +}; + +enum fw_ri_data_op { + FW_RI_DATA_IMMD = 0x81, + FW_RI_DATA_DSGL = 0x82, + FW_RI_DATA_ISGL = 0x83 +}; + +enum fw_ri_sgl_depth { + FW_RI_SGL_DEPTH_MAX_SQ = 16, + FW_RI_SGL_DEPTH_MAX_RQ = 4 +}; + +struct fw_ri_dsge_pair { + __be32 len[2]; + __be64 addr[2]; +}; + +struct fw_ri_dsgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 len0; + __be64 addr0; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_dsge_pair sge[]; +#endif +}; + +struct fw_ri_sge { + __be32 stag; + __be32 len; + __be64 to; +}; + +struct fw_ri_isgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 r2; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_sge sge[]; +#endif +}; + +struct fw_ri_immd { + __u8 op; + __u8 r1; + __be16 r2; + __be32 immdlen; +#ifndef C99_NOT_SUPPORTED + __u8 data[]; +#endif +}; + +struct fw_ri_tpte { + __be32 valid_to_pdid; + __be32 locread_to_qpid; + __be32 nosnoop_pbladdr; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; + __be32 dca_mwbcnt_pstag; + __be32 len_hi; +}; + +#define FW_RI_TPTE_VALID_S 31 +#define FW_RI_TPTE_VALID_M 0x1 +#define FW_RI_TPTE_VALID_V(x) ((x) << FW_RI_TPTE_VALID_S) +#define FW_RI_TPTE_VALID_G(x) \ + (((x) >> FW_RI_TPTE_VALID_S) & FW_RI_TPTE_VALID_M) +#define FW_RI_TPTE_VALID_F FW_RI_TPTE_VALID_V(1U) + +#define FW_RI_TPTE_STAGKEY_S 23 +#define FW_RI_TPTE_STAGKEY_M 0xff +#define FW_RI_TPTE_STAGKEY_V(x) ((x) << FW_RI_TPTE_STAGKEY_S) +#define FW_RI_TPTE_STAGKEY_G(x) \ + (((x) >> FW_RI_TPTE_STAGKEY_S) & FW_RI_TPTE_STAGKEY_M) + +#define FW_RI_TPTE_STAGSTATE_S 22 +#define FW_RI_TPTE_STAGSTATE_M 0x1 +#define FW_RI_TPTE_STAGSTATE_V(x) ((x) << FW_RI_TPTE_STAGSTATE_S) +#define FW_RI_TPTE_STAGSTATE_G(x) \ + (((x) >> FW_RI_TPTE_STAGSTATE_S) & FW_RI_TPTE_STAGSTATE_M) +#define FW_RI_TPTE_STAGSTATE_F FW_RI_TPTE_STAGSTATE_V(1U) + +#define FW_RI_TPTE_STAGTYPE_S 20 +#define FW_RI_TPTE_STAGTYPE_M 0x3 +#define FW_RI_TPTE_STAGTYPE_V(x) ((x) << FW_RI_TPTE_STAGTYPE_S) +#define FW_RI_TPTE_STAGTYPE_G(x) \ + (((x) >> FW_RI_TPTE_STAGTYPE_S) & FW_RI_TPTE_STAGTYPE_M) + +#define FW_RI_TPTE_PDID_S 0 +#define FW_RI_TPTE_PDID_M 0xfffff +#define FW_RI_TPTE_PDID_V(x) ((x) << FW_RI_TPTE_PDID_S) +#define FW_RI_TPTE_PDID_G(x) \ + (((x) >> FW_RI_TPTE_PDID_S) & FW_RI_TPTE_PDID_M) + +#define FW_RI_TPTE_PERM_S 28 +#define FW_RI_TPTE_PERM_M 0xf +#define FW_RI_TPTE_PERM_V(x) ((x) << FW_RI_TPTE_PERM_S) +#define FW_RI_TPTE_PERM_G(x) \ + (((x) >> FW_RI_TPTE_PERM_S) & FW_RI_TPTE_PERM_M) + +#define FW_RI_TPTE_REMINVDIS_S 27 +#define FW_RI_TPTE_REMINVDIS_M 0x1 +#define FW_RI_TPTE_REMINVDIS_V(x) ((x) << FW_RI_TPTE_REMINVDIS_S) +#define FW_RI_TPTE_REMINVDIS_G(x) \ + (((x) >> FW_RI_TPTE_REMINVDIS_S) & FW_RI_TPTE_REMINVDIS_M) +#define FW_RI_TPTE_REMINVDIS_F FW_RI_TPTE_REMINVDIS_V(1U) + +#define FW_RI_TPTE_ADDRTYPE_S 26 +#define FW_RI_TPTE_ADDRTYPE_M 1 +#define FW_RI_TPTE_ADDRTYPE_V(x) ((x) << FW_RI_TPTE_ADDRTYPE_S) +#define FW_RI_TPTE_ADDRTYPE_G(x) \ + (((x) >> FW_RI_TPTE_ADDRTYPE_S) & FW_RI_TPTE_ADDRTYPE_M) +#define FW_RI_TPTE_ADDRTYPE_F FW_RI_TPTE_ADDRTYPE_V(1U) + +#define FW_RI_TPTE_MWBINDEN_S 25 +#define FW_RI_TPTE_MWBINDEN_M 0x1 +#define FW_RI_TPTE_MWBINDEN_V(x) ((x) << FW_RI_TPTE_MWBINDEN_S) +#define FW_RI_TPTE_MWBINDEN_G(x) \ + (((x) >> FW_RI_TPTE_MWBINDEN_S) & FW_RI_TPTE_MWBINDEN_M) +#define FW_RI_TPTE_MWBINDEN_F FW_RI_TPTE_MWBINDEN_V(1U) + +#define FW_RI_TPTE_PS_S 20 +#define FW_RI_TPTE_PS_M 0x1f +#define FW_RI_TPTE_PS_V(x) ((x) << FW_RI_TPTE_PS_S) +#define FW_RI_TPTE_PS_G(x) \ + (((x) >> FW_RI_TPTE_PS_S) & FW_RI_TPTE_PS_M) + +#define FW_RI_TPTE_QPID_S 0 +#define FW_RI_TPTE_QPID_M 0xfffff +#define FW_RI_TPTE_QPID_V(x) ((x) << FW_RI_TPTE_QPID_S) +#define FW_RI_TPTE_QPID_G(x) \ + (((x) >> FW_RI_TPTE_QPID_S) & FW_RI_TPTE_QPID_M) + +#define FW_RI_TPTE_NOSNOOP_S 30 +#define FW_RI_TPTE_NOSNOOP_M 0x1 +#define FW_RI_TPTE_NOSNOOP_V(x) ((x) << FW_RI_TPTE_NOSNOOP_S) +#define FW_RI_TPTE_NOSNOOP_G(x) \ + (((x) >> FW_RI_TPTE_NOSNOOP_S) & FW_RI_TPTE_NOSNOOP_M) +#define FW_RI_TPTE_NOSNOOP_F FW_RI_TPTE_NOSNOOP_V(1U) + +#define FW_RI_TPTE_PBLADDR_S 0 +#define FW_RI_TPTE_PBLADDR_M 0x1fffffff +#define FW_RI_TPTE_PBLADDR_V(x) ((x) << FW_RI_TPTE_PBLADDR_S) +#define FW_RI_TPTE_PBLADDR_G(x) \ + (((x) >> FW_RI_TPTE_PBLADDR_S) & FW_RI_TPTE_PBLADDR_M) + +#define FW_RI_TPTE_DCA_S 24 +#define FW_RI_TPTE_DCA_M 0x1f +#define FW_RI_TPTE_DCA_V(x) ((x) << FW_RI_TPTE_DCA_S) +#define FW_RI_TPTE_DCA_G(x) \ + (((x) >> FW_RI_TPTE_DCA_S) & FW_RI_TPTE_DCA_M) + +#define FW_RI_TPTE_MWBCNT_PSTAG_S 0 +#define FW_RI_TPTE_MWBCNT_PSTAG_M 0xffffff +#define FW_RI_TPTE_MWBCNT_PSTAT_V(x) \ + ((x) << FW_RI_TPTE_MWBCNT_PSTAG_S) +#define FW_RI_TPTE_MWBCNT_PSTAG_G(x) \ + (((x) >> FW_RI_TPTE_MWBCNT_PSTAG_S) & FW_RI_TPTE_MWBCNT_PSTAG_M) + +enum fw_ri_res_type { + FW_RI_RES_TYPE_SQ, + FW_RI_RES_TYPE_RQ, + FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, +}; + +enum fw_ri_res_op { + FW_RI_RES_OP_WRITE, + FW_RI_RES_OP_RESET, +}; + +struct fw_ri_res { + union fw_ri_restype { + struct fw_ri_res_sqrq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + } sqrq; + struct fw_ri_res_cq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 iqid; + __be32 r4[2]; + __be32 iqandst_to_iqandstindex; + __be16 iqdroprss_to_iqesize; + __be16 iqsize; + __be64 iqaddr; + __be32 iqns_iqro; + __be32 r6_lo; + __be64 r7; + } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; + } u; +}; + +struct fw_ri_res_wr { + __be32 op_nres; + __be32 len16_pkd; + __u64 cookie; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_res res[]; +#endif +}; + +#define FW_RI_RES_WR_NRES_S 0 +#define FW_RI_RES_WR_NRES_M 0xff +#define FW_RI_RES_WR_NRES_V(x) ((x) << FW_RI_RES_WR_NRES_S) +#define FW_RI_RES_WR_NRES_G(x) \ + (((x) >> FW_RI_RES_WR_NRES_S) & FW_RI_RES_WR_NRES_M) + +#define FW_RI_RES_WR_FETCHSZM_S 26 +#define FW_RI_RES_WR_FETCHSZM_M 0x1 +#define FW_RI_RES_WR_FETCHSZM_V(x) ((x) << FW_RI_RES_WR_FETCHSZM_S) +#define FW_RI_RES_WR_FETCHSZM_G(x) \ + (((x) >> FW_RI_RES_WR_FETCHSZM_S) & FW_RI_RES_WR_FETCHSZM_M) +#define FW_RI_RES_WR_FETCHSZM_F FW_RI_RES_WR_FETCHSZM_V(1U) + +#define FW_RI_RES_WR_STATUSPGNS_S 25 +#define FW_RI_RES_WR_STATUSPGNS_M 0x1 +#define FW_RI_RES_WR_STATUSPGNS_V(x) ((x) << FW_RI_RES_WR_STATUSPGNS_S) +#define FW_RI_RES_WR_STATUSPGNS_G(x) \ + (((x) >> FW_RI_RES_WR_STATUSPGNS_S) & FW_RI_RES_WR_STATUSPGNS_M) +#define FW_RI_RES_WR_STATUSPGNS_F FW_RI_RES_WR_STATUSPGNS_V(1U) + +#define FW_RI_RES_WR_STATUSPGRO_S 24 +#define FW_RI_RES_WR_STATUSPGRO_M 0x1 +#define FW_RI_RES_WR_STATUSPGRO_V(x) ((x) << FW_RI_RES_WR_STATUSPGRO_S) +#define FW_RI_RES_WR_STATUSPGRO_G(x) \ + (((x) >> FW_RI_RES_WR_STATUSPGRO_S) & FW_RI_RES_WR_STATUSPGRO_M) +#define FW_RI_RES_WR_STATUSPGRO_F FW_RI_RES_WR_STATUSPGRO_V(1U) + +#define FW_RI_RES_WR_FETCHNS_S 23 +#define FW_RI_RES_WR_FETCHNS_M 0x1 +#define FW_RI_RES_WR_FETCHNS_V(x) ((x) << FW_RI_RES_WR_FETCHNS_S) +#define FW_RI_RES_WR_FETCHNS_G(x) \ + (((x) >> FW_RI_RES_WR_FETCHNS_S) & FW_RI_RES_WR_FETCHNS_M) +#define FW_RI_RES_WR_FETCHNS_F FW_RI_RES_WR_FETCHNS_V(1U) + +#define FW_RI_RES_WR_FETCHRO_S 22 +#define FW_RI_RES_WR_FETCHRO_M 0x1 +#define FW_RI_RES_WR_FETCHRO_V(x) ((x) << FW_RI_RES_WR_FETCHRO_S) +#define FW_RI_RES_WR_FETCHRO_G(x) \ + (((x) >> FW_RI_RES_WR_FETCHRO_S) & FW_RI_RES_WR_FETCHRO_M) +#define FW_RI_RES_WR_FETCHRO_F FW_RI_RES_WR_FETCHRO_V(1U) + +#define FW_RI_RES_WR_HOSTFCMODE_S 20 +#define FW_RI_RES_WR_HOSTFCMODE_M 0x3 +#define FW_RI_RES_WR_HOSTFCMODE_V(x) ((x) << FW_RI_RES_WR_HOSTFCMODE_S) +#define FW_RI_RES_WR_HOSTFCMODE_G(x) \ + (((x) >> FW_RI_RES_WR_HOSTFCMODE_S) & FW_RI_RES_WR_HOSTFCMODE_M) + +#define FW_RI_RES_WR_CPRIO_S 19 +#define FW_RI_RES_WR_CPRIO_M 0x1 +#define FW_RI_RES_WR_CPRIO_V(x) ((x) << FW_RI_RES_WR_CPRIO_S) +#define FW_RI_RES_WR_CPRIO_G(x) \ + (((x) >> FW_RI_RES_WR_CPRIO_S) & FW_RI_RES_WR_CPRIO_M) +#define FW_RI_RES_WR_CPRIO_F FW_RI_RES_WR_CPRIO_V(1U) + +#define FW_RI_RES_WR_ONCHIP_S 18 +#define FW_RI_RES_WR_ONCHIP_M 0x1 +#define FW_RI_RES_WR_ONCHIP_V(x) ((x) << FW_RI_RES_WR_ONCHIP_S) +#define FW_RI_RES_WR_ONCHIP_G(x) \ + (((x) >> FW_RI_RES_WR_ONCHIP_S) & FW_RI_RES_WR_ONCHIP_M) +#define FW_RI_RES_WR_ONCHIP_F FW_RI_RES_WR_ONCHIP_V(1U) + +#define FW_RI_RES_WR_PCIECHN_S 16 +#define FW_RI_RES_WR_PCIECHN_M 0x3 +#define FW_RI_RES_WR_PCIECHN_V(x) ((x) << FW_RI_RES_WR_PCIECHN_S) +#define FW_RI_RES_WR_PCIECHN_G(x) \ + (((x) >> FW_RI_RES_WR_PCIECHN_S) & FW_RI_RES_WR_PCIECHN_M) + +#define FW_RI_RES_WR_IQID_S 0 +#define FW_RI_RES_WR_IQID_M 0xffff +#define FW_RI_RES_WR_IQID_V(x) ((x) << FW_RI_RES_WR_IQID_S) +#define FW_RI_RES_WR_IQID_G(x) \ + (((x) >> FW_RI_RES_WR_IQID_S) & FW_RI_RES_WR_IQID_M) + +#define FW_RI_RES_WR_DCAEN_S 31 +#define FW_RI_RES_WR_DCAEN_M 0x1 +#define FW_RI_RES_WR_DCAEN_V(x) ((x) << FW_RI_RES_WR_DCAEN_S) +#define FW_RI_RES_WR_DCAEN_G(x) \ + (((x) >> FW_RI_RES_WR_DCAEN_S) & FW_RI_RES_WR_DCAEN_M) +#define FW_RI_RES_WR_DCAEN_F FW_RI_RES_WR_DCAEN_V(1U) + +#define FW_RI_RES_WR_DCACPU_S 26 +#define FW_RI_RES_WR_DCACPU_M 0x1f +#define FW_RI_RES_WR_DCACPU_V(x) ((x) << FW_RI_RES_WR_DCACPU_S) +#define FW_RI_RES_WR_DCACPU_G(x) \ + (((x) >> FW_RI_RES_WR_DCACPU_S) & FW_RI_RES_WR_DCACPU_M) + +#define FW_RI_RES_WR_FBMIN_S 23 +#define FW_RI_RES_WR_FBMIN_M 0x7 +#define FW_RI_RES_WR_FBMIN_V(x) ((x) << FW_RI_RES_WR_FBMIN_S) +#define FW_RI_RES_WR_FBMIN_G(x) \ + (((x) >> FW_RI_RES_WR_FBMIN_S) & FW_RI_RES_WR_FBMIN_M) + +#define FW_RI_RES_WR_FBMAX_S 20 +#define FW_RI_RES_WR_FBMAX_M 0x7 +#define FW_RI_RES_WR_FBMAX_V(x) ((x) << FW_RI_RES_WR_FBMAX_S) +#define FW_RI_RES_WR_FBMAX_G(x) \ + (((x) >> FW_RI_RES_WR_FBMAX_S) & FW_RI_RES_WR_FBMAX_M) + +#define FW_RI_RES_WR_CIDXFTHRESHO_S 19 +#define FW_RI_RES_WR_CIDXFTHRESHO_M 0x1 +#define FW_RI_RES_WR_CIDXFTHRESHO_V(x) ((x) << FW_RI_RES_WR_CIDXFTHRESHO_S) +#define FW_RI_RES_WR_CIDXFTHRESHO_G(x) \ + (((x) >> FW_RI_RES_WR_CIDXFTHRESHO_S) & FW_RI_RES_WR_CIDXFTHRESHO_M) +#define FW_RI_RES_WR_CIDXFTHRESHO_F FW_RI_RES_WR_CIDXFTHRESHO_V(1U) + +#define FW_RI_RES_WR_CIDXFTHRESH_S 16 +#define FW_RI_RES_WR_CIDXFTHRESH_M 0x7 +#define FW_RI_RES_WR_CIDXFTHRESH_V(x) ((x) << FW_RI_RES_WR_CIDXFTHRESH_S) +#define FW_RI_RES_WR_CIDXFTHRESH_G(x) \ + (((x) >> FW_RI_RES_WR_CIDXFTHRESH_S) & FW_RI_RES_WR_CIDXFTHRESH_M) + +#define FW_RI_RES_WR_EQSIZE_S 0 +#define FW_RI_RES_WR_EQSIZE_M 0xffff +#define FW_RI_RES_WR_EQSIZE_V(x) ((x) << FW_RI_RES_WR_EQSIZE_S) +#define FW_RI_RES_WR_EQSIZE_G(x) \ + (((x) >> FW_RI_RES_WR_EQSIZE_S) & FW_RI_RES_WR_EQSIZE_M) + +#define FW_RI_RES_WR_IQANDST_S 15 +#define FW_RI_RES_WR_IQANDST_M 0x1 +#define FW_RI_RES_WR_IQANDST_V(x) ((x) << FW_RI_RES_WR_IQANDST_S) +#define FW_RI_RES_WR_IQANDST_G(x) \ + (((x) >> FW_RI_RES_WR_IQANDST_S) & FW_RI_RES_WR_IQANDST_M) +#define FW_RI_RES_WR_IQANDST_F FW_RI_RES_WR_IQANDST_V(1U) + +#define FW_RI_RES_WR_IQANUS_S 14 +#define FW_RI_RES_WR_IQANUS_M 0x1 +#define FW_RI_RES_WR_IQANUS_V(x) ((x) << FW_RI_RES_WR_IQANUS_S) +#define FW_RI_RES_WR_IQANUS_G(x) \ + (((x) >> FW_RI_RES_WR_IQANUS_S) & FW_RI_RES_WR_IQANUS_M) +#define FW_RI_RES_WR_IQANUS_F FW_RI_RES_WR_IQANUS_V(1U) + +#define FW_RI_RES_WR_IQANUD_S 12 +#define FW_RI_RES_WR_IQANUD_M 0x3 +#define FW_RI_RES_WR_IQANUD_V(x) ((x) << FW_RI_RES_WR_IQANUD_S) +#define FW_RI_RES_WR_IQANUD_G(x) \ + (((x) >> FW_RI_RES_WR_IQANUD_S) & FW_RI_RES_WR_IQANUD_M) + +#define FW_RI_RES_WR_IQANDSTINDEX_S 0 +#define FW_RI_RES_WR_IQANDSTINDEX_M 0xfff +#define FW_RI_RES_WR_IQANDSTINDEX_V(x) ((x) << FW_RI_RES_WR_IQANDSTINDEX_S) +#define FW_RI_RES_WR_IQANDSTINDEX_G(x) \ + (((x) >> FW_RI_RES_WR_IQANDSTINDEX_S) & FW_RI_RES_WR_IQANDSTINDEX_M) + +#define FW_RI_RES_WR_IQDROPRSS_S 15 +#define FW_RI_RES_WR_IQDROPRSS_M 0x1 +#define FW_RI_RES_WR_IQDROPRSS_V(x) ((x) << FW_RI_RES_WR_IQDROPRSS_S) +#define FW_RI_RES_WR_IQDROPRSS_G(x) \ + (((x) >> FW_RI_RES_WR_IQDROPRSS_S) & FW_RI_RES_WR_IQDROPRSS_M) +#define FW_RI_RES_WR_IQDROPRSS_F FW_RI_RES_WR_IQDROPRSS_V(1U) + +#define FW_RI_RES_WR_IQGTSMODE_S 14 +#define FW_RI_RES_WR_IQGTSMODE_M 0x1 +#define FW_RI_RES_WR_IQGTSMODE_V(x) ((x) << FW_RI_RES_WR_IQGTSMODE_S) +#define FW_RI_RES_WR_IQGTSMODE_G(x) \ + (((x) >> FW_RI_RES_WR_IQGTSMODE_S) & FW_RI_RES_WR_IQGTSMODE_M) +#define FW_RI_RES_WR_IQGTSMODE_F FW_RI_RES_WR_IQGTSMODE_V(1U) + +#define FW_RI_RES_WR_IQPCIECH_S 12 +#define FW_RI_RES_WR_IQPCIECH_M 0x3 +#define FW_RI_RES_WR_IQPCIECH_V(x) ((x) << FW_RI_RES_WR_IQPCIECH_S) +#define FW_RI_RES_WR_IQPCIECH_G(x) \ + (((x) >> FW_RI_RES_WR_IQPCIECH_S) & FW_RI_RES_WR_IQPCIECH_M) + +#define FW_RI_RES_WR_IQDCAEN_S 11 +#define FW_RI_RES_WR_IQDCAEN_M 0x1 +#define FW_RI_RES_WR_IQDCAEN_V(x) ((x) << FW_RI_RES_WR_IQDCAEN_S) +#define FW_RI_RES_WR_IQDCAEN_G(x) \ + (((x) >> FW_RI_RES_WR_IQDCAEN_S) & FW_RI_RES_WR_IQDCAEN_M) +#define FW_RI_RES_WR_IQDCAEN_F FW_RI_RES_WR_IQDCAEN_V(1U) + +#define FW_RI_RES_WR_IQDCACPU_S 6 +#define FW_RI_RES_WR_IQDCACPU_M 0x1f +#define FW_RI_RES_WR_IQDCACPU_V(x) ((x) << FW_RI_RES_WR_IQDCACPU_S) +#define FW_RI_RES_WR_IQDCACPU_G(x) \ + (((x) >> FW_RI_RES_WR_IQDCACPU_S) & FW_RI_RES_WR_IQDCACPU_M) + +#define FW_RI_RES_WR_IQINTCNTTHRESH_S 4 +#define FW_RI_RES_WR_IQINTCNTTHRESH_M 0x3 +#define FW_RI_RES_WR_IQINTCNTTHRESH_V(x) \ + ((x) << FW_RI_RES_WR_IQINTCNTTHRESH_S) +#define FW_RI_RES_WR_IQINTCNTTHRESH_G(x) \ + (((x) >> FW_RI_RES_WR_IQINTCNTTHRESH_S) & FW_RI_RES_WR_IQINTCNTTHRESH_M) + +#define FW_RI_RES_WR_IQO_S 3 +#define FW_RI_RES_WR_IQO_M 0x1 +#define FW_RI_RES_WR_IQO_V(x) ((x) << FW_RI_RES_WR_IQO_S) +#define FW_RI_RES_WR_IQO_G(x) \ + (((x) >> FW_RI_RES_WR_IQO_S) & FW_RI_RES_WR_IQO_M) +#define FW_RI_RES_WR_IQO_F FW_RI_RES_WR_IQO_V(1U) + +#define FW_RI_RES_WR_IQCPRIO_S 2 +#define FW_RI_RES_WR_IQCPRIO_M 0x1 +#define FW_RI_RES_WR_IQCPRIO_V(x) ((x) << FW_RI_RES_WR_IQCPRIO_S) +#define FW_RI_RES_WR_IQCPRIO_G(x) \ + (((x) >> FW_RI_RES_WR_IQCPRIO_S) & FW_RI_RES_WR_IQCPRIO_M) +#define FW_RI_RES_WR_IQCPRIO_F FW_RI_RES_WR_IQCPRIO_V(1U) + +#define FW_RI_RES_WR_IQESIZE_S 0 +#define FW_RI_RES_WR_IQESIZE_M 0x3 +#define FW_RI_RES_WR_IQESIZE_V(x) ((x) << FW_RI_RES_WR_IQESIZE_S) +#define FW_RI_RES_WR_IQESIZE_G(x) \ + (((x) >> FW_RI_RES_WR_IQESIZE_S) & FW_RI_RES_WR_IQESIZE_M) + +#define FW_RI_RES_WR_IQNS_S 31 +#define FW_RI_RES_WR_IQNS_M 0x1 +#define FW_RI_RES_WR_IQNS_V(x) ((x) << FW_RI_RES_WR_IQNS_S) +#define FW_RI_RES_WR_IQNS_G(x) \ + (((x) >> FW_RI_RES_WR_IQNS_S) & FW_RI_RES_WR_IQNS_M) +#define FW_RI_RES_WR_IQNS_F FW_RI_RES_WR_IQNS_V(1U) + +#define FW_RI_RES_WR_IQRO_S 30 +#define FW_RI_RES_WR_IQRO_M 0x1 +#define FW_RI_RES_WR_IQRO_V(x) ((x) << FW_RI_RES_WR_IQRO_S) +#define FW_RI_RES_WR_IQRO_G(x) \ + (((x) >> FW_RI_RES_WR_IQRO_S) & FW_RI_RES_WR_IQRO_M) +#define FW_RI_RES_WR_IQRO_F FW_RI_RES_WR_IQRO_V(1U) + +struct fw_ri_rdma_write_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + /* + * Use union for immediate data to be consistent with stack's 32 bit + * data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +struct fw_ri_send_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 sendop_pkd; + __be32 stag_inv; + __be32 plen; + __be32 r3; + __be64 r4; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +#define FW_RI_SEND_WR_SENDOP_S 0 +#define FW_RI_SEND_WR_SENDOP_M 0xf +#define FW_RI_SEND_WR_SENDOP_V(x) ((x) << FW_RI_SEND_WR_SENDOP_S) +#define FW_RI_SEND_WR_SENDOP_G(x) \ + (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) + +struct fw_ri_rdma_write_cmpl_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 r2; + __u8 flags_send; + __u16 wrid_send; + __be32 stag_inv; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; + union fw_ri_cmpl { + struct fw_ri_immd_cmpl { + __u8 op; + __u8 r1[6]; + __u8 immdlen; + __u8 data[16]; + } immd_src; + struct fw_ri_isgl isgl_src; + } u_cmpl; + __be64 r3; +#ifndef C99_NOT_SUPPORTED + union fw_ri_write { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +struct fw_ri_rdma_read_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be64 r2; + __be32 stag_sink; + __be32 to_sink_hi; + __be32 to_sink_lo; + __be32 plen; + __be32 stag_src; + __be32 to_src_hi; + __be32 to_src_lo; + __be32 r5; +}; + +struct fw_ri_recv_wr { + __u8 opcode; + __u8 r1; + __u16 wrid; + __u8 r2[3]; + __u8 len16; + struct fw_ri_isgl isgl; +}; + +struct fw_ri_bind_mw_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag_mr; + __be32 stag_mw; + __be32 r3; + __be64 len_mw; + __be64 va_fbo; + __be64 r4; +}; + +#define FW_RI_BIND_MW_WR_QPBINDE_S 6 +#define FW_RI_BIND_MW_WR_QPBINDE_M 0x1 +#define FW_RI_BIND_MW_WR_QPBINDE_V(x) ((x) << FW_RI_BIND_MW_WR_QPBINDE_S) +#define FW_RI_BIND_MW_WR_QPBINDE_G(x) \ + (((x) >> FW_RI_BIND_MW_WR_QPBINDE_S) & FW_RI_BIND_MW_WR_QPBINDE_M) +#define FW_RI_BIND_MW_WR_QPBINDE_F FW_RI_BIND_MW_WR_QPBINDE_V(1U) + +#define FW_RI_BIND_MW_WR_NS_S 5 +#define FW_RI_BIND_MW_WR_NS_M 0x1 +#define FW_RI_BIND_MW_WR_NS_V(x) ((x) << FW_RI_BIND_MW_WR_NS_S) +#define FW_RI_BIND_MW_WR_NS_G(x) \ + (((x) >> FW_RI_BIND_MW_WR_NS_S) & FW_RI_BIND_MW_WR_NS_M) +#define FW_RI_BIND_MW_WR_NS_F FW_RI_BIND_MW_WR_NS_V(1U) + +#define FW_RI_BIND_MW_WR_DCACPU_S 0 +#define FW_RI_BIND_MW_WR_DCACPU_M 0x1f +#define FW_RI_BIND_MW_WR_DCACPU_V(x) ((x) << FW_RI_BIND_MW_WR_DCACPU_S) +#define FW_RI_BIND_MW_WR_DCACPU_G(x) \ + (((x) >> FW_RI_BIND_MW_WR_DCACPU_S) & FW_RI_BIND_MW_WR_DCACPU_M) + +struct fw_ri_fr_nsmr_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag; + __be32 len_hi; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; +}; + +#define FW_RI_FR_NSMR_WR_QPBINDE_S 6 +#define FW_RI_FR_NSMR_WR_QPBINDE_M 0x1 +#define FW_RI_FR_NSMR_WR_QPBINDE_V(x) ((x) << FW_RI_FR_NSMR_WR_QPBINDE_S) +#define FW_RI_FR_NSMR_WR_QPBINDE_G(x) \ + (((x) >> FW_RI_FR_NSMR_WR_QPBINDE_S) & FW_RI_FR_NSMR_WR_QPBINDE_M) +#define FW_RI_FR_NSMR_WR_QPBINDE_F FW_RI_FR_NSMR_WR_QPBINDE_V(1U) + +#define FW_RI_FR_NSMR_WR_NS_S 5 +#define FW_RI_FR_NSMR_WR_NS_M 0x1 +#define FW_RI_FR_NSMR_WR_NS_V(x) ((x) << FW_RI_FR_NSMR_WR_NS_S) +#define FW_RI_FR_NSMR_WR_NS_G(x) \ + (((x) >> FW_RI_FR_NSMR_WR_NS_S) & FW_RI_FR_NSMR_WR_NS_M) +#define FW_RI_FR_NSMR_WR_NS_F FW_RI_FR_NSMR_WR_NS_V(1U) + +#define FW_RI_FR_NSMR_WR_DCACPU_S 0 +#define FW_RI_FR_NSMR_WR_DCACPU_M 0x1f +#define FW_RI_FR_NSMR_WR_DCACPU_V(x) ((x) << FW_RI_FR_NSMR_WR_DCACPU_S) +#define FW_RI_FR_NSMR_WR_DCACPU_G(x) \ + (((x) >> FW_RI_FR_NSMR_WR_DCACPU_S) & FW_RI_FR_NSMR_WR_DCACPU_M) + +struct fw_ri_fr_nsmr_tpte_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 r2; + __be32 stag; + struct fw_ri_tpte tpte; + __u64 pbl[2]; +}; + +struct fw_ri_inv_lstag_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 r2; + __be32 stag_inv; +}; + +enum fw_ri_type { + FW_RI_TYPE_INIT, + FW_RI_TYPE_FINI, + FW_RI_TYPE_TERMINATE +}; + +enum fw_ri_init_p2ptype { + FW_RI_INIT_P2PTYPE_RDMA_WRITE = FW_RI_RDMA_WRITE, + FW_RI_INIT_P2PTYPE_READ_REQ = FW_RI_READ_REQ, + FW_RI_INIT_P2PTYPE_SEND = FW_RI_SEND, + FW_RI_INIT_P2PTYPE_SEND_WITH_INV = FW_RI_SEND_WITH_INV, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE = FW_RI_SEND_WITH_SE, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE_INV = FW_RI_SEND_WITH_SE_INV, + FW_RI_INIT_P2PTYPE_DISABLED = 0xf, +}; + +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + +struct fw_ri_wr { + __be32 op_compl; + __be32 flowid_len16; + __u64 cookie; + union fw_ri { + struct fw_ri_init { + __u8 type; + __u8 mpareqbit_p2ptype; + __u8 r4[2]; + __u8 mpa_attrs; + __u8 qp_caps; + __be16 nrqe; + __be32 pdid; + __be32 qpid; + __be32 sq_eqid; + __be32 rq_eqid; + __be32 scqid; + __be32 rcqid; + __be32 ord_max; + __be32 ird_max; + __be32 iss; + __be32 irs; + __be32 hwrqsize; + __be32 hwrqaddr; + __be64 r5; + union fw_ri_init_p2p { + struct fw_ri_rdma_write_wr write; + struct fw_ri_rdma_read_wr read; + struct fw_ri_send_wr send; + } u; + } init; + struct fw_ri_fini { + __u8 type; + __u8 r3[7]; + __be64 r4; + } fini; + struct fw_ri_terminate { + __u8 type; + __u8 r3[3]; + __be32 immdlen; + __u8 termmsg[40]; + } terminate; + } u; +}; + +#define FW_RI_WR_MPAREQBIT_S 7 +#define FW_RI_WR_MPAREQBIT_M 0x1 +#define FW_RI_WR_MPAREQBIT_V(x) ((x) << FW_RI_WR_MPAREQBIT_S) +#define FW_RI_WR_MPAREQBIT_G(x) \ + (((x) >> FW_RI_WR_MPAREQBIT_S) & FW_RI_WR_MPAREQBIT_M) +#define FW_RI_WR_MPAREQBIT_F FW_RI_WR_MPAREQBIT_V(1U) + +#define FW_RI_WR_P2PTYPE_S 0 +#define FW_RI_WR_P2PTYPE_M 0xf +#define FW_RI_WR_P2PTYPE_V(x) ((x) << FW_RI_WR_P2PTYPE_S) +#define FW_RI_WR_P2PTYPE_G(x) \ + (((x) >> FW_RI_WR_P2PTYPE_S) & FW_RI_WR_P2PTYPE_M) + +#endif /* _T4FW_RI_API_H_ */ |