diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/infiniband/sw/rxe/rxe_req.c | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_req.c')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_req.c | 832 |
1 files changed, 832 insertions, 0 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c new file mode 100644 index 000000000..2ace1007a --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -0,0 +1,832 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include <linux/skbuff.h> +#include <crypto/hash.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + u32 opcode); + +static inline void retry_first_write_send(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, int npsn) +{ + int i; + + for (i = 0; i < npsn; i++) { + int to_send = (wqe->dma.resid > qp->mtu) ? + qp->mtu : wqe->dma.resid; + + qp->req.opcode = next_opcode(qp, wqe, + wqe->wr.opcode); + + if (wqe->wr.send_flags & IB_SEND_INLINE) { + wqe->dma.resid -= to_send; + wqe->dma.sge_offset += to_send; + } else { + advance_dma_data(&wqe->dma, to_send); + } + } +} + +static void req_retry(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe; + unsigned int wqe_index; + unsigned int mask; + int npsn; + int first = 1; + struct rxe_queue *q = qp->sq.queue; + unsigned int cons; + unsigned int prod; + + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); + + qp->req.wqe_index = cons; + qp->req.psn = qp->comp.psn; + qp->req.opcode = -1; + + for (wqe_index = cons; wqe_index != prod; + wqe_index = queue_next_index(q, wqe_index)) { + wqe = queue_addr_from_index(qp->sq.queue, wqe_index); + mask = wr_opcode_mask(wqe->wr.opcode, qp); + + if (wqe->state == wqe_state_posted) + break; + + if (wqe->state == wqe_state_done) + continue; + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + wqe->wr.wr.atomic.remote_addr : + (mask & WR_READ_OR_WRITE_MASK) ? + wqe->wr.wr.rdma.remote_addr : + 0; + + if (!first || (mask & WR_READ_MASK) == 0) { + wqe->dma.resid = wqe->dma.length; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + } + + if (first) { + first = 0; + + if (mask & WR_WRITE_OR_SEND_MASK) { + npsn = (qp->comp.psn - wqe->first_psn) & + BTH_PSN_MASK; + retry_first_write_send(qp, wqe, npsn); + } + + if (mask & WR_READ_MASK) { + npsn = (wqe->dma.length - wqe->dma.resid) / + qp->mtu; + wqe->iova += npsn * qp->mtu; + } + } + + wqe->state = wqe_state_posted; + } +} + +void rnr_nak_timer(struct timer_list *t) +{ + struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); + + pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + + /* request a send queue retry */ + qp->req.need_retry = 1; + qp->req.wait_for_rnr_timer = 0; + rxe_sched_task(&qp->req.task); +} + +static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe; + struct rxe_queue *q = qp->sq.queue; + unsigned int index = qp->req.wqe_index; + unsigned int cons; + unsigned int prod; + + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* check to see if we are drained; + * state_lock used by requester and completer + */ + spin_lock_bh(&qp->state_lock); + do { + if (qp->req.state != QP_STATE_DRAIN) { + /* comp just finished */ + spin_unlock_bh(&qp->state_lock); + break; + } + + if (wqe && ((index != cons) || + (wqe->state != wqe_state_posted))) { + /* comp not done yet */ + spin_unlock_bh(&qp->state_lock); + break; + } + + qp->req.state = QP_STATE_DRAINED; + spin_unlock_bh(&qp->state_lock); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } while (0); + } + + if (index == prod) + return NULL; + + wqe = queue_addr_from_index(q, index); + + if (unlikely((qp->req.state == QP_STATE_DRAIN || + qp->req.state == QP_STATE_DRAINED) && + (wqe->state != wqe_state_processing))) + return NULL; + + wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); + return wqe; +} + +/** + * rxe_wqe_is_fenced - check if next wqe is fenced + * @qp: the queue pair + * @wqe: the next wqe + * + * Returns: 1 if wqe needs to wait + * 0 if wqe is ready to go + */ +static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + /* Local invalidate fence (LIF) see IBA 10.6.5.1 + * Requires ALL previous operations on the send queue + * are complete. Make mandatory for the rxe driver. + */ + if (wqe->wr.opcode == IB_WR_LOCAL_INV) + return qp->req.wqe_index != queue_get_consumer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); + + /* Fence see IBA 10.8.3.3 + * Requires that all previous read and atomic operations + * are complete. + */ + return (wqe->wr.send_flags & IB_SEND_FENCE) && + atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic; +} + +static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_RDMA_READ: + return IB_OPCODE_RC_RDMA_READ_REQUEST; + + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_OPCODE_RC_COMPARE_SWAP; + + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_OPCODE_RC_FETCH_ADD; + + case IB_WR_SEND_WITH_INV: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_FIRST; + case IB_WR_REG_MR: + case IB_WR_LOCAL_INV: + return opcode; + } + + return -EINVAL; +} + +static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY : + IB_OPCODE_UC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_FIRST; + } + + return -EINVAL; +} + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + u32 opcode) +{ + int fits = (wqe->dma.resid <= qp->mtu); + + switch (qp_type(qp)) { + case IB_QPT_RC: + return next_opcode_rc(qp, opcode, fits); + + case IB_QPT_UC: + return next_opcode_uc(qp, opcode, fits); + + case IB_QPT_UD: + case IB_QPT_GSI: + switch (opcode) { + case IB_WR_SEND: + return IB_OPCODE_UD_SEND_ONLY; + + case IB_WR_SEND_WITH_IMM: + return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + } + break; + + default: + break; + } + + return -EINVAL; +} + +static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + int depth; + + if (wqe->has_rd_atomic) + return 0; + + qp->req.need_rd_atomic = 1; + depth = atomic_dec_return(&qp->req.rd_atomic); + + if (depth >= 0) { + qp->req.need_rd_atomic = 0; + wqe->has_rd_atomic = 1; + return 0; + } + + atomic_inc(&qp->req.rd_atomic); + return -EAGAIN; +} + +static inline int get_mtu(struct rxe_qp *qp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) + return qp->mtu; + + return rxe->port.mtu_cap; +} + +static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, + struct rxe_send_wqe *wqe, + int opcode, u32 payload, + struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct sk_buff *skb; + struct rxe_send_wr *ibwr = &wqe->wr; + int pad = (-payload) & 0x3; + int paylen; + int solicited; + u32 qp_num; + int ack_req; + + /* length from start of bth to end of icrc */ + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; + pkt->paylen = paylen; + + /* init skb */ + skb = rxe_init_packet(rxe, av, paylen, pkt); + if (unlikely(!skb)) + return NULL; + + /* init bth */ + solicited = (ibwr->send_flags & IB_SEND_SOLICITED) && + (pkt->mask & RXE_END_MASK) && + ((pkt->mask & (RXE_SEND_MASK)) || + (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == + (RXE_WRITE_MASK | RXE_IMMDT_MASK)); + + qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : + qp->attr.dest_qp_num; + + ack_req = ((pkt->mask & RXE_END_MASK) || + (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); + if (ack_req) + qp->req.noack_pkts = 0; + + bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num, + ack_req, pkt->psn); + + /* init optional headers */ + if (pkt->mask & RXE_RETH_MASK) { + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + reth_set_va(pkt, wqe->iova); + reth_set_len(pkt, wqe->dma.resid); + } + + if (pkt->mask & RXE_IMMDT_MASK) + immdt_set_imm(pkt, ibwr->ex.imm_data); + + if (pkt->mask & RXE_IETH_MASK) + ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey); + + if (pkt->mask & RXE_ATMETH_MASK) { + atmeth_set_va(pkt, wqe->iova); + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); + atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); + } else { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add); + } + atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey); + } + + if (pkt->mask & RXE_DETH_MASK) { + if (qp->ibqp.qp_num == 1) + deth_set_qkey(pkt, GSI_QKEY); + else + deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey); + deth_set_sqp(pkt, qp->ibqp.qp_num); + } + + return skb; +} + +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 payload) +{ + int err; + + err = rxe_prepare(av, pkt, skb); + if (err) + return err; + + if (pkt->mask & RXE_WRITE_OR_SEND_MASK) { + if (wqe->wr.send_flags & IB_SEND_INLINE) { + u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; + + memcpy(payload_addr(pkt), tmp, payload); + + wqe->dma.resid -= payload; + wqe->dma.sge_offset += payload; + } else { + err = copy_data(qp->pd, 0, &wqe->dma, + payload_addr(pkt), payload, + RXE_FROM_MR_OBJ); + if (err) + return err; + } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + payload; + + memset(pad, 0, bth_pad(pkt)); + } + } + + return 0; +} + +static void update_wqe_state(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) +{ + if (pkt->mask & RXE_END_MASK) { + if (qp_type(qp) == IB_QPT_RC) + wqe->state = wqe_state_pending; + } else { + wqe->state = wqe_state_processing; + } +} + +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + u32 payload) +{ + /* number of packets left to send including current one */ + int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; + + /* handle zero length packet case */ + if (num_pkt == 0) + num_pkt = 1; + + if (pkt->mask & RXE_START_MASK) { + wqe->first_psn = qp->req.psn; + wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK; + } + + if (pkt->mask & RXE_READ_MASK) + qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; + else + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} + +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + u32 *rollback_psn) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_wqe->dma = wqe->dma; + *rollback_psn = qp->req.psn; +} + +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + u32 rollback_psn) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + wqe->dma = rollback_wqe->dma; + qp->req.psn = rollback_psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->req.opcode = pkt->opcode; + + if (pkt->mask & RXE_END_MASK) + qp->req.wqe_index = queue_next_index(qp->sq.queue, + qp->req.wqe_index); + + qp->need_req_skb = 0; + + if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer)) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); +} + +static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + u8 opcode = wqe->wr.opcode; + u32 rkey; + int ret; + + switch (opcode) { + case IB_WR_LOCAL_INV: + rkey = wqe->wr.ex.invalidate_rkey; + if (rkey_is_mw(rkey)) + ret = rxe_invalidate_mw(qp, rkey); + else + ret = rxe_invalidate_mr(qp, rkey); + + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } + break; + case IB_WR_REG_MR: + ret = rxe_reg_fast_mr(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } + break; + case IB_WR_BIND_MW: + ret = rxe_bind_mw(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_MW_BIND_ERR; + return ret; + } + break; + default: + pr_err("Unexpected send wqe opcode %d\n", opcode); + wqe->status = IB_WC_LOC_QP_OP_ERR; + return -EINVAL; + } + + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); + + /* There is no ack coming for local work requests + * which can lead to a deadlock. So go ahead and complete + * it now. + */ + rxe_sched_task(&qp->comp.task); + + return 0; +} + +int rxe_requester(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_pkt_info pkt; + struct sk_buff *skb; + struct rxe_send_wqe *wqe; + enum rxe_hdr_mask mask; + u32 payload; + int mtu; + int opcode; + int err; + int ret; + struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; + struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; + + if (!rxe_get(qp)) + return -EAGAIN; + + if (unlikely(!qp->valid)) + goto exit; + + if (unlikely(qp->req.state == QP_STATE_ERROR)) { + wqe = req_next_wqe(qp); + if (wqe) + /* + * Generate an error completion for error qp state + */ + goto err; + else + goto exit; + } + + if (unlikely(qp->req.state == QP_STATE_RESET)) { + qp->req.wqe_index = queue_get_consumer(q, + QUEUE_TYPE_FROM_CLIENT); + qp->req.opcode = -1; + qp->req.need_rd_atomic = 0; + qp->req.wait_psn = 0; + qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; + goto exit; + } + + /* we come here if the retransmit timer has fired + * or if the rnr timer has fired. If the retransmit + * timer fires while we are processing an RNR NAK wait + * until the rnr timer has fired before starting the + * retry flow + */ + if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) { + req_retry(qp); + qp->req.need_retry = 0; + } + + wqe = req_next_wqe(qp); + if (unlikely(!wqe)) + goto exit; + + if (rxe_wqe_is_fenced(qp, wqe)) { + qp->req.wait_fence = 1; + goto exit; + } + + if (wqe->mask & WR_LOCAL_OP_MASK) { + err = rxe_do_local_ops(qp, wqe); + if (unlikely(err)) + goto err; + else + goto done; + } + + if (unlikely(qp_type(qp) == IB_QPT_RC && + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { + qp->req.wait_psn = 1; + goto exit; + } + + /* Limit the number of inflight SKBs per QP */ + if (unlikely(atomic_read(&qp->skb_out) > + RXE_INFLIGHT_SKBS_PER_QP_HIGH)) { + qp->need_req_skb = 1; + goto exit; + } + + opcode = next_opcode(qp, wqe, wqe->wr.opcode); + if (unlikely(opcode < 0)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + + mask = rxe_opcode[opcode].mask; + if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { + if (check_init_depth(qp, wqe)) + goto exit; + } + + mtu = get_mtu(qp); + payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; + if (payload > mtu) { + if (qp_type(qp) == IB_QPT_UD) { + /* C10-93.1.1: If the total sum of all the buffer lengths specified for a + * UD message exceeds the MTU of the port as returned by QueryHCA, the CI + * shall not emit any packets for this message. Further, the CI shall not + * generate an error due to this condition. + */ + + /* fake a successful UD send */ + wqe->first_psn = qp->req.psn; + wqe->last_psn = qp->req.psn; + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; + qp->req.wqe_index = queue_next_index(qp->sq.queue, + qp->req.wqe_index); + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + rxe_run_task(&qp->comp.task); + goto done; + } + payload = mtu; + } + + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + /* save wqe state before we build and send packet */ + save_state(wqe, qp, &rollback_wqe, &rollback_psn); + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); + if (unlikely(!skb)) { + pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + if (ah) + rxe_put(ah); + goto err; + } + + err = finish_packet(qp, av, wqe, &pkt, skb, payload); + if (unlikely(err)) { + pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); + if (err == -EFAULT) + wqe->status = IB_WC_LOC_PROT_ERR; + else + wqe->status = IB_WC_LOC_QP_OP_ERR; + kfree_skb(skb); + if (ah) + rxe_put(ah); + goto err; + } + + if (ah) + rxe_put(ah); + + /* update wqe state as though we had sent it */ + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); + + err = rxe_xmit_packet(qp, &pkt, skb); + if (err) { + if (err != -EAGAIN) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + + /* the packet was dropped so reset wqe to the state + * before we sent it so we can try to resend + */ + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); + + /* force a delay until the dropped packet is freed and + * the send queue is drained below the low water mark + */ + qp->need_req_skb = 1; + + rxe_sched_task(&qp->req.task); + goto exit; + } + + update_state(qp, &pkt); + + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_requester + */ +done: + ret = 0; + goto out; +err: + /* update wqe_index for each wqe completion */ + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); + wqe->state = wqe_state_error; + qp->req.state = QP_STATE_ERROR; + rxe_run_task(&qp->comp.task); +exit: + ret = -EAGAIN; +out: + rxe_put(qp); + + return ret; +} |