summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/efa/efa.h1
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c32
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c25
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c35
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h20
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c95
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h68
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c155
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h9
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c354
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c60
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c23
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c6
-rw-r--r--drivers/infiniband/hw/mana/cq.c54
-rw-r--r--drivers/infiniband/hw/mana/main.c43
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h14
-rw-r--r--drivers/infiniband/hw/mana/qp.c26
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c8
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c35
24 files changed, 599 insertions, 480 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 04258676d..439d0c7c5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1013,8 +1013,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.stride = sizeof(struct sq_sge);
hwq_attr.depth = bnxt_qplib_get_depth(sq);
hwq_attr.aux_stride = psn_sz;
- hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode)
- : 0;
+ hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode);
/* Update msn tbl size */
if (BNXT_RE_HW_RETX(qp->dev_cap_flags) && psn_sz) {
hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 50cb2259b..fb8a0c248 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -930,8 +930,6 @@ void c4iw_id_table_free(struct c4iw_id_table *alloc);
typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
-int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
struct c4iw_dev_ucontext *uctx);
u32 c4iw_get_resource(struct c4iw_id_table *id_table);
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index e2bdec32a..926f9ff1f 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -57,6 +57,7 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
+ unsigned int num_irq_vectors;
int admin_msix_vector_idx;
struct efa_irq admin_irq;
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 7b1910a86..5fa3603c8 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -322,7 +322,9 @@ static int efa_create_eqs(struct efa_dev *dev)
int err;
int i;
- neqs = min_t(unsigned int, neqs, num_online_cpus());
+ neqs = min_t(unsigned int, neqs,
+ dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
@@ -468,34 +470,30 @@ static void efa_disable_msix(struct efa_dev *dev)
static int efa_enable_msix(struct efa_dev *dev)
{
- int msix_vecs, irq_num;
+ int max_vecs, num_vecs;
/*
* Reserve the max msix vectors we might need, one vector is reserved
* for admin.
*/
- msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
- num_online_cpus() + 1);
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
- msix_vecs);
+ max_vecs);
dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
- irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
- msix_vecs, PCI_IRQ_MSIX);
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
- if (irq_num < 0) {
- dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
- irq_num);
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
return -ENOSPC;
}
- if (irq_num != msix_vecs) {
- efa_disable_msix(dev);
- dev_err(&dev->pdev->dev,
- "Allocated %d MSI-X (out of %d requested)\n",
- irq_num, msix_vecs);
- return -ENOSPC;
- }
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 18b05ffb4..c465966a1 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -315,7 +315,7 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
* This routine returns the receive context associated
* with a a qp's qpn.
*
- * Returns the context.
+ * Return: the context.
*/
static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
struct rvt_qp *qp)
@@ -710,7 +710,7 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
* The exp_lock must be held.
*
* Return:
- * On success: a value postive value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On success: a value positive value between 0 and RXE_NUM_TID_FLOWS - 1
* On failure: -EAGAIN
*/
static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
@@ -1007,7 +1007,7 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
* pages are tested two at a time, i, i + 1 for contiguous
* pages and i - 1 and i contiguous pages.
*
- * If any condition is false, any accumlated pages are flushed and
+ * If any condition is false, any accumulated pages are flushed and
* v0,v1 are emitted as separate PAGE_SIZE pagesets
*
* Otherwise, the current 8k is totaled for a future flush.
@@ -1434,7 +1434,7 @@ static void kern_program_rcvarray(struct tid_rdma_flow *flow)
* (5) computes a tidarray with formatted TID entries which can be sent
* to the sender
* (6) Reserves and programs HW flows.
- * (7) It also manages queing the QP when TID/flow resources are not
+ * (7) It also manages queueing the QP when TID/flow resources are not
* available.
*
* @req points to struct tid_rdma_request of which the segments are a part. The
@@ -1604,7 +1604,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
}
/**
- * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information
+ * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
* @req: the tid rdma request to be cleaned
*/
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
@@ -2055,7 +2055,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* req->clear_tail is advanced). However, when an earlier
* request is received, this request will not be complete any
* more (qp->s_tail_ack_queue is moved back, see below).
- * Consequently, we need to update the TID flow info everytime
+ * Consequently, we need to update the TID flow info every time
* a duplicate request is received.
*/
bth0 = be32_to_cpu(ohdr->bth[0]);
@@ -2219,7 +2219,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
/*
* 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
* (see hfi1_rc_rcv())
- * 2. Put TID RDMA READ REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Initialize struct tid_rdma_flow info;
* - Copy TID entries;
@@ -2439,7 +2439,7 @@ find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA READ RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -3649,7 +3649,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
* 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
* (see hfi1_rc_rcv())
* - Don't allow 0-length requests.
- * 2. Put TID RDMA WRITE REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Prepare struct tid_rdma_flow array?
* 3. Set the qp->s_ack_state as state diagram in design doc.
@@ -4026,7 +4026,7 @@ unlock_r_lock:
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -5440,8 +5440,9 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
* the two state machines can step on each other with respect to the
* RVT_S_BUSY flag.
* Therefore, a modified test is used.
- * @return true if the second leg is scheduled;
- * false if the second leg is not scheduled.
+ *
+ * Return: %true if the second leg is scheduled;
+ * %false if the second leg is not scheduled.
*/
bool hfi1_schedule_tid_send(struct rvt_qp *qp)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 052a3d609..11dbbabeb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -108,6 +108,9 @@ enum {
HNS_ROCE_CMD_QUERY_CEQC = 0x92,
HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+ /* SCC CTX commands */
+ HNS_ROCE_CMD_QUERY_SCCC = 0xa2,
+
/* SCC CTX BT commands */
HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 2517c972c..7250d0643 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -133,14 +133,12 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 mtts[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle;
int ret;
- ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
- &dma_handle);
- if (!ret) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts));
+ if (ret) {
ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
- return -EINVAL;
+ return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
@@ -151,20 +149,21 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return ret;
}
- ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
goto err_put;
}
- ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle);
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts,
+ hns_roce_get_mtr_ba(&hr_cq->mtr));
if (ret)
goto err_xa;
return 0;
err_xa:
- xa_erase_irq(&cq_table->array, hr_cq->cqn);
+ xa_erase(&cq_table->array, hr_cq->cqn);
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
@@ -183,7 +182,7 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
- xa_erase_irq(&cq_table->array, hr_cq->cqn);
+ xa_erase(&cq_table->array, hr_cq->cqn);
/* Waiting interrupt process procedure carried out */
synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
@@ -477,6 +476,13 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
struct ib_event event;
struct ib_cq *ibcq;
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
+ return;
+ }
+
if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
@@ -485,16 +491,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
return;
}
- xa_lock(&hr_dev->cq_table.array);
- hr_cq = xa_load(&hr_dev->cq_table.array,
- cqn & (hr_dev->caps.num_cqs - 1));
- if (hr_cq)
- refcount_inc(&hr_cq->refcount);
- xa_unlock(&hr_dev->cq_table.array);
- if (!hr_cq) {
- dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
- return;
- }
+ refcount_inc(&hr_cq->refcount);
ibcq = &hr_cq->ib_cq;
if (ibcq->event_handler) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 46f8a6310..c3cbd0a49 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -179,6 +179,7 @@ enum {
#define HNS_ROCE_CMD_SUCCESS 1
+#define HNS_ROCE_MAX_HOP_NUM 3
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -269,6 +270,11 @@ struct hns_roce_hem_list {
dma_addr_t root_ba; /* pointer to the root ba table */
};
+enum mtr_type {
+ MTR_DEFAULT = 0,
+ MTR_PBL,
+};
+
struct hns_roce_buf_attr {
struct {
size_t size; /* region size */
@@ -277,7 +283,10 @@ struct hns_roce_buf_attr {
unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
unsigned int user_access; /* umem access flag */
+ u64 iova;
+ enum mtr_type type;
bool mtt_only; /* only alloc buffer-required MTT memory */
+ bool adaptive; /* adaptive for page_shift and hopnum */
};
struct hns_roce_hem_cfg {
@@ -836,7 +845,8 @@ struct hns_roce_caps {
u16 default_aeq_period;
u16 default_aeq_arm_st;
u16 default_ceq_arm_st;
- enum hns_roce_cong_type cong_type;
+ u8 cong_cap;
+ enum hns_roce_cong_type default_cong_type;
};
enum hns_roce_device_state {
@@ -937,6 +947,7 @@ struct hns_roce_hw {
int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
int (*query_srqc)(struct hns_roce_dev *hr_dev, u32 srqn, void *buffer);
+ int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
u64 *stats, u32 port, int *hw_counters);
const struct ib_device_ops *hns_roce_dev_ops;
@@ -1153,8 +1164,13 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
/* hns roce hw need current block and next block addr from mtt */
#define MTT_MIN_COUNT 2
+static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr)
+{
+ return mtr->hem_cfg.root_ba;
+}
+
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+ u32 offset, u64 *mtt_buf, int mtt_max);
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
unsigned int page_shift, struct ib_udata *udata,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index c4ac06a33..a4b3f1916 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -249,61 +249,34 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
}
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
- int npages,
unsigned long hem_alloc_size,
gfp_t gfp_mask)
{
- struct hns_roce_hem_chunk *chunk = NULL;
struct hns_roce_hem *hem;
- struct scatterlist *mem;
int order;
void *buf;
WARN_ON(gfp_mask & __GFP_HIGHMEM);
+ order = get_order(hem_alloc_size);
+ if (PAGE_SIZE << order != hem_alloc_size) {
+ dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
+ hem_alloc_size);
+ return NULL;
+ }
+
hem = kmalloc(sizeof(*hem),
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!hem)
return NULL;
- INIT_LIST_HEAD(&hem->chunk_list);
-
- order = get_order(hem_alloc_size);
-
- while (npages > 0) {
- if (!chunk) {
- chunk = kmalloc(sizeof(*chunk),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
-
- sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
- chunk->npages = 0;
- chunk->nsg = 0;
- memset(chunk->buf, 0, sizeof(chunk->buf));
- list_add_tail(&chunk->list, &hem->chunk_list);
- }
+ buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
+ &hem->dma, gfp_mask);
+ if (!buf)
+ goto fail;
- while (1 << order > npages)
- --order;
-
- /*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
- mem = &chunk->mem[chunk->npages];
- buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
- &sg_dma_address(mem), gfp_mask);
- if (!buf)
- goto fail;
-
- chunk->buf[chunk->npages] = buf;
- sg_dma_len(mem) = PAGE_SIZE << order;
-
- ++chunk->npages;
- ++chunk->nsg;
- npages -= 1 << order;
- }
+ hem->buf = buf;
+ hem->size = hem_alloc_size;
return hem;
@@ -314,20 +287,10 @@ fail:
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
- struct hns_roce_hem_chunk *chunk, *tmp;
- int i;
-
if (!hem)
return;
- list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(hr_dev->dev,
- sg_dma_len(&chunk->mem[i]),
- chunk->buf[i],
- sg_dma_address(&chunk->mem[i]));
- kfree(chunk);
- }
+ dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
kfree(hem);
}
@@ -415,7 +378,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
{
u32 bt_size = mhop->bt_chunk_size;
struct device *dev = hr_dev->dev;
- struct hns_roce_hem_iter iter;
gfp_t flag;
u64 bt_ba;
u32 size;
@@ -456,16 +418,15 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
*/
size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
flag = GFP_KERNEL | __GFP_NOWARN;
- table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
- size, flag);
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag);
if (!table->hem[index->buf]) {
ret = -ENOMEM;
goto err_alloc_hem;
}
index->inited |= HEM_INDEX_BUF;
- hns_roce_hem_first(table->hem[index->buf], &iter);
- bt_ba = hns_roce_hem_addr(&iter);
+ bt_ba = table->hem[index->buf]->dma;
+
if (table->type < HEM_TYPE_MTT) {
if (mhop->hop_num == 2)
*(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
@@ -586,7 +547,6 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
}
table->hem[i] = hns_roce_alloc_hem(hr_dev,
- table->table_chunk_size >> PAGE_SHIFT,
table->table_chunk_size,
GFP_KERNEL | __GFP_NOWARN);
if (!table->hem[i]) {
@@ -725,7 +685,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj, dma_addr_t *dma_handle)
{
- struct hns_roce_hem_chunk *chunk;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -734,7 +693,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
int offset, dma_offset;
void *addr = NULL;
u32 hem_idx = 0;
- int length;
int i, j;
mutex_lock(&table->mutex);
@@ -767,23 +725,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
if (!hem)
goto out;
- list_for_each_entry(chunk, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i) {
- length = sg_dma_len(&chunk->mem[i]);
- if (dma_handle && dma_offset >= 0) {
- if (length > (u32)dma_offset)
- *dma_handle = sg_dma_address(
- &chunk->mem[i]) + dma_offset;
- dma_offset -= length;
- }
-
- if (length > (u32)offset) {
- addr = chunk->buf[i] + offset;
- goto out;
- }
- offset -= length;
- }
- }
+ *dma_handle = hem->dma + dma_offset;
+ addr = hem->buf + offset;
out:
mutex_unlock(&table->mutex);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index fea6d7d50..6fb51db96 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -56,41 +56,25 @@ enum {
HEM_TYPE_TRRL,
};
-#define HNS_ROCE_HEM_CHUNK_LEN \
- ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist) + sizeof(void *)))
-
#define check_whether_bt_num_3(type, hop_num) \
- ((type) < HEM_TYPE_MTT && (hop_num) == 2)
+ (type < HEM_TYPE_MTT && hop_num == 2)
#define check_whether_bt_num_2(type, hop_num) \
- (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \
- ((type) >= HEM_TYPE_MTT && (hop_num) == 2))
+ ((type < HEM_TYPE_MTT && hop_num == 1) || \
+ (type >= HEM_TYPE_MTT && hop_num == 2))
#define check_whether_bt_num_1(type, hop_num) \
- (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \
- ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
- ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
-
-struct hns_roce_hem_chunk {
- struct list_head list;
- int npages;
- int nsg;
- struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
- void *buf[HNS_ROCE_HEM_CHUNK_LEN];
-};
+ ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \
+ (type >= HEM_TYPE_MTT && hop_num == 1) || \
+ (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
struct hns_roce_hem {
- struct list_head chunk_list;
+ void *buf;
+ dma_addr_t dma;
+ unsigned long size;
refcount_t refcount;
};
-struct hns_roce_hem_iter {
- struct hns_roce_hem *hem;
- struct hns_roce_hem_chunk *chunk;
- int page_idx;
-};
-
struct hns_roce_hem_mhop {
u32 hop_num;
u32 buf_chunk_size;
@@ -133,38 +117,4 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
int offset, int *mtt_cnt);
-static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
- struct hns_roce_hem_iter *iter)
-{
- iter->hem = hem;
- iter->chunk = list_empty(&hem->chunk_list) ? NULL :
- list_entry(hem->chunk_list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
-}
-
-static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
-{
- return !iter->chunk;
-}
-
-static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
-{
- if (++iter->page_idx >= iter->chunk->nsg) {
- if (iter->chunk->list.next == &iter->hem->chunk_list) {
- iter->chunk = NULL;
- return;
- }
-
- iter->chunk = list_entry(iter->chunk->list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
- }
-}
-
-static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
-{
- return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
-}
-
#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f95ec4618..ba7ae792d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2105,7 +2105,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->gmv_bt_num *
(HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
- caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
+ caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
caps->gmv_entry_sz);
} else {
u32 func_num = max_t(u32, 1, hr_dev->func_num);
@@ -2209,11 +2209,12 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
- caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+ caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -3195,21 +3196,22 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t pbl_ba;
- int i, count;
+ int ret;
+ int i;
- count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
- min_t(int, ARRAY_SIZE(pages), mr->npages),
- &pbl_ba);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
- count);
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret);
+ return ret;
}
/* Aligned to the hardware address access unit */
- for (i = 0; i < count; i++)
+ for (i = 0; i < ARRAY_SIZE(pages); i++)
pages[i] >>= 6;
+ pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
+
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
@@ -3308,18 +3310,12 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
void *mb_buf, struct hns_roce_mr *mr)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
struct hns_roce_v2_mpt_entry *mpt_entry;
- dma_addr_t pbl_ba = 0;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
- ibdev_err(ibdev, "failed to find frmr mtr.\n");
- return -ENOBUFS;
- }
-
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
@@ -3715,9 +3711,8 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
wc->status == IB_WC_WR_FLUSH_ERR))
return;
- ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n",
- cqe_status);
- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
cq->cqe_size, false);
wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
@@ -4064,7 +4059,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
u32 step_idx)
{
- struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -4101,12 +4095,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
if (check_whether_last_step(hop_num, step_idx)) {
hem = table->hem[hem_idx];
- for (hns_roce_hem_first(hem, &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter);
- ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
- step_idx);
- }
+
+ ret = set_hem_to_hw(hr_dev, obj, hem->dma, table->type, step_idx);
} else {
if (step_idx == 0)
bt_ba = table->bt_l0_dma_addr[i];
@@ -4347,17 +4337,20 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
{
u64 mtts[MTT_MIN_COUNT] = { 0 };
u64 wqe_sge_ba;
- int count;
+ int ret;
/* Search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
- MTT_MIN_COUNT, &wqe_sge_ba);
- if (hr_qp->rq.wqe_cnt && count < 1) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT);
+ if (hr_qp->rq.wqe_cnt && ret) {
ibdev_err(&hr_dev->ib_dev,
- "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
- return -EINVAL;
+ "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
+ wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr);
+
context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
@@ -4419,23 +4412,23 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 sge_cur_blk = 0;
u64 sq_cur_blk = 0;
- int count;
+ int ret;
/* search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset,
+ &sq_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
if (hr_qp->sge.sge_cnt > 0) {
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->sge.offset,
- &sge_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset, &sge_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
}
@@ -4745,14 +4738,8 @@ enum {
static int check_cong_type(struct ib_qp *ibqp,
struct hns_roce_congestion_algorithm *cong_alg)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI)
- hr_qp->cong_type = CONG_TYPE_DCQCN;
- else
- hr_qp->cong_type = hr_dev->caps.cong_type;
-
/* different congestion types match different configurations */
switch (hr_qp->cong_type) {
case CONG_TYPE_DCQCN:
@@ -4780,9 +4767,6 @@ static int check_cong_type(struct ib_qp *ibqp,
cong_alg->wnd_mode_sel = WND_LIMIT;
break;
default:
- ibdev_warn(&hr_dev->ib_dev,
- "invalid type(%u) for congestion selection.\n",
- hr_qp->cong_type);
hr_qp->cong_type = CONG_TYPE_DCQCN;
cong_alg->alg_sel = CONG_DCQCN;
cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
@@ -5333,6 +5317,30 @@ out:
return ret;
}
+static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
+{
+ struct hns_roce_v2_scc_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC,
+ qpn);
+ if (ret)
+ goto out;
+
+ context = mailbox->buf;
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_qp_context *context)
{
@@ -5586,18 +5594,20 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
struct ib_device *ibdev = srq->ibsrq.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
u64 mtts_idx[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_idx = 0;
+ dma_addr_t dma_handle_idx;
int ret;
/* Get physical address of idx que buf */
ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
- ARRAY_SIZE(mtts_idx), &dma_handle_idx);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_idx));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr);
+
hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
@@ -5629,20 +5639,22 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_srq_context *ctx = mb_buf;
u64 mtts_wqe[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_wqe = 0;
+ dma_addr_t dma_handle_wqe;
int ret;
memset(ctx, 0, sizeof(*ctx));
/* Get the physical address of srq buf */
ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
- ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_wqe));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr);
+
hr_reg_write(ctx, SRQC_SRQ_ST, 1);
hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
srq->ibsrq.srq_type == IB_SRQT_XRC);
@@ -6358,7 +6370,7 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
struct hns_roce_eq_context *eqc;
u64 bt_ba = 0;
- int count;
+ int ret;
eqc = mb_buf;
memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@@ -6366,13 +6378,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
init_eq_config(hr_dev, eq);
/* if not multi-hop, eqe buffer only use one trunk */
- count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT,
- &bt_ba);
- if (count < 1) {
- dev_err(hr_dev->dev, "failed to find EQE mtr\n");
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba,
+ ARRAY_SIZE(eqe_ba));
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret);
+ return ret;
}
+ bt_ba = hns_roce_get_mtr_ba(&eq->mtr);
+
hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
@@ -6719,6 +6733,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_qpc = hns_roce_v2_query_qpc,
.query_mpt = hns_roce_v2_query_mpt,
.query_srqc = hns_roce_v2_query_srqc,
+ .query_sccc = hns_roce_v2_query_sccc,
.query_hw_counter = hns_roce_hw_v2_query_counter,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index cd97cbee6..df04bc8ed 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -646,6 +646,12 @@ struct hns_roce_v2_qp_context {
#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+#define SCC_CONTEXT_SIZE 16
+
+struct hns_roce_v2_scc_context {
+ __le32 data[SCC_CONTEXT_SIZE];
+};
+
#define V2_QP_RWE_S 1 /* rdma write enable */
#define V2_QP_RRE_S 2 /* rdma read enable */
#define V2_QP_ATE_S 3 /* rdma atomic enable */
@@ -1214,12 +1220,13 @@ struct hns_roce_query_pf_caps_d {
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
-#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index a33d3cedb..1dc60c2b2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -37,7 +37,6 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
@@ -395,6 +394,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
}
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ resp.congest_type = hr_dev->caps.cong_cap;
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_out;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 0d42fd197..9e05b57a2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -32,6 +32,7 @@
*/
#include <linux/vmalloc.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
#include <linux/math.h>
#include "hns_roce_device.h"
@@ -103,14 +104,21 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
buf_attr.user_access = mr->access;
/* fast MR's buffer is alloced before mapping, not at creation */
buf_attr.mtt_only = is_fast;
+ buf_attr.iova = mr->iova;
+ /* pagesize and hopnum is fixed for fast MR */
+ buf_attr.adaptive = !is_fast;
+ buf_attr.type = MTR_PBL;
err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
udata, start);
- if (err)
+ if (err) {
ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
- else
- mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ return err;
+ }
+
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ mr->pbl_hop_num = buf_attr.region[0].hopnum;
return err;
}
@@ -433,18 +441,18 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_mtr *mtr = &mr->pbl_mtr;
- int ret, sg_num = 0;
+ int ret = 0;
mr->npages = 0;
mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
sizeof(dma_addr_t), GFP_KERNEL);
if (!mr->page_list)
- return sg_num;
+ return ret;
- sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
- if (sg_num < 1) {
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ if (ret < 1) {
ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
- mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
goto err_page_list;
}
@@ -455,16 +463,17 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
if (ret) {
ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
- sg_num = 0;
+ ret = 0;
} else {
mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
+ ret = mr->npages;
}
err_page_list:
kvfree(mr->page_list);
mr->page_list = NULL;
- return sg_num;
+ return ret;
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
@@ -694,7 +703,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->umem = NULL;
mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
buf_attr->page_shift,
- mtr->hem_cfg.is_direct ?
+ !mtr_has_mtt(buf_attr) ?
HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
@@ -706,14 +715,41 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return 0;
}
-static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- int page_count, unsigned int page_shift)
+static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
+{
+ struct hns_roce_buf_region *region;
+ int page_cnt = 0;
+ int i;
+
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ region = &mtr->hem_cfg.region[i];
+ page_cnt += region->count;
+ }
+
+ return page_cnt;
+}
+
+static bool need_split_huge_page(struct hns_roce_mtr *mtr)
+{
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. If the current MTR has multiple
+ * regions, we split the buffer into small pages(4k, required by hns
+ * ROCEE). These pages will be used in multiple regions.
+ */
+ return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
+}
+
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ int page_count = cal_mtr_pg_cnt(mtr);
+ unsigned int page_shift;
dma_addr_t *pages;
int npage;
int ret;
+ page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
+ mtr->hem_cfg.buf_pg_shift;
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages)
@@ -733,7 +769,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto err_alloc_list;
}
- if (mtr->hem_cfg.is_direct && npage > 1) {
+ if (need_split_huge_page(mtr) && npage > 1) {
ret = mtr_check_direct_pages(pages, npage, page_shift);
if (ret) {
ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
@@ -808,47 +844,53 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return ret;
}
-int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
+ u32 start_index, u64 *mtt_buf,
+ int mtt_cnt)
{
- struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
- int mtt_count, left;
- u32 start_index;
+ int mtt_count;
int total = 0;
- __le64 *mtts;
u32 npage;
u64 addr;
- if (!mtt_buf || mtt_max < 1)
- goto done;
-
- /* no mtt memory in direct mode, so just return the buffer address */
- if (cfg->is_direct) {
- start_index = offset >> HNS_HW_PAGE_SHIFT;
- for (mtt_count = 0; mtt_count < cfg->region_count &&
- total < mtt_max; mtt_count++) {
- npage = cfg->region[mtt_count].offset;
- if (npage < start_index)
- continue;
+ if (mtt_cnt > cfg->region_count)
+ return -EINVAL;
- addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
- mtt_buf[total] = addr;
+ for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
+ mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
- total++;
- }
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
- goto done;
+ total++;
}
- start_index = offset >> cfg->buf_pg_shift;
- left = mtt_max;
+ if (!total)
+ return -ENOENT;
+
+ return 0;
+}
+
+static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, u32 start_index,
+ u64 *mtt_buf, int mtt_cnt)
+{
+ int left = mtt_cnt;
+ int total = 0;
+ int mtt_count;
+ __le64 *mtts;
+ u32 npage;
+
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
start_index + total,
&mtt_count);
if (!mtts || !mtt_count)
- goto done;
+ break;
npage = min(mtt_count, left);
left -= npage;
@@ -856,69 +898,165 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
}
-done:
- if (base_addr)
- *base_addr = cfg->root_ba;
+ if (!total)
+ return -ENOENT;
+
+ return 0;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ u32 start_index;
+ int ret;
+
+ if (!mtt_buf || mtt_max < 1)
+ return -EINVAL;
+
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
+ mtt_buf, mtt_max);
+ } else {
+ start_index = offset >> cfg->buf_pg_shift;
+ ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
+ mtt_buf, mtt_max);
+ }
+ return ret;
+}
+
+static int get_best_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ unsigned int page_sz;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
+ return 0;
+
+ page_sz = ib_umem_find_best_pgsz(mtr->umem,
+ hr_dev->caps.page_size_cap,
+ buf_attr->iova);
+ if (!page_sz)
+ return -EINVAL;
+
+ buf_attr->page_shift = order_base_2(page_sz);
+ return 0;
+}
+
+static int get_best_hop_num(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_pg_shift)
+{
+#define INVALID_HOPNUM -1
+#define MIN_BA_CNT 1
+ size_t buf_pg_sz = 1 << buf_attr->page_shift;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t ba_pg_sz = 1 << ba_pg_shift;
+ int hop_num = INVALID_HOPNUM;
+ size_t unit = MIN_BA_CNT;
+ size_t ba_cnt;
+ int j;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
+ return 0;
+
+ /* Caculating the number of buf pages, each buf page need a BA */
+ if (mtr->umem)
+ ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
+
+ for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
+ if (ba_cnt <= unit) {
+ hop_num = j;
+ break;
+ }
+ /* Number of BAs can be represented at per hop */
+ unit *= ba_pg_sz / BA_BYTE_LEN;
+ }
+
+ if (hop_num < 0) {
+ ibdev_err(ibdev,
+ "failed to calculate a valid hopnum.\n");
+ return -EINVAL;
+ }
+
+ buf_attr->region[0].hopnum = hop_num;
+
+ return 0;
+}
+
+static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (attr->region_count > ARRAY_SIZE(attr->region) ||
+ attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev,
+ "invalid buf attr, region count %d, page shift %u.\n",
+ attr->region_count, attr->page_shift);
+ return false;
+ }
- return total;
+ return true;
}
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
- struct hns_roce_buf_attr *attr,
- struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift, u64 unalinged_size)
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *attr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct hns_roce_buf_region *r;
- u64 first_region_padding;
- int page_cnt, region_cnt;
- unsigned int page_shift;
+ size_t buf_pg_sz;
size_t buf_size;
+ int page_cnt, i;
+ u64 pgoff = 0;
+
+ if (!is_buf_attr_valid(hr_dev, attr))
+ return -EINVAL;
/* If mtt is disabled, all pages must be within a continuous range */
cfg->is_direct = !mtr_has_mtt(attr);
+ cfg->region_count = attr->region_count;
buf_size = mtr_bufs_size(attr);
- if (cfg->is_direct) {
- /* When HEM buffer uses 0-level addressing, the page size is
- * equal to the whole buffer size, and we split the buffer into
- * small pages which is used to check whether the adjacent
- * units are in the continuous space and its size is fixed to
- * 4K based on hns ROCEE's requirement.
- */
- page_shift = HNS_HW_PAGE_SHIFT;
-
- /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ if (need_split_huge_page(mtr)) {
+ buf_pg_sz = HNS_HW_PAGE_SIZE;
cfg->buf_pg_count = 1;
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
- first_region_padding = 0;
} else {
- page_shift = attr->page_shift;
- cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
- 1 << page_shift);
- cfg->buf_pg_shift = page_shift;
- first_region_padding = unalinged_size;
+ buf_pg_sz = 1 << attr->page_shift;
+ cfg->buf_pg_count = mtr->umem ?
+ ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
+ DIV_ROUND_UP(buf_size, buf_pg_sz);
+ cfg->buf_pg_shift = attr->page_shift;
+ pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
}
/* Convert buffer size to page index and page count for each region and
* the buffer's offset needs to be appended to the first region.
*/
- for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
- region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
- r = &cfg->region[region_cnt];
+ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
+ r = &cfg->region[i];
r->offset = page_cnt;
- buf_size = hr_hw_page_align(attr->region[region_cnt].size +
- first_region_padding);
- r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
- first_region_padding = 0;
+ buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
+ if (attr->type == MTR_PBL && mtr->umem)
+ r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
+
+ pgoff = 0;
page_cnt += r->count;
- r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
- r->count);
+ r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
}
- cfg->region_count = region_cnt;
- *buf_page_shift = page_shift;
-
- return page_cnt;
+ return 0;
}
static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
@@ -1006,50 +1144,58 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int buf_page_shift = 0;
- int buf_page_cnt;
int ret;
- buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
- &buf_page_shift,
- udata ? user_addr & ~PAGE_MASK : 0);
- if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
- ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
- buf_page_cnt, buf_page_shift);
- return -EINVAL;
- }
-
- ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
- return ret;
- }
-
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
* to finish the MTT configuration.
*/
if (buf_attr->mtt_only) {
mtr->umem = NULL;
mtr->kmem = NULL;
- return 0;
+ } else {
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = get_best_page_shift(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
+ if (ret)
+ goto err_init_buf;
}
- ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
- goto err_alloc_mtt;
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ goto err_init_buf;
}
+ if (buf_attr->mtt_only)
+ return 0;
+
/* Write buffer's dma address to MTT */
- ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
- if (ret)
+ ret = mtr_map_bufs(hr_dev, mtr);
+ if (ret) {
ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
- else
- return 0;
+ goto err_alloc_mtt;
+ }
+
+ return 0;
- mtr_free_bufs(hr_dev, mtr);
err_alloc_mtt:
mtr_free_mtt(hr_dev, mtr);
+err_init_buf:
+ mtr_free_bufs(hr_dev, mtr);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 31b147210..f35a66325 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1004,6 +1004,60 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
kfree(hr_qp->sq.wrid);
}
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ else
+ hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+ switch (ucmd->cong_type_flags) {
+ case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+ hr_qp->cong_type = CONG_TYPE_LDCP;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+ hr_qp->cong_type = CONG_TYPE_HC3;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+ hr_qp->cong_type = CONG_TYPE_DIP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+ return -EOPNOTSUPP;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+ hr_qp->cong_type != CONG_TYPE_DCQCN)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+ return set_congest_type(hr_qp, ucmd);
+
+ default_congest_type(hr_dev, hr_qp);
+
+ return 0;
+}
+
static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata,
@@ -1043,6 +1097,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+
+ ret = set_congest_param(hr_dev, hr_qp, ucmd);
+ if (ret)
+ return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
@@ -1051,6 +1109,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
+
+ default_congest_type(hr_dev, hr_qp);
}
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index f7f3c4cc7..356d98816 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -97,16 +97,33 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
- struct hns_roce_v2_qp_context context;
+ struct hns_roce_full_qp_ctx {
+ struct hns_roce_v2_qp_context qpc;
+ struct hns_roce_v2_scc_context sccc;
+ } context = {};
int ret;
if (!hr_dev->hw->query_qpc)
return -EINVAL;
- ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context);
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context.qpc);
if (ret)
- return -EINVAL;
+ return ret;
+
+ /* If SCC is disabled or the query fails, the queried SCCC will
+ * be all 0.
+ */
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) ||
+ !hr_dev->hw->query_sccc)
+ goto out;
+
+ ret = hr_dev->hw->query_sccc(hr_dev, hr_qp->qpn, &context.sccc);
+ if (ret)
+ ibdev_warn_ratelimited(&hr_dev->ib_dev,
+ "failed to query SCCC, ret = %d.\n",
+ ret);
+out:
ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 8f48c6723..4abae9477 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -123,7 +123,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return ret;
}
- ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
goto err_put;
@@ -136,7 +136,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return 0;
err_xa:
- xa_erase_irq(&srq_table->xa, srq->srqn);
+ xa_erase(&srq_table->xa, srq->srqn);
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
@@ -154,7 +154,7 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
- xa_erase_irq(&srq_table->xa, srq->srqn);
+ xa_erase(&srq_table->xa, srq->srqn);
if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 89fcc09de..4a71e678d 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -39,13 +39,37 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
- if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ err = PTR_ERR(cq->umem);
+ ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
+ err);
return err;
}
+ err = mana_ib_create_zero_offset_dma_region(mdev, cq->umem, &cq->gdma_region);
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to create dma region for create cq, %d\n",
+ err);
+ goto err_release_umem;
+ }
+
+ ibdev_dbg(ibdev,
+ "create_dma_region ret %d gdma_region 0x%llx\n",
+ err, cq->gdma_region);
+
+ /*
+ * The CQ ID is not known at this time. The ID is generated at create_qp
+ */
+ cq->id = INVALID_QUEUE_ID;
+
return 0;
+
+err_release_umem:
+ ib_umem_release(cq->umem);
+ return err;
}
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
@@ -54,16 +78,24 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
struct ib_device *ibdev = ibcq->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
+ int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
gc = mdev_to_gc(mdev);
- if (cq->queue.id != INVALID_QUEUE_ID) {
- kfree(gc->cq_table[cq->queue.id]);
- gc->cq_table[cq->queue.id] = NULL;
+ err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to destroy dma region, %d\n", err);
+ return err;
}
- mana_ib_destroy_queue(mdev, &cq->queue);
+ if (cq->id != INVALID_QUEUE_ID) {
+ kfree(gc->cq_table[cq->id]);
+ gc->cq_table[cq->id] = NULL;
+ }
+
+ ib_umem_release(cq->umem);
return 0;
}
@@ -81,10 +113,8 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
struct gdma_context *gc = mdev_to_gc(mdev);
struct gdma_queue *gdma_cq;
- if (cq->queue.id >= gc->max_num_cqs)
- return -EINVAL;
/* Create CQ table entry */
- WARN_ON(gc->cq_table[cq->queue.id]);
+ WARN_ON(gc->cq_table[cq->id]);
gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
if (!gdma_cq)
return -ENOMEM;
@@ -92,7 +122,7 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
gdma_cq->cq.context = cq;
gdma_cq->type = GDMA_CQ;
gdma_cq->cq.callback = mana_ib_cq_handler;
- gdma_cq->id = cq->queue.id;
- gc->cq_table[cq->queue.id] = gdma_cq;
+ gdma_cq->id = cq->id;
+ gc->cq_table[cq->id] = gdma_cq;
return 0;
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 4524c6b80..71e33feee 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -237,49 +237,6 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
-int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
- struct mana_ib_queue *queue)
-{
- struct ib_umem *umem;
- int err;
-
- queue->umem = NULL;
- queue->id = INVALID_QUEUE_ID;
- queue->gdma_region = GDMA_INVALID_DMA_REGION;
-
- umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
- ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %d\n", err);
- return err;
- }
-
- err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
- goto free_umem;
- }
- queue->umem = umem;
-
- ibdev_dbg(&mdev->ib_dev,
- "create_dma_region ret %d gdma_region 0x%llx\n",
- err, queue->gdma_region);
-
- return 0;
-free_umem:
- ib_umem_release(umem);
- return err;
-}
-
-void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
-{
- /* Ignore return code as there is not much we can do about it.
- * The error message is printed inside.
- */
- mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
- ib_umem_release(queue->umem);
-}
-
static int
mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
struct gdma_context *gc,
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 6acb5c281..f83390eeb 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -45,12 +45,6 @@ struct mana_ib_adapter_caps {
u32 max_inline_data_size;
};
-struct mana_ib_queue {
- struct ib_umem *umem;
- u64 gdma_region;
- u64 id;
-};
-
struct mana_ib_dev {
struct ib_device ib_dev;
struct gdma_dev *gdma_dev;
@@ -88,8 +82,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
- struct mana_ib_queue queue;
+ struct ib_umem *umem;
int cqe;
+ u64 gdma_region;
+ u64 id;
u32 comp_vector;
};
@@ -173,10 +169,6 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
-int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
- struct mana_ib_queue *queue);
-void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
-
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index d7485ee6a..6e7627745 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -197,7 +197,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
wq_spec.gdma_region = wq->gdma_region;
wq_spec.queue_size = wq->wq_buf_size;
- cq_spec.gdma_region = cq->queue.gdma_region;
+ cq_spec.gdma_region = cq->gdma_region;
cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
eq = &mpc->ac->eqs[cq->comp_vector % gc->max_num_queues];
@@ -213,16 +213,16 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
/* The GDMA regions are now owned by the WQ object */
wq->gdma_region = GDMA_INVALID_DMA_REGION;
- cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->gdma_region = GDMA_INVALID_DMA_REGION;
wq->id = wq_spec.queue_index;
- cq->queue.id = cq_spec.queue_index;
+ cq->id = cq_spec.queue_index;
ibdev_dbg(&mdev->ib_dev,
"ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
- ret, wq->rx_object, wq->id, cq->queue.id);
+ ret, wq->rx_object, wq->id, cq->id);
- resp.entries[i].cqid = cq->queue.id;
+ resp.entries[i].cqid = cq->id;
resp.entries[i].wqid = wq->id;
mana_ind_table[i] = wq->rx_object;
@@ -232,7 +232,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
if (ret)
goto fail;
- gdma_cq_allocated[i] = gc->cq_table[cq->queue.id];
+ gdma_cq_allocated[i] = gc->cq_table[cq->id];
}
resp.num_entries = i;
@@ -264,7 +264,7 @@ fail:
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
cq = container_of(ibcq, struct mana_ib_cq, ibcq);
- gc->cq_table[cq->queue.id] = NULL;
+ gc->cq_table[cq->id] = NULL;
kfree(gdma_cq_allocated[i]);
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
@@ -374,7 +374,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
wq_spec.gdma_region = qp->sq_gdma_region;
wq_spec.queue_size = ucmd.sq_buf_size;
- cq_spec.gdma_region = send_cq->queue.gdma_region;
+ cq_spec.gdma_region = send_cq->gdma_region;
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
eq_vec = send_cq->comp_vector % gc->max_num_queues;
@@ -392,10 +392,10 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
/* The GDMA regions are now owned by the WQ object */
qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
- send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
qp->sq_id = wq_spec.queue_index;
- send_cq->queue.id = cq_spec.queue_index;
+ send_cq->id = cq_spec.queue_index;
/* Create CQ table entry */
err = mana_ib_install_cq_cb(mdev, send_cq);
@@ -404,10 +404,10 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev,
"ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
- qp->tx_object, qp->sq_id, send_cq->queue.id);
+ qp->tx_object, qp->sq_id, send_cq->id);
resp.sqid = qp->sq_id;
- resp.cqid = send_cq->queue.id;
+ resp.cqid = send_cq->id;
resp.tx_vp_offset = pd->tx_vp_offset;
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -422,7 +422,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
err_release_gdma_cq:
kfree(gdma_cq);
- gc->cq_table[send_cq->queue.id] = NULL;
+ gc->cq_table[send_cq->id] = NULL;
err_destroy_wq_obj:
mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 5a22be14d..96ffbbaf0 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/io.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>
@@ -109,6 +108,7 @@ static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
__be32 mmio_wqe[16] = {};
unsigned long flags;
unsigned int idx;
+ int i;
if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;
@@ -148,8 +148,10 @@ static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
* we hit doorbell
*/
wmb();
- __iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
- sizeof(mmio_wqe) / 8);
+ for (i = 0; i < 8; i++)
+ mlx5_write64(&mmio_wqe[i * 2],
+ bf->bfreg->map + bf->offset + i * 8);
+ io_stop_wc();
bf->offset ^= bf->buf_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 79ebafecc..a8de35c07 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -643,10 +643,9 @@ struct mlx5_ib_mkey {
unsigned int ndescs;
struct wait_queue_head wait;
refcount_t usecount;
- /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+ /* User Mkey must hold either a rb_key or a cache_ent. */
struct mlx5r_cache_rb_key rb_key;
struct mlx5_cache_ent *cache_ent;
- u8 cacheable : 1;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -1378,7 +1377,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
-void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ecc111ed5..a8ee2ca1f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1158,7 +1158,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
if (IS_ERR(mr))
return mr;
mr->mmkey.rb_key = rb_key;
- mr->mmkey.cacheable = true;
return mr;
}
@@ -1169,7 +1168,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->ibmr.pd = pd;
mr->umem = umem;
mr->page_shift = order_base_2(page_size);
- mr->mmkey.cacheable = true;
set_mr_fields(dev, mr, umem->length, access_flags, iova);
return mr;
@@ -1572,8 +1570,7 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
unsigned int diffs = current_access_flags ^ target_access_flags;
if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING |
- IB_ACCESS_REMOTE_ATOMIC))
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
return false;
return mlx5r_umr_can_reconfig(dev, current_access_flags,
target_access_flags);
@@ -1838,23 +1835,6 @@ end:
return ret;
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
-{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
-
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
- return 0;
-
- if (ent) {
- spin_lock_irq(&ent->mkeys_queue.lock);
- ent->in_use--;
- mr->mmkey.cache_ent = NULL;
- spin_unlock_irq(&ent->mkeys_queue.lock);
- }
- return destroy_mkey(dev, mr);
-}
-
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1900,9 +1880,16 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
- if (rc)
- return rc;
+ if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
+ if (mlx5r_umr_revoke_mr(mr) ||
+ cache_ent_find_and_store(dev, mr))
+ mr->mmkey.cache_ent = NULL;
+
+ if (!mr->mmkey.cache_ent) {
+ rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
+ if (rc)
+ return rc;
+ }
if (mr->umem) {
bool is_odp = is_odp_mr(mr);