summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:22 +0000
commitb20732900e4636a467c0183a47f7396700f5f743 (patch)
tree42f079ff82e701ebcb76829974b4caca3e5b6798 /drivers/infiniband/hw
parentAdding upstream version 6.8.12. (diff)
downloadlinux-b20732900e4636a467c0183a47f7396700f5f743.tar.xz
linux-b20732900e4636a467c0183a47f7396700f5f743.zip
Adding upstream version 6.9.7.upstream/6.9.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h4
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/efa/efa.h1
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c32
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c25
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c11
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h23
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c97
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h56
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c150
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h11
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c339
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c60
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c23
-rw-r--r--drivers/infiniband/hw/mana/mr.c1
-rw-r--r--drivers/infiniband/hw/mlx5/main.c4
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c8
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c13
21 files changed, 521 insertions, 353 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 9dca451ed5..6974922e56 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -107,8 +107,6 @@ struct bnxt_re_gsi_context {
struct bnxt_re_sqp_entries *sqp_tbl;
};
-#define BNXT_RE_MIN_MSIX 2
-#define BNXT_RE_MAX_MSIX 9
#define BNXT_RE_AEQ_IDX 0
#define BNXT_RE_NQ_IDX 1
#define BNXT_RE_GEN_P5_MAX_VF 64
@@ -168,7 +166,7 @@ struct bnxt_re_dev {
struct bnxt_qplib_rcfw rcfw;
/* NQ */
- struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX];
/* Device Resources */
struct bnxt_qplib_dev_attr dev_attr;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 50cb2259bf..fb8a0c2488 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -930,8 +930,6 @@ void c4iw_id_table_free(struct c4iw_id_table *alloc);
typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
-int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
struct c4iw_dev_ucontext *uctx);
u32 c4iw_get_resource(struct c4iw_id_table *id_table);
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index e2bdec32ae..926f9ff1f6 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -57,6 +57,7 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
+ unsigned int num_irq_vectors;
int admin_msix_vector_idx;
struct efa_irq admin_irq;
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 7b1910a862..5fa3603c80 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -322,7 +322,9 @@ static int efa_create_eqs(struct efa_dev *dev)
int err;
int i;
- neqs = min_t(unsigned int, neqs, num_online_cpus());
+ neqs = min_t(unsigned int, neqs,
+ dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
@@ -468,34 +470,30 @@ static void efa_disable_msix(struct efa_dev *dev)
static int efa_enable_msix(struct efa_dev *dev)
{
- int msix_vecs, irq_num;
+ int max_vecs, num_vecs;
/*
* Reserve the max msix vectors we might need, one vector is reserved
* for admin.
*/
- msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
- num_online_cpus() + 1);
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
- msix_vecs);
+ max_vecs);
dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
- irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
- msix_vecs, PCI_IRQ_MSIX);
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
- if (irq_num < 0) {
- dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
- irq_num);
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
return -ENOSPC;
}
- if (irq_num != msix_vecs) {
- efa_disable_msix(dev);
- dev_err(&dev->pdev->dev,
- "Allocated %d MSI-X (out of %d requested)\n",
- irq_num, msix_vecs);
- return -ENOSPC;
- }
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 18b05ffb41..c465966a1d 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -315,7 +315,7 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
* This routine returns the receive context associated
* with a a qp's qpn.
*
- * Returns the context.
+ * Return: the context.
*/
static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
struct rvt_qp *qp)
@@ -710,7 +710,7 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
* The exp_lock must be held.
*
* Return:
- * On success: a value postive value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On success: a value positive value between 0 and RXE_NUM_TID_FLOWS - 1
* On failure: -EAGAIN
*/
static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
@@ -1007,7 +1007,7 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
* pages are tested two at a time, i, i + 1 for contiguous
* pages and i - 1 and i contiguous pages.
*
- * If any condition is false, any accumlated pages are flushed and
+ * If any condition is false, any accumulated pages are flushed and
* v0,v1 are emitted as separate PAGE_SIZE pagesets
*
* Otherwise, the current 8k is totaled for a future flush.
@@ -1434,7 +1434,7 @@ static void kern_program_rcvarray(struct tid_rdma_flow *flow)
* (5) computes a tidarray with formatted TID entries which can be sent
* to the sender
* (6) Reserves and programs HW flows.
- * (7) It also manages queing the QP when TID/flow resources are not
+ * (7) It also manages queueing the QP when TID/flow resources are not
* available.
*
* @req points to struct tid_rdma_request of which the segments are a part. The
@@ -1604,7 +1604,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
}
/**
- * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information
+ * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
* @req: the tid rdma request to be cleaned
*/
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
@@ -2055,7 +2055,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* req->clear_tail is advanced). However, when an earlier
* request is received, this request will not be complete any
* more (qp->s_tail_ack_queue is moved back, see below).
- * Consequently, we need to update the TID flow info everytime
+ * Consequently, we need to update the TID flow info every time
* a duplicate request is received.
*/
bth0 = be32_to_cpu(ohdr->bth[0]);
@@ -2219,7 +2219,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
/*
* 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
* (see hfi1_rc_rcv())
- * 2. Put TID RDMA READ REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Initialize struct tid_rdma_flow info;
* - Copy TID entries;
@@ -2439,7 +2439,7 @@ find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA READ RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -3649,7 +3649,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
* 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
* (see hfi1_rc_rcv())
* - Don't allow 0-length requests.
- * 2. Put TID RDMA WRITE REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Prepare struct tid_rdma_flow array?
* 3. Set the qp->s_ack_state as state diagram in design doc.
@@ -4026,7 +4026,7 @@ unlock_r_lock:
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -5440,8 +5440,9 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
* the two state machines can step on each other with respect to the
* RVT_S_BUSY flag.
* Therefore, a modified test is used.
- * @return true if the second leg is scheduled;
- * false if the second leg is not scheduled.
+ *
+ * Return: %true if the second leg is scheduled;
+ * %false if the second leg is not scheduled.
*/
bool hfi1_schedule_tid_send(struct rvt_qp *qp)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 052a3d6090..11dbbabebd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -108,6 +108,9 @@ enum {
HNS_ROCE_CMD_QUERY_CEQC = 0x92,
HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+ /* SCC CTX commands */
+ HNS_ROCE_CMD_QUERY_SCCC = 0xa2,
+
/* SCC CTX BT commands */
HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 2517c972c6..68e22f368d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -133,14 +133,12 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 mtts[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle;
int ret;
- ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
- &dma_handle);
- if (!ret) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts));
+ if (ret) {
ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
- return -EINVAL;
+ return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
@@ -157,7 +155,8 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
goto err_put;
}
- ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle);
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts,
+ hns_roce_get_mtr_ba(&hr_cq->mtr));
if (ret)
goto err_xa;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 46f8a63109..0b47c6d688 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -100,6 +100,9 @@
#define CQ_BANKID_SHIFT 2
#define CQ_BANKID_MASK GENMASK(1, 0)
+#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
+#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
+
enum {
SERV_TYPE_RC,
SERV_TYPE_UC,
@@ -179,6 +182,7 @@ enum {
#define HNS_ROCE_CMD_SUCCESS 1
+#define HNS_ROCE_MAX_HOP_NUM 3
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -269,6 +273,11 @@ struct hns_roce_hem_list {
dma_addr_t root_ba; /* pointer to the root ba table */
};
+enum mtr_type {
+ MTR_DEFAULT = 0,
+ MTR_PBL,
+};
+
struct hns_roce_buf_attr {
struct {
size_t size; /* region size */
@@ -277,7 +286,10 @@ struct hns_roce_buf_attr {
unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
unsigned int user_access; /* umem access flag */
+ u64 iova;
+ enum mtr_type type;
bool mtt_only; /* only alloc buffer-required MTT memory */
+ bool adaptive; /* adaptive for page_shift and hopnum */
};
struct hns_roce_hem_cfg {
@@ -836,7 +848,8 @@ struct hns_roce_caps {
u16 default_aeq_period;
u16 default_aeq_arm_st;
u16 default_ceq_arm_st;
- enum hns_roce_cong_type cong_type;
+ u8 cong_cap;
+ enum hns_roce_cong_type default_cong_type;
};
enum hns_roce_device_state {
@@ -937,6 +950,7 @@ struct hns_roce_hw {
int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
int (*query_srqc)(struct hns_roce_dev *hr_dev, u32 srqn, void *buffer);
+ int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
u64 *stats, u32 port, int *hw_counters);
const struct ib_device_ops *hns_roce_dev_ops;
@@ -1153,8 +1167,13 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
/* hns roce hw need current block and next block addr from mtt */
#define MTT_MIN_COUNT 2
+static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr)
+{
+ return mtr->hem_cfg.root_ba;
+}
+
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+ u32 offset, u64 *mtt_buf, int mtt_max);
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
unsigned int page_shift, struct ib_udata *udata,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index c4ac06a338..658c522be7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -249,85 +249,48 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
}
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
- int npages,
unsigned long hem_alloc_size,
gfp_t gfp_mask)
{
- struct hns_roce_hem_chunk *chunk = NULL;
struct hns_roce_hem *hem;
- struct scatterlist *mem;
int order;
void *buf;
WARN_ON(gfp_mask & __GFP_HIGHMEM);
+ order = get_order(hem_alloc_size);
+ if (PAGE_SIZE << order != hem_alloc_size) {
+ dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
+ hem_alloc_size);
+ return NULL;
+ }
+
hem = kmalloc(sizeof(*hem),
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!hem)
return NULL;
- INIT_LIST_HEAD(&hem->chunk_list);
-
- order = get_order(hem_alloc_size);
+ buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
+ &hem->dma, gfp_mask);
+ if (!buf)
+ goto fail;
- while (npages > 0) {
- if (!chunk) {
- chunk = kmalloc(sizeof(*chunk),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
-
- sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
- chunk->npages = 0;
- chunk->nsg = 0;
- memset(chunk->buf, 0, sizeof(chunk->buf));
- list_add_tail(&chunk->list, &hem->chunk_list);
- }
-
- while (1 << order > npages)
- --order;
-
- /*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
- mem = &chunk->mem[chunk->npages];
- buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
- &sg_dma_address(mem), gfp_mask);
- if (!buf)
- goto fail;
-
- chunk->buf[chunk->npages] = buf;
- sg_dma_len(mem) = PAGE_SIZE << order;
-
- ++chunk->npages;
- ++chunk->nsg;
- npages -= 1 << order;
- }
+ hem->buf = buf;
+ hem->size = hem_alloc_size;
return hem;
fail:
- hns_roce_free_hem(hr_dev, hem);
+ kfree(hem);
return NULL;
}
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
- struct hns_roce_hem_chunk *chunk, *tmp;
- int i;
-
if (!hem)
return;
- list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(hr_dev->dev,
- sg_dma_len(&chunk->mem[i]),
- chunk->buf[i],
- sg_dma_address(&chunk->mem[i]));
- kfree(chunk);
- }
+ dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
kfree(hem);
}
@@ -415,7 +378,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
{
u32 bt_size = mhop->bt_chunk_size;
struct device *dev = hr_dev->dev;
- struct hns_roce_hem_iter iter;
gfp_t flag;
u64 bt_ba;
u32 size;
@@ -456,16 +418,15 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
*/
size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
flag = GFP_KERNEL | __GFP_NOWARN;
- table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
- size, flag);
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag);
if (!table->hem[index->buf]) {
ret = -ENOMEM;
goto err_alloc_hem;
}
index->inited |= HEM_INDEX_BUF;
- hns_roce_hem_first(table->hem[index->buf], &iter);
- bt_ba = hns_roce_hem_addr(&iter);
+ bt_ba = table->hem[index->buf]->dma;
+
if (table->type < HEM_TYPE_MTT) {
if (mhop->hop_num == 2)
*(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
@@ -586,7 +547,6 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
}
table->hem[i] = hns_roce_alloc_hem(hr_dev,
- table->table_chunk_size >> PAGE_SHIFT,
table->table_chunk_size,
GFP_KERNEL | __GFP_NOWARN);
if (!table->hem[i]) {
@@ -725,7 +685,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj, dma_addr_t *dma_handle)
{
- struct hns_roce_hem_chunk *chunk;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -734,7 +693,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
int offset, dma_offset;
void *addr = NULL;
u32 hem_idx = 0;
- int length;
int i, j;
mutex_lock(&table->mutex);
@@ -767,23 +725,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
if (!hem)
goto out;
- list_for_each_entry(chunk, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i) {
- length = sg_dma_len(&chunk->mem[i]);
- if (dma_handle && dma_offset >= 0) {
- if (length > (u32)dma_offset)
- *dma_handle = sg_dma_address(
- &chunk->mem[i]) + dma_offset;
- dma_offset -= length;
- }
-
- if (length > (u32)offset) {
- addr = chunk->buf[i] + offset;
- goto out;
- }
- offset -= length;
- }
- }
+ *dma_handle = hem->dma + dma_offset;
+ addr = hem->buf + offset;
out:
mutex_unlock(&table->mutex);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index fea6d7d508..9c415b2541 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -56,10 +56,6 @@ enum {
HEM_TYPE_TRRL,
};
-#define HNS_ROCE_HEM_CHUNK_LEN \
- ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist) + sizeof(void *)))
-
#define check_whether_bt_num_3(type, hop_num) \
((type) < HEM_TYPE_MTT && (hop_num) == 2)
@@ -72,25 +68,13 @@ enum {
((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
-struct hns_roce_hem_chunk {
- struct list_head list;
- int npages;
- int nsg;
- struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
- void *buf[HNS_ROCE_HEM_CHUNK_LEN];
-};
-
struct hns_roce_hem {
- struct list_head chunk_list;
+ void *buf;
+ dma_addr_t dma;
+ unsigned long size;
refcount_t refcount;
};
-struct hns_roce_hem_iter {
- struct hns_roce_hem *hem;
- struct hns_roce_hem_chunk *chunk;
- int page_idx;
-};
-
struct hns_roce_hem_mhop {
u32 hop_num;
u32 buf_chunk_size;
@@ -133,38 +117,4 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
int offset, int *mtt_cnt);
-static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
- struct hns_roce_hem_iter *iter)
-{
- iter->hem = hem;
- iter->chunk = list_empty(&hem->chunk_list) ? NULL :
- list_entry(hem->chunk_list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
-}
-
-static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
-{
- return !iter->chunk;
-}
-
-static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
-{
- if (++iter->page_idx >= iter->chunk->nsg) {
- if (iter->chunk->list.next == &iter->hem->chunk_list) {
- iter->chunk = NULL;
- return;
- }
-
- iter->chunk = list_entry(iter->chunk->list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
- }
-}
-
-static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
-{
- return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
-}
-
#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f95ec4618f..8800464c9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2209,11 +2209,12 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
- caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+ caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -3195,21 +3196,22 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t pbl_ba;
- int i, count;
+ int ret;
+ int i;
- count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
- min_t(int, ARRAY_SIZE(pages), mr->npages),
- &pbl_ba);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
- count);
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret);
+ return ret;
}
/* Aligned to the hardware address access unit */
- for (i = 0; i < count; i++)
+ for (i = 0; i < ARRAY_SIZE(pages); i++)
pages[i] >>= 6;
+ pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
+
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
@@ -3308,18 +3310,12 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
void *mb_buf, struct hns_roce_mr *mr)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
struct hns_roce_v2_mpt_entry *mpt_entry;
- dma_addr_t pbl_ba = 0;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
- ibdev_err(ibdev, "failed to find frmr mtr.\n");
- return -ENOBUFS;
- }
-
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
@@ -4064,7 +4060,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
u32 step_idx)
{
- struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -4101,12 +4096,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
if (check_whether_last_step(hop_num, step_idx)) {
hem = table->hem[hem_idx];
- for (hns_roce_hem_first(hem, &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter);
- ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
- step_idx);
- }
+
+ ret = set_hem_to_hw(hr_dev, obj, hem->dma, table->type, step_idx);
} else {
if (step_idx == 0)
bt_ba = table->bt_l0_dma_addr[i];
@@ -4347,17 +4338,20 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
{
u64 mtts[MTT_MIN_COUNT] = { 0 };
u64 wqe_sge_ba;
- int count;
+ int ret;
/* Search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
- MTT_MIN_COUNT, &wqe_sge_ba);
- if (hr_qp->rq.wqe_cnt && count < 1) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT);
+ if (hr_qp->rq.wqe_cnt && ret) {
ibdev_err(&hr_dev->ib_dev,
- "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
- return -EINVAL;
+ "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
+ wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr);
+
context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
@@ -4419,23 +4413,23 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 sge_cur_blk = 0;
u64 sq_cur_blk = 0;
- int count;
+ int ret;
/* search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset,
+ &sq_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
if (hr_qp->sge.sge_cnt > 0) {
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->sge.offset,
- &sge_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset, &sge_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
}
@@ -4745,14 +4739,8 @@ enum {
static int check_cong_type(struct ib_qp *ibqp,
struct hns_roce_congestion_algorithm *cong_alg)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI)
- hr_qp->cong_type = CONG_TYPE_DCQCN;
- else
- hr_qp->cong_type = hr_dev->caps.cong_type;
-
/* different congestion types match different configurations */
switch (hr_qp->cong_type) {
case CONG_TYPE_DCQCN:
@@ -4780,9 +4768,6 @@ static int check_cong_type(struct ib_qp *ibqp,
cong_alg->wnd_mode_sel = WND_LIMIT;
break;
default:
- ibdev_warn(&hr_dev->ib_dev,
- "invalid type(%u) for congestion selection.\n",
- hr_qp->cong_type);
hr_qp->cong_type = CONG_TYPE_DCQCN;
cong_alg->alg_sel = CONG_DCQCN;
cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
@@ -5333,6 +5318,30 @@ out:
return ret;
}
+static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
+{
+ struct hns_roce_v2_scc_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC,
+ qpn);
+ if (ret)
+ goto out;
+
+ context = mailbox->buf;
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_qp_context *context)
{
@@ -5586,18 +5595,20 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
struct ib_device *ibdev = srq->ibsrq.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
u64 mtts_idx[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_idx = 0;
+ dma_addr_t dma_handle_idx;
int ret;
/* Get physical address of idx que buf */
ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
- ARRAY_SIZE(mtts_idx), &dma_handle_idx);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_idx));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr);
+
hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
@@ -5629,20 +5640,22 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_srq_context *ctx = mb_buf;
u64 mtts_wqe[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_wqe = 0;
+ dma_addr_t dma_handle_wqe;
int ret;
memset(ctx, 0, sizeof(*ctx));
/* Get the physical address of srq buf */
ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
- ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_wqe));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr);
+
hr_reg_write(ctx, SRQC_SRQ_ST, 1);
hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
srq->ibsrq.srq_type == IB_SRQT_XRC);
@@ -5790,7 +5803,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
dev_info(hr_dev->dev,
"cq_period(%u) reached the upper limit, adjusted to 65.\n",
cq_period);
- cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
}
cq_period *= HNS_ROCE_CLOCK_ADJUST;
}
@@ -6358,7 +6371,7 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
struct hns_roce_eq_context *eqc;
u64 bt_ba = 0;
- int count;
+ int ret;
eqc = mb_buf;
memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@@ -6366,13 +6379,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
init_eq_config(hr_dev, eq);
/* if not multi-hop, eqe buffer only use one trunk */
- count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT,
- &bt_ba);
- if (count < 1) {
- dev_err(hr_dev->dev, "failed to find EQE mtr\n");
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba,
+ ARRAY_SIZE(eqe_ba));
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret);
+ return ret;
}
+ bt_ba = hns_roce_get_mtr_ba(&eq->mtr);
+
hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
@@ -6719,6 +6734,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_qpc = hns_roce_v2_query_qpc,
.query_mpt = hns_roce_v2_query_mpt,
.query_srqc = hns_roce_v2_query_srqc,
+ .query_sccc = hns_roce_v2_query_sccc,
.query_hw_counter = hns_roce_hw_v2_query_counter,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index cd97cbee68..dfed6b4ddb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -646,6 +646,12 @@ struct hns_roce_v2_qp_context {
#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+#define SCC_CONTEXT_SIZE 16
+
+struct hns_roce_v2_scc_context {
+ __le32 data[SCC_CONTEXT_SIZE];
+};
+
#define V2_QP_RWE_S 1 /* rdma write enable */
#define V2_QP_RRE_S 2 /* rdma read enable */
#define V2_QP_ATE_S 3 /* rdma atomic enable */
@@ -1214,12 +1220,13 @@ struct hns_roce_query_pf_caps_d {
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
-#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
@@ -1327,7 +1334,7 @@ struct fmea_ram_ecc {
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
-#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65
#define HNS_ROCE_MAX_EQ_PERIOD 65
#define HNS_ROCE_RNR_TIMER_10NS 1
#define HNS_ROCE_1US_CFG 999
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index a33d3cedbc..d202258368 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -41,6 +41,7 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
const u8 *addr)
@@ -193,6 +194,12 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ props->max_ah = INT_MAX;
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
+ props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
props->max_srq = hr_dev->caps.num_srqs;
props->max_srq_wr = hr_dev->caps.max_srq_wrs;
@@ -395,6 +402,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
}
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ resp.congest_type = hr_dev->caps.cong_cap;
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_out;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 0d42fd197c..80c050d7d0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -32,6 +32,7 @@
*/
#include <linux/vmalloc.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
#include <linux/math.h>
#include "hns_roce_device.h"
@@ -103,14 +104,21 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
buf_attr.user_access = mr->access;
/* fast MR's buffer is alloced before mapping, not at creation */
buf_attr.mtt_only = is_fast;
+ buf_attr.iova = mr->iova;
+ /* pagesize and hopnum is fixed for fast MR */
+ buf_attr.adaptive = !is_fast;
+ buf_attr.type = MTR_PBL;
err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
udata, start);
- if (err)
+ if (err) {
ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
- else
- mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ return err;
+ }
+
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ mr->pbl_hop_num = buf_attr.region[0].hopnum;
return err;
}
@@ -694,7 +702,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->umem = NULL;
mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
buf_attr->page_shift,
- mtr->hem_cfg.is_direct ?
+ !mtr_has_mtt(buf_attr) ?
HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
@@ -706,14 +714,41 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return 0;
}
-static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- int page_count, unsigned int page_shift)
+static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
+{
+ struct hns_roce_buf_region *region;
+ int page_cnt = 0;
+ int i;
+
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ region = &mtr->hem_cfg.region[i];
+ page_cnt += region->count;
+ }
+
+ return page_cnt;
+}
+
+static bool need_split_huge_page(struct hns_roce_mtr *mtr)
+{
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. If the current MTR has multiple
+ * regions, we split the buffer into small pages(4k, required by hns
+ * ROCEE). These pages will be used in multiple regions.
+ */
+ return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
+}
+
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ int page_count = cal_mtr_pg_cnt(mtr);
+ unsigned int page_shift;
dma_addr_t *pages;
int npage;
int ret;
+ page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
+ mtr->hem_cfg.buf_pg_shift;
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages)
@@ -733,7 +768,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto err_alloc_list;
}
- if (mtr->hem_cfg.is_direct && npage > 1) {
+ if (need_split_huge_page(mtr) && npage > 1) {
ret = mtr_check_direct_pages(pages, npage, page_shift);
if (ret) {
ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
@@ -808,47 +843,53 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return ret;
}
-int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
+ u32 start_index, u64 *mtt_buf,
+ int mtt_cnt)
{
- struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
- int mtt_count, left;
- u32 start_index;
+ int mtt_count;
int total = 0;
- __le64 *mtts;
u32 npage;
u64 addr;
- if (!mtt_buf || mtt_max < 1)
- goto done;
-
- /* no mtt memory in direct mode, so just return the buffer address */
- if (cfg->is_direct) {
- start_index = offset >> HNS_HW_PAGE_SHIFT;
- for (mtt_count = 0; mtt_count < cfg->region_count &&
- total < mtt_max; mtt_count++) {
- npage = cfg->region[mtt_count].offset;
- if (npage < start_index)
- continue;
+ if (mtt_cnt > cfg->region_count)
+ return -EINVAL;
- addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
- mtt_buf[total] = addr;
+ for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
+ mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
- total++;
- }
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
- goto done;
+ total++;
}
- start_index = offset >> cfg->buf_pg_shift;
- left = mtt_max;
+ if (!total)
+ return -ENOENT;
+
+ return 0;
+}
+
+static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, u32 start_index,
+ u64 *mtt_buf, int mtt_cnt)
+{
+ int left = mtt_cnt;
+ int total = 0;
+ int mtt_count;
+ __le64 *mtts;
+ u32 npage;
+
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
start_index + total,
&mtt_count);
if (!mtts || !mtt_count)
- goto done;
+ break;
npage = min(mtt_count, left);
left -= npage;
@@ -856,69 +897,165 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
}
-done:
- if (base_addr)
- *base_addr = cfg->root_ba;
+ if (!total)
+ return -ENOENT;
+
+ return 0;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ u32 start_index;
+ int ret;
+
+ if (!mtt_buf || mtt_max < 1)
+ return -EINVAL;
+
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
+ mtt_buf, mtt_max);
+ } else {
+ start_index = offset >> cfg->buf_pg_shift;
+ ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
+ mtt_buf, mtt_max);
+ }
+ return ret;
+}
+
+static int get_best_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ unsigned int page_sz;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
+ return 0;
+
+ page_sz = ib_umem_find_best_pgsz(mtr->umem,
+ hr_dev->caps.page_size_cap,
+ buf_attr->iova);
+ if (!page_sz)
+ return -EINVAL;
+
+ buf_attr->page_shift = order_base_2(page_sz);
+ return 0;
+}
+
+static int get_best_hop_num(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_pg_shift)
+{
+#define INVALID_HOPNUM -1
+#define MIN_BA_CNT 1
+ size_t buf_pg_sz = 1 << buf_attr->page_shift;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t ba_pg_sz = 1 << ba_pg_shift;
+ int hop_num = INVALID_HOPNUM;
+ size_t unit = MIN_BA_CNT;
+ size_t ba_cnt;
+ int j;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
+ return 0;
+
+ /* Caculating the number of buf pages, each buf page need a BA */
+ if (mtr->umem)
+ ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
+
+ for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
+ if (ba_cnt <= unit) {
+ hop_num = j;
+ break;
+ }
+ /* Number of BAs can be represented at per hop */
+ unit *= ba_pg_sz / BA_BYTE_LEN;
+ }
+
+ if (hop_num < 0) {
+ ibdev_err(ibdev,
+ "failed to calculate a valid hopnum.\n");
+ return -EINVAL;
+ }
- return total;
+ buf_attr->region[0].hopnum = hop_num;
+
+ return 0;
+}
+
+static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (attr->region_count > ARRAY_SIZE(attr->region) ||
+ attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev,
+ "invalid buf attr, region count %d, page shift %u.\n",
+ attr->region_count, attr->page_shift);
+ return false;
+ }
+
+ return true;
}
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
- struct hns_roce_buf_attr *attr,
- struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift, u64 unalinged_size)
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *attr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct hns_roce_buf_region *r;
- u64 first_region_padding;
- int page_cnt, region_cnt;
- unsigned int page_shift;
+ size_t buf_pg_sz;
size_t buf_size;
+ int page_cnt, i;
+ u64 pgoff = 0;
+
+ if (!is_buf_attr_valid(hr_dev, attr))
+ return -EINVAL;
/* If mtt is disabled, all pages must be within a continuous range */
cfg->is_direct = !mtr_has_mtt(attr);
+ cfg->region_count = attr->region_count;
buf_size = mtr_bufs_size(attr);
- if (cfg->is_direct) {
- /* When HEM buffer uses 0-level addressing, the page size is
- * equal to the whole buffer size, and we split the buffer into
- * small pages which is used to check whether the adjacent
- * units are in the continuous space and its size is fixed to
- * 4K based on hns ROCEE's requirement.
- */
- page_shift = HNS_HW_PAGE_SHIFT;
-
- /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ if (need_split_huge_page(mtr)) {
+ buf_pg_sz = HNS_HW_PAGE_SIZE;
cfg->buf_pg_count = 1;
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
- first_region_padding = 0;
} else {
- page_shift = attr->page_shift;
- cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
- 1 << page_shift);
- cfg->buf_pg_shift = page_shift;
- first_region_padding = unalinged_size;
+ buf_pg_sz = 1 << attr->page_shift;
+ cfg->buf_pg_count = mtr->umem ?
+ ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
+ DIV_ROUND_UP(buf_size, buf_pg_sz);
+ cfg->buf_pg_shift = attr->page_shift;
+ pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
}
/* Convert buffer size to page index and page count for each region and
* the buffer's offset needs to be appended to the first region.
*/
- for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
- region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
- r = &cfg->region[region_cnt];
+ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
+ r = &cfg->region[i];
r->offset = page_cnt;
- buf_size = hr_hw_page_align(attr->region[region_cnt].size +
- first_region_padding);
- r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
- first_region_padding = 0;
+ buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
+ if (attr->type == MTR_PBL && mtr->umem)
+ r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
+
+ pgoff = 0;
page_cnt += r->count;
- r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
- r->count);
+ r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
}
- cfg->region_count = region_cnt;
- *buf_page_shift = page_shift;
-
- return page_cnt;
+ return 0;
}
static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
@@ -1006,50 +1143,58 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int buf_page_shift = 0;
- int buf_page_cnt;
int ret;
- buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
- &buf_page_shift,
- udata ? user_addr & ~PAGE_MASK : 0);
- if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
- ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
- buf_page_cnt, buf_page_shift);
- return -EINVAL;
- }
-
- ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
- return ret;
- }
-
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
* to finish the MTT configuration.
*/
if (buf_attr->mtt_only) {
mtr->umem = NULL;
mtr->kmem = NULL;
- return 0;
+ } else {
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = get_best_page_shift(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
+ if (ret)
+ goto err_init_buf;
}
- ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
- goto err_alloc_mtt;
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ goto err_init_buf;
}
+ if (buf_attr->mtt_only)
+ return 0;
+
/* Write buffer's dma address to MTT */
- ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
- if (ret)
+ ret = mtr_map_bufs(hr_dev, mtr);
+ if (ret) {
ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
- else
- return 0;
+ goto err_alloc_mtt;
+ }
+
+ return 0;
- mtr_free_bufs(hr_dev, mtr);
err_alloc_mtt:
mtr_free_mtt(hr_dev, mtr);
+err_init_buf:
+ mtr_free_bufs(hr_dev, mtr);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 31b1472106..f35a66325d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1004,6 +1004,60 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
kfree(hr_qp->sq.wrid);
}
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ else
+ hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+ switch (ucmd->cong_type_flags) {
+ case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+ hr_qp->cong_type = CONG_TYPE_LDCP;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+ hr_qp->cong_type = CONG_TYPE_HC3;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+ hr_qp->cong_type = CONG_TYPE_DIP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+ return -EOPNOTSUPP;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+ hr_qp->cong_type != CONG_TYPE_DCQCN)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+ return set_congest_type(hr_qp, ucmd);
+
+ default_congest_type(hr_dev, hr_qp);
+
+ return 0;
+}
+
static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata,
@@ -1043,6 +1097,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+
+ ret = set_congest_param(hr_dev, hr_qp, ucmd);
+ if (ret)
+ return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
@@ -1051,6 +1109,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
+
+ default_congest_type(hr_dev, hr_qp);
}
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index f7f3c4cc74..356d988169 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -97,16 +97,33 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
- struct hns_roce_v2_qp_context context;
+ struct hns_roce_full_qp_ctx {
+ struct hns_roce_v2_qp_context qpc;
+ struct hns_roce_v2_scc_context sccc;
+ } context = {};
int ret;
if (!hr_dev->hw->query_qpc)
return -EINVAL;
- ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context);
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context.qpc);
if (ret)
- return -EINVAL;
+ return ret;
+
+ /* If SCC is disabled or the query fails, the queried SCCC will
+ * be all 0.
+ */
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) ||
+ !hr_dev->hw->query_sccc)
+ goto out;
+
+ ret = hr_dev->hw->query_sccc(hr_dev, hr_qp->qpn, &context.sccc);
+ if (ret)
+ ibdev_warn_ratelimited(&hr_dev->ib_dev,
+ "failed to query SCCC, ret = %d.\n",
+ ret);
+out:
ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
return ret;
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index b70b13484f..13a49d8fd4 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -112,6 +112,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
"start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
start, iova, length, access_flags);
+ access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~VALID_MR_FLAGS)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c2b557e642..9fb8a54423 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3760,10 +3760,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
return 0;
-err:
- mlx5r_macsec_dealloc_gids(dev);
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
+err:
+ mlx5r_macsec_dealloc_gids(dev);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 79ebafecca..f255a12e26 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1378,7 +1378,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
-void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ecc111ed5d..d3c1f63791 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -246,6 +246,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2,
(ent->rb_key.access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
MLX5_SET(mkc, mkc, translations_octword_size,
get_mkc_octo_size(ent->rb_key.access_mode,
@@ -641,10 +642,8 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
new = &((*new)->rb_left);
if (cmp < 0)
new = &((*new)->rb_right);
- if (cmp == 0) {
- mutex_unlock(&cache->rb_lock);
+ if (cmp == 0)
return -EEXIST;
- }
}
/* Add new node and rebalance tree. */
@@ -719,6 +718,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
}
mr->mmkey.cache_ent = ent;
mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.rb_key = ent->rb_key;
+ mr->mmkey.cacheable = true;
init_waitqueue_head(&mr->mmkey.wait);
return mr;
}
@@ -1169,7 +1170,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->ibmr.pd = pd;
mr->umem = umem;
mr->page_shift = order_base_2(page_size);
- mr->mmkey.cacheable = true;
set_mr_fields(dev, mr, umem->length, access_flags, iova);
return mr;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index a056ea835d..84be0c3d56 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -199,17 +199,20 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
int err;
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) /
+ sizeof(struct mlx5_wqe_data_seg);
if (init_attr->srq_type != IB_SRQT_BASIC &&
init_attr->srq_type != IB_SRQT_XRC &&
init_attr->srq_type != IB_SRQT_TM)
return -EOPNOTSUPP;
- /* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= max_srq_wqes) {
- mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
- init_attr->attr.max_wr,
- max_srq_wqes);
+ /* Sanity check SRQ and sge size before proceeding */
+ if (init_attr->attr.max_wr >= max_srq_wqes ||
+ init_attr->attr.max_sge > max_sge_sz) {
+ mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n",
+ init_attr->attr.max_wr, max_srq_wqes,
+ init_attr->attr.max_sge, max_sge_sz);
return -EINVAL;
}