summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hns
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/infiniband/hw/hns/Kconfig21
-rw-r--r--drivers/infiniband/hw/hns/Makefile15
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c118
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c186
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c276
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h155
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h199
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c533
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c180
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h1239
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c1486
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h170
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c7103
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h1482
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c1040
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c1052
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c170
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c1563
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c231
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c465
20 files changed, 17684 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
new file mode 100644
index 000000000..ab3fbba70
--- /dev/null
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_HNS
+ tristate "HNS RoCE Driver"
+ depends on NET_VENDOR_HISILICON
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on (HNS_DSAF && HNS_ENET) || HNS3
+ help
+ This is a RoCE/RDMA driver for the Hisilicon RoCE engine.
+
+ To compile HIP08 driver as module, choose M here.
+
+config INFINIBAND_HNS_HIP08
+ bool "Hisilicon Hip08 Family RoCE support"
+ depends on INFINIBAND_HNS && PCI && HNS3
+ depends on INFINIBAND_HNS=m || HNS3=y
+ help
+ RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
+ The RoCE engine is a PCI device.
+
+ To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
+ module will be called hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
new file mode 100644
index 000000000..a7d259238
--- /dev/null
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Hisilicon RoCE drivers.
+#
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+
+hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
+ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
+
+ifdef CONFIG_INFINIBAND_HNS_HIP08
+hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs)
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
+endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
new file mode 100644
index 000000000..103a7787b
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include "hnae3.h"
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+ u32 fl = ah_attr->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ sport = prandom_u32_max(IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN;
+ else
+ sport = rdma_flow_label_to_udp_sport(fl);
+
+ return sport;
+}
+
+int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
+ struct hns_roce_ah *ah = to_hr_ah(ibah);
+ int ret = 0;
+ u32 max_sl;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
+ return -EOPNOTSUPP;
+
+ ah->av.port = rdma_ah_get_port_num(ah_attr);
+ ah->av.gid_index = grh->sgid_index;
+
+ if (rdma_ah_get_static_rate(ah_attr))
+ ah->av.stat_rate = IB_RATE_10_GBPS;
+
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.flowlabel = grh->flow_label;
+ ah->av.udp_sport = get_ah_udp_sport(ah_attr);
+ ah->av.tclass = get_tclass(grh);
+
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(ah->av.sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set sl, sl (%u) shouldn't be larger than %u.\n",
+ ah->av.sl, max_sl);
+ return -EINVAL;
+ }
+
+ memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
+ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
+
+ /* HIP08 needs to record vlan info in Address Vector */
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
+ &ah->av.vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
+ }
+
+ return ret;
+}
+
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct hns_roce_ah *ah = to_hr_ah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+
+ rdma_ah_set_sl(ah_attr, ah->av.sl);
+ rdma_ah_set_port_num(ah_attr, ah->av.port);
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
+ rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel,
+ ah->av.gid_index, ah->av.hop_limit, ah->av.tclass);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
new file mode 100644
index 000000000..11a78ceae
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
+
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
+{
+ struct hns_roce_buf_list *trunks;
+ u32 i;
+
+ if (!buf)
+ return;
+
+ trunks = buf->trunk_list;
+ if (trunks) {
+ buf->trunk_list = NULL;
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+ trunks[i].buf, trunks[i].map);
+
+ kfree(trunks);
+ }
+
+ kfree(buf);
+}
+
+/*
+ * Allocate the dma buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a continuous dma address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags)
+{
+ u32 trunk_size, page_size, alloced_size;
+ struct hns_roce_buf_list *trunks;
+ struct hns_roce_buf *buf;
+ gfp_t gfp_flags;
+ u32 ntrunk, i;
+
+ /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
+ if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+ return ERR_PTR(-EINVAL);
+
+ gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+ buf = kzalloc(sizeof(*buf), gfp_flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ buf->page_shift = page_shift;
+ page_size = 1 << buf->page_shift;
+
+ /* Calc the trunk size and num by required size and page_shift */
+ if (flags & HNS_ROCE_BUF_DIRECT) {
+ buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
+ ntrunk = 1;
+ } else {
+ buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
+ ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
+ }
+
+ trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+ if (!trunks) {
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ trunk_size = 1 << buf->trunk_shift;
+ alloced_size = 0;
+ for (i = 0; i < ntrunk; i++) {
+ trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+ &trunks[i].map, gfp_flags);
+ if (!trunks[i].buf)
+ break;
+
+ alloced_size += trunk_size;
+ }
+
+ buf->ntrunks = i;
+
+ /* In nofail mode, it's only failed when the alloced size is 0 */
+ if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, trunk_size,
+ trunks[i].buf, trunks[i].map);
+
+ kfree(trunks);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+ buf->trunk_list = trunks;
+
+ return buf;
+}
+
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift)
+{
+ unsigned int offset, max_size;
+ int total = 0;
+ int i;
+
+ if (page_shift > buf->trunk_shift) {
+ dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n",
+ page_shift, buf->trunk_shift);
+ return -EINVAL;
+ }
+
+ offset = 0;
+ max_size = buf->ntrunks << buf->trunk_shift;
+ for (i = 0; i < buf_cnt && offset < max_size; i++) {
+ bufs[total++] = hns_roce_buf_dma_addr(buf, offset);
+ offset += (1 << page_shift);
+ }
+
+ return total;
+}
+
+int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift)
+{
+ struct ib_block_iter biter;
+ int total = 0;
+
+ /* convert system page cnt to hw page cnt */
+ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
+ bufs[total++] = rdma_block_iter_dma_address(&biter);
+ if (total >= buf_cnt)
+ goto done;
+ }
+
+done:
+ return total;
+}
+
+void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ida_destroy(&hr_dev->xrcd_ida.ida);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ hns_roce_cleanup_qp_table(hr_dev);
+ hns_roce_cleanup_cq_table(hr_dev);
+ ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
+ ida_destroy(&hr_dev->pd_ida.ida);
+ ida_destroy(&hr_dev->uar_ida.ida);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
new file mode 100644
index 000000000..864413607
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/dmapool.h>
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+
+#define CMD_POLL_TOKEN 0xffff
+#define CMD_MAX_NUM 32
+
+static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+}
+
+/* this should be called with "poll_sem" */
+static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ int ret;
+
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
+ if (ret) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox 0x%x in poll mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
+ return ret;
+ }
+
+ return hr_dev->hw->poll_mbox_done(hr_dev);
+}
+
+static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ int ret;
+
+ down(&hr_dev->cmd.poll_sem);
+ ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg);
+ up(&hr_dev->cmd.poll_sem);
+
+ return ret;
+}
+
+void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
+ u64 out_param)
+{
+ struct hns_roce_cmd_context *context =
+ &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds];
+
+ if (unlikely(token != context->token)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "[cmd] invalid ae token 0x%x, context token is 0x%x.\n",
+ token, context->token);
+ return;
+ }
+
+ context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
+ context->out_param = out_param;
+ complete(&context->done);
+}
+
+static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ struct hns_roce_cmdq *cmd = &hr_dev->cmd;
+ struct hns_roce_cmd_context *context;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ spin_lock(&cmd->context_lock);
+
+ do {
+ context = &cmd->context[cmd->free_head];
+ cmd->free_head = context->next;
+ } while (context->busy);
+
+ context->busy = 1;
+ context->token += cmd->max_cmds;
+
+ spin_unlock(&cmd->context_lock);
+
+ reinit_completion(&context->done);
+
+ mbox_msg->token = context->token;
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
+ if (ret) {
+ dev_err_ratelimited(dev,
+ "failed to post mailbox 0x%x in event mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
+ goto out;
+ }
+
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) {
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n",
+ context->token, mbox_msg->cmd);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = context->result;
+ if (ret)
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n",
+ context->token, mbox_msg->cmd, ret);
+
+out:
+ context->busy = 0;
+ return ret;
+}
+
+static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ int ret;
+
+ down(&hr_dev->cmd.event_sem);
+ ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg);
+ up(&hr_dev->cmd.event_sem);
+
+ return ret;
+}
+
+int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
+ u8 cmd, unsigned long tag)
+{
+ struct hns_roce_mbox_msg mbox_msg = {};
+ bool is_busy;
+
+ if (hr_dev->hw->chk_mbox_avail)
+ if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy))
+ return is_busy ? -EBUSY : 0;
+
+ mbox_msg.in_param = in_param;
+ mbox_msg.out_param = out_param;
+ mbox_msg.cmd = cmd;
+ mbox_msg.tag = tag;
+
+ if (hr_dev->cmd.use_events) {
+ mbox_msg.event_en = 1;
+
+ return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg);
+ } else {
+ mbox_msg.event_en = 0;
+ mbox_msg.token = CMD_POLL_TOKEN;
+
+ return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg);
+ }
+}
+
+int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
+{
+ sema_init(&hr_dev->cmd.poll_sem, 1);
+ hr_dev->cmd.use_events = 0;
+ hr_dev->cmd.max_cmds = CMD_MAX_NUM;
+ hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", hr_dev->dev,
+ HNS_ROCE_MAILBOX_SIZE,
+ HNS_ROCE_MAILBOX_SIZE, 0);
+ if (!hr_dev->cmd.pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev)
+{
+ dma_pool_destroy(hr_dev->cmd.pool);
+}
+
+int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
+ int i;
+
+ hr_cmd->context =
+ kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL);
+ if (!hr_cmd->context) {
+ hr_dev->cmd_mod = 0;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < hr_cmd->max_cmds; ++i) {
+ hr_cmd->context[i].token = i;
+ hr_cmd->context[i].next = i + 1;
+ init_completion(&hr_cmd->context[i].done);
+ }
+ hr_cmd->context[hr_cmd->max_cmds - 1].next = 0;
+ hr_cmd->free_head = 0;
+
+ sema_init(&hr_cmd->event_sem, hr_cmd->max_cmds);
+ spin_lock_init(&hr_cmd->context_lock);
+
+ hr_cmd->use_events = 1;
+
+ return 0;
+}
+
+void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
+
+ kfree(hr_cmd->context);
+ hr_cmd->use_events = 0;
+}
+
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf =
+ dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+
+void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox)
+{
+ if (!mailbox)
+ return;
+
+ dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx);
+}
+
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
new file mode 100644
index 000000000..052a3d609
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_CMD_H
+#define _HNS_ROCE_CMD_H
+
+#define HNS_ROCE_MAILBOX_SIZE 4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000
+
+enum {
+ /* QPC BT commands */
+ HNS_ROCE_CMD_WRITE_QPC_BT0 = 0x0,
+ HNS_ROCE_CMD_WRITE_QPC_BT1 = 0x1,
+ HNS_ROCE_CMD_WRITE_QPC_BT2 = 0x2,
+ HNS_ROCE_CMD_READ_QPC_BT0 = 0x4,
+ HNS_ROCE_CMD_READ_QPC_BT1 = 0x5,
+ HNS_ROCE_CMD_READ_QPC_BT2 = 0x6,
+ HNS_ROCE_CMD_DESTROY_QPC_BT0 = 0x8,
+ HNS_ROCE_CMD_DESTROY_QPC_BT1 = 0x9,
+ HNS_ROCE_CMD_DESTROY_QPC_BT2 = 0xa,
+
+ /* QPC operation */
+ HNS_ROCE_CMD_MODIFY_QPC = 0x41,
+ HNS_ROCE_CMD_QUERY_QPC = 0x42,
+
+ HNS_ROCE_CMD_MODIFY_CQC = 0x52,
+ HNS_ROCE_CMD_QUERY_CQC = 0x53,
+ /* CQC BT commands */
+ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10,
+ HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11,
+ HNS_ROCE_CMD_WRITE_CQC_BT2 = 0x12,
+ HNS_ROCE_CMD_READ_CQC_BT0 = 0x14,
+ HNS_ROCE_CMD_READ_CQC_BT1 = 0x15,
+ HNS_ROCE_CMD_READ_CQC_BT2 = 0x1b,
+ HNS_ROCE_CMD_DESTROY_CQC_BT0 = 0x18,
+ HNS_ROCE_CMD_DESTROY_CQC_BT1 = 0x19,
+ HNS_ROCE_CMD_DESTROY_CQC_BT2 = 0x1a,
+
+ /* MPT BT commands */
+ HNS_ROCE_CMD_WRITE_MPT_BT0 = 0x20,
+ HNS_ROCE_CMD_WRITE_MPT_BT1 = 0x21,
+ HNS_ROCE_CMD_WRITE_MPT_BT2 = 0x22,
+ HNS_ROCE_CMD_READ_MPT_BT0 = 0x24,
+ HNS_ROCE_CMD_READ_MPT_BT1 = 0x25,
+ HNS_ROCE_CMD_READ_MPT_BT2 = 0x26,
+ HNS_ROCE_CMD_DESTROY_MPT_BT0 = 0x28,
+ HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29,
+ HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a,
+
+ /* CQC TIMER commands */
+ HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 = 0x23,
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0 = 0x27,
+
+ /* MPT commands */
+ HNS_ROCE_CMD_QUERY_MPT = 0x62,
+
+ /* SRQC BT commands */
+ HNS_ROCE_CMD_WRITE_SRQC_BT0 = 0x30,
+ HNS_ROCE_CMD_WRITE_SRQC_BT1 = 0x31,
+ HNS_ROCE_CMD_WRITE_SRQC_BT2 = 0x32,
+ HNS_ROCE_CMD_READ_SRQC_BT0 = 0x34,
+ HNS_ROCE_CMD_READ_SRQC_BT1 = 0x35,
+ HNS_ROCE_CMD_READ_SRQC_BT2 = 0x36,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a,
+
+ /* QPC TIMER commands */
+ HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 = 0x33,
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0 = 0x37,
+
+ /* EQC commands */
+ HNS_ROCE_CMD_CREATE_AEQC = 0x80,
+ HNS_ROCE_CMD_MODIFY_AEQC = 0x81,
+ HNS_ROCE_CMD_QUERY_AEQC = 0x82,
+ HNS_ROCE_CMD_DESTROY_AEQC = 0x83,
+ HNS_ROCE_CMD_CREATE_CEQC = 0x90,
+ HNS_ROCE_CMD_MODIFY_CEQC = 0x91,
+ HNS_ROCE_CMD_QUERY_CEQC = 0x92,
+ HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+
+ /* SCC CTX BT commands */
+ HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
+ HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
+};
+
+enum {
+ /* TPT commands */
+ HNS_ROCE_CMD_CREATE_MPT = 0xd,
+ HNS_ROCE_CMD_DESTROY_MPT = 0xf,
+
+ /* CQ commands */
+ HNS_ROCE_CMD_CREATE_CQC = 0x16,
+ HNS_ROCE_CMD_DESTROY_CQC = 0x17,
+
+ /* QP/EE commands */
+ HNS_ROCE_CMD_RST2INIT_QP = 0x19,
+ HNS_ROCE_CMD_INIT2RTR_QP = 0x1a,
+ HNS_ROCE_CMD_RTR2RTS_QP = 0x1b,
+ HNS_ROCE_CMD_RTS2RTS_QP = 0x1c,
+ HNS_ROCE_CMD_2ERR_QP = 0x1e,
+ HNS_ROCE_CMD_RTS2SQD_QP = 0x1f,
+ HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
+ HNS_ROCE_CMD_2RST_QP = 0x21,
+ HNS_ROCE_CMD_QUERY_QP = 0x22,
+ HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
+ HNS_ROCE_CMD_CREATE_SRQ = 0x70,
+ HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
+ HNS_ROCE_CMD_QUERY_SRQC = 0x73,
+ HNS_ROCE_CMD_DESTROY_SRQ = 0x74,
+};
+
+int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
+ u8 cmd, unsigned long tag);
+
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
+void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox);
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx);
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd,
+ unsigned long idx);
+
+#endif /* _HNS_ROCE_CMD_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
new file mode 100644
index 000000000..465d1f914
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_COMMON_H
+#define _HNS_ROCE_COMMON_H
+#include <linux/bitfield.h>
+
+#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg))
+#define roce_read(dev, reg) readl((dev)->reg_base + (reg))
+#define roce_raw_write(value, addr) \
+ __raw_writel((__force u32)cpu_to_le32(value), (addr))
+
+#define roce_get_field(origin, mask, shift) \
+ ((le32_to_cpu(origin) & (mask)) >> (u32)(shift))
+
+#define roce_get_bit(origin, shift) \
+ roce_get_field((origin), (1ul << (shift)), (shift))
+
+#define roce_set_field(origin, mask, shift, val) \
+ do { \
+ (origin) &= ~cpu_to_le32(mask); \
+ (origin) |= \
+ cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \
+ } while (0)
+
+#define roce_set_bit(origin, shift, val) \
+ roce_set_field((origin), (1ul << (shift)), (shift), (val))
+
+#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l
+
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) |= cpu_to_le32( \
+ BIT((field_l) % 32) + \
+ BUILD_BUG_ON_ZERO((field_h) != (field_l))); \
+ })
+
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ ~cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)); \
+ })
+
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
+
+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ })
+
+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val)
+
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), val)); \
+ })
+
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
+
+#define _hr_reg_read(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \
+ le32_to_cpu(*((__le32 *)_ptr + (field_h) / 32))); \
+ })
+
+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+
+/*************ROCEE_REG DEFINITION****************/
+#define ROCEE_VENDOR_ID_REG 0x0
+#define ROCEE_VENDOR_PART_ID_REG 0x4
+
+#define ROCEE_SYS_IMAGE_GUID_L_REG 0xC
+#define ROCEE_SYS_IMAGE_GUID_H_REG 0x10
+
+#define ROCEE_PORT_GID_L_0_REG 0x50
+#define ROCEE_PORT_GID_ML_0_REG 0x54
+#define ROCEE_PORT_GID_MH_0_REG 0x58
+#define ROCEE_PORT_GID_H_0_REG 0x5C
+
+#define ROCEE_BT_CMD_H_REG 0x204
+
+#define ROCEE_SMAC_L_0_REG 0x240
+#define ROCEE_SMAC_H_0_REG 0x244
+
+#define ROCEE_QP1C_CFG3_0_REG 0x27C
+
+#define ROCEE_CAEP_AEQE_CONS_IDX_REG 0x3AC
+#define ROCEE_CAEP_CEQC_CONS_IDX_0_REG 0x3BC
+
+#define ROCEE_ECC_UCERR_ALM1_REG 0xB38
+#define ROCEE_ECC_UCERR_ALM2_REG 0xB3C
+#define ROCEE_ECC_CERR_ALM1_REG 0xB44
+#define ROCEE_ECC_CERR_ALM2_REG 0xB48
+
+#define ROCEE_ACK_DELAY_REG 0x14
+#define ROCEE_GLB_CFG_REG 0x18
+
+#define ROCEE_DMAE_USER_CFG1_REG 0x40
+#define ROCEE_DMAE_USER_CFG2_REG 0x44
+
+#define ROCEE_DB_SQ_WL_REG 0x154
+#define ROCEE_DB_OTHERS_WL_REG 0x158
+#define ROCEE_RAQ_WL_REG 0x15C
+#define ROCEE_WRMS_POL_TIME_INTERVAL_REG 0x160
+#define ROCEE_EXT_DB_SQ_REG 0x164
+#define ROCEE_EXT_DB_SQ_H_REG 0x168
+#define ROCEE_EXT_DB_OTH_REG 0x16C
+
+#define ROCEE_EXT_DB_OTH_H_REG 0x170
+#define ROCEE_EXT_DB_SQ_WL_EMPTY_REG 0x174
+#define ROCEE_EXT_DB_SQ_WL_REG 0x178
+#define ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG 0x17C
+#define ROCEE_EXT_DB_OTHERS_WL_REG 0x180
+#define ROCEE_EXT_RAQ_REG 0x184
+#define ROCEE_EXT_RAQ_H_REG 0x188
+
+#define ROCEE_CAEP_CE_INTERVAL_CFG_REG 0x190
+#define ROCEE_CAEP_CE_BURST_NUM_CFG_REG 0x194
+#define ROCEE_BT_CMD_L_REG 0x200
+
+#define ROCEE_MB1_REG 0x210
+#define ROCEE_MB6_REG 0x224
+#define ROCEE_DB_SQ_L_0_REG 0x230
+#define ROCEE_DB_OTHERS_L_0_REG 0x238
+#define ROCEE_QP1C_CFG0_0_REG 0x270
+
+#define ROCEE_CAEP_AEQC_AEQE_SHIFT_REG 0x3A0
+#define ROCEE_CAEP_CEQC_SHIFT_0_REG 0x3B0
+#define ROCEE_CAEP_CE_IRQ_MASK_0_REG 0x3C0
+#define ROCEE_CAEP_CEQ_ALM_OVF_0_REG 0x3C4
+#define ROCEE_CAEP_AE_MASK_REG 0x6C8
+#define ROCEE_CAEP_AE_ST_REG 0x6CC
+
+#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850
+#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0
+#define ROCEE_ECC_UCERR_ALM0_REG 0xB34
+#define ROCEE_ECC_CERR_ALM0_REG 0xB40
+
+/* V2 ROCEE REG */
+#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000
+#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004
+#define ROCEE_TX_CMQ_DEPTH_REG 0x07008
+#define ROCEE_TX_CMQ_PI_REG 0x07010
+#define ROCEE_TX_CMQ_CI_REG 0x07014
+
+#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018
+#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c
+#define ROCEE_RX_CMQ_DEPTH_REG 0x07020
+#define ROCEE_RX_CMQ_TAIL_REG 0x07024
+#define ROCEE_RX_CMQ_HEAD_REG 0x07028
+
+#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
+#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
+
+#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
+#define ROCEE_VF_ABN_INT_ST_REG 0x13004
+#define ROCEE_VF_ABN_INT_EN_REG 0x13008
+#define ROCEE_VF_EVENT_INT_EN_REG 0x1300c
+
+#endif /* _HNS_ROCE_COMMON_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
new file mode 100644
index 000000000..736dc2f99
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+#include "hns_roce_common.h"
+
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = get_least_load_bankid_for_cq(cq_table->bank);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
+ }
+
+ /* the lower 2 bits is bankid */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
+
+ return 0;
+}
+
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
+
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
+
+static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n");
+ return PTR_ERR(mailbox);
+ }
+
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC,
+ hr_cq->cqn);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
+ hr_cq->cqn, ret);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 mtts[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle;
+ int ret;
+
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
+ &dma_handle);
+ if (!ret) {
+ ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
+ return -EINVAL;
+ }
+
+ /* Get CQC memory HEM(Hardware Entry Memory) table */
+ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n",
+ hr_cq->cqn, ret);
+ return ret;
+ }
+
+ ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
+ goto err_put;
+ }
+
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle);
+ if (ret)
+ goto err_xa;
+
+ return 0;
+
+err_xa:
+ xa_erase(&cq_table->array, hr_cq->cqn);
+err_put:
+ hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+
+ return ret;
+}
+
+static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
+ hr_cq->cqn);
+ if (ret)
+ dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
+ hr_cq->cqn);
+
+ xa_erase(&cq_table->array, hr_cq->cqn);
+
+ /* Waiting interrupt process procedure carried out */
+ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
+
+ /* wait for all interrupt processed */
+ if (refcount_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
+ wait_for_completion(&hr_cq->free);
+
+ hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+}
+
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
+ buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
+ hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret)
+ ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+}
+
+static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr,
+ struct hns_roce_ib_create_cq_resp *resp)
+{
+ bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB;
+ struct hns_roce_ucontext *uctx;
+ int err;
+
+ if (udata) {
+ if (has_db &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ err = hns_roce_db_map_user(uctx, addr, &hr_cq->db);
+ if (err)
+ return err;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ resp->cap_flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ } else {
+ if (has_db) {
+ err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (err)
+ return err;
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ hr_cq->db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+ }
+
+ return 0;
+}
+
+static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx;
+
+ if (!(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB))
+ return;
+
+ hr_cq->flags &= ~HNS_ROCE_CQ_FLAG_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &hr_cq->db);
+ } else {
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
+}
+
+static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
+ const struct ib_cq_init_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
+ ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
+ attr->cqe, hr_dev->caps.max_cqes);
+ return -EINVAL;
+ }
+
+ if (attr->comp_vector >= hr_dev->caps.num_comp_vectors) {
+ ibdev_err(ibdev, "failed to check CQ vector = %u, max = %d.\n",
+ attr->comp_vector, hr_dev->caps.num_comp_vectors);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int get_cq_ucmd(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct ib_device *ibdev = hr_cq->ib_cq.device;
+ int ret;
+
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+ cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ cq_entries = roundup_pow_of_two(cq_entries);
+ hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
+ hr_cq->cq_depth = cq_entries;
+ hr_cq->vector = vector;
+
+ spin_lock_init(&hr_cq->lock);
+ INIT_LIST_HEAD(&hr_cq->sq_list);
+ INIT_LIST_HEAD(&hr_cq->rq_list);
+}
+
+static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+ if (!udata) {
+ hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+ return 0;
+ }
+
+ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) {
+ if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE &&
+ ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid cqe size %u.\n", ucmd->cqe_size);
+ return -EINVAL;
+ }
+
+ hr_cq->cqe_size = ucmd->cqe_size;
+ } else {
+ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+ }
+
+ return 0;
+}
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_ib_create_cq_resp resp = {};
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_cq ucmd = {};
+ int ret;
+
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
+ ret = verify_cq_create_attr(hr_dev, attr);
+ if (ret)
+ return ret;
+
+ if (udata) {
+ ret = get_cq_ucmd(hr_cq, udata, &ucmd);
+ if (ret)
+ return ret;
+ }
+
+ set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
+
+ ret = set_cqe_size(hr_cq, udata, &ucmd);
+ if (ret)
+ return ret;
+
+ ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
+ goto err_cq_buf;
+ }
+
+ ret = alloc_cqn(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
+ ret = alloc_cqc(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc CQ context, ret = %d.\n", ret);
+ goto err_cqn;
+ }
+
+ if (udata) {
+ resp.cqn = hr_cq->cqn;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ goto err_cqc;
+ }
+
+ hr_cq->cons_index = 0;
+ hr_cq->arm_sn = 1;
+ refcount_set(&hr_cq->refcount, 1);
+ init_completion(&hr_cq->free);
+
+ return 0;
+
+err_cqc:
+ free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
+err_cq_db:
+ free_cq_db(hr_dev, hr_cq, udata);
+err_cq_buf:
+ free_cq_buf(hr_dev, hr_cq);
+ return ret;
+}
+
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+
+ free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
+
+ return 0;
+}
+
+void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
+{
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *ibcq;
+
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n",
+ cqn);
+ return;
+ }
+
+ ++hr_cq->arm_sn;
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->comp_handler)
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_cq *hr_cq;
+ struct ib_event event;
+ struct ib_cq *ibcq;
+
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
+ return;
+ }
+
+ if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+ dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n",
+ event_type, cqn);
+ return;
+ }
+
+ refcount_inc(&hr_cq->refcount);
+
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.element.cq = ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
+
+ if (refcount_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
+}
+
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+
+ mutex_init(&cq_table->bank_mutex);
+ xa_init(&cq_table->array);
+
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
+}
+
+void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
+{
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
new file mode 100644
index 000000000..5c4c04808
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2017 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
+
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db)
+{
+ unsigned long page_addr = virt & PAGE_MASK;
+ struct hns_roce_user_db_page *page;
+ unsigned int offset;
+ int ret = 0;
+
+ mutex_lock(&context->page_mutex);
+
+ list_for_each_entry(page, &context->page_list, list)
+ if (page->user_virt == page_addr)
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ refcount_set(&page->refcount, 1);
+ page->user_virt = page_addr;
+ page->umem = ib_umem_get(context->ibucontext.device, page_addr,
+ PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ ret = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->page_list);
+
+found:
+ offset = virt - page_addr;
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset;
+ db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset;
+ db->u.user_page = page;
+ refcount_inc(&page->refcount);
+
+out:
+ mutex_unlock(&context->page_mutex);
+
+ return ret;
+}
+
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db)
+{
+ mutex_lock(&context->page_mutex);
+
+ refcount_dec(&db->u.user_page->refcount);
+ if (refcount_dec_if_one(&db->u.user_page->refcount)) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->page_mutex);
+}
+
+static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
+ struct device *dma_device)
+{
+ struct hns_roce_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ bitmap_fill(pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ pgdir->bits[0] = pgdir->order0;
+ pgdir->bits[1] = pgdir->order1;
+ pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+ &pgdir->db_dma, GFP_KERNEL);
+ if (!pgdir->page) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
+ struct hns_roce_db *db, int order)
+{
+ unsigned long o;
+ unsigned long i;
+
+ for (o = order; o <= 1; ++o) {
+ i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o);
+ if (i < HNS_ROCE_DB_PER_PAGE >> o)
+ goto found;
+ }
+
+ return -ENOMEM;
+
+found:
+ clear_bit(i, pgdir->bits[o]);
+
+ i <<= o;
+
+ if (o > order)
+ set_bit(i ^ 1, pgdir->bits[order]);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ db->db_record = pgdir->page + db->index;
+ db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
+ db->order = order;
+
+ return 0;
+}
+
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order)
+{
+ struct hns_roce_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ list_for_each_entry(pgdir, &hr_dev->pgdir_list, list)
+ if (!hns_roce_alloc_db_from_pgdir(pgdir, db, order))
+ goto out;
+
+ pgdir = hns_roce_alloc_db_pgdir(hr_dev->dev);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &hr_dev->pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(hns_roce_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+ mutex_unlock(&hr_dev->pgdir_mutex);
+
+ return ret;
+}
+
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
+{
+ unsigned long o;
+ unsigned long i;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ o = db->order;
+ i = db->index;
+
+ if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+ clear_bit(i ^ 1, db->u.pgdir->order0);
+ ++o;
+ }
+
+ i >>= o;
+ set_bit(i, db->u.pgdir->bits[o]);
+
+ if (bitmap_full(db->u.pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) {
+ dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
+ db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&hr_dev->pgdir_mutex);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
new file mode 100644
index 000000000..1112afa0a
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_DEVICE_H
+#define _HNS_ROCE_DEVICE_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/hns-abi.h>
+
+#define PCI_REVISION_ID_HIP08 0x21
+#define PCI_REVISION_ID_HIP09 0x30
+
+#define HNS_ROCE_MAX_MSG_LEN 0x80000000
+
+#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
+
+#define BA_BYTE_LEN 8
+
+#define HNS_ROCE_MIN_CQE_NUM 0x40
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
+
+#define HNS_ROCE_MAX_IRQ_NUM 128
+
+#define HNS_ROCE_SGE_IN_WQE 2
+#define HNS_ROCE_SGE_SHIFT 4
+
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
+
+#define HNS_ROCE_CEQ 0
+#define HNS_ROCE_AEQ 1
+
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
+
+#define HNS_ROCE_V3_EQE_SIZE 0x40
+
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
+
+#define HNS_ROCE_MAX_PORTS 6
+#define HNS_ROCE_GID_SIZE 16
+#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
+
+#define HNS_ROCE_HOP_NUM_0 0xff
+
+#define MR_TYPE_MR 0x00
+#define MR_TYPE_FRMR 0x01
+#define MR_TYPE_DMA 0x03
+
+#define HNS_ROCE_FRMR_MAX_PA 512
+
+#define PKEY_ID 0xffff
+#define NODE_DESC_SIZE 64
+#define DB_REG_OFFSET 0x1000
+
+/* Configure to HW for PAGE_SIZE larger than 4KB */
+#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
+
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
+#define SRQ_DB_REG 0x230
+
+#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
+
+#define CQ_BANKID_SHIFT 2
+#define CQ_BANKID_MASK GENMASK(1, 0)
+
+enum {
+ SERV_TYPE_RC,
+ SERV_TYPE_UC,
+ SERV_TYPE_RD,
+ SERV_TYPE_UD,
+ SERV_TYPE_XRC = 5,
+};
+
+enum hns_roce_event {
+ HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01,
+ HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02,
+ HNS_ROCE_EVENT_TYPE_COMM_EST = 0x03,
+ HNS_ROCE_EVENT_TYPE_SQ_DRAINED = 0x04,
+ HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
+ HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR = 0x06,
+ HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR = 0x07,
+ HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH = 0x08,
+ HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH = 0x09,
+ HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR = 0x0a,
+ HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR = 0x0b,
+ HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW = 0x0c,
+ HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID = 0x0d,
+ HNS_ROCE_EVENT_TYPE_PORT_CHANGE = 0x0f,
+ /* 0x10 and 0x11 is unused in currently application case */
+ HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
+ HNS_ROCE_EVENT_TYPE_MB = 0x13,
+ HNS_ROCE_EVENT_TYPE_FLR = 0x15,
+ HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION = 0x16,
+ HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
+};
+
+enum {
+ HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
+ HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
+ HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
+ HNS_ROCE_CAP_FLAG_CQ_RECORD_DB = BIT(3),
+ HNS_ROCE_CAP_FLAG_QP_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
+ HNS_ROCE_CAP_FLAG_XRC = BIT(6),
+ HNS_ROCE_CAP_FLAG_MW = BIT(7),
+ HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
+ HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
+ HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
+ HNS_ROCE_CAP_FLAG_STASH = BIT(17),
+};
+
+#define HNS_ROCE_DB_TYPE_COUNT 2
+#define HNS_ROCE_DB_UNIT_SIZE 4
+
+enum {
+ HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
+};
+
+enum hns_roce_reset_stage {
+ HNS_ROCE_STATE_NON_RST,
+ HNS_ROCE_STATE_RST_BEF_DOWN,
+ HNS_ROCE_STATE_RST_DOWN,
+ HNS_ROCE_STATE_RST_UNINIT,
+ HNS_ROCE_STATE_RST_INIT,
+ HNS_ROCE_STATE_RST_INITED,
+};
+
+enum hns_roce_instance_state {
+ HNS_ROCE_STATE_NON_INIT,
+ HNS_ROCE_STATE_INIT,
+ HNS_ROCE_STATE_INITED,
+ HNS_ROCE_STATE_UNINIT,
+};
+
+enum {
+ HNS_ROCE_RST_DIRECT_RETURN = 0,
+};
+
+#define HNS_ROCE_CMD_SUCCESS 1
+
+/* The minimum page size is 4K for hardware */
+#define HNS_HW_PAGE_SHIFT 12
+#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
+
+struct hns_roce_uar {
+ u64 pfn;
+ unsigned long index;
+ unsigned long logic_idx;
+};
+
+enum hns_roce_mmap_type {
+ HNS_ROCE_MMAP_TYPE_DB = 1,
+ HNS_ROCE_MMAP_TYPE_DWQE,
+};
+
+struct hns_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ enum hns_roce_mmap_type mmap_type;
+ u64 address;
+};
+
+struct hns_roce_ucontext {
+ struct ib_ucontext ibucontext;
+ struct hns_roce_uar uar;
+ struct list_head page_list;
+ struct mutex page_mutex;
+ struct hns_user_mmap_entry *db_mmap_entry;
+ u32 config;
+};
+
+struct hns_roce_pd {
+ struct ib_pd ibpd;
+ unsigned long pdn;
+};
+
+struct hns_roce_xrcd {
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+};
+
+struct hns_roce_bitmap {
+ /* Bitmap Traversal last a bit which is 1 */
+ unsigned long last;
+ unsigned long top;
+ unsigned long max;
+ unsigned long reserved_top;
+ unsigned long mask;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct hns_roce_ida {
+ struct ida ida;
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
+};
+
+/* For Hardware Entry Memory */
+struct hns_roce_hem_table {
+ /* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */
+ u32 type;
+ /* HEM array elment num */
+ unsigned long num_hem;
+ /* Single obj size */
+ unsigned long obj_size;
+ unsigned long table_chunk_size;
+ struct mutex mutex;
+ struct hns_roce_hem **hem;
+ u64 **bt_l1;
+ dma_addr_t *bt_l1_dma_addr;
+ u64 **bt_l0;
+ dma_addr_t *bt_l0_dma_addr;
+};
+
+struct hns_roce_buf_region {
+ u32 offset; /* page offset */
+ u32 count; /* page count */
+ int hopnum; /* addressing hop num */
+};
+
+#define HNS_ROCE_MAX_BT_REGION 3
+#define HNS_ROCE_MAX_BT_LEVEL 3
+struct hns_roce_hem_list {
+ struct list_head root_bt;
+ /* link all bt dma mem by hop config */
+ struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL];
+ struct list_head btm_bt; /* link all bottom bt in @mid_bt */
+ dma_addr_t root_ba; /* pointer to the root ba table */
+};
+
+struct hns_roce_buf_attr {
+ struct {
+ size_t size; /* region size */
+ int hopnum; /* multi-hop addressing hop num */
+ } region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count; /* valid region count */
+ unsigned int page_shift; /* buffer page shift */
+ unsigned int user_access; /* umem access flag */
+ bool mtt_only; /* only alloc buffer-required MTT memory */
+};
+
+struct hns_roce_hem_cfg {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ unsigned int ba_pg_shift; /* BA table page shift */
+ unsigned int buf_pg_shift; /* buffer page shift */
+ unsigned int buf_pg_count; /* buffer page count */
+ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count;
+};
+
+/* memory translate region */
+struct hns_roce_mtr {
+ struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
+ struct ib_umem *umem; /* user space buffer */
+ struct hns_roce_buf *kmem; /* kernel space buffer */
+ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */
+};
+
+struct hns_roce_mw {
+ struct ib_mw ibmw;
+ u32 pdn;
+ u32 rkey;
+ int enabled; /* MW's active status */
+ u32 pbl_hop_num;
+ u32 pbl_ba_pg_sz;
+ u32 pbl_buf_pg_sz;
+};
+
+struct hns_roce_mr {
+ struct ib_mr ibmr;
+ u64 iova; /* MR's virtual original addr */
+ u64 size; /* Address range of MR */
+ u32 key; /* Key of MR */
+ u32 pd; /* PD num of MR */
+ u32 access; /* Access permission of MR */
+ int enabled; /* MR's active status */
+ int type; /* MR's register type */
+ u32 pbl_hop_num; /* multi-hop number */
+ struct hns_roce_mtr pbl_mtr;
+ u32 npages;
+ dma_addr_t *page_list;
+};
+
+struct hns_roce_mr_table {
+ struct hns_roce_ida mtpt_ida;
+ struct hns_roce_hem_table mtpt_table;
+};
+
+struct hns_roce_wq {
+ u64 *wrid; /* Work request ID */
+ spinlock_t lock;
+ u32 wqe_cnt; /* WQE num */
+ u32 max_gs;
+ u32 rsv_sge;
+ u32 offset;
+ u32 wqe_shift; /* WQE size */
+ u32 head;
+ u32 tail;
+ void __iomem *db_reg;
+ u32 ext_sge_cnt;
+};
+
+struct hns_roce_sge {
+ unsigned int sge_cnt; /* SGE num */
+ u32 offset;
+ u32 sge_shift; /* SGE size */
+};
+
+struct hns_roce_buf_list {
+ void *buf;
+ dma_addr_t map;
+};
+
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous
+ * dma address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even
+ * the allocated size is smaller than the required size.
+ */
+enum {
+ HNS_ROCE_BUF_DIRECT = BIT(0),
+ HNS_ROCE_BUF_NOSLEEP = BIT(1),
+ HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
+struct hns_roce_buf {
+ struct hns_roce_buf_list *trunk_list;
+ u32 ntrunks;
+ u32 npages;
+ unsigned int trunk_shift;
+ unsigned int page_shift;
+};
+
+struct hns_roce_db_pgdir {
+ struct list_head list;
+ DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
+ DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT];
+ u32 *page;
+ dma_addr_t db_dma;
+};
+
+struct hns_roce_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ refcount_t refcount;
+};
+
+struct hns_roce_db {
+ u32 *db_record;
+ union {
+ struct hns_roce_db_pgdir *pgdir;
+ struct hns_roce_user_db_page *user_page;
+ } u;
+ dma_addr_t dma;
+ void *virt_addr;
+ unsigned long index;
+ unsigned long order;
+};
+
+struct hns_roce_cq {
+ struct ib_cq ib_cq;
+ struct hns_roce_mtr mtr;
+ struct hns_roce_db db;
+ u32 flags;
+ spinlock_t lock;
+ u32 cq_depth;
+ u32 cons_index;
+ u32 *set_ci_db;
+ void __iomem *db_reg;
+ int arm_sn;
+ int cqe_size;
+ unsigned long cqn;
+ u32 vector;
+ refcount_t refcount;
+ struct completion free;
+ struct list_head sq_list; /* all qps on this send cq */
+ struct list_head rq_list; /* all qps on this recv cq */
+ int is_armed; /* cq is armed */
+ struct list_head node; /* all armed cqs are on a list */
+};
+
+struct hns_roce_idx_que {
+ struct hns_roce_mtr mtr;
+ u32 entry_shift;
+ unsigned long *bitmap;
+ u32 head;
+ u32 tail;
+};
+
+struct hns_roce_srq {
+ struct ib_srq ibsrq;
+ unsigned long srqn;
+ u32 wqe_cnt;
+ int max_gs;
+ u32 rsv_sge;
+ u32 wqe_shift;
+ u32 cqn;
+ u32 xrcdn;
+ void __iomem *db_reg;
+
+ refcount_t refcount;
+ struct completion free;
+
+ struct hns_roce_mtr buf_mtr;
+
+ u64 *wrid;
+ struct hns_roce_idx_que idx_que;
+ spinlock_t lock;
+ struct mutex mutex;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+};
+
+struct hns_roce_uar_table {
+ struct hns_roce_bitmap bitmap;
+};
+
+struct hns_roce_bank {
+ struct ida ida;
+ u32 inuse; /* Number of IDs allocated */
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
+ u32 next; /* Next ID to allocate. */
+};
+
+struct hns_roce_idx_table {
+ u32 *spare_idx;
+ u32 head;
+ u32 tail;
+};
+
+struct hns_roce_qp_table {
+ struct hns_roce_hem_table qp_table;
+ struct hns_roce_hem_table irrl_table;
+ struct hns_roce_hem_table trrl_table;
+ struct hns_roce_hem_table sccc_table;
+ struct mutex scc_mutex;
+ struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
+ struct mutex bank_mutex;
+ struct hns_roce_idx_table idx_table;
+};
+
+struct hns_roce_cq_table {
+ struct xarray array;
+ struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
+};
+
+struct hns_roce_srq_table {
+ struct hns_roce_ida srq_ida;
+ struct xarray xa;
+ struct hns_roce_hem_table table;
+};
+
+struct hns_roce_av {
+ u8 port;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ u32 flowlabel;
+ u16 udp_sport;
+ u8 sl;
+ u8 tclass;
+ u8 dgid[HNS_ROCE_GID_SIZE];
+ u8 mac[ETH_ALEN];
+ u16 vlan_id;
+ u8 vlan_en;
+};
+
+struct hns_roce_ah {
+ struct ib_ah ibah;
+ struct hns_roce_av av;
+};
+
+struct hns_roce_cmd_context {
+ struct completion done;
+ int result;
+ int next;
+ u64 out_param;
+ u16 token;
+ u16 busy;
+};
+
+enum hns_roce_cmdq_state {
+ HNS_ROCE_CMDQ_STATE_NORMAL,
+ HNS_ROCE_CMDQ_STATE_FATAL_ERR,
+};
+
+struct hns_roce_cmdq {
+ struct dma_pool *pool;
+ struct semaphore poll_sem;
+ /*
+ * Event mode: cmd register mutex protection,
+ * ensure to not exceed max_cmds and user use limit region
+ */
+ struct semaphore event_sem;
+ int max_cmds;
+ spinlock_t context_lock;
+ int free_head;
+ struct hns_roce_cmd_context *context;
+ /*
+ * Process whether use event mode, init default non-zero
+ * After the event queue of cmd event ready,
+ * can switch into event mode
+ * close device, switch into poll mode(non event mode)
+ */
+ u8 use_events;
+ enum hns_roce_cmdq_state state;
+};
+
+struct hns_roce_cmd_mailbox {
+ void *buf;
+ dma_addr_t dma;
+};
+
+struct hns_roce_mbox_msg {
+ u64 in_param;
+ u64 out_param;
+ u8 cmd;
+ u32 tag;
+ u16 token;
+ u8 event_en;
+};
+
+struct hns_roce_dev;
+
+struct hns_roce_rinl_sge {
+ void *addr;
+ u32 len;
+};
+
+struct hns_roce_rinl_wqe {
+ struct hns_roce_rinl_sge *sg_list;
+ u32 sge_cnt;
+};
+
+struct hns_roce_rinl_buf {
+ struct hns_roce_rinl_wqe *wqe_list;
+ u32 wqe_cnt;
+};
+
+enum {
+ HNS_ROCE_FLUSH_FLAG = 0,
+};
+
+struct hns_roce_work {
+ struct hns_roce_dev *hr_dev;
+ struct work_struct work;
+ int event_type;
+ int sub_type;
+ u32 queue_num;
+};
+
+struct hns_roce_qp {
+ struct ib_qp ibqp;
+ struct hns_roce_wq rq;
+ struct hns_roce_db rdb;
+ struct hns_roce_db sdb;
+ unsigned long en_flags;
+ enum ib_sig_type sq_signal_bits;
+ struct hns_roce_wq sq;
+
+ struct hns_roce_mtr mtr;
+
+ u32 buff_size;
+ struct mutex mutex;
+ u8 port;
+ u8 phy_port;
+ u8 sl;
+ u8 resp_depth;
+ u8 state;
+ u32 atomic_rd_en;
+ u32 qkey;
+ void (*event)(struct hns_roce_qp *qp,
+ enum hns_roce_event event_type);
+ unsigned long qpn;
+
+ u32 xrcdn;
+
+ refcount_t refcount;
+ struct completion free;
+
+ struct hns_roce_sge sge;
+ u32 next_sge;
+ enum ib_mtu path_mtu;
+ u32 max_inline_data;
+ u8 free_mr_en;
+
+ /* 0: flush needed, 1: unneeded */
+ unsigned long flush_flag;
+ struct hns_roce_work flush_work;
+ struct hns_roce_rinl_buf rq_inl_buf;
+ struct list_head node; /* all qps are on a list */
+ struct list_head rq_node; /* all recv qps are on a list */
+ struct list_head sq_node; /* all send qps are on a list */
+ struct hns_user_mmap_entry *dwqe_mmap_entry;
+ u32 config;
+};
+
+struct hns_roce_ib_iboe {
+ spinlock_t lock;
+ struct net_device *netdevs[HNS_ROCE_MAX_PORTS];
+ struct notifier_block nb;
+ u8 phy_port[HNS_ROCE_MAX_PORTS];
+};
+
+struct hns_roce_ceqe {
+ __le32 comp;
+ __le32 rsv[15];
+};
+
+#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
+
+#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
+#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
+
+struct hns_roce_aeqe {
+ __le32 asyn;
+ union {
+ struct {
+ __le32 num;
+ u32 rsv0;
+ u32 rsv1;
+ } queue_event;
+
+ struct {
+ __le64 out_param;
+ __le16 token;
+ u8 status;
+ u8 rsv0;
+ } __packed cmd;
+ } event;
+ __le32 rsv[12];
+};
+
+#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
+
+#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
+#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
+#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
+#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
+
+struct hns_roce_eq {
+ struct hns_roce_dev *hr_dev;
+ void __iomem *db_reg;
+
+ int type_flag; /* Aeq:1 ceq:0 */
+ int eqn;
+ u32 entries;
+ int eqe_size;
+ int irq;
+ u32 cons_index;
+ int over_ignore;
+ int coalesce;
+ int arm_st;
+ int hop_num;
+ struct hns_roce_mtr mtr;
+ u16 eq_max_cnt;
+ u32 eq_period;
+ int shift;
+ int event_type;
+ int sub_type;
+};
+
+struct hns_roce_eq_table {
+ struct hns_roce_eq *eq;
+};
+
+enum cong_type {
+ CONG_TYPE_DCQCN,
+ CONG_TYPE_LDCP,
+ CONG_TYPE_HC3,
+ CONG_TYPE_DIP,
+};
+
+struct hns_roce_caps {
+ u64 fw_ver;
+ u8 num_ports;
+ int gid_table_len[HNS_ROCE_MAX_PORTS];
+ int pkey_table_len[HNS_ROCE_MAX_PORTS];
+ int local_ca_ack_delay;
+ int num_uars;
+ u32 phy_num_uars;
+ u32 max_sq_sg;
+ u32 max_sq_inline;
+ u32 max_rq_sg;
+ u32 rsv0;
+ u32 num_qps;
+ u32 num_pi_qps;
+ u32 reserved_qps;
+ u32 num_srqs;
+ u32 max_wqes;
+ u32 max_srq_wrs;
+ u32 max_srq_sges;
+ u32 max_sq_desc_sz;
+ u32 max_rq_desc_sz;
+ u32 rsv2;
+ int max_qp_init_rdma;
+ int max_qp_dest_rdma;
+ u32 num_cqs;
+ u32 max_cqes;
+ u32 min_cqes;
+ u32 min_wqes;
+ u32 reserved_cqs;
+ u32 reserved_srqs;
+ int num_aeq_vectors;
+ int num_comp_vectors;
+ int num_other_vectors;
+ u32 num_mtpts;
+ u32 rsv1;
+ u32 num_srqwqe_segs;
+ u32 num_idx_segs;
+ int reserved_mrws;
+ int reserved_uars;
+ int num_pds;
+ int reserved_pds;
+ u32 num_xrcds;
+ u32 reserved_xrcds;
+ u32 mtt_entry_sz;
+ u32 cqe_sz;
+ u32 page_size_cap;
+ u32 reserved_lkey;
+ int mtpt_entry_sz;
+ int qpc_sz;
+ int irrl_entry_sz;
+ int trrl_entry_sz;
+ int cqc_entry_sz;
+ int sccc_sz;
+ int qpc_timer_entry_sz;
+ int cqc_timer_entry_sz;
+ int srqc_entry_sz;
+ int idx_entry_sz;
+ u32 pbl_ba_pg_sz;
+ u32 pbl_buf_pg_sz;
+ u32 pbl_hop_num;
+ int aeqe_depth;
+ int ceqe_depth;
+ u32 aeqe_size;
+ u32 ceqe_size;
+ enum ib_mtu max_mtu;
+ u32 qpc_bt_num;
+ u32 qpc_timer_bt_num;
+ u32 srqc_bt_num;
+ u32 cqc_bt_num;
+ u32 cqc_timer_bt_num;
+ u32 mpt_bt_num;
+ u32 eqc_bt_num;
+ u32 smac_bt_num;
+ u32 sgid_bt_num;
+ u32 sccc_bt_num;
+ u32 gmv_bt_num;
+ u32 qpc_ba_pg_sz;
+ u32 qpc_buf_pg_sz;
+ u32 qpc_hop_num;
+ u32 srqc_ba_pg_sz;
+ u32 srqc_buf_pg_sz;
+ u32 srqc_hop_num;
+ u32 cqc_ba_pg_sz;
+ u32 cqc_buf_pg_sz;
+ u32 cqc_hop_num;
+ u32 mpt_ba_pg_sz;
+ u32 mpt_buf_pg_sz;
+ u32 mpt_hop_num;
+ u32 mtt_ba_pg_sz;
+ u32 mtt_buf_pg_sz;
+ u32 mtt_hop_num;
+ u32 wqe_sq_hop_num;
+ u32 wqe_sge_hop_num;
+ u32 wqe_rq_hop_num;
+ u32 sccc_ba_pg_sz;
+ u32 sccc_buf_pg_sz;
+ u32 sccc_hop_num;
+ u32 qpc_timer_ba_pg_sz;
+ u32 qpc_timer_buf_pg_sz;
+ u32 qpc_timer_hop_num;
+ u32 cqc_timer_ba_pg_sz;
+ u32 cqc_timer_buf_pg_sz;
+ u32 cqc_timer_hop_num;
+ u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */
+ u32 cqe_buf_pg_sz;
+ u32 cqe_hop_num;
+ u32 srqwqe_ba_pg_sz;
+ u32 srqwqe_buf_pg_sz;
+ u32 srqwqe_hop_num;
+ u32 idx_ba_pg_sz;
+ u32 idx_buf_pg_sz;
+ u32 idx_hop_num;
+ u32 eqe_ba_pg_sz;
+ u32 eqe_buf_pg_sz;
+ u32 eqe_hop_num;
+ u32 gmv_entry_num;
+ u32 gmv_entry_sz;
+ u32 gmv_ba_pg_sz;
+ u32 gmv_buf_pg_sz;
+ u32 gmv_hop_num;
+ u32 sl_num;
+ u32 llm_buf_pg_sz;
+ u32 chunk_sz; /* chunk size in non multihop mode */
+ u64 flags;
+ u16 default_ceq_max_cnt;
+ u16 default_ceq_period;
+ u16 default_aeq_max_cnt;
+ u16 default_aeq_period;
+ u16 default_aeq_arm_st;
+ u16 default_ceq_arm_st;
+ enum cong_type cong_type;
+};
+
+enum hns_roce_device_state {
+ HNS_ROCE_DEVICE_STATE_INITED,
+ HNS_ROCE_DEVICE_STATE_RST_DOWN,
+ HNS_ROCE_DEVICE_STATE_UNINIT,
+};
+
+struct hns_roce_hw {
+ int (*cmq_init)(struct hns_roce_dev *hr_dev);
+ void (*cmq_exit)(struct hns_roce_dev *hr_dev);
+ int (*hw_profile)(struct hns_roce_dev *hr_dev);
+ int (*hw_init)(struct hns_roce_dev *hr_dev);
+ void (*hw_exit)(struct hns_roce_dev *hr_dev);
+ int (*post_mbox)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg);
+ int (*poll_mbox_done)(struct hns_roce_dev *hr_dev);
+ bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy);
+ int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid, const struct ib_gid_attr *attr);
+ int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port,
+ const u8 *addr);
+ int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr);
+ int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr, int flags,
+ void *mb_buf);
+ int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr);
+ int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
+ void (*write_cqc)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
+ dma_addr_t dma_handle);
+ int (*set_hem)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, int obj, u32 step_idx);
+ int (*clear_hem)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, int obj,
+ u32 step_idx);
+ int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state);
+ int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp);
+ void (*dereg_mr)(struct hns_roce_dev *hr_dev);
+ int (*init_eq)(struct hns_roce_dev *hr_dev);
+ void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+ int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
+ int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
+ int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
+ const struct ib_device_ops *hns_roce_dev_ops;
+ const struct ib_device_ops *hns_roce_dev_srq_ops;
+};
+
+struct hns_roce_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pci_dev;
+ struct device *dev;
+ struct hns_roce_uar priv_uar;
+ const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
+ spinlock_t sm_lock;
+ bool active;
+ bool is_reset;
+ bool dis_db;
+ unsigned long reset_cnt;
+ struct hns_roce_ib_iboe iboe;
+ enum hns_roce_device_state state;
+ struct list_head qp_list; /* list of all qps on this dev */
+ spinlock_t qp_list_lock; /* protect qp_list */
+ struct list_head dip_list; /* list of all dest ips on this dev */
+ spinlock_t dip_list_lock; /* protect dip_list */
+
+ struct list_head pgdir_list;
+ struct mutex pgdir_mutex;
+ int irq[HNS_ROCE_MAX_IRQ_NUM];
+ u8 __iomem *reg_base;
+ void __iomem *mem_base;
+ struct hns_roce_caps caps;
+ struct xarray qp_table_xa;
+
+ unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN];
+ u64 sys_image_guid;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 hw_rev;
+ void __iomem *priv_addr;
+
+ struct hns_roce_cmdq cmd;
+ struct hns_roce_ida pd_ida;
+ struct hns_roce_ida xrcd_ida;
+ struct hns_roce_ida uar_ida;
+ struct hns_roce_mr_table mr_table;
+ struct hns_roce_cq_table cq_table;
+ struct hns_roce_srq_table srq_table;
+ struct hns_roce_qp_table qp_table;
+ struct hns_roce_eq_table eq_table;
+ struct hns_roce_hem_table qpc_timer_table;
+ struct hns_roce_hem_table cqc_timer_table;
+ /* GMV is the memory area that the driver allocates for the hardware
+ * to store SGID, SMAC and VLAN information.
+ */
+ struct hns_roce_hem_table gmv_table;
+
+ int cmd_mod;
+ int loop_idc;
+ u32 sdb_offset;
+ u32 odb_offset;
+ const struct hns_roce_hw *hw;
+ void *priv;
+ struct workqueue_struct *irq_workq;
+ struct work_struct ecc_work;
+ u32 func_num;
+ u32 is_vf;
+ u32 cong_algo_tmpl_id;
+ u64 dwqe_page;
+};
+
+static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
+{
+ return container_of(ib_dev, struct hns_roce_dev, ib_dev);
+}
+
+static inline struct hns_roce_ucontext
+ *to_hr_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct hns_roce_ucontext, ibucontext);
+}
+
+static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct hns_roce_pd, ibpd);
+}
+
+static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
+}
+
+static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct hns_roce_ah, ibah);
+}
+
+static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct hns_roce_mr, ibmr);
+}
+
+static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct hns_roce_mw, ibmw);
+}
+
+static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct hns_roce_qp, ibqp);
+}
+
+static inline struct hns_roce_cq *to_hr_cq(struct ib_cq *ib_cq)
+{
+ return container_of(ib_cq, struct hns_roce_cq, ib_cq);
+}
+
+static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct hns_roce_srq, ibsrq);
+}
+
+static inline struct hns_user_mmap_entry *
+to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry);
+}
+
+static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
+{
+ writeq(*(u64 *)val, dest);
+}
+
+static inline struct hns_roce_qp
+ *__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+ return xa_load(&hr_dev->qp_table_xa, qpn);
+}
+
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
+ unsigned int offset)
+{
+ return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+ (offset & ((1 << buf->trunk_shift) - 1));
+}
+
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+ unsigned int offset)
+{
+ return buf->trunk_list[offset >> buf->trunk_shift].map +
+ (offset & ((1 << buf->trunk_shift) - 1));
+}
+
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+ return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
+}
+
+#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
+
+static inline u64 to_hr_hw_page_addr(u64 addr)
+{
+ return addr >> HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hw_page_shift(u32 page_shift)
+{
+ return page_shift - HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
+{
+ if (count > 0)
+ return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum;
+
+ return 0;
+}
+
+static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift);
+}
+
+static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift) >> buf_shift;
+}
+
+static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
+{
+ if (!count)
+ return 0;
+
+ return ilog2(to_hr_hem_entries_count(count, buf_shift));
+}
+
+#define DSCP_SHIFT 2
+
+static inline u8 get_tclass(const struct ib_global_route *grh)
+{
+ return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ?
+ grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *dev);
+int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
+
+int hns_roce_cmd_init(struct hns_roce_dev *hr_dev);
+void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev);
+void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
+ u64 out_param);
+int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev);
+void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
+
+/* hns roce hw need current block and next block addr from mtt */
+#define MTT_MIN_COUNT 2
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int page_shift, struct ib_udata *udata,
+ unsigned long user_addr);
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr);
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt);
+
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
+
+void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
+
+void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev);
+
+int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
+
+int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+
+struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+unsigned long key_to_hw_index(u32 key);
+
+int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
+int hns_roce_dealloc_mw(struct ib_mw *ibmw);
+
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags);
+
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift);
+int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift);
+
+int hns_roce_create_srq(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+
+int hns_roce_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
+ struct ib_cq *ib_cq);
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq);
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq);
+void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata);
+__be32 send_ieth(const struct ib_send_wr *wr);
+int to_hr_qp_type(int qp_type);
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db);
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db);
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order);
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
+
+void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
+void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
+void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
+u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
+int hns_roce_init(struct hns_roce_dev *hr_dev);
+void hns_roce_exit(struct hns_roce_dev *hr_dev);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type);
+#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
new file mode 100644
index 000000000..f30274986
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -0,0 +1,1486 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "hns_roce_device.h"
+#include "hns_roce_hem.h"
+#include "hns_roce_common.h"
+
+#define HEM_INDEX_BUF BIT(0)
+#define HEM_INDEX_L0 BIT(1)
+#define HEM_INDEX_L1 BIT(2)
+struct hns_roce_hem_index {
+ u64 buf;
+ u64 l0;
+ u64 l1;
+ u32 inited; /* indicate which index is available */
+};
+
+bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
+{
+ int hop_num = 0;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_GMV:
+ hop_num = hr_dev->caps.gmv_hop_num;
+ break;
+ default:
+ return false;
+ }
+
+ return hop_num ? true : false;
+}
+
+static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
+ u32 bt_chunk_num, u64 hem_max_num)
+{
+ u64 start_idx = round_down(hem_idx, bt_chunk_num);
+ u64 check_max_num = start_idx + bt_chunk_num;
+ u64 i;
+
+ for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
+ if (i != hem_idx && hem[i])
+ return false;
+
+ return true;
+}
+
+static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num)
+{
+ u64 start_idx = round_down(ba_idx, bt_chunk_num);
+ int i;
+
+ for (i = 0; i < bt_chunk_num; i++)
+ if (i != ba_idx && bt[start_idx + i])
+ return false;
+
+ return true;
+}
+
+static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
+{
+ if (check_whether_bt_num_3(table_type, hop_num))
+ return 3;
+ else if (check_whether_bt_num_2(table_type, hop_num))
+ return 2;
+ else if (check_whether_bt_num_1(table_type, hop_num))
+ return 1;
+ else
+ return 0;
+}
+
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_mhop *mhop,
+ u32 type)
+{
+ struct device *dev = hr_dev->dev;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.qpc_bt_num;
+ mhop->hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.mpt_bt_num;
+ mhop->hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.cqc_bt_num;
+ mhop->hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.sccc_bt_num;
+ mhop->hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.qpc_timer_bt_num;
+ mhop->hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.cqc_timer_bt_num;
+ mhop->hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.srqc_bt_num;
+ mhop->hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_GMV:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz +
+ PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz +
+ PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.gmv_bt_num;
+ mhop->hop_num = hr_dev->caps.gmv_hop_num;
+ break;
+ default:
+ dev_err(dev, "table %u not support multi-hop addressing!\n",
+ type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop)
+{
+ struct device *dev = hr_dev->dev;
+ u32 chunk_ba_num;
+ u32 chunk_size;
+ u32 table_idx;
+ u32 bt_num;
+
+ if (get_hem_table_config(hr_dev, mhop, table->type))
+ return -EINVAL;
+
+ if (!obj)
+ return 0;
+
+ /*
+ * QPC/MTPT/CQC/SRQC/SCCC alloc hem for buffer pages.
+ * MTT/CQE alloc hem for bt pages.
+ */
+ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
+ mhop->bt_chunk_size;
+ table_idx = *obj / (chunk_size / table->obj_size);
+ switch (bt_num) {
+ case 3:
+ mhop->l2_idx = table_idx & (chunk_ba_num - 1);
+ mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1);
+ mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num;
+ break;
+ case 2:
+ mhop->l1_idx = table_idx & (chunk_ba_num - 1);
+ mhop->l0_idx = table_idx / chunk_ba_num;
+ break;
+ case 1:
+ mhop->l0_idx = table_idx;
+ break;
+ default:
+ dev_err(dev, "table %u not support hop_num = %u!\n",
+ table->type, mhop->hop_num);
+ return -EINVAL;
+ }
+ if (mhop->l0_idx >= mhop->ba_l0_num)
+ mhop->l0_idx %= mhop->ba_l0_num;
+
+ return 0;
+}
+
+static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
+ int npages,
+ unsigned long hem_alloc_size,
+ gfp_t gfp_mask)
+{
+ struct hns_roce_hem_chunk *chunk = NULL;
+ struct hns_roce_hem *hem;
+ struct scatterlist *mem;
+ int order;
+ void *buf;
+
+ WARN_ON(gfp_mask & __GFP_HIGHMEM);
+
+ hem = kmalloc(sizeof(*hem),
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!hem)
+ return NULL;
+
+ INIT_LIST_HEAD(&hem->chunk_list);
+
+ order = get_order(hem_alloc_size);
+
+ while (npages > 0) {
+ if (!chunk) {
+ chunk = kmalloc(sizeof(*chunk),
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+
+ sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
+ chunk->npages = 0;
+ chunk->nsg = 0;
+ memset(chunk->buf, 0, sizeof(chunk->buf));
+ list_add_tail(&chunk->list, &hem->chunk_list);
+ }
+
+ while (1 << order > npages)
+ --order;
+
+ /*
+ * Alloc memory one time. If failed, don't alloc small block
+ * memory, directly return fail.
+ */
+ mem = &chunk->mem[chunk->npages];
+ buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
+ &sg_dma_address(mem), gfp_mask);
+ if (!buf)
+ goto fail;
+
+ chunk->buf[chunk->npages] = buf;
+ sg_dma_len(mem) = PAGE_SIZE << order;
+
+ ++chunk->npages;
+ ++chunk->nsg;
+ npages -= 1 << order;
+ }
+
+ return hem;
+
+fail:
+ hns_roce_free_hem(hr_dev, hem);
+ return NULL;
+}
+
+void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
+{
+ struct hns_roce_hem_chunk *chunk, *tmp;
+ int i;
+
+ if (!hem)
+ return;
+
+ list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i)
+ dma_free_coherent(hr_dev->dev,
+ sg_dma_len(&chunk->mem[i]),
+ chunk->buf[i],
+ sg_dma_address(&chunk->mem[i]));
+ kfree(chunk);
+ }
+
+ kfree(hem);
+}
+
+static int calc_hem_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned long mhop_obj = obj;
+ u32 l0_idx, l1_idx, l2_idx;
+ u32 chunk_ba_num;
+ u32 bt_num;
+ int ret;
+
+ ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop);
+ if (ret)
+ return ret;
+
+ l0_idx = mhop->l0_idx;
+ l1_idx = mhop->l1_idx;
+ l2_idx = mhop->l2_idx;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
+ switch (bt_num) {
+ case 3:
+ index->l1 = l0_idx * chunk_ba_num + l1_idx;
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num * chunk_ba_num +
+ l1_idx * chunk_ba_num + l2_idx;
+ break;
+ case 2:
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num + l1_idx;
+ break;
+ case 1:
+ index->buf = l0_idx;
+ break;
+ default:
+ ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
+ table->type, mhop->hop_num);
+ return -EINVAL;
+ }
+
+ if (unlikely(index->buf >= table->num_hem)) {
+ ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
+ table->type, index->buf, table->num_hem);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void free_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+
+ if (index->inited & HEM_INDEX_BUF) {
+ hns_roce_free_hem(hr_dev, table->hem[index->buf]);
+ table->hem[index->buf] = NULL;
+ }
+
+ if (index->inited & HEM_INDEX_L1) {
+ dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
+ table->bt_l1_dma_addr[index->l1]);
+ table->bt_l1[index->l1] = NULL;
+ }
+
+ if (index->inited & HEM_INDEX_L0) {
+ dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
+ table->bt_l0_dma_addr[index->l0]);
+ table->bt_l0[index->l0] = NULL;
+ }
+}
+
+static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_hem_iter iter;
+ gfp_t flag;
+ u64 bt_ba;
+ u32 size;
+ int ret;
+
+ /* alloc L1 BA's chunk */
+ if ((check_whether_bt_num_3(table->type, mhop->hop_num) ||
+ check_whether_bt_num_2(table->type, mhop->hop_num)) &&
+ !table->bt_l0[index->l0]) {
+ table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l0_dma_addr[index->l0],
+ GFP_KERNEL);
+ if (!table->bt_l0[index->l0]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ index->inited |= HEM_INDEX_L0;
+ }
+
+ /* alloc L2 BA's chunk */
+ if (check_whether_bt_num_3(table->type, mhop->hop_num) &&
+ !table->bt_l1[index->l1]) {
+ table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l1_dma_addr[index->l1],
+ GFP_KERNEL);
+ if (!table->bt_l1[index->l1]) {
+ ret = -ENOMEM;
+ goto err_alloc_hem;
+ }
+ index->inited |= HEM_INDEX_L1;
+ *(table->bt_l0[index->l0] + mhop->l1_idx) =
+ table->bt_l1_dma_addr[index->l1];
+ }
+
+ /*
+ * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
+ * alloc bt space chunk for MTT/CQE.
+ */
+ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
+ flag = GFP_KERNEL | __GFP_NOWARN;
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
+ size, flag);
+ if (!table->hem[index->buf]) {
+ ret = -ENOMEM;
+ goto err_alloc_hem;
+ }
+
+ index->inited |= HEM_INDEX_BUF;
+ hns_roce_hem_first(table->hem[index->buf], &iter);
+ bt_ba = hns_roce_hem_addr(&iter);
+ if (table->type < HEM_TYPE_MTT) {
+ if (mhop->hop_num == 2)
+ *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
+ else if (mhop->hop_num == 1)
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ } else if (mhop->hop_num == 2) {
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ }
+
+ return 0;
+err_alloc_hem:
+ free_mhop_hem(hr_dev, table, mhop, index);
+out:
+ return ret;
+}
+
+static int set_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 step_idx;
+ int ret = 0;
+
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
+ if (ret) {
+ ibdev_err(ibdev, "set HEM step 0 failed!\n");
+ goto out;
+ }
+ }
+
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
+ if (ret) {
+ ibdev_err(ibdev, "set HEM step 1 failed!\n");
+ goto out;
+ }
+ }
+
+ if (index->inited & HEM_INDEX_BUF) {
+ if (mhop->hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = mhop->hop_num;
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ ibdev_err(ibdev, "set HEM step last failed!\n");
+ }
+out:
+ return ret;
+}
+
+static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ int ret;
+
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ ibdev_err(ibdev, "calc hem config failed!\n");
+ return ret;
+ }
+
+ mutex_lock(&table->mutex);
+ if (table->hem[index.buf]) {
+ refcount_inc(&table->hem[index.buf]->refcount);
+ goto out;
+ }
+
+ ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
+ if (ret) {
+ ibdev_err(ibdev, "alloc mhop hem failed!\n");
+ goto out;
+ }
+
+ /* set HEM base address to hardware */
+ if (table->type < HEM_TYPE_MTT) {
+ ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ ibdev_err(ibdev, "set HEM address to HW failed!\n");
+ goto err_alloc;
+ }
+ }
+
+ refcount_set(&table->hem[index.buf]->refcount, 1);
+ goto out;
+
+err_alloc:
+ free_mhop_hem(hr_dev, table, &mhop, &index);
+out:
+ mutex_unlock(&table->mutex);
+ return ret;
+}
+
+int hns_roce_table_get(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj)
+{
+ struct device *dev = hr_dev->dev;
+ unsigned long i;
+ int ret = 0;
+
+ if (hns_roce_check_whether_mhop(hr_dev, table->type))
+ return hns_roce_table_mhop_get(hr_dev, table, obj);
+
+ i = obj / (table->table_chunk_size / table->obj_size);
+
+ mutex_lock(&table->mutex);
+
+ if (table->hem[i]) {
+ refcount_inc(&table->hem[i]->refcount);
+ goto out;
+ }
+
+ table->hem[i] = hns_roce_alloc_hem(hr_dev,
+ table->table_chunk_size >> PAGE_SHIFT,
+ table->table_chunk_size,
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!table->hem[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Set HEM base address(128K/page, pa) to Hardware */
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret) {
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
+ dev_err(dev, "set HEM base address to HW failed, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ refcount_set(&table->hem[i]->refcount, 1);
+out:
+ mutex_unlock(&table->mutex);
+ return ret;
+}
+
+static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 hop_num = mhop->hop_num;
+ u32 chunk_ba_num;
+ u32 step_idx;
+
+ index->inited = HEM_INDEX_BUF;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ if (check_whether_bt_num_2(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem))
+ index->inited |= HEM_INDEX_L0;
+ } else if (check_whether_bt_num_3(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem)) {
+ index->inited |= HEM_INDEX_L1;
+ if (hns_roce_check_bt_null(table->bt_l1, index->l1,
+ chunk_ba_num))
+ index->inited |= HEM_INDEX_L0;
+ }
+ }
+
+ if (table->type < HEM_TYPE_MTT) {
+ if (hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = hop_num;
+
+ if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx))
+ ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num);
+
+ if (index->inited & HEM_INDEX_L1)
+ if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
+ ibdev_warn(ibdev, "failed to clear HEM step 1.\n");
+
+ if (index->inited & HEM_INDEX_L0)
+ if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
+ ibdev_warn(ibdev, "failed to clear HEM step 0.\n");
+ }
+}
+
+static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj,
+ int check_refcount)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ int ret;
+
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ ibdev_err(ibdev, "calc hem config failed!\n");
+ return;
+ }
+
+ if (!check_refcount)
+ mutex_lock(&table->mutex);
+ else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount,
+ &table->mutex))
+ return;
+
+ clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ free_mhop_hem(hr_dev, table, &mhop, &index);
+
+ mutex_unlock(&table->mutex);
+}
+
+void hns_roce_table_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj)
+{
+ struct device *dev = hr_dev->dev;
+ unsigned long i;
+
+ if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ hns_roce_table_mhop_put(hr_dev, table, obj, 1);
+ return;
+ }
+
+ i = obj / (table->table_chunk_size / table->obj_size);
+
+ if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
+ &table->mutex))
+ return;
+
+ if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT))
+ dev_warn(dev, "failed to clear HEM base address.\n");
+
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
+
+ mutex_unlock(&table->mutex);
+}
+
+void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj, dma_addr_t *dma_handle)
+{
+ struct hns_roce_hem_chunk *chunk;
+ struct hns_roce_hem_mhop mhop;
+ struct hns_roce_hem *hem;
+ unsigned long mhop_obj = obj;
+ unsigned long obj_per_chunk;
+ unsigned long idx_offset;
+ int offset, dma_offset;
+ void *addr = NULL;
+ u32 hem_idx = 0;
+ int length;
+ int i, j;
+
+ mutex_lock(&table->mutex);
+
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ obj_per_chunk = table->table_chunk_size / table->obj_size;
+ hem = table->hem[obj / obj_per_chunk];
+ idx_offset = obj % obj_per_chunk;
+ dma_offset = offset = idx_offset * table->obj_size;
+ } else {
+ u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
+
+ if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
+ goto out;
+ /* mtt mhop */
+ i = mhop.l0_idx;
+ j = mhop.l1_idx;
+ if (mhop.hop_num == 2)
+ hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j;
+ else if (mhop.hop_num == 1 ||
+ mhop.hop_num == HNS_ROCE_HOP_NUM_0)
+ hem_idx = i;
+
+ hem = table->hem[hem_idx];
+ dma_offset = offset = obj * seg_size % mhop.bt_chunk_size;
+ if (mhop.hop_num == 2)
+ dma_offset = offset = 0;
+ }
+
+ if (!hem)
+ goto out;
+
+ list_for_each_entry(chunk, &hem->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i) {
+ length = sg_dma_len(&chunk->mem[i]);
+ if (dma_handle && dma_offset >= 0) {
+ if (length > (u32)dma_offset)
+ *dma_handle = sg_dma_address(
+ &chunk->mem[i]) + dma_offset;
+ dma_offset -= length;
+ }
+
+ if (length > (u32)offset) {
+ addr = chunk->buf[i] + offset;
+ goto out;
+ }
+ offset -= length;
+ }
+ }
+
+out:
+ mutex_unlock(&table->mutex);
+ return addr;
+}
+
+int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, u32 type,
+ unsigned long obj_size, unsigned long nobj)
+{
+ unsigned long obj_per_chunk;
+ unsigned long num_hem;
+
+ if (!hns_roce_check_whether_mhop(hr_dev, type)) {
+ table->table_chunk_size = hr_dev->caps.chunk_sz;
+ obj_per_chunk = table->table_chunk_size / obj_size;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
+
+ table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
+ if (!table->hem)
+ return -ENOMEM;
+ } else {
+ struct hns_roce_hem_mhop mhop = {};
+ unsigned long buf_chunk_size;
+ unsigned long bt_chunk_size;
+ unsigned long bt_chunk_num;
+ unsigned long num_bt_l0;
+ u32 hop_num;
+
+ if (get_hem_table_config(hr_dev, &mhop, type))
+ return -EINVAL;
+
+ buf_chunk_size = mhop.buf_chunk_size;
+ bt_chunk_size = mhop.bt_chunk_size;
+ num_bt_l0 = mhop.ba_l0_num;
+ hop_num = mhop.hop_num;
+
+ obj_per_chunk = buf_chunk_size / obj_size;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
+ bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
+
+ if (type >= HEM_TYPE_MTT)
+ num_bt_l0 = bt_chunk_num;
+
+ table->hem = kcalloc(num_hem, sizeof(*table->hem),
+ GFP_KERNEL);
+ if (!table->hem)
+ goto err_kcalloc_hem_buf;
+
+ if (check_whether_bt_num_3(type, hop_num)) {
+ unsigned long num_bt_l1;
+
+ num_bt_l1 = DIV_ROUND_UP(num_hem, bt_chunk_num);
+ table->bt_l1 = kcalloc(num_bt_l1,
+ sizeof(*table->bt_l1),
+ GFP_KERNEL);
+ if (!table->bt_l1)
+ goto err_kcalloc_bt_l1;
+
+ table->bt_l1_dma_addr = kcalloc(num_bt_l1,
+ sizeof(*table->bt_l1_dma_addr),
+ GFP_KERNEL);
+
+ if (!table->bt_l1_dma_addr)
+ goto err_kcalloc_l1_dma;
+ }
+
+ if (check_whether_bt_num_2(type, hop_num) ||
+ check_whether_bt_num_3(type, hop_num)) {
+ table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0),
+ GFP_KERNEL);
+ if (!table->bt_l0)
+ goto err_kcalloc_bt_l0;
+
+ table->bt_l0_dma_addr = kcalloc(num_bt_l0,
+ sizeof(*table->bt_l0_dma_addr),
+ GFP_KERNEL);
+ if (!table->bt_l0_dma_addr)
+ goto err_kcalloc_l0_dma;
+ }
+ }
+
+ table->type = type;
+ table->num_hem = num_hem;
+ table->obj_size = obj_size;
+ mutex_init(&table->mutex);
+
+ return 0;
+
+err_kcalloc_l0_dma:
+ kfree(table->bt_l0);
+ table->bt_l0 = NULL;
+
+err_kcalloc_bt_l0:
+ kfree(table->bt_l1_dma_addr);
+ table->bt_l1_dma_addr = NULL;
+
+err_kcalloc_l1_dma:
+ kfree(table->bt_l1);
+ table->bt_l1 = NULL;
+
+err_kcalloc_bt_l1:
+ kfree(table->hem);
+ table->hem = NULL;
+
+err_kcalloc_hem_buf:
+ return -ENOMEM;
+}
+
+static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table)
+{
+ struct hns_roce_hem_mhop mhop;
+ u32 buf_chunk_size;
+ u64 obj;
+ int i;
+
+ if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+ return;
+ buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
+ mhop.bt_chunk_size;
+
+ for (i = 0; i < table->num_hem; ++i) {
+ obj = i * buf_chunk_size / table->obj_size;
+ if (table->hem[i])
+ hns_roce_table_mhop_put(hr_dev, table, obj, 0);
+ }
+
+ kfree(table->hem);
+ table->hem = NULL;
+ kfree(table->bt_l1);
+ table->bt_l1 = NULL;
+ kfree(table->bt_l1_dma_addr);
+ table->bt_l1_dma_addr = NULL;
+ kfree(table->bt_l0);
+ table->bt_l0 = NULL;
+ kfree(table->bt_l0_dma_addr);
+ table->bt_l0_dma_addr = NULL;
+}
+
+void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table)
+{
+ struct device *dev = hr_dev->dev;
+ unsigned long i;
+
+ if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ hns_roce_cleanup_mhop_hem_table(hr_dev, table);
+ return;
+ }
+
+ for (i = 0; i < table->num_hem; ++i)
+ if (table->hem[i]) {
+ if (hr_dev->hw->clear_hem(hr_dev, table,
+ i * table->table_chunk_size / table->obj_size, 0))
+ dev_err(dev, "clear HEM base address failed.\n");
+
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ }
+
+ kfree(table->hem);
+}
+
+void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->srq_table.table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+ if (hr_dev->caps.qpc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qpc_timer_table);
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->cqc_timer_table);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table);
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table);
+
+ if (hr_dev->caps.gmv_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table);
+
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
+}
+
+struct hns_roce_hem_item {
+ struct list_head list; /* link all hems in the same bt level */
+ struct list_head sibling; /* link all hems in last hop for mtt */
+ void *addr;
+ dma_addr_t dma_addr;
+ size_t count; /* max ba numbers */
+ int start; /* start buf offset in this hem */
+ int end; /* end buf offset in this hem */
+};
+
+/* All HEM items are linked in a tree structure */
+struct hns_roce_hem_head {
+ struct list_head branch[HNS_ROCE_MAX_BT_REGION];
+ struct list_head root;
+ struct list_head leaf;
+};
+
+static struct hns_roce_hem_item *
+hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
+ bool exist_bt)
+{
+ struct hns_roce_hem_item *hem;
+
+ hem = kzalloc(sizeof(*hem), GFP_KERNEL);
+ if (!hem)
+ return NULL;
+
+ if (exist_bt) {
+ hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
+ if (!hem->addr) {
+ kfree(hem);
+ return NULL;
+ }
+ }
+
+ hem->count = count;
+ hem->start = start;
+ hem->end = end;
+ INIT_LIST_HEAD(&hem->list);
+ INIT_LIST_HEAD(&hem->sibling);
+
+ return hem;
+}
+
+static void hem_list_free_item(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_item *hem, bool exist_bt)
+{
+ if (exist_bt)
+ dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
+ hem->addr, hem->dma_addr);
+ kfree(hem);
+}
+
+static void hem_list_free_all(struct hns_roce_dev *hr_dev,
+ struct list_head *head, bool exist_bt)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+
+ list_for_each_entry_safe(hem, temp_hem, head, list) {
+ list_del(&hem->list);
+ hem_list_free_item(hr_dev, hem, exist_bt);
+ }
+}
+
+static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
+ u64 table_addr)
+{
+ *(u64 *)(base_addr) = table_addr;
+}
+
+/* assign L0 table address to hem from root bt */
+static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_item *hem, void *cpu_addr,
+ u64 phy_addr)
+{
+ hem->addr = cpu_addr;
+ hem->dma_addr = (dma_addr_t)phy_addr;
+}
+
+static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem,
+ int offset)
+{
+ return (hem->start <= offset && offset <= hem->end);
+}
+
+static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list,
+ int page_offset)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ struct hns_roce_hem_item *found = NULL;
+
+ list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
+ if (hem_list_page_is_in_range(hem, page_offset)) {
+ found = hem;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
+{
+ /*
+ * hopnum base address table levels
+ * 0 L0(buf)
+ * 1 L0 -> buf
+ * 2 L0 -> L1 -> buf
+ * 3 L0 -> L1 -> L2 -> buf
+ */
+ return bt_level >= (hopnum ? hopnum - 1 : hopnum);
+}
+
+/*
+ * calc base address entries num
+ * @hopnum: num of mutihop addressing
+ * @bt_level: base address table level
+ * @unit: ba entries per bt page
+ */
+static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+{
+ u32 step;
+ int max;
+ int i;
+
+ if (hopnum <= bt_level)
+ return 0;
+ /*
+ * hopnum bt_level range
+ * 1 0 unit
+ * ------------
+ * 2 0 unit * unit
+ * 2 1 unit
+ * ------------
+ * 3 0 unit * unit * unit
+ * 3 1 unit * unit
+ * 3 2 unit
+ */
+ step = 1;
+ max = hopnum - bt_level;
+ for (i = 0; i < max; i++)
+ step = step * unit;
+
+ return step;
+}
+
+/*
+ * calc the root ba entries which could cover all regions
+ * @regions: buf region array
+ * @region_cnt: array size of @regions
+ * @unit: ba entries per bt page
+ */
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit)
+{
+ struct hns_roce_buf_region *r;
+ int total = 0;
+ int step;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = (struct hns_roce_buf_region *)&regions[i];
+ if (r->hopnum > 1) {
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step > 0)
+ total += (r->count + step - 1) / step;
+ } else {
+ total += r->count;
+ }
+ }
+
+ return total;
+}
+
+static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
+ const struct hns_roce_buf_region *r, int unit,
+ int offset, struct list_head *mid_bt,
+ struct list_head *btm_bt)
+{
+ struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
+ struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
+ struct hns_roce_hem_item *cur, *pre;
+ const int hopnum = r->hopnum;
+ int start_aligned;
+ int distance;
+ int ret = 0;
+ int max_ofs;
+ int level;
+ u32 step;
+ int end;
+
+ if (hopnum <= 1)
+ return 0;
+
+ if (hopnum > HNS_ROCE_MAX_BT_LEVEL) {
+ dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum);
+ return -EINVAL;
+ }
+
+ if (offset < r->offset) {
+ dev_err(hr_dev->dev, "invalid offset %d, min %u!\n",
+ offset, r->offset);
+ return -EINVAL;
+ }
+
+ distance = offset - r->offset;
+ max_ofs = r->offset + r->count - 1;
+ for (level = 0; level < hopnum; level++)
+ INIT_LIST_HEAD(&temp_list[level]);
+
+ /* config L1 bt to last bt and link them to corresponding parent */
+ for (level = 1; level < hopnum; level++) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
+
+ step = hem_list_calc_ba_range(hopnum, level, unit);
+ if (step < 1) {
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ start_aligned = (distance / step) * step + r->offset;
+ end = min_t(int, start_aligned + step - 1, max_ofs);
+ cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
+ true);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto err_exit;
+ }
+ hem_ptrs[level] = cur;
+ list_add(&cur->list, &temp_list[level]);
+ if (hem_list_is_bottom_bt(hopnum, level))
+ list_add(&cur->sibling, &temp_list[0]);
+
+ /* link bt to parent bt */
+ if (level > 1) {
+ pre = hem_ptrs[level - 1];
+ step = (cur->start - pre->start) / step * BA_BYTE_LEN;
+ hem_list_link_bt(hr_dev, pre->addr + step,
+ cur->dma_addr);
+ }
+ }
+
+ list_splice(&temp_list[0], btm_bt);
+ for (level = 1; level < hopnum; level++)
+ list_splice(&temp_list[level], &mid_bt[level]);
+
+ return 0;
+
+err_exit:
+ for (level = 1; level < hopnum; level++)
+ hem_list_free_all(hr_dev, &temp_list[level], true);
+
+ return ret;
+}
+
+static struct hns_roce_hem_item *
+alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *hem;
+ int ba_num;
+ int offset;
+
+ ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
+ if (ba_num < 1)
+ return ERR_PTR(-ENOMEM);
+
+ if (ba_num > unit)
+ return ERR_PTR(-ENOBUFS);
+
+ offset = regions[0].offset;
+ /* indicate to last region */
+ r = &regions[region_cnt - 1];
+ hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+ ba_num, true);
+ if (!hem)
+ return ERR_PTR(-ENOMEM);
+
+ *max_ba_num = ba_num;
+
+ return hem;
+}
+
+static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ u64 phy_base, const struct hns_roce_buf_region *r,
+ struct list_head *branch_head,
+ struct list_head *leaf_head)
+{
+ struct hns_roce_hem_item *hem;
+
+ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
+ r->count, false);
+ if (!hem)
+ return -ENOMEM;
+
+ hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
+ list_add(&hem->list, branch_head);
+ list_add(&hem->sibling, leaf_head);
+
+ return r->count;
+}
+
+static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ int unit, const struct hns_roce_buf_region *r,
+ const struct list_head *branch_head)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ int total = 0;
+ int offset;
+ int step;
+
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step < 1)
+ return -EINVAL;
+
+ /* if exist mid bt, link L1 to L0 */
+ list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
+ offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
+ hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr);
+ total++;
+ }
+
+ return total;
+}
+
+static int
+setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
+ int unit, int max_ba_num, struct hns_roce_hem_head *head,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *root_hem;
+ void *cpu_base;
+ u64 phy_base;
+ int i, total;
+ int ret;
+
+ root_hem = list_first_entry(&head->root,
+ struct hns_roce_hem_item, list);
+ if (!root_hem)
+ return -ENOMEM;
+
+ total = 0;
+ for (i = 0; i < region_cnt && total < max_ba_num; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ /* all regions's mid[x][0] shared the root_bt's trunk */
+ cpu_base = root_hem->addr + total * BA_BYTE_LEN;
+ phy_base = root_hem->dma_addr + total * BA_BYTE_LEN;
+
+ /* if hopnum is 0 or 1, cut a new fake hem from the root bt
+ * which's address share to all regions.
+ */
+ if (hem_list_is_bottom_bt(r->hopnum, 0))
+ ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r,
+ &head->branch[i], &head->leaf);
+ else
+ ret = setup_middle_bt(hr_dev, cpu_base, unit, r,
+ &hem_list->mid_bt[i][1]);
+
+ if (ret < 0)
+ return ret;
+
+ total += ret;
+ }
+
+ list_splice(&head->leaf, &hem_list->btm_bt);
+ list_splice(&head->root, &hem_list->root_bt);
+ for (i = 0; i < region_cnt; i++)
+ list_splice(&head->branch[i], &hem_list->mid_bt[i][0]);
+
+ return 0;
+}
+
+static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list, int unit,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ struct hns_roce_hem_item *root_hem;
+ struct hns_roce_hem_head head;
+ int max_ba_num;
+ int ret;
+ int i;
+
+ root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset);
+ if (root_hem)
+ return 0;
+
+ max_ba_num = 0;
+ root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions,
+ region_cnt);
+ if (IS_ERR(root_hem))
+ return PTR_ERR(root_hem);
+
+ /* List head for storing all allocated HEM items */
+ INIT_LIST_HEAD(&head.root);
+ INIT_LIST_HEAD(&head.leaf);
+ for (i = 0; i < region_cnt; i++)
+ INIT_LIST_HEAD(&head.branch[i]);
+
+ hem_list->root_ba = root_hem->dma_addr;
+ list_add(&root_hem->list, &head.root);
+ ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions,
+ region_cnt);
+ if (ret) {
+ for (i = 0; i < region_cnt; i++)
+ hem_list_free_all(hr_dev, &head.branch[i], false);
+
+ hem_list_free_all(hr_dev, &head.root, true);
+ }
+
+ return ret;
+}
+
+/* construct the base address table and link them by address hop config */
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift)
+{
+ const struct hns_roce_buf_region *r;
+ int ofs, end;
+ int unit;
+ int ret;
+ int i;
+
+ if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
+ dev_err(hr_dev->dev, "invalid region region_cnt %d!\n",
+ region_cnt);
+ return -EINVAL;
+ }
+
+ unit = (1 << bt_pg_shift) / BA_BYTE_LEN;
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ end = r->offset + r->count;
+ for (ofs = r->offset; ofs < end; ofs += unit) {
+ ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
+ hem_list->mid_bt[i],
+ &hem_list->btm_bt);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "alloc hem trunk fail ret = %d!\n", ret);
+ goto err_alloc;
+ }
+ }
+ }
+
+ ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
+ region_cnt);
+ if (ret)
+ dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret);
+ else
+ return 0;
+
+err_alloc:
+ hns_roce_hem_list_release(hr_dev, hem_list);
+
+ return ret;
+}
+
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j],
+ j != 0);
+
+ hem_list_free_all(hr_dev, &hem_list->root_bt, true);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ hem_list->root_ba = 0;
+}
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ INIT_LIST_HEAD(&hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
+}
+
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt)
+{
+ struct list_head *head = &hem_list->btm_bt;
+ struct hns_roce_hem_item *hem, *temp_hem;
+ void *cpu_base = NULL;
+ int nr = 0;
+
+ list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (hem_list_page_is_in_range(hem, offset)) {
+ nr = offset - hem->start;
+ cpu_base = hem->addr + nr * BA_BYTE_LEN;
+ nr = hem->end + 1 - offset;
+ break;
+ }
+ }
+
+ if (mtt_cnt)
+ *mtt_cnt = nr;
+
+ return cpu_base;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
new file mode 100644
index 000000000..7d23d3c51
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_HEM_H
+#define _HNS_ROCE_HEM_H
+
+#define HEM_HOP_STEP_DIRECT 0xff
+
+enum {
+ /* MAP HEM(Hardware Entry Memory) */
+ HEM_TYPE_QPC = 0,
+ HEM_TYPE_MTPT,
+ HEM_TYPE_CQC,
+ HEM_TYPE_SRQC,
+ HEM_TYPE_SCCC,
+ HEM_TYPE_QPC_TIMER,
+ HEM_TYPE_CQC_TIMER,
+ HEM_TYPE_GMV,
+
+ /* UNMAP HEM */
+ HEM_TYPE_MTT,
+ HEM_TYPE_CQE,
+ HEM_TYPE_SRQWQE,
+ HEM_TYPE_IDX,
+ HEM_TYPE_IRRL,
+ HEM_TYPE_TRRL,
+};
+
+#define HNS_ROCE_HEM_CHUNK_LEN \
+ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
+ (sizeof(struct scatterlist) + sizeof(void *)))
+
+#define check_whether_bt_num_3(type, hop_num) \
+ (type < HEM_TYPE_MTT && hop_num == 2)
+
+#define check_whether_bt_num_2(type, hop_num) \
+ ((type < HEM_TYPE_MTT && hop_num == 1) || \
+ (type >= HEM_TYPE_MTT && hop_num == 2))
+
+#define check_whether_bt_num_1(type, hop_num) \
+ ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \
+ (type >= HEM_TYPE_MTT && hop_num == 1) || \
+ (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
+
+struct hns_roce_hem_chunk {
+ struct list_head list;
+ int npages;
+ int nsg;
+ struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
+ void *buf[HNS_ROCE_HEM_CHUNK_LEN];
+};
+
+struct hns_roce_hem {
+ struct list_head chunk_list;
+ refcount_t refcount;
+};
+
+struct hns_roce_hem_iter {
+ struct hns_roce_hem *hem;
+ struct hns_roce_hem_chunk *chunk;
+ int page_idx;
+};
+
+struct hns_roce_hem_mhop {
+ u32 hop_num;
+ u32 buf_chunk_size;
+ u32 bt_chunk_size;
+ u32 ba_l0_num;
+ u32 l0_idx; /* level 0 base address table index */
+ u32 l1_idx; /* level 1 base address table index */
+ u32 l2_idx; /* level 2 base address table index */
+};
+
+void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem);
+int hns_roce_table_get(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj);
+void hns_roce_table_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj);
+void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ dma_addr_t *dma_handle);
+int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, u32 type,
+ unsigned long obj_size, unsigned long nobj);
+void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table);
+void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev);
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop);
+bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list);
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit);
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift);
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list);
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt);
+
+static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
+ struct hns_roce_hem_iter *iter)
+{
+ iter->hem = hem;
+ iter->chunk = list_empty(&hem->chunk_list) ? NULL :
+ list_entry(hem->chunk_list.next,
+ struct hns_roce_hem_chunk, list);
+ iter->page_idx = 0;
+}
+
+static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
+{
+ return !iter->chunk;
+}
+
+static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
+{
+ if (++iter->page_idx >= iter->chunk->nsg) {
+ if (iter->chunk->list.next == &iter->hem->chunk_list) {
+ iter->chunk = NULL;
+ return;
+ }
+
+ iter->chunk = list_entry(iter->chunk->list.next,
+ struct hns_roce_hem_chunk, list);
+ iter->page_idx = 0;
+ }
+}
+
+static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
+{
+ return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
+}
+
+#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
new file mode 100644
index 000000000..58fbb1d3b
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -0,0 +1,7103 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "hnae3.h"
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
+
+enum {
+ CMD_RST_PRC_OTHERS,
+ CMD_RST_PRC_SUCCESS,
+ CMD_RST_PRC_EBUSY,
+};
+
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_le32(sg->lkey);
+ dseg->addr = cpu_to_le64(sg->addr);
+ dseg->len = cpu_to_le32(sg->length);
+}
+
+/*
+ * mapped-value = 1 + real-value
+ * The hns wr opcode real value is start from 0, In order to distinguish between
+ * initialized and uninitialized map values, we plus 1 to the actual value when
+ * defining the mapping, so that the validity can be identified by checking the
+ * mapped value is greater than 0.
+ */
+#define HR_OPC_MAP(ib_key, hr_key) \
+ [IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key
+
+static const u32 hns_roce_op_code[] = {
+ HR_OPC_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_OPC_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE_WITH_IMM),
+ HR_OPC_MAP(SEND, SEND),
+ HR_OPC_MAP(SEND_WITH_IMM, SEND_WITH_IMM),
+ HR_OPC_MAP(RDMA_READ, RDMA_READ),
+ HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
+ HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
+ HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
+ HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
+ HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
+ HR_OPC_MAP(REG_MR, FAST_REG_PMR),
+};
+
+static u32 to_hr_opcode(u32 ib_opcode)
+{
+ if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code))
+ return HNS_ROCE_V2_WQE_OP_MASK;
+
+ return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 :
+ HNS_ROCE_V2_WQE_OP_MASK;
+}
+
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_reg_wr *wr)
+{
+ struct hns_roce_wqe_frmr_seg *fseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+ u64 pbl_ba;
+
+ /* use ib_access_flags */
+ hr_reg_write_bool(fseg, FRMR_BIND_EN, wr->access & IB_ACCESS_MW_BIND);
+ hr_reg_write_bool(fseg, FRMR_ATOMIC,
+ wr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(fseg, FRMR_RR, wr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(fseg, FRMR_RW, wr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(fseg, FRMR_LW, wr->access & IB_ACCESS_LOCAL_WRITE);
+
+ /* Data structure reuse may lead to confusion */
+ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
+ rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba));
+ rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba));
+
+ rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+ rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+ rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+ rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+ hr_reg_write(fseg, FRMR_PBL_SIZE, mr->npages);
+ hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(fseg, FRMR_BLK_MODE);
+}
+
+static void set_atomic_seg(const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int valid_num_sge)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_wqe_atomic_seg *aseg =
+ (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+ set_data_seg_v2(dseg, wr->sg_list);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+ aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
+ } else {
+ aseg->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(wr)->compare_add);
+ aseg->cmp_data = 0;
+ }
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+}
+
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ unsigned int *sge_idx, u32 msg_len)
+{
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+ if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+ }
+
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+ len = wr->sg_list[0].length;
+ addr = (void *)(unsigned long)(wr->sg_list[0].addr);
+
+ /* When copying data to extended sge space, the left length in page may
+ * not long enough for current user's sge. So the data should be
+ * splited into several parts, one in the first page, and the others in
+ * the subsequent pages.
+ */
+ while (1) {
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+ idx += len / HNS_ROCE_SGE_SIZE;
+
+ i++;
+ if (i >= wr->num_sge)
+ break;
+
+ left_len_in_pg -= len;
+ len = wr->sg_list[i].length;
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ dseg += len;
+ } else {
+ memcpy(dseg, addr, left_len_in_pg);
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+ }
+ }
+
+ *sge_idx = idx;
+
+ return 0;
+}
+
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge,
+ unsigned int *sge_ind, unsigned int cnt)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ unsigned int idx = *sge_ind;
+
+ while (cnt > 0) {
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ if (likely(sge->length)) {
+ set_data_seg_v2(dseg, sge);
+ idx++;
+ cnt--;
+ }
+ sge++;
+ }
+
+ *sge_ind = idx;
+}
+
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+ if (mtu < 0 || len > qp->max_inline_data || len > mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+ len, qp->max_inline_data, mtu);
+ return false;
+ }
+
+ return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_idx)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int curr_idx = *sge_idx;
+ void *dseg = rc_sq_wqe;
+ unsigned int i;
+ int ret;
+
+ if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+ ibdev_err(ibdev, "invalid inline parameters!\n");
+ return -EINVAL;
+ }
+
+ if (!check_inl_data_len(qp, msg_len))
+ return -EINVAL;
+
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ dseg += wr->sg_list[i].length;
+ }
+ } else {
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
+ }
+
+ *sge_idx = curr_idx;
+
+ return 0;
+}
+
+static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_ind,
+ unsigned int valid_num_sge)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int j = 0;
+ int i;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
+ !!(wr->send_flags & IB_SEND_INLINE));
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
+ }
+ } else {
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ j++;
+ }
+ }
+
+ set_extend_sge(qp, wr->sg_list + i, sge_ind,
+ valid_num_sge - HNS_ROCE_SGE_IN_WQE);
+ }
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+
+ return 0;
+}
+
+static int check_send_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
+ ibdev_err(ibdev, "not supported QP(0x%x)type!\n",
+ ibqp->qp_type);
+ return -EOPNOTSUPP;
+ } else if (unlikely(hr_qp->state == IB_QPS_RESET ||
+ hr_qp->state == IB_QPS_INIT ||
+ hr_qp->state == IB_QPS_RTR)) {
+ ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
+ hr_qp->state);
+ return -EINVAL;
+ } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
+ ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
+ hr_dev->state);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr,
+ unsigned int *sge_len)
+{
+ unsigned int valid_num = 0;
+ unsigned int len = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ len += wr->sg_list[i].length;
+ valid_num++;
+ }
+ }
+
+ *sge_len = len;
+ return valid_num;
+}
+
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+ default:
+ return 0;
+ }
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return 0;
+}
+
+static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ struct hns_roce_ah *ah)
+{
+ struct ib_device *ib_dev = ah->ibah.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
+
+ if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
+ return -EINVAL;
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
+
+ ud_sq_wqe->sgid_index = ah->av.gid_index;
+
+ memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
+ memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
+
+ return 0;
+}
+
+static inline int set_ud_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+ struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ ret = set_ud_opcode(ud_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
+
+ ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
+ !!(wr->send_flags & IB_SEND_SIGNALED));
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
+ !!(wr->send_flags & IB_SEND_SOLICITED));
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
+
+ ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
+
+ ret = fill_ud_av(ud_sq_wqe, ah);
+ if (ret)
+ return ret;
+
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
+ set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
+
+ return 0;
+}
+
+static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+ int ret = 0;
+
+ rc_sq_wqe->immtdata = get_immtdata(wr);
+
+ switch (ib_op) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+ break;
+ case IB_WR_REG_MR:
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (unlikely(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return ret;
+}
+
+static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
+ (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
+ else if (wr->opcode != IB_WR_REG_MR)
+ ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
+ &curr_idx, valid_num_sge);
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
+
+ return ret;
+}
+
+static inline void update_sq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ struct hns_roce_v2_db sq_db = {};
+
+ hr_reg_write(&sq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
+
+ hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg);
+ }
+}
+
+static inline void update_rq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ if (likely(qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)) {
+ *qp->rdb.db_record =
+ qp->rq.head & V2_DB_PRODUCER_IDX_M;
+ } else {
+ struct hns_roce_v2_db rq_db = {};
+
+ hr_reg_write(&rq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
+ hr_reg_write(&rq_db, DB_PI, qp->rq.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&rq_db,
+ qp->rq.db_reg);
+ }
+ }
+}
+
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+#define HNS_ROCE_SL_SHIFT 2
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
+ qp->sl >> HNS_ROCE_SL_SHIFT);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
+}
+
+static int hns_roce_v2_post_send(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ unsigned long flags = 0;
+ unsigned int owner_bit;
+ unsigned int sge_idx;
+ unsigned int wqe_idx;
+ void *wqe = NULL;
+ u32 nreq;
+ int ret;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ ret = check_send_valid(hr_dev, qp);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
+ sge_idx = qp->next_sge;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n",
+ wr->num_sge, qp->sq.max_gs);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = hns_roce_get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
+ owner_bit =
+ ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+
+ /* Corresponding to the QP type, wqe process separately */
+ if (ibqp->qp_type == IB_QPT_RC)
+ ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+ else
+ ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+ qp->next_sge = sge_idx;
+
+ if (nreq == 1 && !ret &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return ret;
+}
+
+static int check_recv_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
+ ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n",
+ ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
+
+ if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+
+ if (hr_qp->state == IB_QPS_RESET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
+
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
+ if (!wr->sg_list[i].length)
+ continue;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
+ }
+
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ void *wqe = NULL;
+ u32 i;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ /* rq support inline data */
+ if (hr_qp->rq_inl_buf.wqe_cnt) {
+ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+ sge_list[i].len = wr->sg_list[i].length;
+ }
+ }
+}
+
+static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 wqe_idx, nreq, max_sge;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags);
+
+ ret = check_recv_valid(hr_dev, hr_qp);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
+ hr_qp->ibqp.recv_cq))) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
+ }
+
+out:
+ if (likely(nreq)) {
+ hr_qp->rq.head += nreq;
+
+ update_rq_db(hr_dev, hr_qp);
+ }
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+
+ return ret;
+}
+
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
+{
+ return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
+}
+
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
+{
+ return hns_roce_buf_offset(idx_que->mtr.kmem,
+ n << idx_que->entry_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
+{
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
+ srq->idx_que.tail++;
+
+ spin_unlock(&srq->lock);
+}
+
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+ const struct ib_recv_wr *wr)
+{
+ struct ib_device *ib_dev = srq->ibsrq.device;
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ib_dev,
+ "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ return -EINVAL;
+ }
+
+ if (unlikely(hns_roce_srqwq_overflow(srq))) {
+ ibdev_err(ib_dev,
+ "failed to check srqwq status, srqwq is full.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
+
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
+ return -ENOSPC;
+
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
+}
+
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *buf;
+
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ buf = get_idx_buf(idx_que, head);
+ *buf = cpu_to_le32(wqe_idx);
+
+ idx_que->head++;
+}
+
+static void update_srq_db(struct hns_roce_v2_db *db, struct hns_roce_srq *srq)
+{
+ hr_reg_write(db, DB_TAG, srq->srqn);
+ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(db, DB_PI, srq->idx_que.head);
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_db srq_db;
+ unsigned long flags;
+ int ret = 0;
+ u32 max_sge;
+ u32 wqe_idx;
+ void *wqe;
+ u32 nreq;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ max_sge = srq->max_gs - srq->rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ ret = check_post_srq_valid(srq, max_sge, wr);
+ if (ret) {
+ *bad_wr = wr;
+ break;
+ }
+
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+ fill_wqe_idx(srq, wqe_idx);
+ srq->wrid[wqe_idx] = wr->wr_id;
+ }
+
+ if (likely(nreq)) {
+ update_srq_db(&srq_db, srq);
+
+ hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
+static u32 hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
+ unsigned long instance_stage,
+ unsigned long reset_stage)
+{
+ /* When hardware reset has been completed once or more, we should stop
+ * sending mailbox&cmq&doorbell to hardware. If now in .init_instance()
+ * function, we should exit with error. If now at HNAE3_INIT_CLIENT
+ * stage of soft reset process, we should exit with error, and then
+ * HNAE3_INIT_CLIENT related process can rollback the operation like
+ * notifing hardware to free resources, HNAE3_INIT_CLIENT related
+ * process will exit with error to notify NIC driver to reschedule soft
+ * reset process once again.
+ */
+ hr_dev->is_reset = true;
+ hr_dev->dis_db = true;
+
+ if (reset_stage == HNS_ROCE_STATE_RST_INIT ||
+ instance_stage == HNS_ROCE_STATE_INIT)
+ return CMD_RST_PRC_EBUSY;
+
+ return CMD_RST_PRC_SUCCESS;
+}
+
+static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
+ unsigned long instance_stage,
+ unsigned long reset_stage)
+{
+#define HW_RESET_TIMEOUT_US 1000000
+#define HW_RESET_SLEEP_US 1000
+
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long val;
+ int ret;
+
+ /* When hardware reset is detected, we should stop sending mailbox&cmq&
+ * doorbell to hardware. If now in .init_instance() function, we should
+ * exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset
+ * process, we should exit with error, and then HNAE3_INIT_CLIENT
+ * related process can rollback the operation like notifing hardware to
+ * free resources, HNAE3_INIT_CLIENT related process will exit with
+ * error to notify NIC driver to reschedule soft reset process once
+ * again.
+ */
+ hr_dev->dis_db = true;
+
+ ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
+ val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
+ HW_RESET_TIMEOUT_US, false, handle);
+ if (!ret)
+ hr_dev->is_reset = true;
+
+ if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
+ instance_stage == HNS_ROCE_STATE_INIT)
+ return CMD_RST_PRC_EBUSY;
+
+ return CMD_RST_PRC_SUCCESS;
+}
+
+static u32 hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ /* When software reset is detected at .init_instance() function, we
+ * should stop sending mailbox&cmq&doorbell to hardware, and exit
+ * with error.
+ */
+ hr_dev->dis_db = true;
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt)
+ hr_dev->is_reset = true;
+
+ return CMD_RST_PRC_EBUSY;
+}
+
+static u32 check_aedev_reset_status(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long instance_stage; /* the current instance stage */
+ unsigned long reset_stage; /* the current reset stage */
+ unsigned long reset_cnt;
+ bool sw_resetting;
+ bool hw_resetting;
+
+ /* Get information about reset from NIC driver or RoCE driver itself,
+ * the meaning of the following variables from NIC driver are described
+ * as below:
+ * reset_cnt -- The count value of completed hardware reset.
+ * hw_resetting -- Whether hardware device is resetting now.
+ * sw_resetting -- Whether NIC's software reset process is running now.
+ */
+ instance_stage = handle->rinfo.instance_state;
+ reset_stage = handle->rinfo.reset_state;
+ reset_cnt = ops->ae_dev_reset_cnt(handle);
+ if (reset_cnt != hr_dev->reset_cnt)
+ return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage,
+ reset_stage);
+
+ hw_resetting = ops->get_cmdq_stat(handle);
+ if (hw_resetting)
+ return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage,
+ reset_stage);
+
+ sw_resetting = ops->ae_dev_resetting(handle);
+ if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT)
+ return hns_roce_v2_cmd_sw_resetting(hr_dev);
+
+ return CMD_RST_PRC_OTHERS;
+}
+
+static bool check_device_is_in_reset(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (hr_dev->reset_cnt != ops->ae_dev_reset_cnt(handle))
+ return true;
+
+ if (ops->get_hw_reset_stat(handle))
+ return true;
+
+ if (ops->ae_dev_resetting(handle))
+ return true;
+
+ return false;
+}
+
+static bool v2_chk_mbox_is_avail(struct hns_roce_dev *hr_dev, bool *busy)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ u32 status;
+
+ if (hr_dev->is_reset)
+ status = CMD_RST_PRC_SUCCESS;
+ else
+ status = check_aedev_reset_status(hr_dev, priv->handle);
+
+ *busy = (status == CMD_RST_PRC_EBUSY);
+
+ return status == CMD_RST_PRC_OTHERS;
+}
+
+static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *ring)
+{
+ int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc);
+
+ ring->desc = dma_alloc_coherent(hr_dev->dev, size,
+ &ring->desc_dma_addr, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *ring)
+{
+ dma_free_coherent(hr_dev->dev,
+ ring->desc_num * sizeof(struct hns_roce_cmq_desc),
+ ring->desc, ring->desc_dma_addr);
+
+ ring->desc_dma_addr = 0;
+}
+
+static int init_csq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *csq)
+{
+ dma_addr_t dma;
+ int ret;
+
+ csq->desc_num = CMD_CSQ_DESC_NUM;
+ spin_lock_init(&csq->lock);
+ csq->flag = TYPE_CSQ;
+ csq->head = 0;
+
+ ret = hns_roce_alloc_cmq_desc(hr_dev, csq);
+ if (ret)
+ return ret;
+
+ dma = csq->desc_dma_addr;
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, lower_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
+ (u32)csq->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
+
+ /* Make sure to write CI first and then PI */
+ roce_write(hr_dev, ROCEE_TX_CMQ_CI_REG, 0);
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, 0);
+
+ return 0;
+}
+
+static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ int ret;
+
+ priv->cmq.tx_timeout = HNS_ROCE_CMQ_TX_TIMEOUT;
+
+ ret = init_csq(hr_dev, &priv->cmq.csq);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to init CSQ, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq);
+}
+
+static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
+ enum hns_roce_opcode_type opcode,
+ bool is_read)
+{
+ memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
+ if (is_read)
+ desc->flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_WR);
+ else
+ desc->flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+}
+
+static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
+{
+ u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ return tail == priv->cmq.csq.head;
+}
+
+static void update_cmdq_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+
+ if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
+ hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
+}
+
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+ struct hns_roce_cmd_errcode errcode_table[] = {
+ {CMD_EXEC_SUCCESS, 0},
+ {CMD_NO_AUTH, -EPERM},
+ {CMD_NOT_EXIST, -EOPNOTSUPP},
+ {CMD_CRQ_FULL, -EXFULL},
+ {CMD_NEXT_ERR, -ENOSR},
+ {CMD_NOT_EXEC, -ENOTBLK},
+ {CMD_PARA_ERR, -EINVAL},
+ {CMD_RESULT_ERR, -ERANGE},
+ {CMD_TIMEOUT, -ETIME},
+ {CMD_HILINK_ERR, -ENOLINK},
+ {CMD_INFO_ILLEGAL, -ENXIO},
+ {CMD_INVALID, -EBADR},
+ };
+ u16 i;
+
+ for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+ if (desc_ret == errcode_table[i].return_status)
+ return errcode_table[i].errno;
+ return -EIO;
+}
+
+static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u32 timeout = 0;
+ u16 desc_ret;
+ u32 tail;
+ int ret;
+ int i;
+
+ spin_lock_bh(&csq->lock);
+
+ tail = csq->head;
+
+ for (i = 0; i < num; i++) {
+ csq->desc[csq->head++] = desc[i];
+ if (csq->head == csq->desc_num)
+ csq->head = 0;
+ }
+
+ /* Write to hardware */
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
+
+ do {
+ if (hns_roce_cmq_csq_done(hr_dev))
+ break;
+ udelay(1);
+ } while (++timeout < priv->cmq.tx_timeout);
+
+ if (hns_roce_cmq_csq_done(hr_dev)) {
+ ret = 0;
+ for (i = 0; i < num; i++) {
+ /* check the result of hardware write back */
+ desc[i] = csq->desc[tail++];
+ if (tail == csq->desc_num)
+ tail = 0;
+
+ desc_ret = le16_to_cpu(desc[i].retval);
+ if (likely(desc_ret == CMD_EXEC_SUCCESS))
+ continue;
+
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
+ desc->opcode, desc_ret);
+ ret = hns_roce_cmd_err_convert_errno(desc_ret);
+ }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+ tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ dev_warn(hr_dev->dev, "CMDQ move tail from %u to %u.\n",
+ csq->head, tail);
+ csq->head = tail;
+
+ update_cmdq_status(hr_dev);
+
+ ret = -EAGAIN;
+ }
+
+ spin_unlock_bh(&csq->lock);
+
+ return ret;
+}
+
+static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ bool busy;
+ int ret;
+
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
+
+ ret = __hns_roce_cmq_send(hr_dev, desc, num);
+ if (ret) {
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
+ }
+
+ return ret;
+}
+
+static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
+ dma_addr_t base_addr, u8 cmd, unsigned long tag)
+{
+ struct hns_roce_cmd_mailbox *mbox;
+ int ret;
+
+ mbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mbox))
+ return PTR_ERR(mbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag);
+ hns_roce_free_cmd_mailbox(hr_dev, mbox);
+ return ret;
+}
+
+static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_version *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_HW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_version *)desc.data;
+ hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version);
+ hr_dev->vendor_id = hr_dev->pci_dev->vendor;
+
+ return 0;
+}
+
+static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (!ops->get_hw_reset_stat(handle)) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after reset.\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (ops->ae_dev_reset_cnt(handle) !=
+ hr_dev->reset_cnt) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after sw reset\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n");
+}
+
+static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
+ int flag)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) {
+ hr_dev->dis_db = true;
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev, "func clear success after reset.\n");
+ return;
+ }
+
+ if (ops->get_hw_reset_stat(handle)) {
+ func_clr_hw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (ops->ae_dev_resetting(handle) &&
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) {
+ func_clr_sw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (retval && !flag)
+ dev_warn(hr_dev->dev,
+ "func clear read failed, ret = %d.\n", retval);
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ bool fclr_write_fail_flag = false;
+ struct hns_roce_func_clear *resp;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret = 0;
+
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
+ resp = (struct hns_roce_func_clear *)desc.data;
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ fclr_write_fail_flag = true;
+ dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
+ end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
+ while (end) {
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
+ end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR,
+ true);
+
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ continue;
+
+ if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
+ if (vf_id == 0)
+ hr_dev->is_reset = true;
+ return;
+ }
+ }
+
+out:
+ hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
+}
+
+static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *req_a;
+
+ req_a = (struct hns_roce_cmq_req *)desc[0].data;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+ hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+ int i;
+
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return;
+
+ for (i = hr_dev->func_num - 1; i >= 0; i--) {
+ __hns_roce_function_clear(hr_dev, i);
+
+ if (i == 0)
+ continue;
+
+ ret = hns_roce_free_vf_resource(hr_dev, i);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to free vf resource, vf_id = %d, ret = %d.\n",
+ i, ret);
+ }
+}
+
+static int hns_roce_clear_extdb_list_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO,
+ false);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to clear extended doorbell info, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_fw_info *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_fw_info *)desc.data;
+ hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
+
+ return 0;
+}
+
+static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_dev->func_num = 1;
+ return 0;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_FUNC_INFO,
+ true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ hr_dev->func_num = 1;
+ return ret;
+ }
+
+ hr_dev->func_num = le32_to_cpu(desc.func_info.own_func_num);
+ hr_dev->cong_algo_tmpl_id = le32_to_cpu(desc.func_info.own_mac_id);
+
+ return 0;
+}
+
+static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 clock_cycles_of_1us;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
+ false);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
+ else
+ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
+
+ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
+ hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ enum hns_roce_opcode_type opcode;
+ u32 func_num;
+ int ret;
+
+ if (is_vf) {
+ opcode = HNS_ROCE_OPC_QUERY_VF_RES;
+ func_num = 1;
+ } else {
+ opcode = HNS_ROCE_OPC_QUERY_PF_RES;
+ func_num = hr_dev->func_num;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, true);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, true);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, 2);
+ if (ret)
+ return ret;
+
+ caps->qpc_bt_num = hr_reg_read(r_a, FUNC_RES_A_QPC_BT_NUM) / func_num;
+ caps->srqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_SRQC_BT_NUM) / func_num;
+ caps->cqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_CQC_BT_NUM) / func_num;
+ caps->mpt_bt_num = hr_reg_read(r_a, FUNC_RES_A_MPT_BT_NUM) / func_num;
+ caps->eqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_EQC_BT_NUM) / func_num;
+ caps->smac_bt_num = hr_reg_read(r_b, FUNC_RES_B_SMAC_NUM) / func_num;
+ caps->sgid_bt_num = hr_reg_read(r_b, FUNC_RES_B_SGID_NUM) / func_num;
+ caps->sccc_bt_num = hr_reg_read(r_b, FUNC_RES_B_SCCC_BT_NUM) / func_num;
+
+ if (is_vf) {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_V_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_V_GMV_BT_NUM) /
+ func_num;
+ } else {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_B_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_B_GMV_BT_NUM) /
+ func_num;
+ }
+
+ return 0;
+}
+
+static int load_ext_cfg_caps(struct hns_roce_dev *hr_dev, bool is_vf)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ u32 func_num, qp_num;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ func_num = is_vf ? 1 : max_t(u32, 1, hr_dev->func_num);
+ qp_num = hr_reg_read(req, EXT_CFG_QP_PI_NUM) / func_num;
+ caps->num_pi_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
+
+ qp_num = hr_reg_read(req, EXT_CFG_QP_NUM) / func_num;
+ caps->num_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
+
+ return 0;
+}
+
+static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
+ true);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ caps->qpc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_QPC_ITEM_NUM);
+ caps->cqc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_CQC_ITEM_NUM);
+
+ return 0;
+}
+
+static int query_func_resource_caps(struct hns_roce_dev *hr_dev, bool is_vf)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = load_func_res_caps(hr_dev, is_vf);
+ if (ret) {
+ dev_err(dev, "failed to load res caps, ret = %d (%s).\n", ret,
+ is_vf ? "vf" : "pf");
+ return ret;
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ ret = load_ext_cfg_caps(hr_dev, is_vf);
+ if (ret)
+ dev_err(dev, "failed to load ext cfg, ret = %d (%s).\n",
+ ret, is_vf ? "vf" : "pf");
+ }
+
+ return ret;
+}
+
+static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = query_func_resource_caps(hr_dev, false);
+ if (ret)
+ return ret;
+
+ ret = load_pf_timer_res_caps(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ return query_func_resource_caps(hr_dev, true);
+}
+
+static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ u32 vf_id)
+{
+ struct hns_roce_vf_switch *swt;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
+ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
+ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev)
+{
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < hr_dev->func_num; vf_id++) {
+ ret = __hns_roce_set_vf_switch_param(hr_dev, vf_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, FUNC_RES_A_VF_ID, vf_id);
+
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_NUM, caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_IDX, vf_id * caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_NUM, caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_IDX, vf_id * caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_NUM, caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_IDX, vf_id * caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_NUM, caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_IDX, vf_id * caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_NUM, caps->eqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_IDX, vf_id * caps->eqc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_V_QID_NUM, caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_QID_IDX, vf_id * caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_NUM, caps->sccc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_IDX, vf_id * caps->sccc_bt_num);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ hr_reg_write(r_b, FUNC_RES_V_GMV_BT_NUM, caps->gmv_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_GMV_BT_IDX,
+ vf_id * caps->gmv_bt_num);
+ } else {
+ hr_reg_write(r_b, FUNC_RES_B_SGID_NUM, caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SGID_IDX,
+ vf_id * caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_NUM, caps->smac_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_IDX,
+ vf_id * caps->smac_bt_num);
+ }
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int config_vf_ext_resource(struct hns_roce_dev *hr_dev, u32 vf_id)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, false);
+
+ hr_reg_write(req, EXT_CFG_VF_ID, vf_id);
+
+ hr_reg_write(req, EXT_CFG_QP_PI_NUM, caps->num_pi_qps);
+ hr_reg_write(req, EXT_CFG_QP_PI_IDX, vf_id * caps->num_pi_qps);
+ hr_reg_write(req, EXT_CFG_QP_NUM, caps->num_qps);
+ hr_reg_write(req, EXT_CFG_QP_IDX, vf_id * caps->num_qps);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < func_num; vf_id++) {
+ ret = config_vf_hem_resource(hr_dev, vf_id);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to config vf-%u hem res, ret = %d.\n",
+ vf_id, ret);
+ return ret;
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ ret = config_vf_ext_resource(hr_dev, vf_id);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to config vf-%u ext res, ret = %d.\n",
+ vf_id, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_BT_ATTR, false);
+
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BA_PGSZ,
+ caps->qpc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BUF_PGSZ,
+ caps->qpc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_HOPNUM,
+ to_hr_hem_hopnum(caps->qpc_hop_num, caps->num_qps));
+
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BA_PGSZ,
+ caps->srqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BUF_PGSZ,
+ caps->srqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_HOPNUM,
+ to_hr_hem_hopnum(caps->srqc_hop_num, caps->num_srqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BA_PGSZ,
+ caps->cqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BUF_PGSZ,
+ caps->cqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_HOPNUM,
+ to_hr_hem_hopnum(caps->cqc_hop_num, caps->num_cqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BA_PGSZ,
+ caps->mpt_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BUF_PGSZ,
+ caps->mpt_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_HOPNUM,
+ to_hr_hem_hopnum(caps->mpt_hop_num, caps->num_mtpts));
+
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BA_PGSZ,
+ caps->sccc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BUF_PGSZ,
+ caps->sccc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_HOPNUM,
+ to_hr_hem_hopnum(caps->sccc_hop_num, caps->num_qps));
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+/* Use default caps when hns_roce_query_pf_caps() failed or init VF profile */
+static void set_default_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
+ caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
+ caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
+ caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
+ caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
+ caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
+ caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
+ caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
+
+ caps->num_uars = HNS_ROCE_V2_UAR_NUM;
+ caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
+ caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
+ caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
+ caps->num_comp_vectors = 0;
+
+ caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
+ caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
+ caps->qpc_timer_bt_num = HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM;
+ caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
+
+ caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
+ caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
+ caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
+ caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
+ caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
+ caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
+ caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
+ caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
+ caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
+ caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ;
+ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+ caps->reserved_lkey = 0;
+ caps->reserved_pds = 0;
+ caps->reserved_mrws = 1;
+ caps->reserved_uars = 0;
+ caps->reserved_cqs = 0;
+ caps->reserved_srqs = 0;
+ caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
+
+ caps->qpc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
+ caps->srqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
+ caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
+ caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
+ caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+
+ caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM;
+ caps->wqe_sq_hop_num = HNS_ROCE_SQWQE_HOP_NUM;
+ caps->wqe_sge_hop_num = HNS_ROCE_EXT_SGE_HOP_NUM;
+ caps->wqe_rq_hop_num = HNS_ROCE_RQWQE_HOP_NUM;
+ caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
+ caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM;
+ caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
+ caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
+
+ caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
+ HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
+ HNS_ROCE_CAP_FLAG_CQ_RECORD_DB |
+ HNS_ROCE_CAP_FLAG_QP_RECORD_DB;
+
+ caps->pkey_table_len[0] = 1;
+ caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
+ caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
+ caps->local_ca_ack_delay = 0;
+ caps->max_mtu = IB_MTU_4096;
+
+ caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
+ caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
+
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
+
+ caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE |
+ HNS_ROCE_CAP_FLAG_XRC;
+ caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
+ } else {
+ caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
+
+ /* The following configuration are only valid for HIP08 */
+ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ;
+ caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ;
+ caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE;
+ }
+}
+
+static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num,
+ u32 *buf_page_size, u32 *bt_page_size, u32 hem_type)
+{
+ u64 obj_per_chunk;
+ u64 bt_chunk_size = PAGE_SIZE;
+ u64 buf_chunk_size = PAGE_SIZE;
+ u64 obj_per_chunk_default = buf_chunk_size / obj_size;
+
+ *buf_page_size = 0;
+ *bt_page_size = 0;
+
+ switch (hop_num) {
+ case 3:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 2:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 1:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case HNS_ROCE_HOP_NUM_0:
+ obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
+ break;
+ default:
+ pr_err("table %u not support hop_num = %u!\n", hem_type,
+ hop_num);
+ return;
+ }
+
+ if (hem_type >= HEM_TYPE_MTT)
+ *bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+ else
+ *buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+}
+
+static void set_hem_page_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ /* EQ */
+ caps->eqe_ba_pg_sz = 0;
+ caps->eqe_buf_pg_sz = 0;
+
+ /* Link Table */
+ caps->llm_buf_pg_sz = 0;
+
+ /* MR */
+ caps->mpt_ba_pg_sz = 0;
+ caps->mpt_buf_pg_sz = 0;
+ caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
+ caps->pbl_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
+ caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz,
+ HEM_TYPE_MTPT);
+
+ /* QP */
+ caps->qpc_ba_pg_sz = 0;
+ caps->qpc_buf_pg_sz = 0;
+ caps->qpc_timer_ba_pg_sz = 0;
+ caps->qpc_timer_buf_pg_sz = 0;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->mtt_ba_pg_sz = 0;
+ caps->mtt_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
+ caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
+ HEM_TYPE_QPC);
+
+ if (caps->flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num,
+ caps->sccc_bt_num, &caps->sccc_buf_pg_sz,
+ &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC);
+
+ /* CQ */
+ caps->cqc_ba_pg_sz = 0;
+ caps->cqc_buf_pg_sz = 0;
+ caps->cqc_timer_ba_pg_sz = 0;
+ caps->cqc_timer_buf_pg_sz = 0;
+ caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
+ caps->cqe_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
+ caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
+ HEM_TYPE_CQC);
+ calc_pg_sz(caps->max_cqes, caps->cqe_sz, caps->cqe_hop_num,
+ 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
+
+ /* SRQ */
+ if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ caps->srqc_ba_pg_sz = 0;
+ caps->srqc_buf_pg_sz = 0;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz,
+ caps->srqc_hop_num, caps->srqc_bt_num,
+ &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz,
+ HEM_TYPE_SRQC);
+ calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz,
+ caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz,
+ &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE);
+ calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz,
+ caps->idx_hop_num, 1, &caps->idx_buf_pg_sz,
+ &caps->idx_ba_pg_sz, HEM_TYPE_IDX);
+ }
+
+ /* GMV */
+ caps->gmv_ba_pg_sz = 0;
+ caps->gmv_buf_pg_sz = 0;
+}
+
+/* Apply all loaded caps before setting to hardware */
+static void apply_func_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ /* The following configurations don't need to be got from firmware. */
+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+
+ caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM;
+ caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM;
+
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+ if (!caps->num_comp_vectors)
+ caps->num_comp_vectors =
+ min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM,
+ (u32)priv->handle->rinfo.num_vectors -
+ (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM));
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+
+ /* The following configurations will be overwritten */
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+
+ /* The following configurations are not got from firmware */
+ caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
+
+ caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->gid_table_len[0] = caps->gmv_bt_num *
+ (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz);
+
+ caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
+ caps->gmv_entry_sz);
+ } else {
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+
+ caps->eqe_hop_num = HNS_ROCE_V2_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+ caps->gid_table_len[0] /= func_num;
+ }
+
+ if (hr_dev->is_vf) {
+ caps->default_aeq_arm_st = 0x3;
+ caps->default_ceq_arm_st = 0x3;
+ caps->default_ceq_max_cnt = 0x1;
+ caps->default_ceq_period = 0x10;
+ caps->default_aeq_max_cnt = 0x1;
+ caps->default_aeq_period = 0x10;
+ }
+
+ set_hem_page_size(hr_dev);
+}
+
+static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM];
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_query_pf_caps_a *resp_a;
+ struct hns_roce_query_pf_caps_b *resp_b;
+ struct hns_roce_query_pf_caps_c *resp_c;
+ struct hns_roce_query_pf_caps_d *resp_d;
+ struct hns_roce_query_pf_caps_e *resp_e;
+ int ctx_hop_num;
+ int pbl_hop_num;
+ int ret;
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i],
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM,
+ true);
+ if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1))
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM);
+ if (ret)
+ return ret;
+
+ resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data;
+ resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data;
+ resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
+ resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
+ resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+
+ caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
+ caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
+ caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
+ caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
+ caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
+ caps->num_aeq_vectors = resp_a->num_aeq_vectors;
+ caps->num_other_vectors = resp_a->num_other_vectors;
+ caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
+ caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
+ caps->cqe_sz = resp_a->cqe_sz;
+
+ caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
+ caps->irrl_entry_sz = resp_b->irrl_entry_sz;
+ caps->trrl_entry_sz = resp_b->trrl_entry_sz;
+ caps->cqc_entry_sz = resp_b->cqc_entry_sz;
+ caps->srqc_entry_sz = resp_b->srqc_entry_sz;
+ caps->idx_entry_sz = resp_b->idx_entry_sz;
+ caps->sccc_sz = resp_b->sccc_sz;
+ caps->max_mtu = resp_b->max_mtu;
+ caps->qpc_sz = le16_to_cpu(resp_b->qpc_sz);
+ caps->min_cqes = resp_b->min_cqes;
+ caps->min_wqes = resp_b->min_wqes;
+ caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
+ caps->pkey_table_len[0] = resp_b->pkey_table_len;
+ caps->phy_num_uars = resp_b->phy_num_uars;
+ ctx_hop_num = resp_b->ctx_hop_num;
+ pbl_hop_num = resp_b->pbl_hop_num;
+
+ caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
+
+ caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
+ caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
+ HNS_ROCE_CAP_FLAGS_EX_SHIFT;
+
+ caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
+ caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
+ caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
+ caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
+ caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
+ caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
+ caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
+ caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
+
+ caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
+ caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+ caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+ caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
+ caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
+ caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
+ caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
+ caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
+ caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
+ caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
+ caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
+
+ caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
+ caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
+ caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
+ caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
+ caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
+ caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
+ caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
+ caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
+ caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period);
+
+ caps->qpc_hop_num = ctx_hop_num;
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->srqc_hop_num = ctx_hop_num;
+ caps->cqc_hop_num = ctx_hop_num;
+ caps->mpt_hop_num = ctx_hop_num;
+ caps->mtt_hop_num = pbl_hop_num;
+ caps->cqe_hop_num = pbl_hop_num;
+ caps->srqwqe_hop_num = pbl_hop_num;
+ caps->idx_hop_num = pbl_hop_num;
+ caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
+ caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
+ caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
+
+ if (!(caps->page_size_cap & PAGE_SIZE))
+ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+
+ return 0;
+}
+
+static int config_hem_entry_size(struct hns_roce_dev *hr_dev, u32 type, u32 val)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_TYPE, type);
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_VALUE, val);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_QPC_SIZE,
+ caps->qpc_sz);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_SCCC_SIZE,
+ caps->sccc_sz);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_vf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ hr_dev->func_num = 1;
+
+ set_default_caps(hr_dev);
+
+ ret = hns_roce_query_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query VF resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to config VF BA table, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_pf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_query_func_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query func info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_config_global_param(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to config global param, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_set_vf_switch_param(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to set switch param, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_pf_caps(hr_dev);
+ if (ret)
+ set_default_caps(hr_dev);
+
+ ret = hns_roce_query_pf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query pf resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
+ ret = hns_roce_alloc_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to alloc vf resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to config BA table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Configure the size of QPC, SCCC, etc. */
+ return hns_roce_config_entry_size(hr_dev);
+}
+
+static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_cmq_query_hw_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query hardware info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query firmware info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+ hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
+
+ if (hr_dev->is_vf)
+ return hns_roce_v2_vf_profile(hr_dev);
+ else
+ return hns_roce_v2_pf_profile(hr_dev);
+}
+
+static void config_llm_table(struct hns_roce_buf *data_buf, void *cfg_buf)
+{
+ u32 i, next_ptr, page_num;
+ __le64 *entry = cfg_buf;
+ dma_addr_t addr;
+ u64 val;
+
+ page_num = data_buf->npages;
+ for (i = 0; i < page_num; i++) {
+ addr = hns_roce_buf_page(data_buf, i);
+ if (i == (page_num - 1))
+ next_ptr = 0;
+ else
+ next_ptr = i + 1;
+
+ val = HNS_ROCE_EXT_LLM_ENTRY(addr, (u64)next_ptr);
+ entry[i] = cpu_to_le64(val);
+ }
+}
+
+static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *table)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_buf *buf = table->buf;
+ enum hns_roce_opcode_type opcode;
+ dma_addr_t addr;
+
+ opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, CFG_LLM_A_BA_L, lower_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_BA_H, upper_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_DEPTH, buf->npages);
+ hr_reg_write(r_a, CFG_LLM_A_PGSZ, to_hr_hw_page_shift(buf->page_shift));
+ hr_reg_enable(r_a, CFG_LLM_A_INIT_EN);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, 0));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_NXTPTR, 1);
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_PTR, 0);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, buf->npages - 1));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_PTR, buf->npages - 1);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static struct hns_roce_link_table *
+alloc_link_table_buf(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_link_table *link_tbl;
+ u32 pg_shift, size, min_size;
+
+ link_tbl = &priv->ext_llm;
+ pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT;
+ size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
+ min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) << pg_shift;
+
+ /* Alloc data table */
+ size = max(size, min_size);
+ link_tbl->buf = hns_roce_buf_alloc(hr_dev, size, pg_shift, 0);
+ if (IS_ERR(link_tbl->buf))
+ return ERR_PTR(-ENOMEM);
+
+ /* Alloc config table */
+ size = link_tbl->buf->npages * sizeof(u64);
+ link_tbl->table.buf = dma_alloc_coherent(hr_dev->dev, size,
+ &link_tbl->table.map,
+ GFP_KERNEL);
+ if (!link_tbl->table.buf) {
+ hns_roce_buf_free(hr_dev, link_tbl->buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return link_tbl;
+}
+
+static void free_link_table_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *tbl)
+{
+ if (tbl->buf) {
+ u32 size = tbl->buf->npages * sizeof(u64);
+
+ dma_free_coherent(hr_dev->dev, size, tbl->table.buf,
+ tbl->table.map);
+ }
+
+ hns_roce_buf_free(hr_dev, tbl->buf);
+}
+
+static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_link_table *link_tbl;
+ int ret;
+
+ link_tbl = alloc_link_table_buf(hr_dev);
+ if (IS_ERR(link_tbl))
+ return -ENOMEM;
+
+ if (WARN_ON(link_tbl->buf->npages > HNS_ROCE_V2_EXT_LLM_MAX_DEPTH)) {
+ ret = -EINVAL;
+ goto err_alloc;
+ }
+
+ config_llm_table(link_tbl->buf, link_tbl->table.buf);
+ ret = set_llm_cfg_to_hw(hr_dev, link_tbl);
+ if (ret)
+ goto err_alloc;
+
+ return 0;
+
+err_alloc:
+ free_link_table_buf(hr_dev, link_tbl);
+ return ret;
+}
+
+static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ free_link_table_buf(hr_dev, &priv->ext_llm);
+}
+
+static void free_dip_list(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dip *hr_dip;
+ struct hns_roce_dip *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+
+ list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) {
+ list_del(&hr_dip->node);
+ kfree(hr_dip);
+ }
+
+ spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+}
+
+static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_pd *hr_pd;
+ struct ib_pd *pd;
+
+ hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_pd))
+ return NULL;
+ pd = &hr_pd->ibpd;
+ pd->device = ibdev;
+
+ if (hns_roce_alloc_pd(pd, NULL)) {
+ ibdev_err(ibdev, "failed to create pd for free mr.\n");
+ kfree(hr_pd);
+ return NULL;
+ }
+ free_mr->rsv_pd = to_hr_pd(pd);
+ free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->rsv_pd->ibpd.uobject = NULL;
+ free_mr->rsv_pd->ibpd.__internal_mr = NULL;
+ atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
+
+ return pd;
+}
+
+static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_cq_init_attr cq_init_attr = {};
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *cq;
+
+ cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+ hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_cq))
+ return NULL;
+
+ cq = &hr_cq->ib_cq;
+ cq->device = ibdev;
+
+ if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
+ ibdev_err(ibdev, "failed to create cq for free mr.\n");
+ kfree(hr_cq);
+ return NULL;
+ }
+ free_mr->rsv_cq = to_hr_cq(cq);
+ free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->rsv_cq->ib_cq.uobject = NULL;
+ free_mr->rsv_cq->ib_cq.comp_handler = NULL;
+ free_mr->rsv_cq->ib_cq.event_handler = NULL;
+ free_mr->rsv_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
+
+ return cq;
+}
+
+static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
+ struct ib_qp_init_attr *init_attr, int i)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ struct ib_qp *qp;
+ int ret;
+
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_qp))
+ return -ENOMEM;
+
+ qp = &hr_qp->ibqp;
+ qp->device = ibdev;
+
+ ret = hns_roce_create_qp(qp, init_attr, NULL);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create qp for free mr.\n");
+ kfree(hr_qp);
+ return ret;
+ }
+
+ free_mr->rsv_qp[i] = hr_qp;
+ free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
+ free_mr->rsv_qp[i]->ibqp.send_cq = cq;
+
+ return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp *qp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ if (free_mr->rsv_qp[i]) {
+ qp = &free_mr->rsv_qp[i]->ibqp;
+ hns_roce_v2_destroy_qp(qp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ free_mr->rsv_qp[i] = NULL;
+ }
+ }
+
+ if (free_mr->rsv_cq) {
+ hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
+ kfree(free_mr->rsv_cq);
+ free_mr->rsv_cq = NULL;
+ }
+
+ if (free_mr->rsv_pd) {
+ hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
+ kfree(free_mr->rsv_pd);
+ free_mr->rsv_pd = NULL;
+ }
+}
+
+static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_init_attr qp_init_attr = {};
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ int ret;
+ int i;
+
+ pd = free_mr_init_pd(hr_dev);
+ if (!pd)
+ return -ENOMEM;
+
+ cq = free_mr_init_cq(hr_dev);
+ if (!cq) {
+ ret = -ENOMEM;
+ goto create_failed_cq;
+ }
+
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.send_cq = cq;
+ qp_init_attr.recv_cq = cq;
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
+ qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
+ qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
+ qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
+
+ ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
+ if (ret)
+ goto create_failed_qp;
+ }
+
+ return 0;
+
+create_failed_qp:
+ for (i--; i >= 0; i--) {
+ hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ }
+ hns_roce_destroy_cq(cq, NULL);
+ kfree(cq);
+
+create_failed_cq:
+ hns_roce_dealloc_pd(pd, NULL);
+ kfree(pd);
+
+ return ret;
+}
+
+static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
+ struct ib_qp_attr *attr, int sl_num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ int loopback;
+ int mask;
+ int ret;
+
+ hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
+ hr_qp->free_mr_en = 1;
+ hr_qp->ibqp.device = ibdev;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
+
+ mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_INIT);
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ loopback = hr_dev->loop_idc;
+ /* Set qpc lbi = 1 incidate loopback IO */
+ hr_dev->loop_idc = 1;
+
+ mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ attr->qp_state = IB_QPS_RTR;
+ attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ attr->path_mtu = IB_MTU_256;
+ attr->dest_qp_num = hr_qp->qpn;
+ attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+
+ rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_RTR);
+ hr_dev->loop_idc = loopback;
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
+ IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+ attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
+ attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
+ IB_QPS_RTS);
+ if (ret)
+ ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int free_mr_modify_qp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_attr attr = {};
+ int ret;
+ int i;
+
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
+ rdma_ah_set_port_num(&attr.ah_attr, 1);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ ret = free_mr_modify_rsv_qp(hr_dev, &attr, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int free_mr_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ int ret;
+
+ mutex_init(&free_mr->mutex);
+
+ ret = free_mr_alloc_res(hr_dev);
+ if (ret)
+ return ret;
+
+ ret = free_mr_modify_qp(hr_dev);
+ if (ret)
+ goto err_modify_qp;
+
+ return 0;
+
+err_modify_qp:
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
+static int get_hem_table(struct hns_roce_dev *hr_dev)
+{
+ unsigned int qpc_count;
+ unsigned int cqc_count;
+ unsigned int gmv_count;
+ int ret;
+ int i;
+
+ /* Alloc memory for source address table buffer space chunk */
+ for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num;
+ gmv_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count);
+ if (ret)
+ goto err_gmv_failed;
+ }
+
+ if (hr_dev->is_vf)
+ return 0;
+
+ /* Alloc memory for QPC Timer buffer space chunk */
+ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
+ qpc_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
+ qpc_count);
+ if (ret) {
+ dev_err(hr_dev->dev, "QPC Timer get failed\n");
+ goto err_qpc_timer_failed;
+ }
+ }
+
+ /* Alloc memory for CQC Timer buffer space chunk */
+ for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
+ cqc_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
+ cqc_count);
+ if (ret) {
+ dev_err(hr_dev->dev, "CQC Timer get failed\n");
+ goto err_cqc_timer_failed;
+ }
+ }
+
+ return 0;
+
+err_cqc_timer_failed:
+ for (i = 0; i < cqc_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
+
+err_qpc_timer_failed:
+ for (i = 0; i < qpc_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+
+err_gmv_failed:
+ for (i = 0; i < gmv_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
+
+ return ret;
+}
+
+static void put_hem_table(struct hns_roce_dev *hr_dev)
+{
+ int i;
+
+ for (i = 0; i < hr_dev->caps.gmv_entry_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
+
+ if (hr_dev->is_vf)
+ return;
+
+ for (i = 0; i < hr_dev->caps.qpc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+
+ for (i = 0; i < hr_dev->caps.cqc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
+}
+
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+
+ /* The hns ROCEE requires the extdb info to be cleared before using */
+ ret = hns_roce_clear_extdb_list_info(hr_dev);
+ if (ret)
+ return ret;
+
+ ret = get_hem_table(hr_dev);
+ if (ret)
+ return ret;
+
+ if (hr_dev->is_vf)
+ return 0;
+
+ ret = hns_roce_init_link_table(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init llm, ret = %d.\n", ret);
+ goto err_llm_init_failed;
+ }
+
+ return 0;
+
+err_llm_init_failed:
+ put_hem_table(hr_dev);
+
+ return ret;
+}
+
+static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_function_clear(hr_dev);
+
+ if (!hr_dev->is_vf)
+ hns_roce_free_link_table(hr_dev);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
+ free_dip_list(hr_dev);
+}
+
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+ mb->in_param_l = cpu_to_le32(mbox_msg->in_param);
+ mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32);
+ mb->out_param_l = cpu_to_le32(mbox_msg->out_param);
+ mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32);
+ mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd);
+ mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 |
+ mbox_msg->token);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
+ u8 *complete_status)
+{
+ struct hns_roce_mbox_status *mb_st;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret = -EBUSY;
+ u32 status;
+ bool busy;
+
+ mb_st = (struct hns_roce_mbox_status *)desc.data;
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ status = 0;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
+ true);
+ ret = __hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (!ret) {
+ status = le32_to_cpu(mb_st->mb_status_hw_run);
+ /* No pending message exists in ROCEE mbox. */
+ if (!(status & MB_ST_HW_RUN_M))
+ break;
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ break;
+ }
+
+ if (time_after(jiffies, end)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to wait mbox status 0x%x\n",
+ status);
+ return -ETIMEDOUT;
+ }
+
+ cond_resched();
+ ret = -EBUSY;
+ }
+
+ if (!ret) {
+ *complete_status = (u8)(status & MB_ST_COMPLETE_M);
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ /* Ignore all errors if the mbox is unavailable. */
+ ret = 0;
+ *complete_status = MB_ST_COMPLETE_M;
+ }
+
+ return ret;
+}
+
+static int v2_post_mbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ u8 status = 0;
+ int ret;
+
+ /* Waiting for the mbox to be idle */
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS,
+ &status);
+ if (unlikely(ret)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check post mbox status = 0x%x, ret = %d.\n",
+ status, ret);
+ return ret;
+ }
+
+ /* Post new message to mbox */
+ ret = hns_roce_mbox_post(hr_dev, mbox_msg);
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev)
+{
+ u8 status = 0;
+ int ret;
+
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS,
+ &status);
+ if (!ret) {
+ if (status != MB_ST_COMPLETE_SUCC)
+ return -EBUSY;
+ } else {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check mbox status = 0x%x, ret = %d.\n",
+ status, ret);
+ }
+
+ return ret;
+}
+
+static void copy_gid(void *dest, const union ib_gid *gid)
+{
+#define GID_SIZE 4
+ const union ib_gid *src = gid;
+ __le32 (*p)[GID_SIZE] = dest;
+ int i;
+
+ if (!gid)
+ src = &zgid;
+
+ for (i = 0; i < GID_SIZE; i++)
+ (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]);
+}
+
+static int config_sgid_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_sgid_tb *sgid_tb =
+ (struct hns_roce_cfg_sgid_tb *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
+
+ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
+
+ copy_gid(&sgid_tb->vf_sgid_l, gid);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int config_gmv_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type,
+ const struct ib_gid_attr *attr)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cfg_gmv_tb_a *tb_a =
+ (struct hns_roce_cfg_gmv_tb_a *)desc[0].data;
+ struct hns_roce_cfg_gmv_tb_b *tb_b =
+ (struct hns_roce_cfg_gmv_tb_b *)desc[1].data;
+
+ u16 vlan_id = VLAN_CFI_MASK;
+ u8 mac[ETH_ALEN] = {};
+ int ret;
+
+ if (gid) {
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac);
+ if (ret)
+ return ret;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+
+ copy_gid(&tb_a->vf_sgid_l, gid);
+
+ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
+
+ tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
+
+ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
+ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ int ret;
+
+ if (gid) {
+ if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
+ else
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ } else if (attr->gid_type == IB_GID_TYPE_ROCE) {
+ sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ }
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr);
+ else
+ ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
+ const u8 *addr)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_smac_tb *smac_tb =
+ (struct hns_roce_cfg_smac_tb *)desc.data;
+ u16 reg_smac_h;
+ u32 reg_smac_l;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
+
+ reg_smac_l = *(u32 *)(&addr[0]);
+ reg_smac_h = *(u16 *)(&addr[4]);
+
+ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
+ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
+ smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_mpt_entry *mpt_entry,
+ struct hns_roce_mr *mr)
+{
+ u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba;
+ int i, count;
+
+ count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages),
+ &pbl_ba);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
+ count);
+ return -ENOBUFS;
+ }
+
+ /* Aligned to the hardware address access unit */
+ for (i = 0; i < count; i++)
+ pages[i] >>= 6;
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+
+ mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
+ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
+
+ mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ return 0;
+}
+
+static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_write_bool(mpt_entry, MPT_BIND_EN,
+ mr->access & IB_ACCESS_MW_BIND);
+ hr_reg_write_bool(mpt_entry, MPT_ATOMIC_EN,
+ mr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(mpt_entry, MPT_RR_EN,
+ mr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(mpt_entry, MPT_RW_EN,
+ mr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(mpt_entry, MPT_LW_EN,
+ mr->access & IB_ACCESS_LOCAL_WRITE);
+
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
+ mpt_entry->lkey = cpu_to_le32(mr->key);
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+
+ if (mr->type != MR_TYPE_MR)
+ hr_reg_enable(mpt_entry, MPT_PA);
+
+ if (mr->type == MR_TYPE_DMA)
+ return 0;
+
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0)
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num);
+
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
+
+ return set_mtpt_pbl(hr_dev, mpt_entry, mr);
+}
+
+static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr, int flags,
+ void *mb_buf)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+ u32 mr_access_flags = mr->access;
+ int ret = 0;
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ hr_reg_write(mpt_entry, MPT_BIND_EN,
+ (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_RR_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_RW_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_LW_EN,
+ mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
+
+ ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
+ }
+
+ return ret;
+}
+
+static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+ dma_addr_t pbl_ba = 0;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
+ ibdev_err(ibdev, "failed to find frmr mtr.\n");
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_enable(mpt_entry, MPT_RA_EN);
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+
+ hr_reg_enable(mpt_entry, MPT_FRE);
+ hr_reg_clear(mpt_entry, MPT_MR_MW);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
+
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+
+ return 0;
+}
+
+static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
+
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+ hr_reg_enable(mpt_entry, MPT_LW_EN);
+
+ hr_reg_enable(mpt_entry, MPT_MR_MW);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
+ hr_reg_write(mpt_entry, MPT_BQP,
+ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+
+ mpt_entry->lkey = cpu_to_le32(mw->rkey);
+
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM,
+ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ mw->pbl_hop_num);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ return 0;
+}
+
+static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_send_wr *bad_wr;
+ struct ib_rdma_wr rdma_wr = {};
+ struct ib_send_wr *send_wr;
+ int ret;
+
+ send_wr = &rdma_wr.wr;
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+
+ ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+
+static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)];
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ unsigned long end;
+ int cqe_cnt = 0;
+ int npolled;
+ int ret;
+ int i;
+
+ /*
+ * If the device initialization is not complete or in the uninstall
+ * process, then there is no need to execute free mr.
+ */
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT ||
+ hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT)
+ return;
+
+ mutex_lock(&free_mr->mutex);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ hr_qp = free_mr->rsv_qp[i];
+
+ ret = free_mr_post_send_lp_wqe(hr_qp);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
+ break;
+ }
+
+ cqe_cnt++;
+ }
+
+ end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
+ while (cqe_cnt) {
+ npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
+ if (npolled < 0) {
+ ibdev_err(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+
+ if (time_after(jiffies, end)) {
+ ibdev_err(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+ cqe_cnt -= npolled;
+ }
+
+out:
+ mutex_unlock(&free_mr->mutex);
+}
+
+static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_send_cmd_to_hw(hr_dev);
+}
+
+static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
+{
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
+}
+
+static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n)
+{
+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);
+
+ /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & hr_cq->cq_depth)) ? cqe :
+ NULL;
+}
+
+static inline void update_cq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
+{
+ if (likely(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)) {
+ *hr_cq->set_ci_db = hr_cq->cons_index & V2_CQ_DB_CONS_IDX_M;
+ } else {
+ struct hns_roce_v2_db cq_db = {};
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
+ }
+}
+
+static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_v2_cqe *cqe, *dest;
+ u32 prod_index;
+ int nfreed = 0;
+ int wqe_index;
+ u8 owner_bit;
+
+ for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
+ ++prod_index) {
+ if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe)
+ break;
+ }
+
+ /*
+ * Now backwards through the CQ, removing CQ entries
+ * that match our QP by overwriting them with next entries.
+ */
+ while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
+ cqe = get_cqe_v2(hr_cq, prod_index & hr_cq->ib_cq.cqe);
+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
+ if (srq && hr_reg_read(cqe, CQE_S_R)) {
+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX);
+ hns_roce_free_srq_wqe(srq, wqe_index);
+ }
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
+ hr_cq->ib_cq.cqe);
+ owner_bit = hr_reg_read(dest, CQE_OWNER);
+ memcpy(dest, cqe, hr_cq->cqe_size);
+ hr_reg_write(dest, CQE_OWNER, owner_bit);
+ }
+ }
+
+ if (nfreed) {
+ hr_cq->cons_index += nfreed;
+ update_cq_db(hr_dev, hr_cq);
+ }
+}
+
+static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ spin_lock_irq(&hr_cq->lock);
+ __hns_roce_v2_cq_clean(hr_cq, qpn, srq);
+ spin_unlock_irq(&hr_cq->lock);
+}
+
+static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, void *mb_buf,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct hns_roce_v2_cq_context *cq_context;
+
+ cq_context = mb_buf;
+ memset(cq_context, 0, sizeof(*cq_context));
+
+ hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID);
+ hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED);
+ hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth));
+ hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector);
+ hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn);
+
+ if (hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE)
+ hr_reg_write(cq_context, CQC_CQE_SIZE, CQE_SIZE_64B);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(cq_context, CQC_STASH);
+
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[0]));
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_write(cq_context, CQC_CQE_HOP_NUM, hr_dev->caps.cqe_hop_num ==
+ HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[1]));
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_write(cq_context, CQC_CQE_BAR_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> 3);
+ hr_reg_write(cq_context, CQC_CQE_BA_H, (dma_handle >> (32 + 3)));
+ hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN,
+ hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L,
+ ((u32)hr_cq->db.dma) >> 1);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H,
+ hr_cq->db.dma >> 32);
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT,
+ HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
+ hr_reg_write(cq_context, CQC_CQ_PERIOD,
+ HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
+}
+
+static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags flags)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct hns_roce_v2_db cq_db = {};
+ u32 notify_flag;
+
+ /*
+ * flags = 0, then notify_flag : next
+ * flags = 1, then notify flag : solocited
+ */
+ notify_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL;
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NOTIFY);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, hr_cq->arm_sn);
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, notify_flag);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
+
+ return 0;
+}
+
+static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp *qp,
+ struct ib_wc *wc)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ u32 wr_num, wr_cnt, sge_num;
+ u32 sge_cnt, data_len, size;
+ void *wqe_buf;
+
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX);
+ wr_cnt = wr_num & (qp->rq.wqe_cnt - 1);
+
+ sge_list = qp->rq_inl_buf.wqe_list[wr_cnt].sg_list;
+ sge_num = qp->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
+ wqe_buf = hns_roce_get_recv_wqe(qp, wr_cnt);
+ data_len = wc->byte_len;
+
+ for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
+ size = min(sge_list[sge_cnt].len, data_len);
+ memcpy((void *)sge_list[sge_cnt].addr, wqe_buf, size);
+
+ data_len -= size;
+ wqe_buf += size;
+ }
+
+ if (unlikely(data_len)) {
+ wc->status = IB_WC_LOC_LEN_ERR;
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
+ int num_entries, struct ib_wc *wc)
+{
+ unsigned int left;
+ int npolled = 0;
+
+ left = wq->head - wq->tail;
+ if (left == 0)
+ return 0;
+
+ left = min_t(unsigned int, (unsigned int)num_entries, left);
+ while (npolled < left) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->qp = &hr_qp->ibqp;
+
+ wq->tail++;
+ wc++;
+ npolled++;
+ }
+
+ return npolled;
+}
+
+static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_qp *hr_qp;
+ int npolled = 0;
+
+ list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->sq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+ list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->rq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+out:
+ return npolled;
+}
+
+static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+ struct ib_wc *wc)
+{
+ static const struct {
+ u32 cqe_status;
+ enum ib_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS },
+ { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR },
+ { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR },
+ { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR },
+ { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR,
+ IB_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR}
+ };
+
+ u32 cqe_status = hr_reg_read(cqe, CQE_STATUS);
+ int i;
+
+ wc->status = IB_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (cqe_status == map[i].cqe_status) {
+ wc->status = map[i].wc_status;
+ break;
+ }
+
+ if (likely(wc->status == IB_WC_SUCCESS ||
+ wc->status == IB_WC_WR_FLUSH_ERR))
+ return;
+
+ ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ cq->cqe_size, false);
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
+
+ /*
+ * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
+ * the standard protocol, the driver must ignore it and needn't to set
+ * the QP to an error state.
+ */
+ if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR)
+ return;
+
+ flush_cqe(hr_dev, qp);
+}
+
+static int get_cur_qp(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp **cur_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *hr_qp = *cur_qp;
+ u32 qpn;
+
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
+
+ if (!hr_qp || qpn != hr_qp->qpn) {
+ hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (unlikely(!hr_qp)) {
+ ibdev_err(&hr_dev->ib_dev,
+ "CQ %06lx with entry for unknown QPN %06x\n",
+ hr_cq->cqn, qpn);
+ return -EINVAL;
+ }
+ *cur_qp = hr_qp;
+ }
+
+ return 0;
+}
+
+/*
+ * mapped-value = 1 + real-value
+ * The ib wc opcode's real value is start from 0, In order to distinguish
+ * between initialized and uninitialized map values, we plus 1 to the actual
+ * value when defining the mapping, so that the validity can be identified by
+ * checking whether the mapped value is greater than 0.
+ */
+#define HR_WC_OP_MAP(hr_key, ib_key) \
+ [HNS_ROCE_V2_WQE_OP_ ## hr_key] = 1 + IB_WC_ ## ib_key
+
+static const u32 wc_send_op_map[] = {
+ HR_WC_OP_MAP(SEND, SEND),
+ HR_WC_OP_MAP(SEND_WITH_INV, SEND),
+ HR_WC_OP_MAP(SEND_WITH_IMM, SEND),
+ HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
+ HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
+ HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
+ HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_MSK_FETCH_AND_ADD, MASKED_FETCH_ADD),
+ HR_WC_OP_MAP(FAST_REG_PMR, REG_MR),
+ HR_WC_OP_MAP(BIND_MW, REG_MR),
+};
+
+static int to_ib_wc_send_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_send_op_map))
+ return -EINVAL;
+
+ return wc_send_op_map[hr_opcode] ? wc_send_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static const u32 wc_recv_op_map[] = {
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND, RECV),
+ HR_WC_OP_MAP(SEND_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND_WITH_INV, RECV),
+};
+
+static int to_ib_wc_recv_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_recv_op_map))
+ return -EINVAL;
+
+ return wc_recv_op_map[hr_opcode] ? wc_recv_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ u32 hr_opcode;
+ int ib_opcode;
+
+ wc->wc_flags = 0;
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_WQE_OP_RDMA_READ:
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+ break;
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD:
+ wc->byte_len = 8;
+ break;
+ default:
+ break;
+ }
+
+ ib_opcode = to_ib_wc_send_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+}
+
+static inline bool is_rq_inl_enabled(struct ib_wc *wc, u32 hr_opcode,
+ struct hns_roce_v2_cqe *cqe)
+{
+ return wc->qp->qp_type != IB_QPT_UD && wc->qp->qp_type != IB_QPT_GSI &&
+ (hr_opcode == HNS_ROCE_V2_OPCODE_SEND ||
+ hr_opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
+ hr_opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
+ hr_reg_read(cqe, CQE_RQ_INLINE);
+}
+
+static int fill_recv_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ struct hns_roce_qp *qp = to_hr_qp(wc->qp);
+ u32 hr_opcode;
+ int ib_opcode;
+ int ret;
+
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immtdata));
+ break;
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
+ break;
+ default:
+ wc->wc_flags = 0;
+ }
+
+ ib_opcode = to_ib_wc_recv_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+
+ if (is_rq_inl_enabled(wc, hr_opcode, cqe)) {
+ ret = hns_roce_handle_recv_inl_wqe(cqe, qp, wc);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ wc->sl = hr_reg_read(cqe, CQE_SL);
+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
+ wc->slid = 0;
+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IB_WC_GRH : 0;
+ wc->port_num = hr_reg_read(cqe, CQE_PORTN);
+ wc->pkey_index = 0;
+
+ if (hr_reg_read(cqe, CQE_VID_VLD)) {
+ wc->vlan_id = hr_reg_read(cqe, CQE_VID);
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+
+ wc->network_hdr_type = hr_reg_read(cqe, CQE_PORT_TYPE);
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
+ struct hns_roce_qp **cur_qp, struct ib_wc *wc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *qp = *cur_qp;
+ struct hns_roce_srq *srq = NULL;
+ struct hns_roce_v2_cqe *cqe;
+ struct hns_roce_wq *wq;
+ int is_send;
+ u16 wqe_idx;
+ int ret;
+
+ cqe = get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++hr_cq->cons_index;
+ /* Memory barrier */
+ rmb();
+
+ ret = get_cur_qp(hr_cq, cqe, &qp);
+ if (ret)
+ return ret;
+
+ wc->qp = &qp->ibqp;
+ wc->vendor_err = 0;
+
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
+
+ is_send = !hr_reg_read(cqe, CQE_S_R);
+ if (is_send) {
+ wq = &qp->sq;
+
+ /* If sg_signal_bit is set, tail pointer will be updated to
+ * the WQE corresponding to the current CQE.
+ */
+ if (qp->sq_signal_bits)
+ wq->tail += (wqe_idx - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+
+ fill_send_wc(wc, cqe);
+ } else {
+ if (qp->ibqp.srq) {
+ srq = to_hr_srq(qp->ibqp.srq);
+ wc->wr_id = srq->wrid[wqe_idx];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
+ } else {
+ wq = &qp->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
+ ret = fill_recv_wc(wc, cqe);
+ }
+
+ get_cqe_status(hr_dev, qp, hr_cq, cqe, wc);
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ return 0;
+
+ return ret;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct hns_roce_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+
+ /*
+ * When the device starts to reset, the state is RST_DOWN. At this time,
+ * there may still be some valid CQEs in the hardware that are not
+ * polled. Therefore, it is not allowed to switch to the software mode
+ * immediately. When the state changes to UNINIT, CQE no longer exists
+ * in the hardware, and then switch to software mode.
+ */
+ if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) {
+ npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc);
+ goto out;
+ }
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
+ break;
+ }
+
+ if (npolled)
+ update_cq_db(hr_dev, hr_cq);
+
+out:
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+
+ return npolled;
+}
+
+static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
+ u32 step_idx, u8 *mbox_cmd)
+{
+ u8 cmd;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ cmd = HNS_ROCE_CMD_WRITE_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+ break;
+ default:
+ dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type);
+ return -EINVAL;
+ }
+
+ *mbox_cmd = cmd + step_idx;
+
+ return 0;
+}
+
+static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj,
+ dma_addr_t base_addr)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 idx = obj / (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz);
+ u64 addr = to_hr_hw_page_addr(base_addr);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+
+ hr_reg_write(req, CFG_GMV_BT_BA_L, lower_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_BA_H, upper_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj,
+ dma_addr_t base_addr, u32 hem_type, u32 step_idx)
+{
+ int ret;
+ u8 cmd;
+
+ if (unlikely(hem_type == HEM_TYPE_GMV))
+ return config_gmv_ba_to_hw(hr_dev, obj, base_addr);
+
+ if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx))
+ return 0;
+
+ ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd);
+ if (ret < 0)
+ return ret;
+
+ return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj);
+}
+
+static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, int obj,
+ u32 step_idx)
+{
+ struct hns_roce_hem_iter iter;
+ struct hns_roce_hem_mhop mhop;
+ struct hns_roce_hem *hem;
+ unsigned long mhop_obj = obj;
+ int i, j, k;
+ int ret = 0;
+ u64 hem_idx = 0;
+ u64 l1_idx = 0;
+ u64 bt_ba = 0;
+ u32 chunk_ba_num;
+ u32 hop_num;
+
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type))
+ return 0;
+
+ hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+ i = mhop.l0_idx;
+ j = mhop.l1_idx;
+ k = mhop.l2_idx;
+ hop_num = mhop.hop_num;
+ chunk_ba_num = mhop.bt_chunk_size / 8;
+
+ if (hop_num == 2) {
+ hem_idx = i * chunk_ba_num * chunk_ba_num + j * chunk_ba_num +
+ k;
+ l1_idx = i * chunk_ba_num + j;
+ } else if (hop_num == 1) {
+ hem_idx = i * chunk_ba_num + j;
+ } else if (hop_num == HNS_ROCE_HOP_NUM_0) {
+ hem_idx = i;
+ }
+
+ if (table->type == HEM_TYPE_SCCC)
+ obj = mhop.l0_idx;
+
+ if (check_whether_last_step(hop_num, step_idx)) {
+ hem = table->hem[hem_idx];
+ for (hns_roce_hem_first(hem, &iter);
+ !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
+ bt_ba = hns_roce_hem_addr(&iter);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
+ step_idx);
+ }
+ } else {
+ if (step_idx == 0)
+ bt_ba = table->bt_l0_dma_addr[i];
+ else if (step_idx == 1 && hop_num == 2)
+ bt_ba = table->bt_l1_dma_addr[l1_idx];
+
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx);
+ }
+
+ return ret;
+}
+
+static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ int tag, u32 step_idx)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ u8 cmd = 0xff;
+ int ret;
+
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type))
+ return 0;
+
+ switch (table->type) {
+ case HEM_TYPE_QPC:
+ cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ case HEM_TYPE_QPC_TIMER:
+ case HEM_TYPE_CQC_TIMER:
+ case HEM_TYPE_GMV:
+ return 0;
+ default:
+ dev_warn(dev, "table %u not to be destroyed by mailbox!\n",
+ table->type);
+ return 0;
+ }
+
+ cmd += step_idx;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int qpc_size;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ /* The qpc size of HIP08 is only 256B, which is half of HIP09 */
+ qpc_size = hr_dev->caps.qpc_sz;
+ memcpy(mailbox->buf, context, qpc_size);
+ memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static void set_access_flags(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ const struct ib_qp_attr *attr, int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+
+ dest_rd_atomic = (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
+ attr->max_dest_rd_atomic : hr_qp->resp_depth;
+
+ access_flags = (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ attr->qp_access_flags : hr_qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ hr_reg_write_bool(context, QPC_RRE,
+ access_flags & IB_ACCESS_REMOTE_READ);
+ hr_reg_clear(qpc_mask, QPC_RRE);
+
+ hr_reg_write_bool(context, QPC_RWE,
+ access_flags & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_clear(qpc_mask, QPC_RWE);
+
+ hr_reg_write_bool(context, QPC_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_ATE);
+ hr_reg_write_bool(context, QPC_EXT_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_EXT_ATE);
+}
+
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ hr_reg_write(context, QPC_SGE_SHIFT,
+ to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift));
+
+ hr_reg_write(context, QPC_SQ_SHIFT, ilog2(hr_qp->sq.wqe_cnt));
+
+ hr_reg_write(context, QPC_RQ_SHIFT, ilog2(hr_qp->rq.wqe_cnt));
+}
+
+static inline int get_cqn(struct ib_cq *ib_cq)
+{
+ return ib_cq ? to_hr_cq(ib_cq)->cqn : 0;
+}
+
+static inline int get_pdn(struct ib_pd *ib_pd)
+{
+ return ib_pd ? to_hr_pd(ib_pd)->pdn : 0;
+}
+
+static void modify_qp_reset_to_init(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+
+ hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs));
+
+ set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
+
+ /* No VLAN need to set 0xFFF */
+ hr_reg_write(context, QPC_VLAN_ID, 0xfff);
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT) {
+ context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+
+ hr_reg_enable(context, QPC_XRC_QP_TYPE);
+ }
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hr_reg_enable(context, QPC_RQ_RECORD_EN);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ hr_reg_enable(context, QPC_OWNER_MODE);
+
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_L,
+ lower_32_bits(hr_qp->rdb.dma) >> 1);
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_H,
+ upper_32_bits(hr_qp->rdb.dma));
+
+ if (ibqp->qp_type != IB_QPT_UD && ibqp->qp_type != IB_QPT_GSI)
+ hr_reg_write_bool(context, QPC_RQIE,
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE);
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
+
+ if (ibqp->srq) {
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ }
+
+ hr_reg_enable(context, QPC_FRE);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+
+ if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+ return;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(&context->ext, QPCEX_STASH);
+}
+
+static void modify_qp_init_to_init(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+ hr_reg_clear(qpc_mask, QPC_TST);
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+ hr_reg_clear(qpc_mask, QPC_PD);
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
+ hr_reg_clear(qpc_mask, QPC_RX_CQN);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+ hr_reg_clear(qpc_mask, QPC_TX_CQN);
+
+ if (ibqp->srq) {
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_clear(qpc_mask, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ hr_reg_clear(qpc_mask, QPC_SRQN);
+ }
+}
+
+static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ u64 mtts[MTT_MIN_COUNT] = { 0 };
+ u64 wqe_sge_ba;
+ int count;
+
+ /* Search qp buf's mtts */
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT, &wqe_sge_ba);
+ if (hr_qp->rq.wqe_cnt && count < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
+ return -EINVAL;
+ }
+
+ context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
+ qpc_mask->wqe_sge_ba = 0;
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_WQE_SGE_BA_H, wqe_sge_ba >> (32 + 3));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_H);
+
+ hr_reg_write(context, QPC_SQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
+ hr_qp->sq.wqe_cnt));
+ hr_reg_clear(qpc_mask, QPC_SQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_SGE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
+ hr_qp->sge.sge_cnt));
+ hr_reg_clear(qpc_mask, QPC_SGE_HOP_NUM);
+
+ hr_reg_write(context, QPC_RQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
+ hr_qp->rq.wqe_cnt));
+
+ hr_reg_clear(qpc_mask, QPC_RQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_WQE_SGE_BA_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_PG_SZ);
+
+ hr_reg_write(context, QPC_WQE_SGE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BUF_PG_SZ);
+
+ context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
+ qpc_mask->rq_cur_blk_addr = 0;
+
+ hr_reg_write(context, QPC_RQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
+
+ context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
+ qpc_mask->rq_nxt_blk_addr = 0;
+
+ hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+
+ return 0;
+}
+
+static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 sge_cur_blk = 0;
+ u64 sq_cur_blk = 0;
+ int count;
+
+ /* search qp buf's mtts */
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n",
+ hr_qp->qpn);
+ return -EINVAL;
+ }
+ if (hr_qp->sge.sge_cnt > 0) {
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset,
+ &sge_cur_blk, 1, NULL);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
+ hr_qp->qpn);
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_H);
+
+ return 0;
+}
+
+static inline enum ib_mtu get_mtu(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr)
+{
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
+ return IB_MTU_4096;
+
+ return attr->path_mtu;
+}
+
+static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t trrl_ba;
+ dma_addr_t irrl_ba;
+ enum ib_mtu ib_mtu;
+ const u8 *smac;
+ u8 lp_pktn_ini;
+ u64 *mtts;
+ u8 *dmac;
+ u32 port;
+ int mtu;
+ int ret;
+
+ ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Search IRRL's mtts */
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
+ hr_qp->qpn, &irrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp irrl_table.\n");
+ return -EINVAL;
+ }
+
+ /* Search TRRL's mtts */
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table,
+ hr_qp->qpn, &trrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp trrl_table.\n");
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error.\n",
+ attr_mask);
+ return -EINVAL;
+ }
+
+ hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> 4);
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_L);
+ context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4));
+ qpc_mask->trrl_ba = 0;
+ hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4));
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
+
+ context->irrl_ba = cpu_to_le32(irrl_ba >> 6);
+ qpc_mask->irrl_ba = 0;
+ hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6));
+ hr_reg_clear(qpc_mask, QPC_IRRL_BA_H);
+
+ hr_reg_enable(context, QPC_RMT_E2E);
+ hr_reg_clear(qpc_mask, QPC_RMT_E2E);
+
+ hr_reg_write(context, QPC_SIG_TYPE, hr_qp->sq_signal_bits);
+ hr_reg_clear(qpc_mask, QPC_SIG_TYPE);
+
+ port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port;
+
+ smac = (const u8 *)hr_dev->dev_addr[port];
+ dmac = (u8 *)attr->ah_attr.roce.dmac;
+ /* when dmac equals smac or loop_idc is 1, it should loopback */
+ if (ether_addr_equal_unaligned(dmac, smac) ||
+ hr_dev->loop_idc == 0x1) {
+ hr_reg_write(context, QPC_LBI, hr_dev->loop_idc);
+ hr_reg_clear(qpc_mask, QPC_LBI);
+ }
+
+ if (attr_mask & IB_QP_DEST_QPN) {
+ hr_reg_write(context, QPC_DQPN, attr->dest_qp_num);
+ hr_reg_clear(qpc_mask, QPC_DQPN);
+ }
+
+ memcpy(&context->dmac, dmac, sizeof(u32));
+ hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4])));
+ qpc_mask->dmac = 0;
+ hr_reg_clear(qpc_mask, QPC_DMAC_H);
+
+ ib_mtu = get_mtu(ibqp, attr);
+ hr_qp->path_mtu = ib_mtu;
+
+ mtu = ib_mtu_enum_to_int(ib_mtu);
+ if (WARN_ON(mtu <= 0))
+ return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
+ lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ hr_reg_write(context, QPC_MTU, ib_mtu);
+ hr_reg_clear(qpc_mask, QPC_MTU);
+ }
+
+ hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
+ hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
+
+ /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
+ hr_reg_write(context, QPC_ACK_REQ_FREQ, lp_pktn_ini);
+ hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ);
+
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_MSN);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_LAST_OPTYPE);
+
+ context->rq_rnr_timer = 0;
+ qpc_mask->rq_rnr_timer = 0;
+
+ hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX);
+ hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX);
+
+ /* rocee send 2^lp_sgen_ini segs every time */
+ hr_reg_write(context, QPC_LP_SGEN_INI, 3);
+ hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
+
+ return 0;
+}
+
+static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ /* Not support alternate path and path migration */
+ if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) {
+ ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
+ return -EINVAL;
+ }
+
+ ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /*
+ * Set some fields in context to zero, Because the default values
+ * of all fields in context are zero, we need not set them to 0 again.
+ * but we should set the relevant fields of context mask to 0.
+ */
+ hr_reg_clear(qpc_mask, QPC_IRRL_SGE_IDX);
+
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_ACK_LAST_OPTYPE);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN_VLD);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN);
+
+ hr_reg_clear(qpc_mask, QPC_IRRL_TAIL_REAL);
+
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_RNR_RETRY_FLAG);
+
+ hr_reg_clear(qpc_mask, QPC_CHECK_FLG);
+
+ hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD);
+
+ return 0;
+}
+
+static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ u32 *dip_idx)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
+ u32 *head = &hr_dev->qp_table.idx_table.head;
+ u32 *tail = &hr_dev->qp_table.idx_table.tail;
+ struct hns_roce_dip *hr_dip;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+
+ spare_idx[*tail] = ibqp->qp_num;
+ *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
+
+ list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
+ if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) {
+ *dip_idx = hr_dip->dip_idx;
+ goto out;
+ }
+ }
+
+ /* If no dgid is found, a new dip and a mapping between dgid and
+ * dip_idx will be created.
+ */
+ hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC);
+ if (!hr_dip) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ hr_dip->dip_idx = *dip_idx = spare_idx[*head];
+ *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
+ list_add_tail(&hr_dip->node, &hr_dev->dip_list);
+
+out:
+ spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+ return ret;
+}
+
+enum {
+ CONG_DCQCN,
+ CONG_WINDOW,
+};
+
+enum {
+ UNSUPPORT_CONG_LEVEL,
+ SUPPORT_CONG_LEVEL,
+};
+
+enum {
+ CONG_LDCP,
+ CONG_HC3,
+};
+
+enum {
+ DIP_INVALID,
+ DIP_VALID,
+};
+
+enum {
+ WND_LIMIT,
+ WND_UNLIMIT,
+};
+
+static int check_cong_type(struct ib_qp *ibqp,
+ struct hns_roce_congestion_algorithm *cong_alg)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+
+ if (ibqp->qp_type == IB_QPT_UD)
+ hr_dev->caps.cong_type = CONG_TYPE_DCQCN;
+
+ /* different congestion types match different configurations */
+ switch (hr_dev->caps.cong_type) {
+ case CONG_TYPE_DCQCN:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_LDCP:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_LDCP;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_UNLIMIT;
+ break;
+ case CONG_TYPE_HC3:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_HC3;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_DIP:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_VALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ default:
+ ibdev_warn(&hr_dev->ib_dev,
+ "invalid type(%u) for congestion selection.\n",
+ hr_dev->caps.cong_type);
+ hr_dev->caps.cong_type = CONG_TYPE_DCQCN;
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ }
+
+ return 0;
+}
+
+static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_congestion_algorithm cong_field;
+ struct ib_device *ibdev = ibqp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u32 dip_idx = 0;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ||
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE)
+ return 0;
+
+ ret = check_cong_type(ibqp, &cong_field);
+ if (ret)
+ return ret;
+
+ hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id +
+ hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE);
+ hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SUB_SEL,
+ cong_field.alg_sub_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SUB_SEL);
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX_VLD, cong_field.dip_vld);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_DIP_CTX_IDX_VLD);
+ hr_reg_write(&context->ext, QPCEX_SQ_RQ_NOT_FORBID_EN,
+ cong_field.wnd_mode_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_SQ_RQ_NOT_FORBID_EN);
+
+ /* if dip is disabled, there is no need to set dip idx */
+ if (cong_field.dip_vld == 0)
+ return 0;
+
+ ret = get_dip_ctx_idx(ibqp, attr, &dip_idx);
+ if (ret) {
+ ibdev_err(ibdev, "failed to fill cong field, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX, dip_idx);
+ hr_reg_write(&qpc_mask->ext, QPCEX_DIP_CTX_IDX, 0);
+
+ return 0;
+}
+
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_gid_attr *gid_attr = NULL;
+ u8 sl = rdma_ah_get_sl(&attr->ah_attr);
+ int is_roce_protocol;
+ u16 vlan_id = 0xffff;
+ bool is_udp = false;
+ u32 max_sl;
+ u8 ib_port;
+ u8 hr_port;
+ int ret;
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return -EINVAL;
+ }
+
+ /*
+ * If free_mr_en of qp is set, it means that this qp comes from
+ * free mr. This qp will perform the loopback operation.
+ * In the loopback scenario, only sl needs to be set.
+ */
+ if (hr_qp->free_mr_en) {
+ hr_reg_write(context, QPC_SL, sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+ hr_qp->sl = sl;
+ return 0;
+ }
+
+ ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+ hr_port = ib_port - 1;
+ is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_roce_protocol) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+ }
+
+ /* Only HIP08 needs to set the vlan_en bits in QPC */
+ if (vlan_id < VLAN_N_VID &&
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_reg_enable(context, QPC_RQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_RQ_VLAN_EN);
+ hr_reg_enable(context, QPC_SQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_SQ_VLAN_EN);
+ }
+
+ hr_reg_write(context, QPC_VLAN_ID, vlan_id);
+ hr_reg_clear(qpc_mask, QPC_VLAN_ID);
+
+ if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+ ibdev_err(ibdev, "sgid_index(%u) too large. max is %d\n",
+ grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+ return -EINVAL;
+ }
+
+ if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+ ibdev_err(ibdev, "ah attr is not RDMA roce type\n");
+ return -EINVAL;
+ }
+
+ hr_reg_write(context, QPC_UDPSPN,
+ is_udp ? rdma_get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) :
+ 0);
+
+ hr_reg_clear(qpc_mask, QPC_UDPSPN);
+
+ hr_reg_write(context, QPC_GMV_IDX, grh->sgid_index);
+
+ hr_reg_clear(qpc_mask, QPC_GMV_IDX);
+
+ hr_reg_write(context, QPC_HOPLIMIT, grh->hop_limit);
+ hr_reg_clear(qpc_mask, QPC_HOPLIMIT);
+
+ ret = fill_cong_field(ibqp, attr, context, qpc_mask);
+ if (ret)
+ return ret;
+
+ hr_reg_write(context, QPC_TC, get_tclass(&attr->ah_attr.grh));
+ hr_reg_clear(qpc_mask, QPC_TC);
+
+ hr_reg_write(context, QPC_FL, grh->flow_label);
+ hr_reg_clear(qpc_mask, QPC_FL);
+ memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+ hr_qp->sl = sl;
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
+static bool check_qp_state(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ static const bool sm[][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true },
+ [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true,
+ [IB_QPS_RTR] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_SQD] = {},
+ [IB_QPS_SQE] = {},
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_ERR] = true }
+ };
+
+ return sm[cur_state][new_state];
+}
+
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ int ret = 0;
+
+ if (!check_qp_state(cur_state, new_state)) {
+ ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
+ return -EINVAL;
+ }
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
+ modify_qp_reset_to_init(ibqp, attr, context, qpc_mask);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ modify_qp_init_to_init(ibqp, attr, context, qpc_mask);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
+ ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ }
+
+ return ret;
+}
+
+static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
+{
+#define QP_ACK_TIMEOUT_MAX_HIP08 20
+#define QP_ACK_TIMEOUT_MAX 31
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 20.\n");
+ return false;
+ }
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 31.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
+ u8 timeout;
+
+ if (attr_mask & IB_QP_AV) {
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ return ret;
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ timeout = attr->timeout;
+ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
+ hr_reg_write(context, QPC_AT, timeout);
+ hr_reg_clear(qpc_mask, QPC_AT);
+ }
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ hr_reg_write(context, QPC_RETRY_NUM_INIT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_NUM_INIT);
+
+ hr_reg_write(context, QPC_RETRY_CNT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_CNT);
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ hr_reg_write(context, QPC_RNR_NUM_INIT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_NUM_INIT);
+
+ hr_reg_write(context, QPC_RNR_CNT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_CNT);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ hr_reg_write(context, QPC_SQ_CUR_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_PSN);
+
+ hr_reg_write(context, QPC_SQ_MAX_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_MAX_PSN);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_L, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_L);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_H,
+ attr->sq_psn >> RETRY_MSG_PSN_SHIFT);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_H);
+
+ hr_reg_write(context, QPC_RETRY_MSG_FPKT_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_FPKT_PSN);
+
+ hr_reg_write(context, QPC_RX_ACK_EPSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_EPSN);
+ }
+
+ if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+ attr->max_dest_rd_atomic) {
+ hr_reg_write(context, QPC_RR_MAX,
+ fls(attr->max_dest_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_RR_MAX);
+ }
+
+ if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ hr_reg_write(context, QPC_SR_MAX, fls(attr->max_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_SR_MAX);
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+ set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ hr_reg_write(context, QPC_MIN_RNR_TIME,
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
+ hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN) {
+ hr_reg_write(context, QPC_RX_REQ_EPSN, attr->rq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_EPSN);
+
+ hr_reg_write(context, QPC_RAQ_PSN, attr->rq_psn - 1);
+ hr_reg_clear(qpc_mask, QPC_RAQ_PSN);
+ }
+
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey_xrcd = cpu_to_le32(attr->qkey);
+ qpc_mask->qkey_xrcd = 0;
+ hr_qp->qkey = attr->qkey;
+ }
+
+ return ret;
+}
+
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ hr_qp->atomic_rd_en = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ hr_qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT) {
+ hr_qp->port = attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+}
+
+static void clear_qp(struct hns_roce_qp *hr_qp)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
+ hr_qp->qpn, NULL);
+
+ if (ibqp->recv_cq && ibqp->recv_cq != ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq),
+ hr_qp->qpn, ibqp->srq ?
+ to_hr_srq(ibqp->srq) : NULL);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ *hr_qp->rdb.db_record = 0;
+
+ hr_qp->rq.head = 0;
+ hr_qp->rq.tail = 0;
+ hr_qp->sq.head = 0;
+ hr_qp->sq.tail = 0;
+ hr_qp->next_sge = 0;
+}
+
+static void v2_set_flushed_fields(struct ib_qp *ibqp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long sq_flag = 0;
+ unsigned long rq_flag = 0;
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT)
+ return;
+
+ spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+ hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
+ hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
+ hr_qp->state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
+
+ if (ibqp->srq || ibqp->qp_type == IB_QPT_XRC_INI) /* no RQ */
+ return;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+ hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
+ hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context ctx[2];
+ struct hns_roce_v2_qp_context *context = ctx;
+ struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ memset(context, 0, hr_dev->caps.qpc_sz);
+ memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
+ ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+ new_state, context, qpc_mask);
+ if (ret)
+ goto out;
+
+ /* When QP state is err, SQ and RQ WQE should be flushed */
+ if (new_state == IB_QPS_ERR)
+ v2_set_flushed_fields(ibqp, context, qpc_mask);
+
+ /* Configure the optional fields */
+ ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ goto out;
+
+ hr_reg_write_bool(context, QPC_INV_CREDIT,
+ to_hr_qp_type(hr_qp->ibqp.qp_type) == SERV_TYPE_XRC ||
+ ibqp->srq);
+ hr_reg_clear(qpc_mask, QPC_INV_CREDIT);
+
+ /* Every status migrate must change state */
+ hr_reg_write(context, QPC_QP_ST, new_state);
+ hr_reg_clear(qpc_mask, QPC_QP_ST);
+
+ /* SW pass context to HW */
+ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
+ goto out;
+ }
+
+ hr_qp->state = new_state;
+
+ hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
+
+ if (new_state == IB_QPS_RESET && !ibqp->uobject)
+ clear_qp(hr_qp);
+
+out:
+ return ret;
+}
+
+static int to_ib_qp_st(enum hns_roce_v2_qp_state state)
+{
+ static const enum ib_qp_state map[] = {
+ [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET,
+ [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT,
+ [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR,
+ [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS,
+ [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD,
+ [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE,
+ [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR,
+ [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD
+ };
+
+ return (state < ARRAY_SIZE(map)) ? map[state] : -1;
+}
+
+static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC,
+ qpn);
+ if (ret)
+ goto out;
+
+ memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
+
+static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context context = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int tmp_qp_state;
+ int state;
+ int ret;
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (hr_qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ ret = 0;
+ goto done;
+ }
+
+ ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
+ if (ret) {
+ ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ state = hr_reg_read(&context, QPC_QP_ST);
+ tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
+ if (tmp_qp_state == -1) {
+ ibdev_err(ibdev, "Illegal ib_qp_state\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ hr_qp->state = (u8)tmp_qp_state;
+ qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
+ qp_attr->path_mtu = (enum ib_mtu)hr_reg_read(&context, QPC_MTU);
+ qp_attr->path_mig_state = IB_MIG_ARMED;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD)
+ qp_attr->qkey = le32_to_cpu(context.qkey_xrcd);
+
+ qp_attr->rq_psn = hr_reg_read(&context, QPC_RX_REQ_EPSN);
+ qp_attr->sq_psn = (u32)hr_reg_read(&context, QPC_SQ_CUR_PSN);
+ qp_attr->dest_qp_num = hr_reg_read(&context, QPC_DQPN);
+ qp_attr->qp_access_flags =
+ ((hr_reg_read(&context, QPC_RRE)) << V2_QP_RRE_S) |
+ ((hr_reg_read(&context, QPC_RWE)) << V2_QP_RWE_S) |
+ ((hr_reg_read(&context, QPC_ATE)) << V2_QP_ATE_S);
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
+ struct ib_global_route *grh =
+ rdma_ah_retrieve_grh(&qp_attr->ah_attr);
+
+ rdma_ah_set_sl(&qp_attr->ah_attr,
+ hr_reg_read(&context, QPC_SL));
+ rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label = hr_reg_read(&context, QPC_FL);
+ grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
+ grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
+ grh->traffic_class = hr_reg_read(&context, QPC_TC);
+
+ memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
+ }
+
+ qp_attr->port_num = hr_qp->port + 1;
+ qp_attr->sq_draining = 0;
+ qp_attr->max_rd_atomic = 1 << hr_reg_read(&context, QPC_SR_MAX);
+ qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
+
+ qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
+ qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
+ qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+ qp_attr->cap.max_inline_data = hr_qp->max_inline_data;
+
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+
+ qp_init_attr->qp_context = ibqp->qp_context;
+ qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->recv_cq = ibqp->recv_cq;
+ qp_init_attr->send_cq = ibqp->send_cq;
+ qp_init_attr->srq = ibqp->srq;
+ qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
+
+out:
+ mutex_unlock(&hr_qp->mutex);
+ return ret;
+}
+
+static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
+{
+ return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+ hr_qp->state != IB_QPS_RESET);
+}
+
+static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cq *send_cq, *recv_cq;
+ unsigned long flags;
+ int ret = 0;
+
+ if (modify_qp_is_ok(hr_qp)) {
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
+ hr_qp->state, IB_QPS_RESET);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to modify QP to RST, ret = %d.\n",
+ ret);
+ }
+
+ send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+ recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(send_cq, recv_cq);
+
+ if (!udata) {
+ if (recv_cq)
+ __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
+ (hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) :
+ NULL));
+
+ if (send_cq && send_cq != recv_cq)
+ __hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
+ }
+
+ hns_roce_qp_remove(hr_dev, hr_qp);
+
+ hns_roce_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+
+ return ret;
+}
+
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret;
+
+ ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
+ hr_qp->qpn, ret);
+
+ hns_roce_qp_destroy(hr_dev, hr_qp, udata);
+
+ return 0;
+}
+
+static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_sccc_clr_done *resp;
+ struct hns_roce_sccc_clr *clr;
+ struct hns_roce_cmq_desc desc;
+ int ret, i;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ mutex_lock(&hr_dev->qp_table.scc_mutex);
+
+ /* set scc ctx clear done flag */
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret);
+ goto out;
+ }
+
+ /* clear scc context */
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false);
+ clr = (struct hns_roce_sccc_clr *)desc.data;
+ clr->qpn = cpu_to_le32(hr_qp->qpn);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret);
+ goto out;
+ }
+
+ /* query scc context clear is done or not */
+ resp = (struct hns_roce_sccc_clr_done *)desc.data;
+ for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc,
+ HNS_ROCE_OPC_QUERY_SCCC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to query clr cmq, ret = %d\n",
+ ret);
+ goto out;
+ }
+
+ if (resp->clr_done)
+ goto out;
+
+ msleep(20);
+ }
+
+ ibdev_err(ibdev, "query SCC clr done flag overtime.\n");
+ ret = -ETIMEDOUT;
+
+out:
+ mutex_unlock(&hr_dev->qp_table.scc_mutex);
+ return ret;
+}
+
+#define DMA_IDX_SHIFT 3
+#define DMA_WQE_SHIFT 3
+
+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+ struct hns_roce_srq_context *ctx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u64 mtts_idx[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_idx = 0;
+ int ret;
+
+ /* Get physical address of idx que buf */
+ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT);
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_H,
+ upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT));
+
+ hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[0]));
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[1]));
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+ return 0;
+}
+
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_srq_context *ctx = mb_buf;
+ u64 mtts_wqe[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_wqe = 0;
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ /* Get the physical address of srq buf */
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+ hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
+ srq->ibsrq.srq_type == IB_SRQT_XRC);
+ hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+ hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+ hr_reg_write(ctx, SRQC_XRCD, srq->xrcdn);
+ hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+ hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+ hr_reg_write(ctx, SRQC_RQWS,
+ srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));
+
+ hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT);
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_H,
+ upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT));
+
+ hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ return hns_roce_v2_write_srqc_index_queue(srq, ctx);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ /* Resizing SRQs is not supported yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR)
+ return -EOPNOTSUPP;
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit > srq->wqe_cnt)
+ return -EINVAL;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit);
+ hr_reg_clear(srqc_mask, SRQC_LIMIT_WL);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to handle cmd of modifying SRQ, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_SRQC, srq->srqn);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd of querying SRQ, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ attr->srq_limit = hr_reg_read(srq_context, SRQC_LIMIT_WL);
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
+ struct hns_roce_v2_cq_context *cq_context;
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ struct hns_roce_v2_cq_context *cqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cq_context = mailbox->buf;
+ cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1;
+
+ memset(cqc_mask, 0xff, sizeof(*cqc_mask));
+
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
+ hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev,
+ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
+ cq_period);
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ }
+ cq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+ hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
+ hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when modifying CQ, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
+ void *buffer)
+{
+ struct hns_roce_v2_cq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_CQC, cqn);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
+ void *buffer)
+{
+ struct hns_roce_v2_mpt_entry *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ key_to_hw_index(key));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying MPT, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *irq_work =
+ container_of(work, struct hns_roce_work, work);
+ struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
+
+ switch (irq_work->event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ ibdev_info(ibdev, "path migrated succeeded.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ ibdev_warn(ibdev, "path migration failed.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ ibdev_dbg(ibdev, "send queue drained.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ ibdev_err(ibdev, "invalid request local work queue 0x%x error.\n",
+ irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ ibdev_dbg(ibdev, "SRQ limit reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ ibdev_dbg(ibdev, "SRQ last wqe reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ ibdev_err(ibdev, "SRQ catas error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ ibdev_warn(ibdev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ ibdev_warn(ibdev, "function level reset.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ ibdev_err(ibdev, "xrc domain violation error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ ibdev_err(ibdev, "invalid xrceth error.\n");
+ break;
+ default:
+ break;
+ }
+
+ kfree(irq_work);
+}
+
+static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq, u32 queue_num)
+{
+ struct hns_roce_work *irq_work;
+
+ irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC);
+ if (!irq_work)
+ return;
+
+ INIT_WORK(&irq_work->work, hns_roce_irq_work_handle);
+ irq_work->hr_dev = hr_dev;
+ irq_work->event_type = eq->event_type;
+ irq_work->sub_type = eq->sub_type;
+ irq_work->queue_num = queue_num;
+ queue_work(hr_dev->irq_workq, &irq_work->work);
+}
+
+static void update_eq_db(struct hns_roce_eq *eq)
+{
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ struct hns_roce_v2_db eq_db = {};
+
+ if (eq->type_flag == HNS_ROCE_AEQ) {
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_AEQ :
+ HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
+ } else {
+ hr_reg_write(&eq_db, EQ_DB_TAG, eq->eqn);
+
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_CEQ :
+ HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
+ }
+
+ hr_reg_write(&eq_db, EQ_DB_CI, eq->cons_index);
+
+ hns_roce_write64(hr_dev, (__le32 *)&eq_db, eq->db_reg);
+}
+
+static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_aeqe *aeqe;
+
+ aeqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
+
+ return (hr_reg_read(aeqe, AEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
+}
+
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
+ irqreturn_t aeqe_found = IRQ_NONE;
+ int event_type;
+ u32 queue_num;
+ int sub_type;
+
+ while (aeqe) {
+ /* Make sure we read AEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
+ sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
+ queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ hns_roce_cmd_event(hr_dev,
+ le16_to_cpu(aeqe->event.cmd.token),
+ aeqe->event.cmd.status,
+ le64_to_cpu(aeqe->event.cmd.out_param));
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ break;
+ default:
+ dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
+ event_type, eq->eqn, eq->cons_index);
+ break;
+ }
+
+ eq->event_type = event_type;
+ eq->sub_type = sub_type;
+ ++eq->cons_index;
+ aeqe_found = IRQ_HANDLED;
+
+ hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
+
+ aeqe = next_aeqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+
+ return IRQ_RETVAL(aeqe_found);
+}
+
+static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe;
+
+ ceqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
+
+ return (hr_reg_read(ceqe, CEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+}
+
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+ irqreturn_t ceqe_found = IRQ_NONE;
+ u32 cqn;
+
+ while (ceqe) {
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
+
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ceqe_found = IRQ_HANDLED;
+
+ ceqe = next_ceqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+
+ return IRQ_RETVAL(ceqe_found);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
+{
+ struct hns_roce_eq *eq = eq_ptr;
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ irqreturn_t int_work;
+
+ if (eq->type_flag == HNS_ROCE_CEQ)
+ /* Completion event interrupt */
+ int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+ else
+ /* Asynchronous event interrupt */
+ int_work = hns_roce_v2_aeq_int(hr_dev, eq);
+
+ return IRQ_RETVAL(int_work);
+}
+
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
+{
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_en;
+
+ int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
+
+ if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+
+ /* Set reset level for reset_event() */
+ if (ops->set_default_reset_request)
+ ops->set_default_reset_request(ae_dev,
+ HNAE3_FUNC_RESET);
+ if (ops->reset_event)
+ ops->reset_event(pdev, NULL);
+
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
+
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
+
+ return 0;
+}
+
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
+{
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
+
+ return le64_to_cpu(*data) << PAGE_SHIFT;
+}
+
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
+
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
+ else
+ ret = fmea_recover_others(hr_dev, res_type, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
+}
+
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
+{
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
+
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
+ return;
+ }
+
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
+ }
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
+
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
+
+static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
+ int eq_num, u32 enable_flag)
+{
+ int i;
+
+ for (i = 0; i < eq_num; i++)
+ roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
+ i * EQ_REG_OFFSET, enable_flag);
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, enable_flag);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
+}
+
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+ u8 cmd;
+
+ if (eqn < hr_dev->caps.num_comp_vectors)
+ cmd = HNS_ROCE_CMD_DESTROY_CEQC;
+ else
+ cmd = HNS_ROCE_CMD_DESTROY_AEQC;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
+ if (ret)
+ dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn);
+}
+
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+}
+
+static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ eq->db_reg = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
+ eq->cons_index = 0;
+ eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
+ eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
+ eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
+ eq->shift = ilog2((unsigned int)eq->entries);
+}
+
+static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+ void *mb_buf)
+{
+ u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
+ struct hns_roce_eq_context *eqc;
+ u64 bt_ba = 0;
+ int count;
+
+ eqc = mb_buf;
+ memset(eqc, 0, sizeof(struct hns_roce_eq_context));
+
+ init_eq_config(hr_dev, eq);
+
+ /* if not multi-hop, eqe buffer only use one trunk */
+ count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT,
+ &bt_ba);
+ if (count < 1) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr\n");
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
+ hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
+ hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
+ hr_reg_write(eqc, EQC_COALESCE, eq->coalesce);
+ hr_reg_write(eqc, EQC_ARM_ST, eq->arm_st);
+ hr_reg_write(eqc, EQC_EQN, eq->eqn);
+ hr_reg_write(eqc, EQC_EQE_CNT, HNS_ROCE_EQ_INIT_EQE_CNT);
+ hr_reg_write(eqc, EQC_EQE_BA_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(eqc, EQC_EQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
+ hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
+ eq->eq_period);
+ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
+ }
+ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+
+ hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
+ hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
+ hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
+ hr_reg_write(eqc, EQC_EQE_BA_H, bt_ba >> 35);
+ hr_reg_write(eqc, EQC_SHIFT, eq->shift);
+ hr_reg_write(eqc, EQC_MSI_INDX, HNS_ROCE_EQ_INIT_MSI_IDX);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_L, eqe_ba[0] >> 12);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_M, eqe_ba[0] >> 28);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_H, eqe_ba[0] >> 60);
+ hr_reg_write(eqc, EQC_EQ_CONS_INDX, HNS_ROCE_EQ_INIT_CONS_IDX);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_L, eqe_ba[1] >> 12);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_H, eqe_ba[1] >> 44);
+ hr_reg_write(eqc, EQC_EQE_SIZE, eq->eqe_size == HNS_ROCE_V3_EQE_SIZE);
+
+ return 0;
+}
+
+static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0)
+ eq->hop_num = 0;
+ else
+ eq->hop_num = hr_dev->caps.eqe_hop_num;
+
+ buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = eq->entries * eq->eqe_size;
+ buf_attr.region[0].hopnum = eq->hop_num;
+ buf_attr.region_count = 1;
+
+ err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
+ hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
+ 0);
+ if (err)
+ dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err);
+
+ return err;
+}
+
+static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq, u8 eq_cmd)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ /* Allocate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = alloc_eq_buf(hr_dev, eq);
+ if (ret)
+ goto free_cmd_mbox;
+
+ ret = config_eqc(hr_dev, eq, mailbox->buf);
+ if (ret)
+ goto err_cmd_mbox;
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn);
+ if (ret) {
+ dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
+ goto err_cmd_mbox;
+ }
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_cmd_mbox:
+ free_eq_buf(hr_dev, eq);
+
+free_cmd_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+ int comp_num, int aeq_num, int other_num)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < irq_num; i++) {
+ hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+ GFP_KERNEL);
+ if (!hr_dev->irq_names[i]) {
+ ret = -ENOMEM;
+ goto err_kzalloc_failed;
+ }
+ }
+
+ /* irq contains: abnormal + AEQ + CEQ */
+ for (j = 0; j < other_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-abn-%d", j);
+
+ for (j = other_num; j < (other_num + aeq_num); j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-aeq-%d", j - other_num);
+
+ for (j = (other_num + aeq_num); j < irq_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-ceq-%d", j - other_num - aeq_num);
+
+ for (j = 0; j < irq_num; j++) {
+ if (j < other_num)
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v2_msix_interrupt_abn,
+ 0, hr_dev->irq_names[j], hr_dev);
+
+ else if (j < (other_num + comp_num))
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j + aeq_num],
+ &eq_table->eq[j - other_num]);
+ else
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j - comp_num],
+ &eq_table->eq[j - other_num]);
+ if (ret) {
+ dev_err(hr_dev->dev, "request irq error!\n");
+ goto err_request_failed;
+ }
+ }
+
+ return 0;
+
+err_request_failed:
+ for (j -= 1; j >= 0; j--)
+ if (j < other_num)
+ free_irq(hr_dev->irq[j], hr_dev);
+ else
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+
+err_kzalloc_failed:
+ for (i -= 1; i >= 0; i--)
+ kfree(hr_dev->irq_names[i]);
+
+ return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ for (i = 0; i < eq_num; i++)
+ free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+
+ for (i = 0; i < irq_num; i++)
+ kfree(hr_dev->irq_names[i]);
+}
+
+static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_eq *eq;
+ int other_num;
+ int comp_num;
+ int aeq_num;
+ int irq_num;
+ int eq_num;
+ u8 eq_cmd;
+ int ret;
+ int i;
+
+ other_num = hr_dev->caps.num_other_vectors;
+ comp_num = hr_dev->caps.num_comp_vectors;
+ aeq_num = hr_dev->caps.num_aeq_vectors;
+
+ eq_num = comp_num + aeq_num;
+ irq_num = eq_num + other_num;
+
+ eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
+ if (!eq_table->eq)
+ return -ENOMEM;
+
+ /* create eq */
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
+ eq->hr_dev = hr_dev;
+ eq->eqn = i;
+ if (i < comp_num) {
+ /* CEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
+ eq->type_flag = HNS_ROCE_CEQ;
+ eq->entries = hr_dev->caps.ceqe_depth;
+ eq->eqe_size = hr_dev->caps.ceqe_size;
+ eq->irq = hr_dev->irq[i + other_num + aeq_num];
+ eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
+ } else {
+ /* AEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
+ eq->type_flag = HNS_ROCE_AEQ;
+ eq->entries = hr_dev->caps.aeqe_depth;
+ eq->eqe_size = hr_dev->caps.aeqe_size;
+ eq->irq = hr_dev->irq[i - comp_num + other_num];
+ eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
+ }
+
+ ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
+ if (ret) {
+ dev_err(dev, "failed to create eq.\n");
+ goto err_create_eq_fail;
+ }
+ }
+
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
+
+ hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
+ if (!hr_dev->irq_workq) {
+ dev_err(dev, "failed to create irq workqueue.\n");
+ ret = -ENOMEM;
+ goto err_create_eq_fail;
+ }
+
+ ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num, aeq_num,
+ other_num);
+ if (ret) {
+ dev_err(dev, "failed to request irq.\n");
+ goto err_request_irq_fail;
+ }
+
+ /* enable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
+
+ return 0;
+
+err_request_irq_fail:
+ destroy_workqueue(hr_dev->irq_workq);
+
+err_create_eq_fail:
+ for (i -= 1; i >= 0; i--)
+ free_eq_buf(hr_dev, &eq_table->eq[i]);
+ kfree(eq_table->eq);
+
+ return ret;
+}
+
+static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+
+ /* Disable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
+
+ __hns_roce_free_irq(hr_dev);
+ destroy_workqueue(hr_dev->irq_workq);
+
+ for (i = 0; i < eq_num; i++) {
+ hns_roce_v2_destroy_eqc(hr_dev, i);
+
+ free_eq_buf(hr_dev, &eq_table->eq[i]);
+ }
+
+ kfree(eq_table->eq);
+}
+
+static const struct ib_device_ops hns_roce_v2_dev_ops = {
+ .destroy_qp = hns_roce_v2_destroy_qp,
+ .modify_cq = hns_roce_v2_modify_cq,
+ .poll_cq = hns_roce_v2_poll_cq,
+ .post_recv = hns_roce_v2_post_recv,
+ .post_send = hns_roce_v2_post_send,
+ .query_qp = hns_roce_v2_query_qp,
+ .req_notify_cq = hns_roce_v2_req_notify_cq,
+};
+
+static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {
+ .modify_srq = hns_roce_v2_modify_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .query_srq = hns_roce_v2_query_srq,
+};
+
+static const struct hns_roce_hw hns_roce_hw_v2 = {
+ .cmq_init = hns_roce_v2_cmq_init,
+ .cmq_exit = hns_roce_v2_cmq_exit,
+ .hw_profile = hns_roce_v2_profile,
+ .hw_init = hns_roce_v2_init,
+ .hw_exit = hns_roce_v2_exit,
+ .post_mbox = v2_post_mbox,
+ .poll_mbox_done = v2_poll_mbox_done,
+ .chk_mbox_avail = v2_chk_mbox_is_avail,
+ .set_gid = hns_roce_v2_set_gid,
+ .set_mac = hns_roce_v2_set_mac,
+ .write_mtpt = hns_roce_v2_write_mtpt,
+ .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+ .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
+ .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
+ .write_cqc = hns_roce_v2_write_cqc,
+ .set_hem = hns_roce_v2_set_hem,
+ .clear_hem = hns_roce_v2_clear_hem,
+ .modify_qp = hns_roce_v2_modify_qp,
+ .dereg_mr = hns_roce_v2_dereg_mr,
+ .qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
+ .init_eq = hns_roce_v2_init_eq_table,
+ .cleanup_eq = hns_roce_v2_cleanup_eq_table,
+ .write_srqc = hns_roce_v2_write_srqc,
+ .query_cqc = hns_roce_v2_query_cqc,
+ .query_qpc = hns_roce_v2_query_qpc,
+ .query_mpt = hns_roce_v2_query_mpt,
+ .hns_roce_dev_ops = &hns_roce_v2_dev_ops,
+ .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
+};
+
+static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ /* required last entry */
+ {0, }
+};
+
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
+static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ const struct pci_device_id *id;
+ int i;
+
+ hr_dev->pci_dev = handle->pdev;
+ id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev);
+ hr_dev->is_vf = id->driver_data;
+ hr_dev->dev = &handle->pdev->dev;
+ hr_dev->hw = &hns_roce_hw_v2;
+ hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
+ hr_dev->odb_offset = hr_dev->sdb_offset;
+
+ /* Get info from NIC driver. */
+ hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
+ hr_dev->caps.num_ports = 1;
+ hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
+ hr_dev->iboe.phy_port[0] = 0;
+
+ addrconf_addr_eui48((u8 *)&hr_dev->ib_dev.node_guid,
+ hr_dev->iboe.netdevs[0]->dev_addr);
+
+ for (i = 0; i < handle->rinfo.num_vectors; i++)
+ hr_dev->irq[i] = pci_irq_vector(handle->pdev,
+ i + handle->rinfo.base_vector);
+
+ /* cmd issue mode: 0 is poll, 1 is event */
+ hr_dev->cmd_mod = 1;
+ hr_dev->loop_idc = 0;
+
+ hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
+ priv->handle = handle;
+}
+
+static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev;
+ int ret;
+
+ hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
+ if (!hr_dev)
+ return -ENOMEM;
+
+ hr_dev->priv = kzalloc(sizeof(struct hns_roce_v2_priv), GFP_KERNEL);
+ if (!hr_dev->priv) {
+ ret = -ENOMEM;
+ goto error_failed_kzalloc;
+ }
+
+ hns_roce_hw_v2_get_cfg(hr_dev, handle);
+
+ ret = hns_roce_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
+ goto error_failed_roce_init;
+ }
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ goto error_failed_free_mr_init;
+ }
+ }
+
+ handle->priv = hr_dev;
+
+ return 0;
+
+error_failed_free_mr_init:
+ hns_roce_exit(hr_dev);
+
+error_failed_roce_init:
+ kfree(hr_dev->priv);
+
+error_failed_kzalloc:
+ ib_dealloc_device(&hr_dev->ib_dev);
+
+ return ret;
+}
+
+static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
+ bool reset)
+{
+ struct hns_roce_dev *hr_dev = handle->priv;
+
+ if (!hr_dev)
+ return;
+
+ handle->priv = NULL;
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
+ hns_roce_handle_device_err(hr_dev);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
+ hns_roce_exit(hr_dev);
+ kfree(hr_dev->priv);
+ ib_dealloc_device(&hr_dev->ib_dev);
+}
+
+static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ const struct pci_device_id *id;
+ struct device *dev = &handle->pdev->dev;
+ int ret;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_INIT;
+
+ if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) {
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+ goto reset_chk_err;
+ }
+
+ id = pci_match_id(hns_roce_hw_v2_pci_tbl, handle->pdev);
+ if (!id)
+ return 0;
+
+ if (id->driver_data && handle->pdev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
+ ret = __hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+ dev_err(dev, "RoCE instance init failed! ret = %d\n", ret);
+ if (ops->ae_dev_resetting(handle) ||
+ ops->get_hw_reset_stat(handle))
+ goto reset_chk_err;
+ else
+ return ret;
+ }
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_INITED;
+
+ return 0;
+
+reset_chk_err:
+ dev_err(dev, "Device is busy in resetting state.\n"
+ "please retry later.\n");
+
+ return -EBUSY;
+}
+
+static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
+ bool reset)
+{
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
+ return;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
+
+ __hns_roce_hw_v2_uninit_instance(handle, reset);
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+}
+static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev;
+
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
+ set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
+ return 0;
+ }
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN;
+ clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
+
+ hr_dev = handle->priv;
+ if (!hr_dev)
+ return 0;
+
+ hr_dev->active = false;
+ hr_dev->dis_db = true;
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
+{
+ struct device *dev = &handle->pdev->dev;
+ int ret;
+
+ if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
+ &handle->rinfo.state)) {
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+ return 0;
+ }
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT;
+
+ dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n");
+ ret = __hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ /* when reset notify type is HNAE3_INIT_CLIENT In reset notify
+ * callback function, RoCE Engine reinitialize. If RoCE reinit
+ * failed, we should inform NIC driver.
+ */
+ handle->priv = NULL;
+ dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret);
+ } else {
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+ dev_info(dev, "reset done, RoCE client reinit finished.\n");
+ }
+
+ return ret;
+}
+
+static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
+{
+ if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state))
+ return 0;
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
+ dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
+ msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
+ __hns_roce_hw_v2_uninit_instance(handle, false);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
+ enum hnae3_reset_notify_type type)
+{
+ int ret = 0;
+
+ switch (type) {
+ case HNAE3_DOWN_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_down(handle);
+ break;
+ case HNAE3_INIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_init(handle);
+ break;
+ case HNAE3_UNINIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_uninit(handle);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
+ .init_instance = hns_roce_hw_v2_init_instance,
+ .uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .reset_notify = hns_roce_hw_v2_reset_notify,
+};
+
+static struct hnae3_client hns_roce_hw_v2_client = {
+ .name = "hns_roce_hw_v2",
+ .type = HNAE3_CLIENT_ROCE,
+ .ops = &hns_roce_hw_v2_ops,
+};
+
+static int __init hns_roce_hw_v2_init(void)
+{
+ return hnae3_register_client(&hns_roce_hw_v2_client);
+}
+
+static void __exit hns_roce_hw_v2_exit(void)
+{
+ hnae3_unregister_client(&hns_roce_hw_v2_client);
+}
+
+module_init(hns_roce_hw_v2_init);
+module_exit(hns_roce_hw_v2_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
+MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
+MODULE_AUTHOR("Shaobo Xu <xushaobo2@huawei.com>");
+MODULE_DESCRIPTION("Hisilicon Hip08 Family RoCE Driver");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
new file mode 100644
index 000000000..2b4dbbb06
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -0,0 +1,1482 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_HW_V2_H
+#define _HNS_ROCE_HW_V2_H
+
+#include <linux/bitops.h>
+
+#define HNS_ROCE_V2_MAX_QP_NUM 0x1000
+#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_SGE 64
+#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
+#define HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM 0x100
+#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
+#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
+#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
+#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64
+#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64
+#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
+#define HNS_ROCE_V3_MAX_SQ_INLINE 0x400
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
+#define HNS_ROCE_V2_UAR_NUM 256
+#define HNS_ROCE_V2_PHY_UAR_NUM 1
+#define HNS_ROCE_V2_AEQE_VEC_NUM 1
+#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
+#define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
+#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
+#define HNS_ROCE_V2_RSV_XRCD_NUM 0
+#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
+#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
+#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
+#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16
+#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100
+#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
+#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
+#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
+#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
+#define HNS_ROCE_V2_IDX_ENTRY_SZ 4
+
+#define HNS_ROCE_V2_SCCC_SZ 32
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
+#define HNS_ROCE_V3_SCCC_SZ 64
+#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
+
+#define HNS_ROCE_V2_EXT_LLM_ENTRY_SZ 8
+#define HNS_ROCE_V2_EXT_LLM_MAX_DEPTH 4096
+
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
+#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
+#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
+#define HNS_ROCE_V2_RSV_QPS 8
+
+#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20
+
+#define HNS_ROCE_CONTEXT_HOP_NUM 1
+#define HNS_ROCE_SCCC_HOP_NUM 1
+#define HNS_ROCE_MTT_HOP_NUM 1
+#define HNS_ROCE_CQE_HOP_NUM 1
+#define HNS_ROCE_SRQWQE_HOP_NUM 1
+#define HNS_ROCE_PBL_HOP_NUM 2
+#define HNS_ROCE_IDX_HOP_NUM 1
+#define HNS_ROCE_SQWQE_HOP_NUM 2
+#define HNS_ROCE_EXT_SGE_HOP_NUM 1
+#define HNS_ROCE_RQWQE_HOP_NUM 2
+
+#define HNS_ROCE_V2_EQE_HOP_NUM 2
+#define HNS_ROCE_V3_EQE_HOP_NUM 1
+
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2
+#define HNS_ROCE_V2_GID_INDEX_NUM 16
+
+#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
+
+enum {
+ HNS_ROCE_CMD_FLAG_IN = BIT(0),
+ HNS_ROCE_CMD_FLAG_OUT = BIT(1),
+ HNS_ROCE_CMD_FLAG_NEXT = BIT(2),
+ HNS_ROCE_CMD_FLAG_WR = BIT(3),
+ HNS_ROCE_CMD_FLAG_ERR_INTR = BIT(5),
+};
+
+#define HNS_ROCE_CMQ_DESC_NUM_S 3
+
+#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5
+
+#define HNS_ROCE_CONG_SIZE 64
+
+#define check_whether_last_step(hop_num, step_idx) \
+ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
+ (step_idx == 1 && hop_num == 1) || \
+ (step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
+
+#define CMD_CSQ_DESC_NUM 1024
+#define CMD_CRQ_DESC_NUM 1024
+
+/* Free mr used parameters */
+#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8
+#define HNS_ROCE_FREE_MR_USED_PSN 0x0808
+#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7
+#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12
+#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2
+#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2
+#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500
+
+enum {
+ NO_ARMED = 0x0,
+ REG_NXT_CEQE = 0x2,
+ REG_NXT_SE_CEQE = 0x3
+};
+
+enum {
+ CQE_SIZE_32B = 0x0,
+ CQE_SIZE_64B = 0x1
+};
+
+#define V2_CQ_DB_REQ_NOT_SOL 0
+#define V2_CQ_DB_REQ_NOT 1
+
+#define V2_CQ_STATE_VALID 1
+#define V2_QKEY_VAL 0x80010000
+
+#define GID_LEN_V2 16
+
+enum {
+ HNS_ROCE_V2_WQE_OP_SEND = 0x0,
+ HNS_ROCE_V2_WQE_OP_SEND_WITH_INV = 0x1,
+ HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM = 0x2,
+ HNS_ROCE_V2_WQE_OP_RDMA_WRITE = 0x3,
+ HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM = 0x4,
+ HNS_ROCE_V2_WQE_OP_RDMA_READ = 0x5,
+ HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP = 0x6,
+ HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD = 0x7,
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
+ HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
+ HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
+};
+
+enum {
+ /* rq operations */
+ HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
+ HNS_ROCE_V2_OPCODE_SEND = 0x1,
+ HNS_ROCE_V2_OPCODE_SEND_WITH_IMM = 0x2,
+ HNS_ROCE_V2_OPCODE_SEND_WITH_INV = 0x3,
+};
+
+enum {
+ HNS_ROCE_V2_SQ_DB,
+ HNS_ROCE_V2_RQ_DB,
+ HNS_ROCE_V2_SRQ_DB,
+ HNS_ROCE_V2_CQ_DB,
+ HNS_ROCE_V2_CQ_DB_NOTIFY
+};
+
+enum {
+ HNS_ROCE_CQE_V2_SUCCESS = 0x00,
+ HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR = 0x01,
+ HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR = 0x02,
+ HNS_ROCE_CQE_V2_LOCAL_PROT_ERR = 0x04,
+ HNS_ROCE_CQE_V2_WR_FLUSH_ERR = 0x05,
+ HNS_ROCE_CQE_V2_MW_BIND_ERR = 0x06,
+ HNS_ROCE_CQE_V2_BAD_RESP_ERR = 0x10,
+ HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR = 0x11,
+ HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR = 0x12,
+ HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR = 0x13,
+ HNS_ROCE_CQE_V2_REMOTE_OP_ERR = 0x14,
+ HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15,
+ HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16,
+ HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22,
+ HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23,
+
+ HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
+};
+
+/* CMQ command */
+enum hns_roce_opcode_type {
+ HNS_QUERY_FW_VER = 0x0001,
+ HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
+ HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
+ HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
+ HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
+ HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
+ HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
+ HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
+ HNS_ROCE_OPC_QUERY_FUNC_INFO = 0x8407,
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
+ HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409,
+ HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
+ HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
+ HNS_ROCE_OPC_POST_MB = 0x8504,
+ HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
+ HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_ROCE_OPC_FUNC_CLEAR = 0x8508,
+ HNS_ROCE_OPC_CLR_SCCC = 0x8509,
+ HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
+ HNS_ROCE_OPC_RESET_SCCC = 0x850b,
+ HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO = 0x850d,
+ HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
+ HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
+ HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
+ HNS_ROCE_OPC_EXT_CFG = 0x8512,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
+ HNS_SWITCH_PARAMETER_CFG = 0x1033,
+};
+
+enum {
+ TYPE_CRQ,
+ TYPE_CSQ,
+};
+
+enum hns_roce_cmd_return_status {
+ CMD_EXEC_SUCCESS,
+ CMD_NO_AUTH,
+ CMD_NOT_EXIST,
+ CMD_CRQ_FULL,
+ CMD_NEXT_ERR,
+ CMD_NOT_EXEC,
+ CMD_PARA_ERR,
+ CMD_RESULT_ERR,
+ CMD_TIMEOUT,
+ CMD_HILINK_ERR,
+ CMD_INFO_ILLEGAL,
+ CMD_INVALID,
+ CMD_ROH_CHECK_FAIL,
+ CMD_OTHER_ERR = 0xff
+};
+
+struct hns_roce_cmd_errcode {
+ enum hns_roce_cmd_return_status return_status;
+ int errno;
+};
+
+enum hns_roce_sgid_type {
+ GID_TYPE_FLAG_ROCE_V1 = 0,
+ GID_TYPE_FLAG_ROCE_V2_IPV4,
+ GID_TYPE_FLAG_ROCE_V2_IPV6,
+};
+
+struct hns_roce_v2_cq_context {
+ __le32 byte_4_pg_ceqn;
+ __le32 byte_8_cqn;
+ __le32 cqe_cur_blk_addr;
+ __le32 byte_16_hop_addr;
+ __le32 cqe_nxt_blk_addr;
+ __le32 byte_24_pgsz_addr;
+ __le32 byte_28_cq_pi;
+ __le32 byte_32_cq_ci;
+ __le32 cqe_ba;
+ __le32 byte_40_cqe_ba;
+ __le32 byte_44_db_record;
+ __le32 db_record_addr;
+ __le32 byte_52_cqe_cnt;
+ __le32 byte_56_cqe_period_maxcnt;
+ __le32 cqe_report_timer;
+ __le32 byte_64_se_cqe_idx;
+};
+
+#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
+
+#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
+
+#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
+#define CQC_POLL CQC_FIELD_LOC(2, 2)
+#define CQC_SE CQC_FIELD_LOC(3, 3)
+#define CQC_OVER_IGNORE CQC_FIELD_LOC(4, 4)
+#define CQC_ARM_ST CQC_FIELD_LOC(7, 6)
+#define CQC_SHIFT CQC_FIELD_LOC(12, 8)
+#define CQC_CMD_SN CQC_FIELD_LOC(14, 13)
+#define CQC_CEQN CQC_FIELD_LOC(23, 15)
+#define CQC_CQN CQC_FIELD_LOC(55, 32)
+#define CQC_POE_EN CQC_FIELD_LOC(56, 56)
+#define CQC_POE_NUM CQC_FIELD_LOC(58, 57)
+#define CQC_CQE_SIZE CQC_FIELD_LOC(60, 59)
+#define CQC_CQ_CNT_MODE CQC_FIELD_LOC(61, 61)
+#define CQC_STASH CQC_FIELD_LOC(63, 63)
+#define CQC_CQE_CUR_BLK_ADDR_L CQC_FIELD_LOC(95, 64)
+#define CQC_CQE_CUR_BLK_ADDR_H CQC_FIELD_LOC(115, 96)
+#define CQC_POE_QID CQC_FIELD_LOC(125, 116)
+#define CQC_CQE_HOP_NUM CQC_FIELD_LOC(127, 126)
+#define CQC_CQE_NEX_BLK_ADDR_L CQC_FIELD_LOC(159, 128)
+#define CQC_CQE_NEX_BLK_ADDR_H CQC_FIELD_LOC(179, 160)
+#define CQC_CQE_BAR_PG_SZ CQC_FIELD_LOC(187, 184)
+#define CQC_CQE_BUF_PG_SZ CQC_FIELD_LOC(191, 188)
+#define CQC_CQ_PRODUCER_IDX CQC_FIELD_LOC(215, 192)
+#define CQC_CQ_CONSUMER_IDX CQC_FIELD_LOC(247, 224)
+#define CQC_CQE_BA_L CQC_FIELD_LOC(287, 256)
+#define CQC_CQE_BA_H CQC_FIELD_LOC(316, 288)
+#define CQC_POE_QID_H_0 CQC_FIELD_LOC(319, 317)
+#define CQC_DB_RECORD_EN CQC_FIELD_LOC(320, 320)
+#define CQC_CQE_DB_RECORD_ADDR_L CQC_FIELD_LOC(351, 321)
+#define CQC_CQE_DB_RECORD_ADDR_H CQC_FIELD_LOC(383, 352)
+#define CQC_CQE_CNT CQC_FIELD_LOC(407, 384)
+#define CQC_CQ_MAX_CNT CQC_FIELD_LOC(431, 416)
+#define CQC_CQ_PERIOD CQC_FIELD_LOC(447, 432)
+#define CQC_CQE_REPORT_TIMER CQC_FIELD_LOC(471, 448)
+#define CQC_WR_CQE_IDX CQC_FIELD_LOC(479, 472)
+#define CQC_SE_CQE_IDX CQC_FIELD_LOC(503, 480)
+#define CQC_POE_QID_H_1 CQC_FIELD_LOC(511, 511)
+
+struct hns_roce_srq_context {
+ __le32 data[16];
+};
+
+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(190, 189)
+#define SRQC_SRQ_TYPE SRQC_FIELD_LOC(191, 191)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
+
+enum {
+ V2_MPT_ST_VALID = 0x1,
+ V2_MPT_ST_FREE = 0x2,
+};
+
+enum hns_roce_v2_qp_state {
+ HNS_ROCE_QP_ST_RST,
+ HNS_ROCE_QP_ST_INIT,
+ HNS_ROCE_QP_ST_RTR,
+ HNS_ROCE_QP_ST_RTS,
+ HNS_ROCE_QP_ST_SQD,
+ HNS_ROCE_QP_ST_SQER,
+ HNS_ROCE_QP_ST_ERR,
+ HNS_ROCE_QP_ST_SQ_DRAINING,
+ HNS_ROCE_QP_NUM_ST
+};
+
+struct hns_roce_v2_qp_context_ex {
+ __le32 data[64];
+};
+
+struct hns_roce_v2_qp_context {
+ __le32 byte_4_sqpn_tst;
+ __le32 wqe_sge_ba;
+ __le32 byte_12_sq_hop;
+ __le32 byte_16_buf_ba_pg_sz;
+ __le32 byte_20_smac_sgid_idx;
+ __le32 byte_24_mtu_tc;
+ __le32 byte_28_at_fl;
+ u8 dgid[GID_LEN_V2];
+ __le32 dmac;
+ __le32 byte_52_udpspn_dmac;
+ __le32 byte_56_dqpn_err;
+ __le32 byte_60_qpst_tempid;
+ __le32 qkey_xrcd;
+ __le32 byte_68_rq_db;
+ __le32 rq_db_record_addr;
+ __le32 byte_76_srqn_op_en;
+ __le32 byte_80_rnr_rx_cqn;
+ __le32 byte_84_rq_ci_pi;
+ __le32 rq_cur_blk_addr;
+ __le32 byte_92_srq_info;
+ __le32 byte_96_rx_reqmsn;
+ __le32 rq_nxt_blk_addr;
+ __le32 byte_104_rq_sge;
+ __le32 byte_108_rx_reqepsn;
+ __le32 rq_rnr_timer;
+ __le32 rx_msg_len;
+ __le32 rx_rkey_pkt_info;
+ __le64 rx_va;
+ __le32 byte_132_trrl;
+ __le32 trrl_ba;
+ __le32 byte_140_raq;
+ __le32 byte_144_raq;
+ __le32 byte_148_raq;
+ __le32 byte_152_raq;
+ __le32 byte_156_raq;
+ __le32 byte_160_sq_ci_pi;
+ __le32 sq_cur_blk_addr;
+ __le32 byte_168_irrl_idx;
+ __le32 byte_172_sq_psn;
+ __le32 byte_176_msg_pktn;
+ __le32 sq_cur_sge_blk_addr;
+ __le32 byte_184_irrl_idx;
+ __le32 cur_sge_offset;
+ __le32 byte_192_ext_sge;
+ __le32 byte_196_sq_psn;
+ __le32 byte_200_sq_max;
+ __le32 irrl_ba;
+ __le32 byte_208_irrl;
+ __le32 byte_212_lsn;
+ __le32 sq_timer;
+ __le32 byte_220_retry_psn_msn;
+ __le32 byte_224_retry_msg;
+ __le32 rx_sq_cur_blk_addr;
+ __le32 byte_232_irrl_sge;
+ __le32 irrl_cur_sge_offset;
+ __le32 byte_240_irrl_tail;
+ __le32 byte_244_rnr_rxack;
+ __le32 byte_248_ack_psn;
+ __le32 byte_252_err_txcqn;
+ __le32 byte_256_sqflush_rqcqe;
+
+ struct hns_roce_v2_qp_context_ex ext;
+};
+
+#define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l)
+
+#define QPC_TST QPC_FIELD_LOC(2, 0)
+#define QPC_SGE_SHIFT QPC_FIELD_LOC(7, 3)
+#define QPC_CNP_TIMER QPC_FIELD_LOC(31, 8)
+#define QPC_WQE_SGE_BA_L QPC_FIELD_LOC(63, 32)
+#define QPC_WQE_SGE_BA_H QPC_FIELD_LOC(92, 64)
+#define QPC_SQ_HOP_NUM QPC_FIELD_LOC(94, 93)
+#define QPC_CIRE_EN QPC_FIELD_LOC(95, 95)
+#define QPC_WQE_SGE_BA_PG_SZ QPC_FIELD_LOC(99, 96)
+#define QPC_WQE_SGE_BUF_PG_SZ QPC_FIELD_LOC(103, 100)
+#define QPC_PD QPC_FIELD_LOC(127, 104)
+#define QPC_RQ_HOP_NUM QPC_FIELD_LOC(129, 128)
+#define QPC_SGE_HOP_NUM QPC_FIELD_LOC(131, 130)
+#define QPC_RQWS QPC_FIELD_LOC(135, 132)
+#define QPC_SQ_SHIFT QPC_FIELD_LOC(139, 136)
+#define QPC_RQ_SHIFT QPC_FIELD_LOC(143, 140)
+#define QPC_GMV_IDX QPC_FIELD_LOC(159, 144)
+#define QPC_HOPLIMIT QPC_FIELD_LOC(167, 160)
+#define QPC_TC QPC_FIELD_LOC(175, 168)
+#define QPC_VLAN_ID QPC_FIELD_LOC(187, 176)
+#define QPC_MTU QPC_FIELD_LOC(191, 188)
+#define QPC_FL QPC_FIELD_LOC(211, 192)
+#define QPC_SL QPC_FIELD_LOC(215, 212)
+#define QPC_CNP_TX_FLAG QPC_FIELD_LOC(216, 216)
+#define QPC_CE_FLAG QPC_FIELD_LOC(217, 217)
+#define QPC_LBI QPC_FIELD_LOC(218, 218)
+#define QPC_AT QPC_FIELD_LOC(223, 219)
+#define QPC_DGID QPC_FIELD_LOC(351, 224)
+#define QPC_DMAC_L QPC_FIELD_LOC(383, 352)
+#define QPC_DMAC_H QPC_FIELD_LOC(399, 384)
+#define QPC_UDPSPN QPC_FIELD_LOC(415, 400)
+#define QPC_DQPN QPC_FIELD_LOC(439, 416)
+#define QPC_SQ_TX_ERR QPC_FIELD_LOC(440, 440)
+#define QPC_SQ_RX_ERR QPC_FIELD_LOC(441, 441)
+#define QPC_RQ_TX_ERR QPC_FIELD_LOC(442, 442)
+#define QPC_RQ_RX_ERR QPC_FIELD_LOC(443, 443)
+#define QPC_LP_PKTN_INI QPC_FIELD_LOC(447, 444)
+#define QPC_CONG_ALGO_TMPL_ID QPC_FIELD_LOC(455, 448)
+#define QPC_SCC_TOKEN QPC_FIELD_LOC(474, 456)
+#define QPC_SQ_DB_DOING QPC_FIELD_LOC(475, 475)
+#define QPC_RQ_DB_DOING QPC_FIELD_LOC(476, 476)
+#define QPC_QP_ST QPC_FIELD_LOC(479, 477)
+#define QPC_QKEY_XRCD QPC_FIELD_LOC(511, 480)
+#define QPC_RQ_RECORD_EN QPC_FIELD_LOC(512, 512)
+#define QPC_RQ_DB_RECORD_ADDR_L QPC_FIELD_LOC(543, 513)
+#define QPC_RQ_DB_RECORD_ADDR_H QPC_FIELD_LOC(575, 544)
+#define QPC_SRQN QPC_FIELD_LOC(599, 576)
+#define QPC_SRQ_EN QPC_FIELD_LOC(600, 600)
+#define QPC_RRE QPC_FIELD_LOC(601, 601)
+#define QPC_RWE QPC_FIELD_LOC(602, 602)
+#define QPC_ATE QPC_FIELD_LOC(603, 603)
+#define QPC_RQIE QPC_FIELD_LOC(604, 604)
+#define QPC_EXT_ATE QPC_FIELD_LOC(605, 605)
+#define QPC_RQ_VLAN_EN QPC_FIELD_LOC(606, 606)
+#define QPC_RQ_RTY_TX_ERR QPC_FIELD_LOC(607, 607)
+#define QPC_RX_CQN QPC_FIELD_LOC(631, 608)
+#define QPC_XRC_QP_TYPE QPC_FIELD_LOC(632, 632)
+#define QPC_RSV3 QPC_FIELD_LOC(634, 633)
+#define QPC_MIN_RNR_TIME QPC_FIELD_LOC(639, 635)
+#define QPC_RQ_PRODUCER_IDX QPC_FIELD_LOC(655, 640)
+#define QPC_RQ_CONSUMER_IDX QPC_FIELD_LOC(671, 656)
+#define QPC_RQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(703, 672)
+#define QPC_RQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(723, 704)
+#define QPC_SRQ_INFO QPC_FIELD_LOC(735, 724)
+#define QPC_RX_REQ_MSN QPC_FIELD_LOC(759, 736)
+#define QPC_REDUCE_CODE QPC_FIELD_LOC(766, 760)
+#define QPC_RX_XRC_PKT_CQE_FLG QPC_FIELD_LOC(767, 767)
+#define QPC_RQ_NXT_BLK_ADDR_L QPC_FIELD_LOC(799, 768)
+#define QPC_RQ_NXT_BLK_ADDR_H QPC_FIELD_LOC(819, 800)
+#define QPC_REDUCE_EN QPC_FIELD_LOC(820, 820)
+#define QPC_FLUSH_EN QPC_FIELD_LOC(821, 821)
+#define QPC_AW_EN QPC_FIELD_LOC(822, 822)
+#define QPC_WN_EN QPC_FIELD_LOC(823, 823)
+#define QPC_RQ_CUR_WQE_SGE_NUM QPC_FIELD_LOC(831, 824)
+#define QPC_INV_CREDIT QPC_FIELD_LOC(832, 832)
+#define QPC_LAST_WRITE_TYPE QPC_FIELD_LOC(834, 833)
+#define QPC_RX_REQ_PSN_ERR QPC_FIELD_LOC(835, 835)
+#define QPC_RX_REQ_LAST_OPTYPE QPC_FIELD_LOC(838, 836)
+#define QPC_RX_REQ_RNR QPC_FIELD_LOC(839, 839)
+#define QPC_RX_REQ_EPSN QPC_FIELD_LOC(863, 840)
+#define QPC_RQ_RNR_TIMER QPC_FIELD_LOC(895, 864)
+#define QPC_RX_MSG_LEN QPC_FIELD_LOC(927, 896)
+#define QPC_RX_RKEY_PKT_INFO QPC_FIELD_LOC(959, 928)
+#define QPC_RX_VA QPC_FIELD_LOC(1023, 960)
+#define QPC_TRRL_HEAD_MAX QPC_FIELD_LOC(1031, 1024)
+#define QPC_TRRL_TAIL_MAX QPC_FIELD_LOC(1039, 1032)
+#define QPC_TRRL_BA_L QPC_FIELD_LOC(1055, 1040)
+#define QPC_TRRL_BA_M QPC_FIELD_LOC(1087, 1056)
+#define QPC_TRRL_BA_H QPC_FIELD_LOC(1099, 1088)
+#define QPC_RR_MAX QPC_FIELD_LOC(1102, 1100)
+#define QPC_RQ_RTY_WAIT_DO QPC_FIELD_LOC(1103, 1103)
+#define QPC_RAQ_TRRL_HEAD QPC_FIELD_LOC(1111, 1104)
+#define QPC_RAQ_TRRL_TAIL QPC_FIELD_LOC(1119, 1112)
+#define QPC_RAQ_RTY_INI_PSN QPC_FIELD_LOC(1143, 1120)
+#define QPC_CIRE_SLV_RQ_EN QPC_FIELD_LOC(1144, 1144)
+#define QPC_RAQ_CREDIT QPC_FIELD_LOC(1149, 1145)
+#define QPC_RQ_DB_IN_EXT QPC_FIELD_LOC(1150, 1150)
+#define QPC_RESP_RTY_FLG QPC_FIELD_LOC(1151, 1151)
+#define QPC_RAQ_MSN QPC_FIELD_LOC(1175, 1152)
+#define QPC_RAQ_SYNDROME QPC_FIELD_LOC(1183, 1176)
+#define QPC_RAQ_PSN QPC_FIELD_LOC(1207, 1184)
+#define QPC_RAQ_TRRL_RTY_HEAD QPC_FIELD_LOC(1215, 1208)
+#define QPC_RAQ_USE_PKTN QPC_FIELD_LOC(1239, 1216)
+#define QPC_RQ_SCC_TOKEN QPC_FIELD_LOC(1245, 1240)
+#define QPC_RVD10 QPC_FIELD_LOC(1247, 1246)
+#define QPC_SQ_PRODUCER_IDX QPC_FIELD_LOC(1263, 1248)
+#define QPC_SQ_CONSUMER_IDX QPC_FIELD_LOC(1279, 1264)
+#define QPC_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1311, 1280)
+#define QPC_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1331, 1312)
+#define QPC_MSG_RTY_LP_FLG QPC_FIELD_LOC(1332, 1332)
+#define QPC_SQ_INVLD_FLG QPC_FIELD_LOC(1333, 1333)
+#define QPC_LP_SGEN_INI QPC_FIELD_LOC(1335, 1334)
+#define QPC_SQ_VLAN_EN QPC_FIELD_LOC(1336, 1336)
+#define QPC_POLL_DB_WAIT_DO QPC_FIELD_LOC(1337, 1337)
+#define QPC_SCC_TOKEN_FORBID_SQ_DEQ QPC_FIELD_LOC(1338, 1338)
+#define QPC_WAIT_ACK_TIMEOUT QPC_FIELD_LOC(1339, 1339)
+#define QPC_IRRL_IDX_LSB QPC_FIELD_LOC(1343, 1340)
+#define QPC_ACK_REQ_FREQ QPC_FIELD_LOC(1349, 1344)
+#define QPC_MSG_RNR_FLG QPC_FIELD_LOC(1350, 1350)
+#define QPC_FRE QPC_FIELD_LOC(1351, 1351)
+#define QPC_SQ_CUR_PSN QPC_FIELD_LOC(1375, 1352)
+#define QPC_MSG_USE_PKTN QPC_FIELD_LOC(1399, 1376)
+#define QPC_IRRL_HEAD_PRE QPC_FIELD_LOC(1407, 1400)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_L QPC_FIELD_LOC(1439, 1408)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_H QPC_FIELD_LOC(1459, 1440)
+#define QPC_IRRL_IDX_MSB QPC_FIELD_LOC(1471, 1460)
+#define QPC_CUR_SGE_OFFSET QPC_FIELD_LOC(1503, 1472)
+#define QPC_CUR_SGE_IDX QPC_FIELD_LOC(1527, 1504)
+#define QPC_EXT_SGE_NUM_LEFT QPC_FIELD_LOC(1535, 1528)
+#define QPC_OWNER_MODE QPC_FIELD_LOC(1536, 1536)
+#define QPC_CIRE_SLV_SQ_EN QPC_FIELD_LOC(1537, 1537)
+#define QPC_CIRE_DOING QPC_FIELD_LOC(1538, 1538)
+#define QPC_CIRE_RESULT QPC_FIELD_LOC(1539, 1539)
+#define QPC_OWNER_DB_WAIT_DO QPC_FIELD_LOC(1540, 1540)
+#define QPC_SQ_WQE_INVLD QPC_FIELD_LOC(1541, 1541)
+#define QPC_DCA_MODE QPC_FIELD_LOC(1542, 1542)
+#define QPC_RTY_OWNER_NOCHK QPC_FIELD_LOC(1543, 1543)
+#define QPC_V2_IRRL_HEAD QPC_FIELD_LOC(1543, 1536)
+#define QPC_SQ_MAX_PSN QPC_FIELD_LOC(1567, 1544)
+#define QPC_SQ_MAX_IDX QPC_FIELD_LOC(1583, 1568)
+#define QPC_LCL_OPERATED_CNT QPC_FIELD_LOC(1599, 1584)
+#define QPC_IRRL_BA_L QPC_FIELD_LOC(1631, 1600)
+#define QPC_IRRL_BA_H QPC_FIELD_LOC(1657, 1632)
+#define QPC_PKT_RNR_FLG QPC_FIELD_LOC(1658, 1658)
+#define QPC_PKT_RTY_FLG QPC_FIELD_LOC(1659, 1659)
+#define QPC_RMT_E2E QPC_FIELD_LOC(1660, 1660)
+#define QPC_SR_MAX QPC_FIELD_LOC(1663, 1661)
+#define QPC_LSN QPC_FIELD_LOC(1687, 1664)
+#define QPC_RETRY_NUM_INIT QPC_FIELD_LOC(1690, 1688)
+#define QPC_CHECK_FLG QPC_FIELD_LOC(1692, 1691)
+#define QPC_RETRY_CNT QPC_FIELD_LOC(1695, 1693)
+#define QPC_SQ_TIMER QPC_FIELD_LOC(1727, 1696)
+#define QPC_RETRY_MSG_MSN QPC_FIELD_LOC(1743, 1728)
+#define QPC_RETRY_MSG_PSN_L QPC_FIELD_LOC(1759, 1744)
+#define QPC_RETRY_MSG_PSN_H QPC_FIELD_LOC(1767, 1760)
+#define QPC_RETRY_MSG_FPKT_PSN QPC_FIELD_LOC(1791, 1768)
+#define QPC_RX_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1823, 1792)
+#define QPC_RX_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1843, 1824)
+#define QPC_IRRL_SGE_IDX QPC_FIELD_LOC(1851, 1844)
+#define QPC_LSAN_EN QPC_FIELD_LOC(1852, 1852)
+#define QPC_SO_LP_VLD QPC_FIELD_LOC(1853, 1853)
+#define QPC_FENCE_LP_VLD QPC_FIELD_LOC(1854, 1854)
+#define QPC_IRRL_LP_VLD QPC_FIELD_LOC(1855, 1855)
+#define QPC_IRRL_CUR_SGE_OFFSET QPC_FIELD_LOC(1887, 1856)
+#define QPC_IRRL_TAIL_REAL QPC_FIELD_LOC(1895, 1888)
+#define QPC_IRRL_TAIL_RD QPC_FIELD_LOC(1903, 1896)
+#define QPC_RX_ACK_MSN QPC_FIELD_LOC(1919, 1904)
+#define QPC_RX_ACK_EPSN QPC_FIELD_LOC(1943, 1920)
+#define QPC_RNR_NUM_INIT QPC_FIELD_LOC(1946, 1944)
+#define QPC_RNR_CNT QPC_FIELD_LOC(1949, 1947)
+#define QPC_LCL_OP_FLG QPC_FIELD_LOC(1950, 1950)
+#define QPC_IRRL_RD_FLG QPC_FIELD_LOC(1951, 1951)
+#define QPC_IRRL_PSN QPC_FIELD_LOC(1975, 1952)
+#define QPC_ACK_PSN_ERR QPC_FIELD_LOC(1976, 1976)
+#define QPC_ACK_LAST_OPTYPE QPC_FIELD_LOC(1978, 1977)
+#define QPC_IRRL_PSN_VLD QPC_FIELD_LOC(1979, 1979)
+#define QPC_RNR_RETRY_FLAG QPC_FIELD_LOC(1980, 1980)
+#define QPC_SQ_RTY_TX_ERR QPC_FIELD_LOC(1981, 1981)
+#define QPC_LAST_IND QPC_FIELD_LOC(1982, 1982)
+#define QPC_CQ_ERR_IND QPC_FIELD_LOC(1983, 1983)
+#define QPC_TX_CQN QPC_FIELD_LOC(2007, 1984)
+#define QPC_SIG_TYPE QPC_FIELD_LOC(2008, 2008)
+#define QPC_ERR_TYPE QPC_FIELD_LOC(2015, 2009)
+#define QPC_RQ_CQE_IDX QPC_FIELD_LOC(2031, 2016)
+#define QPC_SQ_FLUSH_IDX QPC_FIELD_LOC(2047, 2032)
+
+#define RETRY_MSG_PSN_SHIFT 16
+
+#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l)
+
+#define QPCEX_CONG_ALG_SEL QPCEX_FIELD_LOC(0, 0)
+#define QPCEX_CONG_ALG_SUB_SEL QPCEX_FIELD_LOC(1, 1)
+#define QPCEX_DIP_CTX_IDX_VLD QPCEX_FIELD_LOC(2, 2)
+#define QPCEX_DIP_CTX_IDX QPCEX_FIELD_LOC(22, 3)
+#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
+#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+
+#define V2_QP_RWE_S 1 /* rdma write enable */
+#define V2_QP_RRE_S 2 /* rdma read enable */
+#define V2_QP_ATE_S 3 /* rdma atomic enable */
+
+struct hns_roce_v2_cqe {
+ __le32 byte_4;
+ union {
+ __le32 rkey;
+ __le32 immtdata;
+ };
+ __le32 byte_12;
+ __le32 byte_16;
+ __le32 byte_cnt;
+ u8 smac[4];
+ __le32 byte_28;
+ __le32 byte_32;
+ __le32 rsv[8];
+};
+
+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
+
+#define CQE_OPCODE CQE_FIELD_LOC(4, 0)
+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5)
+#define CQE_S_R CQE_FIELD_LOC(6, 6)
+#define CQE_OWNER CQE_FIELD_LOC(7, 7)
+#define CQE_STATUS CQE_FIELD_LOC(15, 8)
+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
+#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
+#define CQE_SMAC CQE_FIELD_LOC(207, 160)
+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208)
+#define CQE_VID CQE_FIELD_LOC(221, 210)
+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222)
+#define CQE_RSV2 CQE_FIELD_LOC(223, 223)
+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224)
+#define CQE_SL CQE_FIELD_LOC(250, 248)
+#define CQE_PORTN CQE_FIELD_LOC(253, 251)
+#define CQE_GRH CQE_FIELD_LOC(254, 254)
+#define CQE_LPK CQE_FIELD_LOC(255, 255)
+#define CQE_RSV3 CQE_FIELD_LOC(511, 256)
+
+struct hns_roce_v2_mpt_entry {
+ __le32 byte_4_pd_hop_st;
+ __le32 byte_8_mw_cnt_en;
+ __le32 byte_12_mw_pa;
+ __le32 bound_lkey;
+ __le32 len_l;
+ __le32 len_h;
+ __le32 lkey;
+ __le32 va_l;
+ __le32 va_h;
+ __le32 pbl_size;
+ __le32 pbl_ba_l;
+ __le32 byte_48_mode_ba;
+ __le32 pa0_l;
+ __le32 byte_56_pa0_h;
+ __le32 pa1_l;
+ __le32 byte_64_buf_pa1;
+};
+
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
+
+#define MPT_ST MPT_FIELD_LOC(1, 0)
+#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2)
+#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4)
+#define MPT_PD MPT_FIELD_LOC(31, 8)
+#define MPT_RA_EN MPT_FIELD_LOC(32, 32)
+#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33)
+#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34)
+#define MPT_BIND_EN MPT_FIELD_LOC(35, 35)
+#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36)
+#define MPT_RR_EN MPT_FIELD_LOC(37, 37)
+#define MPT_RW_EN MPT_FIELD_LOC(38, 38)
+#define MPT_LW_EN MPT_FIELD_LOC(39, 39)
+#define MPT_MW_CNT MPT_FIELD_LOC(63, 40)
+#define MPT_FRE MPT_FIELD_LOC(64, 64)
+#define MPT_PA MPT_FIELD_LOC(65, 65)
+#define MPT_ZBVA MPT_FIELD_LOC(66, 66)
+#define MPT_SHARE MPT_FIELD_LOC(67, 67)
+#define MPT_MR_MW MPT_FIELD_LOC(68, 68)
+#define MPT_BPD MPT_FIELD_LOC(69, 69)
+#define MPT_BQP MPT_FIELD_LOC(70, 70)
+#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
+#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
+#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
+#define MPT_LEN_L MPT_FIELD_LOC(159, 128)
+#define MPT_LEN_H MPT_FIELD_LOC(191, 160)
+#define MPT_LKEY MPT_FIELD_LOC(223, 192)
+#define MPT_VA MPT_FIELD_LOC(287, 224)
+#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
+#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
+#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
+#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
+#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
+#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
+#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+
+#define V2_MPT_BYTE_4_MPT_ST_S 0
+#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
+
+#define V2_MPT_BYTE_4_PBL_HOP_NUM_S 2
+#define V2_MPT_BYTE_4_PBL_HOP_NUM_M GENMASK(3, 2)
+
+#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_S 4
+#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_M GENMASK(7, 4)
+
+#define V2_MPT_BYTE_4_PD_S 8
+#define V2_MPT_BYTE_4_PD_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_8_RA_EN_S 0
+
+#define V2_MPT_BYTE_8_R_INV_EN_S 1
+
+#define V2_MPT_BYTE_8_L_INV_EN_S 2
+
+#define V2_MPT_BYTE_8_BIND_EN_S 3
+
+#define V2_MPT_BYTE_8_ATOMIC_EN_S 4
+
+#define V2_MPT_BYTE_8_RR_EN_S 5
+
+#define V2_MPT_BYTE_8_RW_EN_S 6
+
+#define V2_MPT_BYTE_8_LW_EN_S 7
+
+#define V2_MPT_BYTE_8_MW_CNT_S 8
+#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_12_FRE_S 0
+
+#define V2_MPT_BYTE_12_PA_S 1
+
+#define V2_MPT_BYTE_12_MR_MW_S 4
+
+#define V2_MPT_BYTE_12_BPD_S 5
+
+#define V2_MPT_BYTE_12_BQP_S 6
+
+#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
+
+#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
+#define V2_MPT_BYTE_12_MW_BIND_QPN_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_48_PBL_BA_H_S 0
+#define V2_MPT_BYTE_48_PBL_BA_H_M GENMASK(28, 0)
+
+#define V2_MPT_BYTE_48_BLK_MODE_S 29
+
+#define V2_MPT_BYTE_56_PA0_H_S 0
+#define V2_MPT_BYTE_56_PA0_H_M GENMASK(25, 0)
+
+#define V2_MPT_BYTE_64_PA1_H_S 0
+#define V2_MPT_BYTE_64_PA1_H_M GENMASK(25, 0)
+
+#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S 28
+#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M GENMASK(31, 28)
+
+struct hns_roce_v2_db {
+ __le32 data[2];
+};
+
+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_db, h, l)
+
+#define DB_TAG DB_FIELD_LOC(23, 0)
+#define DB_CMD DB_FIELD_LOC(27, 24)
+#define DB_FLAG DB_FIELD_LOC(31, 31)
+#define DB_PI DB_FIELD_LOC(47, 32)
+#define DB_SL DB_FIELD_LOC(50, 48)
+#define DB_CQ_CI DB_FIELD_LOC(55, 32)
+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56)
+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57)
+#define EQ_DB_TAG DB_FIELD_LOC(7, 0)
+#define EQ_DB_CMD DB_FIELD_LOC(17, 16)
+#define EQ_DB_CI DB_FIELD_LOC(55, 32)
+
+#define V2_DB_PRODUCER_IDX_S 0
+#define V2_DB_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define V2_CQ_DB_CONS_IDX_S 0
+#define V2_CQ_DB_CONS_IDX_M GENMASK(23, 0)
+
+struct hns_roce_v2_ud_send_wqe {
+ __le32 byte_4;
+ __le32 msg_len;
+ __le32 immtdata;
+ __le32 byte_16;
+ __le32 byte_20;
+ __le32 byte_24;
+ __le32 qkey;
+ __le32 byte_32;
+ __le32 byte_36;
+ __le32 byte_40;
+ u8 dmac[ETH_ALEN];
+ u8 sgid_index;
+ u8 smac_index;
+ u8 dgid[GID_LEN_V2];
+};
+
+#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
+
+#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
+#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
+#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
+#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
+#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
+#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
+#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
+#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
+#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
+#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
+#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
+#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
+#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
+#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
+#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
+#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
+
+struct hns_roce_v2_rc_send_wqe {
+ __le32 byte_4;
+ __le32 msg_len;
+ union {
+ __le32 inv_key;
+ __le32 immtdata;
+ };
+ __le32 byte_16;
+ __le32 byte_20;
+ __le32 rkey;
+ __le64 va;
+};
+
+#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
+
+#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
+#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
+#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
+#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
+#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
+#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
+#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
+#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
+#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
+#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
+#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
+#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
+#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
+
+struct hns_roce_wqe_frmr_seg {
+ __le32 pbl_size;
+ __le32 byte_40;
+};
+
+#define FRMR_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_wqe_frmr_seg, h, l)
+
+#define FRMR_PBL_SIZE FRMR_WQE_FIELD_LOC(31, 0)
+#define FRMR_BLOCK_SIZE FRMR_WQE_FIELD_LOC(35, 32)
+#define FRMR_PBL_BUF_PG_SZ FRMR_WQE_FIELD_LOC(39, 36)
+#define FRMR_BLK_MODE FRMR_WQE_FIELD_LOC(40, 40)
+#define FRMR_ZBVA FRMR_WQE_FIELD_LOC(41, 41)
+#define FRMR_BIND_EN FRMR_WQE_FIELD_LOC(42, 42)
+#define FRMR_ATOMIC FRMR_WQE_FIELD_LOC(43, 43)
+#define FRMR_RR FRMR_WQE_FIELD_LOC(44, 44)
+#define FRMR_RW FRMR_WQE_FIELD_LOC(45, 45)
+#define FRMR_LW FRMR_WQE_FIELD_LOC(46, 46)
+
+struct hns_roce_v2_wqe_data_seg {
+ __le32 len;
+ __le32 lkey;
+ __le64 addr;
+};
+
+struct hns_roce_query_version {
+ __le16 rocee_vendor_id;
+ __le16 rocee_hw_version;
+ __le32 rsv[5];
+};
+
+struct hns_roce_query_fw_info {
+ __le32 fw_ver;
+ __le32 rsv[5];
+};
+
+struct hns_roce_func_clear {
+ __le32 rst_funcid_en;
+ __le32 func_done;
+ __le32 rsv[4];
+};
+
+#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
+
+#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
+
+/* Each physical function manages up to 248 virtual functions, it takes up to
+ * 100ms for each function to execute clear. If an abnormal reset occurs, it is
+ * executed twice at most, so it takes up to 249 * 2 * 100ms.
+ */
+#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100)
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
+
+/* Fields of HNS_ROCE_OPC_EXT_CFG */
+#define EXT_CFG_VF_ID CMQ_REQ_FIELD_LOC(31, 0)
+#define EXT_CFG_QP_PI_IDX CMQ_REQ_FIELD_LOC(45, 32)
+#define EXT_CFG_QP_PI_NUM CMQ_REQ_FIELD_LOC(63, 48)
+#define EXT_CFG_QP_NUM CMQ_REQ_FIELD_LOC(87, 64)
+#define EXT_CFG_QP_IDX CMQ_REQ_FIELD_LOC(119, 96)
+#define EXT_CFG_LLM_IDX CMQ_REQ_FIELD_LOC(139, 128)
+#define EXT_CFG_LLM_NUM CMQ_REQ_FIELD_LOC(156, 144)
+
+#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
+#define CFG_LLM_A_PGSZ CMQ_REQ_FIELD_LOC(83, 80)
+#define CFG_LLM_A_INIT_EN CMQ_REQ_FIELD_LOC(84, 84)
+#define CFG_LLM_A_HEAD_BA_L CMQ_REQ_FIELD_LOC(127, 96)
+#define CFG_LLM_A_HEAD_BA_H CMQ_REQ_FIELD_LOC(147, 128)
+#define CFG_LLM_A_HEAD_NXTPTR CMQ_REQ_FIELD_LOC(159, 148)
+#define CFG_LLM_A_HEAD_PTR CMQ_REQ_FIELD_LOC(171, 160)
+#define CFG_LLM_B_TAIL_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_B_TAIL_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_B_TAIL_PTR CMQ_REQ_FIELD_LOC(75, 64)
+
+/* Fields of HNS_ROCE_OPC_CFG_GLOBAL_PARAM */
+#define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
+#define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
+
+/*
+ * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
+ * and HNS_ROCE_OPC_ALLOC_VF_RES
+ */
+#define FUNC_RES_A_VF_ID CMQ_REQ_FIELD_LOC(7, 0)
+#define FUNC_RES_A_QPC_BT_IDX CMQ_REQ_FIELD_LOC(42, 32)
+#define FUNC_RES_A_QPC_BT_NUM CMQ_REQ_FIELD_LOC(59, 48)
+#define FUNC_RES_A_SRQC_BT_IDX CMQ_REQ_FIELD_LOC(72, 64)
+#define FUNC_RES_A_SRQC_BT_NUM CMQ_REQ_FIELD_LOC(89, 80)
+#define FUNC_RES_A_CQC_BT_IDX CMQ_REQ_FIELD_LOC(104, 96)
+#define FUNC_RES_A_CQC_BT_NUM CMQ_REQ_FIELD_LOC(121, 112)
+#define FUNC_RES_A_MPT_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_A_MPT_BT_NUM CMQ_REQ_FIELD_LOC(153, 144)
+#define FUNC_RES_A_EQC_BT_IDX CMQ_REQ_FIELD_LOC(168, 160)
+#define FUNC_RES_A_EQC_BT_NUM CMQ_REQ_FIELD_LOC(185, 176)
+#define FUNC_RES_B_SMAC_IDX CMQ_REQ_FIELD_LOC(39, 32)
+#define FUNC_RES_B_SMAC_NUM CMQ_REQ_FIELD_LOC(48, 40)
+#define FUNC_RES_B_SGID_IDX CMQ_REQ_FIELD_LOC(71, 64)
+#define FUNC_RES_B_SGID_NUM CMQ_REQ_FIELD_LOC(80, 72)
+#define FUNC_RES_B_QID_IDX CMQ_REQ_FIELD_LOC(105, 96)
+#define FUNC_RES_B_QID_NUM CMQ_REQ_FIELD_LOC(122, 112)
+#define FUNC_RES_V_QID_NUM CMQ_REQ_FIELD_LOC(115, 112)
+
+#define FUNC_RES_B_SCCC_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_B_SCCC_BT_NUM CMQ_REQ_FIELD_LOC(145, 137)
+#define FUNC_RES_B_GMV_BT_IDX CMQ_REQ_FIELD_LOC(167, 160)
+#define FUNC_RES_B_GMV_BT_NUM CMQ_REQ_FIELD_LOC(176, 168)
+#define FUNC_RES_V_GMV_BT_NUM CMQ_REQ_FIELD_LOC(184, 176)
+
+/* Fields of HNS_ROCE_OPC_QUERY_PF_TIMER_RES */
+#define PF_TIMER_RES_QPC_ITEM_IDX CMQ_REQ_FIELD_LOC(43, 32)
+#define PF_TIMER_RES_QPC_ITEM_NUM CMQ_REQ_FIELD_LOC(60, 48)
+#define PF_TIMER_RES_CQC_ITEM_IDX CMQ_REQ_FIELD_LOC(74, 64)
+#define PF_TIMER_RES_CQC_ITEM_NUM CMQ_REQ_FIELD_LOC(91, 80)
+
+struct hns_roce_vf_switch {
+ __le32 rocee_sel;
+ __le32 fun_id;
+ __le32 cfg;
+ __le32 resv1;
+ __le32 resv2;
+ __le32 resv3;
+};
+
+#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
+
+#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
+#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
+#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
+#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
+
+struct hns_roce_post_mbox {
+ __le32 in_param_l;
+ __le32 in_param_h;
+ __le32 out_param_l;
+ __le32 out_param_h;
+ __le32 cmd_tag;
+ __le32 token_event_en;
+};
+
+struct hns_roce_mbox_status {
+ __le32 mb_status_hw_run;
+ __le32 rsv[5];
+};
+
+#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
+
+#define MB_ST_HW_RUN_M BIT(31)
+#define MB_ST_COMPLETE_M GENMASK(7, 0)
+
+#define MB_ST_COMPLETE_SUCC 1
+
+/* Fields of HNS_ROCE_OPC_CFG_BT_ATTR */
+#define CFG_BT_ATTR_QPC_BA_PGSZ CMQ_REQ_FIELD_LOC(3, 0)
+#define CFG_BT_ATTR_QPC_BUF_PGSZ CMQ_REQ_FIELD_LOC(7, 4)
+#define CFG_BT_ATTR_QPC_HOPNUM CMQ_REQ_FIELD_LOC(9, 8)
+#define CFG_BT_ATTR_SRQC_BA_PGSZ CMQ_REQ_FIELD_LOC(35, 32)
+#define CFG_BT_ATTR_SRQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(39, 36)
+#define CFG_BT_ATTR_SRQC_HOPNUM CMQ_REQ_FIELD_LOC(41, 40)
+#define CFG_BT_ATTR_CQC_BA_PGSZ CMQ_REQ_FIELD_LOC(67, 64)
+#define CFG_BT_ATTR_CQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(71, 68)
+#define CFG_BT_ATTR_CQC_HOPNUM CMQ_REQ_FIELD_LOC(73, 72)
+#define CFG_BT_ATTR_MPT_BA_PGSZ CMQ_REQ_FIELD_LOC(99, 96)
+#define CFG_BT_ATTR_MPT_BUF_PGSZ CMQ_REQ_FIELD_LOC(103, 100)
+#define CFG_BT_ATTR_MPT_HOPNUM CMQ_REQ_FIELD_LOC(105, 104)
+#define CFG_BT_ATTR_SCCC_BA_PGSZ CMQ_REQ_FIELD_LOC(131, 128)
+#define CFG_BT_ATTR_SCCC_BUF_PGSZ CMQ_REQ_FIELD_LOC(135, 132)
+#define CFG_BT_ATTR_SCCC_HOPNUM CMQ_REQ_FIELD_LOC(137, 136)
+
+/* Fields of HNS_ROCE_OPC_CFG_ENTRY_SIZE */
+#define CFG_HEM_ENTRY_SIZE_TYPE CMQ_REQ_FIELD_LOC(31, 0)
+enum {
+ HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+ HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
+};
+
+#define CFG_HEM_ENTRY_SIZE_VALUE CMQ_REQ_FIELD_LOC(191, 160)
+
+/* Fields of HNS_ROCE_OPC_CFG_GMV_BT */
+#define CFG_GMV_BT_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
+#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
+
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
+
+struct hns_roce_cfg_sgid_tb {
+ __le32 table_idx_rsv;
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_rsv;
+};
+
+#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
+
+#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
+#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
+
+struct hns_roce_cfg_smac_tb {
+ __le32 tb_idx_rsv;
+ __le32 vf_smac_l;
+ __le32 vf_smac_h_rsv;
+ __le32 rsv[3];
+};
+
+#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
+
+#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
+#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
+
+struct hns_roce_cfg_gmv_tb_a {
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_vlan;
+ __le32 resv;
+};
+
+#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
+
+#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
+#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
+#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
+
+struct hns_roce_cfg_gmv_tb_b {
+ __le32 vf_smac_l;
+ __le32 vf_smac_h;
+ __le32 table_idx_rsv;
+ __le32 resv[3];
+};
+
+#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
+
+#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
+#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
+
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+struct hns_roce_query_pf_caps_a {
+ u8 number_ports;
+ u8 local_ca_ack_delay;
+ __le16 max_sq_sg;
+ __le16 max_sq_inline;
+ __le16 max_rq_sg;
+ __le32 rsv0;
+ __le16 num_qpc_timer;
+ __le16 num_cqc_timer;
+ __le16 max_srq_sges;
+ u8 num_aeq_vectors;
+ u8 num_other_vectors;
+ u8 max_sq_desc_sz;
+ u8 max_rq_desc_sz;
+ u8 rsv1;
+ u8 cqe_sz;
+};
+
+struct hns_roce_query_pf_caps_b {
+ u8 mtpt_entry_sz;
+ u8 irrl_entry_sz;
+ u8 trrl_entry_sz;
+ u8 cqc_entry_sz;
+ u8 srqc_entry_sz;
+ u8 idx_entry_sz;
+ u8 sccc_sz;
+ u8 max_mtu;
+ __le16 qpc_sz;
+ __le16 qpc_timer_entry_sz;
+ __le16 cqc_timer_entry_sz;
+ u8 min_cqes;
+ u8 min_wqes;
+ __le32 page_size_cap;
+ u8 pkey_table_len;
+ u8 phy_num_uars;
+ u8 ctx_hop_num;
+ u8 pbl_hop_num;
+};
+
+struct hns_roce_query_pf_caps_c {
+ __le32 cap_flags_num_pds;
+ __le32 max_gid_num_cqs;
+ __le32 cq_depth;
+ __le32 num_mrws;
+ __le32 ord_num_qps;
+ __le16 sq_depth;
+ __le16 rq_depth;
+};
+
+#define PF_CAPS_C_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
+
+#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
+#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
+#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
+#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
+#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
+#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
+#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
+#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
+
+struct hns_roce_query_pf_caps_d {
+ __le32 wq_hop_num_max_srqs;
+ __le16 srq_depth;
+ __le16 cap_flags_ex;
+ __le32 num_ceqs_ceq_depth;
+ __le32 arm_st_aeq_depth;
+ __le32 num_uars_rsv_pds;
+ __le32 rsv_uars_rsv_qps;
+};
+
+#define PF_CAPS_D_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
+
+#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
+#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
+#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
+#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
+#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
+#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
+#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
+#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
+#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
+#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
+#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
+#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
+
+#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
+
+struct hns_roce_congestion_algorithm {
+ u8 alg_sel;
+ u8 alg_sub_sel;
+ u8 dip_vld;
+ u8 wnd_mode_sel;
+};
+
+struct hns_roce_query_pf_caps_e {
+ __le32 chunk_size_shift_rsv_mrws;
+ __le32 rsv_cqs;
+ __le32 rsv_srqs;
+ __le32 rsv_lkey;
+ __le16 ceq_max_cnt;
+ __le16 ceq_period;
+ __le16 aeq_max_cnt;
+ __le16 aeq_period;
+};
+
+#define PF_CAPS_E_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
+
+#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
+#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
+#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
+#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
+#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
+
+struct hns_roce_cmq_req {
+ __le32 data[6];
+};
+
+#define CMQ_REQ_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cmq_req, h, l)
+
+struct hns_roce_cmq_desc {
+ __le16 opcode;
+ __le16 flag;
+ __le16 retval;
+ __le16 rsv;
+ union {
+ __le32 data[6];
+ struct {
+ __le32 own_func_num;
+ __le32 own_mac_id;
+ __le32 rsv[4];
+ } func_info;
+ };
+};
+
+struct hns_roce_v2_cmq_ring {
+ dma_addr_t desc_dma_addr;
+ struct hns_roce_cmq_desc *desc;
+ u32 head;
+ u16 buf_size;
+ u16 desc_num;
+ u8 flag;
+ spinlock_t lock; /* command queue lock */
+};
+
+struct hns_roce_v2_cmq {
+ struct hns_roce_v2_cmq_ring csq;
+ u16 tx_timeout;
+};
+
+struct hns_roce_link_table {
+ struct hns_roce_buf_list table;
+ struct hns_roce_buf *buf;
+};
+
+#define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
+#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
+
+struct hns_roce_v2_free_mr {
+ struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+ struct hns_roce_cq *rsv_cq;
+ struct hns_roce_pd *rsv_pd;
+ struct mutex mutex;
+};
+
+struct hns_roce_v2_priv {
+ struct hnae3_handle *handle;
+ struct hns_roce_v2_cmq cmq;
+ struct hns_roce_link_table ext_llm;
+ struct hns_roce_v2_free_mr free_mr;
+};
+
+struct hns_roce_dip {
+ u8 dgid[GID_LEN_V2];
+ u32 dip_idx;
+ struct list_head node; /* all dips are on a list */
+};
+
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
+};
+
+/* only for RNR timeout issue of HIP08 */
+#define HNS_ROCE_CLOCK_ADJUST 1000
+#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_EQ_PERIOD 65
+#define HNS_ROCE_RNR_TIMER_10NS 1
+#define HNS_ROCE_1US_CFG 999
+#define HNS_ROCE_1NS_CFG 0
+
+#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
+#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x0
+
+#define HNS_ROCE_V2_EQ_STATE_INVALID 0
+#define HNS_ROCE_V2_EQ_STATE_VALID 1
+#define HNS_ROCE_V2_EQ_STATE_OVERFLOW 2
+#define HNS_ROCE_V2_EQ_STATE_FAILURE 3
+
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_0 0
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_1 1
+
+#define HNS_ROCE_V2_EQ_COALESCE_0 0
+#define HNS_ROCE_V2_EQ_COALESCE_1 1
+
+#define HNS_ROCE_V2_EQ_FIRED 0
+#define HNS_ROCE_V2_EQ_ARMED 1
+#define HNS_ROCE_V2_EQ_ALWAYS_ARMED 3
+
+#define HNS_ROCE_EQ_INIT_EQE_CNT 0
+#define HNS_ROCE_EQ_INIT_PROD_IDX 0
+#define HNS_ROCE_EQ_INIT_REPORT_TIMER 0
+#define HNS_ROCE_EQ_INIT_MSI_IDX 0
+#define HNS_ROCE_EQ_INIT_CONS_IDX 0
+#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
+
+#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
+#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
+
+#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
+
+#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
+#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
+#define HNS_ROCE_EQ_DB_CMD_CEQ 0x2
+#define HNS_ROCE_EQ_DB_CMD_CEQ_ARMED 0x3
+
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
+
+#define EQ_REG_OFFSET 0x4
+
+#define HNS_ROCE_INT_NAME_LEN 32
+#define HNS_ROCE_V2_EQN_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_VF_ABN_INT_EN_S 0
+#define HNS_ROCE_V2_VF_ABN_INT_EN_M GENMASK(0, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_ST_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_CFG_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_EVENT_INT_EN_M GENMASK(0, 0)
+
+struct hns_roce_eq_context {
+ __le32 data[16];
+};
+
+#define EQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_eq_context, h, l)
+
+#define EQC_EQ_ST EQC_FIELD_LOC(1, 0)
+#define EQC_EQE_HOP_NUM EQC_FIELD_LOC(3, 2)
+#define EQC_OVER_IGNORE EQC_FIELD_LOC(4, 4)
+#define EQC_COALESCE EQC_FIELD_LOC(5, 5)
+#define EQC_ARM_ST EQC_FIELD_LOC(7, 6)
+#define EQC_EQN EQC_FIELD_LOC(15, 8)
+#define EQC_EQE_CNT EQC_FIELD_LOC(31, 16)
+#define EQC_EQE_BA_PG_SZ EQC_FIELD_LOC(35, 32)
+#define EQC_EQE_BUF_PG_SZ EQC_FIELD_LOC(39, 36)
+#define EQC_EQ_PROD_INDX EQC_FIELD_LOC(63, 40)
+#define EQC_EQ_MAX_CNT EQC_FIELD_LOC(79, 64)
+#define EQC_EQ_PERIOD EQC_FIELD_LOC(95, 80)
+#define EQC_EQE_REPORT_TIMER EQC_FIELD_LOC(127, 96)
+#define EQC_EQE_BA_L EQC_FIELD_LOC(159, 128)
+#define EQC_EQE_BA_H EQC_FIELD_LOC(188, 160)
+#define EQC_SHIFT EQC_FIELD_LOC(199, 192)
+#define EQC_MSI_INDX EQC_FIELD_LOC(207, 200)
+#define EQC_CUR_EQE_BA_L EQC_FIELD_LOC(223, 208)
+#define EQC_CUR_EQE_BA_M EQC_FIELD_LOC(255, 224)
+#define EQC_CUR_EQE_BA_H EQC_FIELD_LOC(259, 256)
+#define EQC_EQ_CONS_INDX EQC_FIELD_LOC(287, 264)
+#define EQC_NEX_EQE_BA_L EQC_FIELD_LOC(319, 288)
+#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
+#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
+
+#define MAX_SERVICE_LEVEL 0x7
+
+struct hns_roce_wqe_atomic_seg {
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+};
+
+struct hns_roce_sccc_clr {
+ __le32 qpn;
+ __le32 rsv[5];
+};
+
+struct hns_roce_sccc_clr_done {
+ __le32 clr_done;
+ __le32 rsv[5];
+};
+
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
+ void __iomem *dest)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ hns_roce_write64_k(val, dest);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
new file mode 100644
index 000000000..da1b33d81
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -0,0 +1,1040 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_hem.h"
+
+static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
+ const u8 *addr)
+{
+ u8 phy_port;
+ u32 i;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
+ return 0;
+
+ for (i = 0; i < ETH_ALEN; i++)
+ hr_dev->dev_addr[port][i] = addr[i];
+
+ phy_port = hr_dev->iboe.phy_port[port];
+ return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
+}
+
+static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u32 port = attr->port_num - 1;
+ int ret;
+
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr);
+
+ return ret;
+}
+
+static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u32 port = attr->port_num - 1;
+ int ret;
+
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL);
+
+ return ret;
+}
+
+static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port,
+ unsigned long event)
+{
+ struct device *dev = hr_dev->dev;
+ struct net_device *netdev;
+ int ret = 0;
+
+ netdev = hr_dev->iboe.netdevs[port];
+ if (!netdev) {
+ dev_err(dev, "can't find netdev on port(%u)!\n", port);
+ return -ENODEV;
+ }
+
+ switch (event) {
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ case NETDEV_REGISTER:
+ case NETDEV_CHANGEADDR:
+ ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
+ break;
+ case NETDEV_DOWN:
+ /*
+ * In v1 engine, only support all ports closed together.
+ */
+ break;
+ default:
+ dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
+ break;
+ }
+
+ return ret;
+}
+
+static int hns_roce_netdev_event(struct notifier_block *self,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct hns_roce_ib_iboe *iboe = NULL;
+ struct hns_roce_dev *hr_dev = NULL;
+ int ret;
+ u32 port;
+
+ hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
+ iboe = &hr_dev->iboe;
+
+ for (port = 0; port < hr_dev->caps.num_ports; port++) {
+ if (dev == iboe->netdevs[port]) {
+ ret = handle_en_event(hr_dev, port, event);
+ if (ret)
+ return NOTIFY_DONE;
+ break;
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+ u8 i;
+
+ for (i = 0; i < hr_dev->caps.num_ports; i++) {
+ ret = hns_roce_set_mac(hr_dev, i,
+ hr_dev->iboe.netdevs[i]->dev_addr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hns_roce_query_device(struct ib_device *ib_dev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ memset(props, 0, sizeof(*props));
+
+ props->fw_ver = hr_dev->caps.fw_ver;
+ props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
+ props->max_mr_size = (u64)(~(0ULL));
+ props->page_size_cap = hr_dev->caps.page_size_cap;
+ props->vendor_id = hr_dev->vendor_id;
+ props->vendor_part_id = hr_dev->vendor_part_id;
+ props->hw_ver = hr_dev->hw_rev;
+ props->max_qp = hr_dev->caps.num_qps;
+ props->max_qp_wr = hr_dev->caps.max_wqes;
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = hr_dev->caps.max_sq_sg;
+ props->max_recv_sge = hr_dev->caps.max_rq_sg;
+ props->max_sge_rd = 1;
+ props->max_cq = hr_dev->caps.num_cqs;
+ props->max_cqe = hr_dev->caps.max_cqes;
+ props->max_mr = hr_dev->caps.num_mtpts;
+ props->max_pd = hr_dev->caps.num_pds;
+ props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
+ props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
+ props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ props->max_srq = hr_dev->caps.num_srqs;
+ props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+ props->max_srq_sge = hr_dev->caps.max_srq_sges;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
+ hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ props->device_cap_flags |= IB_DEVICE_XRC;
+
+ return 0;
+}
+
+static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
+ struct ib_port_attr *props)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct device *dev = hr_dev->dev;
+ struct net_device *net_dev;
+ unsigned long flags;
+ enum ib_mtu mtu;
+ u32 port;
+ int ret;
+
+ port = port_num - 1;
+
+ /* props being zeroed by the caller, avoid zeroing it here */
+
+ props->max_mtu = hr_dev->caps.max_mtu;
+ props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP |
+ IB_PORT_BOOT_MGMT_SUP;
+ props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
+ props->pkey_tbl_len = 1;
+ ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
+ &props->active_width);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+ net_dev = hr_dev->iboe.netdevs[port];
+ if (!net_dev) {
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ dev_err(dev, "find netdev %u failed!\n", port);
+ return -EINVAL;
+ }
+
+ mtu = iboe_get_mtu(net_dev->mtu);
+ props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
+ props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
+ IB_PORT_ACTIVE :
+ IB_PORT_DOWN;
+ props->phys_state = props->state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP :
+ IB_PORT_PHYS_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
+}
+
+static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
+ u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 0)
+ return -EINVAL;
+
+ *pkey = PKEY_ID;
+
+ return 0;
+}
+
+static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
+ struct ib_device_modify *props)
+{
+ unsigned long flags;
+
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock_irqsave(&to_hr_dev(ib_dev)->sm_lock, flags);
+ memcpy(ib_dev->node_desc, props->node_desc, NODE_DESC_SIZE);
+ spin_unlock_irqrestore(&to_hr_dev(ib_dev)->sm_lock, flags);
+ }
+
+ return 0;
+}
+
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type)
+{
+ struct hns_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_type = mmap_type;
+
+ switch (mmap_type) {
+ /* pgoff 0 must be used by DB for compatibility */
+ case HNS_ROCE_MMAP_TYPE_DB:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 0);
+ break;
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ ret = rdma_user_mmap_entry_insert_range(
+ ucontext, &entry->rdma_entry, length, 1,
+ U32_MAX);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ return entry;
+}
+
+static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
+{
+ if (context->db_mmap_entry)
+ rdma_user_mmap_entry_remove(
+ &context->db_mmap_entry->rdma_entry);
+}
+
+static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+ u64 address;
+
+ address = context->uar.pfn << PAGE_SHIFT;
+ context->db_mmap_entry = hns_roce_user_mmap_entry_insert(
+ uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB);
+ if (!context->db_mmap_entry)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
+ struct hns_roce_ib_alloc_ucontext ucmd = {};
+ int ret;
+
+ if (!hr_dev->active)
+ return -EAGAIN;
+
+ resp.qp_tab_size = hr_dev->caps.num_qps;
+ resp.srq_tab_size = hr_dev->caps.num_srqs;
+
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret)
+ return ret;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+ if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+ resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+ resp.max_inline_data = hr_dev->caps.max_sq_inline;
+ }
+
+ ret = hns_roce_uar_alloc(hr_dev, &context->uar);
+ if (ret)
+ goto error_fail_uar_alloc;
+
+ ret = hns_roce_alloc_uar_entry(uctx);
+ if (ret)
+ goto error_fail_uar_entry;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
+ INIT_LIST_HEAD(&context->page_list);
+ mutex_init(&context->page_mutex);
+ }
+
+ resp.cqe_size = hr_dev->caps.cqe_sz;
+
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ goto error_fail_copy_to_udata;
+
+ return 0;
+
+error_fail_copy_to_udata:
+ hns_roce_dealloc_uar_entry(context);
+
+error_fail_uar_entry:
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
+
+error_fail_uar_alloc:
+ return ret;
+}
+
+static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+
+ hns_roce_dealloc_uar_entry(context);
+
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
+}
+
+static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct hns_user_mmap_entry *entry;
+ phys_addr_t pfn;
+ pgprot_t prot;
+ int ret;
+
+ rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
+ if (!rdma_entry)
+ return -EINVAL;
+
+ entry = to_hns_mmap(rdma_entry);
+ pfn = entry->address >> PAGE_SHIFT;
+
+ switch (entry->mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
+ prot, rdma_entry);
+
+out:
+ rdma_user_mmap_entry_put(rdma_entry);
+ return ret;
+}
+
+static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry);
+
+ kfree(entry);
+}
+
+static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (to_hr_dev(ib_dev)->caps.flags & HNS_ROCE_CAP_FLAG_ROCE_V1_V2)
+ immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return 0;
+}
+
+static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
+{
+ u64 fw_ver = to_hr_dev(device)->caps.fw_ver;
+ unsigned int major, minor, sub_minor;
+
+ major = upper_32_bits(fw_ver);
+ minor = high_16_bits(lower_32_bits(fw_ver));
+ sub_minor = low_16_bits(fw_ver);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%04u", major, minor,
+ sub_minor);
+}
+
+static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+
+ hr_dev->active = false;
+ unregister_netdevice_notifier(&iboe->nb);
+ ib_unregister_device(&hr_dev->ib_dev);
+}
+
+static const struct ib_device_ops hns_roce_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HNS,
+ .uverbs_abi_ver = 1,
+ .uverbs_no_driver_id_binding = 1,
+
+ .get_dev_fw_str = hns_roce_get_fw_ver,
+ .add_gid = hns_roce_add_gid,
+ .alloc_pd = hns_roce_alloc_pd,
+ .alloc_ucontext = hns_roce_alloc_ucontext,
+ .create_ah = hns_roce_create_ah,
+ .create_user_ah = hns_roce_create_ah,
+ .create_cq = hns_roce_create_cq,
+ .create_qp = hns_roce_create_qp,
+ .dealloc_pd = hns_roce_dealloc_pd,
+ .dealloc_ucontext = hns_roce_dealloc_ucontext,
+ .del_gid = hns_roce_del_gid,
+ .dereg_mr = hns_roce_dereg_mr,
+ .destroy_ah = hns_roce_destroy_ah,
+ .destroy_cq = hns_roce_destroy_cq,
+ .disassociate_ucontext = hns_roce_disassociate_ucontext,
+ .get_dma_mr = hns_roce_get_dma_mr,
+ .get_link_layer = hns_roce_get_link_layer,
+ .get_port_immutable = hns_roce_port_immutable,
+ .mmap = hns_roce_mmap,
+ .mmap_free = hns_roce_free_mmap,
+ .modify_device = hns_roce_modify_device,
+ .modify_qp = hns_roce_modify_qp,
+ .query_ah = hns_roce_query_ah,
+ .query_device = hns_roce_query_device,
+ .query_pkey = hns_roce_query_pkey,
+ .query_port = hns_roce_query_port,
+ .reg_user_mr = hns_roce_reg_user_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, hns_roce_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops hns_roce_dev_mr_ops = {
+ .rereg_user_mr = hns_roce_rereg_user_mr,
+};
+
+static const struct ib_device_ops hns_roce_dev_mw_ops = {
+ .alloc_mw = hns_roce_alloc_mw,
+ .dealloc_mw = hns_roce_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw),
+};
+
+static const struct ib_device_ops hns_roce_dev_frmr_ops = {
+ .alloc_mr = hns_roce_alloc_mr,
+ .map_mr_sg = hns_roce_map_mr_sg,
+};
+
+static const struct ib_device_ops hns_roce_dev_srq_ops = {
+ .create_srq = hns_roce_create_srq,
+ .destroy_srq = hns_roce_destroy_srq,
+
+ INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
+};
+
+static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
+ .alloc_xrcd = hns_roce_alloc_xrcd,
+ .dealloc_xrcd = hns_roce_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
+};
+
+static const struct ib_device_ops hns_roce_dev_restrack_ops = {
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = hns_roce_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = hns_roce_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
+};
+
+static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+ struct hns_roce_ib_iboe *iboe = NULL;
+ struct ib_device *ib_dev = NULL;
+ struct device *dev = hr_dev->dev;
+ unsigned int i;
+
+ iboe = &hr_dev->iboe;
+ spin_lock_init(&iboe->lock);
+
+ ib_dev = &hr_dev->ib_dev;
+
+ ib_dev->node_type = RDMA_NODE_IB_CA;
+ ib_dev->dev.parent = dev;
+
+ ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
+ ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
+ ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
+ for (i = 0; i < hr_dev->caps.num_ports; i++) {
+ if (!hr_dev->iboe.netdevs[i])
+ continue;
+
+ ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i],
+ i + 1);
+ if (ret)
+ return ret;
+ }
+ dma_set_max_seg_size(dev, UINT_MAX);
+ ret = ib_register_device(ib_dev, "hns_%d", dev);
+ if (ret) {
+ dev_err(dev, "ib_register_device failed!\n");
+ return ret;
+ }
+
+ ret = hns_roce_setup_mtu_mac(hr_dev);
+ if (ret) {
+ dev_err(dev, "setup_mtu_mac failed!\n");
+ goto error_failed_setup_mtu_mac;
+ }
+
+ iboe->nb.notifier_call = hns_roce_netdev_event;
+ ret = register_netdevice_notifier(&iboe->nb);
+ if (ret) {
+ dev_err(dev, "register_netdevice_notifier failed!\n");
+ goto error_failed_setup_mtu_mac;
+ }
+
+ hr_dev->active = true;
+ return 0;
+
+error_failed_setup_mtu_mac:
+ ib_unregister_device(ib_dev);
+
+ return ret;
+}
+
+static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
+ HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
+ hr_dev->caps.num_mtpts);
+ if (ret) {
+ dev_err(dev, "failed to init MTPT context memory, aborting.\n");
+ return ret;
+ }
+
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
+ HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev, "failed to init QP context memory, aborting.\n");
+ goto err_unmap_dmpt;
+ }
+
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.irrl_table,
+ HEM_TYPE_IRRL,
+ hr_dev->caps.irrl_entry_sz *
+ hr_dev->caps.max_qp_init_rdma,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev, "failed to init irrl_table memory, aborting.\n");
+ goto err_unmap_qp;
+ }
+
+ if (hr_dev->caps.trrl_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table,
+ HEM_TYPE_TRRL,
+ hr_dev->caps.trrl_entry_sz *
+ hr_dev->caps.max_qp_dest_rdma,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev,
+ "failed to init trrl_table memory, aborting.\n");
+ goto err_unmap_irrl;
+ }
+ }
+
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table,
+ HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz,
+ hr_dev->caps.num_cqs);
+ if (ret) {
+ dev_err(dev, "failed to init CQ context memory, aborting.\n");
+ goto err_unmap_trrl;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+ HEM_TYPE_SRQC,
+ hr_dev->caps.srqc_entry_sz,
+ hr_dev->caps.num_srqs);
+ if (ret) {
+ dev_err(dev,
+ "failed to init SRQ context memory, aborting.\n");
+ goto err_unmap_cq;
+ }
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table,
+ HEM_TYPE_SCCC,
+ hr_dev->caps.sccc_sz,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev,
+ "failed to init SCC context memory, aborting.\n");
+ goto err_unmap_srq;
+ }
+ }
+
+ if (hr_dev->caps.qpc_timer_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
+ HEM_TYPE_QPC_TIMER,
+ hr_dev->caps.qpc_timer_entry_sz,
+ hr_dev->caps.qpc_timer_bt_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init QPC timer memory, aborting.\n");
+ goto err_unmap_ctx;
+ }
+ }
+
+ if (hr_dev->caps.cqc_timer_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
+ HEM_TYPE_CQC_TIMER,
+ hr_dev->caps.cqc_timer_entry_sz,
+ hr_dev->caps.cqc_timer_bt_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init CQC timer memory, aborting.\n");
+ goto err_unmap_qpc_timer;
+ }
+ }
+
+ if (hr_dev->caps.gmv_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
+ HEM_TYPE_GMV,
+ hr_dev->caps.gmv_entry_sz,
+ hr_dev->caps.gmv_entry_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init gmv table memory, ret = %d\n",
+ ret);
+ goto err_unmap_cqc_timer;
+ }
+ }
+
+ return 0;
+
+err_unmap_cqc_timer:
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table);
+
+err_unmap_qpc_timer:
+ if (hr_dev->caps.qpc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
+
+err_unmap_ctx:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table);
+err_unmap_srq:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
+err_unmap_trrl:
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table);
+
+err_unmap_irrl:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
+
+err_unmap_qp:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
+
+err_unmap_dmpt:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
+
+ return ret;
+}
+
+/**
+ * hns_roce_setup_hca - setup host channel adapter
+ * @hr_dev: pointer to hns roce device
+ * Return : int
+ */
+static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ spin_lock_init(&hr_dev->sm_lock);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
+ INIT_LIST_HEAD(&hr_dev->pgdir_list);
+ mutex_init(&hr_dev->pgdir_mutex);
+ }
+
+ hns_roce_init_uar_table(hr_dev);
+
+ ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
+ if (ret) {
+ dev_err(dev, "failed to allocate priv_uar.\n");
+ goto err_uar_table_free;
+ }
+
+ ret = hns_roce_init_qp_table(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to init qp_table.\n");
+ goto err_uar_table_free;
+ }
+
+ hns_roce_init_pd_table(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ hns_roce_init_xrcd_table(hr_dev);
+
+ hns_roce_init_mr_table(hr_dev);
+
+ hns_roce_init_cq_table(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_init_srq_table(hr_dev);
+
+ return 0;
+
+err_uar_table_free:
+ ida_destroy(&hr_dev->uar_ida.ida);
+ return ret;
+}
+
+static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+ if (cq->comp_handler) {
+ if (!hr_cq->is_armed) {
+ hr_cq->is_armed = 1;
+ list_add_tail(&hr_cq->node, cq_list);
+ }
+ }
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+}
+
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_qp *hr_qp;
+ struct hns_roce_cq *hr_cq;
+ struct list_head cq_list;
+ unsigned long flags_qp;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cq_list);
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
+ spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
+ if (hr_qp->sq.tail != hr_qp->sq.head)
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
+ spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
+ if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
+ }
+
+ list_for_each_entry(hr_cq, &cq_list, node)
+ hns_roce_cq_completion(hr_dev, hr_cq->cqn);
+
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+int hns_roce_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ hr_dev->is_reset = false;
+
+ if (hr_dev->hw->cmq_init) {
+ ret = hr_dev->hw->cmq_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "init RoCE Command Queue failed!\n");
+ return ret;
+ }
+ }
+
+ ret = hr_dev->hw->hw_profile(hr_dev);
+ if (ret) {
+ dev_err(dev, "get RoCE engine profile failed!\n");
+ goto error_failed_cmd_init;
+ }
+
+ ret = hns_roce_cmd_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "cmd init failed!\n");
+ goto error_failed_cmd_init;
+ }
+
+ /* EQ depends on poll mode, event mode depends on EQ */
+ ret = hr_dev->hw->init_eq(hr_dev);
+ if (ret) {
+ dev_err(dev, "eq init failed!\n");
+ goto error_failed_eq_table;
+ }
+
+ if (hr_dev->cmd_mod) {
+ ret = hns_roce_cmd_use_events(hr_dev);
+ if (ret)
+ dev_warn(dev,
+ "Cmd event mode failed, set back to poll!\n");
+ }
+
+ ret = hns_roce_init_hem(hr_dev);
+ if (ret) {
+ dev_err(dev, "init HEM(Hardware Entry Memory) failed!\n");
+ goto error_failed_init_hem;
+ }
+
+ ret = hns_roce_setup_hca(hr_dev);
+ if (ret) {
+ dev_err(dev, "setup hca failed!\n");
+ goto error_failed_setup_hca;
+ }
+
+ if (hr_dev->hw->hw_init) {
+ ret = hr_dev->hw->hw_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "hw_init failed!\n");
+ goto error_failed_engine_init;
+ }
+ }
+
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
+ INIT_LIST_HEAD(&hr_dev->dip_list);
+ spin_lock_init(&hr_dev->dip_list_lock);
+
+ ret = hns_roce_register_device(hr_dev);
+ if (ret)
+ goto error_failed_register_device;
+
+ return 0;
+
+error_failed_register_device:
+ if (hr_dev->hw->hw_exit)
+ hr_dev->hw->hw_exit(hr_dev);
+
+error_failed_engine_init:
+ hns_roce_cleanup_bitmap(hr_dev);
+
+error_failed_setup_hca:
+ hns_roce_cleanup_hem(hr_dev);
+
+error_failed_init_hem:
+ if (hr_dev->cmd_mod)
+ hns_roce_cmd_use_polling(hr_dev);
+ hr_dev->hw->cleanup_eq(hr_dev);
+
+error_failed_eq_table:
+ hns_roce_cmd_cleanup(hr_dev);
+
+error_failed_cmd_init:
+ if (hr_dev->hw->cmq_exit)
+ hr_dev->hw->cmq_exit(hr_dev);
+
+ return ret;
+}
+
+void hns_roce_exit(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_unregister_device(hr_dev);
+
+ if (hr_dev->hw->hw_exit)
+ hr_dev->hw->hw_exit(hr_dev);
+ hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_cleanup_hem(hr_dev);
+
+ if (hr_dev->cmd_mod)
+ hns_roce_cmd_use_polling(hr_dev);
+
+ hr_dev->hw->cleanup_eq(hr_dev);
+ hns_roce_cmd_cleanup(hr_dev);
+ if (hr_dev->hw->cmq_exit)
+ hr_dev->hw->cmq_exit(hr_dev);
+}
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
+MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>");
+MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
+MODULE_DESCRIPTION("HNS RoCE Driver");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
new file mode 100644
index 000000000..14376490a
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+#include <linux/math.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+static u32 hw_index_to_key(int ind)
+{
+ return ((u32)ind >> 24) | ((u32)ind << 8);
+}
+
+unsigned long key_to_hw_index(u32 key)
+{
+ return (key << 24) | (key >> 8);
+}
+
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int err;
+ int id;
+
+ /* Allocate a key for mr from mr_table */
+ id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
+ return -ENOMEM;
+ }
+
+ mr->key = hw_index_to_key(id); /* MR key */
+
+ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
+ (unsigned long)id);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
+ goto err_free_bitmap;
+ }
+
+ return 0;
+err_free_bitmap:
+ ida_free(&mtpt_ida->ida, id);
+ return err;
+}
+
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ unsigned long obj = key_to_hw_index(mr->key);
+
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+ ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
+}
+
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+ struct ib_udata *udata, u64 start)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ bool is_fast = mr->type == MR_TYPE_FRMR;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
+ buf_attr.page_shift = is_fast ? PAGE_SHIFT :
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = mr->size;
+ buf_attr.region[0].hopnum = mr->pbl_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.user_access = mr->access;
+ /* fast MR's buffer is alloced before mapping, not at creation */
+ buf_attr.mtt_only = is_fast;
+
+ err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
+ hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
+ udata, start);
+ if (err)
+ ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+ else
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+
+ return err;
+}
+
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
+}
+
+static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (mr->enabled) {
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ key_to_hw_index(mr->key) &
+ (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
+ }
+
+ free_mr_pbl(hr_dev, mr);
+ free_mr_key(hr_dev, mr);
+}
+
+static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr)
+{
+ unsigned long mtpt_idx = key_to_hw_index(mr->key);
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ /* Allocate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ if (mr->type != MR_TYPE_FRMR)
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
+ else
+ ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
+ if (ret) {
+ dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto err_page;
+ }
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
+ goto err_page;
+ }
+
+ mr->enabled = 1;
+
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+
+ ida_init(&mtpt_ida->ida);
+ mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
+ mtpt_ida->min = hr_dev->caps.reserved_mrws;
+}
+
+struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_mr *mr;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_DMA;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = acc;
+
+ /* Allocate memory region key */
+ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_free;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+
+ return &mr->ibmr;
+err_mr:
+ free_mr_key(hr_dev, mr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_mr *mr;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->iova = virt_addr;
+ mr->size = length;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = access_flags;
+ mr->type = MR_TYPE_MR;
+
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_alloc_mr;
+
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret)
+ goto err_alloc_key;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_alloc_pbl;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+
+ return &mr->ibmr;
+
+err_alloc_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_alloc_key:
+ free_mr_key(hr_dev, mr);
+err_alloc_mr:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ib_dev = &hr_dev->ib_dev;
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_cmd_mailbox *mailbox;
+ unsigned long mtpt_idx;
+ int ret;
+
+ if (!mr->enabled)
+ return ERR_PTR(-EINVAL);
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return ERR_CAST(mailbox);
+
+ mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ mtpt_idx);
+ if (ret)
+ goto free_cmd_mbox;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ mtpt_idx);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
+
+ mr->enabled = 0;
+ mr->iova = virt_addr;
+ mr->size = length;
+
+ if (flags & IB_MR_REREG_PD)
+ mr->pd = to_hr_pd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access = mr_access_flags;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
+ ret);
+ goto free_cmd_mbox;
+ }
+ }
+
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto free_cmd_mbox;
+ }
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
+ goto free_cmd_mbox;
+ }
+
+ mr->enabled = 1;
+
+free_cmd_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+}
+
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ if (hr_dev->hw->dereg_mr)
+ hr_dev->hw->dereg_mr(hr_dev);
+
+ hns_roce_mr_free(hr_dev, mr);
+ kfree(mr);
+
+ return 0;
+}
+
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_mr *mr;
+ int ret;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+ dev_err(dev, "max_num_sg larger than %d\n",
+ HNS_ROCE_FRMR_MAX_PA);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_FRMR;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->size = max_num_sg * (1 << PAGE_SHIFT);
+
+ /* Allocate memory region key */
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_free;
+
+ ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
+ if (ret)
+ goto err_key;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_pbl;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ mr->ibmr.length = mr->size;
+
+ return &mr->ibmr;
+
+err_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_key:
+ free_mr_key(hr_dev, mr);
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
+ mr->page_list[mr->npages++] = addr;
+ return 0;
+ }
+
+ return -ENOBUFS;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_mtr *mtr = &mr->pbl_mtr;
+ int ret = 0;
+
+ mr->npages = 0;
+ mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (!mr->page_list)
+ return ret;
+
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
+ goto err_page_list;
+ }
+
+ mtr->hem_cfg.region[0].offset = 0;
+ mtr->hem_cfg.region[0].count = mr->npages;
+ mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
+ mtr->hem_cfg.region_count = 1;
+ ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
+ ret = 0;
+ } else {
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
+ ret = mr->npages;
+ }
+
+err_page_list:
+ kvfree(mr->page_list);
+ mr->page_list = NULL;
+
+ return ret;
+}
+
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (mw->enabled) {
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ key_to_hw_index(mw->rkey) &
+ (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
+
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+ key_to_hw_index(mw->rkey));
+ }
+
+ ida_free(&hr_dev->mr_table.mtpt_ida.ida,
+ (int)key_to_hw_index(mw->rkey));
+}
+
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+ int ret;
+
+ /* prepare HEM entry memory */
+ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ if (ret)
+ return ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_table;
+ }
+
+ ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
+ if (ret) {
+ dev_err(dev, "MW write mtpt fail!\n");
+ goto err_page;
+ }
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
+ goto err_page;
+ }
+
+ mw->enabled = 1;
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+ hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+
+ return ret;
+}
+
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
+ int ret;
+ int id;
+
+ /* Allocate a key for mw from mr_table */
+ id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ibdev, "failed to alloc id for MW key, id(%d)\n", id);
+ return -ENOMEM;
+ }
+
+ mw->rkey = hw_index_to_key(id);
+
+ ibmw->rkey = mw->rkey;
+ mw->pdn = to_hr_pd(ibmw->pd)->pdn;
+ mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+ mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+ mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+ ret = hns_roce_mw_enable(hr_dev, mw);
+ if (ret)
+ goto err_mw;
+
+ return 0;
+
+err_mw:
+ hns_roce_mw_free(hr_dev, mw);
+ return ret;
+}
+
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
+
+ hns_roce_mw_free(hr_dev, mw);
+ return 0;
+}
+
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_region *region, dma_addr_t *pages,
+ int max_count)
+{
+ int count, npage;
+ int offset, end;
+ __le64 *mtts;
+ u64 addr;
+ int i;
+
+ offset = region->offset;
+ end = offset + region->count;
+ npage = 0;
+ while (offset < end && npage < max_count) {
+ count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ offset, &count);
+ if (!mtts)
+ return -ENOBUFS;
+
+ for (i = 0; i < count && npage < max_count; i++) {
+ addr = pages[npage];
+
+ mtts[i] = cpu_to_le64(addr);
+ npage++;
+ }
+ offset += count;
+ }
+
+ return npage;
+}
+
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
+{
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
+ attr->region[i].hopnum > 0)
+ return true;
+
+ /* because the mtr only one root base address, when hopnum is 0 means
+ * root base address equals the first buffer address, thus all alloced
+ * memory must in a continuous space accessed by direct mode.
+ */
+ return false;
+}
+
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
+{
+ size_t size = 0;
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ size += attr->region[i].size;
+
+ return size;
+}
+
+/*
+ * check the given pages in continuous address space
+ * Returns 0 on success, or the error page num.
+ */
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
+ unsigned int page_shift)
+{
+ size_t page_size = 1 << page_shift;
+ int i;
+
+ for (i = 1; i < page_count; i++)
+ if (pages[i] - pages[i - 1] != page_size)
+ return i;
+
+ return 0;
+}
+
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release user buffers */
+ if (mtr->umem) {
+ ib_umem_release(mtr->umem);
+ mtr->umem = NULL;
+ }
+
+ /* release kernel buffers */
+ if (mtr->kmem) {
+ hns_roce_buf_free(hr_dev, mtr->kmem);
+ mtr->kmem = NULL;
+ }
+}
+
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ struct ib_udata *udata, unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t total_size;
+
+ total_size = mtr_bufs_size(buf_attr);
+
+ if (udata) {
+ mtr->kmem = NULL;
+ mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
+ buf_attr->user_access);
+ if (IS_ERR_OR_NULL(mtr->umem)) {
+ ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
+ PTR_ERR(mtr->umem));
+ return -ENOMEM;
+ }
+ } else {
+ mtr->umem = NULL;
+ mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ mtr->hem_cfg.is_direct ?
+ HNS_ROCE_BUF_DIRECT : 0);
+ if (IS_ERR(mtr->kmem)) {
+ ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
+ PTR_ERR(mtr->kmem));
+ return PTR_ERR(mtr->kmem);
+ }
+ }
+
+ return 0;
+}
+
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ int page_count, unsigned int page_shift)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t *pages;
+ int npage;
+ int ret;
+
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ if (mtr->umem)
+ npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
+ mtr->umem, page_shift);
+ else
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
+ mtr->kmem, page_shift);
+
+ if (npage != page_count) {
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
+ page_count);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+
+ if (mtr->hem_cfg.is_direct && npage > 1) {
+ ret = mtr_check_direct_pages(pages, npage, page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
+ mtr->umem ? "umtr" : "kmtr", ret, npage);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+ }
+
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+
+err_alloc_list:
+ kvfree(pages);
+
+ return ret;
+}
+
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_region *r;
+ unsigned int i, mapped_cnt;
+ int ret = 0;
+
+ /*
+ * Only use the first page address as root ba when hopnum is 0, this
+ * is because the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
+ }
+
+ for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
+ mapped_cnt < page_cnt; i++) {
+ r = &mtr->hem_cfg.region[i];
+ /* if hopnum is 0, no need to map pages in this region */
+ if (!r->hopnum) {
+ mapped_cnt += r->count;
+ continue;
+ }
+
+ if (r->offset + r->count > page_cnt) {
+ ret = -EINVAL;
+ ibdev_err(ibdev,
+ "failed to check mtr%u count %u + %u > %u.\n",
+ i, r->offset, r->count, page_cnt);
+ return ret;
+ }
+
+ ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
+ page_cnt - mapped_cnt);
+ if (ret < 0) {
+ ibdev_err(ibdev,
+ "failed to map mtr%u offset %u, ret = %d.\n",
+ i, r->offset, ret);
+ return ret;
+ }
+ mapped_cnt += ret;
+ ret = 0;
+ }
+
+ if (mapped_cnt < page_cnt) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
+ mapped_cnt, page_cnt);
+ }
+
+ return ret;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int mtt_count, left;
+ u32 start_index;
+ int total = 0;
+ __le64 *mtts;
+ u32 npage;
+ u64 addr;
+
+ if (!mtt_buf || mtt_max < 1)
+ goto done;
+
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ for (mtt_count = 0; mtt_count < cfg->region_count &&
+ total < mtt_max; mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
+
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
+
+ total++;
+ }
+
+ goto done;
+ }
+
+ start_index = offset >> cfg->buf_pg_shift;
+ left = mtt_max;
+ while (left > 0) {
+ mtt_count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ start_index + total,
+ &mtt_count);
+ if (!mtts || !mtt_count)
+ goto done;
+
+ npage = min(mtt_count, left);
+ left -= npage;
+ for (mtt_count = 0; mtt_count < npage; mtt_count++)
+ mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
+ }
+
+done:
+ if (base_addr)
+ *base_addr = cfg->root_ba;
+
+ return total;
+}
+
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr,
+ struct hns_roce_hem_cfg *cfg,
+ unsigned int *buf_page_shift, u64 unalinged_size)
+{
+ struct hns_roce_buf_region *r;
+ u64 first_region_padding;
+ int page_cnt, region_cnt;
+ unsigned int page_shift;
+ size_t buf_size;
+
+ /* If mtt is disabled, all pages must be within a continuous range */
+ cfg->is_direct = !mtr_has_mtt(attr);
+ buf_size = mtr_bufs_size(attr);
+ if (cfg->is_direct) {
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size, and we split the buffer into
+ * small pages which is used to check whether the adjacent
+ * units are in the continuous space and its size is fixed to
+ * 4K based on hns ROCEE's requirement.
+ */
+ page_shift = HNS_HW_PAGE_SHIFT;
+
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ cfg->buf_pg_count = 1;
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
+ order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
+ first_region_padding = 0;
+ } else {
+ page_shift = attr->page_shift;
+ cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
+ 1 << page_shift);
+ cfg->buf_pg_shift = page_shift;
+ first_region_padding = unalinged_size;
+ }
+
+ /* Convert buffer size to page index and page count for each region and
+ * the buffer's offset needs to be appended to the first region.
+ */
+ for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
+ region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
+ r = &cfg->region[region_cnt];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size +
+ first_region_padding);
+ r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ first_region_padding = 0;
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
+ r->count);
+ }
+
+ cfg->region_count = region_cnt;
+ *buf_page_shift = page_shift;
+
+ return page_cnt;
+}
+
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
+}
+
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
+{
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
+
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
+
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
+ }
+
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
+
+ return 0;
+}
+
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int ret;
+
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
+
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret)
+ return ret;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
+ }
+
+ return 0;
+}
+
+static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
+
+/**
+ * hns_roce_mtr_create - Create hns memory translate region.
+ *
+ * @hr_dev: RoCE device struct pointer
+ * @mtr: memory translate region
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
+ * @udata: user space context, if it's NULL, means kernel space
+ * @user_addr: userspace virtual address to start at
+ */
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_page_shift, struct ib_udata *udata,
+ unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int buf_page_shift = 0;
+ int buf_page_cnt;
+ int ret;
+
+ buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
+ &buf_page_shift,
+ udata ? user_addr & ~PAGE_MASK : 0);
+ if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
+ buf_page_cnt, buf_page_shift);
+ return -EINVAL;
+ }
+
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* The caller has its own buffer list and invokes the hns_roce_mtr_map()
+ * to finish the MTT configuration.
+ */
+ if (buf_attr->mtt_only) {
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
+ return 0;
+ }
+
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
+ goto err_alloc_mtt;
+ }
+
+ /* Write buffer's dma address to MTT */
+ ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
+ else
+ return 0;
+
+ mtr_free_bufs(hr_dev, mtr);
+err_alloc_mtt:
+ mtr_free_mtt(hr_dev, mtr);
+ return ret;
+}
+
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release multi-hop addressing resource */
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+
+ /* free buffers */
+ mtr_free_bufs(hr_dev, mtr);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
new file mode 100644
index 000000000..bd1fe89ca
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include "hns_roce_device.h"
+
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
+
+ ida_init(&pd_ida->ida);
+ pd_ida->max = hr_dev->caps.num_pds - 1;
+ pd_ida->min = hr_dev->caps.reserved_pds;
+}
+
+int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ib_device *ib_dev = ibpd->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
+ struct hns_roce_pd *pd = to_hr_pd(ibpd);
+ int ret = 0;
+ int id;
+
+ id = ida_alloc_range(&pd_ida->ida, pd_ida->min, pd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ib_dev, "failed to alloc pd, id = %d.\n", id);
+ return -ENOMEM;
+ }
+ pd->pdn = (unsigned long)id;
+
+ if (udata) {
+ struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn};
+
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret) {
+ ida_free(&pd_ida->ida, id);
+ ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret);
+ }
+ }
+
+ return ret;
+}
+
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+
+ ida_free(&hr_dev->pd_ida.ida, (int)to_hr_pd(pd)->pdn);
+
+ return 0;
+}
+
+int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
+{
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+ int id;
+
+ /* Using bitmap to manager UAR index */
+ id = ida_alloc_range(&uar_ida->ida, uar_ida->min, uar_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc uar id(%d).\n", id);
+ return -ENOMEM;
+ }
+ uar->logic_idx = (unsigned long)id;
+
+ if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
+ uar->index = (uar->logic_idx - 1) %
+ (hr_dev->caps.phy_num_uars - 1) + 1;
+ else
+ uar->index = 0;
+
+ uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) >> PAGE_SHIFT);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_dev->dwqe_page = pci_resource_start(hr_dev->pci_dev, 4);
+
+ return 0;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+
+ ida_init(&uar_ida->ida);
+ uar_ida->max = hr_dev->caps.num_uars - 1;
+ uar_ida->min = hr_dev->caps.reserved_uars;
+}
+
+static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
+{
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+ int id;
+
+ id = ida_alloc_range(&xrcd_ida->ida, xrcd_ida->min, xrcd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc xrcdn(%d).\n", id);
+ return -ENOMEM;
+ }
+ *xrcdn = (u32)id;
+
+ return 0;
+}
+
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+
+ ida_init(&xrcd_ida->ida);
+ xrcd_ida->max = hr_dev->caps.num_xrcds - 1;
+ xrcd_ida->min = hr_dev->caps.reserved_xrcds;
+}
+
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
+ int ret;
+
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+ return -EOPNOTSUPP;
+
+ ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ u32 xrcdn = to_hr_xrcd(ib_xrcd)->xrcdn;
+
+ ida_free(&hr_dev->xrcd_ida.ida, (int)xrcdn);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
new file mode 100644
index 000000000..7b79e6b3f
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -0,0 +1,1563 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_hem.h"
+
+static void flush_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *flush_work = container_of(work,
+ struct hns_roce_work, work);
+ struct hns_roce_qp *hr_qp = container_of(flush_work,
+ struct hns_roce_qp, flush_work);
+ struct device *dev = flush_work->hr_dev->dev;
+ struct ib_qp_attr attr;
+ int attr_mask;
+ int ret;
+
+ attr_mask = IB_QP_STATE;
+ attr.qp_state = IB_QPS_ERR;
+
+ if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
+ if (ret)
+ dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n",
+ ret);
+ }
+
+ /*
+ * make sure we signal QP destroy leg that flush QP was completed
+ * so that it can safely proceed ahead now and destroy QP
+ */
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+}
+
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
+
+ flush_work->hr_dev = hr_dev;
+ INIT_WORK(&flush_work->work, flush_work_handle);
+ refcount_inc(&hr_qp->refcount);
+ queue_work(hr_dev->irq_workq, &flush_work->work);
+}
+
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
+{
+ /*
+ * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state
+ * gets into errored mode. Hence, as a workaround to this
+ * hardware limitation, driver needs to assist in flushing. But
+ * the flushing operation uses mailbox to convey the QP state to
+ * the hardware and which can sleep due to the mutex protection
+ * around the mailbox calls. Hence, use the deferred flush for
+ * now.
+ */
+ if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+ init_flush_work(dev, qp);
+}
+
+void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_qp *qp;
+
+ xa_lock(&hr_dev->qp_table_xa);
+ qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock(&hr_dev->qp_table_xa);
+
+ if (!qp) {
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+ return;
+ }
+
+ if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
+ event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
+ qp->state = IB_QPS_ERR;
+
+ flush_cqe(hr_dev, qp);
+ }
+
+ qp->event(qp, (enum hns_roce_event)event_type);
+
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
+ enum hns_roce_event type)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct ib_event event;
+
+ if (ibqp->event_handler) {
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
+ type, hr_qp->qpn);
+ return;
+ }
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+static u8 get_affinity_cq_bank(u8 qp_bank)
+{
+ return (qp_bank >> 1) & CQ_BANKID_MASK;
+}
+
+static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
+ struct hns_roce_bank *bank)
+{
+#define INVALID_LOAD_QPNUM 0xFFFFFFFF
+ struct ib_cq *scq = init_attr->send_cq;
+ u32 least_load = INVALID_LOAD_QPNUM;
+ unsigned long cqn = 0;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ if (scq)
+ cqn = to_hr_cq(scq)->cqn;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
+ continue;
+
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
+ unsigned long *qpn)
+{
+ int id;
+
+ id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+ }
+
+ /* the QPN should keep increasing until the max value is reached. */
+ bank->next = (id + 1) > bank->max ? bank->min : id + 1;
+
+ /* the lower 3 bits is bankid */
+ *qpn = (id << 3) | bankid;
+
+ return 0;
+}
+static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned long num = 0;
+ u8 bankid;
+ int ret;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ num = 1;
+ } else {
+ mutex_lock(&qp_table->bank_mutex);
+ bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
+
+ ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
+ &num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to alloc QPN, ret = %d\n", ret);
+ mutex_unlock(&qp_table->bank_mutex);
+ return ret;
+ }
+
+ qp_table->bank[bankid].inuse++;
+ mutex_unlock(&qp_table->bank_mutex);
+ }
+
+ hr_qp->qpn = num;
+
+ return 0;
+}
+
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+ struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+ unsigned long flags;
+
+ hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+ hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+ list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+ if (hr_send_cq)
+ list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+ if (hr_recv_cq)
+ list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+ hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct xarray *xa = &hr_dev->qp_table_xa;
+ int ret;
+
+ if (!hr_qp->qpn)
+ return -EINVAL;
+
+ ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
+ if (ret)
+ dev_err(hr_dev->dev, "failed to xa store for QPC\n");
+ else
+ /* add QP to device's QP list for softwc */
+ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
+ init_attr->recv_cq);
+
+ return ret;
+}
+
+static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (!hr_qp->qpn)
+ return -EINVAL;
+
+ /* Alloc memory for QPC */
+ ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get QPC table\n");
+ goto err_out;
+ }
+
+ /* Alloc memory for IRRL */
+ ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get IRRL table\n");
+ goto err_put_qp;
+ }
+
+ if (hr_dev->caps.trrl_entry_sz) {
+ /* Alloc memory for TRRL */
+ ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
+ hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get TRRL table\n");
+ goto err_put_irrl;
+ }
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ /* Alloc memory for SCC CTX */
+ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
+ hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get SCC CTX table\n");
+ goto err_put_trrl;
+ }
+ }
+
+ return 0;
+
+err_put_trrl:
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+
+err_put_irrl:
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+
+err_put_qp:
+ hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
+
+err_out:
+ return ret;
+}
+
+static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
+{
+ rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
+}
+
+void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct xarray *xa = &hr_dev->qp_table_xa;
+ unsigned long flags;
+
+ list_del(&hr_qp->node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->sq_node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
+ hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->rq_node);
+
+ xa_lock_irqsave(xa, flags);
+ __xa_erase(xa, hr_qp->qpn);
+ xa_unlock_irqrestore(xa, flags);
+}
+
+static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+}
+
+static inline u8 get_qp_bankid(unsigned long qpn)
+{
+ /* The lower 3 bits of QPN are used to hash to different banks */
+ return (u8)(qpn & GENMASK(2, 0));
+}
+
+static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ u8 bankid;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ return;
+
+ if (hr_qp->qpn < hr_dev->caps.reserved_qps)
+ return;
+
+ bankid = get_qp_bankid(hr_qp->qpn);
+
+ ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
+
+ mutex_lock(&hr_dev->qp_table.bank_mutex);
+ hr_dev->qp_table.bank[bankid].inuse--;
+ mutex_unlock(&hr_dev->qp_table.bank_mutex);
+}
+
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
+
+ return max_sge;
+}
+
+static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
+{
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
+ u32 cnt;
+
+ /* If srq exist, set zero for relative number of rq */
+ if (!has_rq) {
+ hr_qp->rq.wqe_cnt = 0;
+ hr_qp->rq.max_gs = 0;
+ hr_qp->rq_inl_buf.wqe_cnt = 0;
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
+
+ return 0;
+ }
+
+ /* Check the validity of QP support capacity */
+ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
+ cap->max_recv_wr, cap->max_recv_sge);
+ return -EINVAL;
+ }
+
+ cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
+ cap->max_recv_wr);
+ return -EINVAL;
+ }
+
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
+
+ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+ hr_qp->rq.max_gs);
+
+ hr_qp->rq.wqe_cnt = cnt;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
+ hr_qp->ibqp.qp_type != IB_QPT_UD &&
+ hr_qp->ibqp.qp_type != IB_QPT_GSI)
+ hr_qp->rq_inl_buf.wqe_cnt = cnt;
+ else
+ hr_qp->rq_inl_buf.wqe_cnt = 0;
+
+ cap->max_recv_wr = cnt;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+
+ return 0;
+}
+
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
+{
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
+
+ return 0;
+}
+
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+ /*
+ * if max_inline_data less than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+ * In addition to ud's mode, no need to extend sge.
+ */
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
+
+static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
+ struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
+{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
+ u32 total_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
+
+ hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
+
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
+
+ /* If the number of extended sge is not zero, they MUST use the
+ * space of HNS_HW_PAGE_SIZE at least.
+ */
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
+ hr_qp->sge.sge_cnt = max(total_sge_cnt,
+ (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
+
+ update_inline_data(hr_qp, cap);
+}
+
+static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
+ u8 max_sq_stride = ilog2(roundup_sq_stride);
+
+ /* Sanity check SQ size before proceeding */
+ if (ucmd->log_sq_stride > max_sq_stride ||
+ ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
+ return -EINVAL;
+ }
+
+ if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
+ cap->max_send_sge);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_user_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt = 0;
+ int ret;
+
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > hr_dev->caps.max_wqes)
+ return -EINVAL;
+
+ ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
+
+ hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+ hr_qp->sq.wqe_cnt = cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
+
+ return 0;
+}
+
+static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ int buf_size;
+ int idx = 0;
+
+ hr_qp->buff_size = 0;
+
+ /* SQ WQE */
+ hr_qp->sq.offset = 0;
+ buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
+ hr_qp->sq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* extend SGE WQE in SQ */
+ hr_qp->sge.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* RQ WQE */
+ hr_qp->rq.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
+ hr_qp->rq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ if (hr_qp->buff_size < 1)
+ return -EINVAL;
+
+ buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+ buf_attr->region_count = idx;
+
+ return 0;
+}
+
+static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt;
+
+ if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
+ cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(ibdev, "failed to check SQ WR or SGE num.\n");
+ return -EINVAL;
+ }
+
+ cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
+ cnt);
+ return -EINVAL;
+ }
+
+ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+ hr_qp->sq.wqe_cnt = cnt;
+
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
+
+ /* sync the parameters of kernel QP to user's configuration */
+ cap->max_send_wr = cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
+
+ return 0;
+}
+
+static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
+ return 0;
+
+ return 1;
+}
+
+static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI ||
+ attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
+ !attr->cap.max_recv_wr)
+ return 0;
+
+ return 1;
+}
+
+static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ u32 max_recv_sge = init_attr->cap.max_recv_sge;
+ u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt;
+ struct hns_roce_rinl_wqe *wqe_list;
+ int i;
+
+ /* allocate recv inline buf */
+ wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe),
+ GFP_KERNEL);
+ if (!wqe_list)
+ goto err;
+
+ /* Allocate a continuous buffer for all inline sge we need */
+ wqe_list[0].sg_list = kcalloc(wqe_cnt, (max_recv_sge *
+ sizeof(struct hns_roce_rinl_sge)),
+ GFP_KERNEL);
+ if (!wqe_list[0].sg_list)
+ goto err_wqe_list;
+
+ /* Assign buffers of sg_list to each inline wqe */
+ for (i = 1; i < wqe_cnt; i++)
+ wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
+
+ hr_qp->rq_inl_buf.wqe_list = wqe_list;
+
+ return 0;
+
+err_wqe_list:
+ kfree(wqe_list);
+
+err:
+ return -ENOMEM;
+}
+
+static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->rq_inl_buf.wqe_list)
+ kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+ kfree(hr_qp->rq_inl_buf.wqe_list);
+}
+
+static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ if (!udata && hr_qp->rq_inl_buf.wqe_cnt) {
+ ret = alloc_rq_inline_buf(hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc inline buf, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ } else {
+ hr_qp->rq_inl_buf.wqe_list = NULL;
+ }
+
+ ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
+ goto err_inline;
+ }
+ ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
+ PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
+ goto err_inline;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
+
+ return 0;
+
+err_inline:
+ free_rq_inline_buf(hr_qp);
+
+ return ret;
+}
+
+static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+ free_rq_inline_buf(hr_qp);
+}
+
+static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_sq(init_attr) &&
+ udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
+}
+
+static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_rq(init_attr));
+}
+
+static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ hns_roce_qp_has_rq(init_attr));
+}
+
+static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
+ struct hns_roce_dev *hr_dev,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ struct hns_roce_ucontext *uctx =
+ rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct rdma_user_mmap_entry *rdma_entry;
+ u64 address;
+
+ address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
+
+ hr_qp->dwqe_mmap_entry =
+ hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
+ HNS_ROCE_DWQE_SIZE,
+ HNS_ROCE_MMAP_TYPE_DWQE);
+
+ if (!hr_qp->dwqe_mmap_entry) {
+ ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
+ return -ENOMEM;
+ }
+
+ rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
+ resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+
+ return 0;
+}
+
+static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+ ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to map user SQ doorbell, ret = %d.\n",
+ ret);
+ goto err_out;
+ }
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
+ }
+
+ if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+ ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to map user RQ doorbell, ret = %d.\n",
+ ret);
+ goto err_sdb;
+ }
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
+
+ return 0;
+
+err_sdb:
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_out:
+ return ret;
+}
+
+static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->sq.db_reg = hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
+ else
+ hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ if (kernel_qp_has_rdb(hr_dev, init_attr)) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc kernel RQ doorbell, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ *hr_qp->rdb.db_record = 0;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
+
+ return 0;
+}
+
+static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ int ret;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
+
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+ ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
+ if (ret)
+ return ret;
+ }
+
+ ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
+ resp);
+ if (ret)
+ goto err_remove_qp;
+ } else {
+ ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+err_remove_qp:
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+
+ return ret;
+}
+
+static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+ } else {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_qp->rdb);
+ }
+}
+
+static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 *sq_wrid = NULL;
+ u64 *rq_wrid = NULL;
+ int ret;
+
+ sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(sq_wrid)) {
+ ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
+ return -ENOMEM;
+ }
+
+ if (hr_qp->rq.wqe_cnt) {
+ rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(rq_wrid)) {
+ ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
+ ret = -ENOMEM;
+ goto err_sq;
+ }
+ }
+
+ hr_qp->sq.wrid = sq_wrid;
+ hr_qp->rq.wrid = rq_wrid;
+ return 0;
+err_sq:
+ kfree(sq_wrid);
+
+ return ret;
+}
+
+static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
+{
+ kfree(hr_qp->rq.wrid);
+ kfree(hr_qp->sq.wrid);
+}
+
+static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
+ int ret;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
+ else
+ hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+
+ ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
+ hns_roce_qp_has_rq(init_attr), !!udata);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (udata) {
+ ret = ib_copy_from_udata(ucmd, udata,
+ min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to copy QP ucmd, ret = %d\n", ret);
+ return ret;
+ }
+
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
+ ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to set user SQ size, ret = %d.\n",
+ ret);
+ } else {
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to set kernel SQ size, ret = %d.\n",
+ ret);
+ }
+
+ return ret;
+}
+
+static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+ struct ib_pd *ib_pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_ib_create_qp_resp resp = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_qp ucmd = {};
+ int ret;
+
+ mutex_init(&hr_qp->mutex);
+ spin_lock_init(&hr_qp->sq.lock);
+ spin_lock_init(&hr_qp->rq.lock);
+
+ hr_qp->state = IB_QPS_RESET;
+ hr_qp->flush_flag = 0;
+
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
+ ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (!udata) {
+ ret = alloc_kernel_wrid(hr_dev, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
+ goto err_buf;
+ }
+
+ ret = alloc_qpn(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
+ goto err_qpn;
+ }
+
+ ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
+ ret);
+ goto err_db;
+ }
+
+ ret = alloc_qpc(hr_dev, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
+ ret);
+ goto err_qpc;
+ }
+
+ ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
+ goto err_store;
+ }
+
+ if (udata) {
+ resp.cap_flags = hr_qp->en_flags;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret) {
+ ibdev_err(ibdev, "copy qp resp failed!\n");
+ goto err_store;
+ }
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
+ if (ret)
+ goto err_flow_ctrl;
+ }
+
+ hr_qp->ibqp.qp_num = hr_qp->qpn;
+ hr_qp->event = hns_roce_ib_qp_event;
+ refcount_set(&hr_qp->refcount, 1);
+ init_completion(&hr_qp->free);
+
+ return 0;
+
+err_flow_ctrl:
+ hns_roce_qp_remove(hr_dev, hr_qp);
+err_store:
+ free_qpc(hr_dev, hr_qp);
+err_qpc:
+ free_qp_db(hr_dev, hr_qp, udata);
+err_db:
+ free_qpn(hr_dev, hr_qp);
+err_qpn:
+ free_qp_buf(hr_dev, hr_qp);
+err_buf:
+ free_kernel_wrid(hr_qp);
+ return ret;
+}
+
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+ wait_for_completion(&hr_qp->free);
+
+ free_qpc(hr_dev, hr_qp);
+ free_qpn(hr_dev, hr_qp);
+ free_qp_buf(hr_dev, hr_qp);
+ free_kernel_wrid(hr_qp);
+ free_qp_db(hr_dev, hr_qp, udata);
+}
+
+static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
+ bool is_user)
+{
+ switch (type) {
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+ goto out;
+ break;
+ case IB_QPT_UD:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
+ is_user)
+ goto out;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
+ break;
+ default:
+ goto out;
+ }
+
+ return 0;
+
+out:
+ ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type);
+
+ return -EOPNOTSUPP;
+}
+
+int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = qp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(qp);
+ struct ib_pd *pd = qp->pd;
+ int ret;
+
+ ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
+ if (ret)
+ return ret;
+
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
+ hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
+
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ hr_qp->port = init_attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+
+ ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+ if (ret)
+ ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
+ init_attr->qp_type, ret);
+
+ return ret;
+}
+
+int to_hr_qp_type(int qp_type)
+{
+ switch (qp_type) {
+ case IB_QPT_RC:
+ return SERV_TYPE_RC;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return SERV_TYPE_UD;
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return SERV_TYPE_XRC;
+ default:
+ return -1;
+ }
+}
+
+static int check_mtu_validate(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ enum ib_mtu active_mtu;
+ int p;
+
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
+
+ if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
+ attr->path_mtu > hr_dev->caps.max_mtu) ||
+ attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr path_mtu(%d)invalid while modify qp",
+ attr->path_mtu);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int p;
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
+ ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
+ attr->port_num);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, pkey_index = %u.\n",
+ attr->pkey_index);
+ return -EINVAL;
+ }
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_rd_atomic = %u.\n",
+ attr->max_rd_atomic);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_dest_rd_atomic = %u.\n",
+ attr->max_dest_rd_atomic);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
+
+ return 0;
+}
+
+int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int ret = -EINVAL;
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
+ goto out;
+
+ cur_state = hr_qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (ibqp->uobject &&
+ (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
+ hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+ } else {
+ ibdev_warn(&hr_dev->ib_dev,
+ "flush cqe is not supported in userspace!\n");
+ goto out;
+ }
+ }
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
+ ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
+ goto out;
+ }
+
+ ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
+ if (ret)
+ goto out;
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET)
+ goto out;
+
+ ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state);
+
+out:
+ mutex_unlock(&hr_qp->mutex);
+
+ return ret;
+}
+
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __acquire(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ spin_lock_irq(&recv_cq->lock);
+ __acquire(&send_cq->lock);
+ } else if (send_cq == recv_cq) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
+ __releases(&recv_cq->lock)
+{
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ __release(&send_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ spin_unlock(&send_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ __release(&send_cq->lock);
+ spin_unlock(&recv_cq->lock);
+ } else if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
+{
+ return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
+}
+
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
+{
+ return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
+}
+
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
+{
+ return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
+}
+
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
+{
+ return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
+}
+
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
+ struct ib_cq *ib_cq)
+{
+ struct hns_roce_cq *hr_cq;
+ u32 cur;
+
+ cur = hr_wq->head - hr_wq->tail;
+ if (likely(cur + nreq < hr_wq->wqe_cnt))
+ return false;
+
+ hr_cq = to_hr_cq(ib_cq);
+ spin_lock(&hr_cq->lock);
+ cur = hr_wq->head - hr_wq->tail;
+ spin_unlock(&hr_cq->lock);
+
+ return cur + nreq >= hr_wq->wqe_cnt;
+}
+
+int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+
+ qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
+ sizeof(u32), GFP_KERNEL);
+ if (!qp_table->idx_table.spare_idx)
+ return -ENOMEM;
+
+ mutex_init(&qp_table->scc_mutex);
+ mutex_init(&qp_table->bank_mutex);
+ xa_init(&hr_dev->qp_table_xa);
+
+ reserved_from_bot = hr_dev->caps.reserved_qps;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
+ hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ ida_init(&hr_dev->qp_table.bank[i].ida);
+ hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
+ HNS_ROCE_QP_BANK_NUM - 1;
+ hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
+ }
+
+ return 0;
+}
+
+void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
+{
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
+ ida_destroy(&hr_dev->qp_table.bank[i].ida);
+ kfree(hr_dev->qp_table.idx_table.spare_idx);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
new file mode 100644
index 000000000..989a2af2e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+// Copyright (c) 2019 Hisilicon Limited.
+
+#include <rdma/rdma_cm.h>
+#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_netlink.h>
+#include "hnae3.h"
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+#define MAX_ENTRY_NUM 256
+
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct hns_roce_v2_cq_context context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
+ int ret;
+
+ if (!hr_dev->hw->query_cqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
+ if (ret)
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, CQC_CQ_ST);
+ data[offset++] = hr_reg_read(&context, CQC_SHIFT);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_CNT);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN);
+ data[offset++] = hr_reg_read(&context, CQC_ARM_ST);
+ data[offset++] = hr_reg_read(&context, CQC_CMD_SN);
+ data[offset++] = hr_reg_read(&context, CQC_CEQN);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
+ return ret;
+}
+
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct hns_roce_v2_qp_context context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
+ int ret;
+
+ if (!hr_dev->hw->query_qpc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context);
+ if (ret)
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, QPC_QP_ST);
+ data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE);
+ data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG);
+ data[offset++] = hr_reg_read(&context, QPC_SRQ_EN);
+ data[offset++] = hr_reg_read(&context, QPC_SRQN);
+ data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD);
+ data[offset++] = hr_reg_read(&context, QPC_TX_CQN);
+ data[offset++] = hr_reg_read(&context, QPC_RX_CQN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_RQWS);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ);
+ data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ);
+ data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
+ data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
+ return ret;
+}
+
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.ba_pg_shift))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.buf_pg_shift))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device);
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct hns_roce_v2_mpt_entry context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
+ int ret;
+
+ if (!hr_dev->hw->query_mpt)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context);
+ if (ret)
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, MPT_ST);
+ data[offset++] = hr_reg_read(&context, MPT_PD);
+ data[offset++] = hr_reg_read(&context, MPT_LKEY);
+ data[offset++] = hr_reg_read(&context, MPT_LEN_L);
+ data[offset++] = hr_reg_read(&context, MPT_LEN_H);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000..8dae98f82
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <linux/pci.h>
+#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_srq *srq;
+
+ xa_lock(&srq_table->xa);
+ srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+ if (srq)
+ refcount_inc(&srq->refcount);
+ xa_unlock(&srq_table->xa);
+
+ if (!srq) {
+ dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq *ibsrq = &srq->ibsrq;
+ struct ib_event event;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ event_type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida;
+ int id;
+
+ id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id);
+ return -ENOMEM;
+ }
+
+ srq->srqn = id;
+
+ return 0;
+}
+
+static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn);
+}
+
+static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+ return PTR_ERR(mailbox);
+ }
+
+ ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+ if (ret) {
+ ibdev_err(ibdev, "failed to write SRQC.\n");
+ goto err_mbox;
+ }
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ,
+ srq->srqn);
+ if (ret)
+ ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
+
+err_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
+ goto err_put;
+ }
+
+ ret = hns_roce_create_srqc(hr_dev, srq);
+ if (ret)
+ goto err_xa;
+
+ return 0;
+
+err_xa:
+ xa_erase(&srq_table->xa, srq->srqn);
+err_put:
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+ return ret;
+}
+
+static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ int ret;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
+ srq->srqn);
+ if (ret)
+ dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
+ ret, srq->srqn);
+
+ xa_erase(&srq_table->xa, srq->srqn);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+}
+
+static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
+
+ buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->idx_que.entry_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (!udata) {
+ idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
+ if (!idx_que->bitmap) {
+ ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
+ ret = -ENOMEM;
+ goto err_idx_mtr;
+ }
+ }
+
+ idx_que->head = 0;
+ idx_que->tail = 0;
+
+ return 0;
+err_idx_mtr:
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+
+ return ret;
+}
+
+static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ bitmap_free(idx_que->bitmap);
+ idx_que->bitmap = NULL;
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+}
+
+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
+static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void free_srq_wrid(struct hns_roce_srq *srq)
+{
+ kvfree(srq->wrid);
+ srq->wrid = NULL;
+}
+
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq_attr *attr = &init_attr->attr;
+ u32 max_sge;
+
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+ if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+ attr->max_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid SRQ attr, depth = %u, sge = %u.\n",
+ attr->max_wr, attr->max_sge);
+ return -EINVAL;
+ }
+
+ attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+ srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+ srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
+
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+ attr->srq_limit = 0;
+
+ return 0;
+}
+
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr)
+{
+ srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
+
+ srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
+}
+
+static int set_srq_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int ret;
+
+ ret = set_srq_basic_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
+
+ set_srq_ext_param(srq, init_attr);
+
+ return 0;
+}
+
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ int ret;
+
+ if (udata) {
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to copy SRQ udata, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret)
+ goto err_idx;
+
+ if (!udata) {
+ ret = alloc_srq_wrid(hr_dev, srq);
+ if (ret)
+ goto err_wqe_buf;
+ }
+
+ return 0;
+
+err_wqe_buf:
+ free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+ free_srq_idx(hr_dev, srq);
+
+ return ret;
+}
+
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ free_srq_wrid(srq);
+ free_srq_wqe_buf(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+}
+
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_ib_create_srq_resp resp = {};
+ struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ int ret;
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ ret = set_srq_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_buf(hr_dev, srq, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srqn(hr_dev, srq);
+ if (ret)
+ goto err_srq_buf;
+
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srqn;
+
+ if (udata) {
+ resp.srqn = srq->srqn;
+ if (ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)))) {
+ ret = -EFAULT;
+ goto err_srqc;
+ }
+ }
+
+ srq->db_reg = hr_dev->reg_base + SRQ_DB_REG;
+ srq->event = hns_roce_ib_srq_event;
+ refcount_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ return 0;
+
+err_srqc:
+ free_srqc(hr_dev, srq);
+err_srqn:
+ free_srqn(hr_dev, srq);
+err_srq_buf:
+ free_srq_buf(hr_dev, srq);
+
+ return ret;
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+ free_srqc(hr_dev, srq);
+ free_srqn(hr_dev, srq);
+ free_srq_buf(hr_dev, srq);
+ return 0;
+}
+
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_ida *srq_ida = &srq_table->srq_ida;
+
+ xa_init(&srq_table->xa);
+
+ ida_init(&srq_ida->ida);
+ srq_ida->max = hr_dev->caps.num_srqs - 1;
+ srq_ida->min = hr_dev->caps.reserved_srqs;
+}