diff options
Diffstat (limited to 'drivers/infiniband')
82 files changed, 1381 insertions, 768 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c02a96d357..6791df64a5 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -794,7 +794,6 @@ err_free_table: static void release_gid_table(struct ib_device *device, struct ib_gid_table *table) { - bool leak = false; int i; if (!table) @@ -803,15 +802,12 @@ static void release_gid_table(struct ib_device *device, for (i = 0; i < table->sz; i++) { if (is_gid_entry_free(table->data_vec[i])) continue; - if (kref_read(&table->data_vec[i]->kref) > 1) { - dev_err(&device->dev, - "GID entry ref leak for index %d ref=%u\n", i, - kref_read(&table->data_vec[i]->kref)); - leak = true; - } + + WARN_ONCE(true, + "GID entry ref leak for dev %s index %d ref=%u\n", + dev_name(&device->dev), i, + kref_read(&table->data_vec[i]->kref)); } - if (leak) - return; mutex_destroy(&table->lock); kfree(table->data_vec); diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index 47f3c6e4be..dc622f3778 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -84,7 +84,7 @@ TRACE_EVENT(cm_id_attach, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); - __assign_str(devname, device->name); + __assign_str(devname); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s", @@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(cma_client_class, ), TP_fast_assign( - __assign_str(name, device->name); + __assign_str(name); ), TP_printk("device name=%s", diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 07cb6c5ffd..46d1c2c32d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2146,6 +2146,9 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned long flags; int ret; + if (!rdma_is_port_valid(ib_dev, port)) + return -EINVAL; + /* * Drivers wish to call this before ib_register_driver, so we have to * setup the port data early. @@ -2154,9 +2157,6 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, if (ret) return ret; - if (!rdma_is_port_valid(ib_dev, port)) - return -EINVAL; - pdata = &ib_dev->port_data[port]; spin_lock_irqsave(&pdata->netdev_lock, flags); old_ndev = rcu_dereference_protected( @@ -2166,17 +2166,12 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, return 0; } - if (old_ndev) - netdev_tracker_free(ndev, &pdata->netdev_tracker); - if (ndev) - netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); rcu_assign_pointer(pdata->netdev, ndev); + netdev_put(old_ndev, &pdata->netdev_tracker); + netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); - if (old_ndev) - __dev_put(old_ndev); - return 0; } EXPORT_SYMBOL(ib_device_set_netdev); @@ -2235,8 +2230,7 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, spin_lock(&pdata->netdev_lock); res = rcu_dereference_protected( pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); - if (res) - dev_hold(res); + dev_hold(res); spin_unlock(&pdata->netdev_lock); } @@ -2311,9 +2305,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, if (filter(ib_dev, port, idev, filter_cookie)) cb(ib_dev, port, idev, cookie); - - if (idev) - dev_put(idev); + dev_put(idev); } } diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 0301fcad4b..bf3265e678 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -368,8 +368,10 @@ EXPORT_SYMBOL(iw_cm_disconnect); * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. + * + * Returns true if and only if the last cm_id_priv reference has been dropped. */ -static void destroy_cm_id(struct iw_cm_id *cm_id) +static bool destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; @@ -439,7 +441,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } - (void)iwcm_deref_id(cm_id_priv); + return iwcm_deref_id(cm_id_priv); } /* @@ -450,7 +452,8 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { - destroy_cm_id(cm_id); + if (!destroy_cm_id(cm_id)) + flush_workqueue(iwcm_wq); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -1034,7 +1037,7 @@ static void cm_work_handler(struct work_struct *_work) if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); if (ret) - destroy_cm_id(&cm_id_priv->id); + WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index eca6e37c72..8fd80adfe8 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -93,8 +93,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, slave = netdev_get_xmit_slave(master, skb, !!(device->lag_flags & RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); - if (slave) - dev_hold(slave); + dev_hold(slave); rcu_read_unlock(); kfree_skb(skb); return slave; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 4900a08481..bc79ee630d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -137,6 +137,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, @@ -164,6 +166,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -399,7 +402,8 @@ err: return -EMSGSIZE; } -static int fill_res_info(struct sk_buff *msg, struct ib_device *device) +static int fill_res_info(struct sk_buff *msg, struct ib_device *device, + bool show_details) { static const char * const names[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_PD] = "pd", @@ -424,7 +428,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(device, i); + curr = rdma_restrack_count(device, i, show_details); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; @@ -1305,6 +1309,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + bool show_details = false; struct ib_device *device; struct sk_buff *msg; u32 index; @@ -1320,6 +1325,9 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; + if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) + show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -1334,7 +1342,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; } - ret = fill_res_info(msg, device); + ret = fill_res_info(msg, device, show_details); if (ret) goto err_free; @@ -1364,7 +1372,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); - if (!nlh || fill_res_info(skb, device)) { + if (!nlh || fill_res_info(skb, device, false)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1534,6 +1542,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; int err, ret = 0, idx = 0; + bool show_details = false; struct nlattr *table_attr; struct nlattr *entry_attr; struct ib_device *device; @@ -1562,6 +1571,9 @@ static int res_get_common_dumpit(struct sk_buff *skb, if (!device) return -EINVAL; + if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) + show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); + /* * If no PORT_INDEX is supplied, we will return all QPs from that device */ @@ -1599,6 +1611,9 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. */ xa_for_each(&rt->xa, id, res) { + if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) + goto next; + if (idx < start || !rdma_restrack_get(res)) goto next; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 01a499a8b8..3313410014 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -37,22 +37,6 @@ int rdma_restrack_init(struct ib_device *dev) return 0; } -static const char *type2str(enum rdma_restrack_type type) -{ - static const char * const names[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_PD] = "PD", - [RDMA_RESTRACK_CQ] = "CQ", - [RDMA_RESTRACK_QP] = "QP", - [RDMA_RESTRACK_CM_ID] = "CM_ID", - [RDMA_RESTRACK_MR] = "MR", - [RDMA_RESTRACK_CTX] = "CTX", - [RDMA_RESTRACK_COUNTER] = "COUNTER", - [RDMA_RESTRACK_SRQ] = "SRQ", - }; - - return names[type]; -}; - /** * rdma_restrack_clean() - clean resource tracking * @dev: IB device @@ -60,47 +44,14 @@ static const char *type2str(enum rdma_restrack_type type) void rdma_restrack_clean(struct ib_device *dev) { struct rdma_restrack_root *rt = dev->res; - struct rdma_restrack_entry *e; - char buf[TASK_COMM_LEN]; - bool found = false; - const char *owner; int i; for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { struct xarray *xa = &dev->res[i].xa; - if (!xa_empty(xa)) { - unsigned long index; - - if (!found) { - pr_err("restrack: %s", CUT_HERE); - dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); - } - xa_for_each(xa, index, e) { - if (rdma_is_kernel_res(e)) { - owner = e->kern_name; - } else { - /* - * There is no need to call get_task_struct here, - * because we can be here only if there are more - * get_task_struct() call than put_task_struct(). - */ - get_task_comm(buf, e->task); - owner = buf; - } - - pr_err("restrack: %s %s object allocated by %s is not freed\n", - rdma_is_kernel_res(e) ? "Kernel" : - "User", - type2str(e->type), owner); - } - found = true; - } + WARN_ON(!xa_empty(xa)); xa_destroy(xa); } - if (found) - pr_err("restrack: %s", CUT_HERE); - kfree(rt); } @@ -108,8 +59,10 @@ void rdma_restrack_clean(struct ib_device *dev) * rdma_restrack_count() - the current usage of specific object * @dev: IB device * @type: actual type of object to operate + * @show_details: count driver specific objects */ -int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, + bool show_details) { struct rdma_restrack_root *rt = &dev->res[type]; struct rdma_restrack_entry *e; @@ -117,8 +70,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) + xas_for_each(&xas, e, U32_MAX) { + if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details) + continue; cnt++; + } xa_unlock(&rt->xa); return cnt; } @@ -247,6 +203,9 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); if (ret) res->id = 0; + + if (qp->qp_type >= IB_QPT_DRIVER) + xa_set_mark(&rt->xa, res->id, RESTRACK_DD); } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index e958c43dd2..d5131b3ba8 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -601,8 +601,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port, rcu_read_lock(); master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev); - if (master_ndev) - dev_hold(master_ndev); + dev_hold(master_ndev); rcu_read_unlock(); if (master_ndev) { diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f5feca7fa9..2ed749f50a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -63,6 +63,8 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); MODULE_LICENSE("Dual BSD/GPL"); +#define MAX_UMAD_RECV_LIST_SIZE 200000 + enum { IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS, IB_UMAD_MAX_AGENTS = 32, @@ -113,6 +115,7 @@ struct ib_umad_file { struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; + atomic_t recv_list_size; struct list_head send_list; struct list_head port_list; spinlock_t send_lock; @@ -180,24 +183,28 @@ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) return file->agents_dead ? NULL : file->agent[id]; } -static int queue_packet(struct ib_umad_file *file, - struct ib_mad_agent *agent, - struct ib_umad_packet *packet) +static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, + struct ib_umad_packet *packet, bool is_recv_mad) { int ret = 1; mutex_lock(&file->mutex); + if (is_recv_mad && + atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE) + goto unlock; + for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { list_add_tail(&packet->list, &file->recv_list); + atomic_inc(&file->recv_list_size); wake_up_interruptible(&file->recv_wait); ret = 0; break; } - +unlock: mutex_unlock(&file->mutex); return ret; @@ -224,7 +231,7 @@ static void send_handler(struct ib_mad_agent *agent, if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { packet->length = IB_MGMT_MAD_HDR; packet->mad.hdr.status = ETIMEDOUT; - if (!queue_packet(file, agent, packet)) + if (!queue_packet(file, agent, packet, false)) return; } kfree(packet); @@ -284,7 +291,7 @@ static void recv_handler(struct ib_mad_agent *agent, rdma_destroy_ah_attr(&ah_attr); } - if (queue_packet(file, agent, packet)) + if (queue_packet(file, agent, packet, true)) goto err2; return; @@ -409,6 +416,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); + atomic_dec(&file->recv_list_size); mutex_unlock(&file->mutex); @@ -421,6 +429,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, /* Requeue packet */ mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); + atomic_inc(&file->recv_list_size); mutex_unlock(&file->mutex); } else { if (packet->recv_wc) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ce9c5bae83..582e83a36c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2479,7 +2479,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, break; case IB_WR_SEND_WITH_IMM: wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM; - wqe->send.imm_data = wr->ex.imm_data; + wqe->send.imm_data = be32_to_cpu(wr->ex.imm_data); break; case IB_WR_SEND_WITH_INV: wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV; @@ -2509,7 +2509,7 @@ static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr, break; case IB_WR_RDMA_WRITE_WITH_IMM: wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM; - wqe->rdma.imm_data = wr->ex.imm_data; + wqe->rdma.imm_data = be32_to_cpu(wr->ex.imm_data); break; case IB_WR_RDMA_READ: wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ; @@ -3581,7 +3581,7 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp, wc->byte_len = orig_cqe->length; wc->qp = &gsi_qp->ib_qp; - wc->ex.imm_data = orig_cqe->immdata; + wc->ex.imm_data = cpu_to_be32(le32_to_cpu(orig_cqe->immdata)); wc->src_qp = orig_cqe->src_qp; memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) { @@ -3726,7 +3726,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) (unsigned long)(cqe->qp_handle), struct bnxt_re_qp, qplib_qp); wc->qp = &qp->ib_qp; - wc->ex.imm_data = cqe->immdata; + wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immdata)); wc->src_qp = cqe->src_qp; memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->port_num = 1; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 7fd4506b35..244da20d11 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -164,7 +164,7 @@ struct bnxt_qplib_swqe { /* Send, with imm, inval key */ struct { union { - __be32 imm_data; + u32 imm_data; u32 inv_key; }; u32 q_key; @@ -182,7 +182,7 @@ struct bnxt_qplib_swqe { /* RDMA write, with imm, read */ struct { union { - __be32 imm_data; + u32 imm_data; u32 inv_key; }; u64 remote_va; @@ -389,7 +389,7 @@ struct bnxt_qplib_cqe { u16 cfa_meta; u64 wr_id; union { - __be32 immdata; + __le32 immdata; u32 invrkey; }; u64 qp_handle; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 7377c8a9f4..4296662e59 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -110,7 +110,10 @@ struct efa_admin_create_qp_cmd { * virtual (IOVA returned by MR registration) * 1 : rq_virt - If set, RQ ring base address is * virtual (IOVA returned by MR registration) - * 7:2 : reserved - MBZ + * 2 : unsolicited_write_recv - If set, work requests + * will not be consumed for incoming RDMA write with + * immediate + * 7:3 : reserved - MBZ */ u8 flags; @@ -663,7 +666,9 @@ struct efa_admin_feature_device_attr_desc { * polling is supported * 3 : rdma_write - If set, RDMA Write is supported * on TX queues - * 31:4 : reserved - MBZ + * 4 : unsolicited_write_recv - If set, unsolicited + * write with imm. receive is supported + * 31:5 : reserved - MBZ */ u32 device_caps; @@ -1009,6 +1014,7 @@ struct efa_admin_host_info { /* create_qp_cmd */ #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) +#define EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV_MASK BIT(2) /* modify_qp_cmd */ #define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0) @@ -1044,6 +1050,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4) /* create_eq_cmd */ #define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index d3398c7b0b..5b9c2b16df 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -32,6 +32,9 @@ int efa_com_create_qp(struct efa_com_dev *edev, params->rq_depth; create_qp_cmd.uar = params->uarn; + if (params->unsolicited_write_recv) + EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1); + err = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&create_qp_cmd, sizeof(create_qp_cmd), diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 720a99ba0f..9714105fcf 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -27,6 +27,7 @@ struct efa_com_create_qp_params { u16 pd; u16 uarn; u8 qp_type; + u8 unsolicited_write_recv : 1; }; struct efa_com_create_qp_result { diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 5fa3603c80..d1a48f988f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -671,11 +671,22 @@ static void efa_remove(struct pci_dev *pdev) efa_remove_device(pdev); } +static void efa_shutdown(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN); + efa_free_irq(dev, &dev->admin_irq); + efa_disable_msix(dev); +} + static struct pci_driver efa_pci_driver = { .name = DRV_MODULE_NAME, .id_table = efa_pci_tbl, .probe = efa_probe, .remove = efa_remove, + .shutdown = efa_shutdown, }; module_pci_driver(efa_pci_driver); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 2f412db2ed..8f7a13b79c 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -263,6 +263,9 @@ int efa_query_device(struct ib_device *ibdev, if (EFA_DEV_CAP(dev, RDMA_WRITE)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE; + if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV; + if (dev->neqs) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; @@ -639,6 +642,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, struct efa_ibv_create_qp cmd = {}; struct efa_qp *qp = to_eqp(ibqp); struct efa_ucontext *ucontext; + u16 supported_efa_flags = 0; int err; ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, @@ -676,13 +680,23 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, goto err_out; } - if (cmd.comp_mask) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; goto err_out; } + if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV)) + supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV; + + if (cmd.flags & ~supported_efa_flags) { + ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n", + cmd.flags, supported_efa_flags); + err = -EOPNOTSUPP; + goto err_out; + } + create_qp_params.uarn = ucontext->uarn; create_qp_params.pd = to_epd(ibqp->pd)->pdn; @@ -722,6 +736,9 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, create_qp_params.rq_base_addr = qp->rq_dma_addr; } + if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV) + create_qp_params.unsolicited_write_recv = true; + err = efa_com_create_qp(&dev->edev, &create_qp_params, &create_qp_resp); if (err) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 5df401a30c..c8bd698e21 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -33,7 +33,8 @@ struct erdma_eq { atomic64_t notify_num; void __iomem *db; - u64 *db_record; + u64 *dbrec; + dma_addr_t dbrec_dma; }; struct erdma_cmdq_sq { @@ -48,7 +49,8 @@ struct erdma_cmdq_sq { u16 wqebb_cnt; - u64 *db_record; + u64 *dbrec; + dma_addr_t dbrec_dma; }; struct erdma_cmdq_cq { @@ -61,7 +63,8 @@ struct erdma_cmdq_cq { u32 ci; u32 cmdsn; - u64 *db_record; + u64 *dbrec; + dma_addr_t dbrec_dma; atomic64_t armed_num; }; @@ -177,9 +180,6 @@ enum { ERDMA_RES_CNT = 2, }; -#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE -#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE) - struct erdma_dev { struct ib_device ibdev; struct net_device *netdev; @@ -213,6 +213,7 @@ struct erdma_dev { atomic_t num_ctx; struct list_head cep_list; + struct dma_pool *db_pool; struct dma_pool *resp_pool; }; diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index a151a7bdd5..43ff40b5a0 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -14,7 +14,7 @@ static void arm_cmdq_cq(struct erdma_cmdq *cmdq) FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) | FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn); - *cmdq->cq.db_record = db_data; + *cmdq->cq.dbrec = db_data; writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG); atomic64_inc(&cmdq->cq.armed_num); @@ -25,7 +25,7 @@ static void kick_cmdq_db(struct erdma_cmdq *cmdq) struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi); - *cmdq->sq.db_record = db_data; + *cmdq->sq.dbrec = db_data; writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG); } @@ -89,20 +89,18 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; struct erdma_cmdq_sq *sq = &cmdq->sq; - u32 buf_size; sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE); sq->depth = cmdq->max_outstandings * sq->wqebb_cnt; - buf_size = sq->depth << SQEBB_SHIFT; - - sq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &sq->qbuf_dma_addr, GFP_KERNEL); + sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT, + &sq->qbuf_dma_addr, GFP_KERNEL); if (!sq->qbuf) return -ENOMEM; - sq->db_record = (u64 *)(sq->qbuf + buf_size); + sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma); + if (!sq->dbrec) + goto err_out; spin_lock_init(&sq->lock); @@ -111,30 +109,33 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG, lower_32_bits(sq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth); - erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, - sq->qbuf_dma_addr + buf_size); + erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma); return 0; + +err_out: + dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT, + sq->qbuf, sq->qbuf_dma_addr); + + return -ENOMEM; } static int erdma_cmdq_cq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; struct erdma_cmdq_cq *cq = &cmdq->cq; - u32 buf_size; cq->depth = cmdq->sq.depth; - buf_size = cq->depth << CQE_SHIFT; - - cq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, + &cq->qbuf_dma_addr, GFP_KERNEL); if (!cq->qbuf) return -ENOMEM; spin_lock_init(&cq->lock); - cq->db_record = (u64 *)(cq->qbuf + buf_size); + cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma); + if (!cq->dbrec) + goto err_out; atomic64_set(&cq->armed_num, 0); @@ -142,24 +143,25 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev) upper_32_bits(cq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG, lower_32_bits(cq->qbuf_dma_addr)); - erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, - cq->qbuf_dma_addr + buf_size); + erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma); return 0; + +err_out: + dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf, + cq->qbuf_dma_addr); + + return -ENOMEM; } static int erdma_cmdq_eq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; struct erdma_eq *eq = &cmdq->eq; - u32 buf_size; eq->depth = cmdq->max_outstandings; - buf_size = eq->depth << EQE_SHIFT; - - eq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, + &eq->qbuf_dma_addr, GFP_KERNEL); if (!eq->qbuf) return -ENOMEM; @@ -167,17 +169,24 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) atomic64_set(&eq->event_num, 0); eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma); + if (!eq->dbrec) + goto err_out; erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG, upper_32_bits(eq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG, lower_32_bits(eq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth); - erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, - eq->qbuf_dma_addr + buf_size); + erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma); return 0; + +err_out: + dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf, + eq->qbuf_dma_addr); + + return -ENOMEM; } int erdma_cmdq_init(struct erdma_dev *dev) @@ -211,17 +220,17 @@ int erdma_cmdq_init(struct erdma_dev *dev) return 0; err_destroy_cq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, + dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT, cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma); + err_destroy_sq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, + dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT, cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma); + return err; } @@ -238,18 +247,20 @@ void erdma_cmdq_destroy(struct erdma_dev *dev) clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, + dma_free_coherent(&dev->pdev->dev, cmdq->eq.depth << EQE_SHIFT, cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, + + dma_pool_free(dev->db_pool, cmdq->eq.dbrec, cmdq->eq.dbrec_dma); + + dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT, cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, + + dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma); + + dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT, cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + + dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma); } static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index c1cb5568ea..70f89f0162 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -26,7 +26,7 @@ static void notify_cq(struct erdma_cq *cq, u8 solcitied) FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) | FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci); - *cq->kern_cq.db_record = db_data; + *cq->kern_cq.dbrec = db_data; writeq(db_data, cq->kern_cq.db); } diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index ea47cb21fd..84ccdd8144 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -13,7 +13,7 @@ void notify_eq(struct erdma_eq *eq) u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) | FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1); - *eq->db_record = db_data; + *eq->dbrec = db_data; writeq(db_data, eq->db); atomic64_inc(&eq->notify_num); @@ -83,14 +83,11 @@ void erdma_aeq_event_handler(struct erdma_dev *dev) int erdma_aeq_init(struct erdma_dev *dev) { struct erdma_eq *eq = &dev->aeq; - u32 buf_size; eq->depth = ERDMA_DEFAULT_EQ_DEPTH; - buf_size = eq->depth << EQE_SHIFT; - eq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, + &eq->qbuf_dma_addr, GFP_KERNEL); if (!eq->qbuf) return -ENOMEM; @@ -99,26 +96,34 @@ int erdma_aeq_init(struct erdma_dev *dev) atomic64_set(&eq->notify_num, 0); eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma); + if (!eq->dbrec) + goto err_out; erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG, upper_32_bits(eq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG, lower_32_bits(eq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth); - erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, - eq->qbuf_dma_addr + buf_size); + erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, eq->dbrec_dma); return 0; + +err_out: + dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf, + eq->qbuf_dma_addr); + + return -ENOMEM; } void erdma_aeq_destroy(struct erdma_dev *dev) { struct erdma_eq *eq = &dev->aeq; - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(eq->depth << EQE_SHIFT), eq->qbuf, + dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf, eq->qbuf_dma_addr); + + dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma); } void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb) @@ -209,7 +214,6 @@ static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn) static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) { struct erdma_cmdq_create_eq_req req; - dma_addr_t db_info_dma_addr; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, CMDQ_OPCODE_CREATE_EQ); @@ -219,9 +223,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.qtype = ERDMA_EQ_TYPE_CEQ; /* Vector index is the same as EQN. */ req.vector_idx = eqn; - db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT); - req.db_dma_addr_l = lower_32_bits(db_info_dma_addr); - req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); + req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma); + req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma); return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -229,12 +232,11 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) { struct erdma_eq *eq = &dev->ceqs[ceqn].eq; - u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT; int ret; - eq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + eq->depth = ERDMA_DEFAULT_EQ_DEPTH; + eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, + &eq->qbuf_dma_addr, GFP_KERNEL); if (!eq->qbuf) return -ENOMEM; @@ -242,10 +244,16 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) atomic64_set(&eq->event_num, 0); atomic64_set(&eq->notify_num, 0); - eq->depth = ERDMA_DEFAULT_EQ_DEPTH; eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + (ceqn + 1) * ERDMA_DB_SIZE; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + + eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma); + if (!eq->dbrec) { + dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, + eq->qbuf, eq->qbuf_dma_addr); + return -ENOMEM; + } + eq->ci = 0; dev->ceqs[ceqn].dev = dev; @@ -259,7 +267,6 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) { struct erdma_eq *eq = &dev->ceqs[ceqn].eq; - u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT; struct erdma_cmdq_destroy_eq_req req; int err; @@ -276,8 +283,9 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) if (err) return; - dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf, + dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf, eq->qbuf_dma_addr); + dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma); } int erdma_ceqs_init(struct erdma_dev *dev) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 3212a12227..05978f3b14 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -240,7 +240,7 @@ struct erdma_cmdq_create_cq_req { u32 qbuf_addr_l; u32 qbuf_addr_h; u32 cfg1; - u64 cq_db_info_addr; + u64 cq_dbrec_dma; u32 first_page_offset; u32 cfg2; }; @@ -335,8 +335,8 @@ struct erdma_cmdq_create_qp_req { u64 rq_buf_addr; u32 sq_mtt_cfg; u32 rq_mtt_cfg; - u64 sq_db_info_dma_addr; - u64 rq_db_info_dma_addr; + u64 sq_dbrec_dma; + u64 rq_dbrec_dma; u64 sq_mtt_entry[3]; u64 rq_mtt_entry[3]; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 472939172f..7080f8a71e 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -178,16 +178,26 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) if (!dev->resp_pool) return -ENOMEM; + dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev, + ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0); + if (!dev->db_pool) { + ret = -ENOMEM; + goto destroy_resp_pool; + } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ERDMA_PCI_WIDTH)); if (ret) - goto destroy_pool; + goto destroy_db_pool; dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; -destroy_pool: +destroy_db_pool: + dma_pool_destroy(dev->db_pool); + +destroy_resp_pool: dma_pool_destroy(dev->resp_pool); return ret; @@ -195,6 +205,7 @@ destroy_pool: static void erdma_device_uninit(struct erdma_dev *dev) { + dma_pool_destroy(dev->db_pool); dma_pool_destroy(dev->resp_pool); } diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 6d0330badd..4d1f9114cd 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -492,7 +492,7 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi) u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) | FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi); - *(u64 *)qp->kern_qp.sq_db_info = db_data; + *(u64 *)qp->kern_qp.sq_dbrec = db_data; writeq(db_data, qp->kern_qp.hw_sq_db); } @@ -557,7 +557,7 @@ static int erdma_post_recv_one(struct erdma_qp *qp, return -EINVAL; } - *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe; + *(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe; writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db); qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] = diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 23dfc01603..40c9b6e46b 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -76,10 +76,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; - req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr + - (qp->attrs.sq_size << SQEBB_SHIFT); - req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr + - (qp->attrs.rq_size << RQE_SHIFT); + req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma; + req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma; } else { user_qp = &qp->user_qp; req.sq_cqn_mtt_cfg = FIELD_PREP( @@ -107,8 +105,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg, &req.rq_buf_addr, req.rq_mtt_entry); - req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; - req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; + req.sq_dbrec_dma = user_qp->sq_dbrec_dma; + req.rq_dbrec_dma = user_qp->rq_dbrec_dma; if (uctx->ext_db.enable) { req.sq_cqn_mtt_cfg |= @@ -209,8 +207,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) ERDMA_MR_MTT_0LEVEL); req.first_page_offset = 0; - req.cq_db_info_addr = - cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); + req.cq_dbrec_dma = cq->kern_cq.dbrec_dma; } else { mem = &cq->user_cq.qbuf_mem; req.cfg0 |= @@ -233,7 +230,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) mem->mtt_nents); req.first_page_offset = mem->page_offset; - req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; + req.cq_dbrec_dma = cq->user_cq.dbrec_dma; if (uctx->ext_db.enable) { req.cfg1 |= FIELD_PREP( @@ -482,16 +479,24 @@ static void free_kernel_qp(struct erdma_qp *qp) vfree(qp->kern_qp.rwr_tbl); if (qp->kern_qp.sq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.sq_size << SQEBB_SHIFT, + qp->kern_qp.sq_buf, + qp->kern_qp.sq_buf_dma_addr); + + if (qp->kern_qp.sq_dbrec) + dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec, + qp->kern_qp.sq_dbrec_dma); if (qp->kern_qp.rq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.rq_size << RQE_SHIFT, + qp->kern_qp.rq_buf, + qp->kern_qp.rq_buf_dma_addr); + + if (qp->kern_qp.rq_dbrec) + dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec, + qp->kern_qp.rq_dbrec_dma); } static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, @@ -516,20 +521,27 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; - size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; + size = qp->attrs.sq_size << SQEBB_SHIFT; kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size, &kqp->sq_buf_dma_addr, GFP_KERNEL); if (!kqp->sq_buf) goto err_out; - size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; + kqp->sq_dbrec = + dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma); + if (!kqp->sq_dbrec) + goto err_out; + + size = qp->attrs.rq_size << RQE_SHIFT; kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size, &kqp->rq_buf_dma_addr, GFP_KERNEL); if (!kqp->rq_buf) goto err_out; - kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT); - kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT); + kqp->rq_dbrec = + dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma); + if (!kqp->rq_dbrec) + goto err_out; return 0; @@ -864,9 +876,9 @@ erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx, } static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, - u64 va, u32 len, u64 db_info_va) + u64 va, u32 len, u64 dbrec_va) { - dma_addr_t db_info_dma_addr; + dma_addr_t dbrec_dma; u32 rq_offset; int ret; @@ -889,14 +901,14 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, if (ret) goto put_sq_mtt; - ret = erdma_map_user_dbrecords(uctx, db_info_va, + ret = erdma_map_user_dbrecords(uctx, dbrec_va, &qp->user_qp.user_dbr_page, - &db_info_dma_addr); + &dbrec_dma); if (ret) goto put_rq_mtt; - qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr; - qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE; + qp->user_qp.sq_dbrec_dma = dbrec_dma; + qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE; return 0; @@ -1237,9 +1249,10 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return err; if (rdma_is_kernel_res(&cq->ibcq.res)) { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), + dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + dma_pool_free(dev->db_pool, cq->kern_cq.dbrec, + cq->kern_cq.dbrec_dma); } else { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); put_mtt_entries(dev, &cq->user_cq.qbuf_mem); @@ -1279,16 +1292,7 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) wait_for_completion(&qp->safe_free); if (rdma_is_kernel_res(&qp->ibqp.res)) { - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + free_kernel_qp(qp); } else { put_mtt_entries(dev, &qp->user_qp.sq_mem); put_mtt_entries(dev, &qp->user_qp.rq_mem); @@ -1588,7 +1592,7 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va, &cq->user_cq.user_dbr_page, - &cq->user_cq.db_info_dma_addr); + &cq->user_cq.dbrec_dma); if (ret) put_mtt_entries(dev, &cq->user_cq.qbuf_mem); @@ -1600,19 +1604,27 @@ static int erdma_init_kernel_cq(struct erdma_cq *cq) struct erdma_dev *dev = to_edev(cq->ibcq.device); cq->kern_cq.qbuf = - dma_alloc_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), + dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL); if (!cq->kern_cq.qbuf) return -ENOMEM; - cq->kern_cq.db_record = - (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT)); + cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, + &cq->kern_cq.dbrec_dma); + if (!cq->kern_cq.dbrec) + goto err_out; + spin_lock_init(&cq->kern_cq.lock); /* use default cqdb addr */ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET; return 0; + +err_out: + dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, + cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + + return -ENOMEM; } int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -1676,9 +1688,10 @@ err_free_res: erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); put_mtt_entries(dev, &cq->user_cq.qbuf_mem); } else { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(depth << CQE_SHIFT), + dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT, cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + dma_pool_free(dev->db_pool, cq->kern_cq.dbrec, + cq->kern_cq.dbrec_dma); } err_out_xa: diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index db6018529c..4f02ba06b2 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -140,8 +140,8 @@ struct erdma_uqp { struct erdma_mem sq_mem; struct erdma_mem rq_mem; - dma_addr_t sq_db_info_dma_addr; - dma_addr_t rq_db_info_dma_addr; + dma_addr_t sq_dbrec_dma; + dma_addr_t rq_dbrec_dma; struct erdma_user_dbrecords_page *user_dbr_page; @@ -167,8 +167,11 @@ struct erdma_kqp { void *rq_buf; dma_addr_t rq_buf_dma_addr; - void *sq_db_info; - void *rq_db_info; + void *sq_dbrec; + void *rq_dbrec; + + dma_addr_t sq_dbrec_dma; + dma_addr_t rq_dbrec_dma; u8 sig_all; }; @@ -246,13 +249,14 @@ struct erdma_kcq_info { spinlock_t lock; u8 __iomem *db; - u64 *db_record; + u64 *dbrec; + dma_addr_t dbrec_dma; }; struct erdma_ucq_info { struct erdma_mem qbuf_mem; struct erdma_user_dbrecords_page *user_dbr_page; - dma_addr_t db_info_dma_addr; + dma_addr_t dbrec_dma; }; struct erdma_cq { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4b3f1cb125..eb38f81aee 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2425,7 +2425,7 @@ static inline bool hfi1_need_drop(struct hfi1_devdata *dd) int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) -#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) +#define DD_DEV_ASSIGN(dd) __assign_str(dev) static inline void hfi1_update_ah_attr(struct ib_device *ibdev, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 5d814afdf7..7c9d520300 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -21,36 +21,25 @@ static int hfi1_ipoib_dev_init(struct net_device *dev) struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); int ret; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - ret = priv->netdev_ops->ndo_init(dev); if (ret) - goto out_ret; + return ret; ret = hfi1_netdev_add_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr), dev); if (ret < 0) { priv->netdev_ops->ndo_uninit(dev); - goto out_ret; + return ret; } return 0; -out_ret: - free_percpu(dev->tstats); - dev->tstats = NULL; - return ret; } static void hfi1_ipoib_dev_uninit(struct net_device *dev) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); - free_percpu(dev->tstats); - dev->tstats = NULL; - hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); priv->netdev_ops->ndo_uninit(dev); @@ -107,7 +96,6 @@ static const struct net_device_ops hfi1_ipoib_netdev_ops = { .ndo_uninit = hfi1_ipoib_dev_uninit, .ndo_open = hfi1_ipoib_dev_open, .ndo_stop = hfi1_ipoib_dev_stop, - .ndo_get_stats64 = dev_get_tstats64, }; static int hfi1_ipoib_mcast_attach(struct net_device *dev, @@ -173,9 +161,6 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) hfi1_ipoib_txreq_deinit(priv); hfi1_ipoib_rxq_deinit(priv->netdev); - - free_percpu(dev->tstats); - dev->tstats = NULL; } static void hfi1_ipoib_set_id(struct net_device *dev, int id) @@ -234,6 +219,7 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, netdev->priv_destructor = hfi1_ipoib_netdev_dtor; netdev->needs_free_netdev = true; + netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; return 0; } diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h index 8aa074670a..07c8f77c91 100644 --- a/drivers/infiniband/hw/hfi1/netdev.h +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -49,7 +49,7 @@ struct hfi1_netdev_rxq { * When 0 receive queues will be freed. */ struct hfi1_netdev_rx { - struct net_device rx_napi; + struct net_device *rx_napi; struct hfi1_devdata *dd; struct hfi1_netdev_rxq *rxq; int num_rx_q; diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 720d4c85c9..8608044203 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -188,7 +188,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx) int i; int rc; struct hfi1_devdata *dd = rx->dd; - struct net_device *dev = &rx->rx_napi; + struct net_device *dev = rx->rx_napi; rx->num_rx_q = dd->num_netdev_contexts; rx->rxq = kcalloc_node(rx->num_rx_q, sizeof(*rx->rxq), @@ -360,7 +360,11 @@ int hfi1_alloc_rx(struct hfi1_devdata *dd) if (!rx) return -ENOMEM; rx->dd = dd; - init_dummy_netdev(&rx->rx_napi); + rx->rx_napi = alloc_netdev_dummy(0); + if (!rx->rx_napi) { + kfree(rx); + return -ENOMEM; + } xa_init(&rx->dev_tbl); atomic_set(&rx->enabled, 0); @@ -374,6 +378,7 @@ void hfi1_free_rx(struct hfi1_devdata *dd) { if (dd->netdev_rx) { dd_dev_info(dd, "hfi1 rx freed\n"); + free_netdev(dd->netdev_rx->rx_napi); kfree(dd->netdev_rx); dd->netdev_rx = NULL; } diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 119ec2f138..7133964749 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1207,14 +1207,11 @@ retry: (u32)lnkctl2); /* only write to parent if target is not as high as ours */ if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) { - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= target_vector; - dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, - (u32)lnkctl2); - ret = pcie_capability_write_word(parent, - PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, + target_vector); if (ret) { - dd_dev_err(dd, "Unable to write to PCI config\n"); + dd_dev_err(dd, "Unable to change parent PCI target speed\n"); return_error = 1; goto done; } @@ -1223,22 +1220,11 @@ retry: } dd_dev_info(dd, "%s: setting target link speed\n", __func__); - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, + target_vector); if (ret) { - dd_dev_err(dd, "Unable to read from PCI config\n"); - return_error = 1; - goto done; - } - - dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, - (u32)lnkctl2); - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= target_vector; - dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, - (u32)lnkctl2); - ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); - if (ret) { - dd_dev_err(dd, "Unable to write to PCI config\n"); + dd_dev_err(dd, "Unable to change device PCI target speed\n"); return_error = 1; goto done; } diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 75599d5168..58304b9138 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, TP_STRUCT__entry(__string(function, function) __vstring(msg, vaf->fmt, vaf->va) ), - TP_fast_assign(__assign_str(function, function); + TP_fast_assign(__assign_str(function); __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("(%s) %s", diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index e6904aa80c..8d5e12fe88 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -90,7 +90,7 @@ TRACE_EVENT(hfi1_mmu_invalidate, TP_fast_assign( __entry->ctxt = ctxt; __entry->subctxt = subctxt; - __assign_str(type, type); + __assign_str(type); __entry->start = start; __entry->end = end; ), diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index d129b81959..e358f5b885 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -358,7 +358,7 @@ DECLARE_EVENT_CLASS(/* msg */ ), TP_fast_assign(/* assign */ __entry->qpn = qp ? qp->ibqp.qp_num : 0; - __assign_str(msg, msg); + __assign_str(msg); __entry->more = more; ), TP_printk(/* print */ @@ -651,7 +651,7 @@ DECLARE_EVENT_CLASS(/* tid_node */ TP_fast_assign(/* assign */ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)); __entry->qpn = qp->ibqp.qp_num; - __assign_str(msg, msg); + __assign_str(msg); __entry->index = index; __entry->base = base; __entry->map = map; diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index c79856d4fd..c0ba6b0a2c 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -740,8 +740,8 @@ TRACE_EVENT(hfi1_sdma_state, __string(newstate, nstate) ), TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __assign_str(curstate, cstate); - __assign_str(newstate, nstate); + __assign_str(curstate); + __assign_str(newstate); ), TP_printk("[%s] current state %s new state %s", __get_str(dev), diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b4209b6aed..3e02c474f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -59,8 +59,10 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); struct hns_roce_ib_create_ah_resp resp = {}; struct hns_roce_ah *ah = to_hr_ah(ibah); - int ret = 0; - u32 max_sl; + u8 tclass = get_tclass(grh); + u8 priority = 0; + u8 tc_mode = 0; + int ret; if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) return -EOPNOTSUPP; @@ -74,16 +76,23 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); - ah->av.tclass = get_tclass(grh); - - ah->av.sl = rdma_ah_get_sl(ah_attr); - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); - if (unlikely(ah->av.sl > max_sl)) { - ibdev_err_ratelimited(&hr_dev->ib_dev, - "failed to set sl, sl (%u) shouldn't be larger than %u.\n", - ah->av.sl, max_sl); + ah->av.tclass = tclass; + + ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority); + if (ret == -EOPNOTSUPP) + ret = 0; + + if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + return ret; + + if (tc_mode == HNAE3_TC_MAP_MODE_DSCP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + ah->av.sl = priority; + else + ah->av.sl = rdma_ah_get_sl(ah_attr); + + if (!check_sl_valid(hr_dev, ah->av.sl)) return -EINVAL; - } memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); @@ -99,6 +108,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } if (udata) { + resp.priority = ah->av.sl; + resp.tc_mode = tc_mode; memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 11a78ceae5..950c133d42 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -153,8 +153,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, return total; } -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, struct ib_umem *umem, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 68e22f368d..56dc3908da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -536,4 +536,5 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 0b47c6d688..7d5931872f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -83,6 +83,7 @@ #define MR_TYPE_DMA 0x03 #define HNS_ROCE_FRMR_MAX_PA 512 +#define HNS_ROCE_FRMR_ALIGN_SIZE 128 #define PKEY_ID 0xffff #define NODE_DESC_SIZE 64 @@ -91,6 +92,8 @@ /* Configure to HW for PAGE_SIZE larger than 4KB */ #define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) +#define ATOMIC_WR_LEN 8 + #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -187,6 +190,9 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) +#define HNS_HW_MAX_PAGE_SHIFT 27 +#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT) + struct hns_roce_uar { u64 pfn; unsigned long index; @@ -648,6 +654,8 @@ struct hns_roce_qp { struct hns_user_mmap_entry *dwqe_mmap_entry; u32 config; enum hns_roce_cong_type cong_type; + u8 tc_mode; + u8 priority; }; struct hns_roce_ib_iboe { @@ -713,6 +721,7 @@ struct hns_roce_eq { int shift; int event_type; int sub_type; + struct work_struct work; }; struct hns_roce_eq_table { @@ -926,8 +935,7 @@ struct hns_roce_hw { int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, void *mb_buf); - int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, - struct hns_roce_mr *mr); + int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr); int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, @@ -953,6 +961,8 @@ struct hns_roce_hw { int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); int (*query_hw_counter)(struct hns_roce_dev *hr_dev, u64 *stats, u32 port, int *hw_counters); + int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp, + u8 *tc_mode, u8 *priority); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -1231,7 +1241,7 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, struct hns_roce_buf *buf, unsigned int page_shift); -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift); @@ -1295,4 +1305,6 @@ struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); +bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); + #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 658c522be7..02baa853a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -877,6 +877,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, if (hns_roce_check_whether_mhop(hr_dev, table->type)) { hns_roce_cleanup_mhop_hem_table(hr_dev, table); + mutex_destroy(&table->mutex); return; } @@ -891,6 +892,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, hns_roce_free_hem(hr_dev, table->hem[i]); } + mutex_destroy(&table->mutex); kfree(table->hem); } @@ -986,15 +988,13 @@ static void hem_list_free_all(struct hns_roce_dev *hr_dev, } } -static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, - u64 table_addr) +static void hem_list_link_bt(void *base_addr, u64 table_addr) { *(u64 *)(base_addr) = table_addr; } /* assign L0 table address to hem from root bt */ -static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, void *cpu_addr, +static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr, u64 phy_addr) { hem->addr = cpu_addr; @@ -1163,8 +1163,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, if (level > 1) { pre = hem_ptrs[level - 1]; step = (cur->start - pre->start) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, pre->addr + step, - cur->dma_addr); + hem_list_link_bt(pre->addr + step, cur->dma_addr); } } @@ -1222,7 +1221,7 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, if (!hem) return -ENOMEM; - hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); @@ -1245,7 +1244,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, branch_head, list) { offset = (hem->start - r->offset) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); + hem_list_link_bt(cpu_base + offset, hem->dma_addr); total++; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8800464c9a..621b057fb9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -36,6 +36,7 @@ #include <linux/iopoll.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/workqueue.h> #include <net/addrconf.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> @@ -443,10 +444,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); - - if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) - return -EINVAL; - hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); ud_sq_wqe->sgid_index = ah->av.gid_index; @@ -595,11 +592,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + if (msg_len != ATOMIC_WR_LEN) + return -EINVAL; set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); - else if (wr->opcode != IB_WR_REG_MR) + } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + if (ret) + return ret; + } /* * The pipeline can sequentially post all valid WQEs into WQ buffer, @@ -1273,12 +1275,38 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret) return -EIO; } +static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) +{ + static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = { + {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++) + if (cmdq_tx_timeout[i].opcode == opcode) + return cmdq_tx_timeout[i].tx_timeout; + + return tx_timeout; +} + +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u32 timeout = 0; + + do { + if (hns_roce_cmq_csq_done(hr_dev)) + break; + udelay(1); + } while (++timeout < tx_timeout); +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - u32 timeout = 0; u16 desc_ret; u32 tail; int ret; @@ -1299,12 +1327,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - do { - if (hns_roce_cmq_csq_done(hr_dev)) - break; - udelay(1); - } while (++timeout < priv->cmq.tx_timeout); - + hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { @@ -2461,14 +2484,16 @@ static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev, static struct hns_roce_link_table * alloc_link_table_buf(struct hns_roce_dev *hr_dev) { + u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num; struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_link_table *link_tbl; u32 pg_shift, size, min_size; link_tbl = &priv->ext_llm; pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT; - size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ; - min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) << pg_shift; + size = hr_dev->caps.num_qps * hr_dev->func_num * + HNS_ROCE_V2_EXT_LLM_ENTRY_SZ; + min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift; /* Alloc data table */ size = max(size, min_size); @@ -2671,6 +2696,8 @@ static void free_mr_exit(struct hns_roce_dev *hr_dev) kfree(free_mr->rsv_pd); free_mr->rsv_pd = NULL; } + + mutex_destroy(&free_mr->mutex); } static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) @@ -2821,8 +2848,10 @@ static int free_mr_init(struct hns_roce_dev *hr_dev) mutex_init(&free_mr->mutex); ret = free_mr_alloc_res(hr_dev); - if (ret) + if (ret) { + mutex_destroy(&free_mr->mutex); return ret; + } ret = free_mr_modify_qp(hr_dev); if (ret) @@ -3208,13 +3237,14 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, /* Aligned to the hardware address access unit */ for (i = 0; i < ARRAY_SIZE(pages); i++) - pages[i] >>= 6; + pages[i] >>= MPT_PBL_BUF_ADDR_S; pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); - hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, + upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S)); mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); @@ -3307,8 +3337,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, - void *mb_buf, struct hns_roce_mr *mr) +static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); struct hns_roce_v2_mpt_entry *mpt_entry; @@ -3335,8 +3364,10 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); - hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> + MPT_PBL_BA_ADDR_S)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, + upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S)); return 0; } @@ -3582,14 +3613,14 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ, to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); - hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> 3); - hr_reg_write(cq_context, CQC_CQE_BA_H, (dma_handle >> (32 + 3))); + hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S); + hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S); hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN, hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB); hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L, ((u32)hr_cq->db.dma) >> 1); hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H, - hr_cq->db.dma >> 32); + hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S); hr_reg_write(cq_context, CQC_CQ_MAX_CNT, HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM); hr_reg_write(cq_context, CQC_CQ_PERIOD, @@ -4218,8 +4249,7 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, } static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *context) { hr_reg_write(context, QPC_SGE_SHIFT, to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, @@ -4241,7 +4271,6 @@ static inline int get_pdn(struct ib_pd *ib_pd) } static void modify_qp_reset_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4260,7 +4289,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs)); - set_qpc_wqe_cnt(hr_qp, context, qpc_mask); + set_qpc_wqe_cnt(hr_qp, context); /* No VLAN need to set 0xFFF */ hr_reg_write(context, QPC_VLAN_ID, 0xfff); @@ -4301,7 +4330,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, } static void modify_qp_init_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4522,16 +4550,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } - hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> 4); + hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S); hr_reg_clear(qpc_mask, QPC_TRRL_BA_L); - context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4)); + context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S); qpc_mask->trrl_ba = 0; - hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4)); + hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S); hr_reg_clear(qpc_mask, QPC_TRRL_BA_H); - context->irrl_ba = cpu_to_le32(irrl_ba >> 6); + context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S); qpc_mask->irrl_ba = 0; - hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6)); + hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S); hr_reg_clear(qpc_mask, QPC_IRRL_BA_H); hr_reg_enable(context, QPC_RMT_E2E); @@ -4593,8 +4621,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX); hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX); +#define MAX_LP_SGEN 3 /* rocee send 2^lp_sgen_ini segs every time */ - hr_reg_write(context, QPC_LP_SGEN_INI, 3); + hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN); hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI); if (udata && ibqp->qp_type == IB_QPT_RC && @@ -4620,8 +4649,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return 0; } -static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, +static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4686,7 +4714,7 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1); list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { - if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) { + if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { *dip_idx = hr_dip->dip_idx; goto out; } @@ -4829,6 +4857,69 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return 0; } +static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp, + u8 *tc_mode, u8 *priority) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + if (!ops->get_dscp_prio) + return -EOPNOTSUPP; + + return ops->get_dscp_prio(handle, dscp, tc_mode, priority); +} + +bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl) +{ + u32 max_sl; + + max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); + if (unlikely(sl > max_sl)) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to set SL(%u). Shouldn't be larger than %u.\n", + sl, max_sl); + return false; + } + + return true; +} + +static int hns_roce_set_sl(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh), + &hr_qp->tc_mode, &hr_qp->priority); + if (ret && ret != -EOPNOTSUPP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + ibdev_err_ratelimited(ibdev, + "failed to get dscp, ret = %d.\n", ret); + return ret; + } + + if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + hr_qp->sl = hr_qp->priority; + else + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + + if (!check_sl_valid(hr_dev, hr_qp->sl)) + return -EINVAL; + + hr_reg_write(context, QPC_SL, hr_qp->sl); + hr_reg_clear(qpc_mask, QPC_SL); + + return 0; +} + static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4844,25 +4935,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, int is_roce_protocol; u16 vlan_id = 0xffff; bool is_udp = false; - u32 max_sl; u8 ib_port; u8 hr_port; int ret; - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); - if (unlikely(sl > max_sl)) { - ibdev_err_ratelimited(ibdev, - "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n", - sl, max_sl); - return -EINVAL; - } - /* * If free_mr_en of qp is set, it means that this qp comes from * free mr. This qp will perform the loopback operation. * In the loopback scenario, only sl needs to be set. */ if (hr_qp->free_mr_en) { + if (!check_sl_valid(hr_dev, sl)) + return -EINVAL; hr_reg_write(context, QPC_SL, sl); hr_reg_clear(qpc_mask, QPC_SL); hr_qp->sl = sl; @@ -4932,11 +5016,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - hr_qp->sl = sl; - hr_reg_write(context, QPC_SL, hr_qp->sl); - hr_reg_clear(qpc_mask, QPC_SL); - - return 0; + return hns_roce_set_sl(ibqp, attr, context, qpc_mask); } static bool check_qp_state(enum ib_qp_state cur_state, @@ -4983,15 +5063,14 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); - modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); + modify_qp_reset_to_init(ibqp, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - modify_qp_init_to_init(ibqp, attr, context, qpc_mask); + modify_qp_init_to_init(ibqp, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask, udata); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { - ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, - qpc_mask); + ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask); } return ret; @@ -6085,33 +6164,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) !!(eq->cons_index & eq->entries)) ? ceqe : NULL; } -static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq) { - struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); - irqreturn_t ceqe_found = IRQ_NONE; - u32 cqn; - - while (ceqe) { - /* Make sure we read CEQ entry after we have checked the - * ownership bit - */ - dma_rmb(); + queue_work(system_bh_wq, &eq->work); - cqn = hr_reg_read(ceqe, CEQE_CQN); - - hns_roce_cq_completion(hr_dev, cqn); - - ++eq->cons_index; - ceqe_found = IRQ_HANDLED; - atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); - - ceqe = next_ceqe_sw_v2(eq); - } - - update_eq_db(eq); - - return IRQ_RETVAL(ceqe_found); + return IRQ_HANDLED; } static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) @@ -6122,7 +6179,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ - int_work = hns_roce_v2_ceq_int(hr_dev, eq); + int_work = hns_roce_v2_ceq_int(eq); else /* Asynchronous event interrupt */ int_work = hns_roce_v2_aeq_int(hr_dev, eq); @@ -6334,9 +6391,16 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag); } -static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + hns_roce_mtr_destroy(hr_dev, &eq->mtr); +} + +static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) { struct device *dev = hr_dev->dev; + int eqn = eq->eqn; int ret; u8 cmd; @@ -6347,12 +6411,9 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M); if (ret) - dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn); -} + dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn); -static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) -{ - hns_roce_mtr_destroy(hr_dev, &eq->mtr); + free_eq_buf(hr_dev, eq); } static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) @@ -6490,6 +6551,34 @@ free_cmd_mbox: return ret; } +static void hns_roce_ceq_work(struct work_struct *work) +{ + struct hns_roce_eq *eq = from_work(eq, work, work); + struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); + struct hns_roce_dev *hr_dev = eq->hr_dev; + int ceqe_num = 0; + u32 cqn; + + while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) { + /* Make sure we read CEQ entry after we have checked the + * ownership bit + */ + dma_rmb(); + + cqn = hr_reg_read(ceqe, CEQE_CQN); + + hns_roce_cq_completion(hr_dev, cqn); + + ++eq->cons_index; + ++ceqe_num; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); + + ceqe = next_ceqe_sw_v2(eq); + } + + update_eq_db(eq); +} + static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, int comp_num, int aeq_num, int other_num) { @@ -6521,21 +6610,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { - if (j < other_num) + if (j < other_num) { ret = request_irq(hr_dev->irq[j], hns_roce_v2_msix_interrupt_abn, 0, hr_dev->irq_names[j], hr_dev); - - else if (j < (other_num + comp_num)) + } else if (j < (other_num + comp_num)) { + INIT_WORK(&eq_table->eq[j - other_num].work, + hns_roce_ceq_work); ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j + aeq_num], &eq_table->eq[j - other_num]); - else + } else { ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j - comp_num], &eq_table->eq[j - other_num]); + } + if (ret) { dev_err(hr_dev->dev, "request irq error!\n"); goto err_request_failed; @@ -6545,12 +6637,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, return 0; err_request_failed: - for (j -= 1; j >= 0; j--) - if (j < other_num) + for (j -= 1; j >= 0; j--) { + if (j < other_num) { free_irq(hr_dev->irq[j], hr_dev); - else - free_irq(eq_table->eq[j - other_num].irq, - &eq_table->eq[j - other_num]); + continue; + } + free_irq(eq_table->eq[j - other_num].irq, + &eq_table->eq[j - other_num]); + if (j < other_num + comp_num) + cancel_work_sync(&eq_table->eq[j - other_num].work); + } err_kzalloc_failed: for (i -= 1; i >= 0; i--) @@ -6571,8 +6667,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev) for (i = 0; i < hr_dev->caps.num_other_vectors; i++) free_irq(hr_dev->irq[i], hr_dev); - for (i = 0; i < eq_num; i++) + for (i = 0; i < eq_num; i++) { free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]); + if (i < hr_dev->caps.num_comp_vectors) + cancel_work_sync(&hr_dev->eq_table.eq[i].work); + } for (i = 0; i < irq_num; i++) kfree(hr_dev->irq_names[i]); @@ -6661,7 +6760,7 @@ err_request_irq_fail: err_create_eq_fail: for (i -= 1; i >= 0; i--) - free_eq_buf(hr_dev, &eq_table->eq[i]); + hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); return ret; @@ -6681,11 +6780,8 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) __hns_roce_free_irq(hr_dev); destroy_workqueue(hr_dev->irq_workq); - for (i = 0; i < eq_num; i++) { - hns_roce_v2_destroy_eqc(hr_dev, i); - - free_eq_buf(hr_dev, &eq_table->eq[i]); - } + for (i = 0; i < eq_num; i++) + hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); } @@ -6736,6 +6832,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .query_srqc = hns_roce_v2_query_srqc, .query_sccc = hns_roce_v2_query_sccc, .query_hw_counter = hns_roce_hw_v2_query_counter, + .get_dscp = hns_roce_hw_v2_get_dscp, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index dfed6b4ddb..c65f68a14a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -224,6 +224,12 @@ enum hns_roce_opcode_type { HNS_SWITCH_PARAMETER_CFG = 0x1033, }; +#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +struct hns_roce_cmdq_tx_timeout_map { + u16 opcode; + u32 tx_timeout; +}; + enum { TYPE_CRQ, TYPE_CSQ, @@ -276,6 +282,10 @@ struct hns_roce_v2_cq_context { __le32 byte_64_se_cqe_idx; }; +#define CQC_CQE_BA_L_S 3 +#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S) +#define CQC_CQE_DB_RECORD_ADDR_H_S 32 + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -447,6 +457,12 @@ struct hns_roce_v2_qp_context { struct hns_roce_v2_qp_context_ex ext; }; +#define QPC_TRRL_BA_L_S 4 +#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S) +#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S) +#define QPC_IRRL_BA_L_S 6 +#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S) + #define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l) #define QPC_TST QPC_FIELD_LOC(2, 0) @@ -716,6 +732,9 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_PBL_BUF_ADDR_S 6 +#define MPT_PBL_BA_ADDR_S 3 + #define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) #define MPT_ST MPT_FIELD_LOC(1, 0) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d202258368..4cb0af7335 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -429,6 +429,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, return 0; error_fail_copy_to_udata: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&context->page_mutex); hns_roce_dealloc_uar_entry(context); error_fail_uar_entry: @@ -445,6 +448,10 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&context->page_mutex); + hns_roce_dealloc_uar_entry(context); ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); @@ -933,6 +940,15 @@ err_unmap_dmpt: return ret; } +static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_bitmap(hr_dev); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&hr_dev->pgdir_mutex); +} + /** * hns_roce_setup_hca - setup host channel adapter * @hr_dev: pointer to hns roce device @@ -981,6 +997,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) err_uar_table_free: ida_destroy(&hr_dev->uar_ida.ida); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&hr_dev->pgdir_mutex); + return ret; } @@ -1126,7 +1146,7 @@ error_failed_register_device: hr_dev->hw->hw_exit(hr_dev); error_failed_engine_init: - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); error_failed_setup_hca: hns_roce_cleanup_hem(hr_dev); @@ -1156,7 +1176,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev); if (hr_dev->cmd_mod) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 80c050d7d0..846da8c78b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -162,7 +162,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, if (mr->type != MR_TYPE_FRMR) ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); else - ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); + ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); if (ret) { dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; @@ -443,6 +443,11 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct hns_roce_mtr *mtr = &mr->pbl_mtr; int ret, sg_num = 0; + if (!IS_ALIGNED(*sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) || + ibmr->page_size < HNS_HW_PAGE_SIZE || + ibmr->page_size > HNS_HW_MAX_PAGE_SIZE) + return sg_num; + mr->npages = 0; mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, sizeof(dma_addr_t), GFP_KERNEL); @@ -755,7 +760,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, + npage = hns_roce_get_umem_bufs(pages, page_count, mtr->umem, page_shift); else npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f35a66325d..1de384ce4d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -410,7 +410,8 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) bankid = get_qp_bankid(hr_qp->qpn); - ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + ida_free(&hr_dev->qp_table.bank[bankid].ida, + hr_qp->qpn / HNS_ROCE_QP_BANK_NUM); mutex_lock(&hr_dev->qp_table.bank_mutex); hr_dev->qp_table.bank[bankid].inuse--; @@ -531,13 +532,15 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi, { unsigned int inline_sge; - inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; + if (!max_inline_data) + return 0; /* * if max_inline_data less than * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, * In addition to ud's mode, no need to extend sge. */ + inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE) inline_sge = 0; @@ -1117,7 +1120,6 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, - struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct hns_roce_qp *hr_qp) @@ -1140,7 +1142,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); - return ret; + goto err_out; } if (!udata) { @@ -1148,7 +1150,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", ret); - return ret; + goto err_out; } } @@ -1219,6 +1221,8 @@ err_qpn: free_qp_buf(hr_dev, hr_qp); err_buf: free_kernel_wrid(hr_qp); +err_out: + mutex_destroy(&hr_qp->mutex); return ret; } @@ -1234,6 +1238,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, free_qp_buf(hr_dev, hr_qp); free_kernel_wrid(hr_qp); free_qp_db(hr_dev, hr_qp, udata); + mutex_destroy(&hr_qp->mutex); } static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, @@ -1271,7 +1276,6 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, struct ib_device *ibdev = qp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); struct hns_roce_qp *hr_qp = to_hr_qp(qp); - struct ib_pd *pd = qp->pd; int ret; ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); @@ -1286,7 +1290,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; } - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp); if (ret) ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); @@ -1386,6 +1390,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_ib_modify_qp_resp resp = {}; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); enum ib_qp_state cur_state, new_state; int ret = -EINVAL; @@ -1427,6 +1432,18 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state, new_state, udata); + if (ret) + goto out; + + if (udata && udata->outlen) { + resp.tc_mode = hr_qp->tc_mode; + resp.priority = hr_qp->sl; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to copy modify qp resp.\n"); + } out: mutex_unlock(&hr_qp->mutex); @@ -1561,5 +1578,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); + mutex_destroy(&hr_dev->qp_table.bank_mutex); + mutex_destroy(&hr_dev->qp_table.scc_mutex); kfree(hr_dev->qp_table.idx_table.spare_idx); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8f48c6723e..c9b8233f4b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -250,7 +250,7 @@ static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); } -static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +static int alloc_srq_wrid(struct hns_roce_srq *srq) { srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) @@ -297,7 +297,7 @@ static int set_srq_basic_param(struct hns_roce_srq *srq, max_sge = proc_srq_sge(hr_dev, srq, !!udata); if (attr->max_wr > hr_dev->caps.max_srq_wrs || - attr->max_sge > max_sge) { + attr->max_sge > max_sge || !attr->max_sge) { ibdev_err(&hr_dev->ib_dev, "invalid SRQ attr, depth = %u, sge = %u.\n", attr->max_wr, attr->max_sge); @@ -366,7 +366,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, goto err_idx; if (!udata) { - ret = alloc_srq_wrid(hr_dev, srq); + ret = alloc_srq_wrid(srq); if (ret) goto err_wqe_buf; } @@ -518,6 +518,7 @@ err_srq_db: err_srq_buf: free_srq_buf(hr_dev, srq); err_out: + mutex_destroy(&srq->mutex); atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]); return ret; @@ -532,6 +533,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srqn(hr_dev, srq); free_srq_db(hr_dev, srq, udata); free_srq_buf(hr_dev, srq); + mutex_destroy(&srq->mutex); return 0; } diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 1ee7a4e0d8..36bb7e5ce6 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1985,7 +1985,8 @@ static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip, __be32 dst_ipaddr = htonl(dst_ip); __be32 src_ipaddr = htonl(src_ip); - rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0, + RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) { ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index 89fcc09ded..c6a3fd57a1 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -9,23 +9,23 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct mana_ib_create_cq_resp resp = {}; + struct mana_ib_ucontext *mana_ucontext; struct ib_device *ibdev = ibcq->device; struct mana_ib_create_cq ucmd = {}; struct mana_ib_dev *mdev; - struct gdma_context *gc; + bool is_rnic_cq; + u32 doorbell; int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); - gc = mdev_to_gc(mdev); - if (udata->inlen < sizeof(ucmd)) - return -EINVAL; + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors; + cq->cq_handle = INVALID_MANA_HANDLE; - if (attr->comp_vector > gc->max_num_queues) + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags)) return -EINVAL; - cq->comp_vector = attr->comp_vector; - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); if (err) { ibdev_dbg(ibdev, @@ -33,7 +33,9 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return err; } - if (attr->cqe > mdev->adapter_caps.max_qp_wr) { + is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ); + + if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); return -EINVAL; } @@ -45,7 +47,41 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return err; } + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, + ibucontext); + doorbell = mana_ucontext->doorbell; + + if (is_rnic_cq) { + err = mana_ib_gd_create_cq(mdev, cq, doorbell); + if (err) { + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err); + goto err_destroy_queue; + } + + err = mana_ib_install_cq_cb(mdev, cq); + if (err) { + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err); + goto err_destroy_rnic_cq; + } + } + + resp.cqid = cq->queue.id; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); + goto err_remove_cq_cb; + } + return 0; + +err_remove_cq_cb: + mana_ib_remove_cq_cb(mdev, cq); +err_destroy_rnic_cq: + mana_ib_gd_destroy_cq(mdev, cq); +err_destroy_queue: + mana_ib_destroy_queue(mdev, &cq->queue); + + return err; } int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) @@ -53,15 +89,15 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); struct ib_device *ibdev = ibcq->device; struct mana_ib_dev *mdev; - struct gdma_context *gc; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); - gc = mdev_to_gc(mdev); - if (cq->queue.id != INVALID_QUEUE_ID) { - kfree(gc->cq_table[cq->queue.id]); - gc->cq_table[cq->queue.id] = NULL; - } + mana_ib_remove_cq_cb(mdev, cq); + + /* Ignore return code as there is not much we can do about it. + * The error message is printed inside. + */ + mana_ib_gd_destroy_cq(mdev, cq); mana_ib_destroy_queue(mdev, &cq->queue); @@ -96,3 +132,14 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) gc->cq_table[cq->queue.id] = gdma_cq; return 0; } + +void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + + if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID) + return; + + kfree(gc->cq_table[cq->queue.id]); + gc->cq_table[cq->queue.id] = NULL; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 6fa902ee80..7bb7e06392 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -5,6 +5,7 @@ #include "mana_ib.h" #include <net/mana/mana_auxiliary.h> +#include <net/addrconf.h> MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver"); MODULE_LICENSE("GPL"); @@ -15,6 +16,7 @@ static const struct ib_device_ops mana_ib_dev_ops = { .driver_id = RDMA_DRIVER_MANA, .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION, + .add_gid = mana_ib_gd_add_gid, .alloc_pd = mana_ib_alloc_pd, .alloc_ucontext = mana_ib_alloc_ucontext, .create_cq = mana_ib_create_cq, @@ -23,18 +25,21 @@ static const struct ib_device_ops mana_ib_dev_ops = { .create_wq = mana_ib_create_wq, .dealloc_pd = mana_ib_dealloc_pd, .dealloc_ucontext = mana_ib_dealloc_ucontext, + .del_gid = mana_ib_gd_del_gid, .dereg_mr = mana_ib_dereg_mr, .destroy_cq = mana_ib_destroy_cq, .destroy_qp = mana_ib_destroy_qp, .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, .destroy_wq = mana_ib_destroy_wq, .disassociate_ucontext = mana_ib_disassociate_ucontext, + .get_link_layer = mana_ib_get_link_layer, .get_port_immutable = mana_ib_get_port_immutable, .mmap = mana_ib_mmap, .modify_qp = mana_ib_modify_qp, .modify_wq = mana_ib_modify_wq, .query_device = mana_ib_query_device, .query_gid = mana_ib_query_gid, + .query_pkey = mana_ib_query_pkey, .query_port = mana_ib_query_port, .reg_user_mr = mana_ib_reg_user_mr, @@ -51,8 +56,10 @@ static int mana_ib_probe(struct auxiliary_device *adev, { struct mana_adev *madev = container_of(adev, struct mana_adev, adev); struct gdma_dev *mdev = madev->mdev; + struct net_device *ndev; struct mana_context *mc; struct mana_ib_dev *dev; + u8 mac_addr[ETH_ALEN]; int ret; mc = mdev->driver_data; @@ -74,9 +81,26 @@ static int mana_ib_probe(struct auxiliary_device *adev, * num_comp_vectors needs to set to the max MSIX index * when interrupts and event queues are implemented */ - dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; dev->ib_dev.dev.parent = mdev->gdma_context->dev; + rcu_read_lock(); /* required to get primary netdev */ + ndev = mana_get_primary_netdev_rcu(mc, 0); + if (!ndev) { + rcu_read_unlock(); + ret = -ENODEV; + ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); + goto free_ib_device; + } + ether_addr_copy(mac_addr, ndev->dev_addr); + addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); + ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); + rcu_read_unlock(); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); + goto free_ib_device; + } + ret = mana_gd_register_device(&mdev->gdma_context->mana_ib); if (ret) { ibdev_err(&dev->ib_dev, "Failed to register device, ret %d", @@ -92,15 +116,36 @@ static int mana_ib_probe(struct auxiliary_device *adev, goto deregister_device; } + ret = mana_ib_create_eqs(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); + goto deregister_device; + } + + ret = mana_ib_gd_create_rnic_adapter(dev); + if (ret) + goto destroy_eqs; + + ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", + ret); + goto destroy_rnic; + } + ret = ib_register_device(&dev->ib_dev, "mana_%d", mdev->gdma_context->dev); if (ret) - goto deregister_device; + goto destroy_rnic; dev_set_drvdata(&adev->dev, dev); return 0; +destroy_rnic: + mana_ib_gd_destroy_rnic_adapter(dev); +destroy_eqs: + mana_ib_destroy_eqs(dev); deregister_device: mana_gd_deregister_device(dev->gdma_dev); free_ib_device: @@ -113,9 +158,9 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); - + mana_ib_gd_destroy_rnic_adapter(dev); + mana_ib_destroy_eqs(dev); mana_gd_deregister_device(dev->gdma_dev); - ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 4524c6b807..2a41135764 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -261,9 +261,7 @@ int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, } queue->umem = umem; - ibdev_dbg(&mdev->ib_dev, - "create_dma_region ret %d gdma_region 0x%llx\n", - err, queue->gdma_region); + ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region); return 0; free_umem: @@ -527,11 +525,18 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { - /* - * This version only support RAW_PACKET - * other values need to be filled for other types - */ + struct ib_port_attr attr; + int err; + + err = ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + if (port_num == 1) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; return 0; } @@ -557,7 +562,42 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, int mana_ib_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { - /* This version doesn't return port properties */ + struct net_device *ndev = mana_ib_get_netdev(ibdev, port); + + if (!ndev) + return -EINVAL; + + memset(props, 0, sizeof(*props)); + props->max_mtu = IB_MTU_4096; + props->active_mtu = ib_mtu_int_to_enum(ndev->mtu); + + if (netif_carrier_ok(ndev) && netif_running(ndev)) { + props->state = IB_PORT_ACTIVE; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; + } else { + props->state = IB_PORT_DOWN; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; + } + + props->active_width = IB_WIDTH_4X; + props->active_speed = IB_SPEED_EDR; + props->pkey_tbl_len = 1; + if (port == 1) + props->gid_tbl_len = 16; + + return 0; +} + +enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) +{ + if (index != 0) + return -EINVAL; + *pkey = IB_DEFAULT_PKEY_FULL; return 0; } @@ -613,3 +653,238 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) return 0; } + +int mana_ib_create_eqs(struct mana_ib_dev *mdev) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + struct gdma_queue_spec spec = {}; + int err, i; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = EQ_SIZE; + spec.eq.callback = NULL; + spec.eq.context = mdev; + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + spec.eq.msix_index = 0; + + err = mana_gd_create_mana_eq(&gc->mana_ib, &spec, &mdev->fatal_err_eq); + if (err) + return err; + + mdev->eqs = kcalloc(mdev->ib_dev.num_comp_vectors, sizeof(struct gdma_queue *), + GFP_KERNEL); + if (!mdev->eqs) { + err = -ENOMEM; + goto destroy_fatal_eq; + } + + for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) { + spec.eq.msix_index = (i + 1) % gc->num_msix_usable; + err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]); + if (err) + goto destroy_eqs; + } + + return 0; + +destroy_eqs: + while (i-- > 0) + mana_gd_destroy_queue(gc, mdev->eqs[i]); + kfree(mdev->eqs); +destroy_fatal_eq: + mana_gd_destroy_queue(gc, mdev->fatal_err_eq); + return err; +} + +void mana_ib_destroy_eqs(struct mana_ib_dev *mdev) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + int i; + + mana_gd_destroy_queue(gc, mdev->fatal_err_eq); + + for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) + mana_gd_destroy_queue(gc, mdev->eqs[i]); + + kfree(mdev->eqs); +} + +int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev) +{ + struct mana_rnic_create_adapter_resp resp = {}; + struct mana_rnic_create_adapter_req req = {}; + struct gdma_context *gc = mdev_to_gc(mdev); + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp)); + req.hdr.req.msg_version = GDMA_MESSAGE_V2; + req.hdr.dev_id = gc->mana_ib.dev_id; + req.notify_eq_id = mdev->fatal_err_eq->id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err); + return err; + } + mdev->adapter_handle = resp.adapter; + + return 0; +} + +int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev) +{ + struct mana_rnic_destroy_adapter_resp resp = {}; + struct mana_rnic_destroy_adapter_req req = {}; + struct gdma_context *gc; + int err; + + gc = mdev_to_gc(mdev); + mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to destroy RNIC adapter err %d", err); + return err; + } + + return 0; +} + +int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context) +{ + struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev); + enum rdma_network_type ntype = rdma_gid_attr_network_type(attr); + struct mana_rnic_config_addr_resp resp = {}; + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_config_addr_req req = {}; + int err; + + if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) { + ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype); + return -EINVAL; + } + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.op = ADDR_OP_ADD; + req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4; + copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err); + return err; + } + + return 0; +} + +int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context) +{ + struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev); + enum rdma_network_type ntype = rdma_gid_attr_network_type(attr); + struct mana_rnic_config_addr_resp resp = {}; + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_config_addr_req req = {}; + int err; + + if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) { + ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype); + return -EINVAL; + } + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.op = ADDR_OP_REMOVE; + req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4; + copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err); + return err; + } + + return 0; +} + +int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac) +{ + struct mana_rnic_config_mac_addr_resp resp = {}; + struct mana_rnic_config_mac_addr_req req = {}; + struct gdma_context *gc = mdev_to_gc(mdev); + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.op = op; + copy_in_reverse(req.mac_addr, mac, ETH_ALEN); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to config Mac addr err %d", err); + return err; + } + + return 0; +} + +int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_create_cq_resp resp = {}; + struct mana_rnic_create_cq_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.gdma_region = cq->queue.gdma_region; + req.eq_id = mdev->eqs[cq->comp_vector]->id; + req.doorbell_page = doorbell; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create cq err %d", err); + return err; + } + + cq->queue.id = resp.cq_id; + cq->cq_handle = resp.cq_handle; + /* The GDMA region is now owned by the CQ handle */ + cq->queue.gdma_region = GDMA_INVALID_DMA_REGION; + + return 0; +} + +int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_destroy_cq_resp resp = {}; + struct mana_rnic_destroy_cq_req req = {}; + int err; + + if (cq->cq_handle == INVALID_MANA_HANDLE) + return 0; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.cq_handle = cq->cq_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to destroy cq err %d", err); + return err; + } + + return 0; +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 6acb5c281c..68c3b4f0fa 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -54,16 +54,17 @@ struct mana_ib_queue { struct mana_ib_dev { struct ib_device ib_dev; struct gdma_dev *gdma_dev; + mana_handle_t adapter_handle; + struct gdma_queue *fatal_err_eq; + struct gdma_queue **eqs; struct mana_ib_adapter_caps adapter_caps; }; struct mana_ib_wq { struct ib_wq ibwq; - struct ib_umem *umem; + struct mana_ib_queue queue; int wqe; u32 wq_buf_size; - u64 gdma_region; - u64 id; mana_handle_t rx_object; }; @@ -91,17 +92,14 @@ struct mana_ib_cq { struct mana_ib_queue queue; int cqe; u32 comp_vector; + mana_handle_t cq_handle; }; struct mana_ib_qp { struct ib_qp ibqp; - /* Work queue info */ - struct ib_umem *sq_umem; - int sqe; - u64 sq_gdma_region; - u64 sq_id; - mana_handle_t tx_object; + mana_handle_t qp_handle; + struct mana_ib_queue raw_sq; /* The port on the IB device, starting with 1 */ u32 port; @@ -118,6 +116,12 @@ struct mana_ib_rwq_ind_table { enum mana_ib_command_code { MANA_IB_GET_ADAPTER_CAP = 0x30001, + MANA_IB_CREATE_ADAPTER = 0x30002, + MANA_IB_DESTROY_ADAPTER = 0x30003, + MANA_IB_CONFIG_IP_ADDR = 0x30004, + MANA_IB_CONFIG_MAC_ADDR = 0x30005, + MANA_IB_CREATE_CQ = 0x30008, + MANA_IB_DESTROY_CQ = 0x30009, }; struct mana_ib_query_adapter_caps_req { @@ -146,6 +150,86 @@ struct mana_ib_query_adapter_caps_resp { u32 max_inline_data_size; }; /* HW Data */ +struct mana_rnic_create_adapter_req { + struct gdma_req_hdr hdr; + u32 notify_eq_id; + u32 reserved; + u64 feature_flags; +}; /*HW Data */ + +struct mana_rnic_create_adapter_resp { + struct gdma_resp_hdr hdr; + mana_handle_t adapter; +}; /* HW Data */ + +struct mana_rnic_destroy_adapter_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; +}; /*HW Data */ + +struct mana_rnic_destroy_adapter_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + +enum mana_ib_addr_op { + ADDR_OP_ADD = 1, + ADDR_OP_REMOVE = 2, +}; + +enum sgid_entry_type { + SGID_TYPE_IPV4 = 1, + SGID_TYPE_IPV6 = 2, +}; + +struct mana_rnic_config_addr_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + enum mana_ib_addr_op op; + enum sgid_entry_type sgid_type; + u8 ip_addr[16]; +}; /* HW Data */ + +struct mana_rnic_config_addr_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + +struct mana_rnic_config_mac_addr_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + enum mana_ib_addr_op op; + u8 mac_addr[ETH_ALEN]; + u8 reserved[6]; +}; /* HW Data */ + +struct mana_rnic_config_mac_addr_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + +struct mana_rnic_create_cq_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + u64 gdma_region; + u32 eq_id; + u32 doorbell_page; +}; /* HW Data */ + +struct mana_rnic_create_cq_resp { + struct gdma_resp_hdr hdr; + mana_handle_t cq_handle; + u32 cq_id; + u32 reserved; +}; /* HW Data */ + +struct mana_rnic_destroy_cq_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + mana_handle_t cq_handle; +}; /* HW Data */ + +struct mana_rnic_destroy_cq_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) { return mdev->gdma_dev->gdma_context; @@ -162,7 +246,16 @@ static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 return mc->ports[port - 1]; } +static inline void copy_in_reverse(u8 *dst, const u8 *src, u32 size) +{ + u32 i; + + for (i = 0; i < size; i++) + dst[size - 1 - i] = src[i]; +} + int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq); +void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq); int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, mana_handle_t *gdma_region); @@ -239,4 +332,26 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev); + +int mana_ib_create_eqs(struct mana_ib_dev *mdev); + +void mana_ib_destroy_eqs(struct mana_ib_dev *mdev); + +int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev); + +int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev); + +int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); + +enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num); + +int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context); + +int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context); + +int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac); + +int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell); + +int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq); #endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 13a49d8fd4..887b09dd86 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -136,7 +136,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, } ibdev_dbg(ibdev, - "create_dma_region ret %d gdma_region %llx\n", err, + "created dma region for user-mr 0x%llx\n", dma_region_handle); mr_params.pd_handle = pd->pd_handle; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index d7485ee6a6..ba13c5abf8 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -15,15 +15,13 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, struct mana_port_context *mpc = netdev_priv(ndev); struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; - mana_handle_t *req_indir_tab; struct gdma_context *gc; u32 req_buf_size; int i, err; gc = mdev_to_gc(dev); - req_buf_size = - sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE; + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -44,20 +42,20 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, req->rss_enable = true; req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; - req->indir_tab_offset = sizeof(*req); + req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, + indir_tab); req->update_indir_tab = true; req->cqe_coalescing_enable = 1; - req_indir_tab = (mana_handle_t *)(req + 1); /* The ind table passed to the hardware must have * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb * ind_table to MANA_INDIRECT_TABLE_SIZE if required */ ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size); for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { - req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; + req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i, - req_indir_tab[i]); + req->indir_tab[i]); } req->update_hashkey = true; @@ -97,11 +95,9 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); struct mana_ib_dev *mdev = container_of(pd->device, struct mana_ib_dev, ib_dev); - struct gdma_context *gc = mdev_to_gc(mdev); struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; struct mana_ib_create_qp_rss_resp resp = {}; struct mana_ib_create_qp_rss ucmd = {}; - struct gdma_queue **gdma_cq_allocated; mana_handle_t *mana_ind_table; struct mana_port_context *mpc; unsigned int ind_tbl_size; @@ -175,13 +171,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } - gdma_cq_allocated = kcalloc(ind_tbl_size, sizeof(*gdma_cq_allocated), - GFP_KERNEL); - if (!gdma_cq_allocated) { - ret = -ENOMEM; - goto fail; - } - qp->port = port; for (i = 0; i < ind_tbl_size; i++) { @@ -194,13 +183,13 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ibcq = ibwq->cq; cq = container_of(ibcq, struct mana_ib_cq, ibcq); - wq_spec.gdma_region = wq->gdma_region; + wq_spec.gdma_region = wq->queue.gdma_region; wq_spec.queue_size = wq->wq_buf_size; cq_spec.gdma_region = cq->queue.gdma_region; cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - eq = &mpc->ac->eqs[cq->comp_vector % gc->max_num_queues]; + eq = &mpc->ac->eqs[cq->comp_vector]; cq_spec.attached_eq = eq->eq->id; ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, @@ -212,18 +201,18 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, } /* The GDMA regions are now owned by the WQ object */ - wq->gdma_region = GDMA_INVALID_DMA_REGION; + wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; cq->queue.gdma_region = GDMA_INVALID_DMA_REGION; - wq->id = wq_spec.queue_index; + wq->queue.id = wq_spec.queue_index; cq->queue.id = cq_spec.queue_index; ibdev_dbg(&mdev->ib_dev, - "ret %d rx_object 0x%llx wq id %llu cq id %llu\n", - ret, wq->rx_object, wq->id, cq->queue.id); + "rx_object 0x%llx wq id %llu cq id %llu\n", + wq->rx_object, wq->queue.id, cq->queue.id); resp.entries[i].cqid = cq->queue.id; - resp.entries[i].wqid = wq->id; + resp.entries[i].wqid = wq->queue.id; mana_ind_table[i] = wq->rx_object; @@ -231,8 +220,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ret = mana_ib_install_cq_cb(mdev, cq); if (ret) goto fail; - - gdma_cq_allocated[i] = gc->cq_table[cq->queue.id]; } resp.num_entries = i; @@ -252,7 +239,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } - kfree(gdma_cq_allocated); kfree(mana_ind_table); return 0; @@ -264,13 +250,10 @@ fail: wq = container_of(ibwq, struct mana_ib_wq, ibwq); cq = container_of(ibcq, struct mana_ib_cq, ibcq); - gc->cq_table[cq->queue.id] = NULL; - kfree(gdma_cq_allocated[i]); - + mana_ib_remove_cq_cb(mdev, cq); mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); } - kfree(gdma_cq_allocated); kfree(mana_ind_table); return ret; @@ -289,15 +272,12 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, struct mana_ib_ucontext *mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext); - struct gdma_context *gc = mdev_to_gc(mdev); struct mana_ib_create_qp_resp resp = {}; struct mana_ib_create_qp ucmd = {}; - struct gdma_queue *gdma_cq = NULL; struct mana_obj_spec wq_spec = {}; struct mana_obj_spec cq_spec = {}; struct mana_port_context *mpc; struct net_device *ndev; - struct ib_umem *umem; struct mana_eq *eq; int eq_vec; u32 port; @@ -346,55 +326,38 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n", ucmd.sq_buf_addr, ucmd.port); - umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(umem)) { - err = PTR_ERR(umem); - ibdev_dbg(&mdev->ib_dev, - "Failed to get umem for create qp-raw, err %d\n", - err); - goto err_free_vport; - } - qp->sq_umem = umem; - - err = mana_ib_create_zero_offset_dma_region(mdev, qp->sq_umem, - &qp->sq_gdma_region); + err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq); if (err) { ibdev_dbg(&mdev->ib_dev, - "Failed to create dma region for create qp-raw, %d\n", - err); - goto err_release_umem; + "Failed to create queue for create qp-raw, err %d\n", err); + goto err_free_vport; } - ibdev_dbg(&mdev->ib_dev, - "create_dma_region ret %d gdma_region 0x%llx\n", - err, qp->sq_gdma_region); - /* Create a WQ on the same port handle used by the Ethernet */ - wq_spec.gdma_region = qp->sq_gdma_region; + wq_spec.gdma_region = qp->raw_sq.gdma_region; wq_spec.queue_size = ucmd.sq_buf_size; cq_spec.gdma_region = send_cq->queue.gdma_region; cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - eq_vec = send_cq->comp_vector % gc->max_num_queues; + eq_vec = send_cq->comp_vector; eq = &mpc->ac->eqs[eq_vec]; cq_spec.attached_eq = eq->eq->id; err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, - &cq_spec, &qp->tx_object); + &cq_spec, &qp->qp_handle); if (err) { ibdev_dbg(&mdev->ib_dev, "Failed to create wq for create raw-qp, err %d\n", err); - goto err_destroy_dma_region; + goto err_destroy_queue; } /* The GDMA regions are now owned by the WQ object */ - qp->sq_gdma_region = GDMA_INVALID_DMA_REGION; + qp->raw_sq.gdma_region = GDMA_INVALID_DMA_REGION; send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION; - qp->sq_id = wq_spec.queue_index; + qp->raw_sq.id = wq_spec.queue_index; send_cq->queue.id = cq_spec.queue_index; /* Create CQ table entry */ @@ -403,10 +366,10 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, goto err_destroy_wq_obj; ibdev_dbg(&mdev->ib_dev, - "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, - qp->tx_object, qp->sq_id, send_cq->queue.id); + "qp->qp_handle 0x%llx sq id %llu cq id %llu\n", + qp->qp_handle, qp->raw_sq.id, send_cq->queue.id); - resp.sqid = qp->sq_id; + resp.sqid = qp->raw_sq.id; resp.cqid = send_cq->queue.id; resp.tx_vp_offset = pd->tx_vp_offset; @@ -415,23 +378,19 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, ibdev_dbg(&mdev->ib_dev, "Failed copy udata for create qp-raw, %d\n", err); - goto err_release_gdma_cq; + goto err_remove_cq_cb; } return 0; -err_release_gdma_cq: - kfree(gdma_cq); - gc->cq_table[send_cq->queue.id] = NULL; +err_remove_cq_cb: + mana_ib_remove_cq_cb(mdev, send_cq); err_destroy_wq_obj: - mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle); -err_destroy_dma_region: - mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); - -err_release_umem: - ib_umem_release(umem); +err_destroy_queue: + mana_ib_destroy_queue(mdev, &qp->raw_sq); err_free_vport: mana_ib_uncfg_vport(mdev, pd, port); @@ -505,12 +464,9 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) mpc = netdev_priv(ndev); pd = container_of(ibpd, struct mana_ib_pd, ibpd); - mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle); - if (qp->sq_umem) { - mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); - ib_umem_release(qp->sq_umem); - } + mana_ib_destroy_queue(mdev, &qp->raw_sq); mana_ib_uncfg_vport(mdev, pd, qp->port); diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c index 7c9c699625..f959f4b924 100644 --- a/drivers/infiniband/hw/mana/wq.c +++ b/drivers/infiniband/hw/mana/wq.c @@ -13,7 +13,6 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, container_of(pd->device, struct mana_ib_dev, ib_dev); struct mana_ib_create_wq ucmd = {}; struct mana_ib_wq *wq; - struct ib_umem *umem; int err; if (udata->inlen < sizeof(ucmd)) @@ -32,39 +31,18 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr); - umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(umem)) { - err = PTR_ERR(umem); + err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue); + if (err) { ibdev_dbg(&mdev->ib_dev, - "Failed to get umem for create wq, err %d\n", err); + "Failed to create queue for create wq, %d\n", err); goto err_free_wq; } - wq->umem = umem; wq->wqe = init_attr->max_wr; wq->wq_buf_size = ucmd.wq_buf_size; wq->rx_object = INVALID_MANA_HANDLE; - - err = mana_ib_create_zero_offset_dma_region(mdev, wq->umem, &wq->gdma_region); - if (err) { - ibdev_dbg(&mdev->ib_dev, - "Failed to create dma region for create wq, %d\n", - err); - goto err_release_umem; - } - - ibdev_dbg(&mdev->ib_dev, - "create_dma_region ret %d gdma_region 0x%llx\n", - err, wq->gdma_region); - - /* WQ ID is returned at wq_create time, doesn't know the value yet */ - return &wq->ibwq; -err_release_umem: - ib_umem_release(umem); - err_free_wq: kfree(wq); @@ -86,8 +64,7 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev); - mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region); - ib_umem_release(wq->umem); + mana_ib_destroy_queue(mdev, &wq->queue); kfree(wq); diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 111fa88a3b..9a439569ff 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -829,7 +829,7 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) { - char alias_wq_name[15]; + char alias_wq_name[22]; int ret = 0; int i, j; union ib_gid gid; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index a37cfac5e2..dc9cf45d2d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -2158,7 +2158,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, struct mlx4_ib_demux_ctx *ctx, int port) { - char name[12]; + char name[21]; int ret = 0; int i; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9fb8a54423..43660c831b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -264,8 +264,7 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, */ read_lock(&ibdev->port[port_num - 1].roce.netdev_lock); ndev = ibdev->port[port_num - 1].roce.netdev; - if (ndev) - dev_hold(ndev); + dev_hold(ndev); read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock); out: diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f255a12e26..f9abdca349 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -115,6 +115,19 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ page_offset_quantized) +static inline unsigned long +mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf) +{ + /* + * mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able + * to hold any sgl after a move operation. Ideally the mkc page size + * could be changed at runtime to be optimal, but right now the driver + * cannot do that. + */ + return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE, + umem_dmabuf->umem.iova); +} + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4a04cbc5b7..a524181f34 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -705,10 +705,8 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, return err; } - page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, - log_page_size, 0, - umem_dmabuf->umem.iova); - if (unlikely(page_size < PAGE_SIZE)) { + page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf); + if (!page_size) { ib_umem_dmabuf_unmap_pages(umem_dmabuf); err = -EINVAL; } else { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8115ab1071..e2164f8136 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3097,7 +3097,6 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, switch (qp->type) { case MLX5_IB_QPT_DCT: err = create_dct(dev, pd, qp, params); - rdma_restrack_no_track(&qp->ibqp.res); break; case MLX5_IB_QPT_DCI: err = create_dci(dev, pd, qp, params); @@ -3109,9 +3108,9 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = mlx5_ib_create_gsi(pd, qp, params->attr); break; case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: rdma_restrack_no_track(&qp->ibqp.res); fallthrough; + case MLX5_IB_QPT_REG_UMR: default: if (params->udata) err = create_user_qp(dev, pd, qp, params); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 4ac429e720..affcf8fe94 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -156,6 +156,34 @@ static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); } +static int fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + int ret; + + if (qp->type < IB_QPT_DRIVER) + return 0; + + switch (qp->type) { + case MLX5_IB_QPT_REG_UMR: + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, + "REG_UMR"); + break; + case MLX5_IB_QPT_DCT: + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCT"); + break; + case MLX5_IB_QPT_DCI: + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCI"); + break; + default: + return 0; + } + if (ret) + return ret; + + return nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, IB_QPT_DRIVER); +} + static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -168,6 +196,7 @@ static const struct ib_device_ops restrack_ops = { .fill_res_cq_entry_raw = fill_res_cq_entry_raw, .fill_res_mr_entry = fill_res_mr_entry, .fill_res_mr_entry_raw = fill_res_mr_entry_raw, + .fill_res_qp_entry = fill_res_qp_entry, .fill_res_qp_entry_raw = fill_res_qp_entry_raw, .fill_stat_mr_entry = fill_stat_mr_entry, }; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index a51fc68549..259303b990 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -447,7 +447,8 @@ qedr_addr4_resolve(struct qedr_dev *dev, struct rtable *rt = NULL; int rc = 0; - rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0); + rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0, + RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) { DP_ERR(dev, "ip_route_output returned error\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 26c615772b..8ee4edd788 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1359,7 +1359,6 @@ static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd) * sysfs interface. */ -extern const char ib_qib_version[]; extern const struct attribute_group qib_attr_group; extern const struct attribute_group *qib_attr_port_groups[]; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index bf3fa12fe9..4fcbef99e4 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -44,12 +44,6 @@ #include "qib.h" -/* - * The size has to be longer than this string, so we can append - * board/chip information to it in the init code. - */ -const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; - DEFINE_MUTEX(qib_mutex); /* general driver use */ unsigned qib_ibmtu; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 6af57067c3..78dfe98ebc 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -3281,7 +3281,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) qib_free_irq(dd); dd->msi_lo = 0; - if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0) + if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX) < 0) qib_dev_err(dd, "Failed to enable INTx\n"); qib_setup_7220_interrupt(dd); return 1; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index f93906d8fc..9db29916e3 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3471,8 +3471,7 @@ try_intx: pci_irq_vector(dd->pcidev, msixnum), ret); qib_7322_free_irq(dd); - pci_alloc_irq_vectors(dd->pcidev, 1, 1, - PCI_IRQ_LEGACY); + pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX); goto try_intx; } dd->cspec->msix_entries[msixnum].arg = arg; @@ -5143,7 +5142,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_free_irq(dd); - if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0) + if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX) < 0) qib_dev_err(dd, "Failed to enable INTx\n"); qib_setup_7322_interrupt(dd, 0); return 1; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 47bf64ace0..58c1d62d34 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -210,7 +210,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) } if (dd->flags & QIB_HAS_INTX) - flags |= PCI_IRQ_LEGACY; + flags |= PCI_IRQ_INTX; maxvec = (nent && *nent) ? *nent : 1; nvec = pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); if (nvec < 0) diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 41c272980f..53ec7510e4 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -585,13 +585,7 @@ static ssize_t hca_type_show(struct device *device, static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); -static ssize_t version_show(struct device *device, - struct device_attribute *attr, char *buf) -{ - /* The string printed here is already newline-terminated. */ - return sysfs_emit(buf, "%s", (char *)ib_qib_version); -} -static DEVICE_ATTR_RO(version); +static DEVICE_STRING_ATTR_RO(version, 0444, QIB_DRIVER_VERSION); static ssize_t boardversion_show(struct device *device, struct device_attribute *attr, char *buf) @@ -721,7 +715,7 @@ static struct attribute *qib_attributes[] = { &dev_attr_hw_rev.attr, &dev_attr_hca_type.attr, &dev_attr_board_id.attr, - &dev_attr_version.attr, + &dev_attr_version.attr.attr, &dev_attr_nctxts.attr, &dev_attr_nfreectxts.attr, &dev_attr_serial.attr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index a5e8818517..768aad364c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -531,7 +531,7 @@ static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) PCI_IRQ_MSIX); if (ret < 0) { ret = pci_alloc_irq_vectors(pdev, 1, 1, - PCI_IRQ_MSI | PCI_IRQ_LEGACY); + PCI_IRQ_MSI | PCI_IRQ_INTX); if (ret < 0) return ret; } diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 4341965a5e..bdb6b9326b 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -4,7 +4,7 @@ */ #define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi)) -#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rvt_get_ibdev_name(rdi)) +#define RDI_DEV_ASSIGN(rdi) __assign_str(dev) #include "trace_rvt.h" #include "trace_qp.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h index df33c2ca97..a00489e66d 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rvt.h +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -24,7 +24,7 @@ TRACE_EVENT(rvt_dbg, ), TP_fast_assign( RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); + __assign_str(msg); ), TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) ); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index c997b7cbf2..d48af21807 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -122,25 +122,16 @@ void retransmit_timer(struct timer_list *t) spin_lock_irqsave(&qp->state_lock, flags); if (qp->valid) { qp->comp.timeout = 1; - rxe_sched_task(&qp->comp.task); + rxe_sched_task(&qp->send_task); } spin_unlock_irqrestore(&qp->state_lock, flags); } void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - - must_sched = skb_queue_len(&qp->resp_pkts) > 0; - if (must_sched != 0) - rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - + rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED); skb_queue_tail(&qp->resp_pkts, skb); - - if (must_sched) - rxe_sched_task(&qp->comp.task); - else - rxe_run_task(&qp->comp.task); + rxe_sched_task(&qp->send_task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -325,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } return COMPST_ERROR_RETRY; @@ -476,7 +467,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -515,7 +506,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -541,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -654,6 +645,8 @@ int rxe_completer(struct rxe_qp *qp) int ret; unsigned long flags; + qp->req.again = 0; + spin_lock_irqsave(&qp->state_lock, flags); if (!qp->valid || qp_state(qp) == IB_QPS_ERR || qp_state(qp) == IB_QPS_RESET) { @@ -737,7 +730,7 @@ int rxe_completer(struct rxe_qp *qp) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } state = COMPST_DONE; @@ -792,7 +785,7 @@ int rxe_completer(struct rxe_qp *qp) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } goto done; @@ -843,8 +836,9 @@ done: ret = 0; goto out; exit: - ret = -EAGAIN; + ret = (qp->req.again) ? 0 : -EAGAIN; out: + qp->req.again = 0; if (pkt) free_pkt(pkt); return ret; diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index a012522b57..437917a7d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -14,7 +14,7 @@ static const struct rdma_stat_desc rxe_counter_descs[] = { [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", [RXE_CNT_SND_RNR].name = "send_rnr_err", [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", + [RXE_CNT_SENDER_SCHED].name = "ack_deferred", [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", [RXE_CNT_COMP_RETRY].name = "completer_retry_err", diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index 71f4d4fa9d..051f9e1c38 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -18,7 +18,7 @@ enum rxe_counters { RXE_CNT_RCV_RNR, RXE_CNT_SND_RNR, RXE_CNT_RCV_SEQ_ERR, - RXE_CNT_COMPLETER_SCHED, + RXE_CNT_SENDER_SCHED, RXE_CNT_RETRY_EXCEEDED, RXE_CNT_RNR_RETRY_EXCEEDED, RXE_CNT_COMP_RETRY, diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 746110898a..ded4611915 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -164,7 +164,8 @@ void rxe_dealloc(struct ib_device *ib_dev); int rxe_completer(struct rxe_qp *qp); int rxe_requester(struct rxe_qp *qp); -int rxe_responder(struct rxe_qp *qp); +int rxe_sender(struct rxe_qp *qp); +int rxe_receiver(struct rxe_qp *qp); /* rxe_icrc.c */ int rxe_icrc_init(struct rxe_dev *rxe); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index e5827064ab..ca9a82e1c4 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,25 +345,44 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, static void rxe_skb_tx_dtor(struct sk_buff *skb) { - struct sock *sk = skb->sk; - struct rxe_qp *qp = sk->sk_user_data; - int skb_out = atomic_dec_return(&qp->skb_out); + struct net_device *ndev = skb->dev; + struct rxe_dev *rxe; + unsigned int qp_index; + struct rxe_qp *qp; + int skb_out; + + rxe = rxe_get_dev_from_net(ndev); + if (!rxe && is_vlan_dev(ndev)) + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); + if (WARN_ON(!rxe)) + return; + + qp_index = (int)(uintptr_t)skb->sk->sk_user_data; + if (!qp_index) + return; + + qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); + if (!qp) + goto put_dev; - if (unlikely(qp->need_req_skb && - skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) - rxe_sched_task(&qp->req.task); + skb_out = atomic_dec_return(&qp->skb_out); + if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) + rxe_sched_task(&qp->send_task); rxe_put(qp); +put_dev: + ib_device_put(&rxe->ib_dev); + sock_put(skb->sk); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) { int err; + struct sock *sk = pkt->qp->sk->sk; + sock_hold(sk); + skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; - skb->sk = pkt->qp->sk->sk; - - rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) @@ -371,12 +390,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) else err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); - if (unlikely(net_xmit_eval(err))) { - rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); - return -EAGAIN; - } - - return 0; + return err; } /* fix up a send packet to match the packets @@ -384,8 +398,15 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) */ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) { + struct sock *sk = pkt->qp->sk->sk; + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + sock_hold(sk); + skb->sk = sk; + skb->destructor = rxe_skb_tx_dtor; + atomic_inc(&pkt->qp->skb_out); + if (skb->protocol == htons(ETH_P_IP)) skb_pull(skb, sizeof(struct iphdr)); else @@ -432,12 +453,6 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, return err; } - if ((qp_type(qp) != IB_QPT_RC) && - (pkt->mask & RXE_END_MASK)) { - pkt->wqe->state = wqe_state_done; - rxe_sched_task(&qp->comp.task); - } - rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); goto done; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6215c6de3a..67567d6219 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -119,7 +119,7 @@ void rxe_pool_cleanup(struct rxe_pool *pool) int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, bool sleepable) { - int err; + int err = -EINVAL; gfp_t gfp_flags; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) @@ -147,7 +147,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, err_cnt: atomic_dec(&pool->num_elem); - return -EINVAL; + return err; } void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index e3589c0201..d2f7b5195c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) return err; - qp->sk->sk->sk_user_data = qp; + qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index; /* pick a source UDP port number for this QP based on * the source QPN. this spreads traffic for different QPs @@ -265,8 +265,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.opcode = -1; qp->comp.opcode = -1; - rxe_init_task(&qp->req.task, qp, rxe_requester); - rxe_init_task(&qp->comp.task, qp, rxe_completer); + rxe_init_task(&qp->send_task, qp, rxe_sender); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -337,7 +336,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - rxe_init_task(&qp->resp.task, qp, rxe_responder); + rxe_init_task(&qp->recv_task, qp, rxe_receiver); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; @@ -514,14 +513,12 @@ err1: static void rxe_qp_reset(struct rxe_qp *qp) { /* stop tasks from running */ - rxe_disable_task(&qp->resp.task); - rxe_disable_task(&qp->comp.task); - rxe_disable_task(&qp->req.task); + rxe_disable_task(&qp->recv_task); + rxe_disable_task(&qp->send_task); /* drain work and packet queuesc */ - rxe_requester(qp); - rxe_completer(qp); - rxe_responder(qp); + rxe_sender(qp); + rxe_receiver(qp); if (qp->rq.queue) rxe_queue_reset(qp->rq.queue); @@ -548,9 +545,8 @@ static void rxe_qp_reset(struct rxe_qp *qp) cleanup_rd_atomic_resources(qp); /* reenable tasks */ - rxe_enable_task(&qp->resp.task); - rxe_enable_task(&qp->comp.task); - rxe_enable_task(&qp->req.task); + rxe_enable_task(&qp->recv_task); + rxe_enable_task(&qp->send_task); } /* move the qp to the error state */ @@ -562,9 +558,8 @@ void rxe_qp_error(struct rxe_qp *qp) qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ - rxe_sched_task(&qp->resp.task); - rxe_sched_task(&qp->comp.task); - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->recv_task); + rxe_sched_task(&qp->send_task); spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -575,8 +570,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr, spin_lock_irqsave(&qp->state_lock, flags); qp->attr.sq_draining = 1; - rxe_sched_task(&qp->comp.task); - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->send_task); spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -821,19 +815,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work) del_timer_sync(&qp->rnr_nak_timer); } - if (qp->resp.task.func) - rxe_cleanup_task(&qp->resp.task); + if (qp->recv_task.func) + rxe_cleanup_task(&qp->recv_task); - if (qp->req.task.func) - rxe_cleanup_task(&qp->req.task); - - if (qp->comp.task.func) - rxe_cleanup_task(&qp->comp.task); + if (qp->send_task.func) + rxe_cleanup_task(&qp->send_task); /* flush out any receive wr's or pending requests */ - rxe_requester(qp); - rxe_completer(qp); - rxe_responder(qp); + rxe_sender(qp); + rxe_receiver(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d8c41fd626..479c07e6e4 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -108,7 +108,7 @@ void rnr_nak_timer(struct timer_list *t) /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->send_task); } spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -424,7 +424,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, int paylen; int solicited; u32 qp_num; - int ack_req; + int ack_req = 0; /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; @@ -445,8 +445,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; - ack_req = ((pkt->mask & RXE_END_MASK) || - (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); + if (qp_type(qp) != IB_QPT_UD && qp_type(qp) != IB_QPT_UC) + ack_req = ((pkt->mask & RXE_END_MASK) || + (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); if (ack_req) qp->req.noack_pkts = 0; @@ -545,6 +546,8 @@ static void update_wqe_state(struct rxe_qp *qp, if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; + else + wqe->state = wqe_state_done; } else { wqe->state = wqe_state_processing; } @@ -573,30 +576,6 @@ static void update_wqe_psn(struct rxe_qp *qp, qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; } -static void save_state(struct rxe_send_wqe *wqe, - struct rxe_qp *qp, - struct rxe_send_wqe *rollback_wqe, - u32 *rollback_psn) -{ - rollback_wqe->state = wqe->state; - rollback_wqe->first_psn = wqe->first_psn; - rollback_wqe->last_psn = wqe->last_psn; - rollback_wqe->dma = wqe->dma; - *rollback_psn = qp->req.psn; -} - -static void rollback_state(struct rxe_send_wqe *wqe, - struct rxe_qp *qp, - struct rxe_send_wqe *rollback_wqe, - u32 rollback_psn) -{ - wqe->state = rollback_wqe->state; - wqe->first_psn = rollback_wqe->first_psn; - wqe->last_psn = rollback_wqe->last_psn; - wqe->dma = rollback_wqe->dma; - qp->req.psn = rollback_psn; -} - static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; @@ -655,12 +634,6 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->status = IB_WC_SUCCESS; qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); - /* There is no ack coming for local work requests - * which can lead to a deadlock. So go ahead and complete - * it now. - */ - rxe_sched_task(&qp->comp.task); - return 0; } @@ -676,8 +649,6 @@ int rxe_requester(struct rxe_qp *qp) int opcode; int err; int ret; - struct rxe_send_wqe rollback_wqe; - u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; struct rxe_ah *ah; struct rxe_av *av; @@ -786,7 +757,6 @@ int rxe_requester(struct rxe_qp *qp) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_sched_task(&qp->comp.task); goto done; } payload = mtu; @@ -799,9 +769,6 @@ int rxe_requester(struct rxe_qp *qp) pkt.mask = rxe_opcode[opcode].mask; pkt.wqe = wqe; - /* save wqe state before we build and send packet */ - save_state(wqe, qp, &rollback_wqe, &rollback_psn); - av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { rxe_dbg_qp(qp, "Failed no address vector\n"); @@ -834,31 +801,14 @@ int rxe_requester(struct rxe_qp *qp) if (ah) rxe_put(ah); - /* update wqe state as though we had sent it */ - update_wqe_state(qp, wqe, &pkt); - update_wqe_psn(qp, wqe, &pkt, payload); - err = rxe_xmit_packet(qp, &pkt, skb); if (err) { - if (err != -EAGAIN) { - wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; - } - - /* the packet was dropped so reset wqe to the state - * before we sent it so we can try to resend - */ - rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - - /* force a delay until the dropped packet is freed and - * the send queue is drained below the low water mark - */ - qp->need_req_skb = 1; - - rxe_sched_task(&qp->req.task); - goto exit; + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; } + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); update_state(qp, &pkt); /* A non-zero return value will cause rxe_do_task to @@ -878,3 +828,20 @@ exit: out: return ret; } + +int rxe_sender(struct rxe_qp *qp) +{ + int req_ret; + int comp_ret; + + /* process the send queue */ + req_ret = rxe_requester(qp); + + /* process the response queue */ + comp_ret = rxe_completer(qp); + + /* exit the task loop if both requester and completer + * are ready + */ + return (req_ret && comp_ret) ? -EAGAIN : 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index fa2b87c749..6596a85723 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -49,18 +49,8 @@ static char *resp_state_name[] = { /* rxe_recv calls here to add a request packet to the input queue */ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - skb_queue_tail(&qp->req_pkts, skb); - - must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || - (skb_queue_len(&qp->req_pkts) > 1); - - if (must_sched) - rxe_sched_task(&qp->resp.task); - else - rxe_run_task(&qp->resp.task); + rxe_sched_task(&qp->recv_task); } static inline enum resp_states get_req(struct rxe_qp *qp, @@ -1498,7 +1488,7 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify) qp->resp.wqe = NULL; } -int rxe_responder(struct rxe_qp *qp) +int rxe_receiver(struct rxe_qp *qp) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); enum resp_states state; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a7e9510666..de6238ee43 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -905,12 +905,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, /* kickoff processing of any posted wqes */ if (good) - rxe_sched_task(&qp->req.task); - - spin_lock_irqsave(&qp->state_lock, flags); - if (qp_state(qp) == IB_QPS_ERR) - rxe_sched_task(&qp->comp.task); - spin_unlock_irqrestore(&qp->state_lock, flags); + rxe_sched_task(&qp->send_task); return err; } @@ -940,7 +935,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->is_user) { /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task); + rxe_sched_task(&qp->send_task); } else { err = rxe_post_send_kernel(qp, wr, bad_wr); if (err) @@ -1050,7 +1045,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) == IB_QPS_ERR) - rxe_sched_task(&qp->resp.task); + rxe_sched_task(&qp->recv_task); spin_unlock_irqrestore(&qp->state_lock, flags); return err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ccb9d19ffe..3c1354f822 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -113,7 +113,7 @@ struct rxe_req_info { int need_retry; int wait_for_rnr_timer; int noack_pkts; - struct rxe_task task; + int again; }; struct rxe_comp_info { @@ -124,7 +124,6 @@ struct rxe_comp_info { int started_retry; u32 retry_cnt; u32 rnr_retry; - struct rxe_task task; }; enum rdatm_res_state { @@ -196,7 +195,6 @@ struct rxe_resp_info { unsigned int res_head; unsigned int res_tail; struct resp_res *res; - struct rxe_task task; }; struct rxe_qp { @@ -229,6 +227,9 @@ struct rxe_qp { struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; + struct rxe_task send_task; + struct rxe_task recv_task; + struct rxe_req_info req; struct rxe_comp_info comp; struct rxe_resp_info resp; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6f2a688fcc..4e31bb0b64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -238,7 +238,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -265,7 +265,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) if (carrier_status) netif_carrier_on(dev); } else { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } return ret; @@ -329,8 +329,7 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); - if (master) - dev_hold(master); + dev_hold(master); rcu_read_unlock(); if (master) |