summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/usnic
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
commit76cb841cb886eef6b3bee341a2266c76578724ad (patch)
treef5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/infiniband/hw/usnic
parentInitial commit. (diff)
downloadlinux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz
linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
Adding upstream version 4.19.249.upstream/4.19.249upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--drivers/infiniband/hw/usnic/Kconfig10
-rw-r--r--drivers/infiniband/hw/usnic/Makefile16
-rw-r--r--drivers/infiniband/hw/usnic/usnic.h44
-rw-r--r--drivers/infiniband/hw/usnic/usnic_abi.h88
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h41
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_util.h49
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c170
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.h44
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c357
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h129
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib.h133
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c715
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c766
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h109
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c340
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.h44
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c816
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h92
-rw-r--r--drivers/infiniband/hw/usnic/usnic_log.h73
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c214
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.h66
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c618
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h98
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c270
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h88
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c476
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.h118
27 files changed, 5984 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
new file mode 100644
index 000000000..d1dae2af4
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -0,0 +1,10 @@
+config INFINIBAND_USNIC
+ tristate "Verbs support for Cisco VIC"
+ depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+ depends on INFINIBAND_USER_ACCESS
+ select ENIC
+ select NET_VENDOR_CISCO
+ select PCI_IOV
+ ---help---
+ This is a low-level driver for Cisco's Virtual Interface
+ Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile
new file mode 100644
index 000000000..94ae7a1a6
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -Idrivers/net/ethernet/cisco/enic
+
+obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o
+
+usnic_verbs-y=\
+usnic_fwd.o \
+usnic_transport.o \
+usnic_uiom.o \
+usnic_uiom_interval_tree.o \
+usnic_vnic.o \
+usnic_ib_main.o \
+usnic_ib_qp_grp.o \
+usnic_ib_sysfs.o \
+usnic_ib_verbs.o \
+usnic_debugfs.o \
diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h
new file mode 100644
index 000000000..f903502d3
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_H_
+#define USNIC_H_
+
+#define DRV_NAME "usnic_verbs"
+
+#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */
+
+#define DRV_VERSION "1.0.3"
+#define DRV_RELDATE "December 19, 2013"
+
+#endif /* USNIC_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
new file mode 100644
index 000000000..7fe9502ce
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef USNIC_ABI_H
+#define USNIC_ABI_H
+
+/* ABI between userspace and kernel */
+#define USNIC_UVERBS_ABI_VERSION 4
+
+#define USNIC_QP_GRP_MAX_WQS 8
+#define USNIC_QP_GRP_MAX_RQS 8
+#define USNIC_QP_GRP_MAX_CQS 16
+
+enum usnic_transport_type {
+ USNIC_TRANSPORT_UNKNOWN = 0,
+ USNIC_TRANSPORT_ROCE_CUSTOM = 1,
+ USNIC_TRANSPORT_IPV4_UDP = 2,
+ USNIC_TRANSPORT_MAX = 3,
+};
+
+struct usnic_transport_spec {
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ uint32_t sock_fd;
+ } udp;
+ };
+};
+
+struct usnic_ib_create_qp_cmd {
+ struct usnic_transport_spec spec;
+};
+
+/*TODO: Future - usnic_modify_qp needs to pass in generic filters */
+struct usnic_ib_create_qp_resp {
+ u32 vfid;
+ u32 qp_grp_id;
+ u64 bar_bus_addr;
+ u32 bar_len;
+/*
+ * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface
+ * expands the scope of ABI to many files.
+ */
+ u32 wq_cnt;
+ u32 rq_cnt;
+ u32 cq_cnt;
+ u32 wq_idx[USNIC_QP_GRP_MAX_WQS];
+ u32 rq_idx[USNIC_QP_GRP_MAX_RQS];
+ u32 cq_idx[USNIC_QP_GRP_MAX_CQS];
+ u32 transport;
+ u32 reserved[9];
+};
+
+#endif /* USNIC_ABI_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
new file mode 100644
index 000000000..bf7d197a9
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_PKT_HDR_H
+#define USNIC_CMN_PKT_HDR_H
+
+#define USNIC_ROCE_GRH_VER (8)
+#define USNIC_PROTO_VER (1)
+#define USNIC_ROCE_GRH_VER_SHIFT (4)
+
+#endif /* USNIC_COMMON_PKT_HDR_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
new file mode 100644
index 000000000..ddd81294f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_UTIL_H
+#define USNIC_CMN_UTIL_H
+
+#include <net/addrconf.h>
+
+static inline void
+usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
+{
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 2);
+ memcpy(&raw_gid[4], &inaddr, 4);
+ addrconf_addr_eui48(&raw_gid[8], mac);
+}
+
+#endif /* USNIC_COMMON_UTIL_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
new file mode 100644
index 000000000..92dc66cc2
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "usnic.h"
+#include "usnic_log.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_transport.h"
+
+static struct dentry *debugfs_root;
+static struct dentry *flows_dentry;
+
+static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ char buf[500];
+ int res;
+
+ if (*ppos > 0)
+ return 0;
+
+ res = scnprintf(buf, sizeof(buf),
+ "version: %s\n"
+ "build date: %s\n",
+ DRV_VERSION, DRV_RELDATE);
+
+ return simple_read_from_buffer(data, count, ppos, buf, res);
+}
+
+static const struct file_operations usnic_debugfs_buildinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = usnic_debugfs_buildinfo_read
+};
+
+static ssize_t flowinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ int n;
+ int left;
+ char *ptr;
+ char buf[512];
+
+ qp_flow = f->private_data;
+ ptr = buf;
+ left = count;
+
+ if (*ppos > 0)
+ return 0;
+
+ spin_lock(&qp_flow->qp_grp->lock);
+ n = scnprintf(ptr, left,
+ "QP Grp ID: %d Transport: %s ",
+ qp_flow->qp_grp->grp_id,
+ usnic_transport_to_str(qp_flow->trans_type));
+ UPDATE_PTR_LEFT(n, ptr, left);
+ if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ n = scnprintf(ptr, left, "Port_Num:%hu\n",
+ qp_flow->usnic_roce.port_num);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+ n = usnic_transport_sock_to_str(ptr, left,
+ qp_flow->udp.sock);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ spin_unlock(&qp_flow->qp_grp->lock);
+
+ return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
+}
+
+static const struct file_operations flowinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = flowinfo_read,
+};
+
+void usnic_debugfs_init(void)
+{
+ debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+ if (IS_ERR(debugfs_root)) {
+ usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
+ goto out_clear_root;
+ }
+
+ flows_dentry = debugfs_create_dir("flows", debugfs_root);
+ if (IS_ERR_OR_NULL(flows_dentry)) {
+ usnic_err("Failed to create debugfs flow dir with err %ld\n",
+ PTR_ERR(flows_dentry));
+ goto out_free_root;
+ }
+
+ debugfs_create_file("build-info", S_IRUGO, debugfs_root,
+ NULL, &usnic_debugfs_buildinfo_ops);
+ return;
+
+out_free_root:
+ debugfs_remove_recursive(debugfs_root);
+out_clear_root:
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_exit(void)
+{
+ if (!debugfs_root)
+ return;
+
+ debugfs_remove_recursive(debugfs_root);
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (IS_ERR_OR_NULL(flows_dentry))
+ return;
+
+ scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
+ "%u", qp_flow->flow->flow_id);
+ qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name,
+ S_IRUGO,
+ flows_dentry,
+ qp_flow,
+ &flowinfo_ops);
+ if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
+ usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+ qp_flow->flow->flow_id,
+ PTR_ERR(qp_flow->dbgfs_dentry));
+ }
+}
+
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+ debugfs_remove(qp_flow->dbgfs_dentry);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h
new file mode 100644
index 000000000..98453e91d
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef USNIC_DEBUGFS_H_
+#define USNIC_DEBUGFS_H_
+
+#include "usnic_ib_qp_grp.h"
+
+void usnic_debugfs_init(void);
+
+void usnic_debugfs_exit(void);
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
+
+#endif /*!USNIC_DEBUGFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
new file mode 100644
index 000000000..787588362
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "enic_api.h"
+#include "usnic_common_pkt_hdr.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+
+static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0,
+ u64 *a1)
+{
+ int status;
+ struct net_device *netdev = ufdev->netdev;
+
+ lockdep_assert_held(&ufdev->lock);
+
+ status = enic_api_devcmd_proxy_by_index(netdev,
+ vnic_idx,
+ cmd,
+ a0, a1,
+ 1000);
+ if (status) {
+ if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
+ usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
+ ufdev->name, vnic_idx, cmd);
+ } else {
+ usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
+ ufdev->name, vnic_idx, cmd,
+ status);
+ }
+ } else {
+ usnic_dbg("Dev %s vnic idx %u cmd %u success",
+ ufdev->name, vnic_idx, cmd);
+ }
+
+ return status;
+}
+
+static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
+{
+ int status;
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
+ spin_unlock(&ufdev->lock);
+
+ return status;
+}
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
+{
+ struct usnic_fwd_dev *ufdev;
+
+ ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL);
+ if (!ufdev)
+ return NULL;
+
+ ufdev->pdev = pdev;
+ ufdev->netdev = pci_get_drvdata(pdev);
+ spin_lock_init(&ufdev->lock);
+ BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name));
+ strcpy(ufdev->name, ufdev->netdev->name);
+
+ return ufdev;
+}
+
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
+{
+ kfree(ufdev);
+}
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
+{
+ spin_lock(&ufdev->lock);
+ memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
+{
+ spin_lock(&ufdev->lock);
+ if (!ufdev->inaddr)
+ ufdev->inaddr = inaddr;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->inaddr = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 1;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->mtu = mtu;
+ spin_unlock(&ufdev->lock);
+}
+
+static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
+{
+ lockdep_assert_held(&ufdev->lock);
+
+ if (!ufdev->link_up)
+ return -EPERM;
+
+ return 0;
+}
+
+static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
+ struct filter *filter)
+{
+
+ lockdep_assert_held(&ufdev->lock);
+
+ if (filter->type == FILTER_IPV4_5TUPLE) {
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
+ return -EACCES;
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
+ return -EBUSY;
+ else if (ufdev->inaddr == 0)
+ return -EINVAL;
+ else if (filter->u.ipv4.dst_port == 0)
+ return -ERANGE;
+ else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
+ return -EFAULT;
+ else
+ return 0;
+ }
+
+ return 0;
+}
+
+static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
+ struct filter_action *action)
+{
+ tlv->type = CLSF_TLV_FILTER;
+ tlv->length = sizeof(struct filter);
+ *((struct filter *)&tlv->val) = *filter;
+
+ tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
+ sizeof(struct filter));
+ tlv->type = CLSF_TLV_ACTION;
+ tlv->length = sizeof(struct filter_action);
+ *((struct filter_action *)&tlv->val) = *action;
+}
+
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *uaction)
+{
+ struct filter_tlv *tlv;
+ struct pci_dev *pdev;
+ struct usnic_fwd_flow *flow;
+ uint64_t a0, a1;
+ uint64_t tlv_size;
+ dma_addr_t tlv_pa;
+ int status;
+
+ pdev = ufdev->pdev;
+ tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
+ sizeof(struct filter_action));
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ if (!tlv) {
+ usnic_err("Failed to allocate memory\n");
+ status = -ENOMEM;
+ goto out_free_flow;
+ }
+
+ fill_tlv(tlv, filter, &uaction->action);
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_dev_ready_locked(ufdev);
+ if (status) {
+ usnic_err("Forwarding dev %s not ready with status %d\n",
+ ufdev->name, status);
+ goto out_free_tlv;
+ }
+
+ status = validate_filter_locked(ufdev, filter);
+ if (status) {
+ usnic_err("Failed to validate filter with status %d\n",
+ status);
+ goto out_free_tlv;
+ }
+
+ /* Issue Devcmd */
+ a0 = tlv_pa;
+ a1 = tlv_size;
+ status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
+ CMD_ADD_FILTER, &a0, &a1);
+ if (status) {
+ usnic_err("VF %s Filter add failed with status:%d",
+ ufdev->name, status);
+ status = -EFAULT;
+ goto out_free_tlv;
+ } else {
+ usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
+ }
+
+ flow->flow_id = (uint32_t) a0;
+ flow->vnic_idx = uaction->vnic_idx;
+ flow->ufdev = ufdev;
+
+out_free_tlv:
+ spin_unlock(&ufdev->lock);
+ pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ if (!status)
+ return flow;
+out_free_flow:
+ kfree(flow);
+ return ERR_PTR(status);
+}
+
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
+{
+ int status;
+ u64 a0, a1;
+
+ a0 = flow->flow_id;
+
+ status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
+ CMD_DEL_FILTER, &a0, &a1);
+ if (status) {
+ if (status == ERR_EINVAL) {
+ usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
+ flow->flow_id, flow->vnic_idx,
+ flow->ufdev->name, status);
+ } else {
+ usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id, status);
+ }
+ status = 0;
+ /*
+ * Log the error and fake success to the caller because if
+ * a flow fails to be deleted in the firmware, it is an
+ * unrecoverable error.
+ */
+ } else {
+ usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id);
+ }
+
+ kfree(flow);
+ return status;
+}
+
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ struct net_device *pf_netdev;
+ u64 a0, a1;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
+ netdev_name(pf_netdev),
+ vnic_idx, qp_idx);
+ }
+
+ return status;
+}
+
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ u64 a0, a1;
+ struct net_device *pf_netdev;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx);
+ }
+
+ return status;
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
new file mode 100644
index 000000000..f0b71d593
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_FWD_H_
+#define USNIC_FWD_H_
+
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/pci.h>
+#include <linux/in.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_pkt_hdr.h"
+#include "vnic_devcmd.h"
+
+struct usnic_fwd_dev {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ spinlock_t lock;
+ /*
+ * The following fields can be read directly off the device.
+ * However, they should be set by a accessor function, except name,
+ * which cannot be changed.
+ */
+ bool link_up;
+ char mac[ETH_ALEN];
+ unsigned int mtu;
+ __be32 inaddr;
+ char name[IFNAMSIZ];
+};
+
+struct usnic_fwd_flow {
+ uint32_t flow_id;
+ struct usnic_fwd_dev *ufdev;
+ unsigned int vnic_idx;
+};
+
+struct usnic_filter_action {
+ int vnic_idx;
+ struct filter_action action;
+};
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
+void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
+
+/*
+ * Allocate a flow on this forwarding device. Whoever calls this function,
+ * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
+ * NETDEV_DOWN is seen, flow will no longer function and must be
+ * immediately freed by calling usnic_dealloc_flow.
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *action);
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+
+static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
+ uint32_t usnic_id)
+{
+ filter->type = FILTER_USNIC_ID;
+ filter->u.usnic.ethtype = ETH_P_IBOE;
+ filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
+ FILTER_FIELD_USNIC_ID |
+ FILTER_FIELD_USNIC_PROTO;
+ filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER <<
+ USNIC_ROCE_GRH_VER_SHIFT) |
+ USNIC_PROTO_VER;
+ filter->u.usnic.usnic_id = usnic_id;
+}
+
+static inline void usnic_fwd_init_udp_filter(struct filter *filter,
+ uint32_t daddr, uint16_t dport)
+{
+ filter->type = FILTER_IPV4_5TUPLE;
+ filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;
+ filter->u.ipv4.protocol = PROTO_UDP;
+
+ if (daddr) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
+ filter->u.ipv4.dst_addr = daddr;
+ }
+
+ if (dport) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
+ filter->u.ipv4.dst_port = dport;
+ }
+}
+
+#endif /* !USNIC_FWD_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h
new file mode 100644
index 000000000..525bf2726
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_H_
+#define USNIC_IB_H_
+
+#include <linux/iommu.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+
+
+#include "usnic.h"
+#include "usnic_abi.h"
+#include "usnic_vnic.h"
+
+#define USNIC_IB_PORT_CNT 1
+#define USNIC_IB_NUM_COMP_VECTORS 1
+
+extern unsigned int usnic_ib_share_vf;
+
+struct usnic_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ /* Protected by usnic_ib_dev->usdev_lock */
+ struct list_head qp_grp_list;
+ struct list_head link;
+};
+
+struct usnic_ib_pd {
+ struct ib_pd ibpd;
+ struct usnic_uiom_pd *umem_pd;
+};
+
+struct usnic_ib_mr {
+ struct ib_mr ibmr;
+ struct usnic_uiom_reg *umem;
+};
+
+struct usnic_ib_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ struct usnic_fwd_dev *ufdev;
+ struct list_head ib_dev_link;
+ struct list_head vf_dev_list;
+ struct list_head ctx_list;
+ struct mutex usdev_lock;
+
+ /* provisioning information */
+ struct kref vf_cnt;
+ unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
+
+ /* sysfs vars for QPN reporting */
+ struct kobject *qpn_kobj;
+};
+
+struct usnic_ib_vf {
+ struct usnic_ib_dev *pf;
+ spinlock_t lock;
+ struct usnic_vnic *vnic;
+ unsigned int qp_grp_ref_cnt;
+ struct usnic_ib_pd *pd;
+ struct list_head link;
+};
+
+static inline
+struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct usnic_ib_dev, ib_dev);
+}
+
+static inline
+struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct usnic_ib_pd, ibpd);
+}
+
+static inline
+struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct usnic_ib_mr, ibmr);
+}
+void usnic_ib_log_vf(struct usnic_ib_vf *vf);
+
+#define UPDATE_PTR_LEFT(N, P, L) \
+do { \
+ L -= (N); \
+ P += (N); \
+} while (0)
+
+#endif /* USNIC_IB_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
new file mode 100644
index 000000000..f0538a460
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Upinder Malhi <umalhi@cisco.com>
+ * Author: Anant Deepak <anadeepa@cisco.com>
+ * Author: Cesare Cantu' <cantuc@cisco.com>
+ * Author: Jeff Squyres <jsquyres@cisco.com>
+ * Author: Kiran Thirumalai <kithirum@cisco.com>
+ * Author: Xuyang Wang <xuywang@cisco.com>
+ * Author: Reese Faucette <rfaucett@cisco.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_log.h"
+#include "usnic_fwd.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_transport.h"
+#include "usnic_uiom.h"
+#include "usnic_ib_sysfs.h"
+
+unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
+unsigned int usnic_ib_share_vf = 1;
+
+static const char usnic_version[] =
+ DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
+static LIST_HEAD(usnic_ib_ibdev_list);
+
+/* Callback dump funcs */
+static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_vf *vf = obj;
+ return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+}
+/* End callback dump funcs */
+
+static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
+{
+ usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
+ usnic_ib_dump_vf_hdr,
+ usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
+}
+
+void usnic_ib_log_vf(struct usnic_ib_vf *vf)
+{
+ char buf[1000];
+ usnic_ib_dump_vf(vf, buf, sizeof(buf));
+ usnic_dbg("%s\n", buf);
+}
+
+/* Start of netdev section */
+static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
+{
+ struct usnic_ib_ucontext *ctx;
+ struct usnic_ib_qp_grp *qp_grp;
+ enum ib_qp_state cur_state;
+ int status;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
+ list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
+ cur_state = qp_grp->state;
+ if (cur_state == IB_QPS_INIT ||
+ cur_state == IB_QPS_RTR ||
+ cur_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp,
+ IB_QPS_ERR,
+ NULL);
+ if (status) {
+ usnic_err("Failed to transistion qp grp %u from %s to %s\n",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string
+ (cur_state),
+ usnic_ib_qp_grp_state_to_string
+ (IB_QPS_ERR));
+ }
+ }
+ }
+ }
+}
+
+static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event)
+{
+ struct net_device *netdev;
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ netdev = us_ibdev->netdev;
+ switch (event) {
+ case NETDEV_REBOOT:
+ usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ netdev_cmd_to_name(event),
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ case NETDEV_CHANGEADDR:
+ if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
+ sizeof(us_ibdev->ufdev->mac))) {
+ usnic_dbg("Ignoring addr change on %s\n",
+ us_ibdev->ib_dev.name);
+ } else {
+ usnic_info(" %s old mac: %pM new mac: %pM\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mac,
+ netdev->dev_addr);
+ usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ }
+
+ break;
+ case NETDEV_CHANGEMTU:
+ if (us_ibdev->ufdev->mtu != netdev->mtu) {
+ usnic_info("MTU Change on %s old: %u new: %u\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mtu, netdev->mtu);
+ usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ } else {
+ usnic_dbg("Ignoring MTU change on %s\n",
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ default:
+ usnic_dbg("Ignoring event %s on %s",
+ netdev_cmd_to_name(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
+static int usnic_ib_netdevice_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_usdev_event(us_ibdev, event);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block usnic_ib_netdevice_notifier = {
+ .notifier_call = usnic_ib_netdevice_event
+};
+/* End of netdev section */
+
+/* Start of inet section */
+static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct ib_event ib_event;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+
+ switch (event) {
+ case NETDEV_DOWN:
+ usnic_info("%s via ip notifiers",
+ netdev_cmd_to_name(event));
+ usnic_fwd_del_ipaddr(us_ibdev->ufdev);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
+ usnic_info("%s via ip notifiers: ip %pI4",
+ netdev_cmd_to_name(event),
+ &us_ibdev->ufdev->inaddr);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ default:
+ usnic_info("Ignoring event %s on %s",
+ netdev_cmd_to_name(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+static struct notifier_block usnic_ib_inetaddr_notifier = {
+ .notifier_call = usnic_ib_inetaddr_event
+};
+/* End of inet section*/
+
+static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static void usnic_get_dev_fw_str(struct ib_device *device, char *str)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
+}
+
+/* Start of PF discovery section */
+static void *usnic_ib_device_add(struct pci_dev *dev)
+{
+ struct usnic_ib_dev *us_ibdev;
+ union ib_gid gid;
+ struct in_device *ind;
+ struct net_device *netdev;
+
+ usnic_dbg("\n");
+ netdev = pci_get_drvdata(dev);
+
+ us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+ if (!us_ibdev) {
+ usnic_err("Device %s context alloc failed\n",
+ netdev_name(pci_get_drvdata(dev)));
+ return ERR_PTR(-EFAULT);
+ }
+
+ us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
+ if (!us_ibdev->ufdev) {
+ usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
+ goto err_dealloc;
+ }
+
+ mutex_init(&us_ibdev->usdev_lock);
+ INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
+ INIT_LIST_HEAD(&us_ibdev->ctx_list);
+
+ us_ibdev->pdev = dev;
+ us_ibdev->netdev = pci_get_drvdata(dev);
+ us_ibdev->ib_dev.owner = THIS_MODULE;
+ us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
+ us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
+ us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
+ us_ibdev->ib_dev.dev.parent = &dev->dev;
+ us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
+ strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
+
+ us_ibdev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+ us_ibdev->ib_dev.query_device = usnic_ib_query_device;
+ us_ibdev->ib_dev.query_port = usnic_ib_query_port;
+ us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
+ us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
+ us_ibdev->ib_dev.get_netdev = usnic_get_netdev;
+ us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
+ us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
+ us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
+ us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
+ us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
+ us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
+ us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
+ us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
+ us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
+ us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
+ us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
+ us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
+ us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
+ us_ibdev->ib_dev.mmap = usnic_ib_mmap;
+ us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
+ us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
+ us_ibdev->ib_dev.post_send = usnic_ib_post_send;
+ us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
+ us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
+ us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
+ us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
+ us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
+ us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
+
+
+ us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
+ if (ib_register_device(&us_ibdev->ib_dev, NULL))
+ goto err_fwd_dealloc;
+
+ usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
+ usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr);
+ if (netif_carrier_ok(us_ibdev->netdev))
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+
+ ind = in_dev_get(netdev);
+ if (ind->ifa_list)
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev,
+ ind->ifa_list->ifa_address);
+ in_dev_put(ind);
+
+ usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr,
+ us_ibdev->ufdev->inaddr, &gid.raw[0]);
+ memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ kref_init(&us_ibdev->vf_cnt);
+
+ usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
+ us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
+ us_ibdev->ufdev->mtu);
+ return us_ibdev;
+
+err_fwd_dealloc:
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+err_dealloc:
+ usnic_err("failed -- deallocing device\n");
+ ib_dealloc_device(&us_ibdev->ib_dev);
+ return NULL;
+}
+
+static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
+{
+ usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_sysfs_unregister_usdev(us_ibdev);
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+ ib_unregister_device(&us_ibdev->ib_dev);
+ ib_dealloc_device(&us_ibdev->ib_dev);
+}
+
+static void usnic_ib_undiscover_pf(struct kref *kref)
+{
+ struct usnic_ib_dev *us_ibdev, *tmp;
+ struct pci_dev *dev;
+ bool found = false;
+
+ dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry_safe(us_ibdev, tmp,
+ &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == dev) {
+ list_del(&us_ibdev->ib_dev_link);
+ usnic_ib_device_remove(us_ibdev);
+ found = true;
+ break;
+ }
+ }
+
+ WARN(!found, "Failed to remove PF %s\n", pci_name(dev));
+
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+}
+
+static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct pci_dev *parent_pci, *vf_pci;
+ int err;
+
+ vf_pci = usnic_vnic_get_pdev(vnic);
+ parent_pci = pci_physfn(vf_pci);
+
+ BUG_ON(!parent_pci);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == parent_pci) {
+ kref_get(&us_ibdev->vf_cnt);
+ goto out;
+ }
+ }
+
+ us_ibdev = usnic_ib_device_add(parent_pci);
+ if (IS_ERR_OR_NULL(us_ibdev)) {
+ us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+ goto out;
+ }
+
+ err = usnic_ib_sysfs_register_usdev(us_ibdev);
+ if (err) {
+ usnic_ib_device_remove(us_ibdev);
+ us_ibdev = ERR_PTR(err);
+ goto out;
+ }
+
+ list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list);
+out:
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+ return us_ibdev;
+}
+/* End of PF discovery section */
+
+/* Start of PCI section */
+
+static const struct pci_device_id usnic_ib_pci_ids[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
+ {0,}
+};
+
+static int usnic_ib_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int err;
+ struct usnic_ib_dev *pf;
+ struct usnic_ib_vf *vf;
+ enum usnic_vnic_res_type res_type;
+
+ vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+ if (!vf)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ usnic_err("Failed to enable %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_clean_vf;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ usnic_err("Failed to request region for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_disable_device;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, vf);
+
+ vf->vnic = usnic_vnic_alloc(pdev);
+ if (IS_ERR_OR_NULL(vf->vnic)) {
+ err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
+ usnic_err("Failed to alloc vnic for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_release_regions;
+ }
+
+ pf = usnic_ib_discover_pf(vf->vnic);
+ if (IS_ERR_OR_NULL(pf)) {
+ usnic_err("Failed to discover pf of vnic %s with err%ld\n",
+ pci_name(pdev), PTR_ERR(pf));
+ err = pf ? PTR_ERR(pf) : -EFAULT;
+ goto out_clean_vnic;
+ }
+
+ vf->pf = pf;
+ spin_lock_init(&vf->lock);
+ mutex_lock(&pf->usdev_lock);
+ list_add_tail(&vf->link, &pf->vf_dev_list);
+ /*
+ * Save max settings (will be same for each VF, easier to re-write than
+ * to say "if (!set) { set_values(); set=1; }
+ */
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL+1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
+ res_type);
+ }
+
+ mutex_unlock(&pf->usdev_lock);
+
+ usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
+ pf->ib_dev.name);
+ usnic_ib_log_vf(vf);
+ return 0;
+
+out_clean_vnic:
+ usnic_vnic_free(vf->vnic);
+out_release_regions:
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+out_disable_device:
+ pci_disable_device(pdev);
+out_clean_vf:
+ kfree(vf);
+ return err;
+}
+
+static void usnic_ib_pci_remove(struct pci_dev *pdev)
+{
+ struct usnic_ib_vf *vf = pci_get_drvdata(pdev);
+ struct usnic_ib_dev *pf = vf->pf;
+
+ mutex_lock(&pf->usdev_lock);
+ list_del(&vf->link);
+ mutex_unlock(&pf->usdev_lock);
+
+ kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf);
+ usnic_vnic_free(vf->vnic);
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(vf);
+
+ usnic_info("Removed VF %s\n", pci_name(pdev));
+}
+
+/* PCI driver entry points */
+static struct pci_driver usnic_ib_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = usnic_ib_pci_ids,
+ .probe = usnic_ib_pci_probe,
+ .remove = usnic_ib_pci_remove,
+};
+/* End of PCI section */
+
+/* Start of module section */
+static int __init usnic_ib_init(void)
+{
+ int err;
+
+ printk_once(KERN_INFO "%s", usnic_version);
+
+ err = usnic_uiom_init(DRV_NAME);
+ if (err) {
+ usnic_err("Unable to initalize umem with err %d\n", err);
+ return err;
+ }
+
+ err = pci_register_driver(&usnic_ib_pci_driver);
+ if (err) {
+ usnic_err("Unable to register with PCI\n");
+ goto out_umem_fini;
+ }
+
+ err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ if (err) {
+ usnic_err("Failed to register netdev notifier\n");
+ goto out_pci_unreg;
+ }
+
+ err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ if (err) {
+ usnic_err("Failed to register inet addr notifier\n");
+ goto out_unreg_netdev_notifier;
+ }
+
+ err = usnic_transport_init();
+ if (err) {
+ usnic_err("Failed to initialize transport\n");
+ goto out_unreg_inetaddr_notifier;
+ }
+
+ usnic_debugfs_init();
+
+ return 0;
+
+out_unreg_inetaddr_notifier:
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+out_unreg_netdev_notifier:
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+out_pci_unreg:
+ pci_unregister_driver(&usnic_ib_pci_driver);
+out_umem_fini:
+ usnic_uiom_fini();
+
+ return err;
+}
+
+static void __exit usnic_ib_destroy(void)
+{
+ usnic_dbg("\n");
+ usnic_debugfs_exit();
+ usnic_transport_fini();
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ pci_unregister_driver(&usnic_ib_pci_driver);
+ usnic_uiom_fini();
+}
+
+MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
+MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
+module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3");
+MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
+MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
+
+module_init(usnic_ib_init);
+module_exit(usnic_ib_destroy);
+/* End of module section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
new file mode 100644
index 000000000..bf5136533
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -0,0 +1,766 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+#include "usnic_fwd.h"
+#include "usnic_uiom.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_transport.h"
+
+#define DFLT_RQ_IDX 0
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return "Rst";
+ case IB_QPS_INIT:
+ return "Init";
+ case IB_QPS_RTR:
+ return "RTR";
+ case IB_QPS_RTS:
+ return "RTS";
+ case IB_QPS_SQD:
+ return "SQD";
+ case IB_QPS_SQE:
+ return "SQE";
+ case IB_QPS_ERR:
+ return "ERR";
+ default:
+ return "UNKNOWN STATE";
+
+ }
+}
+
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
+{
+ return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID");
+}
+
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_qp_grp *qp_grp = obj;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ if (obj) {
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(
+ qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic),
+ default_flow->flow->flow_id);
+ } else {
+ return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");
+ }
+}
+
+static struct usnic_vnic_res_chunk *
+get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
+{
+ lockdep_assert_held(&qp_grp->lock);
+ /*
+ * The QP res chunk, used to derive qp indices,
+ * are just indices of the RQs
+ */
+ return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+}
+
+static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+
+ int status;
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+
+ lockdep_assert_held(&qp_grp->lock);
+
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return PTR_ERR(res_chunk);
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+ usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n",
+ res->vnic_idx, qp_grp->ufdev->name,
+ vnic_idx, status);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (i--; i >= 0; i--) {
+ res = res_chunk->res[i];
+ usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ }
+
+ return status;
+}
+
+static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+ int status = 0;
+
+ lockdep_assert_held(&qp_grp->lock);
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return PTR_ERR(res_chunk);
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+ usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n",
+ res->vnic_idx,
+ qp_grp->ufdev->name,
+ vnic_idx, status);
+ }
+ }
+
+ return status;
+
+}
+
+static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_filter_action *uaction)
+{
+ struct usnic_vnic_res_chunk *res_chunk;
+
+ res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR(res_chunk)) {
+ usnic_err("Unable to get %s with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ PTR_ERR(res_chunk));
+ return PTR_ERR(res_chunk);
+ }
+
+ uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ uaction->action.type = FILTER_ACTION_RQ_STEERING;
+ uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx;
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ uint16_t port_num;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ port_num = trans_spec->usnic_roce.port_num;
+
+ /* Reserve Port */
+ port_num = usnic_transport_rsrv_port(trans_type, port_num);
+ if (port_num == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* Create Flow */
+ usnic_fwd_init_usnic_filter(&filter, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_unreserve_port;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_unreserve_port;
+ }
+
+ /* Create Flow Handle */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (!qp_flow) {
+ err = -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->usnic_roce.port_num = port_num;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_unreserve_port:
+ usnic_transport_unrsrv_port(trans_type, port_num);
+ return ERR_PTR(err);
+}
+
+static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_unrsrv_port(qp_flow->trans_type,
+ qp_flow->usnic_roce.port_num);
+ kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct socket *sock;
+ int sock_fd;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ uint32_t addr;
+ uint16_t port_num;
+ int proto;
+
+ trans_type = trans_spec->trans_type;
+ sock_fd = trans_spec->udp.sock_fd;
+
+ /* Get and check socket */
+ sock = usnic_transport_get_socket(sock_fd);
+ if (IS_ERR_OR_NULL(sock))
+ return ERR_CAST(sock);
+
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num);
+ if (err)
+ goto out_put_sock;
+
+ if (proto != IPPROTO_UDP) {
+ usnic_err("Protocol for fd %d is not UDP", sock_fd);
+ err = -EPERM;
+ goto out_put_sock;
+ }
+
+ /* Create flow */
+ usnic_fwd_init_udp_filter(&filter, addr, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_put_sock;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_put_sock;
+ }
+
+ /* Create qp_flow */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (!qp_flow) {
+ err = -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->udp.sock = sock;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_put_sock:
+ usnic_transport_put_socket(sock);
+ return ERR_PTR(err);
+}
+
+static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_put_socket(qp_flow->udp.sock);
+ kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ qp_flow = create_roce_custom_flow(qp_grp, trans_spec);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ qp_flow = create_udp_flow(qp_grp, trans_spec);
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n",
+ trans_spec->trans_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!IS_ERR_OR_NULL(qp_flow)) {
+ list_add_tail(&qp_flow->link, &qp_grp->flows_lst);
+ usnic_debugfs_flow_add(qp_flow);
+ }
+
+
+ return qp_flow;
+}
+
+static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_debugfs_flow_remove(qp_flow);
+ list_del(&qp_flow->link);
+
+ switch (qp_flow->trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ release_roce_custom_flow(qp_flow);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ release_udp_flow(qp_flow);
+ break;
+ default:
+ WARN(1, "Unsupported transport %u\n",
+ qp_flow->trans_type);
+ break;
+ }
+}
+
+static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow, *tmp;
+ list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link)
+ release_and_remove_flow(qp_flow);
+}
+
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data)
+{
+ int status = 0;
+ struct ib_event ib_event;
+ enum ib_qp_state old_state;
+ struct usnic_transport_spec *trans_spec;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ old_state = qp_grp->state;
+ trans_spec = (struct usnic_transport_spec *) data;
+
+ spin_lock(&qp_grp->lock);
+ switch (new_state) {
+ case IB_QPS_RESET:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ /* NO-OP */
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ status = 0;
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ case IB_QPS_ERR:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_INIT:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Optional to specify filters.
+ */
+ status = 0;
+ }
+ break;
+ case IB_QPS_INIT:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Doesn't make sense to go into INIT state
+ * from INIT state w/o adding filters.
+ */
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ status = disable_qp_grp(qp_grp);
+ break;
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ switch (old_state) {
+ case IB_QPS_INIT:
+ status = enable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTS:
+ switch (old_state) {
+ case IB_QPS_RTR:
+ /* NO-OP FOR NOW */
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_ERR:
+ ib_event.device = &qp_grp->vf->pf->ib_dev;
+ ib_event.element.qp = &qp_grp->ibqp;
+ ib_event.event = IB_EVENT_QP_FATAL;
+
+ switch (old_state) {
+ case IB_QPS_RESET:
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ default:
+ status = -EINVAL;
+ }
+ spin_unlock(&qp_grp->lock);
+
+ if (!status) {
+ qp_grp->state = new_state;
+ usnic_info("Transitioned %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ } else {
+ usnic_err("Failed to transition %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ }
+
+ return status;
+}
+
+static struct usnic_vnic_res_chunk**
+alloc_res_chunk_list(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec, void *owner_obj)
+{
+ enum usnic_vnic_res_type res_type;
+ struct usnic_vnic_res_chunk **res_chunk_list;
+ int err, i, res_cnt, res_lst_sz;
+
+ for (res_lst_sz = 0;
+ res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
+ res_lst_sz++) {
+ /* Do Nothing */
+ }
+
+ res_chunk_list = kcalloc(res_lst_sz + 1, sizeof(*res_chunk_list),
+ GFP_ATOMIC);
+ if (!res_chunk_list)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
+ i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
+ res_cnt, owner_obj);
+ if (IS_ERR_OR_NULL(res_chunk_list[i])) {
+ err = res_chunk_list[i] ?
+ PTR_ERR(res_chunk_list[i]) : -ENOMEM;
+ usnic_err("Failed to get %s from %s with err %d\n",
+ usnic_vnic_res_type_to_str(res_type),
+ usnic_vnic_pci_name(vnic),
+ err);
+ goto out_free_res;
+ }
+ }
+
+ return res_chunk_list;
+
+out_free_res:
+ for (i--; i >= 0; i--)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+ return ERR_PTR(err);
+}
+
+static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
+{
+ int i;
+ for (i = 0; res_chunk_list[i]; i++)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+}
+
+static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_ib_qp_grp *qp_grp)
+{
+ int err;
+ struct pci_dev *pdev;
+
+ lockdep_assert_held(&vf->lock);
+
+ pdev = usnic_vnic_get_pdev(vf->vnic);
+ if (vf->qp_grp_ref_cnt == 0) {
+ err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev);
+ if (err) {
+ usnic_err("Failed to attach %s to domain\n",
+ pci_name(pdev));
+ return err;
+ }
+ vf->pd = pd;
+ }
+ vf->qp_grp_ref_cnt++;
+
+ WARN_ON(vf->pd != pd);
+ qp_grp->vf = vf;
+
+ return 0;
+}
+
+static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct pci_dev *pdev;
+ struct usnic_ib_pd *pd;
+
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ pd = qp_grp->vf->pd;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (--qp_grp->vf->qp_grp_ref_cnt == 0) {
+ qp_grp->vf->pd = NULL;
+ usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev);
+ }
+ qp_grp->vf = NULL;
+}
+
+static void log_spec(struct usnic_vnic_res_spec *res_spec)
+{
+ char buf[512];
+ usnic_vnic_spec_dump(buf, sizeof(buf), res_spec);
+ usnic_dbg("%s\n", buf);
+}
+
+static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
+ uint32_t *id)
+{
+ enum usnic_transport_type trans_type = qp_flow->trans_type;
+ int err;
+ uint16_t port_num = 0;
+
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ *id = qp_flow->usnic_roce.port_num;
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ err = usnic_transport_sock_get_addr(qp_flow->udp.sock,
+ NULL, NULL,
+ &port_num);
+ if (err)
+ return err;
+ /*
+ * Copy port_num to stack first and then to *id,
+ * so that the short to int cast works for little
+ * and big endian systems.
+ */
+ *id = port_num;
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n", trans_type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *transport_spec)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int err;
+ enum usnic_transport_type transport = transport_spec->trans_type;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ lockdep_assert_held(&vf->lock);
+
+ err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
+ res_spec);
+ if (err) {
+ usnic_err("Spec does not meet miniumum req for transport %d\n",
+ transport);
+ log_spec(res_spec);
+ return ERR_PTR(err);
+ }
+
+ qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
+ if (!qp_grp)
+ return NULL;
+
+ qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
+ qp_grp);
+ if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
+ err = qp_grp->res_chunk_list ?
+ PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
+ goto out_free_qp_grp;
+ }
+
+ err = qp_grp_and_vf_bind(vf, pd, qp_grp);
+ if (err)
+ goto out_free_res;
+
+ INIT_LIST_HEAD(&qp_grp->flows_lst);
+ spin_lock_init(&qp_grp->lock);
+ qp_grp->ufdev = ufdev;
+ qp_grp->state = IB_QPS_RESET;
+ qp_grp->owner_pid = current->pid;
+
+ qp_flow = create_and_add_flow(qp_grp, transport_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ usnic_err("Unable to create and add flow with err %ld\n",
+ PTR_ERR(qp_flow));
+ err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ goto out_qp_grp_vf_unbind;
+ }
+
+ err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
+ if (err)
+ goto out_release_flow;
+ qp_grp->ibqp.qp_num = qp_grp->grp_id;
+
+ usnic_ib_sysfs_qpn_add(qp_grp);
+
+ return qp_grp;
+
+out_release_flow:
+ release_and_remove_flow(qp_flow);
+out_qp_grp_vf_unbind:
+ qp_grp_and_vf_unbind(qp_grp);
+out_free_res:
+ free_qp_grp_res(qp_grp->res_chunk_list);
+out_free_qp_grp:
+ kfree(qp_grp);
+
+ return ERR_PTR(err);
+}
+
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ release_and_remove_all_flows(qp_grp);
+ usnic_ib_sysfs_qpn_remove(qp_grp);
+ qp_grp_and_vf_unbind(qp_grp);
+ free_qp_grp_res(qp_grp->res_chunk_list);
+ kfree(qp_grp);
+}
+
+struct usnic_vnic_res_chunk*
+usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type res_type)
+{
+ int i;
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ if (qp_grp->res_chunk_list[i]->type == res_type)
+ return qp_grp->res_chunk_list[i];
+ }
+
+ return ERR_PTR(-EINVAL);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
new file mode 100644
index 000000000..a8a2314c9
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_QP_GRP_H_
+#define USNIC_IB_QP_GRP_H_
+
+#include <linux/debugfs.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_ib.h"
+#include "usnic_abi.h"
+#include "usnic_fwd.h"
+#include "usnic_vnic.h"
+
+/*
+ * The qp group struct represents all the hw resources needed to present a ib_qp
+ */
+struct usnic_ib_qp_grp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+ int grp_id;
+
+ struct usnic_fwd_dev *ufdev;
+ struct usnic_ib_ucontext *ctx;
+ struct list_head flows_lst;
+
+ struct usnic_vnic_res_chunk **res_chunk_list;
+
+ pid_t owner_pid;
+ struct usnic_ib_vf *vf;
+ struct list_head link;
+
+ spinlock_t lock;
+
+ struct kobject kobj;
+};
+
+struct usnic_ib_qp_grp_flow {
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ struct socket *sock;
+ } udp;
+ };
+ struct usnic_ib_qp_grp *qp_grp;
+ struct list_head link;
+
+ /* Debug FS */
+ struct dentry *dbgfs_dentry;
+ char dentry_name[32];
+};
+
+extern const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX];
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *trans_spec);
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data);
+struct usnic_vnic_res_chunk
+*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type type);
+static inline
+struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
+}
+#endif /* USNIC_IB_QP_GRP_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
new file mode 100644
index 000000000..4210ca140
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_vnic.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_log.h"
+
+static ssize_t usnic_ib_show_board(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ unsigned short subsystem_device_id;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ subsystem_device_id = us_ibdev->pdev->subsystem_device;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+}
+
+/*
+ * Report the configuration for this PF
+ */
+static ssize_t
+usnic_ib_show_config(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ char *ptr;
+ unsigned left;
+ unsigned n;
+ enum usnic_vnic_res_type res_type;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ /* Buffer space limit is 1 page */
+ ptr = buf;
+ left = PAGE_SIZE;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ if (kref_read(&us_ibdev->vf_cnt) > 0) {
+ char *busname;
+
+ /*
+ * bus name seems to come with annoying prefix.
+ * Remove it if it is predictable
+ */
+ busname = us_ibdev->pdev->bus->name;
+ if (strncmp(busname, "PCI Bus ", 8) == 0)
+ busname += 8;
+
+ n = scnprintf(ptr, left,
+ "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
+ us_ibdev->ib_dev.name,
+ busname,
+ PCI_SLOT(us_ibdev->pdev->devfn),
+ PCI_FUNC(us_ibdev->pdev->devfn),
+ netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac,
+ kref_read(&us_ibdev->vf_cnt));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ if (us_ibdev->vf_res_cnt[res_type] == 0)
+ continue;
+ n = scnprintf(ptr, left, " %d %s%s",
+ us_ibdev->vf_res_cnt[res_type],
+ usnic_vnic_res_type_to_str(res_type),
+ (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
+ "," : "");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else {
+ n = scnprintf(ptr, left, "%s: no VFs\n",
+ us_ibdev->ib_dev.name);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return ptr - buf;
+}
+
+static ssize_t
+usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ netdev_name(us_ibdev->netdev));
+}
+
+static ssize_t
+usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ kref_read(&us_ibdev->vf_cnt));
+}
+
+static ssize_t
+usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int qp_per_vf;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+
+ return scnprintf(buf, PAGE_SIZE,
+ "%d\n", qp_per_vf);
+}
+
+static ssize_t
+usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+}
+
+static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
+static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
+static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
+static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
+static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
+static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
+
+static struct device_attribute *usnic_class_attributes[] = {
+ &dev_attr_board_id,
+ &dev_attr_config,
+ &dev_attr_iface,
+ &dev_attr_max_vf,
+ &dev_attr_qp_per_vf,
+ &dev_attr_cq_per_vf,
+};
+
+struct qpn_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
+};
+
+/*
+ * Definitions for supporting QPN entries in sysfs
+ */
+static ssize_t
+usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct qpn_attribute *qpn_attr;
+
+ qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj);
+ qpn_attr = container_of(attr, struct qpn_attribute, attr);
+
+ return qpn_attr->show(qp_grp, buf);
+}
+
+static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
+ .show = usnic_ib_qpn_attr_show
+};
+
+#define QPN_ATTR_RO(NAME) \
+struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
+
+static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+}
+
+static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ int i, j, n;
+ int left;
+ char *ptr;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *vnic_res;
+
+ left = PAGE_SIZE;
+ ptr = buf;
+
+ n = scnprintf(ptr, left,
+ "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ res_chunk = qp_grp->res_chunk_list[i];
+ for (j = 0; j < res_chunk->cnt; j++) {
+ vnic_res = res_chunk->res[j];
+ n = scnprintf(ptr, left, "%s[%d] ",
+ usnic_vnic_res_type_to_str(vnic_res->type),
+ vnic_res->vnic_idx);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ }
+
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ return ptr - buf;
+}
+
+static QPN_ATTR_RO(context);
+static QPN_ATTR_RO(summary);
+
+static struct attribute *usnic_ib_qpn_default_attrs[] = {
+ &qpn_attr_context.attr,
+ &qpn_attr_summary.attr,
+ NULL
+};
+
+static struct kobj_type usnic_ib_qpn_type = {
+ .sysfs_ops = &usnic_ib_qpn_sysfs_ops,
+ .default_attrs = usnic_ib_qpn_default_attrs
+};
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ int err;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ err = device_create_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ if (err) {
+ usnic_err("Failed to create device file %d for %s eith err %d",
+ i, us_ibdev->ib_dev.name, err);
+ return -EINVAL;
+ }
+ }
+
+ /* create kernel object for looking at individual QPs */
+ kobject_get(&us_ibdev->ib_dev.dev.kobj);
+ us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
+ &us_ibdev->ib_dev.dev.kobj);
+ if (us_ibdev->qpn_kobj == NULL) {
+ kobject_put(&us_ibdev->ib_dev.dev.kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ device_remove_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ }
+
+ kobject_put(us_ibdev->qpn_kobj);
+}
+
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int err;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
+ kobject_get(us_ibdev->qpn_kobj),
+ "%d", qp_grp->grp_id);
+ if (err) {
+ kobject_put(us_ibdev->qpn_kobj);
+ return;
+ }
+}
+
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ kobject_put(&qp_grp->kobj);
+ kobject_put(us_ibdev->qpn_kobj);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
new file mode 100644
index 000000000..3d98e16cf
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_SYSFS_H_
+#define USNIC_IB_SYSFS_H_
+
+#include "usnic_ib.h"
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+
+#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
new file mode 100644
index 000000000..e6c11b5a1
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -0,0 +1,816 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_ib.h"
+#include "usnic_common_util.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_transport.h"
+
+#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
+
+const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
+ { /*USNIC_TRANSPORT_UNKNOWN*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_IPV4_UDP*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+};
+
+static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
+{
+ *fw_ver = *((u64 *)fw_ver_str);
+}
+
+static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_create_qp_resp resp;
+ struct pci_dev *pdev;
+ struct vnic_dev_bar *bar;
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ int i, err;
+
+ memset(&resp, 0, sizeof(resp));
+
+ us_ibdev = qp_grp->vf->pf;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (!pdev) {
+ usnic_err("Failed to get pdev of qp_grp %d\n",
+ qp_grp->grp_id);
+ return -EFAULT;
+ }
+
+ bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
+ if (!bar) {
+ usnic_err("Failed to get bar0 of qp_grp %d vf %s",
+ qp_grp->grp_id, pci_name(pdev));
+ return -EFAULT;
+ }
+
+ resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
+ resp.bar_bus_addr = bar->bus_addr;
+ resp.bar_len = bar->len;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return PTR_ERR(chunk);
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
+ resp.rq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.rq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
+ if (IS_ERR(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return PTR_ERR(chunk);
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
+ resp.wq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.wq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
+ if (IS_ERR(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return PTR_ERR(chunk);
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
+ resp.cq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.cq_idx[i] = chunk->res[i]->vnic_idx;
+
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ resp.transport = default_flow->trans_type;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp*
+find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
+ struct usnic_ib_pd *pd,
+ struct usnic_transport_spec *trans_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ struct usnic_ib_vf *vf;
+ struct usnic_vnic *vnic;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct device *dev, **dev_list;
+ int i;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ if (list_empty(&us_ibdev->vf_dev_list)) {
+ usnic_info("No vfs to allocate\n");
+ return NULL;
+ }
+
+ if (usnic_ib_share_vf) {
+ /* Try to find resouces on a used vf which is in pd */
+ dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
+ if (IS_ERR(dev_list))
+ return ERR_CAST(dev_list);
+ for (i = 0; dev_list[i]; i++) {
+ dev = dev_list[i];
+ vf = pci_get_drvdata(to_pci_dev(dev));
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (!usnic_vnic_check_room(vnic, res_spec)) {
+ usnic_dbg("Found used vnic %s from %s\n",
+ us_ibdev->ib_dev.name,
+ pci_name(usnic_vnic_get_pdev(
+ vnic)));
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
+ vf, pd,
+ res_spec,
+ trans_spec);
+
+ spin_unlock(&vf->lock);
+ goto qp_grp_check;
+ }
+ spin_unlock(&vf->lock);
+
+ }
+ usnic_uiom_free_dev_list(dev_list);
+ dev_list = NULL;
+ }
+
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf,
+ pd, res_spec,
+ trans_spec);
+
+ spin_unlock(&vf->lock);
+ goto qp_grp_check;
+ }
+ spin_unlock(&vf->lock);
+ }
+
+ usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
+
+qp_grp_check:
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ usnic_err("Failed to allocate qp_grp\n");
+ if (usnic_ib_share_vf)
+ usnic_uiom_free_dev_list(dev_list);
+ return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
+ }
+ return qp_grp;
+}
+
+static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_vf *vf = qp_grp->vf;
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+
+ spin_lock(&vf->lock);
+ usnic_ib_qp_grp_destroy(qp_grp);
+ spin_unlock(&vf->lock);
+}
+
+static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
+{
+ if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
+ cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Start of ib callback functions */
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ union ib_gid gid;
+ struct ethtool_drvinfo info;
+ int qp_per_vf;
+
+ usnic_dbg("\n");
+ if (uhw->inlen || uhw->outlen)
+ return -EINVAL;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ memset(props, 0, sizeof(*props));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid.raw[0]);
+ memcpy(&props->sys_image_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
+ props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
+ props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
+ props->vendor_id = PCI_VENDOR_ID_CISCO;
+ props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
+ props->hw_ver = us_ibdev->pdev->subsystem_device;
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+ props->max_qp = qp_per_vf *
+ kref_read(&us_ibdev->vf_cnt);
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
+ kref_read(&us_ibdev->vf_cnt);
+ props->max_pd = USNIC_UIOM_MAX_PD_CNT;
+ props->max_mr = USNIC_UIOM_MAX_MR_CNT;
+ props->local_ca_ack_delay = 0;
+ props->max_pkeys = 0;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_qp_rd_atom = 0;
+ props->max_qp_init_rd_atom = 0;
+ props->max_res_rd_atom = 0;
+ props->max_srq = 0;
+ props->max_srq_wr = 0;
+ props->max_srq_sge = 0;
+ props->max_fast_reg_page_list_len = 0;
+ props->max_mcast_grp = 0;
+ props->max_mcast_qp_attach = 0;
+ props->max_total_mcast_qp_attach = 0;
+ props->max_map_per_fmr = 0;
+ /* Owned by Userspace
+ * max_qp_wr, max_sge, max_sge_rd, max_cqe */
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+
+ usnic_dbg("\n");
+
+ if (ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width))
+ return -EINVAL;
+
+ /*
+ * usdev_lock is acquired after (and not before) ib_get_eth_speed call
+ * because acquiring rtnl_lock in ib_get_eth_speed, while holding
+ * usdev_lock could lead to a deadlock.
+ */
+ mutex_lock(&us_ibdev->usdev_lock);
+ /* props being zeroed by the caller, avoid zeroing it here */
+
+ props->lid = 0;
+ props->lmc = 1;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+
+ if (!us_ibdev->ufdev->link_up) {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+ } else if (!us_ibdev->ufdev->inaddr) {
+ props->state = IB_PORT_INIT;
+ props->phys_state = 4;
+ } else {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ props->port_cap_flags = 0;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->bad_pkey_cntr = 0;
+ props->qkey_viol_cntr = 0;
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
+ /* Userspace will adjust for hdrs */
+ props->max_msg_sz = us_ibdev->ufdev->mtu;
+ props->max_vl_num = 1;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ int err;
+
+ usnic_dbg("\n");
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ usnic_dbg("\n");
+ qp_attr->qp_state = qp_grp->state;
+ qp_attr->cur_qp_state = qp_grp->state;
+
+ switch (qp_grp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ qp_attr->qkey = 0;
+ break;
+ default:
+ usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ mutex_unlock(&vf->pf->usdev_lock);
+ return 0;
+
+err_out:
+ mutex_unlock(&vf->pf->usdev_lock);
+ return err;
+}
+
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ if (index > 1)
+ return -EINVAL;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid->raw[0]);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(device);
+
+ if (us_ibdev->netdev)
+ dev_hold(us_ibdev->netdev);
+
+ return us_ibdev->netdev;
+}
+
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 0)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_pd *pd;
+ void *umem_pd;
+
+ usnic_dbg("\n");
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
+ if (IS_ERR_OR_NULL(umem_pd)) {
+ kfree(pd);
+ return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+ }
+
+ usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
+ pd, context, ibdev->name);
+ return &pd->ibpd;
+}
+
+int usnic_ib_dealloc_pd(struct ib_pd *pd)
+{
+ usnic_info("freeing domain 0x%p\n", pd);
+
+ usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+ kfree(pd);
+ return 0;
+}
+
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int err;
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_ucontext *ucontext;
+ int cq_cnt;
+ struct usnic_vnic_res_spec res_spec;
+ struct usnic_ib_create_qp_cmd cmd;
+ struct usnic_transport_spec trans_spec;
+
+ usnic_dbg("\n");
+
+ ucontext = to_uucontext(pd->uobject->context);
+ us_ibdev = to_usdev(pd->device);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (err) {
+ usnic_err("%s: cannot copy udata for create_qp\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = create_qp_validate_user_data(cmd);
+ if (err) {
+ usnic_err("%s: Failed to validate user data\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_UD) {
+ usnic_err("%s asked to make a non-UD QP: %d\n",
+ us_ibdev->ib_dev.name, init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ trans_spec = cmd.spec;
+ mutex_lock(&us_ibdev->usdev_lock);
+ cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
+ res_spec = min_transport_spec[trans_spec.trans_type];
+ usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
+ qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
+ &trans_spec,
+ &res_spec);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+ goto out_release_mutex;
+ }
+
+ err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
+ if (err) {
+ err = -EBUSY;
+ goto out_release_qp_grp;
+ }
+
+ qp_grp->ctx = ucontext;
+ list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
+ usnic_ib_log_vf(qp_grp->vf);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return &qp_grp->ibqp;
+
+out_release_qp_grp:
+ qp_grp_destroy(qp_grp);
+out_release_mutex:
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
+ usnic_err("Failed to move qp grp %u to reset\n",
+ qp_grp->grp_id);
+ }
+
+ list_del(&qp_grp->link);
+ qp_grp_destroy(qp_grp);
+ mutex_unlock(&vf->pf->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int status;
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(ibqp);
+
+ mutex_lock(&qp_grp->vf->pf->usdev_lock);
+ if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+ /* usnic devices only have one port */
+ status = -EINVAL;
+ goto out_unlock;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
+ } else {
+ usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
+ status = -EINVAL;
+ }
+
+out_unlock:
+ mutex_unlock(&qp_grp->vf->pf->usdev_lock);
+ return status;
+}
+
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ib_cq *cq;
+
+ usnic_dbg("\n");
+ if (attr->flags)
+ return ERR_PTR(-EINVAL);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-EBUSY);
+
+ return cq;
+}
+
+int usnic_ib_destroy_cq(struct ib_cq *cq)
+{
+ usnic_dbg("\n");
+ kfree(cq);
+ return 0;
+}
+
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_mr *mr;
+ int err;
+
+ usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
+ virt_addr, length);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
+ access_flags, 0);
+ if (IS_ERR_OR_NULL(mr->umem)) {
+ err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
+ goto err_free;
+ }
+
+ mr->ibmr.lkey = mr->ibmr.rkey = 0;
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct usnic_ib_mr *mr = to_umr(ibmr);
+
+ usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
+
+ usnic_uiom_reg_release(mr->umem, ibmr->uobject->context);
+ kfree(mr);
+ return 0;
+}
+
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_ucontext *context;
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&context->qp_grp_list);
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_add_tail(&context->link, &us_ibdev->ctx_list);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return &context->ibucontext;
+}
+
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ BUG_ON(!list_empty(&context->qp_grp_list));
+ list_del(&context->link);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ kfree(context);
+ return 0;
+}
+
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ struct usnic_ib_ucontext *uctx = to_ucontext(context);
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ struct vnic_dev_bar *bar;
+ dma_addr_t bus_addr;
+ unsigned int len;
+ unsigned int vfid;
+
+ usnic_dbg("\n");
+
+ us_ibdev = to_usdev(context->device);
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vfid = vma->vm_pgoff;
+ usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
+ vma->vm_pgoff, PAGE_SHIFT, vfid);
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
+ vf = qp_grp->vf;
+ if (usnic_vnic_get_index(vf->vnic) == vfid) {
+ bar = usnic_vnic_get_bar(vf->vnic, 0);
+ if ((vma->vm_end - vma->vm_start) != bar->len) {
+ usnic_err("Bar0 Len %lu - Request map %lu\n",
+ bar->len,
+ vma->vm_end - vma->vm_start);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return -EINVAL;
+ }
+ bus_addr = bar->bus_addr;
+ len = bar->len;
+ usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
+ &bus_addr, bar->vaddr, bar->len);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ bus_addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+ }
+
+ mutex_unlock(&us_ibdev->usdev_lock);
+ usnic_err("No VF %u found\n", vfid);
+ return -EINVAL;
+}
+
+/* In ib callbacks section - Start of stub funcs */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-EPERM);
+}
+
+int usnic_ib_destroy_ah(struct ib_ah *ah)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+
+/* In ib callbacks section - End of stub funcs */
+/* End of ib callbacks section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
new file mode 100644
index 000000000..2a2c9beb7
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_VERBS_H_
+#define USNIC_IB_VERBS_H_
+
+#include "usnic_ib.h"
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw);
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num);
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey);
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd);
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int usnic_ib_destroy_qp(struct ib_qp *qp);
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq);
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int usnic_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata);
+
+int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags);
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
+#endif /* !USNIC_IB_VERBS_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h
new file mode 100644
index 000000000..183fcb6a9
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_log.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_LOG_H_
+#define USNIC_LOG_H_
+
+#include "usnic.h"
+
+extern unsigned int usnic_log_lvl;
+
+#define USNIC_LOG_LVL_NONE (0)
+#define USNIC_LOG_LVL_ERR (1)
+#define USNIC_LOG_LVL_INFO (2)
+#define USNIC_LOG_LVL_DBG (3)
+
+#define usnic_printk(lvl, args...) \
+ do { \
+ printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
+ __LINE__); \
+ printk(args); \
+ } while (0)
+
+#define usnic_dbg(args...) \
+ do { \
+ if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_info(args...) \
+do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_err(args...) \
+ do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
+ usnic_printk(KERN_ERR, args); \
+ } \
+ } while (0)
+#endif /* !USNIC_LOG_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
new file mode 100644
index 000000000..e0a95538c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bitmap.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/inet_sock.h>
+
+#include "usnic_transport.h"
+#include "usnic_log.h"
+
+/* ROCE */
+static unsigned long *roce_bitmap;
+static u16 roce_next_port = 1;
+#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/)
+static DEFINE_SPINLOCK(roce_bitmap_lock);
+
+const char *usnic_transport_to_str(enum usnic_transport_type type)
+{
+ switch (type) {
+ case USNIC_TRANSPORT_UNKNOWN:
+ return "Unknown";
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ return "roce custom";
+ case USNIC_TRANSPORT_IPV4_UDP:
+ return "IPv4 UDP";
+ case USNIC_TRANSPORT_MAX:
+ return "Max?";
+ default:
+ return "Not known";
+ }
+}
+
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock)
+{
+ int err;
+ uint32_t addr;
+ uint16_t port;
+ int proto;
+
+ memset(buf, 0, buf_sz);
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
+ if (err)
+ return 0;
+
+ return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
+ proto, &addr, port);
+}
+
+/*
+ * reserve a port number. if "0" specified, we will try to pick one
+ * starting at roce_next_port. roce_next_port will take on the values
+ * 1..4096
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ port_num = bitmap_find_next_zero_area(roce_bitmap,
+ ROCE_BITMAP_SZ,
+ roce_next_port /* start */,
+ 1 /* nr */,
+ 0 /* align */);
+ roce_next_port = (port_num & 4095) + 1;
+ } else if (test_bit(port_num, roce_bitmap)) {
+ usnic_err("Failed to allocate port for %s\n",
+ usnic_transport_to_str(type));
+ spin_unlock(&roce_bitmap_lock);
+ goto out_fail;
+ }
+ bitmap_set(roce_bitmap, port_num, 1);
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Failed to allocate port - transport %s unsupported\n",
+ usnic_transport_to_str(type));
+ goto out_fail;
+ }
+
+ usnic_dbg("Allocating port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+ return port_num;
+
+out_fail:
+ return 0;
+}
+
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ usnic_err("Unreserved unvalid port num 0 for %s\n",
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+
+ if (!test_bit(port_num, roce_bitmap)) {
+ usnic_err("Unreserving invalid %hu for %s\n",
+ port_num,
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+ bitmap_clear(roce_bitmap, port_num, 1);
+ usnic_dbg("Freeing port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+out_roce_custom:
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
+ }
+}
+
+struct socket *usnic_transport_get_socket(int sock_fd)
+{
+ struct socket *sock;
+ int err;
+ char buf[25];
+
+ /* sockfd_lookup will internally do a fget */
+ sock = sockfd_lookup(sock_fd, &err);
+ if (!sock) {
+ usnic_err("Unable to lookup socket for fd %d with err %d\n",
+ sock_fd, err);
+ return ERR_PTR(-ENOENT);
+ }
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Get sock %s\n", buf);
+
+ return sock;
+}
+
+void usnic_transport_put_socket(struct socket *sock)
+{
+ char buf[100];
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Put sock %s\n", buf);
+ sockfd_put(sock);
+}
+
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port)
+{
+ int err;
+ struct sockaddr_in sock_addr;
+
+ err = sock->ops->getname(sock,
+ (struct sockaddr *)&sock_addr,
+ 0);
+ if (err < 0)
+ return err;
+
+ if (sock_addr.sin_family != AF_INET)
+ return -EINVAL;
+
+ if (proto)
+ *proto = sock->sk->sk_protocol;
+ if (port)
+ *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port);
+ if (addr)
+ *addr = ntohl(((struct sockaddr_in *)
+ &sock_addr)->sin_addr.s_addr);
+
+ return 0;
+}
+
+int usnic_transport_init(void)
+{
+ roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
+ if (!roce_bitmap)
+ return -ENOMEM;
+
+ /* Do not ever allocate bit 0, hence set it here */
+ bitmap_set(roce_bitmap, 0, 1);
+ return 0;
+}
+
+void usnic_transport_fini(void)
+{
+ kfree(roce_bitmap);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h
new file mode 100644
index 000000000..9a7a2d975
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_TRANSPORT_H_
+#define USNIC_TRANSPORT_H_
+
+#include "usnic_abi.h"
+
+const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
+/*
+ * Returns number of bytes written, excluding null terminator. If
+ * nothing was written, the function returns 0.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock);
+/*
+ * Reserve a port. If "port_num" is set, then the function will try
+ * to reserve that particular port.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
+/*
+ * Do a fget on the socket refered to by sock_fd and returns the socket.
+ * Socket will not be destroyed before usnic_transport_put_socket has
+ * been called.
+ */
+struct socket *usnic_transport_get_socket(int sock_fd);
+void usnic_transport_put_socket(struct socket *sock);
+/*
+ * Call usnic_transport_get_socket before calling *_sock_get_addr
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port);
+int usnic_transport_init(void);
+void usnic_transport_fini(void);
+#endif /* !USNIC_TRANSPORT_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
new file mode 100644
index 000000000..9dd39daa6
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/iommu.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_uiom_interval_tree.h"
+
+static struct workqueue_struct *usnic_uiom_wq;
+
+#define USNIC_UIOM_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\
+ ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
+ (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
+
+static void usnic_uiom_reg_account(struct work_struct *work)
+{
+ struct usnic_uiom_reg *umem = container_of(work,
+ struct usnic_uiom_reg, work);
+
+ down_write(&umem->mm->mmap_sem);
+ umem->mm->locked_vm -= umem->diff;
+ up_write(&umem->mm->mmap_sem);
+ mmput(umem->mm);
+ kfree(umem);
+}
+
+static int usnic_uiom_dma_fault(struct iommu_domain *domain,
+ struct device *dev,
+ unsigned long iova, int flags,
+ void *token)
+{
+ usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+ dev_name(dev),
+ domain, iova, flags);
+ return -ENOSYS;
+}
+
+static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
+{
+ struct usnic_uiom_chunk *chunk, *tmp;
+ struct page *page;
+ struct scatterlist *sg;
+ int i;
+ dma_addr_t pa;
+
+ list_for_each_entry_safe(chunk, tmp, chunk_list, list) {
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ page = sg_page(sg);
+ pa = sg_phys(sg);
+ if (!PageDirty(page) && dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
+ usnic_dbg("pa: %pa\n", &pa);
+ }
+ kfree(chunk);
+ }
+}
+
+static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
+ int dmasync, struct list_head *chunk_list)
+{
+ struct page **page_list;
+ struct scatterlist *sg;
+ struct usnic_uiom_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+ int flags;
+ dma_addr_t pa;
+ unsigned int gup_flags;
+
+ /*
+ * If the combination of the addr and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size))
+ return -EINVAL;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ INIT_LIST_HEAD(chunk_list);
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->pinned_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ flags = IOMMU_READ | IOMMU_CACHE;
+ flags |= (writable) ? IOMMU_WRITE : 0;
+ gup_flags = FOLL_WRITE;
+ gup_flags |= (writable) ? 0 : FOLL_FORCE;
+ cur_base = addr & PAGE_MASK;
+ ret = 0;
+
+ while (npages) {
+ ret = get_user_pages_longterm(cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE / sizeof(struct page *)),
+ gup_flags, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ npages -= ret;
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof(*chunk) +
+ sizeof(struct scatterlist) *
+ min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ sg_set_page(sg, page_list[i + off],
+ PAGE_SIZE, 0);
+ pa = sg_phys(sg);
+ usnic_dbg("va: 0x%lx pa: %pa\n",
+ cur_base + i*PAGE_SIZE, &pa);
+ }
+ cur_base += chunk->nents * PAGE_SIZE;
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0)
+ usnic_uiom_put_pages(chunk_list, 0);
+ else
+ current->mm->pinned_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+ return ret;
+}
+
+static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ long unsigned va, size;
+
+ list_for_each_entry_safe(interval, tmp, intervals, link) {
+ va = interval->start << PAGE_SHIFT;
+ size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
+ while (size > 0) {
+ /* Workaround for RH 970401 */
+ usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE);
+ iommu_unmap(pd->domain, va, PAGE_SIZE);
+ va += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ }
+}
+
+static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
+ struct usnic_uiom_reg *uiomr,
+ int dirty)
+{
+ int npages;
+ unsigned long vpn_start, vpn_last;
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int writable = 0;
+ LIST_HEAD(rm_intervals);
+
+ npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+ vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ spin_lock(&pd->lock);
+ usnic_uiom_remove_interval(&pd->root, vpn_start,
+ vpn_last, &rm_intervals);
+ usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
+
+ list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
+ if (interval->flags & IOMMU_WRITE)
+ writable = 1;
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
+ spin_unlock(&pd->lock);
+}
+
+static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_reg *uiomr)
+{
+ int i, err;
+ size_t size;
+ struct usnic_uiom_chunk *chunk;
+ struct usnic_uiom_interval_node *interval_node;
+ dma_addr_t pa;
+ dma_addr_t pa_start = 0;
+ dma_addr_t pa_end = 0;
+ long int va_start = -EINVAL;
+ struct usnic_uiom_pd *pd = uiomr->pd;
+ long int va = uiomr->va & PAGE_MASK;
+ int flags = IOMMU_READ | IOMMU_CACHE;
+
+ flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
+ chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
+ list);
+ list_for_each_entry(interval_node, intervals, link) {
+iter_chunk:
+ for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
+ pa = sg_phys(&chunk->page_list[i]);
+ if ((va >> PAGE_SHIFT) < interval_node->start)
+ continue;
+
+ if ((va >> PAGE_SHIFT) == interval_node->start) {
+ /* First page of the interval */
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ WARN_ON(va_start == -EINVAL);
+
+ if ((pa_end + PAGE_SIZE != pa) &&
+ (pa != pa_start)) {
+ /* PAs are not contiguous */
+ size = pa_end - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ if ((va >> PAGE_SHIFT) == interval_node->last) {
+ /* Last page of the interval */
+ size = pa - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ break;
+ }
+
+ if (pa != pa_start)
+ pa_end += PAGE_SIZE;
+ }
+
+ if (i == chunk->nents) {
+ /*
+ * Hit last entry of the chunk,
+ * hence advance to next chunk
+ */
+ chunk = list_first_entry(&chunk->list,
+ struct usnic_uiom_chunk,
+ list);
+ goto iter_chunk;
+ }
+ }
+
+ return 0;
+
+err_out:
+ usnic_uiom_unmap_sorted_intervals(intervals, pd);
+ return err;
+}
+
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int writable, int dmasync)
+{
+ struct usnic_uiom_reg *uiomr;
+ unsigned long va_base, vpn_start, vpn_last;
+ unsigned long npages;
+ int offset, err;
+ LIST_HEAD(sorted_diff_intervals);
+
+ /*
+ * Intel IOMMU map throws an error if a translation entry is
+ * changed from read to write. This module may not unmap
+ * and then remap the entry after fixing the permission
+ * b/c this open up a small windows where hw DMA may page fault
+ * Hence, make all entries to be writable.
+ */
+ writable = 1;
+
+ va_base = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
+ vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
+ if (!uiomr)
+ return ERR_PTR(-ENOMEM);
+
+ uiomr->va = va_base;
+ uiomr->offset = offset;
+ uiomr->length = size;
+ uiomr->writable = writable;
+ uiomr->pd = pd;
+
+ err = usnic_uiom_get_pages(addr, size, writable, dmasync,
+ &uiomr->chunk_list);
+ if (err) {
+ usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_free_uiomr;
+ }
+
+ spin_lock(&pd->lock);
+ err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0,
+ IOMMU_WRITE,
+ &pd->root,
+ &sorted_diff_intervals);
+ if (err) {
+ usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_pages;
+ }
+
+ err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
+ if (err) {
+ usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_intervals;
+
+ }
+
+ err = usnic_uiom_insert_interval(&pd->root, vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0);
+ if (err) {
+ usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_unmap_intervals;
+ }
+
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+ spin_unlock(&pd->lock);
+
+ return uiomr;
+
+out_unmap_intervals:
+ usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
+out_put_intervals:
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+out_put_pages:
+ usnic_uiom_put_pages(&uiomr->chunk_list, 0);
+ spin_unlock(&pd->lock);
+out_free_uiomr:
+ kfree(uiomr);
+ return ERR_PTR(err);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *ucontext)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned long diff;
+
+ __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+
+ task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
+ if (!task)
+ goto out;
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
+
+ diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
+ */
+ if (ucontext->closing) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
+ uiomr->mm = mm;
+ uiomr->diff = diff;
+
+ queue_work(usnic_uiom_wq, &uiomr->work);
+ return;
+ }
+ } else
+ down_write(&mm->mmap_sem);
+
+ mm->pinned_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+out:
+ kfree(uiomr);
+}
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+{
+ struct usnic_uiom_pd *pd;
+ void *domain;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
+ if (!domain) {
+ usnic_err("Failed to allocate IOMMU domain");
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
+
+ spin_lock_init(&pd->lock);
+ INIT_LIST_HEAD(&pd->devs);
+
+ return pd;
+}
+
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
+{
+ iommu_domain_free(pd->domain);
+ kfree(pd);
+}
+
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int err;
+
+ uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC);
+ if (!uiom_dev)
+ return -ENOMEM;
+ uiom_dev->dev = dev;
+
+ err = iommu_attach_device(pd->domain, dev);
+ if (err)
+ goto out_free_dev;
+
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ usnic_err("IOMMU of %s does not support cache coherency\n",
+ dev_name(dev));
+ err = -EINVAL;
+ goto out_detach_device;
+ }
+
+ spin_lock(&pd->lock);
+ list_add_tail(&uiom_dev->link, &pd->devs);
+ pd->dev_cnt++;
+ spin_unlock(&pd->lock);
+
+ return 0;
+
+out_detach_device:
+ iommu_detach_device(pd->domain, dev);
+out_free_dev:
+ kfree(uiom_dev);
+ return err;
+}
+
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int found = 0;
+
+ spin_lock(&pd->lock);
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ if (uiom_dev->dev == dev) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ usnic_err("Unable to free dev %s - not found\n",
+ dev_name(dev));
+ spin_unlock(&pd->lock);
+ return;
+ }
+
+ list_del(&uiom_dev->link);
+ pd->dev_cnt--;
+ spin_unlock(&pd->lock);
+
+ return iommu_detach_device(pd->domain, dev);
+}
+
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ struct device **devs;
+ int i = 0;
+
+ spin_lock(&pd->lock);
+ devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
+ if (!devs) {
+ devs = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ devs[i++] = uiom_dev->dev;
+ }
+out:
+ spin_unlock(&pd->lock);
+ return devs;
+}
+
+void usnic_uiom_free_dev_list(struct device **devs)
+{
+ kfree(devs);
+}
+
+int usnic_uiom_init(char *drv_name)
+{
+ if (!iommu_present(&pci_bus_type)) {
+ usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
+ return -EPERM;
+ }
+
+ usnic_uiom_wq = create_workqueue(drv_name);
+ if (!usnic_uiom_wq) {
+ usnic_err("Unable to alloc wq for drv %s\n", drv_name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_uiom_fini(void)
+{
+ flush_workqueue(usnic_uiom_wq);
+ destroy_workqueue(usnic_uiom_wq);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
new file mode 100644
index 000000000..8c096acff
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_H_
+#define USNIC_UIOM_H_
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include "usnic_uiom_interval_tree.h"
+
+struct ib_ucontext;
+
+#define USNIC_UIOM_READ (1)
+#define USNIC_UIOM_WRITE (2)
+
+#define USNIC_UIOM_MAX_PD_CNT (1000)
+#define USNIC_UIOM_MAX_MR_CNT (1000000)
+#define USNIC_UIOM_MAX_MR_SIZE (~0UL)
+#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE)
+
+struct usnic_uiom_dev {
+ struct device *dev;
+ struct list_head link;
+};
+
+struct usnic_uiom_pd {
+ struct iommu_domain *domain;
+ spinlock_t lock;
+ struct rb_root_cached root;
+ struct list_head devs;
+ int dev_cnt;
+};
+
+struct usnic_uiom_reg {
+ struct usnic_uiom_pd *pd;
+ unsigned long va;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+struct usnic_uiom_chunk {
+ struct list_head list;
+ int nents;
+ struct scatterlist page_list[0];
+};
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
+ struct device *dev);
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
+void usnic_uiom_free_dev_list(struct device **devs);
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int access, int dmasync);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *ucontext);
+int usnic_uiom_init(char *drv_name);
+void usnic_uiom_fini(void);
+#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
new file mode 100644
index 000000000..d39952320
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+#include <linux/interval_tree_generic.h>
+#include "usnic_uiom_interval_tree.h"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \
+ do { \
+ node = usnic_uiom_interval_node_alloc(start, \
+ end, ref_cnt, flags); \
+ if (!node) { \
+ err = -ENOMEM; \
+ goto err_out; \
+ } \
+ } while (0)
+
+#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list))
+
+#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \
+ err_out, list) \
+ do { \
+ MAKE_NODE(node, start, end, \
+ ref_cnt, flags, err, \
+ err_out); \
+ MARK_FOR_ADD(node, list); \
+ } while (0)
+
+#define FLAGS_EQUAL(flags1, flags2, mask) \
+ (((flags1) & (mask)) == ((flags2) & (mask)))
+
+static struct usnic_uiom_interval_node*
+usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
+ int flags)
+{
+ struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval),
+ GFP_ATOMIC);
+ if (!interval)
+ return NULL;
+
+ interval->start = start;
+ interval->last = last;
+ interval->flags = flags;
+ interval->ref_cnt = ref_cnt;
+
+ return interval;
+}
+
+static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct usnic_uiom_interval_node *node_a, *node_b;
+
+ node_a = list_entry(a, struct usnic_uiom_interval_node, link);
+ node_b = list_entry(b, struct usnic_uiom_interval_node, link);
+
+ /* long to int */
+ if (node_a->start < node_b->start)
+ return -1;
+ else if (node_a->start > node_b->start)
+ return 1;
+
+ return 0;
+}
+
+static void
+find_intervals_intersection_sorted(struct rb_root_cached *root,
+ unsigned long start, unsigned long last,
+ struct list_head *list)
+{
+ struct usnic_uiom_interval_node *node;
+
+ INIT_LIST_HEAD(list);
+
+ for (node = usnic_uiom_interval_tree_iter_first(root, start, last);
+ node;
+ node = usnic_uiom_interval_tree_iter_next(node, start, last))
+ list_add_tail(&node->link, list);
+
+ list_sort(NULL, list, interval_cmp);
+}
+
+int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
+ int flags, int flag_mask,
+ struct rb_root_cached *root,
+ struct list_head *diff_set)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int err = 0;
+ long int pivot = start;
+ LIST_HEAD(intersection_set);
+
+ INIT_LIST_HEAD(diff_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ if (pivot < interval->start) {
+ MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
+ 1, flags, err, err_out,
+ diff_set);
+ pivot = interval->start;
+ }
+
+ /*
+ * Invariant: Set [start, pivot] is either in diff_set or root,
+ * but not in both.
+ */
+
+ if (pivot > interval->last) {
+ continue;
+ } else if (pivot <= interval->last &&
+ FLAGS_EQUAL(interval->flags, flags,
+ flag_mask)) {
+ pivot = interval->last + 1;
+ }
+ }
+
+ if (pivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
+ diff_set);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, diff_set, link) {
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ return err;
+}
+
+void usnic_uiom_put_interval_set(struct list_head *intervals)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ list_for_each_entry_safe(interval, tmp, intervals, link)
+ kfree(interval);
+}
+
+int usnic_uiom_insert_interval(struct rb_root_cached *root, unsigned long start,
+ unsigned long last, int flags)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ unsigned long istart, ilast;
+ int iref_cnt, iflags;
+ unsigned long lpivot = start;
+ int err = 0;
+ LIST_HEAD(to_add);
+ LIST_HEAD(intersection_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ /*
+ * Invariant - lpivot is the left edge of next interval to be
+ * inserted
+ */
+ istart = interval->start;
+ ilast = interval->last;
+ iref_cnt = interval->ref_cnt;
+ iflags = interval->flags;
+
+ if (istart < lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else if (istart > lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
+ err, err_out, &to_add);
+ lpivot = istart;
+ } else {
+ lpivot = istart;
+ }
+
+ if (ilast > last) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ }
+
+ lpivot = ilast + 1;
+ }
+
+ if (lpivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
+ &to_add);
+
+ list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
+ usnic_uiom_interval_tree_remove(interval, root);
+ kfree(interval);
+ }
+
+ list_for_each_entry(interval, &to_add, link)
+ usnic_uiom_interval_tree_insert(interval, root);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, &to_add, link)
+ kfree(interval);
+
+ return err;
+}
+
+void usnic_uiom_remove_interval(struct rb_root_cached *root,
+ unsigned long start, unsigned long last,
+ struct list_head *removed)
+{
+ struct usnic_uiom_interval_node *interval;
+
+ for (interval = usnic_uiom_interval_tree_iter_first(root, start, last);
+ interval;
+ interval = usnic_uiom_interval_tree_iter_next(interval,
+ start,
+ last)) {
+ if (--interval->ref_cnt == 0)
+ list_add_tail(&interval->link, removed);
+ }
+
+ list_for_each_entry(interval, removed, link)
+ usnic_uiom_interval_tree_remove(interval, root);
+}
+
+INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
+ unsigned long, __subtree_last,
+ START, LAST, , usnic_uiom_interval_tree)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
new file mode 100644
index 000000000..1d7fc3226
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_INTERVAL_TREE_H_
+#define USNIC_UIOM_INTERVAL_TREE_H_
+
+#include <linux/rbtree.h>
+
+struct usnic_uiom_interval_node {
+ struct rb_node rb;
+ struct list_head link;
+ unsigned long start;
+ unsigned long last;
+ unsigned long __subtree_last;
+ unsigned int ref_cnt;
+ int flags;
+};
+
+extern void
+usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
+ struct rb_root_cached *root);
+extern void
+usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
+ struct rb_root_cached *root);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_first(struct rb_root_cached *root,
+ unsigned long start,
+ unsigned long last);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
+ unsigned long start, unsigned long last);
+/*
+ * Inserts {start...last} into {root}. If there are overlaps,
+ * nodes will be broken up and merged
+ */
+int usnic_uiom_insert_interval(struct rb_root_cached *root,
+ unsigned long start, unsigned long last,
+ int flags);
+/*
+ * Removed {start...last} from {root}. The nodes removed are returned in
+ * 'removed.' The caller is responsibile for freeing memory of nodes in
+ * 'removed.'
+ */
+void usnic_uiom_remove_interval(struct rb_root_cached *root,
+ unsigned long start, unsigned long last,
+ struct list_head *removed);
+/*
+ * Returns {start...last} - {root} (relative complement of {start...last} in
+ * {root}) in diff_set sorted ascendingly
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start,
+ unsigned long last, int flags,
+ int flag_mask,
+ struct rb_root_cached *root,
+ struct list_head *diff_set);
+/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
+void usnic_uiom_put_interval_set(struct list_head *intervals);
+#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
new file mode 100644
index 000000000..ebe08f348
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "usnic_ib.h"
+#include "vnic_resource.h"
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+
+struct usnic_vnic {
+ struct vnic_dev *vdev;
+ struct vnic_dev_bar bar[PCI_NUM_RESOURCES];
+ struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX];
+ spinlock_t res_lock;
+};
+
+static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ vnic_res_type,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ vnic_res_type,
+ static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return RES_TYPE_MAX;
+
+ return usnic_vnic_type_2_vnic_type[res_type];
+}
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ desc,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ desc,
+ static const char * const usnic_vnic_res_type_desc[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return "unknown";
+
+ return usnic_vnic_res_type_desc[res_type];
+
+}
+
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
+{
+ return pci_name(usnic_vnic_get_pdev(vnic));
+}
+
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
+ int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int))
+{
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_vnic_res *res;
+ struct vnic_dev_bar *bar0;
+ int i, j, offset;
+
+ offset = 0;
+ bar0 = usnic_vnic_get_bar(vnic, 0);
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
+ usnic_vnic_get_index(vnic),
+ &bar0->bus_addr,
+ bar0->vaddr, bar0->len);
+ if (printtitle)
+ offset += printtitle(hdr_obj, buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|RES\t|CTRL_PIN\t\t|IN_USE\t");
+ if (printcols)
+ offset += printcols(buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+
+ spin_lock(&vnic->res_lock);
+ for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) {
+ chunk = &vnic->chunks[i];
+ for (j = 0; j < chunk->cnt; j++) {
+ res = chunk->res[j];
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|%s[%u]\t|0x%p\t|%u\t",
+ usnic_vnic_res_type_to_str(res->type),
+ res->vnic_idx, res->ctrl, !!res->owner);
+ if (printrow) {
+ offset += printrow(res->owner, buf + offset,
+ buf_sz - offset);
+ }
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "\n");
+ }
+ }
+ spin_unlock(&vnic->res_lock);
+ return offset;
+}
+
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt)
+{
+ int i;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ if (spec->resources[i].type == trgt_type) {
+ spec->resources[i].cnt = cnt;
+ return;
+ }
+ }
+
+ WARN_ON(1);
+}
+
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int found, i, j;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ found = 0;
+
+ for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
+ if (res_spec->resources[i].type !=
+ min_spec->resources[i].type)
+ continue;
+ found = 1;
+ if (min_spec->resources[i].cnt >
+ res_spec->resources[i].cnt)
+ return -EINVAL;
+ break;
+ }
+
+ if (!found)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+ int i;
+ int offset = 0;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "Res: %s Cnt: %d ",
+ usnic_vnic_res_type_to_str(res_type),
+ res_cnt);
+ }
+
+ return offset;
+}
+
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int i;
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ if (res_type == USNIC_VNIC_RES_TYPE_EOL)
+ break;
+
+ if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].cnt;
+}
+
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].free_cnt;
+}
+
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+ int cnt, void *owner)
+{
+ struct usnic_vnic_res_chunk *src, *ret;
+ struct usnic_vnic_res *res;
+ int i;
+
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
+ return ERR_PTR(-EINVAL);
+
+ ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ if (cnt > 0) {
+ ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+ if (!ret->res) {
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
+ }
+
+ spin_unlock(&vnic->res_lock);
+ }
+ ret->type = type;
+ ret->vnic = vnic;
+ WARN_ON(ret->cnt != cnt);
+
+ return ret;
+}
+
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+
+ struct usnic_vnic_res *res;
+ int i;
+ struct usnic_vnic *vnic = chunk->vnic;
+
+ if (chunk->cnt > 0) {
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
+ }
+
+ kfree(chunk->res);
+ kfree(chunk);
+}
+
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+ return usnic_vnic_get_pdev(vnic)->devfn - 1;
+}
+
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ struct usnic_vnic_res_chunk *chunk)
+{
+ int cnt, err, i;
+ struct usnic_vnic_res *res;
+
+ cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+ if (cnt < 1) {
+ usnic_err("Wrong res count with cnt %d\n", cnt);
+ return -EINVAL;
+ }
+
+ chunk->cnt = chunk->free_cnt = cnt;
+ chunk->res = kcalloc(cnt, sizeof(*(chunk->res)), GFP_KERNEL);
+ if (!chunk->res)
+ return -ENOMEM;
+
+ for (i = 0; i < cnt; i++) {
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ res->type = type;
+ res->vnic_idx = i;
+ res->vnic = vnic;
+ res->ctrl = vnic_dev_get_res(vnic->vdev,
+ _to_vnic_res_type(type), i);
+ chunk->res[i] = res;
+ }
+
+ chunk->vnic = vnic;
+ return 0;
+fail:
+ for (i--; i >= 0; i--)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+ return err;
+}
+
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+ int i;
+ for (i = 0; i < chunk->cnt; i++)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+}
+
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+ struct usnic_vnic *vnic)
+{
+ enum usnic_vnic_res_type res_type;
+ int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ vnic->bar[i].len = pci_resource_len(pdev, i);
+ vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+ if (!vnic->bar[i].vaddr) {
+ usnic_err("Cannot memory-map BAR %d, aborting\n",
+ i);
+ err = -ENODEV;
+ goto out_clean_bar;
+ }
+ vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+ }
+
+ vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+ ARRAY_SIZE(vnic->bar));
+ if (!vnic->vdev) {
+ usnic_err("Failed to register device %s\n",
+ pci_name(pdev));
+ err = -EINVAL;
+ goto out_clean_bar;
+ }
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+ err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+ &vnic->chunks[res_type]);
+ if (err)
+ goto out_clean_chunks;
+ }
+
+ return 0;
+
+out_clean_chunks:
+ for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+ vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ if (!vnic->bar[i].vaddr)
+ break;
+
+ iounmap(vnic->bar[i].vaddr);
+ }
+
+ return err;
+}
+
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+ return vnic_dev_get_pdev(vnic->vdev);
+}
+
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num)
+{
+ return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL;
+}
+
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+ int i;
+ struct pci_dev *pdev;
+ enum usnic_vnic_res_type res_type;
+
+ pdev = usnic_vnic_get_pdev(vnic);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+ vnic_dev_unregister(vnic->vdev);
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ iounmap(vnic->bar[i].vaddr);
+ }
+}
+
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+ struct usnic_vnic *vnic;
+ int err = 0;
+
+ if (!pci_is_enabled(pdev)) {
+ usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+ if (!vnic)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&vnic->res_lock);
+
+ err = usnic_vnic_discover_resources(pdev, vnic);
+ if (err) {
+ usnic_err("Failed to discover %s resources with err %d\n",
+ pci_name(pdev), err);
+ goto out_free_vnic;
+ }
+
+ usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
+
+ return vnic;
+
+out_free_vnic:
+ kfree(vnic);
+
+ return ERR_PTR(err);
+}
+
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+ usnic_vnic_release_resources(vnic);
+ kfree(vnic);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h
new file mode 100644
index 000000000..a08423e47
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */
+#define USNIC_VNIC_RES_TYPES \
+ DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+ DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+ DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+ DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+ DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+ DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+ USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+struct usnic_vnic_res {
+ enum usnic_vnic_res_type type;
+ unsigned int vnic_idx;
+ struct usnic_vnic *vnic;
+ void __iomem *ctrl;
+ void *owner;
+};
+
+struct usnic_vnic_res_chunk {
+ enum usnic_vnic_res_type type;
+ int cnt;
+ int free_cnt;
+ struct usnic_vnic_res **res;
+ struct usnic_vnic *vnic;
+};
+
+struct usnic_vnic_res_desc {
+ enum usnic_vnic_res_type type;
+ uint16_t cnt;
+};
+
+struct usnic_vnic_res_spec {
+ struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ int cnt,
+ void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
+#endif /*!USNIC_VNIC_H_*/